From 248d9a5b63bba72bfc316b8a48c6163fce5acc22 Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas <paulius.zaleckas@gmail.com> Date: Thu, 18 Feb 2010 21:53:01 +0200 Subject: [PATCH] ARM: Use cache alignment from asm/cache.h Replace the hard-coded ".align 5" (32-byte) directives with ".align L1_CACHE_SHIFT" from asm/cache.h, so that alignment follows the configured L1 cache line size on ARM variants whose cache lines are not 32 bytes. Signed-off-by: Paulius Zaleckas <paulius.zaleckas@gmail.com> --- arch/arm/boot/compressed/head.S | 11 ++++++----- arch/arm/include/asm/dma-mapping.h | 2 +- arch/arm/kernel/entry-armv.S | 31 ++++++++++++++++--------------- arch/arm/kernel/entry-common.S | 7 ++++--- arch/arm/kernel/head.S | 3 ++- arch/arm/kernel/vmlinux.lds.S | 5 +++-- arch/arm/lib/copy_page.S | 2 +- arch/arm/lib/memchr.S | 3 ++- arch/arm/lib/memset.S | 3 ++- arch/arm/lib/memzero.S | 3 ++- arch/arm/lib/strchr.S | 3 ++- arch/arm/lib/strncpy_from_user.S | 3 ++- arch/arm/lib/strnlen_user.S | 3 ++- arch/arm/lib/strrchr.S | 3 ++- arch/arm/mm/abort-ev4.S | 3 ++- arch/arm/mm/abort-nommu.S | 3 ++- 16 files changed, 51 insertions(+), 37 deletions(-) --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -9,6 +9,7 @@ * published by the Free Software Foundation. 
*/ #include <linux/linkage.h> +#include <asm/cache.h> /* * Debugging stuff @@ -355,7 +356,7 @@ params: ldr r0, =0x10000100 @ params_p * This routine must preserve: * r4, r5, r6, r7, r8 */ - .align 5 + .align L1_CACHE_SHIFT cache_on: mov r3, #8 @ cache_on function b call_cache_fn @@ -544,7 +545,7 @@ __common_mmu_cache_on: mcr p15, 0, r3, c2, c0, 0 @ load page table pointer mcr p15, 0, r1, c3, c0, 0 @ load domain access control b 1f - .align 5 @ cache line aligned + .align L1_CACHE_SHIFT @ cache line aligned 1: mcr p15, 0, r0, c1, c0, 0 @ load control register mrc p15, 0, r0, c1, c0, 0 @ and read it back to sub pc, lr, r0, lsr #32 @ properly flush pipeline @@ -563,7 +564,7 @@ __common_mmu_cache_on: * r8 = atags pointer * r9-r12,r14 = corrupted */ - .align 5 + .align L1_CACHE_SHIFT reloc_start: add r9, r5, r0 sub r9, r9, #128 @ do not copy the stack debug_reloc_start @@ -793,7 +794,7 @@ proc_types: * This routine must preserve: * r4, r6, r7 */ - .align 5 + .align L1_CACHE_SHIFT cache_off: mov r3, #12 @ cache_off function b call_cache_fn @@ -868,7 +869,7 @@ __armv3_mmu_cache_off: * This routine must preserve: * r0, r4, r5, r6, r7 */ - .align 5 + .align L1_CACHE_SHIFT cache_clean_flush: mov r3, #16 b call_cache_fn --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -23,6 +23,7 @@ #include <asm/unwind.h> #include <asm/unistd.h> #include <asm/tls.h> +#include <asm/cache.h> #include "entry-header.S" @@ -167,7 +168,7 @@ ENDPROC(__und_invalid) stmia r5, {r0 - r4} .endm - .align 5 + .align L1_CACHE_SHIFT __dabt_svc: svc_entry @@ -215,7 +216,7 @@ __dabt_svc: UNWIND(.fnend ) ENDPROC(__dabt_svc) - .align 5 + .align L1_CACHE_SHIFT __irq_svc: svc_entry @@ -259,7 +260,7 @@ svc_preempt: b 1b #endif - .align 5 + .align L1_CACHE_SHIFT __und_svc: #ifdef CONFIG_KPROBES @ If a kprobe is about to simulate a "stmdb sp..." 
instruction, @@ -305,7 +306,7 @@ __und_svc: UNWIND(.fnend ) ENDPROC(__und_svc) - .align 5 + .align L1_CACHE_SHIFT __pabt_svc: svc_entry @@ -341,7 +342,7 @@ __pabt_svc: UNWIND(.fnend ) ENDPROC(__pabt_svc) - .align 5 + .align L1_CACHE_SHIFT .LCcralign: .word cr_alignment #ifdef MULTI_DABORT @@ -414,7 +415,7 @@ ENDPROC(__pabt_svc) #endif .endm - .align 5 + .align L1_CACHE_SHIFT __dabt_usr: usr_entry kuser_cmpxchg_check @@ -446,7 +447,7 @@ __dabt_usr: UNWIND(.fnend ) ENDPROC(__dabt_usr) - .align 5 + .align L1_CACHE_SHIFT __irq_usr: usr_entry kuser_cmpxchg_check @@ -475,7 +476,7 @@ ENDPROC(__irq_usr) .ltorg - .align 5 + .align L1_CACHE_SHIFT __und_usr: usr_entry @@ -691,7 +692,7 @@ __und_usr_unknown: b do_undefinstr ENDPROC(__und_usr_unknown) - .align 5 + .align L1_CACHE_SHIFT __pabt_usr: usr_entry @@ -805,7 +806,7 @@ ENDPROC(__switch_to) #endif .endm - .align 5 + .align L1_CACHE_SHIFT .globl __kuser_helper_start __kuser_helper_start: @@ -845,7 +846,7 @@ __kuser_memory_barrier: @ 0xffff0fa0 smp_dmb usr_ret lr - .align 5 + .align L1_CACHE_SHIFT /* * Reference prototype: @@ -972,7 +973,7 @@ kuser_cmpxchg_fixup: #endif - .align 5 + .align L1_CACHE_SHIFT /* * Reference prototype: @@ -1050,7 +1051,7 @@ __kuser_helper_end: * of which is copied into r0 for the mode specific abort handler. */ .macro vector_stub, name, mode, correction=0 - .align 5 + .align L1_CACHE_SHIFT vector_\name: .if \correction @@ -1181,7 +1182,7 @@ __stubs_start: .long __und_invalid @ e .long __und_invalid @ f - .align 5 + .align L1_CACHE_SHIFT /*============================================================================= * Undefined FIQs @@ -1211,7 +1212,7 @@ vector_addrexcptn: * We group all the following data together to optimise * for CPUs with separate I & D caches. 
*/ - .align 5 + .align L1_CACHE_SHIFT .LCvswi: .word vector_swi --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -10,13 +10,14 @@ #include <asm/unistd.h> #include <asm/ftrace.h> +#include <asm/cache.h> #include <mach/entry-macro.S> #include <asm/unwind.h> #include "entry-header.S" - .align 5 + .align L1_CACHE_SHIFT /* * This is the fast syscall return path. We do as little as * possible here, and this includes saving r0 back into the SVC @@ -260,7 +261,7 @@ ENDPROC(ftrace_stub) #define A710(code...) #endif - .align 5 + .align L1_CACHE_SHIFT ENTRY(vector_swi) sub sp, sp, #S_FRAME_SIZE stmia sp, {r0 - r12} @ Calling r0 - r12 @@ -404,7 +405,7 @@ __sys_trace_return: bl syscall_trace b ret_slow_syscall - .align 5 + .align L1_CACHE_SHIFT #ifdef CONFIG_ALIGNMENT_TRAP .type __cr_alignment, #object __cr_alignment: --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -21,6 +21,7 @@ #include <asm/memory.h> #include <asm/thread_info.h> #include <asm/system.h> +#include <asm/cache.h> #ifdef CONFIG_DEBUG_LL #include <mach/debug-macro.S> @@ -373,7 +374,7 @@ ENDPROC(__enable_mmu) * * other registers depend on the function called upon completion */ - .align 5 + .align L1_CACHE_SHIFT __turn_mmu_on: mov r0, r0 mcr p15, 0, r0, c1, c0, 0 @ write control reg --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -7,6 +7,7 @@ #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/page.h> +#include <asm/cache.h> #define PROC_INFO \ VMLINUX_SYMBOL(__proc_info_begin) = .; \ --- a/arch/arm/lib/copy_page.S +++ b/arch/arm/lib/copy_page.S @@ -17,7 +17,7 @@ #define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 )) .text - .align 5 + .align L1_CACHE_SHIFT /* * StrongARM optimised copy_page routine * now 1.78bytes/cycle, was 1.60 bytes/cycle (50MHz bus -> 89MB/s) --- a/arch/arm/lib/memchr.S +++ b/arch/arm/lib/memchr.S @@ -11,9 +11,10 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/cache.h> .text - 
.align 5 + .align L1_CACHE_SHIFT ENTRY(memchr) 1: subs r2, r2, #1 bmi 2f --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -11,9 +11,10 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/cache.h> .text - .align 5 + .align L1_CACHE_SHIFT .word 0 1: subs r2, r2, #4 @ 1 do we have enough --- a/arch/arm/lib/memzero.S +++ b/arch/arm/lib/memzero.S @@ -9,9 +9,10 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/cache.h> .text - .align 5 + .align L1_CACHE_SHIFT .word 0 /* * Align the pointer in r0. r3 contains the number of bytes that we are --- a/arch/arm/lib/strchr.S +++ b/arch/arm/lib/strchr.S @@ -11,9 +11,10 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/cache.h> .text - .align 5 + .align L1_CACHE_SHIFT ENTRY(strchr) and r1, r1, #0xff 1: ldrb r2, [r0], #1 --- a/arch/arm/lib/strncpy_from_user.S +++ b/arch/arm/lib/strncpy_from_user.S @@ -10,9 +10,10 @@ #include <linux/linkage.h> #include <asm/assembler.h> #include <asm/errno.h> +#include <asm/cache.h> .text - .align 5 + .align L1_CACHE_SHIFT /* * Copy a string from user space to kernel space. 
--- a/arch/arm/lib/strnlen_user.S +++ b/arch/arm/lib/strnlen_user.S @@ -10,9 +10,10 @@ #include <linux/linkage.h> #include <asm/assembler.h> #include <asm/errno.h> +#include <asm/cache.h> .text - .align 5 + .align L1_CACHE_SHIFT /* Prototype: unsigned long __strnlen_user(const char *str, long n) * Purpose : get length of a string in user memory --- a/arch/arm/lib/strrchr.S +++ b/arch/arm/lib/strrchr.S @@ -11,9 +11,10 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/cache.h> .text - .align 5 + .align L1_CACHE_SHIFT ENTRY(strrchr) mov r3, #0 1: ldrb r2, [r0], #1 --- a/arch/arm/mm/abort-ev4.S +++ b/arch/arm/mm/abort-ev4.S @@ -1,5 +1,6 @@ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/cache.h> /* * Function: v4_early_abort * @@ -17,7 +18,7 @@ * abort here if the I-TLB and D-TLB aren't seeing the same * picture. Unfortunately, this does happen. We live with it. */ - .align 5 + .align L1_CACHE_SHIFT ENTRY(v4_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR --- a/arch/arm/mm/abort-nommu.S +++ b/arch/arm/mm/abort-nommu.S @@ -1,5 +1,6 @@ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/cache.h> /* * Function: nommu_early_abort * @@ -12,7 +13,7 @@ * Note: There is no FSR/FAR on !CPU_CP15_MMU cores. * Just fill zero into the registers. */ - .align 5 + .align L1_CACHE_SHIFT ENTRY(nommu_early_abort) mov r0, #0 @ clear r0, r1 (no FSR/FAR) mov r1, #0