439 lines
9.8 KiB
Diff
439 lines
9.8 KiB
Diff
From 248d9a5b63bba72bfc316b8a48c6163fce5acc22 Mon Sep 17 00:00:00 2001
|
|
From: Paulius Zaleckas <paulius.zaleckas@gmail.com>
|
|
Date: Thu, 18 Feb 2010 21:53:01 +0200
|
|
Subject: [PATCH] ARM: Use cache alignment from asm/cache.h
|
|
|
|
Replace the hard-coded ".align 5" directives with ".align L1_CACHE_SHIFT"
|
|
from asm/cache.h so that alignment follows the cache line size of each ARM variant.
|
|
|
|
Signed-off-by: Paulius Zaleckas <paulius.zaleckas@gmail.com>
|
|
---
|
|
arch/arm/boot/compressed/head.S | 11 ++++++-----
|
|
arch/arm/include/asm/dma-mapping.h | 2 +-
|
|
arch/arm/kernel/entry-armv.S | 31 ++++++++++++++++---------------
|
|
arch/arm/kernel/entry-common.S | 7 ++++---
|
|
arch/arm/kernel/head.S | 3 ++-
|
|
arch/arm/kernel/vmlinux.lds.S | 5 +++--
|
|
arch/arm/lib/copy_page.S | 2 +-
|
|
arch/arm/lib/memchr.S | 3 ++-
|
|
arch/arm/lib/memset.S | 3 ++-
|
|
arch/arm/lib/memzero.S | 3 ++-
|
|
arch/arm/lib/strchr.S | 3 ++-
|
|
arch/arm/lib/strncpy_from_user.S | 3 ++-
|
|
arch/arm/lib/strnlen_user.S | 3 ++-
|
|
arch/arm/lib/strrchr.S | 3 ++-
|
|
arch/arm/mm/abort-ev4.S | 3 ++-
|
|
arch/arm/mm/abort-nommu.S | 3 ++-
|
|
16 files changed, 51 insertions(+), 37 deletions(-)
|
|
|
|
--- a/arch/arm/boot/compressed/head.S
|
|
+++ b/arch/arm/boot/compressed/head.S
|
|
@@ -9,6 +9,7 @@
|
|
* published by the Free Software Foundation.
|
|
*/
|
|
#include <linux/linkage.h>
|
|
+#include <asm/cache.h>
|
|
|
|
/*
|
|
* Debugging stuff
|
|
@@ -349,7 +350,7 @@ params: ldr r0, =0x10000100 @ params_p
|
|
* This routine must preserve:
|
|
* r4, r5, r6, r7, r8
|
|
*/
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
cache_on: mov r3, #8 @ cache_on function
|
|
b call_cache_fn
|
|
|
|
@@ -537,7 +538,7 @@ __common_mmu_cache_on:
|
|
mcr p15, 0, r3, c2, c0, 0 @ load page table pointer
|
|
mcr p15, 0, r1, c3, c0, 0 @ load domain access control
|
|
b 1f
|
|
- .align 5 @ cache line aligned
|
|
+ .align L1_CACHE_SHIFT @ cache line aligned
|
|
1: mcr p15, 0, r0, c1, c0, 0 @ load control register
|
|
mrc p15, 0, r0, c1, c0, 0 @ and read it back to
|
|
sub pc, lr, r0, lsr #32 @ properly flush pipeline
|
|
@@ -556,7 +557,7 @@ __common_mmu_cache_on:
|
|
* r8 = atags pointer
|
|
* r9-r12,r14 = corrupted
|
|
*/
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
reloc_start: add r9, r5, r0
|
|
sub r9, r9, #128 @ do not copy the stack
|
|
debug_reloc_start
|
|
@@ -786,7 +787,7 @@ proc_types:
|
|
* This routine must preserve:
|
|
* r4, r6, r7
|
|
*/
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
cache_off: mov r3, #12 @ cache_off function
|
|
b call_cache_fn
|
|
|
|
@@ -861,7 +862,7 @@ __armv3_mmu_cache_off:
|
|
* This routine must preserve:
|
|
* r0, r4, r5, r6, r7
|
|
*/
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
cache_clean_flush:
|
|
mov r3, #16
|
|
b call_cache_fn
|
|
--- a/arch/arm/kernel/entry-armv.S
|
|
+++ b/arch/arm/kernel/entry-armv.S
|
|
@@ -23,6 +23,7 @@
|
|
#include <asm/unwind.h>
|
|
#include <asm/unistd.h>
|
|
#include <asm/tls.h>
|
|
+#include <asm/cache.h>
|
|
|
|
#include "entry-header.S"
|
|
|
|
@@ -165,7 +166,7 @@ ENDPROC(__und_invalid)
|
|
stmia r5, {r0 - r4}
|
|
.endm
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
__dabt_svc:
|
|
svc_entry
|
|
|
|
@@ -213,7 +214,7 @@ __dabt_svc:
|
|
UNWIND(.fnend )
|
|
ENDPROC(__dabt_svc)
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
__irq_svc:
|
|
svc_entry
|
|
|
|
@@ -257,7 +258,7 @@ svc_preempt:
|
|
b 1b
|
|
#endif
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
__und_svc:
|
|
#ifdef CONFIG_KPROBES
|
|
@ If a kprobe is about to simulate a "stmdb sp..." instruction,
|
|
@@ -303,7 +304,7 @@ __und_svc:
|
|
UNWIND(.fnend )
|
|
ENDPROC(__und_svc)
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
__pabt_svc:
|
|
svc_entry
|
|
|
|
@@ -339,7 +340,7 @@ __pabt_svc:
|
|
UNWIND(.fnend )
|
|
ENDPROC(__pabt_svc)
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
.LCcralign:
|
|
.word cr_alignment
|
|
#ifdef MULTI_DABORT
|
|
@@ -412,7 +413,7 @@ ENDPROC(__pabt_svc)
|
|
#endif
|
|
.endm
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
__dabt_usr:
|
|
usr_entry
|
|
kuser_cmpxchg_check
|
|
@@ -444,7 +445,7 @@ __dabt_usr:
|
|
UNWIND(.fnend )
|
|
ENDPROC(__dabt_usr)
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
__irq_usr:
|
|
usr_entry
|
|
kuser_cmpxchg_check
|
|
@@ -473,7 +474,7 @@ ENDPROC(__irq_usr)
|
|
|
|
.ltorg
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
__und_usr:
|
|
usr_entry
|
|
|
|
@@ -689,7 +690,7 @@ __und_usr_unknown:
|
|
b do_undefinstr
|
|
ENDPROC(__und_usr_unknown)
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
__pabt_usr:
|
|
usr_entry
|
|
|
|
@@ -803,7 +804,7 @@ ENDPROC(__switch_to)
|
|
#endif
|
|
.endm
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
.globl __kuser_helper_start
|
|
__kuser_helper_start:
|
|
|
|
@@ -843,7 +844,7 @@ __kuser_memory_barrier: @ 0xffff0fa0
|
|
smp_dmb
|
|
usr_ret lr
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
|
|
/*
|
|
* Reference prototype:
|
|
@@ -973,7 +974,7 @@ kuser_cmpxchg_fixup:
|
|
|
|
#endif
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
|
|
/*
|
|
* Reference prototype:
|
|
@@ -1051,7 +1052,7 @@ __kuser_helper_end:
|
|
* of which is copied into r0 for the mode specific abort handler.
|
|
*/
|
|
.macro vector_stub, name, mode, correction=0
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
|
|
vector_\name:
|
|
.if \correction
|
|
@@ -1182,7 +1183,7 @@ __stubs_start:
|
|
.long __und_invalid @ e
|
|
.long __und_invalid @ f
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
|
|
/*=============================================================================
|
|
* Undefined FIQs
|
|
@@ -1212,7 +1213,7 @@ vector_addrexcptn:
|
|
* We group all the following data together to optimise
|
|
* for CPUs with separate I & D caches.
|
|
*/
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
|
|
.LCvswi:
|
|
.word vector_swi
|
|
--- a/arch/arm/kernel/entry-common.S
|
|
+++ b/arch/arm/kernel/entry-common.S
|
|
@@ -10,13 +10,14 @@
|
|
|
|
#include <asm/unistd.h>
|
|
#include <asm/ftrace.h>
|
|
+#include <asm/cache.h>
|
|
#include <mach/entry-macro.S>
|
|
#include <asm/unwind.h>
|
|
|
|
#include "entry-header.S"
|
|
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
/*
|
|
* This is the fast syscall return path. We do as little as
|
|
* possible here, and this includes saving r0 back into the SVC
|
|
@@ -221,7 +222,7 @@ ftrace_stub:
|
|
#define A710(code...)
|
|
#endif
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
ENTRY(vector_swi)
|
|
sub sp, sp, #S_FRAME_SIZE
|
|
stmia sp, {r0 - r12} @ Calling r0 - r12
|
|
@@ -354,7 +355,7 @@ __sys_trace_return:
|
|
bl syscall_trace
|
|
b ret_slow_syscall
|
|
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
#ifdef CONFIG_ALIGNMENT_TRAP
|
|
.type __cr_alignment, #object
|
|
__cr_alignment:
|
|
--- a/arch/arm/kernel/head.S
|
|
+++ b/arch/arm/kernel/head.S
|
|
@@ -21,6 +21,7 @@
|
|
#include <asm/memory.h>
|
|
#include <asm/thread_info.h>
|
|
#include <asm/system.h>
|
|
+#include <asm/cache.h>
|
|
|
|
#if (PHYS_OFFSET & 0x001fffff)
|
|
#error "PHYS_OFFSET must be at an even 2MiB boundary!"
|
|
@@ -192,7 +193,7 @@ ENDPROC(__enable_mmu)
|
|
*
|
|
* other registers depend on the function called upon completion
|
|
*/
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
__turn_mmu_on:
|
|
mov r0, r0
|
|
mcr p15, 0, r0, c1, c0, 0 @ write control reg
|
|
--- a/arch/arm/kernel/vmlinux.lds.S
|
|
+++ b/arch/arm/kernel/vmlinux.lds.S
|
|
@@ -7,6 +7,7 @@
|
|
#include <asm/thread_info.h>
|
|
#include <asm/memory.h>
|
|
#include <asm/page.h>
|
|
+#include <asm/cache.h>
|
|
|
|
OUTPUT_ARCH(arm)
|
|
ENTRY(stext)
|
|
--- a/arch/arm/lib/copy_page.S
|
|
+++ b/arch/arm/lib/copy_page.S
|
|
@@ -17,7 +17,7 @@
|
|
#define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 ))
|
|
|
|
.text
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
/*
|
|
* StrongARM optimised copy_page routine
|
|
* now 1.78bytes/cycle, was 1.60 bytes/cycle (50MHz bus -> 89MB/s)
|
|
--- a/arch/arm/lib/memchr.S
|
|
+++ b/arch/arm/lib/memchr.S
|
|
@@ -11,9 +11,10 @@
|
|
*/
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
+#include <asm/cache.h>
|
|
|
|
.text
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
ENTRY(memchr)
|
|
1: subs r2, r2, #1
|
|
bmi 2f
|
|
--- a/arch/arm/lib/memset.S
|
|
+++ b/arch/arm/lib/memset.S
|
|
@@ -11,9 +11,10 @@
|
|
*/
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
+#include <asm/cache.h>
|
|
|
|
.text
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
.word 0
|
|
|
|
1: subs r2, r2, #4 @ 1 do we have enough
|
|
--- a/arch/arm/lib/memzero.S
|
|
+++ b/arch/arm/lib/memzero.S
|
|
@@ -9,9 +9,10 @@
|
|
*/
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
+#include <asm/cache.h>
|
|
|
|
.text
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
.word 0
|
|
/*
|
|
* Align the pointer in r0. r3 contains the number of bytes that we are
|
|
--- a/arch/arm/lib/strchr.S
|
|
+++ b/arch/arm/lib/strchr.S
|
|
@@ -11,9 +11,10 @@
|
|
*/
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
+#include <asm/cache.h>
|
|
|
|
.text
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
ENTRY(strchr)
|
|
and r1, r1, #0xff
|
|
1: ldrb r2, [r0], #1
|
|
--- a/arch/arm/lib/strncpy_from_user.S
|
|
+++ b/arch/arm/lib/strncpy_from_user.S
|
|
@@ -10,9 +10,10 @@
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
#include <asm/errno.h>
|
|
+#include <asm/cache.h>
|
|
|
|
.text
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
|
|
/*
|
|
* Copy a string from user space to kernel space.
|
|
--- a/arch/arm/lib/strnlen_user.S
|
|
+++ b/arch/arm/lib/strnlen_user.S
|
|
@@ -10,9 +10,10 @@
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
#include <asm/errno.h>
|
|
+#include <asm/cache.h>
|
|
|
|
.text
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
|
|
/* Prototype: unsigned long __strnlen_user(const char *str, long n)
|
|
* Purpose : get length of a string in user memory
|
|
--- a/arch/arm/lib/strrchr.S
|
|
+++ b/arch/arm/lib/strrchr.S
|
|
@@ -11,9 +11,10 @@
|
|
*/
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
+#include <asm/cache.h>
|
|
|
|
.text
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
ENTRY(strrchr)
|
|
mov r3, #0
|
|
1: ldrb r2, [r0], #1
|
|
--- a/arch/arm/mm/abort-ev4.S
|
|
+++ b/arch/arm/mm/abort-ev4.S
|
|
@@ -1,5 +1,6 @@
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
+#include <asm/cache.h>
|
|
/*
|
|
* Function: v4_early_abort
|
|
*
|
|
@@ -17,7 +18,7 @@
|
|
* abort here if the I-TLB and D-TLB aren't seeing the same
|
|
* picture. Unfortunately, this does happen. We live with it.
|
|
*/
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
ENTRY(v4_early_abort)
|
|
mrc p15, 0, r1, c5, c0, 0 @ get FSR
|
|
mrc p15, 0, r0, c6, c0, 0 @ get FAR
|
|
--- a/arch/arm/mm/abort-nommu.S
|
|
+++ b/arch/arm/mm/abort-nommu.S
|
|
@@ -1,5 +1,6 @@
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
+#include <asm/cache.h>
|
|
/*
|
|
* Function: nommu_early_abort
|
|
*
|
|
@@ -12,7 +13,7 @@
|
|
* Note: There is no FSR/FAR on !CPU_CP15_MMU cores.
|
|
* Just fill zero into the registers.
|
|
*/
|
|
- .align 5
|
|
+ .align L1_CACHE_SHIFT
|
|
ENTRY(nommu_early_abort)
|
|
mov r0, #0 @ clear r0, r1 (no FSR/FAR)
|
|
mov r1, #0
|