diff options
author | Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> | 2013-05-16 05:34:30 -0400 |
---|---|---|
committer | Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> | 2013-06-20 06:24:11 -0400 |
commit | 7604537bbb5720376e8c9e6bc74a8e6305e3094d (patch) | |
tree | d832833f3217e04eac90b0be1c9359ace9763d7a /arch | |
parent | 8cf72172d739639f2699131821a3ebc291287cf2 (diff) |
ARM: kernel: implement stack pointer save array through MPIDR hashing
The current implementation of cpu_{suspend}/cpu_{resume} relies on the MPIDR
to index the array of pointers where the context is saved and restored.
The current approach works as long as the MPIDR can be considered a
linear index, so that the pointers array can simply be dereferenced by
using the MPIDR[7:0] value.
On ARM multi-cluster systems, where the MPIDR may not be a linear index,
a mapping function should be applied to the MPIDR so that it can be used
for array look-ups and the stack pointer array is dereferenced properly.
This patch adds code in the cpu_{suspend}/cpu_{resume} implementation
that relies on a shift-and-OR hashing method to map an MPIDR value to a
set of buckets precomputed at boot, giving a collision-free mapping from
MPIDR to context pointers.
The hashing algorithm must be simple, fast, and implementable with few
instructions since in the cpu_resume path the mapping is carried out with
the MMU off and the I-cache off, hence code and data are fetched from DRAM
with no caching available. This simplicity comes at the cost of a small
increase in memory (allocated dynamically) for the stack pointer buckets,
which should in any case be fairly limited on most systems.
Memory for context pointers is allocated in an early_initcall with
a size precomputed and stashed previously in kernel data structures.
Memory for context pointers is allocated through kmalloc; this
guarantees contiguous physical addresses for the allocated memory, which
is fundamental to the correct functioning of the resume mechanism, since it
relies on the context pointer array being a chunk of contiguous physical
memory. Virtual to physical address conversion for the context pointer
array base is carried out at boot to avoid fiddling with virt_to_phys
conversions in the cpu_resume path, which is quite fragile and should be
optimized to execute as few instructions as possible.
Virtual and physical context pointer base array addresses are stashed in a
struct that is accessible from assembly using values generated through the
asm-offsets.c mechanism.
Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Colin Cross <ccross@android.com>
Cc: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Amit Kucheria <amit.kucheria@linaro.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Shawn Guo <shawn.guo@linaro.org>
Tested-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Stephen Warren <swarren@wwwdotorg.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/include/asm/smp_plat.h | 10 | ||||
-rw-r--r-- | arch/arm/include/asm/suspend.h | 5 | ||||
-rw-r--r-- | arch/arm/kernel/asm-offsets.c | 6 | ||||
-rw-r--r-- | arch/arm/kernel/sleep.S | 97 | ||||
-rw-r--r-- | arch/arm/kernel/suspend.c | 20 |
5 files changed, 118 insertions, 20 deletions
diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index f75f8a234b3f..6e63f29f41b7 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h | |||
@@ -70,9 +70,15 @@ static inline int get_logical_index(u32 mpidr) | |||
70 | return -EINVAL; | 70 | return -EINVAL; |
71 | } | 71 | } |
72 | 72 | ||
73 | /* | ||
74 | * NOTE ! Assembly code relies on the following | ||
75 | * structure memory layout in order to carry out load | ||
76 | * multiple from its base address. For more | ||
77 | * information check arch/arm/kernel/sleep.S | ||
78 | */ | ||
73 | struct mpidr_hash { | 79 | struct mpidr_hash { |
74 | u32 mask; | 80 | u32 mask; /* used by sleep.S */ |
75 | u32 shift_aff[3]; | 81 | u32 shift_aff[3]; /* used by sleep.S */ |
76 | u32 bits; | 82 | u32 bits; |
77 | }; | 83 | }; |
78 | 84 | ||
diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h index 1c0a551ae375..cd20029bcd94 100644 --- a/arch/arm/include/asm/suspend.h +++ b/arch/arm/include/asm/suspend.h | |||
@@ -1,6 +1,11 @@ | |||
1 | #ifndef __ASM_ARM_SUSPEND_H | 1 | #ifndef __ASM_ARM_SUSPEND_H |
2 | #define __ASM_ARM_SUSPEND_H | 2 | #define __ASM_ARM_SUSPEND_H |
3 | 3 | ||
4 | struct sleep_save_sp { | ||
5 | u32 *save_ptr_stash; | ||
6 | u32 save_ptr_stash_phys; | ||
7 | }; | ||
8 | |||
4 | extern void cpu_resume(void); | 9 | extern void cpu_resume(void); |
5 | extern int cpu_suspend(unsigned long, int (*)(unsigned long)); | 10 | extern int cpu_suspend(unsigned long, int (*)(unsigned long)); |
6 | 11 | ||
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index ee68cce6b48e..ded041711beb 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <asm/thread_info.h> | 23 | #include <asm/thread_info.h> |
24 | #include <asm/memory.h> | 24 | #include <asm/memory.h> |
25 | #include <asm/procinfo.h> | 25 | #include <asm/procinfo.h> |
26 | #include <asm/suspend.h> | ||
26 | #include <asm/hardware/cache-l2x0.h> | 27 | #include <asm/hardware/cache-l2x0.h> |
27 | #include <linux/kbuild.h> | 28 | #include <linux/kbuild.h> |
28 | 29 | ||
@@ -145,6 +146,11 @@ int main(void) | |||
145 | #ifdef MULTI_CACHE | 146 | #ifdef MULTI_CACHE |
146 | DEFINE(CACHE_FLUSH_KERN_ALL, offsetof(struct cpu_cache_fns, flush_kern_all)); | 147 | DEFINE(CACHE_FLUSH_KERN_ALL, offsetof(struct cpu_cache_fns, flush_kern_all)); |
147 | #endif | 148 | #endif |
149 | #ifdef CONFIG_ARM_CPU_SUSPEND | ||
150 | DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); | ||
151 | DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); | ||
152 | DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); | ||
153 | #endif | ||
148 | BLANK(); | 154 | BLANK(); |
149 | DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL); | 155 | DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL); |
150 | DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); | 156 | DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); |
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 987dcf33415c..db1536b8b30b 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S | |||
@@ -7,6 +7,49 @@ | |||
7 | .text | 7 | .text |
8 | 8 | ||
9 | /* | 9 | /* |
10 | * Implementation of MPIDR hash algorithm through shifting | ||
11 | * and OR'ing. | ||
12 | * | ||
13 | * @dst: register containing hash result | ||
14 | * @rs0: register containing affinity level 0 bit shift | ||
15 | * @rs1: register containing affinity level 1 bit shift | ||
16 | * @rs2: register containing affinity level 2 bit shift | ||
17 | * @mpidr: register containing MPIDR value | ||
18 | * @mask: register containing MPIDR mask | ||
19 | * | ||
20 | * Pseudo C-code: | ||
21 | * | ||
22 | *u32 dst; | ||
23 | * | ||
24 | *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 mpidr, u32 mask) { | ||
25 | * u32 aff0, aff1, aff2; | ||
26 | * u32 mpidr_masked = mpidr & mask; | ||
27 | * aff0 = mpidr_masked & 0xff; | ||
28 | * aff1 = mpidr_masked & 0xff00; | ||
29 | * aff2 = mpidr_masked & 0xff0000; | ||
30 | * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2); | ||
31 | *} | ||
32 | * Input registers: rs0, rs1, rs2, mpidr, mask | ||
33 | * Output register: dst | ||
34 | * Note: input and output registers must be disjoint register sets | ||
35 | (eg: a macro instance with mpidr = r1 and dst = r1 is invalid) | ||
36 | */ | ||
37 | .macro compute_mpidr_hash dst, rs0, rs1, rs2, mpidr, mask | ||
38 | and \mpidr, \mpidr, \mask @ mask out MPIDR bits | ||
39 | and \dst, \mpidr, #0xff @ mask=aff0 | ||
40 | ARM( mov \dst, \dst, lsr \rs0 ) @ dst=aff0>>rs0 | ||
41 | THUMB( lsr \dst, \dst, \rs0 ) | ||
42 | and \mask, \mpidr, #0xff00 @ mask = aff1 | ||
43 | ARM( orr \dst, \dst, \mask, lsr \rs1 ) @ dst|=(aff1>>rs1) | ||
44 | THUMB( lsr \mask, \mask, \rs1 ) | ||
45 | THUMB( orr \dst, \dst, \mask ) | ||
46 | and \mask, \mpidr, #0xff0000 @ mask = aff2 | ||
47 | ARM( orr \dst, \dst, \mask, lsr \rs2 ) @ dst|=(aff2>>rs2) | ||
48 | THUMB( lsr \mask, \mask, \rs2 ) | ||
49 | THUMB( orr \dst, \dst, \mask ) | ||
50 | .endm | ||
51 | |||
52 | /* | ||
10 | * Save CPU state for a suspend. This saves the CPU general purpose | 53 | * Save CPU state for a suspend. This saves the CPU general purpose |
11 | * registers, and allocates space on the kernel stack to save the CPU | 54 | * registers, and allocates space on the kernel stack to save the CPU |
12 | * specific registers and some other data for resume. | 55 | * specific registers and some other data for resume. |
@@ -29,12 +72,18 @@ ENTRY(__cpu_suspend) | |||
29 | mov r1, r4 @ size of save block | 72 | mov r1, r4 @ size of save block |
30 | mov r2, r5 @ virtual SP | 73 | mov r2, r5 @ virtual SP |
31 | ldr r3, =sleep_save_sp | 74 | ldr r3, =sleep_save_sp |
32 | #ifdef CONFIG_SMP | 75 | ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] |
33 | ALT_SMP(mrc p15, 0, lr, c0, c0, 5) | 76 | ALT_SMP(mrc p15, 0, r9, c0, c0, 5) |
34 | ALT_UP(mov lr, #0) | 77 | ALT_UP_B(1f) |
35 | and lr, lr, #15 | 78 | ldr r8, =mpidr_hash |
79 | /* | ||
80 | * This ldmia relies on the memory layout of the mpidr_hash | ||
81 | * struct mpidr_hash. | ||
82 | */ | ||
83 | ldmia r8, {r4-r7} @ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts | ||
84 | compute_mpidr_hash lr, r5, r6, r7, r9, r4 | ||
36 | add r3, r3, lr, lsl #2 | 85 | add r3, r3, lr, lsl #2 |
37 | #endif | 86 | 1: |
38 | bl __cpu_suspend_save | 87 | bl __cpu_suspend_save |
39 | adr lr, BSYM(cpu_suspend_abort) | 88 | adr lr, BSYM(cpu_suspend_abort) |
40 | ldmfd sp!, {r0, pc} @ call suspend fn | 89 | ldmfd sp!, {r0, pc} @ call suspend fn |
@@ -81,15 +130,23 @@ ENDPROC(cpu_resume_after_mmu) | |||
81 | .data | 130 | .data |
82 | .align | 131 | .align |
83 | ENTRY(cpu_resume) | 132 | ENTRY(cpu_resume) |
84 | #ifdef CONFIG_SMP | 133 | mov r1, #0 |
85 | adr r0, sleep_save_sp | 134 | ALT_SMP(mrc p15, 0, r0, c0, c0, 5) |
86 | ALT_SMP(mrc p15, 0, r1, c0, c0, 5) | 135 | ALT_UP_B(1f) |
87 | ALT_UP(mov r1, #0) | 136 | adr r2, mpidr_hash_ptr |
88 | and r1, r1, #15 | 137 | ldr r3, [r2] |
89 | ldr r0, [r0, r1, lsl #2] @ stack phys addr | 138 | add r2, r2, r3 @ r2 = struct mpidr_hash phys address |
90 | #else | 139 | /* |
91 | ldr r0, sleep_save_sp @ stack phys addr | 140 | * This ldmia relies on the memory layout of the mpidr_hash |
92 | #endif | 141 | * struct mpidr_hash. |
142 | */ | ||
143 | ldmia r2, { r3-r6 } @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts | ||
144 | compute_mpidr_hash r1, r4, r5, r6, r0, r3 | ||
145 | 1: | ||
146 | adr r0, _sleep_save_sp | ||
147 | ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] | ||
148 | ldr r0, [r0, r1, lsl #2] | ||
149 | |||
93 | setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1 @ set SVC, irqs off | 150 | setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1 @ set SVC, irqs off |
94 | @ load phys pgd, stack, resume fn | 151 | @ load phys pgd, stack, resume fn |
95 | ARM( ldmia r0!, {r1, sp, pc} ) | 152 | ARM( ldmia r0!, {r1, sp, pc} ) |
@@ -98,7 +155,11 @@ THUMB( mov sp, r2 ) | |||
98 | THUMB( bx r3 ) | 155 | THUMB( bx r3 ) |
99 | ENDPROC(cpu_resume) | 156 | ENDPROC(cpu_resume) |
100 | 157 | ||
101 | sleep_save_sp: | 158 | .align 2 |
102 | .rept CONFIG_NR_CPUS | 159 | mpidr_hash_ptr: |
103 | .long 0 @ preserve stack phys ptr here | 160 | .long mpidr_hash - . @ mpidr_hash struct offset |
104 | .endr | 161 | |
162 | .type sleep_save_sp, #object | ||
163 | ENTRY(sleep_save_sp) | ||
164 | _sleep_save_sp: | ||
165 | .space SLEEP_SAVE_SP_SZ @ struct sleep_save_sp | ||
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index 38a50676213b..41cf3cbf756d 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c | |||
@@ -1,9 +1,12 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <linux/slab.h> | ||
2 | 3 | ||
4 | #include <asm/cacheflush.h> | ||
3 | #include <asm/idmap.h> | 5 | #include <asm/idmap.h> |
4 | #include <asm/pgalloc.h> | 6 | #include <asm/pgalloc.h> |
5 | #include <asm/pgtable.h> | 7 | #include <asm/pgtable.h> |
6 | #include <asm/memory.h> | 8 | #include <asm/memory.h> |
9 | #include <asm/smp_plat.h> | ||
7 | #include <asm/suspend.h> | 10 | #include <asm/suspend.h> |
8 | #include <asm/tlbflush.h> | 11 | #include <asm/tlbflush.h> |
9 | 12 | ||
@@ -82,3 +85,20 @@ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr) | |||
82 | outer_clean_range(virt_to_phys(save_ptr), | 85 | outer_clean_range(virt_to_phys(save_ptr), |
83 | virt_to_phys(save_ptr) + sizeof(*save_ptr)); | 86 | virt_to_phys(save_ptr) + sizeof(*save_ptr)); |
84 | } | 87 | } |
88 | |||
89 | extern struct sleep_save_sp sleep_save_sp; | ||
90 | |||
91 | static int cpu_suspend_alloc_sp(void) | ||
92 | { | ||
93 | void *ctx_ptr; | ||
94 | /* ctx_ptr is an array of physical addresses */ | ||
95 | ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(u32), GFP_KERNEL); | ||
96 | |||
97 | if (WARN_ON(!ctx_ptr)) | ||
98 | return -ENOMEM; | ||
99 | sleep_save_sp.save_ptr_stash = ctx_ptr; | ||
100 | sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); | ||
101 | sync_cache_w(&sleep_save_sp); | ||
102 | return 0; | ||
103 | } | ||
104 | early_initcall(cpu_suspend_alloc_sp); | ||