diff options
author | Chris Metcalf <cmetcalf@tilera.com> | 2013-08-10 12:35:02 -0400 |
---|---|---|
committer | Chris Metcalf <cmetcalf@tilera.com> | 2013-08-30 11:56:58 -0400 |
commit | 35f059761c5ac313d13372fe3cdaa41bce3d0dbf (patch) | |
tree | 1a8f7e0eba01afac74c081348530fccd63dc48e4 /arch/tile | |
parent | 4036c7d3542ce82ea343bf95dd05ca46aefba9aa (diff) |
tilegx: change how we find the kernel stack
Previously, we used a special-purpose register (SPR_SYSTEM_SAVE_K_0)
to hold both the CPU number and the top of the current kernel stack:
the low bits held the CPU number, and the high bits held the address
of the page just above where we'd want the kernel stack to be.
That way we could initialize a new SP when first entering the
kernel by just masking the SPR value and subtracting a couple of
words.
However, it's actually more useful to be able to place an arbitrary
kernel-top value in the SPR. This allows us to create a new stack
context (e.g. for virtualization) with an arbitrary top-of-stack VA.
To make this work, we now store the CPU number in the high bits,
above the highest legal VA bit (42 bits in the current tilegx
microarchitecture). The full 42 bits are thus available to store the
top of stack value. Getting the current cpu (a relatively common
operation) is still fast; it's now a shift rather than a mask.
We make this change only for tilegx, since tilepro has too few SPR
bits to do this, and we don't need this support on tilepro anyway.
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'arch/tile')
-rw-r--r-- | arch/tile/include/asm/processor.h | 47 | ||||
-rw-r--r-- | arch/tile/kernel/head_32.S | 3 | ||||
-rw-r--r-- | arch/tile/kernel/head_64.S | 6 | ||||
-rw-r--r-- | arch/tile/kernel/intvec_32.S | 7 | ||||
-rw-r--r-- | arch/tile/kernel/intvec_64.S | 21 | ||||
-rw-r--r-- | arch/tile/kernel/stack.c | 10 |
6 files changed, 57 insertions, 37 deletions
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 461322b473b5..230b830e94d4 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h | |||
@@ -148,9 +148,10 @@ struct thread_struct { | |||
148 | 148 | ||
149 | /* | 149 | /* |
150 | * Start with "sp" this many bytes below the top of the kernel stack. | 150 | * Start with "sp" this many bytes below the top of the kernel stack. |
151 | * This preserves the invariant that a called function may write to *sp. | 151 | * This allows us to be cache-aware when handling the initial save |
152 | * of the pt_regs value to the stack. | ||
152 | */ | 153 | */ |
153 | #define STACK_TOP_DELTA 8 | 154 | #define STACK_TOP_DELTA 64 |
154 | 155 | ||
155 | /* | 156 | /* |
156 | * When entering the kernel via a fault, start with the top of the | 157 | * When entering the kernel via a fault, start with the top of the |
@@ -234,15 +235,15 @@ extern int do_work_pending(struct pt_regs *regs, u32 flags); | |||
234 | unsigned long get_wchan(struct task_struct *p); | 235 | unsigned long get_wchan(struct task_struct *p); |
235 | 236 | ||
236 | /* Return initial ksp value for given task. */ | 237 | /* Return initial ksp value for given task. */ |
237 | #define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE) | 238 | #define task_ksp0(task) \ |
239 | ((unsigned long)(task)->stack + THREAD_SIZE - STACK_TOP_DELTA) | ||
238 | 240 | ||
239 | /* Return some info about the user process TASK. */ | 241 | /* Return some info about the user process TASK. */ |
240 | #define KSTK_TOP(task) (task_ksp0(task) - STACK_TOP_DELTA) | ||
241 | #define task_pt_regs(task) \ | 242 | #define task_pt_regs(task) \ |
242 | ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1) | 243 | ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1) |
243 | #define current_pt_regs() \ | 244 | #define current_pt_regs() \ |
244 | ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \ | 245 | ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \ |
245 | (KSTK_PTREGS_GAP - 1)) - 1) | 246 | STACK_TOP_DELTA - (KSTK_PTREGS_GAP - 1)) - 1) |
246 | #define task_sp(task) (task_pt_regs(task)->sp) | 247 | #define task_sp(task) (task_pt_regs(task)->sp) |
247 | #define task_pc(task) (task_pt_regs(task)->pc) | 248 | #define task_pc(task) (task_pt_regs(task)->pc) |
248 | /* Aliases for pc and sp (used in fs/proc/array.c) */ | 249 | /* Aliases for pc and sp (used in fs/proc/array.c) */ |
@@ -355,20 +356,38 @@ extern int kdata_huge; | |||
355 | #define KERNEL_PL CONFIG_KERNEL_PL | 356 | #define KERNEL_PL CONFIG_KERNEL_PL |
356 | 357 | ||
357 | /* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */ | 358 | /* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */ |
358 | #define CPU_LOG_MASK_VALUE 12 | 359 | #ifdef __tilegx__ |
359 | #define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1) | 360 | #define CPU_SHIFT 48 |
360 | #if CONFIG_NR_CPUS > CPU_MASK_VALUE | 361 | #if CHIP_VA_WIDTH() > CPU_SHIFT |
361 | # error Too many cpus! | 362 | # error Too many VA bits! |
362 | #endif | 363 | #endif |
364 | #define MAX_CPU_ID ((1 << (64 - CPU_SHIFT)) - 1) | ||
363 | #define raw_smp_processor_id() \ | 365 | #define raw_smp_processor_id() \ |
364 | ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE) | 366 | ((int)(__insn_mfspr(SPR_SYSTEM_SAVE_K_0) >> CPU_SHIFT)) |
365 | #define get_current_ksp0() \ | 367 | #define get_current_ksp0() \ |
366 | (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE) | 368 | ((unsigned long)(((long)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) << \ |
369 | (64 - CPU_SHIFT)) >> (64 - CPU_SHIFT))) | ||
370 | #define next_current_ksp0(task) ({ \ | ||
371 | unsigned long __ksp0 = task_ksp0(task) & ((1UL << CPU_SHIFT) - 1); \ | ||
372 | unsigned long __cpu = (long)raw_smp_processor_id() << CPU_SHIFT; \ | ||
373 | __ksp0 | __cpu; \ | ||
374 | }) | ||
375 | #else | ||
376 | #define LOG2_NR_CPU_IDS 6 | ||
377 | #define MAX_CPU_ID ((1 << LOG2_NR_CPU_IDS) - 1) | ||
378 | #define raw_smp_processor_id() \ | ||
379 | ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & MAX_CPU_ID) | ||
380 | #define get_current_ksp0() \ | ||
381 | (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~MAX_CPU_ID) | ||
367 | #define next_current_ksp0(task) ({ \ | 382 | #define next_current_ksp0(task) ({ \ |
368 | unsigned long __ksp0 = task_ksp0(task); \ | 383 | unsigned long __ksp0 = task_ksp0(task); \ |
369 | int __cpu = raw_smp_processor_id(); \ | 384 | int __cpu = raw_smp_processor_id(); \ |
370 | BUG_ON(__ksp0 & CPU_MASK_VALUE); \ | 385 | BUG_ON(__ksp0 & MAX_CPU_ID); \ |
371 | __ksp0 | __cpu; \ | 386 | __ksp0 | __cpu; \ |
372 | }) | 387 | }) |
388 | #endif | ||
389 | #if CONFIG_NR_CPUS > (MAX_CPU_ID + 1) | ||
390 | # error Too many cpus! | ||
391 | #endif | ||
373 | 392 | ||
374 | #endif /* _ASM_TILE_PROCESSOR_H */ | 393 | #endif /* _ASM_TILE_PROCESSOR_H */ |
diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index d1527fce2861..f3f17b0283ff 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S | |||
@@ -86,7 +86,7 @@ ENTRY(_start) | |||
86 | /* | 86 | /* |
87 | * Load up our per-cpu offset. When the first (master) tile | 87 | * Load up our per-cpu offset. When the first (master) tile |
88 | * boots, this value is still zero, so we will load boot_pc | 88 | * boots, this value is still zero, so we will load boot_pc |
89 | * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. | 89 | * with start_kernel, and boot_sp at the top of init_stack. |
90 | * The master tile initializes the per-cpu offset array, so that | 90 | * The master tile initializes the per-cpu offset array, so that |
91 | * when subsequent (secondary) tiles boot, they will instead load | 91 | * when subsequent (secondary) tiles boot, they will instead load |
92 | * from their per-cpu versions of boot_sp and boot_pc. | 92 | * from their per-cpu versions of boot_sp and boot_pc. |
@@ -126,7 +126,6 @@ ENTRY(_start) | |||
126 | lw sp, r1 | 126 | lw sp, r1 |
127 | or r4, sp, r4 | 127 | or r4, sp, r4 |
128 | mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ | 128 | mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ |
129 | addi sp, sp, -STACK_TOP_DELTA | ||
130 | { | 129 | { |
131 | move lr, zero /* stop backtraces in the called function */ | 130 | move lr, zero /* stop backtraces in the called function */ |
132 | jr r0 | 131 | jr r0 |
diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S index 969e4f81f3b3..652b81426158 100644 --- a/arch/tile/kernel/head_64.S +++ b/arch/tile/kernel/head_64.S | |||
@@ -158,7 +158,7 @@ ENTRY(_start) | |||
158 | /* | 158 | /* |
159 | * Load up our per-cpu offset. When the first (master) tile | 159 | * Load up our per-cpu offset. When the first (master) tile |
160 | * boots, this value is still zero, so we will load boot_pc | 160 | * boots, this value is still zero, so we will load boot_pc |
161 | * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. | 161 | * with start_kernel, and boot_sp at the top of init_stack. |
162 | * The master tile initializes the per-cpu offset array, so that | 162 | * The master tile initializes the per-cpu offset array, so that |
163 | * when subsequent (secondary) tiles boot, they will instead load | 163 | * when subsequent (secondary) tiles boot, they will instead load |
164 | * from their per-cpu versions of boot_sp and boot_pc. | 164 | * from their per-cpu versions of boot_sp and boot_pc. |
@@ -202,9 +202,9 @@ ENTRY(_start) | |||
202 | } | 202 | } |
203 | ld r0, r0 | 203 | ld r0, r0 |
204 | ld sp, r1 | 204 | ld sp, r1 |
205 | or r4, sp, r4 | 205 | shli r4, r4, CPU_SHIFT |
206 | bfins r4, sp, 0, CPU_SHIFT-1 | ||
206 | mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ | 207 | mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ |
207 | addi sp, sp, -STACK_TOP_DELTA | ||
208 | { | 208 | { |
209 | move lr, zero /* stop backtraces in the called function */ | 209 | move lr, zero /* stop backtraces in the called function */ |
210 | jr r0 | 210 | jr r0 |
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 9c0c3cb6aab0..f3d26f48e659 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S | |||
@@ -185,7 +185,7 @@ intvec_\vecname: | |||
185 | * point sp at the top aligned address on the actual stack page. | 185 | * point sp at the top aligned address on the actual stack page. |
186 | */ | 186 | */ |
187 | mfspr r0, SPR_SYSTEM_SAVE_K_0 | 187 | mfspr r0, SPR_SYSTEM_SAVE_K_0 |
188 | mm r0, r0, zero, LOG2_THREAD_SIZE, 31 | 188 | mm r0, r0, zero, LOG2_NR_CPU_IDS, 31 |
189 | 189 | ||
190 | 0: | 190 | 0: |
191 | /* | 191 | /* |
@@ -203,6 +203,9 @@ intvec_\vecname: | |||
203 | * cache line 1: r14...r29 | 203 | * cache line 1: r14...r29 |
204 | * cache line 0: 2 x frame, r0..r13 | 204 | * cache line 0: 2 x frame, r0..r13 |
205 | */ | 205 | */ |
206 | #if STACK_TOP_DELTA != 64 | ||
207 | #error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs() | ||
208 | #endif | ||
206 | andi r0, r0, -64 | 209 | andi r0, r0, -64 |
207 | 210 | ||
208 | /* | 211 | /* |
@@ -464,7 +467,7 @@ intvec_\vecname: | |||
464 | } | 467 | } |
465 | { | 468 | { |
466 | auli r21, r21, ha16(__per_cpu_offset) | 469 | auli r21, r21, ha16(__per_cpu_offset) |
467 | mm r20, r20, zero, 0, LOG2_THREAD_SIZE-1 | 470 | mm r20, r20, zero, 0, LOG2_NR_CPU_IDS-1 |
468 | } | 471 | } |
469 | s2a r20, r20, r21 | 472 | s2a r20, r20, r21 |
470 | lw tp, r20 | 473 | lw tp, r20 |
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index df19d4f3946e..3b35bb490d3e 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S | |||
@@ -132,13 +132,9 @@ intvec_\vecname: | |||
132 | mfspr r3, SPR_SYSTEM_SAVE_K_0 | 132 | mfspr r3, SPR_SYSTEM_SAVE_K_0 |
133 | 133 | ||
134 | /* Get &thread_info->unalign_jit_tmp[0] in r3. */ | 134 | /* Get &thread_info->unalign_jit_tmp[0] in r3. */ |
135 | bfexts r3, r3, 0, CPU_SHIFT-1 | ||
135 | mm r3, zero, LOG2_THREAD_SIZE, 63 | 136 | mm r3, zero, LOG2_THREAD_SIZE, 63 |
136 | #if THREAD_SIZE < 65536 | 137 | addli r3, r3, THREAD_INFO_UNALIGN_JIT_TMP_OFFSET |
137 | addli r3, r3, -(PAGE_SIZE - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET) | ||
138 | #else | ||
139 | addli r3, r3, -(PAGE_SIZE/2) | ||
140 | addli r3, r3, -(PAGE_SIZE/2 - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET) | ||
141 | #endif | ||
142 | 138 | ||
143 | /* | 139 | /* |
144 | * Save r0, r1, r2 into thread_info array r3 points to | 140 | * Save r0, r1, r2 into thread_info array r3 points to |
@@ -365,13 +361,13 @@ intvec_\vecname: | |||
365 | 361 | ||
366 | 2: | 362 | 2: |
367 | /* | 363 | /* |
368 | * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and | 364 | * SYSTEM_SAVE_K_0 holds the cpu number in the high bits, and |
369 | * the current stack top in the higher bits. So we recover | 365 | * the current stack top in the lower bits. So we recover |
370 | * our stack top by just masking off the low bits, then | 366 | * our starting stack value by sign-extending the low bits, then |
371 | * point sp at the top aligned address on the actual stack page. | 367 | * point sp at the top aligned address on the actual stack page. |
372 | */ | 368 | */ |
373 | mfspr r0, SPR_SYSTEM_SAVE_K_0 | 369 | mfspr r0, SPR_SYSTEM_SAVE_K_0 |
374 | mm r0, zero, LOG2_THREAD_SIZE, 63 | 370 | bfexts r0, r0, 0, CPU_SHIFT-1 |
375 | 371 | ||
376 | 0: | 372 | 0: |
377 | /* | 373 | /* |
@@ -393,6 +389,9 @@ intvec_\vecname: | |||
393 | * cache line 1: r6...r13 | 389 | * cache line 1: r6...r13 |
394 | * cache line 0: 2 x frame, r0..r5 | 390 | * cache line 0: 2 x frame, r0..r5 |
395 | */ | 391 | */ |
392 | #if STACK_TOP_DELTA != 64 | ||
393 | #error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs() | ||
394 | #endif | ||
396 | andi r0, r0, -64 | 395 | andi r0, r0, -64 |
397 | 396 | ||
398 | /* | 397 | /* |
@@ -690,7 +689,7 @@ intvec_\vecname: | |||
690 | } | 689 | } |
691 | { | 690 | { |
692 | shl16insli r21, r21, hw1(__per_cpu_offset) | 691 | shl16insli r21, r21, hw1(__per_cpu_offset) |
693 | bfextu r20, r20, 0, LOG2_THREAD_SIZE-1 | 692 | bfextu r20, r20, CPU_SHIFT, 63 |
694 | } | 693 | } |
695 | shl16insli r21, r21, hw0(__per_cpu_offset) | 694 | shl16insli r21, r21, hw0(__per_cpu_offset) |
696 | shl3add r20, r20, r21 | 695 | shl3add r20, r20, r21 |
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index a9db923bb9eb..24fd223df65d 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c | |||
@@ -197,19 +197,19 @@ static void validate_stack(struct pt_regs *regs) | |||
197 | { | 197 | { |
198 | int cpu = raw_smp_processor_id(); | 198 | int cpu = raw_smp_processor_id(); |
199 | unsigned long ksp0 = get_current_ksp0(); | 199 | unsigned long ksp0 = get_current_ksp0(); |
200 | unsigned long ksp0_base = ksp0 - THREAD_SIZE; | 200 | unsigned long ksp0_base = ksp0 & -THREAD_SIZE; |
201 | unsigned long sp = stack_pointer; | 201 | unsigned long sp = stack_pointer; |
202 | 202 | ||
203 | if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) { | 203 | if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) { |
204 | pr_err("WARNING: cpu %d: kernel stack page %#lx underrun!\n" | 204 | pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx underrun!\n" |
205 | " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", | 205 | " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", |
206 | cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr); | 206 | cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr); |
207 | } | 207 | } |
208 | 208 | ||
209 | else if (sp < ksp0_base + sizeof(struct thread_info)) { | 209 | else if (sp < ksp0_base + sizeof(struct thread_info)) { |
210 | pr_err("WARNING: cpu %d: kernel stack page %#lx overrun!\n" | 210 | pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx overrun!\n" |
211 | " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", | 211 | " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", |
212 | cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr); | 212 | cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr); |
213 | } | 213 | } |
214 | } | 214 | } |
215 | 215 | ||