aboutsummaryrefslogtreecommitdiffstats
path: root/arch/tile
diff options
context:
space:
mode:
authorChris Metcalf <cmetcalf@tilera.com>2013-08-10 12:35:02 -0400
committerChris Metcalf <cmetcalf@tilera.com>2013-08-30 11:56:58 -0400
commit35f059761c5ac313d13372fe3cdaa41bce3d0dbf (patch)
tree1a8f7e0eba01afac74c081348530fccd63dc48e4 /arch/tile
parent4036c7d3542ce82ea343bf95dd05ca46aefba9aa (diff)
tilegx: change how we find the kernel stack
Previously, we used a special-purpose register (SPR_SYSTEM_SAVE_K_0) to hold the CPU number and the top of the current kernel stack by using the low bits to hold the CPU number, and using the high bits to hold the address of the page just above where we'd want the kernel stack to be. That way we could initialize a new SP when first entering the kernel by just masking the SPR value and subtracting a couple of words. However, it's actually more useful to be able to place an arbitrary kernel-top value in the SPR. This allows us to create a new stack context (e.g. for virtualization) with an arbitrary top-of-stack VA. To make this work, we now store the CPU number in the high bits, above the highest legal VA bit (42 bits in the current tilegx microarchitecture). The full 42 bits are thus available to store the top of stack value. Getting the current cpu (a relatively common operation) is still fast; it's now a shift rather than a mask. We make this change only for tilegx, since tilepro has too few SPR bits to do this, and we don't need this support on tilepro anyway. Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'arch/tile')
-rw-r--r--arch/tile/include/asm/processor.h47
-rw-r--r--arch/tile/kernel/head_32.S3
-rw-r--r--arch/tile/kernel/head_64.S6
-rw-r--r--arch/tile/kernel/intvec_32.S7
-rw-r--r--arch/tile/kernel/intvec_64.S21
-rw-r--r--arch/tile/kernel/stack.c10
6 files changed, 57 insertions, 37 deletions
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 461322b473b5..230b830e94d4 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -148,9 +148,10 @@ struct thread_struct {
148 148
149/* 149/*
150 * Start with "sp" this many bytes below the top of the kernel stack. 150 * Start with "sp" this many bytes below the top of the kernel stack.
151 * This preserves the invariant that a called function may write to *sp. 151 * This allows us to be cache-aware when handling the initial save
152 * of the pt_regs value to the stack.
152 */ 153 */
153#define STACK_TOP_DELTA 8 154#define STACK_TOP_DELTA 64
154 155
155/* 156/*
156 * When entering the kernel via a fault, start with the top of the 157 * When entering the kernel via a fault, start with the top of the
@@ -234,15 +235,15 @@ extern int do_work_pending(struct pt_regs *regs, u32 flags);
234unsigned long get_wchan(struct task_struct *p); 235unsigned long get_wchan(struct task_struct *p);
235 236
236/* Return initial ksp value for given task. */ 237/* Return initial ksp value for given task. */
237#define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE) 238#define task_ksp0(task) \
239 ((unsigned long)(task)->stack + THREAD_SIZE - STACK_TOP_DELTA)
238 240
239/* Return some info about the user process TASK. */ 241/* Return some info about the user process TASK. */
240#define KSTK_TOP(task) (task_ksp0(task) - STACK_TOP_DELTA)
241#define task_pt_regs(task) \ 242#define task_pt_regs(task) \
242 ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1) 243 ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1)
243#define current_pt_regs() \ 244#define current_pt_regs() \
244 ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \ 245 ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \
245 (KSTK_PTREGS_GAP - 1)) - 1) 246 STACK_TOP_DELTA - (KSTK_PTREGS_GAP - 1)) - 1)
246#define task_sp(task) (task_pt_regs(task)->sp) 247#define task_sp(task) (task_pt_regs(task)->sp)
247#define task_pc(task) (task_pt_regs(task)->pc) 248#define task_pc(task) (task_pt_regs(task)->pc)
248/* Aliases for pc and sp (used in fs/proc/array.c) */ 249/* Aliases for pc and sp (used in fs/proc/array.c) */
@@ -355,20 +356,38 @@ extern int kdata_huge;
355#define KERNEL_PL CONFIG_KERNEL_PL 356#define KERNEL_PL CONFIG_KERNEL_PL
356 357
357/* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */ 358/* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */
358#define CPU_LOG_MASK_VALUE 12 359#ifdef __tilegx__
359#define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1) 360#define CPU_SHIFT 48
360#if CONFIG_NR_CPUS > CPU_MASK_VALUE 361#if CHIP_VA_WIDTH() > CPU_SHIFT
361# error Too many cpus! 362# error Too many VA bits!
362#endif 363#endif
364#define MAX_CPU_ID ((1 << (64 - CPU_SHIFT)) - 1)
363#define raw_smp_processor_id() \ 365#define raw_smp_processor_id() \
364 ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE) 366 ((int)(__insn_mfspr(SPR_SYSTEM_SAVE_K_0) >> CPU_SHIFT))
365#define get_current_ksp0() \ 367#define get_current_ksp0() \
366 (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE) 368 ((unsigned long)(((long)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) << \
369 (64 - CPU_SHIFT)) >> (64 - CPU_SHIFT)))
370#define next_current_ksp0(task) ({ \
371 unsigned long __ksp0 = task_ksp0(task) & ((1UL << CPU_SHIFT) - 1); \
372 unsigned long __cpu = (long)raw_smp_processor_id() << CPU_SHIFT; \
373 __ksp0 | __cpu; \
374})
375#else
376#define LOG2_NR_CPU_IDS 6
377#define MAX_CPU_ID ((1 << LOG2_NR_CPU_IDS) - 1)
378#define raw_smp_processor_id() \
379 ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & MAX_CPU_ID)
380#define get_current_ksp0() \
381 (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~MAX_CPU_ID)
367#define next_current_ksp0(task) ({ \ 382#define next_current_ksp0(task) ({ \
368 unsigned long __ksp0 = task_ksp0(task); \ 383 unsigned long __ksp0 = task_ksp0(task); \
369 int __cpu = raw_smp_processor_id(); \ 384 int __cpu = raw_smp_processor_id(); \
370 BUG_ON(__ksp0 & CPU_MASK_VALUE); \ 385 BUG_ON(__ksp0 & MAX_CPU_ID); \
371 __ksp0 | __cpu; \ 386 __ksp0 | __cpu; \
372}) 387})
388#endif
389#if CONFIG_NR_CPUS > (MAX_CPU_ID + 1)
390# error Too many cpus!
391#endif
373 392
374#endif /* _ASM_TILE_PROCESSOR_H */ 393#endif /* _ASM_TILE_PROCESSOR_H */
diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S
index d1527fce2861..f3f17b0283ff 100644
--- a/arch/tile/kernel/head_32.S
+++ b/arch/tile/kernel/head_32.S
@@ -86,7 +86,7 @@ ENTRY(_start)
86 /* 86 /*
87 * Load up our per-cpu offset. When the first (master) tile 87 * Load up our per-cpu offset. When the first (master) tile
88 * boots, this value is still zero, so we will load boot_pc 88 * boots, this value is still zero, so we will load boot_pc
89 * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. 89 * with start_kernel, and boot_sp at the top of init_stack.
90 * The master tile initializes the per-cpu offset array, so that 90 * The master tile initializes the per-cpu offset array, so that
91 * when subsequent (secondary) tiles boot, they will instead load 91 * when subsequent (secondary) tiles boot, they will instead load
92 * from their per-cpu versions of boot_sp and boot_pc. 92 * from their per-cpu versions of boot_sp and boot_pc.
@@ -126,7 +126,6 @@ ENTRY(_start)
126 lw sp, r1 126 lw sp, r1
127 or r4, sp, r4 127 or r4, sp, r4
128 mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ 128 mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */
129 addi sp, sp, -STACK_TOP_DELTA
130 { 129 {
131 move lr, zero /* stop backtraces in the called function */ 130 move lr, zero /* stop backtraces in the called function */
132 jr r0 131 jr r0
diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S
index 969e4f81f3b3..652b81426158 100644
--- a/arch/tile/kernel/head_64.S
+++ b/arch/tile/kernel/head_64.S
@@ -158,7 +158,7 @@ ENTRY(_start)
158 /* 158 /*
159 * Load up our per-cpu offset. When the first (master) tile 159 * Load up our per-cpu offset. When the first (master) tile
160 * boots, this value is still zero, so we will load boot_pc 160 * boots, this value is still zero, so we will load boot_pc
161 * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. 161 * with start_kernel, and boot_sp at the top of init_stack.
162 * The master tile initializes the per-cpu offset array, so that 162 * The master tile initializes the per-cpu offset array, so that
163 * when subsequent (secondary) tiles boot, they will instead load 163 * when subsequent (secondary) tiles boot, they will instead load
164 * from their per-cpu versions of boot_sp and boot_pc. 164 * from their per-cpu versions of boot_sp and boot_pc.
@@ -202,9 +202,9 @@ ENTRY(_start)
202 } 202 }
203 ld r0, r0 203 ld r0, r0
204 ld sp, r1 204 ld sp, r1
205 or r4, sp, r4 205 shli r4, r4, CPU_SHIFT
206 bfins r4, sp, 0, CPU_SHIFT-1
206 mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ 207 mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */
207 addi sp, sp, -STACK_TOP_DELTA
208 { 208 {
209 move lr, zero /* stop backtraces in the called function */ 209 move lr, zero /* stop backtraces in the called function */
210 jr r0 210 jr r0
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index 9c0c3cb6aab0..f3d26f48e659 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -185,7 +185,7 @@ intvec_\vecname:
185 * point sp at the top aligned address on the actual stack page. 185 * point sp at the top aligned address on the actual stack page.
186 */ 186 */
187 mfspr r0, SPR_SYSTEM_SAVE_K_0 187 mfspr r0, SPR_SYSTEM_SAVE_K_0
188 mm r0, r0, zero, LOG2_THREAD_SIZE, 31 188 mm r0, r0, zero, LOG2_NR_CPU_IDS, 31
189 189
1900: 1900:
191 /* 191 /*
@@ -203,6 +203,9 @@ intvec_\vecname:
203 * cache line 1: r14...r29 203 * cache line 1: r14...r29
204 * cache line 0: 2 x frame, r0..r13 204 * cache line 0: 2 x frame, r0..r13
205 */ 205 */
206#if STACK_TOP_DELTA != 64
207#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs()
208#endif
206 andi r0, r0, -64 209 andi r0, r0, -64
207 210
208 /* 211 /*
@@ -464,7 +467,7 @@ intvec_\vecname:
464 } 467 }
465 { 468 {
466 auli r21, r21, ha16(__per_cpu_offset) 469 auli r21, r21, ha16(__per_cpu_offset)
467 mm r20, r20, zero, 0, LOG2_THREAD_SIZE-1 470 mm r20, r20, zero, 0, LOG2_NR_CPU_IDS-1
468 } 471 }
469 s2a r20, r20, r21 472 s2a r20, r20, r21
470 lw tp, r20 473 lw tp, r20
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index df19d4f3946e..3b35bb490d3e 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -132,13 +132,9 @@ intvec_\vecname:
132 mfspr r3, SPR_SYSTEM_SAVE_K_0 132 mfspr r3, SPR_SYSTEM_SAVE_K_0
133 133
134 /* Get &thread_info->unalign_jit_tmp[0] in r3. */ 134 /* Get &thread_info->unalign_jit_tmp[0] in r3. */
135 bfexts r3, r3, 0, CPU_SHIFT-1
135 mm r3, zero, LOG2_THREAD_SIZE, 63 136 mm r3, zero, LOG2_THREAD_SIZE, 63
136#if THREAD_SIZE < 65536 137 addli r3, r3, THREAD_INFO_UNALIGN_JIT_TMP_OFFSET
137 addli r3, r3, -(PAGE_SIZE - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET)
138#else
139 addli r3, r3, -(PAGE_SIZE/2)
140 addli r3, r3, -(PAGE_SIZE/2 - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET)
141#endif
142 138
143 /* 139 /*
144 * Save r0, r1, r2 into thread_info array r3 points to 140 * Save r0, r1, r2 into thread_info array r3 points to
@@ -365,13 +361,13 @@ intvec_\vecname:
365 361
3662: 3622:
367 /* 363 /*
368 * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and 364 * SYSTEM_SAVE_K_0 holds the cpu number in the high bits, and
369 * the current stack top in the higher bits. So we recover 365 * the current stack top in the lower bits. So we recover
370 * our stack top by just masking off the low bits, then 366 * our starting stack value by sign-extending the low bits, then
371 * point sp at the top aligned address on the actual stack page. 367 * point sp at the top aligned address on the actual stack page.
372 */ 368 */
373 mfspr r0, SPR_SYSTEM_SAVE_K_0 369 mfspr r0, SPR_SYSTEM_SAVE_K_0
374 mm r0, zero, LOG2_THREAD_SIZE, 63 370 bfexts r0, r0, 0, CPU_SHIFT-1
375 371
3760: 3720:
377 /* 373 /*
@@ -393,6 +389,9 @@ intvec_\vecname:
393 * cache line 1: r6...r13 389 * cache line 1: r6...r13
394 * cache line 0: 2 x frame, r0..r5 390 * cache line 0: 2 x frame, r0..r5
395 */ 391 */
392#if STACK_TOP_DELTA != 64
393#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs()
394#endif
396 andi r0, r0, -64 395 andi r0, r0, -64
397 396
398 /* 397 /*
@@ -690,7 +689,7 @@ intvec_\vecname:
690 } 689 }
691 { 690 {
692 shl16insli r21, r21, hw1(__per_cpu_offset) 691 shl16insli r21, r21, hw1(__per_cpu_offset)
693 bfextu r20, r20, 0, LOG2_THREAD_SIZE-1 692 bfextu r20, r20, CPU_SHIFT, 63
694 } 693 }
695 shl16insli r21, r21, hw0(__per_cpu_offset) 694 shl16insli r21, r21, hw0(__per_cpu_offset)
696 shl3add r20, r20, r21 695 shl3add r20, r20, r21
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index a9db923bb9eb..24fd223df65d 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -197,19 +197,19 @@ static void validate_stack(struct pt_regs *regs)
197{ 197{
198 int cpu = raw_smp_processor_id(); 198 int cpu = raw_smp_processor_id();
199 unsigned long ksp0 = get_current_ksp0(); 199 unsigned long ksp0 = get_current_ksp0();
200 unsigned long ksp0_base = ksp0 - THREAD_SIZE; 200 unsigned long ksp0_base = ksp0 & -THREAD_SIZE;
201 unsigned long sp = stack_pointer; 201 unsigned long sp = stack_pointer;
202 202
203 if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) { 203 if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) {
204 pr_err("WARNING: cpu %d: kernel stack page %#lx underrun!\n" 204 pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx underrun!\n"
205 " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", 205 " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
206 cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr); 206 cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr);
207 } 207 }
208 208
209 else if (sp < ksp0_base + sizeof(struct thread_info)) { 209 else if (sp < ksp0_base + sizeof(struct thread_info)) {
210 pr_err("WARNING: cpu %d: kernel stack page %#lx overrun!\n" 210 pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx overrun!\n"
211 " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", 211 " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
212 cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr); 212 cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr);
213 } 213 }
214} 214}
215 215