aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy@goop.org>2009-09-03 15:27:15 -0400
committerIngo Molnar <mingo@elte.hu>2009-09-03 15:30:51 -0400
commit1ea0d14e480c245683927eecc03a70faf06e80c8 (patch)
treecc43a6966799aa55c0f076c1217c557fb107563c /arch
parent23386d63bbb3199cf247313ec088878d72debcfd (diff)
x86/i386: Make sure stack-protector segment base is cache aligned
The Intel Optimization Reference Guide says: In Intel Atom microarchitecture, the address generation unit assumes that the segment base will be 0 by default. Non-zero segment base will cause load and store operations to experience a delay. - If the segment base isn't aligned to a cache line boundary, the max throughput of memory operations is reduced to one [e]very 9 cycles. [...] Assembly/Compiler Coding Rule 15. (H impact, ML generality) For Intel Atom processors, use segments with base set to 0 whenever possible; avoid non-zero segment base address that is not aligned to cache line boundary at all cost. We can't avoid having a non-zero base for the stack-protector segment, but we can make it cache-aligned. Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Cc: <stable@kernel.org> LKML-Reference: <4AA01893.6000507@goop.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/include/asm/processor.h12
-rw-r--r--arch/x86/include/asm/stackprotector.h4
-rw-r--r--arch/x86/include/asm/system.h2
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/head_32.S1
5 files changed, 15 insertions, 6 deletions
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c7768269b1cf..e597ecc8753c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -403,7 +403,17 @@ extern unsigned long kernel_eflags;
403extern asmlinkage void ignore_sysret(void); 403extern asmlinkage void ignore_sysret(void);
404#else /* X86_64 */ 404#else /* X86_64 */
405#ifdef CONFIG_CC_STACKPROTECTOR 405#ifdef CONFIG_CC_STACKPROTECTOR
406DECLARE_PER_CPU(unsigned long, stack_canary); 406/*
407 * Make sure stack canary segment base is cached-aligned:
408 * "For Intel Atom processors, avoid non zero segment base address
409 * that is not aligned to cache line boundary at all cost."
410 * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
411 */
412struct stack_canary {
413 char __pad[20]; /* canary at %gs:20 */
414 unsigned long canary;
415};
416DECLARE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned;
407#endif 417#endif
408#endif /* X86_64 */ 418#endif /* X86_64 */
409 419
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 44efdff3975d..157517763565 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -78,14 +78,14 @@ static __always_inline void boot_init_stack_canary(void)
78#ifdef CONFIG_X86_64 78#ifdef CONFIG_X86_64
79 percpu_write(irq_stack_union.stack_canary, canary); 79 percpu_write(irq_stack_union.stack_canary, canary);
80#else 80#else
81 percpu_write(stack_canary, canary); 81 percpu_write(stack_canary.canary, canary);
82#endif 82#endif
83} 83}
84 84
85static inline void setup_stack_canary_segment(int cpu) 85static inline void setup_stack_canary_segment(int cpu)
86{ 86{
87#ifdef CONFIG_X86_32 87#ifdef CONFIG_X86_32
88 unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20; 88 unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
89 struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); 89 struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
90 struct desc_struct desc; 90 struct desc_struct desc;
91 91
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 75c49c782e20..f08f97374892 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -31,7 +31,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
31 "movl %P[task_canary](%[next]), %%ebx\n\t" \ 31 "movl %P[task_canary](%[next]), %%ebx\n\t" \
32 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" 32 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
33#define __switch_canary_oparam \ 33#define __switch_canary_oparam \
34 , [stack_canary] "=m" (per_cpu_var(stack_canary)) 34 , [stack_canary] "=m" (per_cpu_var(stack_canary.canary))
35#define __switch_canary_iparam \ 35#define __switch_canary_iparam \
36 , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) 36 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
37#else /* CC_STACKPROTECTOR */ 37#else /* CC_STACKPROTECTOR */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ced07ba5e937..7d84bc4c1188 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1043,7 +1043,7 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist);
1043#else /* CONFIG_X86_64 */ 1043#else /* CONFIG_X86_64 */
1044 1044
1045#ifdef CONFIG_CC_STACKPROTECTOR 1045#ifdef CONFIG_CC_STACKPROTECTOR
1046DEFINE_PER_CPU(unsigned long, stack_canary); 1046DEFINE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned;
1047#endif 1047#endif
1048 1048
1049/* Make sure %fs and %gs are initialized properly in idle threads */ 1049/* Make sure %fs and %gs are initialized properly in idle threads */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index cc827ac9e8d3..7ffec6b3b331 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -439,7 +439,6 @@ is386: movl $2,%ecx # set MP
439 jne 1f 439 jne 1f
440 movl $per_cpu__gdt_page,%eax 440 movl $per_cpu__gdt_page,%eax
441 movl $per_cpu__stack_canary,%ecx 441 movl $per_cpu__stack_canary,%ecx
442 subl $20, %ecx
443 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) 442 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
444 shrl $16, %ecx 443 shrl $16, %ecx
445 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) 444 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)