diff options
author | Jeremy Fitzhardinge <jeremy@goop.org> | 2009-09-03 15:27:15 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-09-03 15:30:51 -0400 |
commit | 1ea0d14e480c245683927eecc03a70faf06e80c8 (patch) | |
tree | cc43a6966799aa55c0f076c1217c557fb107563c /arch | |
parent | 23386d63bbb3199cf247313ec088878d72debcfd (diff) |
x86/i386: Make sure stack-protector segment base is cache aligned
The Intel Optimization Reference Guide says:

> In Intel Atom microarchitecture, the address generation unit
> assumes that the segment base will be 0 by default. Non-zero
> segment base will cause load and store operations to experience
> a delay.
>
> - If the segment base isn't aligned to a cache line
>   boundary, the max throughput of memory operations is
>   reduced to one [e]very 9 cycles.
>
> [...]
>
> Assembly/Compiler Coding Rule 15. (H impact, ML generality)
> For Intel Atom processors, use segments with base set to 0
> whenever possible; avoid non-zero segment base address that is
> not aligned to cache line boundary at all cost.

We can't avoid having a non-zero base for the stack-protector
segment, but we can make it cache-aligned.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: <stable@kernel.org>
LKML-Reference: <4AA01893.6000507@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/processor.h | 12 | ||||
-rw-r--r-- | arch/x86/include/asm/stackprotector.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/system.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/head_32.S | 1 |
5 files changed, 15 insertions, 6 deletions
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c7768269b1cf..e597ecc8753c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -403,7 +403,17 @@ extern unsigned long kernel_eflags; | |||
403 | extern asmlinkage void ignore_sysret(void); | 403 | extern asmlinkage void ignore_sysret(void); |
404 | #else /* X86_64 */ | 404 | #else /* X86_64 */ |
405 | #ifdef CONFIG_CC_STACKPROTECTOR | 405 | #ifdef CONFIG_CC_STACKPROTECTOR |
406 | DECLARE_PER_CPU(unsigned long, stack_canary); | 406 | /* |
407 | * Make sure stack canary segment base is cache-aligned: | ||
408 | * "For Intel Atom processors, avoid non zero segment base address | ||
409 | * that is not aligned to cache line boundary at all cost." | ||
410 | * (Optim Ref Manual Assembly/Compiler Coding Rule 15.) | ||
411 | */ | ||
412 | struct stack_canary { | ||
413 | char __pad[20]; /* canary at %gs:20 */ | ||
414 | unsigned long canary; | ||
415 | }; | ||
416 | DECLARE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned; | ||
407 | #endif | 417 | #endif |
408 | #endif /* X86_64 */ | 418 | #endif /* X86_64 */ |
409 | 419 | ||
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 44efdff3975d..157517763565 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h | |||
@@ -78,14 +78,14 @@ static __always_inline void boot_init_stack_canary(void) | |||
78 | #ifdef CONFIG_X86_64 | 78 | #ifdef CONFIG_X86_64 |
79 | percpu_write(irq_stack_union.stack_canary, canary); | 79 | percpu_write(irq_stack_union.stack_canary, canary); |
80 | #else | 80 | #else |
81 | percpu_write(stack_canary, canary); | 81 | percpu_write(stack_canary.canary, canary); |
82 | #endif | 82 | #endif |
83 | } | 83 | } |
84 | 84 | ||
85 | static inline void setup_stack_canary_segment(int cpu) | 85 | static inline void setup_stack_canary_segment(int cpu) |
86 | { | 86 | { |
87 | #ifdef CONFIG_X86_32 | 87 | #ifdef CONFIG_X86_32 |
88 | unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20; | 88 | unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu); |
89 | struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); | 89 | struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); |
90 | struct desc_struct desc; | 90 | struct desc_struct desc; |
91 | 91 | ||
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 75c49c782e20..f08f97374892 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h | |||
@@ -31,7 +31,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
31 | "movl %P[task_canary](%[next]), %%ebx\n\t" \ | 31 | "movl %P[task_canary](%[next]), %%ebx\n\t" \ |
32 | "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" | 32 | "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" |
33 | #define __switch_canary_oparam \ | 33 | #define __switch_canary_oparam \ |
34 | , [stack_canary] "=m" (per_cpu_var(stack_canary)) | 34 | , [stack_canary] "=m" (per_cpu_var(stack_canary.canary)) |
35 | #define __switch_canary_iparam \ | 35 | #define __switch_canary_iparam \ |
36 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) | 36 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) |
37 | #else /* CC_STACKPROTECTOR */ | 37 | #else /* CC_STACKPROTECTOR */ |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ced07ba5e937..7d84bc4c1188 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1043,7 +1043,7 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist); | |||
1043 | #else /* CONFIG_X86_64 */ | 1043 | #else /* CONFIG_X86_64 */ |
1044 | 1044 | ||
1045 | #ifdef CONFIG_CC_STACKPROTECTOR | 1045 | #ifdef CONFIG_CC_STACKPROTECTOR |
1046 | DEFINE_PER_CPU(unsigned long, stack_canary); | 1046 | DEFINE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned; |
1047 | #endif | 1047 | #endif |
1048 | 1048 | ||
1049 | /* Make sure %fs and %gs are initialized properly in idle threads */ | 1049 | /* Make sure %fs and %gs are initialized properly in idle threads */ |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index cc827ac9e8d3..7ffec6b3b331 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -439,7 +439,6 @@ is386: movl $2,%ecx # set MP | |||
439 | jne 1f | 439 | jne 1f |
440 | movl $per_cpu__gdt_page,%eax | 440 | movl $per_cpu__gdt_page,%eax |
441 | movl $per_cpu__stack_canary,%ecx | 441 | movl $per_cpu__stack_canary,%ecx |
442 | subl $20, %ecx | ||
443 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) | 442 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) |
444 | shrl $16, %ecx | 443 | shrl $16, %ecx |
445 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) | 444 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) |