author     Borislav Petkov <borislav.petkov@amd.com>  2011-08-05 09:15:08 -0400
committer  H. Peter Anvin <hpa@linux.intel.com>       2011-08-05 15:26:44 -0400
commit     dfb09f9b7ab03fd367740e541a5caf830ed56726 (patch)
tree       8bd8fdbbf3fb67f7d0aed73a1e8e1c7034ed2d54 /arch
parent     13f9a3737c903ace57d8aaebe81a3bbaeb0aa0a2 (diff)
x86, amd: Avoid cache aliasing penalties on AMD family 15h
This patch provides performance tuning for the "Bulldozer" CPU. With its shared instruction cache there is a chance of generating an excessive number of cache cross-invalidates when running specific workloads on the cores of a compute module.

This excessive amount of cross-invalidations can be observed if cache lines backed by shared physical memory alias in bits [14:12] of their virtual addresses, as those bits are used for the index generation.

This patch addresses the issue by clearing all the bits in the [14:12] slice of the file mapping's virtual address at generation time, thus forcing those bits to be the same for all mappings of a single shared library across processes and, in doing so, avoiding instruction cache aliases.

It also adds the command line option "align_va_addr=(32|64|on|off)" with which virtual address alignment can be enabled for 32-bit or 64-bit x86 individually, or both, or be completely disabled.

This change leaves virtual region address allocation on other families and/or vendors unaffected.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1312550110-24160-2-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
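For illustration only, and not part of the patch itself: a minimal user-space sketch of the rounding described above, assuming the bits-[14:12] case, i.e. an alignment mask of 0x7000. The demo_ names are hypothetical.

#include <stdio.h>

/* Hypothetical mask for the bits-[14:12] case described above. */
#define DEMO_VA_ALIGN_MASK 0x7000UL

/* Bottom-up allocation: round the candidate address up to the next aligned slot. */
static unsigned long demo_align_up(unsigned long addr)
{
        return (addr + DEMO_VA_ALIGN_MASK) & ~DEMO_VA_ALIGN_MASK;
}

/* Top-down allocation: round down, so the result stays <= the candidate. */
static unsigned long demo_align_down(unsigned long addr)
{
        return addr & ~DEMO_VA_ALIGN_MASK;
}

int main(void)
{
        unsigned long addr = 0x7f1234563000UL;

        printf("up:   %#lx\n", demo_align_up(addr));    /* 0x7f1234568000 */
        printf("down: %#lx\n", demo_align_down(addr));  /* 0x7f1234560000 */
        return 0;
}

Either way, bits [14:12] of the result are zero, which is what keeps all mappings of the same shared file from aliasing in the shared I$.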
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/include/asm/elf.h    | 31
-rw-r--r--  arch/x86/kernel/cpu/amd.c     | 13
-rw-r--r--  arch/x86/kernel/sys_x86_64.c  | 81
-rw-r--r--  arch/x86/mm/mmap.c            | 15
-rw-r--r--  arch/x86/vdso/vma.c           |  9
5 files changed, 131 insertions(+), 18 deletions(-)
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index f2ad2163109d..5f962df30d0f 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -4,6 +4,7 @@
 /*
  * ELF register definitions..
  */
+#include <linux/thread_info.h>
 
 #include <asm/ptrace.h>
 #include <asm/user.h>
@@ -320,4 +321,34 @@ extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 
+/*
+ * True on X86_32 or when emulating IA32 on X86_64
+ */
+static inline int mmap_is_ia32(void)
+{
+#ifdef CONFIG_X86_32
+        return 1;
+#endif
+#ifdef CONFIG_IA32_EMULATION
+        if (test_thread_flag(TIF_IA32))
+                return 1;
+#endif
+        return 0;
+}
+
+/* The first two values are special, do not change. See align_addr() */
+enum align_flags {
+        ALIGN_VA_32     = BIT(0),
+        ALIGN_VA_64     = BIT(1),
+        ALIGN_VDSO      = BIT(2),
+        ALIGN_TOPDOWN   = BIT(3),
+};
+
+struct va_alignment {
+        int flags;
+        unsigned long mask;
+} ____cacheline_aligned;
+
+extern struct va_alignment va_align;
+extern unsigned long align_addr(unsigned long, struct file *, enum align_flags);
 #endif /* _ASM_X86_ELF_H */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b13ed393dfce..b0234bcbd32a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -458,6 +458,19 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
458 "with P0 frequency!\n"); 458 "with P0 frequency!\n");
459 } 459 }
460 } 460 }
461
462 if (c->x86 == 0x15) {
463 unsigned long upperbit;
464 u32 cpuid, assoc;
465
466 cpuid = cpuid_edx(0x80000005);
467 assoc = cpuid >> 16 & 0xff;
468 upperbit = ((cpuid >> 24) << 10) / assoc;
469
470 va_align.mask = (upperbit - 1) & PAGE_MASK;
471 va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
472
473 }
461} 474}
462 475
463static void __cpuinit init_amd(struct cpuinfo_x86 *c) 476static void __cpuinit init_amd(struct cpuinfo_x86 *c)
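A worked example of the mask computation in early_init_amd() above, as a sketch under the assumption of a family 15h part with a 64 KB, 2-way shared L1 instruction cache (CPUID leaf 0x80000005 reports the L1I size in KB in EDX[31:24] and its associativity in EDX[23:16]); the sample EDX value below is hypothetical:

#include <stdio.h>

int main(void)
{
        /*
         * Hypothetical CPUID 0x80000005 EDX value describing a 64 KB, 2-way
         * L1 instruction cache: size in KB in bits [31:24], associativity in
         * bits [23:16] (lines per tag and line size in the low bytes).
         */
        unsigned int edx        = (64u << 24) | (2u << 16) | (1u << 8) | 64u;
        unsigned long page_mask = ~0xfffUL;                     /* 4 KB pages */

        unsigned int assoc      = (edx >> 16) & 0xff;           /* 2 */
        unsigned long upperbit  = ((edx >> 24) << 10) / assoc;  /* 65536 / 2 = 0x8000 */
        unsigned long mask      = (upperbit - 1) & page_mask;   /* 0x7fff & ~0xfff = 0x7000 */

        printf("va_align.mask = %#lx\n", mask);                 /* 0x7000 -> bits [14:12] */
        return 0;
}

The resulting mask of 0x7000 is exactly the bits-[14:12] slice named in the commit message.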
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index ff14a5044ce6..aaa8d09c7195 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -18,6 +18,72 @@
 #include <asm/ia32.h>
 #include <asm/syscalls.h>
 
+struct __read_mostly va_alignment va_align = {
+        .flags = -1,
+};
+
+/*
+ * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
+ *
+ * @flags denotes the allocation direction - bottomup or topdown -
+ * or vDSO; see call sites below.
+ */
+unsigned long align_addr(unsigned long addr, struct file *filp,
+                         enum align_flags flags)
+{
+        unsigned long tmp_addr;
+
+        /* handle 32- and 64-bit case with a single conditional */
+        if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
+                return addr;
+
+        if (!(current->flags & PF_RANDOMIZE))
+                return addr;
+
+        if (!((flags & ALIGN_VDSO) || filp))
+                return addr;
+
+        tmp_addr = addr;
+
+        /*
+         * We need an address which is <= than the original
+         * one only when in topdown direction.
+         */
+        if (!(flags & ALIGN_TOPDOWN))
+                tmp_addr += va_align.mask;
+
+        tmp_addr &= ~va_align.mask;
+
+        return tmp_addr;
+}
+
+static int __init control_va_addr_alignment(char *str)
+{
+        /* guard against enabling this on other CPU families */
+        if (va_align.flags < 0)
+                return 1;
+
+        if (*str == 0)
+                return 1;
+
+        if (*str == '=')
+                str++;
+
+        if (!strcmp(str, "32"))
+                va_align.flags = ALIGN_VA_32;
+        else if (!strcmp(str, "64"))
+                va_align.flags = ALIGN_VA_64;
+        else if (!strcmp(str, "off"))
+                va_align.flags = 0;
+        else if (!strcmp(str, "on"))
+                va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
+        else
+                return 0;
+
+        return 1;
+}
+__setup("align_va_addr", control_va_addr_alignment);
+
 SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
                 unsigned long, prot, unsigned long, flags,
                 unsigned long, fd, unsigned long, off)
@@ -92,6 +158,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
                 start_addr = addr;
 
 full_search:
+
+        addr = align_addr(addr, filp, 0);
+
         for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
                 /* At this point:  (!vma || addr < vma->vm_end). */
                 if (end - len < addr) {
@@ -117,6 +186,7 @@ full_search:
                         mm->cached_hole_size = vma->vm_start - addr;
 
                 addr = vma->vm_end;
+                addr = align_addr(addr, filp, 0);
         }
 }
 
@@ -161,10 +231,13 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
         /* make sure it can fit in the remaining address space */
         if (addr > len) {
-                vma = find_vma(mm, addr-len);
-                if (!vma || addr <= vma->vm_start)
+                unsigned long tmp_addr = align_addr(addr - len, filp,
+                                                    ALIGN_TOPDOWN);
+
+                vma = find_vma(mm, tmp_addr);
+                if (!vma || tmp_addr + len <= vma->vm_start)
                         /* remember the address as a hint for next time */
-                        return mm->free_area_cache = addr-len;
+                        return mm->free_area_cache = tmp_addr;
         }
 
         if (mm->mmap_base < len)
@@ -173,6 +246,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
         addr = mm->mmap_base-len;
 
         do {
+                addr = align_addr(addr, filp, ALIGN_TOPDOWN);
+
                 /*
                  * Lookup failure means no vma is above this address,
                  * else if new region fits below vma->vm_start,
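A small stand-alone sketch of the flag-selection test used in align_addr() above: because ALIGN_VA_32 is BIT(0) and ALIGN_VA_64 is BIT(1), the expression 2 - mmap_is_ia32() evaluates to exactly the bit relevant for the current task's bitness. The demo_ names are hypothetical and only mirror the kernel logic:

#include <stdio.h>

#define DEMO_ALIGN_VA_32 0x1    /* BIT(0) */
#define DEMO_ALIGN_VA_64 0x2    /* BIT(1) */

/* Nonzero when the configured flags enable alignment for this bitness. */
static int demo_wants_alignment(int flags, int is_ia32)
{
        /*
         * 2 - is_ia32 is DEMO_ALIGN_VA_64 (2) for 64-bit tasks and
         * DEMO_ALIGN_VA_32 (1) for 32-bit ones, so one AND covers both.
         */
        return flags >= 0 && (flags & (2 - is_ia32));
}

int main(void)
{
        printf("%d\n", demo_wants_alignment(DEMO_ALIGN_VA_64, 0)); /* 1: 64-bit task, 64-bit alignment on */
        printf("%d\n", demo_wants_alignment(DEMO_ALIGN_VA_64, 1)); /* 0: 32-bit task, only 64-bit enabled */
        printf("%d\n", demo_wants_alignment(-1, 0));               /* 0: flags still -1 on non-F15h CPUs */
        return 0;
}

Correspondingly, booting with align_va_addr=32 sets only the BIT(0) flag, so only 32-bit mmaps are aligned, as parsed by control_va_addr_alignment() above.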
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 1dab5194fd9d..d4c073630175 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -51,21 +51,6 @@ static unsigned int stack_maxrandom_size(void)
 #define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
 #define MAX_GAP (TASK_SIZE/6*5)
 
-/*
- * True on X86_32 or when emulating IA32 on X86_64
- */
-static int mmap_is_ia32(void)
-{
-#ifdef CONFIG_X86_32
-        return 1;
-#endif
-#ifdef CONFIG_IA32_EMULATION
-        if (test_thread_flag(TIF_IA32))
-                return 1;
-#endif
-        return 0;
-}
-
 static int mmap_is_legacy(void)
 {
         if (current->personality & ADDR_COMPAT_LAYOUT)
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 7abd2be0f9b9..caa42ce17577 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -69,6 +69,15 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
         addr = start + (offset << PAGE_SHIFT);
         if (addr >= end)
                 addr = end;
+
+        /*
+         * page-align it here so that get_unmapped_area doesn't
+         * align it wrongfully again to the next page. addr can come in 4K
+         * unaligned here as a result of stack start randomization.
+         */
+        addr = PAGE_ALIGN(addr);
+        addr = align_addr(addr, NULL, ALIGN_VDSO);
+
         return addr;
 }
 