 Documentation/kernel-parameters.txt | 13
 arch/x86/include/asm/elf.h          | 31
 arch/x86/kernel/cpu/amd.c           | 13
 arch/x86/kernel/sys_x86_64.c        | 81
 arch/x86/mm/mmap.c                  | 15
 arch/x86/vdso/vma.c                 |  9
 6 files changed, 144 insertions(+), 18 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index aa47be71df4c..af73c036b7e6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -299,6 +299,19 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			behaviour to be specified. Bit 0 enables warnings,
 			bit 1 enables fixups, and bit 2 sends a segfault.
 
+	align_va_addr=	[X86-64]
+			Align virtual addresses by clearing slice [14:12] when
+			allocating a VMA at process creation time. This option
+			gives you up to 3% performance improvement on AMD F15h
+			machines (where it is enabled by default) for a
+			CPU-intensive style benchmark, and it can vary highly in
+			a microbenchmark depending on workload and compiler.
+
+			32: only for 32-bit processes
+			64: only for 64-bit processes
+			on: enable for both 32- and 64-bit processes
+			off: disable for both 32- and 64-bit processes
+
 	amd_iommu=	[HW,X86-64]
 			Pass parameters to the AMD IOMMU driver in the system.
 			Possible values are:
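
[Editorial note: the "slice [14:12]" wording is easiest to see with concrete
numbers. The sketch below is a userspace illustration only, not part of the
patch; the mask value 0x7000 is the one a typical F15h ends up with in the
amd.c hunk further down, and the address is made up. It shows the two
rounding modes align_addr() applies:

/* Illustration only: what "clearing slice [14:12]" means for a
 * candidate mmap address, assuming an F15h va_align.mask of 0x7000. */
#include <stdio.h>

int main(void)
{
	unsigned long mask = 0x7000;		/* bits [14:12] */
	unsigned long addr = 0x7f1234567000UL;	/* made-up candidate */

	/* Bottom-up: round up, so every process maps the file at the
	 * same offset modulo 32K, avoiding I$ aliasing. */
	unsigned long up = (addr + mask) & ~mask;

	/* Top-down: round down, so the result stays <= the original. */
	unsigned long down = addr & ~mask;

	printf("orig %#lx -> up %#lx, down %#lx\n", addr, up, down);
	return 0;
}
]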
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index f2ad2163109d..5f962df30d0f 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -4,6 +4,7 @@
 /*
  * ELF register definitions..
  */
+#include <linux/thread_info.h>
 
 #include <asm/ptrace.h>
 #include <asm/user.h>
@@ -320,4 +321,34 @@ extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 
+/*
+ * True on X86_32 or when emulating IA32 on X86_64
+ */
+static inline int mmap_is_ia32(void)
+{
+#ifdef CONFIG_X86_32
+	return 1;
+#endif
+#ifdef CONFIG_IA32_EMULATION
+	if (test_thread_flag(TIF_IA32))
+		return 1;
+#endif
+	return 0;
+}
+
+/* The first two values are special, do not change. See align_addr() */
+enum align_flags {
+	ALIGN_VA_32	= BIT(0),
+	ALIGN_VA_64	= BIT(1),
+	ALIGN_VDSO	= BIT(2),
+	ALIGN_TOPDOWN	= BIT(3),
+};
+
+struct va_alignment {
+	int flags;
+	unsigned long mask;
+} ____cacheline_aligned;
+
+extern struct va_alignment va_align;
+extern unsigned long align_addr(unsigned long, struct file *, enum align_flags);
 #endif /* _ASM_X86_ELF_H */
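
[Editorial note: the "first two values are special" comment is about the
selector trick used by align_addr() in sys_x86_64.c below: 2 - mmap_is_ia32()
evaluates to ALIGN_VA_32 (1) for ia32 tasks and to ALIGN_VA_64 (2) otherwise,
so a single AND picks the right flag. A standalone sketch, illustration only;
wants_alignment() is a made-up name:

/* Why ALIGN_VA_32 must be BIT(0) and ALIGN_VA_64 must be BIT(1):
 * one branch-free select instead of two conditionals. */
#include <assert.h>

enum { ALIGN_VA_32 = 1 << 0, ALIGN_VA_64 = 1 << 1 };

static int wants_alignment(int flags, int is_ia32)
{
	return !!(flags & (2 - is_ia32));
}

int main(void)
{
	assert(wants_alignment(ALIGN_VA_32, 1));	/* 32-bit task, "32" */
	assert(!wants_alignment(ALIGN_VA_32, 0));	/* 64-bit task, "32" */
	assert(wants_alignment(ALIGN_VA_64, 0));	/* 64-bit task, "64" */
	assert(!wants_alignment(ALIGN_VA_64, 1));	/* 32-bit task, "64" */
	return 0;
}
]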
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b13ed393dfce..b0234bcbd32a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -458,6 +458,19 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
458 "with P0 frequency!\n"); 458 "with P0 frequency!\n");
459 } 459 }
460 } 460 }
461
462 if (c->x86 == 0x15) {
463 unsigned long upperbit;
464 u32 cpuid, assoc;
465
466 cpuid = cpuid_edx(0x80000005);
467 assoc = cpuid >> 16 & 0xff;
468 upperbit = ((cpuid >> 24) << 10) / assoc;
469
470 va_align.mask = (upperbit - 1) & PAGE_MASK;
471 va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
472
473 }
461} 474}
462 475
463static void __cpuinit init_amd(struct cpuinfo_x86 *c) 476static void __cpuinit init_amd(struct cpuinfo_x86 *c)
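
[Editorial note: replaying the arithmetic with an assumed, but typical, F15h
L1I geometry (64K, 2-way, as reported in CPUID leaf 0x80000005 EDX) shows
where the documented [14:12] slice comes from: one cache way spans
64K/2 = 32K, so aliasing is decided by the address bits above the 4K page
offset and below 32K. Userspace illustration only:

/* The mask computation above, replayed with the EDX value a typical
 * F15h reports: L1I size in KB in bits 31:24, associativity in 23:16. */
#include <stdio.h>

#define PAGE_MASK	(~((unsigned long)0xfff))	/* 4K pages assumed */

int main(void)
{
	unsigned int edx = (64 << 24) | (2 << 16);	/* assumed F15h value */
	unsigned int assoc = edx >> 16 & 0xff;		/* 2-way */
	unsigned long upperbit = ((edx >> 24) << 10) / assoc;	/* 64K/2 = 32K */
	unsigned long mask = (upperbit - 1) & PAGE_MASK;

	/* 32K - 1 = 0x7fff; dropping the page offset leaves 0x7000,
	 * i.e. exactly bits [14:12] from the documentation hunk. */
	printf("upperbit=%#lx mask=%#lx\n", upperbit, mask);
	return 0;
}
]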
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index ff14a5044ce6..aaa8d09c7195 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -18,6 +18,72 @@
 #include <asm/ia32.h>
 #include <asm/syscalls.h>
 
+struct va_alignment __read_mostly va_align = {
+	.flags = -1,
+};
+
+/*
+ * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
+ *
+ * @flags denotes the allocation direction - bottomup or topdown -
+ * or vDSO; see call sites below.
+ */
+unsigned long align_addr(unsigned long addr, struct file *filp,
+			 enum align_flags flags)
+{
+	unsigned long tmp_addr;
+
+	/* handle 32- and 64-bit case with a single conditional */
+	if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
+		return addr;
+
+	if (!(current->flags & PF_RANDOMIZE))
+		return addr;
+
+	if (!((flags & ALIGN_VDSO) || filp))
+		return addr;
+
+	tmp_addr = addr;
+
+	/*
+	 * We need an address which is <= the original one
+	 * only when in topdown direction.
+	 */
+	if (!(flags & ALIGN_TOPDOWN))
+		tmp_addr += va_align.mask;
+
+	tmp_addr &= ~va_align.mask;
+
+	return tmp_addr;
+}
+
+static int __init control_va_addr_alignment(char *str)
+{
+	/* guard against enabling this on other CPU families */
+	if (va_align.flags < 0)
+		return 1;
+
+	if (*str == 0)
+		return 1;
+
+	if (*str == '=')
+		str++;
+
+	if (!strcmp(str, "32"))
+		va_align.flags = ALIGN_VA_32;
+	else if (!strcmp(str, "64"))
+		va_align.flags = ALIGN_VA_64;
+	else if (!strcmp(str, "off"))
+		va_align.flags = 0;
+	else if (!strcmp(str, "on"))
+		va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
+	else
+		return 0;
+
+	return 1;
+}
+__setup("align_va_addr", control_va_addr_alignment);
+
 SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
 		unsigned long, prot, unsigned long, flags,
 		unsigned long, fd, unsigned long, off)
@@ -92,6 +158,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	start_addr = addr;
 
 full_search:
+
+	addr = align_addr(addr, filp, 0);
+
 	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
 		/* At this point: (!vma || addr < vma->vm_end). */
 		if (end - len < addr) {
@@ -117,6 +186,7 @@ full_search:
 			mm->cached_hole_size = vma->vm_start - addr;
 
 		addr = vma->vm_end;
+		addr = align_addr(addr, filp, 0);
 	}
 }
 
@@ -161,10 +231,13 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 	/* make sure it can fit in the remaining address space */
 	if (addr > len) {
-		vma = find_vma(mm, addr-len);
-		if (!vma || addr <= vma->vm_start)
+		unsigned long tmp_addr = align_addr(addr - len, filp,
+						ALIGN_TOPDOWN);
+
+		vma = find_vma(mm, tmp_addr);
+		if (!vma || tmp_addr + len <= vma->vm_start)
 			/* remember the address as a hint for next time */
-			return mm->free_area_cache = addr-len;
+			return mm->free_area_cache = tmp_addr;
 	}
 
 	if (mm->mmap_base < len)
@@ -173,6 +246,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		addr = mm->mmap_base-len;
 
 	do {
+		addr = align_addr(addr, filp, ALIGN_TOPDOWN);
+
 		/*
 		 * Lookup failure means no vma is above this address,
 		 * else if new region fits below vma->vm_start,
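
[Usage note: with the __setup() registration the option is consumed from the
kernel command line, and the parser above accepts exactly the four documented
forms, e.g.:

	align_va_addr=32
	align_va_addr=64
	align_va_addr=on
	align_va_addr=off

The leading '=' check exists because the string is registered without a
trailing '=', so the handler sees everything after the bare "align_va_addr"
prefix, '=' included.]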
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 1dab5194fd9d..d4c073630175 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -51,21 +51,6 @@ static unsigned int stack_maxrandom_size(void)
 #define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
 #define MAX_GAP (TASK_SIZE/6*5)
 
-/*
- * True on X86_32 or when emulating IA32 on X86_64
- */
-static int mmap_is_ia32(void)
-{
-#ifdef CONFIG_X86_32
-	return 1;
-#endif
-#ifdef CONFIG_IA32_EMULATION
-	if (test_thread_flag(TIF_IA32))
-		return 1;
-#endif
-	return 0;
-}
-
 static int mmap_is_legacy(void)
 {
 	if (current->personality & ADDR_COMPAT_LAYOUT)
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 7abd2be0f9b9..caa42ce17577 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -69,6 +69,15 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
 	addr = start + (offset << PAGE_SHIFT);
 	if (addr >= end)
 		addr = end;
+
+	/*
+	 * page-align it here so that get_unmapped_area doesn't
+	 * align it wrongly again to the next page. addr can come in 4K
+	 * unaligned here as a result of stack start randomization.
+	 */
+	addr = PAGE_ALIGN(addr);
+	addr = align_addr(addr, NULL, ALIGN_VDSO);
+
 	return addr;
 }
 
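[Editorial note: a worked example of the ordering the comment insists on,
illustration only; the mask 0x7000 is assumed as above and the address is
made up. Page-aligning first makes the address something get_unmapped_area()
will pass through unchanged, so the slice alignment done next survives:

/* Why PAGE_ALIGN() must run before align_addr() in vdso_addr(). */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long mask = 0x7000;		/* assumed F15h va_align.mask */
	unsigned long addr = 0x7fff12345678UL;	/* 4K-unaligned, randomized */

	/* order used by vdso_addr(): page first, then slice (round up,
	 * since the vDSO path is not ALIGN_TOPDOWN) */
	unsigned long good = (PAGE_ALIGN(addr) + mask) & ~mask;

	printf("%#lx -> %#lx (32K-slice aligned: %s)\n",
	       addr, good, (good & mask) ? "no" : "yes");
	return 0;
}
]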