author    Linus Torvalds <torvalds@linux-foundation.org>  2018-10-23 10:24:22 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-10-23 10:24:22 -0400
commit    e1d20beae70eb918cca7f07a77ce199fd148fdd2 (patch)
tree      7c7c5dc1fd97b84ec45ca1d7603d37b410e81335
parent    cbbfb0ae2ca979222297062647ced653682a6cc7 (diff)
parent    ec3a94188df7d28b374868d9a2a0face910e62ab (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm updates from Ingo Molnar:
"The main changes in this cycle were the fsgsbase related preparatory
patches from Chang S. Bae - but there's also an optimized
memcpy_flushcache() and a cleanup for the __cmpxchg_double() assembly
glue"
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/fsgsbase/64: Clean up various details
x86/segments: Introduce the 'CPUNODE' naming to better document the segment limit CPU/node NR trick
x86/vdso: Initialize the CPU/node NR segment descriptor earlier
x86/vdso: Introduce helper functions for CPU and node number
x86/segments/64: Rename the GDT PER_CPU entry to CPU_NUMBER
x86/fsgsbase/64: Factor out FS/GS segment loading from __switch_to()
x86/fsgsbase/64: Convert the ELF core dump code to the new FSGSBASE helpers
x86/fsgsbase/64: Make ptrace use the new FS/GS base helpers
x86/fsgsbase/64: Introduce FS/GS base helper functions
x86/fsgsbase/64: Fix ptrace() to read the FS/GS base accurately
x86/asm: Use CC_SET()/CC_OUT() in __cmpxchg_double()
x86/asm: Optimize memcpy_flushcache()
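For orientation before the diffs: on x86-64 the getcpu() fast path never enters the kernel; glibc's sched_getcpu()/getcpu() end up in __vdso_getcpu(), which this series reworks. A minimal user-space sketch, not part of this pull (getcpu() needs _GNU_SOURCE and glibc 2.29+; older glibc only exposes sched_getcpu()):

    /* Illustrative only: exercises the __vdso_getcpu() fast path via glibc. */
    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int cpu = 0, node = 0;

            if (getcpu(&cpu, &node) == 0)   /* vDSO call, no kernel entry */
                    printf("cpu %u node %u\n", cpu, node);
            return 0;
    }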
 arch/x86/entry/vdso/vgetcpu.c    |   8
 arch/x86/entry/vdso/vma.c        |  38
 arch/x86/include/asm/cmpxchg.h   |  10
 arch/x86/include/asm/elf.h       |   6
 arch/x86/include/asm/fsgsbase.h  |  49
 arch/x86/include/asm/segment.h   |  46
 arch/x86/include/asm/string_64.h |  20
 arch/x86/include/asm/vgtod.h     |  26
 arch/x86/kernel/cpu/common.c     |  24
 arch/x86/kernel/process_64.c     | 183
 arch/x86/kernel/ptrace.c         |  28
 arch/x86/lib/usercopy_64.c       |   4
 12 files changed, 299 insertions(+), 143 deletions(-)
diff --git a/arch/x86/entry/vdso/vgetcpu.c b/arch/x86/entry/vdso/vgetcpu.c
index 8ec3d1f4ce9a..f86ab0ae1777 100644
--- a/arch/x86/entry/vdso/vgetcpu.c
+++ b/arch/x86/entry/vdso/vgetcpu.c
@@ -13,14 +13,8 @@
 notrace long
 __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
 {
-        unsigned int p;
+        vdso_read_cpunode(cpu, node);
 
-        p = __getcpu();
-
-        if (cpu)
-                *cpu = p & VGETCPU_CPU_MASK;
-        if (node)
-                *node = p >> 12;
         return 0;
 }
 
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 5b8b556dbb12..3f9d43f26f63 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -332,40 +332,6 @@ static __init int vdso_setup(char *s)
         return 0;
 }
 __setup("vdso=", vdso_setup);
-#endif
-
-#ifdef CONFIG_X86_64
-static void vgetcpu_cpu_init(void *arg)
-{
-        int cpu = smp_processor_id();
-        struct desc_struct d = { };
-        unsigned long node = 0;
-#ifdef CONFIG_NUMA
-        node = cpu_to_node(cpu);
-#endif
-        if (static_cpu_has(X86_FEATURE_RDTSCP))
-                write_rdtscp_aux((node << 12) | cpu);
-
-        /*
-         * Store cpu number in limit so that it can be loaded
-         * quickly in user space in vgetcpu. (12 bits for the CPU
-         * and 8 bits for the node)
-         */
-        d.limit0 = cpu | ((node & 0xf) << 12);
-        d.limit1 = node >> 4;
-        d.type = 5;             /* RO data, expand down, accessed */
-        d.dpl = 3;              /* Visible to user code */
-        d.s = 1;                /* Not a system segment */
-        d.p = 1;                /* Present */
-        d.d = 1;                /* 32-bit */
-
-        write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
-}
-
-static int vgetcpu_online(unsigned int cpu)
-{
-        return smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
-}
 
 static int __init init_vdso(void)
 {
@@ -375,9 +341,7 @@ static int __init init_vdso(void)
         init_vdso_image(&vdso_image_x32);
 #endif
 
-        /* notifier priority > KVM */
-        return cpuhp_setup_state(CPUHP_AP_X86_VDSO_VMA_ONLINE,
-                                 "x86/vdso/vma:online", vgetcpu_online, NULL);
+        return 0;
 }
 subsys_initcall(init_vdso);
 #endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index a55d79b233d3..bfb85e5844ab 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -242,10 +242,12 @@ extern void __add_wrong_size(void)
         BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long));            \
         VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long)));    \
         VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \
-        asm volatile(pfx "cmpxchg%c4b %2; sete %0"              \
-                     : "=a" (__ret), "+d" (__old2),             \
-                       "+m" (*(p1)), "+m" (*(p2))               \
-                     : "i" (2 * sizeof(long)), "a" (__old1),    \
+        asm volatile(pfx "cmpxchg%c5b %1"                       \
+                     CC_SET(e)                                  \
+                     : CC_OUT(e) (__ret),                       \
+                       "+m" (*(p1)), "+m" (*(p2)),              \
+                       "+a" (__old1), "+d" (__old2)             \
+                     : "i" (2 * sizeof(long)),                  \
                        "b" (__new1), "c" (__new2));             \
         __ret;                                                  \
 })
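CC_SET()/CC_OUT() (from arch/x86/include/asm/asm.h) use the compiler's flag-output asm operands when available and fall back to an explicit SETcc otherwise, which is what lets the "sete %0" above disappear. A standalone sketch of the same pattern, assuming a GCC with __GCC_ASM_FLAG_OUTPUTS__; not kernel code:

    /* Minimal sketch: consume ZF directly as a boolean output. */
    #include <stdbool.h>

    static inline bool dec_and_test(unsigned long *v)
    {
            bool zero;

            /* ZF lands in 'zero' directly; no sete/test sequence needed. */
            asm volatile("decq %0" : "+m" (*v), "=@ccz" (zero));
            return zero;
    }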
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index a357031d85b5..69c0f892e310 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -10,6 +10,7 @@
 #include <asm/ptrace.h>
 #include <asm/user.h>
 #include <asm/auxvec.h>
+#include <asm/fsgsbase.h>
 
 typedef unsigned long elf_greg_t;
 
@@ -204,7 +205,6 @@ void set_personality_ia32(bool);
 
 #define ELF_CORE_COPY_REGS(pr_reg, regs)                        \
 do {                                                            \
-        unsigned long base;                                     \
         unsigned v;                                             \
         (pr_reg)[0] = (regs)->r15;                              \
         (pr_reg)[1] = (regs)->r14;                              \
@@ -227,8 +227,8 @@ do {                                                            \
         (pr_reg)[18] = (regs)->flags;                           \
         (pr_reg)[19] = (regs)->sp;                              \
         (pr_reg)[20] = (regs)->ss;                              \
-        rdmsrl(MSR_FS_BASE, base); (pr_reg)[21] = base;         \
-        rdmsrl(MSR_KERNEL_GS_BASE, base); (pr_reg)[22] = base;  \
+        (pr_reg)[21] = x86_fsbase_read_cpu();                   \
+        (pr_reg)[22] = x86_gsbase_read_cpu_inactive();          \
         asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v;       \
         asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v;       \
         asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v;       \
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
new file mode 100644
index 000000000000..eb377b6e9eed
--- /dev/null
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_FSGSBASE_H
+#define _ASM_FSGSBASE_H
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_X86_64
+
+#include <asm/msr-index.h>
+
+/*
+ * Read/write a task's FSBASE or GSBASE. This returns the value that
+ * the FS/GS base would have (if the task were to be resumed). These
+ * work on the current task or on a non-running (typically stopped
+ * ptrace child) task.
+ */
+extern unsigned long x86_fsbase_read_task(struct task_struct *task);
+extern unsigned long x86_gsbase_read_task(struct task_struct *task);
+extern int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
+extern int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
+
+/* Helper functions for reading/writing FS/GS base */
+
+static inline unsigned long x86_fsbase_read_cpu(void)
+{
+        unsigned long fsbase;
+
+        rdmsrl(MSR_FS_BASE, fsbase);
+
+        return fsbase;
+}
+
+static inline unsigned long x86_gsbase_read_cpu_inactive(void)
+{
+        unsigned long gsbase;
+
+        rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
+
+        return gsbase;
+}
+
+extern void x86_fsbase_write_cpu(unsigned long fsbase);
+extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase);
+
+#endif /* CONFIG_X86_64 */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_FSGSBASE_H */
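The naming splits two axes: the _cpu/_cpu_inactive helpers touch the MSRs on the current CPU, while the _task helpers work on any non-running task. A hypothetical caller, illustrative only and not from this series:

    /* Report the FS base a stopped tracee would see on resume.
     * Valid for current or a stopped ptrace child, per the header. */
    static unsigned long tracee_fsbase(struct task_struct *child)
    {
            return x86_fsbase_read_task(child);
    }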
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index e293c122d0d5..a314087add07 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -186,8 +186,7 @@
 #define GDT_ENTRY_TLS_MIN               12
 #define GDT_ENTRY_TLS_MAX               14
 
-/* Abused to load per CPU data from limit */
-#define GDT_ENTRY_PER_CPU               15
+#define GDT_ENTRY_CPUNODE               15
 
 /*
  * Number of entries in the GDT table:
@@ -207,7 +206,7 @@
 #define __USER_DS                       (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
 #define __USER32_DS                     __USER_DS
 #define __USER_CS                       (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
-#define __PER_CPU_SEG                   (GDT_ENTRY_PER_CPU*8 + 3)
+#define __CPUNODE_SEG                   (GDT_ENTRY_CPUNODE*8 + 3)
 
 #endif
 
@@ -225,6 +224,47 @@
 #define GDT_ENTRY_TLS_ENTRIES           3
 #define TLS_SIZE                        (GDT_ENTRY_TLS_ENTRIES* 8)
 
+#ifdef CONFIG_X86_64
+
+/* Bit size and mask of CPU number stored in the per CPU data (and TSC_AUX) */
+#define VDSO_CPUNODE_BITS               12
+#define VDSO_CPUNODE_MASK               0xfff
+
+#ifndef __ASSEMBLY__
+
+/* Helper functions to store/load CPU and node numbers */
+
+static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node)
+{
+        return (node << VDSO_CPUNODE_BITS) | cpu;
+}
+
+static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
+{
+        unsigned int p;
+
+        /*
+         * Load CPU and node number from the GDT.  LSL is faster than RDTSCP
+         * and works on all CPUs.  This is volatile so that it orders
+         * correctly with respect to barrier() and to keep GCC from cleverly
+         * hoisting it out of the calling function.
+         *
+         * If RDPID is available, use it.
+         */
+        alternative_io ("lsl %[seg],%[p]",
+                        ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
+                        X86_FEATURE_RDPID,
+                        [p] "=a" (p), [seg] "r" (__CPUNODE_SEG));
+
+        if (cpu)
+                *cpu = (p & VDSO_CPUNODE_MASK);
+        if (node)
+                *node = (p >> VDSO_CPUNODE_BITS);
+}
+
+#endif /* !__ASSEMBLY__ */
+#endif /* CONFIG_X86_64 */
+
 #ifdef __KERNEL__
 
 /*
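The descriptor's 20-bit segment limit now carries vdso_encode_cpunode(cpu, node) directly: the low 12 bits are the CPU, the bits above are the node, and LSL (or RDPID via TSC_AUX) hands the packed value back to user space. A round-trip check of the packing, as a plain user-space illustration:

    /* Round-trip check of the CPUNODE packing (user-space C, not kernel). */
    #include <assert.h>

    #define VDSO_CPUNODE_BITS 12
    #define VDSO_CPUNODE_MASK 0xfff

    int main(void)
    {
            /* node 1, cpu 5 -> 0x1005, as setup_getcpu() stores in the limit */
            unsigned long packed = (1UL << VDSO_CPUNODE_BITS) | 5;

            assert((packed & VDSO_CPUNODE_MASK) == 5);      /* cpu */
            assert((packed >> VDSO_CPUNODE_BITS) == 1);     /* node */
            return 0;
    }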
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index d33f92b9fa22..7ad41bfcc16c 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -149,7 +149,25 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt)
 
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
-void memcpy_flushcache(void *dst, const void *src, size_t cnt);
+void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
+static __always_inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
+{
+        if (__builtin_constant_p(cnt)) {
+                switch (cnt) {
+                case 4:
+                        asm ("movntil %1, %0" : "=m"(*(u32 *)dst) : "r"(*(u32 *)src));
+                        return;
+                case 8:
+                        asm ("movntiq %1, %0" : "=m"(*(u64 *)dst) : "r"(*(u64 *)src));
+                        return;
+                case 16:
+                        asm ("movntiq %1, %0" : "=m"(*(u64 *)dst) : "r"(*(u64 *)src));
+                        asm ("movntiq %1, %0" : "=m"(*(u64 *)(dst + 8)) : "r"(*(u64 *)(src + 8)));
+                        return;
+                }
+        }
+        __memcpy_flushcache(dst, src, cnt);
+}
 #endif
 
 #endif /* __KERNEL__ */
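With the new inline wrapper, a small constant-size copy compiles down to one or two MOVNTI non-temporal stores instead of a function call. A hypothetical kernel-style caller, illustrative only, assuming a pmem-like destination where memcpy_flushcache() is the right primitive:

    /* cnt is the constant 8, so this becomes a single movntiq
     * rather than a call to __memcpy_flushcache(). */
    #include <linux/string.h>
    #include <linux/types.h>

    static void log_seqno(u64 *pmem_slot, u64 seqno)
    {
            memcpy_flushcache(pmem_slot, &seqno, 8);
    }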
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 53748541c487..056a61c8c5c7 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -77,30 +77,4 @@ static inline void gtod_write_end(struct vsyscall_gtod_data *s)
         ++s->seq;
 }
 
-#ifdef CONFIG_X86_64
-
-#define VGETCPU_CPU_MASK 0xfff
-
-static inline unsigned int __getcpu(void)
-{
-        unsigned int p;
-
-        /*
-         * Load per CPU data from GDT.  LSL is faster than RDTSCP and
-         * works on all CPUs.  This is volatile so that it orders
-         * correctly wrt barrier() and to keep gcc from cleverly
-         * hoisting it out of the calling function.
-         *
-         * If RDPID is available, use it.
-         */
-        alternative_io ("lsl %[seg],%[p]",
-                        ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
-                        X86_FEATURE_RDPID,
-                        [p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
-
-        return p;
-}
-
-#endif /* CONFIG_X86_64 */
-
 #endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 10e5ccfa9278..0b99a7fae6be 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1669,6 +1669,29 @@ static void wait_for_master_cpu(int cpu)
 #endif
 }
 
+#ifdef CONFIG_X86_64
+static void setup_getcpu(int cpu)
+{
+        unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
+        struct desc_struct d = { };
+
+        if (static_cpu_has(X86_FEATURE_RDTSCP))
+                write_rdtscp_aux(cpudata);
+
+        /* Store CPU and node number in limit. */
+        d.limit0 = cpudata;
+        d.limit1 = cpudata >> 16;
+
+        d.type = 5;             /* RO data, expand down, accessed */
+        d.dpl = 3;              /* Visible to user code */
+        d.s = 1;                /* Not a system segment */
+        d.p = 1;                /* Present */
+        d.d = 1;                /* 32-bit */
+
+        write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPUNODE, &d, DESCTYPE_S);
+}
+#endif
+
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
  * initialized (naturally) in the bootstrap process, such as the GDT
@@ -1706,6 +1729,7 @@ void cpu_init(void)
             early_cpu_to_node(cpu) != NUMA_NO_NODE)
                 set_numa_node(early_cpu_to_node(cpu));
 #endif
+        setup_getcpu(cpu);
 
         me = current;
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ea5ea850348d..d6674a425714 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -54,6 +54,7 @@
 #include <asm/vdso.h>
 #include <asm/intel_rdt_sched.h>
 #include <asm/unistd.h>
+#include <asm/fsgsbase.h>
 #ifdef CONFIG_IA32_EMULATION
 /* Not included via unistd.h */
 #include <asm/unistd_32_ia32.h>
@@ -286,6 +287,138 @@ static __always_inline void load_seg_legacy(unsigned short prev_index,
         }
 }
 
+static __always_inline void x86_fsgsbase_load(struct thread_struct *prev,
+                                              struct thread_struct *next)
+{
+        load_seg_legacy(prev->fsindex, prev->fsbase,
+                        next->fsindex, next->fsbase, FS);
+        load_seg_legacy(prev->gsindex, prev->gsbase,
+                        next->gsindex, next->gsbase, GS);
+}
+
+static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
+                                            unsigned short selector)
+{
+        unsigned short idx = selector >> 3;
+        unsigned long base;
+
+        if (likely((selector & SEGMENT_TI_MASK) == 0)) {
+                if (unlikely(idx >= GDT_ENTRIES))
+                        return 0;
+
+                /*
+                 * There are no user segments in the GDT with nonzero bases
+                 * other than the TLS segments.
+                 */
+                if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+                        return 0;
+
+                idx -= GDT_ENTRY_TLS_MIN;
+                base = get_desc_base(&task->thread.tls_array[idx]);
+        } else {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+                struct ldt_struct *ldt;
+
+                /*
+                 * If performance here mattered, we could protect the LDT
+                 * with RCU.  This is a slow path, though, so we can just
+                 * take the mutex.
+                 */
+                mutex_lock(&task->mm->context.lock);
+                ldt = task->mm->context.ldt;
+                if (unlikely(idx >= ldt->nr_entries))
+                        base = 0;
+                else
+                        base = get_desc_base(ldt->entries + idx);
+                mutex_unlock(&task->mm->context.lock);
+#else
+                base = 0;
+#endif
+        }
+
+        return base;
+}
+
+void x86_fsbase_write_cpu(unsigned long fsbase)
+{
+        /*
+         * Set the selector to 0 as a notion, that the segment base is
+         * overwritten, which will be checked for skipping the segment load
+         * during context switch.
+         */
+        loadseg(FS, 0);
+        wrmsrl(MSR_FS_BASE, fsbase);
+}
+
+void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
+{
+        /* Set the selector to 0 for the same reason as %fs above. */
+        loadseg(GS, 0);
+        wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
+}
+
+unsigned long x86_fsbase_read_task(struct task_struct *task)
+{
+        unsigned long fsbase;
+
+        if (task == current)
+                fsbase = x86_fsbase_read_cpu();
+        else if (task->thread.fsindex == 0)
+                fsbase = task->thread.fsbase;
+        else
+                fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex);
+
+        return fsbase;
+}
+
+unsigned long x86_gsbase_read_task(struct task_struct *task)
+{
+        unsigned long gsbase;
+
+        if (task == current)
+                gsbase = x86_gsbase_read_cpu_inactive();
+        else if (task->thread.gsindex == 0)
+                gsbase = task->thread.gsbase;
+        else
+                gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex);
+
+        return gsbase;
+}
+
+int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
+{
+        /*
+         * Not strictly needed for %fs, but do it for symmetry
+         * with %gs
+         */
+        if (unlikely(fsbase >= TASK_SIZE_MAX))
+                return -EPERM;
+
+        preempt_disable();
+        task->thread.fsbase = fsbase;
+        if (task == current)
+                x86_fsbase_write_cpu(fsbase);
+        task->thread.fsindex = 0;
+        preempt_enable();
+
+        return 0;
+}
+
+int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
+{
+        if (unlikely(gsbase >= TASK_SIZE_MAX))
+                return -EPERM;
+
+        preempt_disable();
+        task->thread.gsbase = gsbase;
+        if (task == current)
+                x86_gsbase_write_cpu_inactive(gsbase);
+        task->thread.gsindex = 0;
+        preempt_enable();
+
+        return 0;
+}
+
 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
                 unsigned long arg, struct task_struct *p, unsigned long tls)
 {
@@ -473,10 +606,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         if (unlikely(next->ds | prev->ds))
                 loadsegment(ds, next->ds);
 
-        load_seg_legacy(prev->fsindex, prev->fsbase,
-                        next->fsindex, next->fsbase, FS);
-        load_seg_legacy(prev->gsindex, prev->gsbase,
-                        next->gsindex, next->gsbase, GS);
+        x86_fsgsbase_load(prev, next);
 
         switch_fpu_finish(next_fpu, cpu);
 
@@ -627,54 +757,25 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
 long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 {
         int ret = 0;
-        int doit = task == current;
-        int cpu;
 
         switch (option) {
-        case ARCH_SET_GS:
-                if (arg2 >= TASK_SIZE_MAX)
-                        return -EPERM;
-                cpu = get_cpu();
-                task->thread.gsindex = 0;
-                task->thread.gsbase = arg2;
-                if (doit) {
-                        load_gs_index(0);
-                        ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
-                }
-                put_cpu();
+        case ARCH_SET_GS: {
+                ret = x86_gsbase_write_task(task, arg2);
                 break;
-        case ARCH_SET_FS:
-                /* Not strictly needed for fs, but do it for symmetry
-                   with gs */
-                if (arg2 >= TASK_SIZE_MAX)
-                        return -EPERM;
-                cpu = get_cpu();
-                task->thread.fsindex = 0;
-                task->thread.fsbase = arg2;
-                if (doit) {
-                        /* set the selector to 0 to not confuse __switch_to */
-                        loadsegment(fs, 0);
-                        ret = wrmsrl_safe(MSR_FS_BASE, arg2);
-                }
-                put_cpu();
+        }
+        case ARCH_SET_FS: {
+                ret = x86_fsbase_write_task(task, arg2);
                 break;
+        }
         case ARCH_GET_FS: {
-                unsigned long base;
+                unsigned long base = x86_fsbase_read_task(task);
 
-                if (doit)
-                        rdmsrl(MSR_FS_BASE, base);
-                else
-                        base = task->thread.fsbase;
                 ret = put_user(base, (unsigned long __user *)arg2);
                 break;
         }
         case ARCH_GET_GS: {
-                unsigned long base;
+                unsigned long base = x86_gsbase_read_task(task);
 
-                if (doit)
-                        rdmsrl(MSR_KERNEL_GS_BASE, base);
-                else
-                        base = task->thread.gsbase;
                 ret = put_user(base, (unsigned long __user *)arg2);
                 break;
         }
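The arch_prctl() cases now funnel through the same task-level helpers as ptrace. A user-space sketch of the refactored path, illustrative only; ARCH_GET_FS comes from the uapi header asm/prctl.h:

    #define _GNU_SOURCE
    #include <asm/prctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned long fsbase;

            /* Served by x86_fsbase_read_task() after this merge. */
            if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase) == 0)
                    printf("fsbase: %#lx\n", fsbase);
            return 0;
    }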
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e2ee403865eb..d8f49c7384a3 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -39,6 +39,7 @@
 #include <asm/hw_breakpoint.h>
 #include <asm/traps.h>
 #include <asm/syscall.h>
+#include <asm/fsgsbase.h>
 
 #include "tls.h"
 
@@ -396,12 +397,11 @@ static int putreg(struct task_struct *child,
                 if (value >= TASK_SIZE_MAX)
                         return -EIO;
                 /*
-                 * When changing the segment base, use do_arch_prctl_64
-                 * to set either thread.fs or thread.fsindex and the
-                 * corresponding GDT slot.
+                 * When changing the FS base, use the same
+                 * mechanism as for do_arch_prctl_64().
                  */
                 if (child->thread.fsbase != value)
-                        return do_arch_prctl_64(child, ARCH_SET_FS, value);
+                        return x86_fsbase_write_task(child, value);
                 return 0;
         case offsetof(struct user_regs_struct,gs_base):
                 /*
@@ -410,7 +410,7 @@ static int putreg(struct task_struct *child,
                 if (value >= TASK_SIZE_MAX)
                         return -EIO;
                 if (child->thread.gsbase != value)
-                        return do_arch_prctl_64(child, ARCH_SET_GS, value);
+                        return x86_gsbase_write_task(child, value);
                 return 0;
 #endif
         }
@@ -434,20 +434,10 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset)
                 return get_flags(task);
 
 #ifdef CONFIG_X86_64
-        case offsetof(struct user_regs_struct, fs_base): {
-                /*
-                 * XXX: This will not behave as expected if called on
-                 * current or if fsindex != 0.
-                 */
-                return task->thread.fsbase;
-        }
-        case offsetof(struct user_regs_struct, gs_base): {
-                /*
-                 * XXX: This will not behave as expected if called on
-                 * current or if fsindex != 0.
-                 */
-                return task->thread.gsbase;
-        }
+        case offsetof(struct user_regs_struct, fs_base):
+                return x86_fsbase_read_task(task);
+        case offsetof(struct user_regs_struct, gs_base):
+                return x86_gsbase_read_task(task);
 #endif
         }
 
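This getreg() change is what makes PTRACE_PEEKUSER on fs_base/gs_base accurate even when fsindex != 0, per the 'Fix ptrace() to read the FS/GS base accurately' patch in the shortlog. An illustrative tracer snippet, with error handling elided and assuming the child is already ptrace-stopped:

    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/user.h>
    #include <stddef.h>
    #include <stdio.h>

    static void peek_fsbase(pid_t pid)
    {
            /* Now served by x86_fsbase_read_task(), so it is correct
             * even when the tracee set its base via a selector. */
            long fsbase = ptrace(PTRACE_PEEKUSER, pid,
                                 offsetof(struct user_regs_struct, fs_base), 0);
            printf("tracee fs_base: %#lx\n", fsbase);
    }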
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index fefe64436398..1bd837cdc4b1 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -153,7 +153,7 @@ long __copy_user_flushcache(void *dst, const void __user *src, unsigned size)
         return rc;
 }
 
-void memcpy_flushcache(void *_dst, const void *_src, size_t size)
+void __memcpy_flushcache(void *_dst, const void *_src, size_t size)
 {
         unsigned long dest = (unsigned long) _dst;
         unsigned long source = (unsigned long) _src;
@@ -216,7 +216,7 @@ void memcpy_flushcache(void *_dst, const void *_src, size_t size)
                 clean_cache_range((void *) dest, size);
         }
 }
-EXPORT_SYMBOL_GPL(memcpy_flushcache);
+EXPORT_SYMBOL_GPL(__memcpy_flushcache);
 
 void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
                             size_t len)