 arch/x86/entry/vdso/vclock_gettime.c         |  12
 arch/x86/include/asm/boot.h                  |   2
 arch/x86/include/asm/fpu/internal.h          |   1
 arch/x86/include/asm/fpu/xstate.h            |  11
 arch/x86/include/asm/lguest.h                |   4
 arch/x86/include/asm/mmu_context.h           |  34
 arch/x86/kernel/cpu/amd.c                    |   3
 arch/x86/kernel/fpu/init.c                   | 161
 arch/x86/kernel/fpu/xstate.c                 |   4
 arch/x86/kernel/reboot.c                     |   8
 arch/x86/kernel/verify_cpu.S                 |  50
 arch/x86/mm/init_64.c                        |   3
 arch/x86/mm/pageattr.c                       |   3
 arch/x86/mm/tlb.c                            |  29
 drivers/lguest/core.c                        |  74
 tools/testing/selftests/x86/Makefile         |   6
 tools/testing/selftests/x86/vdso_restorer.c  |  88
 17 files changed, 342 insertions(+), 151 deletions(-)
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 8602f06c759f..1a50e09c945b 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -126,23 +126,23 @@ static notrace cycle_t vread_pvclock(int *mode)
126 | * | 126 | * |
127 | * On Xen, we don't appear to have that guarantee, but Xen still | 127 | * On Xen, we don't appear to have that guarantee, but Xen still |
128 | * supplies a valid seqlock using the version field. | 128 | * supplies a valid seqlock using the version field. |
129 | 129 | * | |
130 | * We only do pvclock vdso timing at all if | 130 | * We only do pvclock vdso timing at all if |
131 | * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to | 131 | * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to |
132 | * mean that all vCPUs have matching pvti and that the TSC is | 132 | * mean that all vCPUs have matching pvti and that the TSC is |
133 | * synced, so we can just look at vCPU 0's pvti. | 133 | * synced, so we can just look at vCPU 0's pvti. |
134 | */ | 134 | */ |
135 | 135 | ||
136 | if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) { | ||
137 | *mode = VCLOCK_NONE; | ||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | do { | 136 | do { |
142 | version = pvti->version; | 137 | version = pvti->version; |
143 | 138 | ||
144 | smp_rmb(); | 139 | smp_rmb(); |
145 | 140 | ||
141 | if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) { | ||
142 | *mode = VCLOCK_NONE; | ||
143 | return 0; | ||
144 | } | ||
145 | |||
146 | tsc = rdtsc_ordered(); | 146 | tsc = rdtsc_ordered(); |
147 | pvti_tsc_to_system_mul = pvti->tsc_to_system_mul; | 147 | pvti_tsc_to_system_mul = pvti->tsc_to_system_mul; |
148 | pvti_tsc_shift = pvti->tsc_shift; | 148 | pvti_tsc_shift = pvti->tsc_shift; |
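The vclock_gettime.c change above moves the PVCLOCK_TSC_STABLE_BIT test inside the version-seqlock retry loop, so the flags are sampled under the same version as the fields used for the time calculation; checked before the loop, the flags could change without the reader noticing. A minimal user-space sketch of that read pattern, with a simplified stand-in structure (not the kernel's pvclock_vcpu_time_info) and __sync_synchronize() standing in for smp_rmb():

struct pvti_sketch {
        volatile unsigned int version;          /* odd while the host updates it */
        volatile unsigned int flags;
        volatile unsigned long long system_time;
};

/* Returns 0 and fills *ns, or -1 if the clock must not be used. */
static int pvclock_read_sketch(struct pvti_sketch *p, unsigned long long *ns)
{
        unsigned int version;
        unsigned long long t;

        do {
                version = p->version;
                __sync_synchronize();           /* smp_rmb() in the kernel */

                /* Checked inside the loop, under the same version. */
                if (!(p->flags & 0x1 /* PVCLOCK_TSC_STABLE_BIT */))
                        return -1;              /* caller falls back to a real syscall */

                t = p->system_time;             /* TSC delta scaling omitted here */

                __sync_synchronize();
        } while ((p->version & 1) || p->version != version);

        *ns = t;
        return 0;
}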
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 4fa687a47a62..6b8d6e8cd449 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -27,7 +27,7 @@
27 | #define BOOT_HEAP_SIZE 0x400000 | 27 | #define BOOT_HEAP_SIZE 0x400000 |
28 | #else /* !CONFIG_KERNEL_BZIP2 */ | 28 | #else /* !CONFIG_KERNEL_BZIP2 */ |
29 | 29 | ||
30 | #define BOOT_HEAP_SIZE 0x8000 | 30 | #define BOOT_HEAP_SIZE 0x10000 |
31 | 31 | ||
32 | #endif /* !CONFIG_KERNEL_BZIP2 */ | 32 | #endif /* !CONFIG_KERNEL_BZIP2 */ |
33 | 33 | ||
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index eadcdd5bb946..0fd440df63f1 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -42,6 +42,7 @@ extern void fpu__init_cpu_xstate(void);
42 | extern void fpu__init_system(struct cpuinfo_x86 *c); | 42 | extern void fpu__init_system(struct cpuinfo_x86 *c); |
43 | extern void fpu__init_check_bugs(void); | 43 | extern void fpu__init_check_bugs(void); |
44 | extern void fpu__resume_cpu(void); | 44 | extern void fpu__resume_cpu(void); |
45 | extern u64 fpu__get_supported_xfeatures_mask(void); | ||
45 | 46 | ||
46 | /* | 47 | /* |
47 | * Debugging facility: | 48 | * Debugging facility: |
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 3a6c89b70307..af30fdeb140d 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -20,15 +20,16 @@
20 | 20 | ||
21 | /* Supported features which support lazy state saving */ | 21 | /* Supported features which support lazy state saving */ |
22 | #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ | 22 | #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ |
23 | XFEATURE_MASK_SSE | \ | 23 | XFEATURE_MASK_SSE) |
24 | |||
25 | /* Supported features which require eager state saving */ | ||
26 | #define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \ | ||
27 | XFEATURE_MASK_BNDCSR | \ | ||
24 | XFEATURE_MASK_YMM | \ | 28 | XFEATURE_MASK_YMM | \ |
25 | XFEATURE_MASK_OPMASK | \ | 29 | XFEATURE_MASK_OPMASK | \ |
26 | XFEATURE_MASK_ZMM_Hi256 | \ | 30 | XFEATURE_MASK_ZMM_Hi256 | \ |
27 | XFEATURE_MASK_Hi16_ZMM) | 31 | XFEATURE_MASK_Hi16_ZMM) |
28 | 32 | ||
29 | /* Supported features which require eager state saving */ | ||
30 | #define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR) | ||
31 | |||
32 | /* All currently supported features */ | 33 | /* All currently supported features */ |
33 | #define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER) | 34 | #define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER) |
34 | 35 | ||
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h
index 3bbc07a57a31..73d0c9b92087 100644
--- a/arch/x86/include/asm/lguest.h
+++ b/arch/x86/include/asm/lguest.h
@@ -12,7 +12,9 @@
12 | #define GUEST_PL 1 | 12 | #define GUEST_PL 1 |
13 | 13 | ||
14 | /* Page for Switcher text itself, then two pages per cpu */ | 14 | /* Page for Switcher text itself, then two pages per cpu */ |
15 | #define TOTAL_SWITCHER_PAGES (1 + 2 * nr_cpu_ids) | 15 | #define SWITCHER_TEXT_PAGES (1) |
16 | #define SWITCHER_STACK_PAGES (2 * nr_cpu_ids) | ||
17 | #define TOTAL_SWITCHER_PAGES (SWITCHER_TEXT_PAGES + SWITCHER_STACK_PAGES) | ||
16 | 18 | ||
17 | /* Where we map the Switcher, in both Host and Guest. */ | 19 | /* Where we map the Switcher, in both Host and Guest. */ |
18 | extern unsigned long switcher_addr; | 20 | extern unsigned long switcher_addr; |
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 379cd3658799..bfd9b2a35a0b 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -116,8 +116,36 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
116 | #endif | 116 | #endif |
117 | cpumask_set_cpu(cpu, mm_cpumask(next)); | 117 | cpumask_set_cpu(cpu, mm_cpumask(next)); |
118 | 118 | ||
119 | /* Re-load page tables */ | 119 | /* |
120 | * Re-load page tables. | ||
121 | * | ||
122 | * This logic has an ordering constraint: | ||
123 | * | ||
124 | * CPU 0: Write to a PTE for 'next' | ||
125 | * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. | ||
126 | * CPU 1: set bit 1 in next's mm_cpumask | ||
127 | * CPU 1: load from the PTE that CPU 0 writes (implicit) | ||
128 | * | ||
129 | * We need to prevent an outcome in which CPU 1 observes | ||
130 | * the new PTE value and CPU 0 observes bit 1 clear in | ||
131 | * mm_cpumask. (If that occurs, then the IPI will never | ||
132 | * be sent, and CPU 0's TLB will contain a stale entry.) | ||
133 | * | ||
134 | * The bad outcome can occur if either CPU's load is | ||
135 | * reordered before that CPU's store, so both CPUs must | ||
136 | * execute full barriers to prevent this from happening. | ||
137 | * | ||
138 | * Thus, switch_mm needs a full barrier between the | ||
139 | * store to mm_cpumask and any operation that could load | ||
140 | * from next->pgd. TLB fills are special and can happen | ||
141 | * due to instruction fetches or for no reason at all, | ||
142 | * and neither LOCK nor MFENCE orders them. | ||
143 | * Fortunately, load_cr3() is serializing and gives the | ||
144 | * ordering guarantee we need. | ||
145 | * | ||
146 | */ | ||
120 | load_cr3(next->pgd); | 147 | load_cr3(next->pgd); |
148 | |||
121 | trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); | 149 | trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); |
122 | 150 | ||
123 | /* Stop flush ipis for the previous mm */ | 151 | /* Stop flush ipis for the previous mm */ |
@@ -156,10 +184,14 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
156 | * schedule, protecting us from simultaneous changes. | 184 | * schedule, protecting us from simultaneous changes. |
157 | */ | 185 | */ |
158 | cpumask_set_cpu(cpu, mm_cpumask(next)); | 186 | cpumask_set_cpu(cpu, mm_cpumask(next)); |
187 | |||
159 | /* | 188 | /* |
160 | * We were in lazy tlb mode and leave_mm disabled | 189 | * We were in lazy tlb mode and leave_mm disabled |
161 | * tlb flush IPI delivery. We must reload CR3 | 190 | * tlb flush IPI delivery. We must reload CR3 |
162 | * to make sure to use no freed page tables. | 191 | * to make sure to use no freed page tables. |
192 | * | ||
193 | * As above, load_cr3() is serializing and orders TLB | ||
194 | * fills with respect to the mm_cpumask write. | ||
163 | */ | 195 | */ |
164 | load_cr3(next->pgd); | 196 | load_cr3(next->pgd); |
165 | trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); | 197 | trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); |
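The comment added above describes a classic store-buffering pattern: each side stores to one location (a PTE, or a bit in mm_cpumask) and then loads the other side's location, and a full barrier is needed between the store and the load on both sides. A rough two-thread sketch of the pairing, using plain C stand-ins rather than the kernel's primitives; on the switch_mm() side the kernel gets its barrier for free because the CR3 write is serializing, while the matching barrier on the flush side is what the tlb.c hunks further down add:

volatile int pte;                        /* stands in for a PTE of 'next'           */
volatile int cpu1_bit;                   /* stands in for CPU 1's bit in mm_cpumask */

void cpu0_pte_writer(void)               /* e.g. the flush_tlb_mm_range() caller    */
{
        pte = 1;                         /* write the new PTE                       */
        __sync_synchronize();            /* full barrier                            */
        if (cpu1_bit)                    /* decide whether to IPI CPU 1             */
                /* send_flush_ipi() */ ;
}

void cpu1_switch_mm(void)
{
        cpu1_bit = 1;                    /* cpumask_set_cpu() in switch_mm()        */
        __sync_synchronize();            /* load_cr3() provides this ordering       */
        (void)pte;                       /* any later page-table walk / TLB fill    */
}

If either barrier is missing, both loads can observe the old values: no IPI is sent while a stale translation survives in the TLB, which is exactly the outcome the comment rules out.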
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e678ddeed030..a07956a08936 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -434,8 +434,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
434 | */ | 434 | */ |
435 | int ht_nodeid = c->initial_apicid; | 435 | int ht_nodeid = c->initial_apicid; |
436 | 436 | ||
437 | if (ht_nodeid >= 0 && | 437 | if (__apicid_to_node[ht_nodeid] != NUMA_NO_NODE) |
438 | __apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | ||
439 | node = __apicid_to_node[ht_nodeid]; | 438 | node = __apicid_to_node[ht_nodeid]; |
440 | /* Pick a nearby node */ | 439 | /* Pick a nearby node */ |
441 | if (!node_online(node)) | 440 | if (!node_online(node)) |
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 7b2978ab30df..6d9f0a7ef4c8 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -3,8 +3,11 @@
3 | */ | 3 | */ |
4 | #include <asm/fpu/internal.h> | 4 | #include <asm/fpu/internal.h> |
5 | #include <asm/tlbflush.h> | 5 | #include <asm/tlbflush.h> |
6 | #include <asm/setup.h> | ||
7 | #include <asm/cmdline.h> | ||
6 | 8 | ||
7 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
10 | #include <linux/init.h> | ||
8 | 11 | ||
9 | /* | 12 | /* |
10 | * Initialize the TS bit in CR0 according to the style of context-switches | 13 | * Initialize the TS bit in CR0 according to the style of context-switches |
@@ -270,20 +273,52 @@ static void __init fpu__init_system_xstate_size_legacy(void)
270 | */ | 273 | */ |
271 | static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; | 274 | static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; |
272 | 275 | ||
273 | static int __init eager_fpu_setup(char *s) | 276 | /* |
277 | * Find supported xfeatures based on cpu features and command-line input. | ||
278 | * This must be called after fpu__init_parse_early_param() is called and | ||
279 | * xfeatures_mask is enumerated. | ||
280 | */ | ||
281 | u64 __init fpu__get_supported_xfeatures_mask(void) | ||
274 | { | 282 | { |
275 | if (!strcmp(s, "on")) | 283 | /* Support all xfeatures known to us */ |
276 | eagerfpu = ENABLE; | 284 | if (eagerfpu != DISABLE) |
277 | else if (!strcmp(s, "off")) | 285 | return XCNTXT_MASK; |
278 | eagerfpu = DISABLE; | 286 | |
279 | else if (!strcmp(s, "auto")) | 287 | /* Warning of xfeatures being disabled for no eagerfpu mode */ |
280 | eagerfpu = AUTO; | 288 | if (xfeatures_mask & XFEATURE_MASK_EAGER) { |
281 | return 1; | 289 | pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", |
290 | xfeatures_mask & XFEATURE_MASK_EAGER); | ||
291 | } | ||
292 | |||
293 | /* Return a mask that masks out all features requiring eagerfpu mode */ | ||
294 | return ~XFEATURE_MASK_EAGER; | ||
295 | } | ||
296 | |||
297 | /* | ||
298 | * Disable features dependent on eagerfpu. | ||
299 | */ | ||
300 | static void __init fpu__clear_eager_fpu_features(void) | ||
301 | { | ||
302 | setup_clear_cpu_cap(X86_FEATURE_MPX); | ||
303 | setup_clear_cpu_cap(X86_FEATURE_AVX); | ||
304 | setup_clear_cpu_cap(X86_FEATURE_AVX2); | ||
305 | setup_clear_cpu_cap(X86_FEATURE_AVX512F); | ||
306 | setup_clear_cpu_cap(X86_FEATURE_AVX512PF); | ||
307 | setup_clear_cpu_cap(X86_FEATURE_AVX512ER); | ||
308 | setup_clear_cpu_cap(X86_FEATURE_AVX512CD); | ||
282 | } | 309 | } |
283 | __setup("eagerfpu=", eager_fpu_setup); | ||
284 | 310 | ||
285 | /* | 311 | /* |
286 | * Pick the FPU context switching strategy: | 312 | * Pick the FPU context switching strategy: |
313 | * | ||
314 | * When eagerfpu is AUTO or ENABLE, we ensure it is ENABLE if either of | ||
315 | * the following is true: | ||
316 | * | ||
317 | * (1) the cpu has xsaveopt, as it has the optimization and doing eager | ||
318 | * FPU switching has a relatively low cost compared to a plain xsave; | ||
319 | * (2) the cpu has xsave features (e.g. MPX) that depend on eager FPU | ||
320 | * switching. Should the kernel boot with noxsaveopt, we support MPX | ||
321 | * with eager FPU switching at a higher cost. | ||
287 | */ | 322 | */ |
288 | static void __init fpu__init_system_ctx_switch(void) | 323 | static void __init fpu__init_system_ctx_switch(void) |
289 | { | 324 | { |
@@ -295,19 +330,11 @@ static void __init fpu__init_system_ctx_switch(void)
295 | WARN_ON_FPU(current->thread.fpu.fpstate_active); | 330 | WARN_ON_FPU(current->thread.fpu.fpstate_active); |
296 | current_thread_info()->status = 0; | 331 | current_thread_info()->status = 0; |
297 | 332 | ||
298 | /* Auto enable eagerfpu for xsaveopt */ | ||
299 | if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE) | 333 | if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE) |
300 | eagerfpu = ENABLE; | 334 | eagerfpu = ENABLE; |
301 | 335 | ||
302 | if (xfeatures_mask & XFEATURE_MASK_EAGER) { | 336 | if (xfeatures_mask & XFEATURE_MASK_EAGER) |
303 | if (eagerfpu == DISABLE) { | 337 | eagerfpu = ENABLE; |
304 | pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", | ||
305 | xfeatures_mask & XFEATURE_MASK_EAGER); | ||
306 | xfeatures_mask &= ~XFEATURE_MASK_EAGER; | ||
307 | } else { | ||
308 | eagerfpu = ENABLE; | ||
309 | } | ||
310 | } | ||
311 | 338 | ||
312 | if (eagerfpu == ENABLE) | 339 | if (eagerfpu == ENABLE) |
313 | setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); | 340 | setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); |
@@ -316,11 +343,48 @@ static void __init fpu__init_system_ctx_switch(void)
316 | } | 343 | } |
317 | 344 | ||
318 | /* | 345 | /* |
346 | * We parse fpu parameters early because fpu__init_system() is executed | ||
347 | * before parse_early_param(). | ||
348 | */ | ||
349 | static void __init fpu__init_parse_early_param(void) | ||
350 | { | ||
351 | /* | ||
352 | * No need to check "eagerfpu=auto" again, since it is the | ||
353 | * initial default. | ||
354 | */ | ||
355 | if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) { | ||
356 | eagerfpu = DISABLE; | ||
357 | fpu__clear_eager_fpu_features(); | ||
358 | } else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) { | ||
359 | eagerfpu = ENABLE; | ||
360 | } | ||
361 | |||
362 | if (cmdline_find_option_bool(boot_command_line, "no387")) | ||
363 | setup_clear_cpu_cap(X86_FEATURE_FPU); | ||
364 | |||
365 | if (cmdline_find_option_bool(boot_command_line, "nofxsr")) { | ||
366 | setup_clear_cpu_cap(X86_FEATURE_FXSR); | ||
367 | setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); | ||
368 | setup_clear_cpu_cap(X86_FEATURE_XMM); | ||
369 | } | ||
370 | |||
371 | if (cmdline_find_option_bool(boot_command_line, "noxsave")) | ||
372 | fpu__xstate_clear_all_cpu_caps(); | ||
373 | |||
374 | if (cmdline_find_option_bool(boot_command_line, "noxsaveopt")) | ||
375 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
376 | |||
377 | if (cmdline_find_option_bool(boot_command_line, "noxsaves")) | ||
378 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | ||
379 | } | ||
380 | |||
381 | /* | ||
319 | * Called on the boot CPU once per system bootup, to set up the initial | 382 | * Called on the boot CPU once per system bootup, to set up the initial |
320 | * FPU state that is later cloned into all processes: | 383 | * FPU state that is later cloned into all processes: |
321 | */ | 384 | */ |
322 | void __init fpu__init_system(struct cpuinfo_x86 *c) | 385 | void __init fpu__init_system(struct cpuinfo_x86 *c) |
323 | { | 386 | { |
387 | fpu__init_parse_early_param(); | ||
324 | fpu__init_system_early_generic(c); | 388 | fpu__init_system_early_generic(c); |
325 | 389 | ||
326 | /* | 390 | /* |
@@ -344,62 +408,3 @@ void __init fpu__init_system(struct cpuinfo_x86 *c)
344 | 408 | ||
345 | fpu__init_system_ctx_switch(); | 409 | fpu__init_system_ctx_switch(); |
346 | } | 410 | } |
347 | |||
348 | /* | ||
349 | * Boot parameter to turn off FPU support and fall back to math-emu: | ||
350 | */ | ||
351 | static int __init no_387(char *s) | ||
352 | { | ||
353 | setup_clear_cpu_cap(X86_FEATURE_FPU); | ||
354 | return 1; | ||
355 | } | ||
356 | __setup("no387", no_387); | ||
357 | |||
358 | /* | ||
359 | * Disable all xstate CPU features: | ||
360 | */ | ||
361 | static int __init x86_noxsave_setup(char *s) | ||
362 | { | ||
363 | if (strlen(s)) | ||
364 | return 0; | ||
365 | |||
366 | fpu__xstate_clear_all_cpu_caps(); | ||
367 | |||
368 | return 1; | ||
369 | } | ||
370 | __setup("noxsave", x86_noxsave_setup); | ||
371 | |||
372 | /* | ||
373 | * Disable the XSAVEOPT instruction specifically: | ||
374 | */ | ||
375 | static int __init x86_noxsaveopt_setup(char *s) | ||
376 | { | ||
377 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
378 | |||
379 | return 1; | ||
380 | } | ||
381 | __setup("noxsaveopt", x86_noxsaveopt_setup); | ||
382 | |||
383 | /* | ||
384 | * Disable the XSAVES instruction: | ||
385 | */ | ||
386 | static int __init x86_noxsaves_setup(char *s) | ||
387 | { | ||
388 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | ||
389 | |||
390 | return 1; | ||
391 | } | ||
392 | __setup("noxsaves", x86_noxsaves_setup); | ||
393 | |||
394 | /* | ||
395 | * Disable FX save/restore and SSE support: | ||
396 | */ | ||
397 | static int __init x86_nofxsr_setup(char *s) | ||
398 | { | ||
399 | setup_clear_cpu_cap(X86_FEATURE_FXSR); | ||
400 | setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); | ||
401 | setup_clear_cpu_cap(X86_FEATURE_XMM); | ||
402 | |||
403 | return 1; | ||
404 | } | ||
405 | __setup("nofxsr", x86_nofxsr_setup); | ||
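The fpu/init.c rework above replaces the old __setup() handlers with direct scans of boot_command_line via cmdline_find_option_bool(), because fpu__init_system() runs before parse_early_param() and ordinary __setup() callbacks would fire too late. The same pattern, reduced to a sketch; "nomyfeature" is a made-up option used only for illustration:

#include <linux/init.h>          /* boot_command_line, __init */
#include <asm/cmdline.h>         /* cmdline_find_option_bool() */

static bool myfeature_off __initdata;

/* Sketch only: scan the raw command-line string long before
 * parse_early_param() has run. */
static void __init myfeature_parse_early_param(void)
{
        if (cmdline_find_option_bool(boot_command_line, "nomyfeature"))
                myfeature_off = true;
}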
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 40f100285984..d425cda5ae6d 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -52,6 +52,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
52 | setup_clear_cpu_cap(X86_FEATURE_AVX512ER); | 52 | setup_clear_cpu_cap(X86_FEATURE_AVX512ER); |
53 | setup_clear_cpu_cap(X86_FEATURE_AVX512CD); | 53 | setup_clear_cpu_cap(X86_FEATURE_AVX512CD); |
54 | setup_clear_cpu_cap(X86_FEATURE_MPX); | 54 | setup_clear_cpu_cap(X86_FEATURE_MPX); |
55 | setup_clear_cpu_cap(X86_FEATURE_XGETBV1); | ||
55 | } | 56 | } |
56 | 57 | ||
57 | /* | 58 | /* |
@@ -632,8 +633,7 @@ void __init fpu__init_system_xstate(void)
632 | BUG(); | 633 | BUG(); |
633 | } | 634 | } |
634 | 635 | ||
635 | /* Support only the state known to the OS: */ | 636 | xfeatures_mask &= fpu__get_supported_xfeatures_mask(); |
636 | xfeatures_mask = xfeatures_mask & XCNTXT_MASK; | ||
637 | 637 | ||
638 | /* Enable xstate instructions to be able to continue with initialization: */ | 638 | /* Enable xstate instructions to be able to continue with initialization: */ |
639 | fpu__init_cpu_xstate(); | 639 | fpu__init_cpu_xstate(); |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index d64889aa2d46..ab0adc0fa5db 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -182,6 +182,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
182 | DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), | 182 | DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), |
183 | }, | 183 | }, |
184 | }, | 184 | }, |
185 | { /* Handle problems with rebooting on the iMac10,1. */ | ||
186 | .callback = set_pci_reboot, | ||
187 | .ident = "Apple iMac10,1", | ||
188 | .matches = { | ||
189 | DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), | ||
190 | DMI_MATCH(DMI_PRODUCT_NAME, "iMac10,1"), | ||
191 | }, | ||
192 | }, | ||
185 | 193 | ||
186 | /* ASRock */ | 194 | /* ASRock */ |
187 | { /* Handle problems with rebooting on ASRock Q1900DC-ITX */ | 195 | { /* Handle problems with rebooting on ASRock Q1900DC-ITX */ |
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 4cf401f581e7..07efb35ee4bc 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -48,31 +48,31 @@ verify_cpu:
48 | pushfl | 48 | pushfl |
49 | popl %eax | 49 | popl %eax |
50 | cmpl %eax,%ebx | 50 | cmpl %eax,%ebx |
51 | jz verify_cpu_no_longmode # cpu has no cpuid | 51 | jz .Lverify_cpu_no_longmode # cpu has no cpuid |
52 | #endif | 52 | #endif |
53 | 53 | ||
54 | movl $0x0,%eax # See if cpuid 1 is implemented | 54 | movl $0x0,%eax # See if cpuid 1 is implemented |
55 | cpuid | 55 | cpuid |
56 | cmpl $0x1,%eax | 56 | cmpl $0x1,%eax |
57 | jb verify_cpu_no_longmode # no cpuid 1 | 57 | jb .Lverify_cpu_no_longmode # no cpuid 1 |
58 | 58 | ||
59 | xor %di,%di | 59 | xor %di,%di |
60 | cmpl $0x68747541,%ebx # AuthenticAMD | 60 | cmpl $0x68747541,%ebx # AuthenticAMD |
61 | jnz verify_cpu_noamd | 61 | jnz .Lverify_cpu_noamd |
62 | cmpl $0x69746e65,%edx | 62 | cmpl $0x69746e65,%edx |
63 | jnz verify_cpu_noamd | 63 | jnz .Lverify_cpu_noamd |
64 | cmpl $0x444d4163,%ecx | 64 | cmpl $0x444d4163,%ecx |
65 | jnz verify_cpu_noamd | 65 | jnz .Lverify_cpu_noamd |
66 | mov $1,%di # cpu is from AMD | 66 | mov $1,%di # cpu is from AMD |
67 | jmp verify_cpu_check | 67 | jmp .Lverify_cpu_check |
68 | 68 | ||
69 | verify_cpu_noamd: | 69 | .Lverify_cpu_noamd: |
70 | cmpl $0x756e6547,%ebx # GenuineIntel? | 70 | cmpl $0x756e6547,%ebx # GenuineIntel? |
71 | jnz verify_cpu_check | 71 | jnz .Lverify_cpu_check |
72 | cmpl $0x49656e69,%edx | 72 | cmpl $0x49656e69,%edx |
73 | jnz verify_cpu_check | 73 | jnz .Lverify_cpu_check |
74 | cmpl $0x6c65746e,%ecx | 74 | cmpl $0x6c65746e,%ecx |
75 | jnz verify_cpu_check | 75 | jnz .Lverify_cpu_check |
76 | 76 | ||
77 | # only call IA32_MISC_ENABLE when: | 77 | # only call IA32_MISC_ENABLE when: |
78 | # family > 6 || (family == 6 && model >= 0xd) | 78 | # family > 6 || (family == 6 && model >= 0xd) |
@@ -83,59 +83,59 @@ verify_cpu_noamd:
83 | andl $0x0ff00f00, %eax # mask family and extended family | 83 | andl $0x0ff00f00, %eax # mask family and extended family |
84 | shrl $8, %eax | 84 | shrl $8, %eax |
85 | cmpl $6, %eax | 85 | cmpl $6, %eax |
86 | ja verify_cpu_clear_xd # family > 6, ok | 86 | ja .Lverify_cpu_clear_xd # family > 6, ok |
87 | jb verify_cpu_check # family < 6, skip | 87 | jb .Lverify_cpu_check # family < 6, skip |
88 | 88 | ||
89 | andl $0x000f00f0, %ecx # mask model and extended model | 89 | andl $0x000f00f0, %ecx # mask model and extended model |
90 | shrl $4, %ecx | 90 | shrl $4, %ecx |
91 | cmpl $0xd, %ecx | 91 | cmpl $0xd, %ecx |
92 | jb verify_cpu_check # family == 6, model < 0xd, skip | 92 | jb .Lverify_cpu_check # family == 6, model < 0xd, skip |
93 | 93 | ||
94 | verify_cpu_clear_xd: | 94 | .Lverify_cpu_clear_xd: |
95 | movl $MSR_IA32_MISC_ENABLE, %ecx | 95 | movl $MSR_IA32_MISC_ENABLE, %ecx |
96 | rdmsr | 96 | rdmsr |
97 | btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE | 97 | btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE |
98 | jnc verify_cpu_check # only write MSR if bit was changed | 98 | jnc .Lverify_cpu_check # only write MSR if bit was changed |
99 | wrmsr | 99 | wrmsr |
100 | 100 | ||
101 | verify_cpu_check: | 101 | .Lverify_cpu_check: |
102 | movl $0x1,%eax # Does the cpu have what it takes | 102 | movl $0x1,%eax # Does the cpu have what it takes |
103 | cpuid | 103 | cpuid |
104 | andl $REQUIRED_MASK0,%edx | 104 | andl $REQUIRED_MASK0,%edx |
105 | xorl $REQUIRED_MASK0,%edx | 105 | xorl $REQUIRED_MASK0,%edx |
106 | jnz verify_cpu_no_longmode | 106 | jnz .Lverify_cpu_no_longmode |
107 | 107 | ||
108 | movl $0x80000000,%eax # See if extended cpuid is implemented | 108 | movl $0x80000000,%eax # See if extended cpuid is implemented |
109 | cpuid | 109 | cpuid |
110 | cmpl $0x80000001,%eax | 110 | cmpl $0x80000001,%eax |
111 | jb verify_cpu_no_longmode # no extended cpuid | 111 | jb .Lverify_cpu_no_longmode # no extended cpuid |
112 | 112 | ||
113 | movl $0x80000001,%eax # Does the cpu have what it takes | 113 | movl $0x80000001,%eax # Does the cpu have what it takes |
114 | cpuid | 114 | cpuid |
115 | andl $REQUIRED_MASK1,%edx | 115 | andl $REQUIRED_MASK1,%edx |
116 | xorl $REQUIRED_MASK1,%edx | 116 | xorl $REQUIRED_MASK1,%edx |
117 | jnz verify_cpu_no_longmode | 117 | jnz .Lverify_cpu_no_longmode |
118 | 118 | ||
119 | verify_cpu_sse_test: | 119 | .Lverify_cpu_sse_test: |
120 | movl $1,%eax | 120 | movl $1,%eax |
121 | cpuid | 121 | cpuid |
122 | andl $SSE_MASK,%edx | 122 | andl $SSE_MASK,%edx |
123 | cmpl $SSE_MASK,%edx | 123 | cmpl $SSE_MASK,%edx |
124 | je verify_cpu_sse_ok | 124 | je .Lverify_cpu_sse_ok |
125 | test %di,%di | 125 | test %di,%di |
126 | jz verify_cpu_no_longmode # only try to force SSE on AMD | 126 | jz .Lverify_cpu_no_longmode # only try to force SSE on AMD |
127 | movl $MSR_K7_HWCR,%ecx | 127 | movl $MSR_K7_HWCR,%ecx |
128 | rdmsr | 128 | rdmsr |
129 | btr $15,%eax # enable SSE | 129 | btr $15,%eax # enable SSE |
130 | wrmsr | 130 | wrmsr |
131 | xor %di,%di # don't loop | 131 | xor %di,%di # don't loop |
132 | jmp verify_cpu_sse_test # try again | 132 | jmp .Lverify_cpu_sse_test # try again |
133 | 133 | ||
134 | verify_cpu_no_longmode: | 134 | .Lverify_cpu_no_longmode: |
135 | popf # Restore caller passed flags | 135 | popf # Restore caller passed flags |
136 | movl $1,%eax | 136 | movl $1,%eax |
137 | ret | 137 | ret |
138 | verify_cpu_sse_ok: | 138 | .Lverify_cpu_sse_ok: |
139 | popf # Restore caller passed flags | 139 | popf # Restore caller passed flags |
140 | xorl %eax, %eax | 140 | xorl %eax, %eax |
141 | ret | 141 | ret |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ec081fe0ce2c..8829482d69ec 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -814,8 +814,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
814 | if (phys_addr < (phys_addr_t)0x40000000) | 814 | if (phys_addr < (phys_addr_t)0x40000000) |
815 | return; | 815 | return; |
816 | 816 | ||
817 | if (IS_ALIGNED(addr, PAGE_SIZE) && | 817 | if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) { |
818 | IS_ALIGNED(next, PAGE_SIZE)) { | ||
819 | /* | 818 | /* |
820 | * Do not free direct mapping pages since they were | 819 | * Do not free direct mapping pages since they were |
821 | * freed when offlining, or simplely not in use. | 820 | * freed when offlining, or simplely not in use. |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 6000ad7f560c..fc6a4c8f6e2a 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -66,6 +66,9 @@ void update_page_count(int level, unsigned long pages)
66 | 66 | ||
67 | static void split_page_count(int level) | 67 | static void split_page_count(int level) |
68 | { | 68 | { |
69 | if (direct_pages_count[level] == 0) | ||
70 | return; | ||
71 | |||
69 | direct_pages_count[level]--; | 72 | direct_pages_count[level]--; |
70 | direct_pages_count[level - 1] += PTRS_PER_PTE; | 73 | direct_pages_count[level - 1] += PTRS_PER_PTE; |
71 | } | 74 | } |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 8ddb5d0d66fb..8f4cc3dfac32 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -161,7 +161,10 @@ void flush_tlb_current_task(void)
161 | preempt_disable(); | 161 | preempt_disable(); |
162 | 162 | ||
163 | count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); | 163 | count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); |
164 | |||
165 | /* This is an implicit full barrier that synchronizes with switch_mm. */ | ||
164 | local_flush_tlb(); | 166 | local_flush_tlb(); |
167 | |||
165 | trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); | 168 | trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); |
166 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) | 169 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
167 | flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); | 170 | flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); |
@@ -188,17 +191,29 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
188 | unsigned long base_pages_to_flush = TLB_FLUSH_ALL; | 191 | unsigned long base_pages_to_flush = TLB_FLUSH_ALL; |
189 | 192 | ||
190 | preempt_disable(); | 193 | preempt_disable(); |
191 | if (current->active_mm != mm) | 194 | if (current->active_mm != mm) { |
195 | /* Synchronize with switch_mm. */ | ||
196 | smp_mb(); | ||
197 | |||
192 | goto out; | 198 | goto out; |
199 | } | ||
193 | 200 | ||
194 | if (!current->mm) { | 201 | if (!current->mm) { |
195 | leave_mm(smp_processor_id()); | 202 | leave_mm(smp_processor_id()); |
203 | |||
204 | /* Synchronize with switch_mm. */ | ||
205 | smp_mb(); | ||
206 | |||
196 | goto out; | 207 | goto out; |
197 | } | 208 | } |
198 | 209 | ||
199 | if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) | 210 | if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) |
200 | base_pages_to_flush = (end - start) >> PAGE_SHIFT; | 211 | base_pages_to_flush = (end - start) >> PAGE_SHIFT; |
201 | 212 | ||
213 | /* | ||
214 | * Both branches below are implicit full barriers (MOV to CR or | ||
215 | * INVLPG) that synchronize with switch_mm. | ||
216 | */ | ||
202 | if (base_pages_to_flush > tlb_single_page_flush_ceiling) { | 217 | if (base_pages_to_flush > tlb_single_page_flush_ceiling) { |
203 | base_pages_to_flush = TLB_FLUSH_ALL; | 218 | base_pages_to_flush = TLB_FLUSH_ALL; |
204 | count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); | 219 | count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); |
@@ -228,10 +243,18 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
228 | preempt_disable(); | 243 | preempt_disable(); |
229 | 244 | ||
230 | if (current->active_mm == mm) { | 245 | if (current->active_mm == mm) { |
231 | if (current->mm) | 246 | if (current->mm) { |
247 | /* | ||
248 | * Implicit full barrier (INVLPG) that synchronizes | ||
249 | * with switch_mm. | ||
250 | */ | ||
232 | __flush_tlb_one(start); | 251 | __flush_tlb_one(start); |
233 | else | 252 | } else { |
234 | leave_mm(smp_processor_id()); | 253 | leave_mm(smp_processor_id()); |
254 | |||
255 | /* Synchronize with switch_mm. */ | ||
256 | smp_mb(); | ||
257 | } | ||
235 | } | 258 | } |
236 | 259 | ||
237 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) | 260 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
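The three tlb.c hunks above all apply one rule, which pairs with the switch_mm() comment earlier in this series: paths that actually touch the hardware TLB (a CR3 write or INVLPG) already execute a full serializing barrier, so only the paths that skip the flush need an explicit smp_mb(). The following is not a real kernel function, just a condensed restatement of that rule:

/* Condensed restatement only: the PTE writes done by the caller must be
 * ordered before the later mm_cpumask read in flush_tlb_others(), so every
 * exit path needs a full barrier of some kind. */
static void tlb_flush_barrier_rule(struct mm_struct *mm)
{
        if (current->active_mm != mm || !current->mm) {
                smp_mb();               /* no hardware flush on this path */
                return;
        }
        local_flush_tlb();              /* MOV to CR3: already a full barrier */
}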
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 312ffd3d0017..9e385b38debf 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -22,7 +22,8 @@
22 | 22 | ||
23 | unsigned long switcher_addr; | 23 | unsigned long switcher_addr; |
24 | struct page **lg_switcher_pages; | 24 | struct page **lg_switcher_pages; |
25 | static struct vm_struct *switcher_vma; | 25 | static struct vm_struct *switcher_text_vma; |
26 | static struct vm_struct *switcher_stacks_vma; | ||
26 | 27 | ||
27 | /* This One Big lock protects all inter-guest data structures. */ | 28 | /* This One Big lock protects all inter-guest data structures. */ |
28 | DEFINE_MUTEX(lguest_lock); | 29 | DEFINE_MUTEX(lguest_lock); |
@@ -83,54 +84,80 @@ static __init int map_switcher(void)
83 | } | 84 | } |
84 | 85 | ||
85 | /* | 86 | /* |
87 | * Copy in the compiled-in Switcher code (from x86/switcher_32.S). | ||
88 | * It goes in the first page, which we map in momentarily. | ||
89 | */ | ||
90 | memcpy(kmap(lg_switcher_pages[0]), start_switcher_text, | ||
91 | end_switcher_text - start_switcher_text); | ||
92 | kunmap(lg_switcher_pages[0]); | ||
93 | |||
94 | /* | ||
86 | * We place the Switcher underneath the fixmap area, which is the | 95 | * We place the Switcher underneath the fixmap area, which is the |
87 | * highest virtual address we can get. This is important, since we | 96 | * highest virtual address we can get. This is important, since we |
88 | * tell the Guest it can't access this memory, so we want its ceiling | 97 | * tell the Guest it can't access this memory, so we want its ceiling |
89 | * as high as possible. | 98 | * as high as possible. |
90 | */ | 99 | */ |
91 | switcher_addr = FIXADDR_START - (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE; | 100 | switcher_addr = FIXADDR_START - TOTAL_SWITCHER_PAGES*PAGE_SIZE; |
92 | 101 | ||
93 | /* | 102 | /* |
94 | * Now we reserve the "virtual memory area" we want. We might | 103 | * Now we reserve the "virtual memory area"s we want. We might |
95 | * not get it in theory, but in practice it's worked so far. | 104 | * not get them in theory, but in practice it's worked so far. |
96 | * The end address needs +1 because __get_vm_area allocates an | 105 | * |
97 | * extra guard page, so we need space for that. | 106 | * We want the switcher text to be read-only and executable, and |
107 | * the stacks to be read-write and non-executable. | ||
98 | */ | 108 | */ |
99 | switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, | 109 | switcher_text_vma = __get_vm_area(PAGE_SIZE, VM_ALLOC|VM_NO_GUARD, |
100 | VM_ALLOC, switcher_addr, switcher_addr | 110 | switcher_addr, |
101 | + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); | 111 | switcher_addr + PAGE_SIZE); |
102 | if (!switcher_vma) { | 112 | |
113 | if (!switcher_text_vma) { | ||
103 | err = -ENOMEM; | 114 | err = -ENOMEM; |
104 | printk("lguest: could not map switcher pages high\n"); | 115 | printk("lguest: could not map switcher pages high\n"); |
105 | goto free_pages; | 116 | goto free_pages; |
106 | } | 117 | } |
107 | 118 | ||
119 | switcher_stacks_vma = __get_vm_area(SWITCHER_STACK_PAGES * PAGE_SIZE, | ||
120 | VM_ALLOC|VM_NO_GUARD, | ||
121 | switcher_addr + PAGE_SIZE, | ||
122 | switcher_addr + TOTAL_SWITCHER_PAGES * PAGE_SIZE); | ||
123 | if (!switcher_stacks_vma) { | ||
124 | err = -ENOMEM; | ||
125 | printk("lguest: could not map switcher pages high\n"); | ||
126 | goto free_text_vma; | ||
127 | } | ||
128 | |||
108 | /* | 129 | /* |
109 | * This code actually sets up the pages we've allocated to appear at | 130 | * This code actually sets up the pages we've allocated to appear at |
110 | * switcher_addr. map_vm_area() takes the vma we allocated above, the | 131 | * switcher_addr. map_vm_area() takes the vma we allocated above, the |
111 | * kind of pages we're mapping (kernel pages), and a pointer to our | 132 | * kind of pages we're mapping (kernel text pages and kernel writable |
112 | * array of struct pages. | 133 | * pages respectively), and a pointer to our array of struct pages. |
113 | */ | 134 | */ |
114 | err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, lg_switcher_pages); | 135 | err = map_vm_area(switcher_text_vma, PAGE_KERNEL_RX, lg_switcher_pages); |
136 | if (err) { | ||
137 | printk("lguest: text map_vm_area failed: %i\n", err); | ||
138 | goto free_vmas; | ||
139 | } | ||
140 | |||
141 | err = map_vm_area(switcher_stacks_vma, PAGE_KERNEL, | ||
142 | lg_switcher_pages + SWITCHER_TEXT_PAGES); | ||
115 | if (err) { | 143 | if (err) { |
116 | printk("lguest: map_vm_area failed: %i\n", err); | 144 | printk("lguest: stacks map_vm_area failed: %i\n", err); |
117 | goto free_vma; | 145 | goto free_vmas; |
118 | } | 146 | } |
119 | 147 | ||
120 | /* | 148 | /* |
121 | * Now the Switcher is mapped at the right address, we can't fail! | 149 | * Now the Switcher is mapped at the right address, we can't fail! |
122 | * Copy in the compiled-in Switcher code (from x86/switcher_32.S). | ||
123 | */ | 150 | */ |
124 | memcpy(switcher_vma->addr, start_switcher_text, | ||
125 | end_switcher_text - start_switcher_text); | ||
126 | |||
127 | printk(KERN_INFO "lguest: mapped switcher at %p\n", | 151 | printk(KERN_INFO "lguest: mapped switcher at %p\n", |
128 | switcher_vma->addr); | 152 | switcher_text_vma->addr); |
129 | /* And we succeeded... */ | 153 | /* And we succeeded... */ |
130 | return 0; | 154 | return 0; |
131 | 155 | ||
132 | free_vma: | 156 | free_vmas: |
133 | vunmap(switcher_vma->addr); | 157 | /* Undoes map_vm_area and __get_vm_area */ |
158 | vunmap(switcher_stacks_vma->addr); | ||
159 | free_text_vma: | ||
160 | vunmap(switcher_text_vma->addr); | ||
134 | free_pages: | 161 | free_pages: |
135 | i = TOTAL_SWITCHER_PAGES; | 162 | i = TOTAL_SWITCHER_PAGES; |
136 | free_some_pages: | 163 | free_some_pages: |
@@ -148,7 +175,8 @@ static void unmap_switcher(void)
148 | unsigned int i; | 175 | unsigned int i; |
149 | 176 | ||
150 | /* vunmap() undoes *both* map_vm_area() and __get_vm_area(). */ | 177 | /* vunmap() undoes *both* map_vm_area() and __get_vm_area(). */ |
151 | vunmap(switcher_vma->addr); | 178 | vunmap(switcher_text_vma->addr); |
179 | vunmap(switcher_stacks_vma->addr); | ||
152 | /* Now we just need to free the pages we copied the switcher into */ | 180 | /* Now we just need to free the pages we copied the switcher into */ |
153 | for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) | 181 | for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) |
154 | __free_pages(lg_switcher_pages[i], 0); | 182 | __free_pages(lg_switcher_pages[i], 0); |
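map_switcher() now splits the Switcher into two separately protected mappings: one read-only executable text page and 2*nr_cpu_ids read-write stack pages, using VM_NO_GUARD so the two VMAs can sit back to back under the fixmap. A stripped-down sketch of that pattern, mirroring the calls used above; error unwinding and the page allocation itself are omitted:

/* Sketch: map 'text_pages' pages RX and 'stack_pages' pages RW, back to back
 * starting at 'base', from a pre-allocated array of struct page pointers. */
static int __init map_split_rx_rw(struct page **pages, unsigned long base,
                                  unsigned long text_pages,
                                  unsigned long stack_pages)
{
        struct vm_struct *text, *stacks;

        /* VM_NO_GUARD: no guard page, so the stacks VMA can start right
         * after the text VMA. */
        text = __get_vm_area(text_pages * PAGE_SIZE, VM_ALLOC | VM_NO_GUARD,
                             base, base + text_pages * PAGE_SIZE);
        stacks = __get_vm_area(stack_pages * PAGE_SIZE, VM_ALLOC | VM_NO_GUARD,
                               base + text_pages * PAGE_SIZE,
                               base + (text_pages + stack_pages) * PAGE_SIZE);
        if (!text || !stacks)
                return -ENOMEM;

        /* Text is mapped read-only and executable, stacks read-write and NX. */
        if (map_vm_area(text, PAGE_KERNEL_RX, pages) ||
            map_vm_area(stacks, PAGE_KERNEL, pages + text_pages))
                return -ENOMEM;

        return 0;
}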
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index eabcff411984..d0c473f65850 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -4,9 +4,11 @@ include ../lib.mk
4 | 4 | ||
5 | .PHONY: all all_32 all_64 warn_32bit_failure clean | 5 | .PHONY: all all_32 all_64 warn_32bit_failure clean |
6 | 6 | ||
7 | TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt ptrace_syscall | 7 | TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall |
8 | TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \ | 8 | TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \ |
9 | test_FCMOV test_FCOMI test_FISTTP | 9 | test_FCMOV test_FCOMI test_FISTTP \ |
10 | ldt_gdt \ | ||
11 | vdso_restorer | ||
10 | 12 | ||
11 | TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) | 13 | TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) |
12 | BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) | 14 | BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) |
diff --git a/tools/testing/selftests/x86/vdso_restorer.c b/tools/testing/selftests/x86/vdso_restorer.c
new file mode 100644
index 000000000000..cb038424a403
--- /dev/null
+++ b/tools/testing/selftests/x86/vdso_restorer.c
@@ -0,0 +1,88 @@
1 | /* | ||
2 | * vdso_restorer.c - tests vDSO-based signal restore | ||
3 | * Copyright (c) 2015 Andrew Lutomirski | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * General Public License for more details. | ||
13 | * | ||
14 | * This makes sure that sa_restorer == NULL keeps working on 32-bit | ||
15 | * configurations. Modern glibc doesn't use it under any circumstances, | ||
16 | * so it's easy to overlook breakage. | ||
17 | * | ||
18 | * 64-bit userspace has never supported sa_restorer == NULL, so this is | ||
19 | * 32-bit only. | ||
20 | */ | ||
21 | |||
22 | #define _GNU_SOURCE | ||
23 | |||
24 | #include <err.h> | ||
25 | #include <stdio.h> | ||
26 | #include <string.h> | ||
27 | #include <signal.h> | ||
28 | #include <unistd.h> | ||
29 | #include <syscall.h> | ||
30 | #include <sys/syscall.h> | ||
31 | |||
32 | /* Open-code this -- the headers are too messy to easily use them. */ | ||
33 | struct real_sigaction { | ||
34 | void *handler; | ||
35 | unsigned long flags; | ||
36 | void *restorer; | ||
37 | unsigned int mask[2]; | ||
38 | }; | ||
39 | |||
40 | static volatile sig_atomic_t handler_called; | ||
41 | |||
42 | static void handler_with_siginfo(int sig, siginfo_t *info, void *ctx_void) | ||
43 | { | ||
44 | handler_called = 1; | ||
45 | } | ||
46 | |||
47 | static void handler_without_siginfo(int sig) | ||
48 | { | ||
49 | handler_called = 1; | ||
50 | } | ||
51 | |||
52 | int main() | ||
53 | { | ||
54 | int nerrs = 0; | ||
55 | struct real_sigaction sa; | ||
56 | |||
57 | memset(&sa, 0, sizeof(sa)); | ||
58 | sa.handler = handler_with_siginfo; | ||
59 | sa.flags = SA_SIGINFO; | ||
60 | sa.restorer = NULL; /* request kernel-provided restorer */ | ||
61 | |||
62 | if (syscall(SYS_rt_sigaction, SIGUSR1, &sa, NULL, 8) != 0) | ||
63 | err(1, "raw rt_sigaction syscall"); | ||
64 | |||
65 | raise(SIGUSR1); | ||
66 | |||
67 | if (handler_called) { | ||
68 | printf("[OK]\tSA_SIGINFO handler returned successfully\n"); | ||
69 | } else { | ||
70 | printf("[FAIL]\tSA_SIGINFO handler was not called\n"); | ||
71 | nerrs++; | ||
72 | } | ||
73 | |||
74 | sa.flags = 0; | ||
75 | sa.handler = handler_without_siginfo; | ||
76 | if (syscall(SYS_sigaction, SIGUSR1, &sa, 0) != 0) | ||
77 | err(1, "raw sigaction syscall"); | ||
78 | handler_called = 0; | ||
79 | |||
80 | raise(SIGUSR1); | ||
81 | |||
82 | if (handler_called) { | ||
83 | printf("[OK]\t!SA_SIGINFO handler returned successfully\n"); | ||
84 | } else { | ||
85 | printf("[FAIL]\t!SA_SIGINFO handler was not called\n"); | ||
86 | nerrs++; | ||
87 | } | ||
88 | } | ||
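For contrast with the raw rt_sigaction() call in the test above: the ordinary libc route looks like the sketch below, and it cannot exercise the sa_restorer == NULL path because glibc always installs its own restorer trampoline, which is exactly why the test open-codes the syscall and the struct.

#include <signal.h>
#include <string.h>

static void handler(int sig)
{
        (void)sig;
}

/* Registers a handler the usual way; glibc fills in sa_restorer itself,
 * so the kernel-provided (vDSO) restorer is never used on this path. */
static int install_via_libc(void)
{
        struct sigaction sa;

        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = handler;
        return sigaction(SIGUSR1, &sa, NULL);
}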