diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-17 14:01:34 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-17 14:01:34 -0400 |
commit | 42dc2a3048247109b0a5ee6345226cbd3e4f6410 (patch) | |
tree | c94d869eb2dc502002c3327015b5651804e43f1c | |
parent | 1345df21ac542daa9d8613f89a3f0b5f32d636f8 (diff) | |
parent | 03da3ff1cfcd7774c8780d2547ba0d995f7dc03d (diff) |
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:
- misc fixes all around the map
- block non-root vm86(old) if mmap_min_addr != 0
- two small debuggability improvements
- removal of obsolete paravirt op
* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/platform: Fix Geode LX timekeeping in the generic x86 build
x86/apic: Serialize LVTT and TSC_DEADLINE writes
x86/ioapic: Force affinity setting in setup_ioapic_dest()
x86/paravirt: Remove the unused pv_time_ops::get_tsc_khz method
x86/ldt: Fix small LDT allocation for Xen
x86/vm86: Fix the misleading CONFIG_VM86 Kconfig help text
x86/cpu: Print family/model/stepping in hex
x86/vm86: Block non-root vm86(old) if mmap_min_addr != 0
x86/alternatives: Make optimize_nops() interrupt safe and synced
x86/mm/srat: Print non-volatile flag in SRAT
x86/cpufeatures: Enable cpuid for Intel SHA extensions
-rw-r--r-- | arch/x86/Kconfig | 23 | ||||
-rw-r--r-- | arch/x86/include/asm/cpufeature.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/paravirt_types.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/alternative.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/apic/apic.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/ldt.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/tsc.c | 17 | ||||
-rw-r--r-- | arch/x86/kernel/vm86_32.c | 27 | ||||
-rw-r--r-- | arch/x86/mm/srat.c | 5 | ||||
-rw-r--r-- | tools/testing/selftests/x86/entry_from_vm86.c | 5 |
12 files changed, 75 insertions, 29 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7aef2d52daa0..328c8352480c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -1006,7 +1006,7 @@ config X86_THERMAL_VECTOR | |||
1006 | depends on X86_MCE_INTEL | 1006 | depends on X86_MCE_INTEL |
1007 | 1007 | ||
1008 | config X86_LEGACY_VM86 | 1008 | config X86_LEGACY_VM86 |
1009 | bool "Legacy VM86 support (obsolete)" | 1009 | bool "Legacy VM86 support" |
1010 | default n | 1010 | default n |
1011 | depends on X86_32 | 1011 | depends on X86_32 |
1012 | ---help--- | 1012 | ---help--- |
@@ -1018,19 +1018,20 @@ config X86_LEGACY_VM86 | |||
1018 | available to accelerate real mode DOS programs. However, any | 1018 | available to accelerate real mode DOS programs. However, any |
1019 | recent version of DOSEMU, X, or vbetool should be fully | 1019 | recent version of DOSEMU, X, or vbetool should be fully |
1020 | functional even without kernel VM86 support, as they will all | 1020 | functional even without kernel VM86 support, as they will all |
1021 | fall back to (pretty well performing) software emulation. | 1021 | fall back to software emulation. Nevertheless, if you are using |
1022 | a 16-bit DOS program where 16-bit performance matters, vm86 | ||
1023 | mode might be faster than emulation and you might want to | ||
1024 | enable this option. | ||
1022 | 1025 | ||
1023 | Anything that works on a 64-bit kernel is unlikely to need | 1026 | Note that any app that works on a 64-bit kernel is unlikely to |
1024 | this option, as 64-bit kernels don't, and can't, support V8086 | 1027 | need this option, as 64-bit kernels don't, and can't, support |
1025 | mode. This option is also unrelated to 16-bit protected mode | 1028 | V8086 mode. This option is also unrelated to 16-bit protected |
1026 | and is not needed to run most 16-bit programs under Wine. | 1029 | mode and is not needed to run most 16-bit programs under Wine. |
1027 | 1030 | ||
1028 | Enabling this option adds considerable attack surface to the | 1031 | Enabling this option increases the complexity of the kernel |
1029 | kernel and slows down system calls and exception handling. | 1032 | and slows down exception handling a tiny bit. |
1030 | 1033 | ||
1031 | Unless you use very old userspace or need the last drop of | 1034 | If unsure, say N here. |
1032 | performance in your real mode DOS games and can't use KVM, | ||
1033 | say N here. | ||
1034 | 1035 | ||
1035 | config VM86 | 1036 | config VM86 |
1036 | bool | 1037 | bool |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 477fc28050e4..e6cf2ad350d1 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -241,6 +241,7 @@ | |||
241 | #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ | 241 | #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ |
242 | #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ | 242 | #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ |
243 | #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ | 243 | #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ |
244 | #define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ | ||
244 | 245 | ||
245 | /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ | 246 | /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ |
246 | #define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ | 247 | #define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index ce029e4fa7c6..31247b5bff7c 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -97,7 +97,6 @@ struct pv_lazy_ops { | |||
97 | struct pv_time_ops { | 97 | struct pv_time_ops { |
98 | unsigned long long (*sched_clock)(void); | 98 | unsigned long long (*sched_clock)(void); |
99 | unsigned long long (*steal_clock)(int cpu); | 99 | unsigned long long (*steal_clock)(int cpu); |
100 | unsigned long (*get_tsc_khz)(void); | ||
101 | }; | 100 | }; |
102 | 101 | ||
103 | struct pv_cpu_ops { | 102 | struct pv_cpu_ops { |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c42827eb86cf..25f909362b7a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -338,10 +338,15 @@ done: | |||
338 | 338 | ||
339 | static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) | 339 | static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) |
340 | { | 340 | { |
341 | unsigned long flags; | ||
342 | |||
341 | if (instr[0] != 0x90) | 343 | if (instr[0] != 0x90) |
342 | return; | 344 | return; |
343 | 345 | ||
346 | local_irq_save(flags); | ||
344 | add_nops(instr + (a->instrlen - a->padlen), a->padlen); | 347 | add_nops(instr + (a->instrlen - a->padlen), a->padlen); |
348 | sync_core(); | ||
349 | local_irq_restore(flags); | ||
345 | 350 | ||
346 | DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", | 351 | DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", |
347 | instr, a->instrlen - a->padlen, a->padlen); | 352 | instr, a->instrlen - a->padlen, a->padlen); |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3ca3e46aa405..24e94ce454e2 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | |||
336 | apic_write(APIC_LVTT, lvtt_value); | 336 | apic_write(APIC_LVTT, lvtt_value); |
337 | 337 | ||
338 | if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { | 338 | if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { |
339 | /* | ||
340 | * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, | ||
341 | * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. | ||
342 | * According to Intel, MFENCE can do the serialization here. | ||
343 | */ | ||
344 | asm volatile("mfence" : : : "memory"); | ||
345 | |||
339 | printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); | 346 | printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); |
340 | return; | 347 | return; |
341 | } | 348 | } |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 38a76f826530..5c60bb162622 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -2522,6 +2522,7 @@ void __init setup_ioapic_dest(void) | |||
2522 | int pin, ioapic, irq, irq_entry; | 2522 | int pin, ioapic, irq, irq_entry; |
2523 | const struct cpumask *mask; | 2523 | const struct cpumask *mask; |
2524 | struct irq_data *idata; | 2524 | struct irq_data *idata; |
2525 | struct irq_chip *chip; | ||
2525 | 2526 | ||
2526 | if (skip_ioapic_setup == 1) | 2527 | if (skip_ioapic_setup == 1) |
2527 | return; | 2528 | return; |
@@ -2545,9 +2546,9 @@ void __init setup_ioapic_dest(void) | |||
2545 | else | 2546 | else |
2546 | mask = apic->target_cpus(); | 2547 | mask = apic->target_cpus(); |
2547 | 2548 | ||
2548 | irq_set_affinity(irq, mask); | 2549 | chip = irq_data_get_irq_chip(idata); |
2550 | chip->irq_set_affinity(idata, mask, false); | ||
2549 | } | 2551 | } |
2550 | |||
2551 | } | 2552 | } |
2552 | #endif | 2553 | #endif |
2553 | 2554 | ||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 07ce52c22ec8..de22ea7ff82f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1110,10 +1110,10 @@ void print_cpu_info(struct cpuinfo_x86 *c) | |||
1110 | else | 1110 | else |
1111 | printk(KERN_CONT "%d86", c->x86); | 1111 | printk(KERN_CONT "%d86", c->x86); |
1112 | 1112 | ||
1113 | printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model); | 1113 | printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model); |
1114 | 1114 | ||
1115 | if (c->x86_mask || c->cpuid_level >= 0) | 1115 | if (c->x86_mask || c->cpuid_level >= 0) |
1116 | printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask); | 1116 | printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask); |
1117 | else | 1117 | else |
1118 | printk(KERN_CONT ")\n"); | 1118 | printk(KERN_CONT ")\n"); |
1119 | 1119 | ||
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 2bcc0525f1c1..6acc9dd91f36 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -58,7 +58,7 @@ static struct ldt_struct *alloc_ldt_struct(int size) | |||
58 | if (alloc_size > PAGE_SIZE) | 58 | if (alloc_size > PAGE_SIZE) |
59 | new_ldt->entries = vzalloc(alloc_size); | 59 | new_ldt->entries = vzalloc(alloc_size); |
60 | else | 60 | else |
61 | new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL); | 61 | new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL); |
62 | 62 | ||
63 | if (!new_ldt->entries) { | 63 | if (!new_ldt->entries) { |
64 | kfree(new_ldt); | 64 | kfree(new_ldt); |
@@ -95,7 +95,7 @@ static void free_ldt_struct(struct ldt_struct *ldt) | |||
95 | if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) | 95 | if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) |
96 | vfree(ldt->entries); | 96 | vfree(ldt->entries); |
97 | else | 97 | else |
98 | kfree(ldt->entries); | 98 | free_page((unsigned long)ldt->entries); |
99 | kfree(ldt); | 99 | kfree(ldt); |
100 | } | 100 | } |
101 | 101 | ||
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c8d52cb4cb6e..c3f7602cd038 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <asm/hypervisor.h> | 21 | #include <asm/hypervisor.h> |
22 | #include <asm/nmi.h> | 22 | #include <asm/nmi.h> |
23 | #include <asm/x86_init.h> | 23 | #include <asm/x86_init.h> |
24 | #include <asm/geode.h> | ||
24 | 25 | ||
25 | unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ | 26 | unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ |
26 | EXPORT_SYMBOL(cpu_khz); | 27 | EXPORT_SYMBOL(cpu_khz); |
@@ -1013,15 +1014,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); | |||
1013 | 1014 | ||
1014 | static void __init check_system_tsc_reliable(void) | 1015 | static void __init check_system_tsc_reliable(void) |
1015 | { | 1016 | { |
1016 | #ifdef CONFIG_MGEODE_LX | 1017 | #if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) |
1017 | /* RTSC counts during suspend */ | 1018 | if (is_geode_lx()) { |
1019 | /* RTSC counts during suspend */ | ||
1018 | #define RTSC_SUSP 0x100 | 1020 | #define RTSC_SUSP 0x100 |
1019 | unsigned long res_low, res_high; | 1021 | unsigned long res_low, res_high; |
1020 | 1022 | ||
1021 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); | 1023 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); |
1022 | /* Geode_LX - the OLPC CPU has a very reliable TSC */ | 1024 | /* Geode_LX - the OLPC CPU has a very reliable TSC */ |
1023 | if (res_low & RTSC_SUSP) | 1025 | if (res_low & RTSC_SUSP) |
1024 | tsc_clocksource_reliable = 1; | 1026 | tsc_clocksource_reliable = 1; |
1027 | } | ||
1025 | #endif | 1028 | #endif |
1026 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) | 1029 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) |
1027 | tsc_clocksource_reliable = 1; | 1030 | tsc_clocksource_reliable = 1; |
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index abd8b856bd2b..524619351961 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/audit.h> | 45 | #include <linux/audit.h> |
46 | #include <linux/stddef.h> | 46 | #include <linux/stddef.h> |
47 | #include <linux/slab.h> | 47 | #include <linux/slab.h> |
48 | #include <linux/security.h> | ||
48 | 49 | ||
49 | #include <asm/uaccess.h> | 50 | #include <asm/uaccess.h> |
50 | #include <asm/io.h> | 51 | #include <asm/io.h> |
@@ -232,6 +233,32 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) | |||
232 | struct pt_regs *regs = current_pt_regs(); | 233 | struct pt_regs *regs = current_pt_regs(); |
233 | unsigned long err = 0; | 234 | unsigned long err = 0; |
234 | 235 | ||
236 | err = security_mmap_addr(0); | ||
237 | if (err) { | ||
238 | /* | ||
239 | * vm86 cannot virtualize the address space, so vm86 users | ||
240 | * need to manage the low 1MB themselves using mmap. Given | ||
241 | * that BIOS places important data in the first page, vm86 | ||
242 | * is essentially useless if mmap_min_addr != 0. DOSEMU, | ||
243 | * for example, won't even bother trying to use vm86 if it | ||
244 | * can't map a page at virtual address 0. | ||
245 | * | ||
246 | * To reduce the available kernel attack surface, simply | ||
247 | * disallow vm86(old) for users who cannot mmap at va 0. | ||
248 | * | ||
249 | * The implementation of security_mmap_addr will allow | ||
250 | * suitably privileged users to map va 0 even if | ||
251 | * vm.mmap_min_addr is set above 0, and we want this | ||
252 | * behavior for vm86 as well, as it ensures that legacy | ||
253 | * tools like vbetool will not fail just because of | ||
254 | * vm.mmap_min_addr. | ||
255 | */ | ||
256 | pr_info_once("Denied a call to vm86(old) from %s[%d] (uid: %d). Set the vm.mmap_min_addr sysctl to 0 and/or adjust LSM mmap_min_addr policy to enable vm86 if you are using a vm86-based DOS emulator.\n", | ||
257 | current->comm, task_pid_nr(current), | ||
258 | from_kuid_munged(&init_user_ns, current_uid())); | ||
259 | return -EPERM; | ||
260 | } | ||
261 | |||
235 | if (!vm86) { | 262 | if (!vm86) { |
236 | if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL))) | 263 | if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL))) |
237 | return -ENOMEM; | 264 | return -ENOMEM; |
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 66338a60aa6e..c2aea63bee20 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c | |||
@@ -192,10 +192,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
192 | 192 | ||
193 | node_set(node, numa_nodes_parsed); | 193 | node_set(node, numa_nodes_parsed); |
194 | 194 | ||
195 | pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s\n", | 195 | pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n", |
196 | node, pxm, | 196 | node, pxm, |
197 | (unsigned long long) start, (unsigned long long) end - 1, | 197 | (unsigned long long) start, (unsigned long long) end - 1, |
198 | hotpluggable ? " hotplug" : ""); | 198 | hotpluggable ? " hotplug" : "", |
199 | ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : ""); | ||
199 | 200 | ||
200 | /* Mark hotplug range in memblock. */ | 201 | /* Mark hotplug range in memblock. */ |
201 | if (hotpluggable && memblock_mark_hotplug(start, ma->length)) | 202 | if (hotpluggable && memblock_mark_hotplug(start, ma->length)) |
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c index 9a43a59a9bb4..421c607a8856 100644 --- a/tools/testing/selftests/x86/entry_from_vm86.c +++ b/tools/testing/selftests/x86/entry_from_vm86.c | |||
@@ -116,8 +116,9 @@ static bool do_test(struct vm86plus_struct *v86, unsigned long eip, | |||
116 | v86->regs.eip = eip; | 116 | v86->regs.eip = eip; |
117 | ret = vm86(VM86_ENTER, v86); | 117 | ret = vm86(VM86_ENTER, v86); |
118 | 118 | ||
119 | if (ret == -1 && errno == ENOSYS) { | 119 | if (ret == -1 && (errno == ENOSYS || errno == EPERM)) { |
120 | printf("[SKIP]\tvm86 not supported\n"); | 120 | printf("[SKIP]\tvm86 %s\n", |
121 | errno == ENOSYS ? "not supported" : "not allowed"); | ||
121 | return false; | 122 | return false; |
122 | } | 123 | } |
123 | 124 | ||