aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig7
-rw-r--r--arch/x86/Kconfig.debug1
-rw-r--r--arch/x86/boot/compressed/aslr.c9
-rw-r--r--arch/x86/include/asm/amd_nb.h2
-rw-r--r--arch/x86/include/asm/efi.h3
-rw-r--r--arch/x86/include/asm/pgtable.h14
-rw-r--r--arch/x86/include/asm/tlbflush.h6
-rw-r--r--arch/x86/include/asm/tsc.h2
-rw-r--r--arch/x86/include/asm/xen/page.h5
-rw-r--r--arch/x86/kernel/amd_nb.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c5
-rw-r--r--arch/x86/kernel/cpu/common.c7
-rw-r--r--arch/x86/kernel/cpu/intel.c10
-rw-r--r--arch/x86/kernel/cpu/microcode/amd_early.c43
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event.c11
-rw-r--r--arch/x86/kernel/cpu/perf_event.h1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c11
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c10
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c48
-rw-r--r--arch/x86/kernel/ftrace.c83
-rw-r--r--arch/x86/kernel/head_32.S7
-rw-r--r--arch/x86/kernel/head_64.S6
-rw-r--r--arch/x86/kernel/irq.c9
-rw-r--r--arch/x86/kernel/machine_kexec_64.c2
-rw-r--r--arch/x86/kernel/pci-dma.c4
-rw-r--r--arch/x86/kernel/quirks.c37
-rw-r--r--arch/x86/kernel/setup.c10
-rw-r--r--arch/x86/kernel/tsc.c11
-rw-r--r--arch/x86/kernel/tsc_msr.c30
-rw-r--r--arch/x86/kvm/mmu.c1
-rw-r--r--arch/x86/kvm/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--arch/x86/mm/fault.c61
-rw-r--r--arch/x86/mm/numa.c21
-rw-r--r--arch/x86/mm/numa_32.c2
-rw-r--r--arch/x86/mm/srat.c16
-rw-r--r--arch/x86/mm/tlb.c52
-rw-r--r--arch/x86/platform/efi/efi-bgrt.c12
-rw-r--r--arch/x86/platform/efi/efi.c25
-rw-r--r--arch/x86/platform/efi/efi_32.c6
-rw-r--r--arch/x86/platform/efi/efi_64.c9
-rw-r--r--arch/x86/xen/enlighten.c12
-rw-r--r--arch/x86/xen/mmu.c4
-rw-r--r--arch/x86/xen/p2m.c17
45 files changed, 416 insertions, 226 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 940e50ebfafa..0af5250d914f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -444,6 +444,7 @@ config X86_INTEL_MID
444 bool "Intel MID platform support" 444 bool "Intel MID platform support"
445 depends on X86_32 445 depends on X86_32
446 depends on X86_EXTENDED_PLATFORM 446 depends on X86_EXTENDED_PLATFORM
447 depends on X86_PLATFORM_DEVICES
447 depends on PCI 448 depends on PCI
448 depends on PCI_GOANY 449 depends on PCI_GOANY
449 depends on X86_IO_APIC 450 depends on X86_IO_APIC
@@ -1051,9 +1052,9 @@ config MICROCODE_INTEL
1051 This options enables microcode patch loading support for Intel 1052 This options enables microcode patch loading support for Intel
1052 processors. 1053 processors.
1053 1054
1054 For latest news and information on obtaining all the required 1055 For the current Intel microcode data package go to
1055 Intel ingredients for this driver, check: 1056 <https://downloadcenter.intel.com> and search for
1056 <http://www.urbanmyth.org/microcode/>. 1057 'Linux Processor Microcode Data File'.
1057 1058
1058config MICROCODE_AMD 1059config MICROCODE_AMD
1059 bool "AMD microcode loading support" 1060 bool "AMD microcode loading support"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 0f3621ed1db6..321a52ccf63a 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -184,6 +184,7 @@ config HAVE_MMIOTRACE_SUPPORT
184config X86_DECODER_SELFTEST 184config X86_DECODER_SELFTEST
185 bool "x86 instruction decoder selftest" 185 bool "x86 instruction decoder selftest"
186 depends on DEBUG_KERNEL && KPROBES 186 depends on DEBUG_KERNEL && KPROBES
187 depends on !COMPILE_TEST
187 ---help--- 188 ---help---
188 Perform x86 instruction decoder selftests at build time. 189 Perform x86 instruction decoder selftests at build time.
189 This option is useful for checking the sanity of x86 instruction 190 This option is useful for checking the sanity of x86 instruction
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 90a21f430117..4dbf967da50d 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -111,7 +111,7 @@ struct mem_vector {
111}; 111};
112 112
113#define MEM_AVOID_MAX 5 113#define MEM_AVOID_MAX 5
114struct mem_vector mem_avoid[MEM_AVOID_MAX]; 114static struct mem_vector mem_avoid[MEM_AVOID_MAX];
115 115
116static bool mem_contains(struct mem_vector *region, struct mem_vector *item) 116static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
117{ 117{
@@ -180,7 +180,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
180} 180}
181 181
182/* Does this memory vector overlap a known avoided area? */ 182/* Does this memory vector overlap a known avoided area? */
183bool mem_avoid_overlap(struct mem_vector *img) 183static bool mem_avoid_overlap(struct mem_vector *img)
184{ 184{
185 int i; 185 int i;
186 186
@@ -192,8 +192,9 @@ bool mem_avoid_overlap(struct mem_vector *img)
192 return false; 192 return false;
193} 193}
194 194
195unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / CONFIG_PHYSICAL_ALIGN]; 195static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
196unsigned long slot_max = 0; 196 CONFIG_PHYSICAL_ALIGN];
197static unsigned long slot_max;
197 198
198static void slots_append(unsigned long addr) 199static void slots_append(unsigned long addr)
199{ 200{
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index a54ee1d054d9..aaac3b2fb746 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -19,7 +19,7 @@ extern int amd_cache_northbridges(void);
19extern void amd_flush_garts(void); 19extern void amd_flush_garts(void);
20extern int amd_numa_init(void); 20extern int amd_numa_init(void);
21extern int amd_get_subcaches(int); 21extern int amd_get_subcaches(int);
22extern int amd_set_subcaches(int, int); 22extern int amd_set_subcaches(int, unsigned long);
23 23
24struct amd_l3_cache { 24struct amd_l3_cache {
25 unsigned indices; 25 unsigned indices;
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 3b978c472d08..acd86c850414 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -132,6 +132,9 @@ extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
132extern void efi_sync_low_kernel_mappings(void); 132extern void efi_sync_low_kernel_mappings(void);
133extern void efi_setup_page_tables(void); 133extern void efi_setup_page_tables(void);
134extern void __init old_map_region(efi_memory_desc_t *md); 134extern void __init old_map_region(efi_memory_desc_t *md);
135extern void __init runtime_code_page_mkexec(void);
136extern void __init efi_runtime_mkexec(void);
137extern void __init efi_apply_memmap_quirks(void);
135 138
136struct efi_setup_data { 139struct efi_setup_data {
137 u64 fw_vendor; 140 u64 fw_vendor;
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index bbc8b12fa443..5ad38ad07890 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -445,10 +445,20 @@ static inline int pte_same(pte_t a, pte_t b)
445 return a.pte == b.pte; 445 return a.pte == b.pte;
446} 446}
447 447
448static inline int pteval_present(pteval_t pteval)
449{
450 /*
451 * Yes Linus, _PAGE_PROTNONE == _PAGE_NUMA. Expressing it this
452 * way clearly states that the intent is that protnone and numa
453 * hinting ptes are considered present for the purposes of
454 * pagetable operations like zapping, protection changes, gup etc.
455 */
456 return pteval & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_NUMA);
457}
458
448static inline int pte_present(pte_t a) 459static inline int pte_present(pte_t a)
449{ 460{
450 return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE | 461 return pteval_present(pte_flags(a));
451 _PAGE_NUMA);
452} 462}
453 463
454#define pte_accessible pte_accessible 464#define pte_accessible pte_accessible
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index e6d90babc245..04905bfc508b 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -62,7 +62,7 @@ static inline void __flush_tlb_all(void)
62 62
63static inline void __flush_tlb_one(unsigned long addr) 63static inline void __flush_tlb_one(unsigned long addr)
64{ 64{
65 count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); 65 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
66 __flush_tlb_single(addr); 66 __flush_tlb_single(addr);
67} 67}
68 68
@@ -93,13 +93,13 @@ static inline void __flush_tlb_one(unsigned long addr)
93 */ 93 */
94static inline void __flush_tlb_up(void) 94static inline void __flush_tlb_up(void)
95{ 95{
96 count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); 96 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
97 __flush_tlb(); 97 __flush_tlb();
98} 98}
99 99
100static inline void flush_tlb_all(void) 100static inline void flush_tlb_all(void)
101{ 101{
102 count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); 102 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
103 __flush_tlb_all(); 103 __flush_tlb_all();
104} 104}
105 105
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 57ae63cd6ee2..94605c0e9cee 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -66,6 +66,6 @@ extern void tsc_save_sched_clock_state(void);
66extern void tsc_restore_sched_clock_state(void); 66extern void tsc_restore_sched_clock_state(void);
67 67
68/* MSR based TSC calibration for Intel Atom SoC platforms */ 68/* MSR based TSC calibration for Intel Atom SoC platforms */
69int try_msr_calibrate_tsc(unsigned long *fast_calibrate); 69unsigned long try_msr_calibrate_tsc(void);
70 70
71#endif /* _ASM_X86_TSC_H */ 71#endif /* _ASM_X86_TSC_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 787e1bb5aafc..3e276eb23d1b 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -52,8 +52,7 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
52extern int m2p_add_override(unsigned long mfn, struct page *page, 52extern int m2p_add_override(unsigned long mfn, struct page *page,
53 struct gnttab_map_grant_ref *kmap_op); 53 struct gnttab_map_grant_ref *kmap_op);
54extern int m2p_remove_override(struct page *page, 54extern int m2p_remove_override(struct page *page,
55 struct gnttab_map_grant_ref *kmap_op, 55 struct gnttab_map_grant_ref *kmap_op);
56 unsigned long mfn);
57extern struct page *m2p_find_override(unsigned long mfn); 56extern struct page *m2p_find_override(unsigned long mfn);
58extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); 57extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
59 58
@@ -122,7 +121,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
122 pfn = m2p_find_override_pfn(mfn, ~0); 121 pfn = m2p_find_override_pfn(mfn, ~0);
123 } 122 }
124 123
125 /* 124 /*
126 * pfn is ~0 if there are no entries in the m2p for mfn or if the 125 * pfn is ~0 if there are no entries in the m2p for mfn or if the
127 * entry doesn't map back to the mfn and m2p_override doesn't have a 126 * entry doesn't map back to the mfn and m2p_override doesn't have a
128 * valid entry for it. 127 * valid entry for it.
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 59554dca96ec..dec8de4e1663 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -179,7 +179,7 @@ int amd_get_subcaches(int cpu)
179 return (mask >> (4 * cuid)) & 0xf; 179 return (mask >> (4 * cuid)) & 0xf;
180} 180}
181 181
182int amd_set_subcaches(int cpu, int mask) 182int amd_set_subcaches(int cpu, unsigned long mask)
183{ 183{
184 static unsigned int reset, ban; 184 static unsigned int reset, ban;
185 struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); 185 struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d3153e281d72..c67ffa686064 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -767,10 +767,7 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
767 767
768static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) 768static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
769{ 769{
770 tlb_flushall_shift = 5; 770 tlb_flushall_shift = 6;
771
772 if (c->x86 <= 0x11)
773 tlb_flushall_shift = 4;
774} 771}
775 772
776static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) 773static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 24b6fd10625a..8e28bf2fc3ef 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -284,8 +284,13 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
284 raw_local_save_flags(eflags); 284 raw_local_save_flags(eflags);
285 BUG_ON(eflags & X86_EFLAGS_AC); 285 BUG_ON(eflags & X86_EFLAGS_AC);
286 286
287 if (cpu_has(c, X86_FEATURE_SMAP)) 287 if (cpu_has(c, X86_FEATURE_SMAP)) {
288#ifdef CONFIG_X86_SMAP
288 set_in_cr4(X86_CR4_SMAP); 289 set_in_cr4(X86_CR4_SMAP);
290#else
291 clear_in_cr4(X86_CR4_SMAP);
292#endif
293 }
289} 294}
290 295
291/* 296/*
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3db61c644e44..5cd9bfabd645 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -640,21 +640,17 @@ static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
640 case 0x61d: /* six-core 45 nm xeon "Dunnington" */ 640 case 0x61d: /* six-core 45 nm xeon "Dunnington" */
641 tlb_flushall_shift = -1; 641 tlb_flushall_shift = -1;
642 break; 642 break;
643 case 0x63a: /* Ivybridge */
644 tlb_flushall_shift = 2;
645 break;
643 case 0x61a: /* 45 nm nehalem, "Bloomfield" */ 646 case 0x61a: /* 45 nm nehalem, "Bloomfield" */
644 case 0x61e: /* 45 nm nehalem, "Lynnfield" */ 647 case 0x61e: /* 45 nm nehalem, "Lynnfield" */
645 case 0x625: /* 32 nm nehalem, "Clarkdale" */ 648 case 0x625: /* 32 nm nehalem, "Clarkdale" */
646 case 0x62c: /* 32 nm nehalem, "Gulftown" */ 649 case 0x62c: /* 32 nm nehalem, "Gulftown" */
647 case 0x62e: /* 45 nm nehalem-ex, "Beckton" */ 650 case 0x62e: /* 45 nm nehalem-ex, "Beckton" */
648 case 0x62f: /* 32 nm Xeon E7 */ 651 case 0x62f: /* 32 nm Xeon E7 */
649 tlb_flushall_shift = 6;
650 break;
651 case 0x62a: /* SandyBridge */ 652 case 0x62a: /* SandyBridge */
652 case 0x62d: /* SandyBridge, "Romely-EP" */ 653 case 0x62d: /* SandyBridge, "Romely-EP" */
653 tlb_flushall_shift = 5;
654 break;
655 case 0x63a: /* Ivybridge */
656 tlb_flushall_shift = 1;
657 break;
658 default: 654 default:
659 tlb_flushall_shift = 6; 655 tlb_flushall_shift = 6;
660 } 656 }
diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c
index 8384c0fa206f..617a9e284245 100644
--- a/arch/x86/kernel/cpu/microcode/amd_early.c
+++ b/arch/x86/kernel/cpu/microcode/amd_early.c
@@ -285,6 +285,15 @@ static void __init collect_cpu_sig_on_bsp(void *arg)
285 285
286 uci->cpu_sig.sig = cpuid_eax(0x00000001); 286 uci->cpu_sig.sig = cpuid_eax(0x00000001);
287} 287}
288
289static void __init get_bsp_sig(void)
290{
291 unsigned int bsp = boot_cpu_data.cpu_index;
292 struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
293
294 if (!uci->cpu_sig.sig)
295 smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
296}
288#else 297#else
289void load_ucode_amd_ap(void) 298void load_ucode_amd_ap(void)
290{ 299{
@@ -337,31 +346,37 @@ void load_ucode_amd_ap(void)
337 346
338int __init save_microcode_in_initrd_amd(void) 347int __init save_microcode_in_initrd_amd(void)
339{ 348{
349 unsigned long cont;
340 enum ucode_state ret; 350 enum ucode_state ret;
341 u32 eax; 351 u32 eax;
342 352
343#ifdef CONFIG_X86_32 353 if (!container)
344 unsigned int bsp = boot_cpu_data.cpu_index; 354 return -EINVAL;
345 struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
346
347 if (!uci->cpu_sig.sig)
348 smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
349 355
356#ifdef CONFIG_X86_32
357 get_bsp_sig();
358 cont = (unsigned long)container;
359#else
350 /* 360 /*
351 * Take into account the fact that the ramdisk might get relocated 361 * We need the physical address of the container for both bitness since
352 * and therefore we need to recompute the container's position in 362 * boot_params.hdr.ramdisk_image is a physical address.
353 * virtual memory space.
354 */ 363 */
355 container = (u8 *)(__va((u32)relocated_ramdisk) + 364 cont = __pa(container);
356 ((u32)container - boot_params.hdr.ramdisk_image));
357#endif 365#endif
366
367 /*
368 * Take into account the fact that the ramdisk might get relocated and
369 * therefore we need to recompute the container's position in virtual
370 * memory space.
371 */
372 if (relocated_ramdisk)
373 container = (u8 *)(__va(relocated_ramdisk) +
374 (cont - boot_params.hdr.ramdisk_image));
375
358 if (ucode_new_rev) 376 if (ucode_new_rev)
359 pr_info("microcode: updated early to new patch_level=0x%08x\n", 377 pr_info("microcode: updated early to new patch_level=0x%08x\n",
360 ucode_new_rev); 378 ucode_new_rev);
361 379
362 if (!container)
363 return -EINVAL;
364
365 eax = cpuid_eax(0x00000001); 380 eax = cpuid_eax(0x00000001);
366 eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); 381 eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
367 382
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index ce2d0a2c3e4f..0e25a1bc5ab5 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -683,7 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
683 } 683 }
684 684
685 /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ 685 /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
686 count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); 686 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
687 __flush_tlb(); 687 __flush_tlb();
688 688
689 /* Save MTRR state */ 689 /* Save MTRR state */
@@ -697,7 +697,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
697static void post_set(void) __releases(set_atomicity_lock) 697static void post_set(void) __releases(set_atomicity_lock)
698{ 698{
699 /* Flush TLBs (no need to flush caches - they are disabled) */ 699 /* Flush TLBs (no need to flush caches - they are disabled) */
700 count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); 700 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
701 __flush_tlb(); 701 __flush_tlb();
702 702
703 /* Intel (P6) standard MTRRs */ 703 /* Intel (P6) standard MTRRs */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b88645191fe5..79f9f848bee4 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1192,6 +1192,9 @@ static void x86_pmu_del(struct perf_event *event, int flags)
1192 for (i = 0; i < cpuc->n_events; i++) { 1192 for (i = 0; i < cpuc->n_events; i++) {
1193 if (event == cpuc->event_list[i]) { 1193 if (event == cpuc->event_list[i]) {
1194 1194
1195 if (i >= cpuc->n_events - cpuc->n_added)
1196 --cpuc->n_added;
1197
1195 if (x86_pmu.put_event_constraints) 1198 if (x86_pmu.put_event_constraints)
1196 x86_pmu.put_event_constraints(cpuc, event); 1199 x86_pmu.put_event_constraints(cpuc, event);
1197 1200
@@ -1521,6 +1524,8 @@ static int __init init_hw_perf_events(void)
1521 1524
1522 pr_cont("%s PMU driver.\n", x86_pmu.name); 1525 pr_cont("%s PMU driver.\n", x86_pmu.name);
1523 1526
1527 x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
1528
1524 for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) 1529 for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
1525 quirk->func(); 1530 quirk->func();
1526 1531
@@ -1534,7 +1539,6 @@ static int __init init_hw_perf_events(void)
1534 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, 1539 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
1535 0, x86_pmu.num_counters, 0, 0); 1540 0, x86_pmu.num_counters, 0, 0);
1536 1541
1537 x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
1538 x86_pmu_format_group.attrs = x86_pmu.format_attrs; 1542 x86_pmu_format_group.attrs = x86_pmu.format_attrs;
1539 1543
1540 if (x86_pmu.event_attrs) 1544 if (x86_pmu.event_attrs)
@@ -1820,9 +1824,12 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
1820 if (ret) 1824 if (ret)
1821 return ret; 1825 return ret;
1822 1826
1827 if (x86_pmu.attr_rdpmc_broken)
1828 return -ENOTSUPP;
1829
1823 if (!!val != !!x86_pmu.attr_rdpmc) { 1830 if (!!val != !!x86_pmu.attr_rdpmc) {
1824 x86_pmu.attr_rdpmc = !!val; 1831 x86_pmu.attr_rdpmc = !!val;
1825 smp_call_function(change_rdpmc, (void *)val, 1); 1832 on_each_cpu(change_rdpmc, (void *)val, 1);
1826 } 1833 }
1827 1834
1828 return count; 1835 return count;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index c1a861829d81..4972c244d0bc 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -409,6 +409,7 @@ struct x86_pmu {
409 /* 409 /*
410 * sysfs attrs 410 * sysfs attrs
411 */ 411 */
412 int attr_rdpmc_broken;
412 int attr_rdpmc; 413 int attr_rdpmc;
413 struct attribute **format_attrs; 414 struct attribute **format_attrs;
414 struct attribute **event_attrs; 415 struct attribute **event_attrs;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 0fa4f242f050..aa333d966886 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1361,10 +1361,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1361 intel_pmu_disable_all(); 1361 intel_pmu_disable_all();
1362 handled = intel_pmu_drain_bts_buffer(); 1362 handled = intel_pmu_drain_bts_buffer();
1363 status = intel_pmu_get_status(); 1363 status = intel_pmu_get_status();
1364 if (!status) { 1364 if (!status)
1365 intel_pmu_enable_all(0); 1365 goto done;
1366 return handled;
1367 }
1368 1366
1369 loops = 0; 1367 loops = 0;
1370again: 1368again:
@@ -2310,10 +2308,7 @@ __init int intel_pmu_init(void)
2310 if (version > 1) 2308 if (version > 1)
2311 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); 2309 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
2312 2310
2313 /* 2311 if (boot_cpu_has(X86_FEATURE_PDCM)) {
2314 * v2 and above have a perf capabilities MSR
2315 */
2316 if (version > 1) {
2317 u64 capabilities; 2312 u64 capabilities;
2318 2313
2319 rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); 2314 rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 29c248799ced..c88f7f4b03ee 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -501,8 +501,11 @@ static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
501 SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, 501 SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
502 SNBEP_CBO_PMON_CTL_TID_EN, 0x1), 502 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
503 SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), 503 SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
504 SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6),
504 SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), 505 SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
506 SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6),
505 SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), 507 SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
508 SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6),
506 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), 509 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
507 SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), 510 SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
508 SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), 511 SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
@@ -1178,10 +1181,15 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = {
1178 SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, 1181 SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
1179 SNBEP_CBO_PMON_CTL_TID_EN, 0x1), 1182 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
1180 SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), 1183 SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
1184 SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
1185 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
1186 SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
1181 SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), 1187 SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
1188 SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc),
1182 SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), 1189 SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
1190 SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc),
1183 SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), 1191 SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
1184 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), 1192 SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc),
1185 SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10), 1193 SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10),
1186 SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10), 1194 SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
1187 SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10), 1195 SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index b1e2fe115323..7c1a0c07b607 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -231,31 +231,49 @@ static __initconst const struct x86_pmu p6_pmu = {
231 231
232}; 232};
233 233
234static __init void p6_pmu_rdpmc_quirk(void)
235{
236 if (boot_cpu_data.x86_mask < 9) {
237 /*
238 * PPro erratum 26; fixed in stepping 9 and above.
239 */
240 pr_warn("Userspace RDPMC support disabled due to a CPU erratum\n");
241 x86_pmu.attr_rdpmc_broken = 1;
242 x86_pmu.attr_rdpmc = 0;
243 }
244}
245
234__init int p6_pmu_init(void) 246__init int p6_pmu_init(void)
235{ 247{
248 x86_pmu = p6_pmu;
249
236 switch (boot_cpu_data.x86_model) { 250 switch (boot_cpu_data.x86_model) {
237 case 1: 251 case 1: /* Pentium Pro */
238 case 3: /* Pentium Pro */ 252 x86_add_quirk(p6_pmu_rdpmc_quirk);
239 case 5: 253 break;
240 case 6: /* Pentium II */ 254
241 case 7: 255 case 3: /* Pentium II - Klamath */
242 case 8: 256 case 5: /* Pentium II - Deschutes */
243 case 11: /* Pentium III */ 257 case 6: /* Pentium II - Mendocino */
244 case 9:
245 case 13:
246 /* Pentium M */
247 break; 258 break;
259
260 case 7: /* Pentium III - Katmai */
261 case 8: /* Pentium III - Coppermine */
262 case 10: /* Pentium III Xeon */
263 case 11: /* Pentium III - Tualatin */
264 break;
265
266 case 9: /* Pentium M - Banias */
267 case 13: /* Pentium M - Dothan */
268 break;
269
248 default: 270 default:
249 pr_cont("unsupported p6 CPU model %d ", 271 pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model);
250 boot_cpu_data.x86_model);
251 return -ENODEV; 272 return -ENODEV;
252 } 273 }
253 274
254 x86_pmu = p6_pmu;
255
256 memcpy(hw_cache_event_ids, p6_hw_cache_event_ids, 275 memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
257 sizeof(hw_cache_event_ids)); 276 sizeof(hw_cache_event_ids));
258 277
259
260 return 0; 278 return 0;
261} 279}
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index d4bdd253fea7..e6253195a301 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -77,8 +77,7 @@ within(unsigned long addr, unsigned long start, unsigned long end)
77 return addr >= start && addr < end; 77 return addr >= start && addr < end;
78} 78}
79 79
80static int 80static unsigned long text_ip_addr(unsigned long ip)
81do_ftrace_mod_code(unsigned long ip, const void *new_code)
82{ 81{
83 /* 82 /*
84 * On x86_64, kernel text mappings are mapped read-only with 83 * On x86_64, kernel text mappings are mapped read-only with
@@ -91,7 +90,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
91 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 90 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
92 ip = (unsigned long)__va(__pa_symbol(ip)); 91 ip = (unsigned long)__va(__pa_symbol(ip));
93 92
94 return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE); 93 return ip;
95} 94}
96 95
97static const unsigned char *ftrace_nop_replace(void) 96static const unsigned char *ftrace_nop_replace(void)
@@ -123,8 +122,10 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
123 if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) 122 if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
124 return -EINVAL; 123 return -EINVAL;
125 124
125 ip = text_ip_addr(ip);
126
126 /* replace the text with the new text */ 127 /* replace the text with the new text */
127 if (do_ftrace_mod_code(ip, new_code)) 128 if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
128 return -EPERM; 129 return -EPERM;
129 130
130 sync_core(); 131 sync_core();
@@ -221,37 +222,51 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
221 return -EINVAL; 222 return -EINVAL;
222} 223}
223 224
224int ftrace_update_ftrace_func(ftrace_func_t func) 225static unsigned long ftrace_update_func;
226
227static int update_ftrace_func(unsigned long ip, void *new)
225{ 228{
226 unsigned long ip = (unsigned long)(&ftrace_call); 229 unsigned char old[MCOUNT_INSN_SIZE];
227 unsigned char old[MCOUNT_INSN_SIZE], *new;
228 int ret; 230 int ret;
229 231
230 memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); 232 memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);
231 new = ftrace_call_replace(ip, (unsigned long)func); 233
234 ftrace_update_func = ip;
235 /* Make sure the breakpoints see the ftrace_update_func update */
236 smp_wmb();
232 237
233 /* See comment above by declaration of modifying_ftrace_code */ 238 /* See comment above by declaration of modifying_ftrace_code */
234 atomic_inc(&modifying_ftrace_code); 239 atomic_inc(&modifying_ftrace_code);
235 240
236 ret = ftrace_modify_code(ip, old, new); 241 ret = ftrace_modify_code(ip, old, new);
237 242
243 atomic_dec(&modifying_ftrace_code);
244
245 return ret;
246}
247
248int ftrace_update_ftrace_func(ftrace_func_t func)
249{
250 unsigned long ip = (unsigned long)(&ftrace_call);
251 unsigned char *new;
252 int ret;
253
254 new = ftrace_call_replace(ip, (unsigned long)func);
255 ret = update_ftrace_func(ip, new);
256
238 /* Also update the regs callback function */ 257 /* Also update the regs callback function */
239 if (!ret) { 258 if (!ret) {
240 ip = (unsigned long)(&ftrace_regs_call); 259 ip = (unsigned long)(&ftrace_regs_call);
241 memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE);
242 new = ftrace_call_replace(ip, (unsigned long)func); 260 new = ftrace_call_replace(ip, (unsigned long)func);
243 ret = ftrace_modify_code(ip, old, new); 261 ret = update_ftrace_func(ip, new);
244 } 262 }
245 263
246 atomic_dec(&modifying_ftrace_code);
247
248 return ret; 264 return ret;
249} 265}
250 266
251static int is_ftrace_caller(unsigned long ip) 267static int is_ftrace_caller(unsigned long ip)
252{ 268{
253 if (ip == (unsigned long)(&ftrace_call) || 269 if (ip == ftrace_update_func)
254 ip == (unsigned long)(&ftrace_regs_call))
255 return 1; 270 return 1;
256 271
257 return 0; 272 return 0;
@@ -677,45 +692,41 @@ int __init ftrace_dyn_arch_init(void *data)
677#ifdef CONFIG_DYNAMIC_FTRACE 692#ifdef CONFIG_DYNAMIC_FTRACE
678extern void ftrace_graph_call(void); 693extern void ftrace_graph_call(void);
679 694
680static int ftrace_mod_jmp(unsigned long ip, 695static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
681 int old_offset, int new_offset)
682{ 696{
683 unsigned char code[MCOUNT_INSN_SIZE]; 697 static union ftrace_code_union calc;
684 698
685 if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) 699 /* Jmp not a call (ignore the .e8) */
686 return -EFAULT; 700 calc.e8 = 0xe9;
701 calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
687 702
688 if (code[0] != 0xe9 || old_offset != *(int *)(&code[1])) 703 /*
689 return -EINVAL; 704 * ftrace external locks synchronize the access to the static variable.
705 */
706 return calc.code;
707}
690 708
691 *(int *)(&code[1]) = new_offset; 709static int ftrace_mod_jmp(unsigned long ip, void *func)
710{
711 unsigned char *new;
692 712
693 if (do_ftrace_mod_code(ip, &code)) 713 new = ftrace_jmp_replace(ip, (unsigned long)func);
694 return -EPERM;
695 714
696 return 0; 715 return update_ftrace_func(ip, new);
697} 716}
698 717
699int ftrace_enable_ftrace_graph_caller(void) 718int ftrace_enable_ftrace_graph_caller(void)
700{ 719{
701 unsigned long ip = (unsigned long)(&ftrace_graph_call); 720 unsigned long ip = (unsigned long)(&ftrace_graph_call);
702 int old_offset, new_offset;
703 721
704 old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); 722 return ftrace_mod_jmp(ip, &ftrace_graph_caller);
705 new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
706
707 return ftrace_mod_jmp(ip, old_offset, new_offset);
708} 723}
709 724
710int ftrace_disable_ftrace_graph_caller(void) 725int ftrace_disable_ftrace_graph_caller(void)
711{ 726{
712 unsigned long ip = (unsigned long)(&ftrace_graph_call); 727 unsigned long ip = (unsigned long)(&ftrace_graph_call);
713 int old_offset, new_offset;
714
715 old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
716 new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
717 728
718 return ftrace_mod_jmp(ip, old_offset, new_offset); 729 return ftrace_mod_jmp(ip, &ftrace_stub);
719} 730}
720 731
721#endif /* !CONFIG_DYNAMIC_FTRACE */ 732#endif /* !CONFIG_DYNAMIC_FTRACE */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 81ba27679f18..f36bd42d6f0c 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -544,6 +544,10 @@ ENDPROC(early_idt_handlers)
544 /* This is global to keep gas from relaxing the jumps */ 544 /* This is global to keep gas from relaxing the jumps */
545ENTRY(early_idt_handler) 545ENTRY(early_idt_handler)
546 cld 546 cld
547
548 cmpl $2,(%esp) # X86_TRAP_NMI
549 je is_nmi # Ignore NMI
550
547 cmpl $2,%ss:early_recursion_flag 551 cmpl $2,%ss:early_recursion_flag
548 je hlt_loop 552 je hlt_loop
549 incl %ss:early_recursion_flag 553 incl %ss:early_recursion_flag
@@ -594,8 +598,9 @@ ex_entry:
594 pop %edx 598 pop %edx
595 pop %ecx 599 pop %ecx
596 pop %eax 600 pop %eax
597 addl $8,%esp /* drop vector number and error code */
598 decl %ss:early_recursion_flag 601 decl %ss:early_recursion_flag
602is_nmi:
603 addl $8,%esp /* drop vector number and error code */
599 iret 604 iret
600ENDPROC(early_idt_handler) 605ENDPROC(early_idt_handler)
601 606
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index e1aabdb314c8..a468c0a65c42 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -343,6 +343,9 @@ early_idt_handlers:
343ENTRY(early_idt_handler) 343ENTRY(early_idt_handler)
344 cld 344 cld
345 345
346 cmpl $2,(%rsp) # X86_TRAP_NMI
347 je is_nmi # Ignore NMI
348
346 cmpl $2,early_recursion_flag(%rip) 349 cmpl $2,early_recursion_flag(%rip)
347 jz 1f 350 jz 1f
348 incl early_recursion_flag(%rip) 351 incl early_recursion_flag(%rip)
@@ -405,8 +408,9 @@ ENTRY(early_idt_handler)
405 popq %rdx 408 popq %rdx
406 popq %rcx 409 popq %rcx
407 popq %rax 410 popq %rax
408 addq $16,%rsp # drop vector number and error code
409 decl early_recursion_flag(%rip) 411 decl early_recursion_flag(%rip)
412is_nmi:
413 addq $16,%rsp # drop vector number and error code
410 INTERRUPT_RETURN 414 INTERRUPT_RETURN
411ENDPROC(early_idt_handler) 415ENDPROC(early_idt_handler)
412 416
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index dbb60878b744..d99f31d9a750 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -266,6 +266,14 @@ __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
266EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); 266EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
267 267
268#ifdef CONFIG_HOTPLUG_CPU 268#ifdef CONFIG_HOTPLUG_CPU
269
270/* These two declarations are only used in check_irq_vectors_for_cpu_disable()
271 * below, which is protected by stop_machine(). Putting them on the stack
272 * results in a stack frame overflow. Dynamically allocating could result in a
273 * failure so declare these two cpumasks as global.
274 */
275static struct cpumask affinity_new, online_new;
276
269/* 277/*
270 * This cpu is going to be removed and its vectors migrated to the remaining 278 * This cpu is going to be removed and its vectors migrated to the remaining
271 * online cpus. Check to see if there are enough vectors in the remaining cpus. 279 * online cpus. Check to see if there are enough vectors in the remaining cpus.
@@ -277,7 +285,6 @@ int check_irq_vectors_for_cpu_disable(void)
277 unsigned int this_cpu, vector, this_count, count; 285 unsigned int this_cpu, vector, this_count, count;
278 struct irq_desc *desc; 286 struct irq_desc *desc;
279 struct irq_data *data; 287 struct irq_data *data;
280 struct cpumask affinity_new, online_new;
281 288
282 this_cpu = smp_processor_id(); 289 this_cpu = smp_processor_id();
283 cpumask_copy(&online_new, cpu_online_mask); 290 cpumask_copy(&online_new, cpu_online_mask);
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 4eabc160696f..679cef0791cd 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -279,5 +279,7 @@ void arch_crash_save_vmcoreinfo(void)
279 VMCOREINFO_SYMBOL(node_data); 279 VMCOREINFO_SYMBOL(node_data);
280 VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); 280 VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
281#endif 281#endif
282 vmcoreinfo_append_str("KERNELOFFSET=%lx\n",
283 (unsigned long)&_text - __START_KERNEL);
282} 284}
283 285
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 872079a67e4d..f7d0672481fd 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -100,8 +100,10 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
100 flag |= __GFP_ZERO; 100 flag |= __GFP_ZERO;
101again: 101again:
102 page = NULL; 102 page = NULL;
103 if (!(flag & GFP_ATOMIC)) 103 /* CMA can be used only in the context which permits sleeping */
104 if (flag & __GFP_WAIT)
104 page = dma_alloc_from_contiguous(dev, count, get_order(size)); 105 page = dma_alloc_from_contiguous(dev, count, get_order(size));
106 /* fallback */
105 if (!page) 107 if (!page)
106 page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); 108 page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
107 if (!page) 109 if (!page)
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 04ee1e2e4c02..7c6acd4b8995 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -571,3 +571,40 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F5,
571 quirk_amd_nb_node); 571 quirk_amd_nb_node);
572 572
573#endif 573#endif
574
575#ifdef CONFIG_PCI
576/*
577 * Processor does not ensure DRAM scrub read/write sequence
578 * is atomic wrt accesses to CC6 save state area. Therefore
579 * if a concurrent scrub read/write access is to same address
580 * the entry may appear as if it is not written. This quirk
581 * applies to Fam16h models 00h-0Fh
582 *
583 * See "Revision Guide" for AMD F16h models 00h-0fh,
584 * document 51810 rev. 3.04, Nov 2013
585 */
586static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
587{
588 u32 val;
589
590 /*
591 * Suggested workaround:
592 * set D18F3x58[4:0] = 00h and set D18F3x5C[0] = 0b
593 */
594 pci_read_config_dword(dev, 0x58, &val);
595 if (val & 0x1F) {
596 val &= ~(0x1F);
597 pci_write_config_dword(dev, 0x58, val);
598 }
599
600 pci_read_config_dword(dev, 0x5C, &val);
601 if (val & BIT(0)) {
602 val &= ~BIT(0);
603 pci_write_config_dword(dev, 0x5c, val);
604 }
605}
606
607DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
608 amd_disable_seq_and_redirect_scrub);
609
610#endif
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 06853e670354..ce72964b2f46 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1239,14 +1239,8 @@ void __init setup_arch(char **cmdline_p)
1239 register_refined_jiffies(CLOCK_TICK_RATE); 1239 register_refined_jiffies(CLOCK_TICK_RATE);
1240 1240
1241#ifdef CONFIG_EFI 1241#ifdef CONFIG_EFI
1242 /* Once setup is done above, unmap the EFI memory map on 1242 if (efi_enabled(EFI_BOOT))
1243 * mismatched firmware/kernel archtectures since there is no 1243 efi_apply_memmap_quirks();
1244 * support for runtime services.
1245 */
1246 if (efi_enabled(EFI_BOOT) && !efi_is_native()) {
1247 pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
1248 efi_unmap_memmap();
1249 }
1250#endif 1244#endif
1251} 1245}
1252 1246
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 19e5adb49a27..cfbe99f88830 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -209,7 +209,7 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
209 * dance when its actually needed. 209 * dance when its actually needed.
210 */ 210 */
211 211
212 preempt_disable(); 212 preempt_disable_notrace();
213 data = this_cpu_read(cyc2ns.head); 213 data = this_cpu_read(cyc2ns.head);
214 tail = this_cpu_read(cyc2ns.tail); 214 tail = this_cpu_read(cyc2ns.tail);
215 215
@@ -229,7 +229,7 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
229 if (!--data->__count) 229 if (!--data->__count)
230 this_cpu_write(cyc2ns.tail, data); 230 this_cpu_write(cyc2ns.tail, data);
231 } 231 }
232 preempt_enable(); 232 preempt_enable_notrace();
233 233
234 return ns; 234 return ns;
235} 235}
@@ -653,13 +653,10 @@ unsigned long native_calibrate_tsc(void)
653 653
654 /* Calibrate TSC using MSR for Intel Atom SoCs */ 654 /* Calibrate TSC using MSR for Intel Atom SoCs */
655 local_irq_save(flags); 655 local_irq_save(flags);
656 i = try_msr_calibrate_tsc(&fast_calibrate); 656 fast_calibrate = try_msr_calibrate_tsc();
657 local_irq_restore(flags); 657 local_irq_restore(flags);
658 if (i >= 0) { 658 if (fast_calibrate)
659 if (i == 0)
660 pr_warn("Fast TSC calibration using MSR failed\n");
661 return fast_calibrate; 659 return fast_calibrate;
662 }
663 660
664 local_irq_save(flags); 661 local_irq_save(flags);
665 fast_calibrate = quick_pit_calibrate(); 662 fast_calibrate = quick_pit_calibrate();
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 8b5434f4389f..92ae6acac8a7 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -53,7 +53,7 @@ static struct freq_desc freq_desc_tables[] = {
53 /* TNG */ 53 /* TNG */
54 { 6, 0x4a, 1, { 0, FREQ_100, FREQ_133, 0, 0, 0, 0, 0 } }, 54 { 6, 0x4a, 1, { 0, FREQ_100, FREQ_133, 0, 0, 0, 0, 0 } },
55 /* VLV2 */ 55 /* VLV2 */
56 { 6, 0x37, 1, { 0, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } }, 56 { 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } },
57 /* ANN */ 57 /* ANN */
58 { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } }, 58 { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } },
59}; 59};
@@ -77,21 +77,18 @@ static int match_cpu(u8 family, u8 model)
77 77
78/* 78/*
79 * Do MSR calibration only for known/supported CPUs. 79 * Do MSR calibration only for known/supported CPUs.
80 * Return values: 80 *
81 * -1: CPU is unknown/unsupported for MSR based calibration 81 * Returns the calibration value or 0 if MSR calibration failed.
82 * 0: CPU is known/supported, but calibration failed
83 * 1: CPU is known/supported, and calibration succeeded
84 */ 82 */
85int try_msr_calibrate_tsc(unsigned long *fast_calibrate) 83unsigned long try_msr_calibrate_tsc(void)
86{ 84{
87 int cpu_index;
88 u32 lo, hi, ratio, freq_id, freq; 85 u32 lo, hi, ratio, freq_id, freq;
86 unsigned long res;
87 int cpu_index;
89 88
90 cpu_index = match_cpu(boot_cpu_data.x86, boot_cpu_data.x86_model); 89 cpu_index = match_cpu(boot_cpu_data.x86, boot_cpu_data.x86_model);
91 if (cpu_index < 0) 90 if (cpu_index < 0)
92 return -1; 91 return 0;
93
94 *fast_calibrate = 0;
95 92
96 if (freq_desc_tables[cpu_index].msr_plat) { 93 if (freq_desc_tables[cpu_index].msr_plat) {
97 rdmsr(MSR_PLATFORM_INFO, lo, hi); 94 rdmsr(MSR_PLATFORM_INFO, lo, hi);
@@ -103,7 +100,7 @@ int try_msr_calibrate_tsc(unsigned long *fast_calibrate)
103 pr_info("Maximum core-clock to bus-clock ratio: 0x%x\n", ratio); 100 pr_info("Maximum core-clock to bus-clock ratio: 0x%x\n", ratio);
104 101
105 if (!ratio) 102 if (!ratio)
106 return 0; 103 goto fail;
107 104
108 /* Get FSB FREQ ID */ 105 /* Get FSB FREQ ID */
109 rdmsr(MSR_FSB_FREQ, lo, hi); 106 rdmsr(MSR_FSB_FREQ, lo, hi);
@@ -112,16 +109,19 @@ int try_msr_calibrate_tsc(unsigned long *fast_calibrate)
112 pr_info("Resolved frequency ID: %u, frequency: %u KHz\n", 109 pr_info("Resolved frequency ID: %u, frequency: %u KHz\n",
113 freq_id, freq); 110 freq_id, freq);
114 if (!freq) 111 if (!freq)
115 return 0; 112 goto fail;
116 113
117 /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */ 114 /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */
118 *fast_calibrate = freq * ratio; 115 res = freq * ratio;
119 pr_info("TSC runs at %lu KHz\n", *fast_calibrate); 116 pr_info("TSC runs at %lu KHz\n", res);
120 117
121#ifdef CONFIG_X86_LOCAL_APIC 118#ifdef CONFIG_X86_LOCAL_APIC
122 lapic_timer_frequency = (freq * 1000) / HZ; 119 lapic_timer_frequency = (freq * 1000) / HZ;
123 pr_info("lapic_timer_frequency = %d\n", lapic_timer_frequency); 120 pr_info("lapic_timer_frequency = %d\n", lapic_timer_frequency);
124#endif 121#endif
122 return res;
125 123
126 return 1; 124fail:
125 pr_warn("Fast TSC calibration using MSR failed\n");
126 return 0;
127} 127}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e50425d0f5f7..9b531351a587 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2672,6 +2672,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
2672 break; 2672 break;
2673 } 2673 }
2674 2674
2675 drop_large_spte(vcpu, iterator.sptep);
2675 if (!is_shadow_present_pte(*iterator.sptep)) { 2676 if (!is_shadow_present_pte(*iterator.sptep)) {
2676 u64 base_addr = iterator.addr; 2677 u64 base_addr = iterator.addr;
2677 2678
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a06f101ef64b..392752834751 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6688,7 +6688,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
6688 else if (is_page_fault(intr_info)) 6688 else if (is_page_fault(intr_info))
6689 return enable_ept; 6689 return enable_ept;
6690 else if (is_no_device(intr_info) && 6690 else if (is_no_device(intr_info) &&
6691 !(nested_read_cr0(vmcs12) & X86_CR0_TS)) 6691 !(vmcs12->guest_cr0 & X86_CR0_TS))
6692 return 0; 6692 return 0;
6693 return vmcs12->exception_bitmap & 6693 return vmcs12->exception_bitmap &
6694 (1u << (intr_info & INTR_INFO_VECTOR_MASK)); 6694 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 39c28f09dfd5..2b8578432d5b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6186,7 +6186,7 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
6186 frag->len -= len; 6186 frag->len -= len;
6187 } 6187 }
6188 6188
6189 if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { 6189 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
6190 vcpu->mmio_needed = 0; 6190 vcpu->mmio_needed = 0;
6191 6191
6192 /* FIXME: return into emulator if single-stepping. */ 6192 /* FIXME: return into emulator if single-stepping. */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9d591c895803..a10c8c792161 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1001,6 +1001,12 @@ static int fault_in_kernel_space(unsigned long address)
1001 1001
1002static inline bool smap_violation(int error_code, struct pt_regs *regs) 1002static inline bool smap_violation(int error_code, struct pt_regs *regs)
1003{ 1003{
1004 if (!IS_ENABLED(CONFIG_X86_SMAP))
1005 return false;
1006
1007 if (!static_cpu_has(X86_FEATURE_SMAP))
1008 return false;
1009
1004 if (error_code & PF_USER) 1010 if (error_code & PF_USER)
1005 return false; 1011 return false;
1006 1012
@@ -1014,13 +1020,17 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
1014 * This routine handles page faults. It determines the address, 1020 * This routine handles page faults. It determines the address,
1015 * and the problem, and then passes it off to one of the appropriate 1021 * and the problem, and then passes it off to one of the appropriate
1016 * routines. 1022 * routines.
1023 *
1024 * This function must have noinline because both callers
1025 * {,trace_}do_page_fault() have notrace on. Having this an actual function
1026 * guarantees there's a function trace entry.
1017 */ 1027 */
1018static void __kprobes 1028static void __kprobes noinline
1019__do_page_fault(struct pt_regs *regs, unsigned long error_code) 1029__do_page_fault(struct pt_regs *regs, unsigned long error_code,
1030 unsigned long address)
1020{ 1031{
1021 struct vm_area_struct *vma; 1032 struct vm_area_struct *vma;
1022 struct task_struct *tsk; 1033 struct task_struct *tsk;
1023 unsigned long address;
1024 struct mm_struct *mm; 1034 struct mm_struct *mm;
1025 int fault; 1035 int fault;
1026 unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; 1036 unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -1028,9 +1038,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
1028 tsk = current; 1038 tsk = current;
1029 mm = tsk->mm; 1039 mm = tsk->mm;
1030 1040
1031 /* Get the faulting address: */
1032 address = read_cr2();
1033
1034 /* 1041 /*
1035 * Detect and handle instructions that would cause a page fault for 1042 * Detect and handle instructions that would cause a page fault for
1036 * both a tracked kernel page and a userspace page. 1043 * both a tracked kernel page and a userspace page.
@@ -1087,11 +1094,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
1087 if (unlikely(error_code & PF_RSVD)) 1094 if (unlikely(error_code & PF_RSVD))
1088 pgtable_bad(regs, error_code, address); 1095 pgtable_bad(regs, error_code, address);
1089 1096
1090 if (static_cpu_has(X86_FEATURE_SMAP)) { 1097 if (unlikely(smap_violation(error_code, regs))) {
1091 if (unlikely(smap_violation(error_code, regs))) { 1098 bad_area_nosemaphore(regs, error_code, address);
1092 bad_area_nosemaphore(regs, error_code, address); 1099 return;
1093 return;
1094 }
1095 } 1100 }
1096 1101
1097 /* 1102 /*
@@ -1244,32 +1249,50 @@ good_area:
1244 up_read(&mm->mmap_sem); 1249 up_read(&mm->mmap_sem);
1245} 1250}
1246 1251
1247dotraplinkage void __kprobes 1252dotraplinkage void __kprobes notrace
1248do_page_fault(struct pt_regs *regs, unsigned long error_code) 1253do_page_fault(struct pt_regs *regs, unsigned long error_code)
1249{ 1254{
1255 unsigned long address = read_cr2(); /* Get the faulting address */
1250 enum ctx_state prev_state; 1256 enum ctx_state prev_state;
1251 1257
1258 /*
1259 * We must have this function tagged with __kprobes, notrace and call
1260 * read_cr2() before calling anything else. To avoid calling any kind
1261 * of tracing machinery before we've observed the CR2 value.
1262 *
1263 * exception_{enter,exit}() contain all sorts of tracepoints.
1264 */
1265
1252 prev_state = exception_enter(); 1266 prev_state = exception_enter();
1253 __do_page_fault(regs, error_code); 1267 __do_page_fault(regs, error_code, address);
1254 exception_exit(prev_state); 1268 exception_exit(prev_state);
1255} 1269}
1256 1270
1257static void trace_page_fault_entries(struct pt_regs *regs, 1271#ifdef CONFIG_TRACING
1272static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
1258 unsigned long error_code) 1273 unsigned long error_code)
1259{ 1274{
1260 if (user_mode(regs)) 1275 if (user_mode(regs))
1261 trace_page_fault_user(read_cr2(), regs, error_code); 1276 trace_page_fault_user(address, regs, error_code);
1262 else 1277 else
1263 trace_page_fault_kernel(read_cr2(), regs, error_code); 1278 trace_page_fault_kernel(address, regs, error_code);
1264} 1279}
1265 1280
1266dotraplinkage void __kprobes 1281dotraplinkage void __kprobes notrace
1267trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) 1282trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
1268{ 1283{
1284 /*
1285 * The exception_enter and tracepoint processing could
1286 * trigger another page faults (user space callchain
1287 * reading) and destroy the original cr2 value, so read
1288 * the faulting address now.
1289 */
1290 unsigned long address = read_cr2();
1269 enum ctx_state prev_state; 1291 enum ctx_state prev_state;
1270 1292
1271 prev_state = exception_enter(); 1293 prev_state = exception_enter();
1272 trace_page_fault_entries(regs, error_code); 1294 trace_page_fault_entries(address, regs, error_code);
1273 __do_page_fault(regs, error_code); 1295 __do_page_fault(regs, error_code, address);
1274 exception_exit(prev_state); 1296 exception_exit(prev_state);
1275} 1297}
1298#endif /* CONFIG_TRACING */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 81b2750f3666..27aa0455fab3 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -493,14 +493,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
493 struct numa_memblk *mb = &mi->blk[i]; 493 struct numa_memblk *mb = &mi->blk[i];
494 memblock_set_node(mb->start, mb->end - mb->start, 494 memblock_set_node(mb->start, mb->end - mb->start,
495 &memblock.memory, mb->nid); 495 &memblock.memory, mb->nid);
496
497 /*
498 * At this time, all memory regions reserved by memblock are
499 * used by the kernel. Set the nid in memblock.reserved will
500 * mark out all the nodes the kernel resides in.
501 */
502 memblock_set_node(mb->start, mb->end - mb->start,
503 &memblock.reserved, mb->nid);
504 } 496 }
505 497
506 /* 498 /*
@@ -565,10 +557,21 @@ static void __init numa_init_array(void)
565static void __init numa_clear_kernel_node_hotplug(void) 557static void __init numa_clear_kernel_node_hotplug(void)
566{ 558{
567 int i, nid; 559 int i, nid;
568 nodemask_t numa_kernel_nodes; 560 nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
569 unsigned long start, end; 561 unsigned long start, end;
570 struct memblock_type *type = &memblock.reserved; 562 struct memblock_type *type = &memblock.reserved;
571 563
564 /*
565 * At this time, all memory regions reserved by memblock are
566 * used by the kernel. Set the nid in memblock.reserved will
567 * mark out all the nodes the kernel resides in.
568 */
569 for (i = 0; i < numa_meminfo.nr_blks; i++) {
570 struct numa_memblk *mb = &numa_meminfo.blk[i];
571 memblock_set_node(mb->start, mb->end - mb->start,
572 &memblock.reserved, mb->nid);
573 }
574
572 /* Mark all kernel nodes. */ 575 /* Mark all kernel nodes. */
573 for (i = 0; i < type->cnt; i++) 576 for (i = 0; i < type->cnt; i++)
574 node_set(type->regions[i].nid, numa_kernel_nodes); 577 node_set(type->regions[i].nid, numa_kernel_nodes);
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 0342d27ca798..47b6436e41c2 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -52,6 +52,8 @@ void memory_present(int nid, unsigned long start, unsigned long end)
52 nid, start, end); 52 nid, start, end);
53 printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); 53 printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid);
54 printk(KERN_DEBUG " "); 54 printk(KERN_DEBUG " ");
55 start = round_down(start, PAGES_PER_SECTION);
56 end = round_up(end, PAGES_PER_SECTION);
55 for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { 57 for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
56 physnode_map[pfn / PAGES_PER_SECTION] = nid; 58 physnode_map[pfn / PAGES_PER_SECTION] = nid;
57 printk(KERN_CONT "%lx ", pfn); 59 printk(KERN_CONT "%lx ", pfn);
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 1a25187e151e..1953e9c9391a 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -42,15 +42,25 @@ static __init inline int srat_disabled(void)
42 return acpi_numa < 0; 42 return acpi_numa < 0;
43} 43}
44 44
45/* Callback for SLIT parsing */ 45/*
46 * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for
47 * I/O localities since SRAT does not list them. I/O localities are
48 * not supported at this point.
49 */
46void __init acpi_numa_slit_init(struct acpi_table_slit *slit) 50void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
47{ 51{
48 int i, j; 52 int i, j;
49 53
50 for (i = 0; i < slit->locality_count; i++) 54 for (i = 0; i < slit->locality_count; i++) {
51 for (j = 0; j < slit->locality_count; j++) 55 if (pxm_to_node(i) == NUMA_NO_NODE)
56 continue;
57 for (j = 0; j < slit->locality_count; j++) {
58 if (pxm_to_node(j) == NUMA_NO_NODE)
59 continue;
52 numa_set_distance(pxm_to_node(i), pxm_to_node(j), 60 numa_set_distance(pxm_to_node(i), pxm_to_node(j),
53 slit->entry[slit->locality_count * i + j]); 61 slit->entry[slit->locality_count * i + j]);
62 }
63 }
54} 64}
55 65
56/* Callback for Proximity Domain -> x2APIC mapping */ 66/* Callback for Proximity Domain -> x2APIC mapping */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index ae699b3bbac8..dd8dda167a24 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -103,7 +103,7 @@ static void flush_tlb_func(void *info)
103 if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) 103 if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
104 return; 104 return;
105 105
106 count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); 106 count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
107 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { 107 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
108 if (f->flush_end == TLB_FLUSH_ALL) 108 if (f->flush_end == TLB_FLUSH_ALL)
109 local_flush_tlb(); 109 local_flush_tlb();
@@ -131,7 +131,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
131 info.flush_start = start; 131 info.flush_start = start;
132 info.flush_end = end; 132 info.flush_end = end;
133 133
134 count_vm_event(NR_TLB_REMOTE_FLUSH); 134 count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
135 if (is_uv_system()) { 135 if (is_uv_system()) {
136 unsigned int cpu; 136 unsigned int cpu;
137 137
@@ -151,44 +151,19 @@ void flush_tlb_current_task(void)
151 151
152 preempt_disable(); 152 preempt_disable();
153 153
154 count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); 154 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
155 local_flush_tlb(); 155 local_flush_tlb();
156 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) 156 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
157 flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); 157 flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
158 preempt_enable(); 158 preempt_enable();
159} 159}
160 160
161/*
162 * It can find out the THP large page, or
163 * HUGETLB page in tlb_flush when THP disabled
164 */
165static inline unsigned long has_large_page(struct mm_struct *mm,
166 unsigned long start, unsigned long end)
167{
168 pgd_t *pgd;
169 pud_t *pud;
170 pmd_t *pmd;
171 unsigned long addr = ALIGN(start, HPAGE_SIZE);
172 for (; addr < end; addr += HPAGE_SIZE) {
173 pgd = pgd_offset(mm, addr);
174 if (likely(!pgd_none(*pgd))) {
175 pud = pud_offset(pgd, addr);
176 if (likely(!pud_none(*pud))) {
177 pmd = pmd_offset(pud, addr);
178 if (likely(!pmd_none(*pmd)))
179 if (pmd_large(*pmd))
180 return addr;
181 }
182 }
183 }
184 return 0;
185}
186
187void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, 161void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
188 unsigned long end, unsigned long vmflag) 162 unsigned long end, unsigned long vmflag)
189{ 163{
190 unsigned long addr; 164 unsigned long addr;
191 unsigned act_entries, tlb_entries = 0; 165 unsigned act_entries, tlb_entries = 0;
166 unsigned long nr_base_pages;
192 167
193 preempt_disable(); 168 preempt_disable();
194 if (current->active_mm != mm) 169 if (current->active_mm != mm)
@@ -210,21 +185,20 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
210 tlb_entries = tlb_lli_4k[ENTRIES]; 185 tlb_entries = tlb_lli_4k[ENTRIES];
211 else 186 else
212 tlb_entries = tlb_lld_4k[ENTRIES]; 187 tlb_entries = tlb_lld_4k[ENTRIES];
188
213 /* Assume all of TLB entries was occupied by this task */ 189 /* Assume all of TLB entries was occupied by this task */
214 act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm; 190 act_entries = tlb_entries >> tlb_flushall_shift;
191 act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm;
192 nr_base_pages = (end - start) >> PAGE_SHIFT;
215 193
216 /* tlb_flushall_shift is on balance point, details in commit log */ 194 /* tlb_flushall_shift is on balance point, details in commit log */
217 if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) { 195 if (nr_base_pages > act_entries) {
218 count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); 196 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
219 local_flush_tlb(); 197 local_flush_tlb();
220 } else { 198 } else {
221 if (has_large_page(mm, start, end)) {
222 local_flush_tlb();
223 goto flush_all;
224 }
225 /* flush range by one by one 'invlpg' */ 199 /* flush range by one by one 'invlpg' */
226 for (addr = start; addr < end; addr += PAGE_SIZE) { 200 for (addr = start; addr < end; addr += PAGE_SIZE) {
227 count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); 201 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
228 __flush_tlb_single(addr); 202 __flush_tlb_single(addr);
229 } 203 }
230 204
@@ -262,7 +236,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
262 236
263static void do_flush_tlb_all(void *info) 237static void do_flush_tlb_all(void *info)
264{ 238{
265 count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); 239 count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
266 __flush_tlb_all(); 240 __flush_tlb_all();
267 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) 241 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
268 leave_mm(smp_processor_id()); 242 leave_mm(smp_processor_id());
@@ -270,7 +244,7 @@ static void do_flush_tlb_all(void *info)
270 244
271void flush_tlb_all(void) 245void flush_tlb_all(void)
272{ 246{
273 count_vm_event(NR_TLB_REMOTE_FLUSH); 247 count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
274 on_each_cpu(do_flush_tlb_all, NULL, 1); 248 on_each_cpu(do_flush_tlb_all, NULL, 1);
275} 249}
276 250
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index 7145ec63c520..f15103dff4b4 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -42,14 +42,15 @@ void __init efi_bgrt_init(void)
42 42
43 if (bgrt_tab->header.length < sizeof(*bgrt_tab)) 43 if (bgrt_tab->header.length < sizeof(*bgrt_tab))
44 return; 44 return;
45 if (bgrt_tab->version != 1) 45 if (bgrt_tab->version != 1 || bgrt_tab->status != 1)
46 return; 46 return;
47 if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address) 47 if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
48 return; 48 return;
49 49
50 image = efi_lookup_mapped_addr(bgrt_tab->image_address); 50 image = efi_lookup_mapped_addr(bgrt_tab->image_address);
51 if (!image) { 51 if (!image) {
52 image = ioremap(bgrt_tab->image_address, sizeof(bmp_header)); 52 image = early_memremap(bgrt_tab->image_address,
53 sizeof(bmp_header));
53 ioremapped = true; 54 ioremapped = true;
54 if (!image) 55 if (!image)
55 return; 56 return;
@@ -57,7 +58,7 @@ void __init efi_bgrt_init(void)
57 58
58 memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); 59 memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
59 if (ioremapped) 60 if (ioremapped)
60 iounmap(image); 61 early_iounmap(image, sizeof(bmp_header));
61 bgrt_image_size = bmp_header.size; 62 bgrt_image_size = bmp_header.size;
62 63
63 bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); 64 bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL);
@@ -65,7 +66,8 @@ void __init efi_bgrt_init(void)
65 return; 66 return;
66 67
67 if (ioremapped) { 68 if (ioremapped) {
68 image = ioremap(bgrt_tab->image_address, bmp_header.size); 69 image = early_memremap(bgrt_tab->image_address,
70 bmp_header.size);
69 if (!image) { 71 if (!image) {
70 kfree(bgrt_image); 72 kfree(bgrt_image);
71 bgrt_image = NULL; 73 bgrt_image = NULL;
@@ -75,5 +77,5 @@ void __init efi_bgrt_init(void)
75 77
76 memcpy_fromio(bgrt_image, image, bgrt_image_size); 78 memcpy_fromio(bgrt_image, image, bgrt_image_size);
77 if (ioremapped) 79 if (ioremapped)
78 iounmap(image); 80 early_iounmap(image, bmp_header.size);
79} 81}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index d62ec87a2b26..b97acecf3fd9 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -52,6 +52,7 @@
52#include <asm/tlbflush.h> 52#include <asm/tlbflush.h>
53#include <asm/x86_init.h> 53#include <asm/x86_init.h>
54#include <asm/rtc.h> 54#include <asm/rtc.h>
55#include <asm/uv/uv.h>
55 56
56#define EFI_DEBUG 57#define EFI_DEBUG
57 58
@@ -792,7 +793,7 @@ void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
792 set_memory_nx(addr, npages); 793 set_memory_nx(addr, npages);
793} 794}
794 795
795static void __init runtime_code_page_mkexec(void) 796void __init runtime_code_page_mkexec(void)
796{ 797{
797 efi_memory_desc_t *md; 798 efi_memory_desc_t *md;
798 void *p; 799 void *p;
@@ -1069,8 +1070,7 @@ void __init efi_enter_virtual_mode(void)
1069 efi.update_capsule = virt_efi_update_capsule; 1070 efi.update_capsule = virt_efi_update_capsule;
1070 efi.query_capsule_caps = virt_efi_query_capsule_caps; 1071 efi.query_capsule_caps = virt_efi_query_capsule_caps;
1071 1072
1072 if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) 1073 efi_runtime_mkexec();
1073 runtime_code_page_mkexec();
1074 1074
1075 kfree(new_memmap); 1075 kfree(new_memmap);
1076 1076
@@ -1211,3 +1211,22 @@ static int __init parse_efi_cmdline(char *str)
1211 return 0; 1211 return 0;
1212} 1212}
1213early_param("efi", parse_efi_cmdline); 1213early_param("efi", parse_efi_cmdline);
1214
1215void __init efi_apply_memmap_quirks(void)
1216{
1217 /*
1218 * Once setup is done earlier, unmap the EFI memory map on mismatched
1219 * firmware/kernel architectures since there is no support for runtime
1220 * services.
1221 */
1222 if (!efi_is_native()) {
1223 pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
1224 efi_unmap_memmap();
1225 }
1226
1227 /*
1228 * UV doesn't support the new EFI pagetable mapping yet.
1229 */
1230 if (is_uv_system())
1231 set_bit(EFI_OLD_MEMMAP, &x86_efi_facility);
1232}
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 249b183cf417..0b74cdf7f816 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -77,3 +77,9 @@ void efi_call_phys_epilog(void)
77 77
78 local_irq_restore(efi_rt_eflags); 78 local_irq_restore(efi_rt_eflags);
79} 79}
80
81void __init efi_runtime_mkexec(void)
82{
83 if (__supported_pte_mask & _PAGE_NX)
84 runtime_code_page_mkexec();
85}
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 6284f158a47d..0c2a234fef1e 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -233,3 +233,12 @@ void __init parse_efi_setup(u64 phys_addr, u32 data_len)
233{ 233{
234 efi_setup = phys_addr + sizeof(struct setup_data); 234 efi_setup = phys_addr + sizeof(struct setup_data);
235} 235}
236
237void __init efi_runtime_mkexec(void)
238{
239 if (!efi_enabled(EFI_OLD_MEMMAP))
240 return;
241
242 if (__supported_pte_mask & _PAGE_NX)
243 runtime_code_page_mkexec();
244}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a4d7b647867f..201d09a7c46b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1473,6 +1473,18 @@ static void xen_pvh_set_cr_flags(int cpu)
1473 * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests 1473 * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests
1474 * (which PVH shared codepaths), while X86_CR0_PG is for PVH. */ 1474 * (which PVH shared codepaths), while X86_CR0_PG is for PVH. */
1475 write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM); 1475 write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM);
1476
1477 if (!cpu)
1478 return;
1479 /*
1480 * For BSP, PSE PGE are set in probe_page_size_mask(), for APs
1481 * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu_init.
1482 */
1483 if (cpu_has_pse)
1484 set_in_cr4(X86_CR4_PSE);
1485
1486 if (cpu_has_pge)
1487 set_in_cr4(X86_CR4_PGE);
1476} 1488}
1477 1489
1478/* 1490/*
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2423ef04ffea..256282e7888b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -365,7 +365,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
365/* Assume pteval_t is equivalent to all the other *val_t types. */ 365/* Assume pteval_t is equivalent to all the other *val_t types. */
366static pteval_t pte_mfn_to_pfn(pteval_t val) 366static pteval_t pte_mfn_to_pfn(pteval_t val)
367{ 367{
368 if (val & _PAGE_PRESENT) { 368 if (pteval_present(val)) {
369 unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; 369 unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
370 unsigned long pfn = mfn_to_pfn(mfn); 370 unsigned long pfn = mfn_to_pfn(mfn);
371 371
@@ -381,7 +381,7 @@ static pteval_t pte_mfn_to_pfn(pteval_t val)
381 381
382static pteval_t pte_pfn_to_mfn(pteval_t val) 382static pteval_t pte_pfn_to_mfn(pteval_t val)
383{ 383{
384 if (val & _PAGE_PRESENT) { 384 if (pteval_present(val)) {
385 unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; 385 unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
386 pteval_t flags = val & PTE_FLAGS_MASK; 386 pteval_t flags = val & PTE_FLAGS_MASK;
387 unsigned long mfn; 387 unsigned long mfn;
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 8009acbe41e4..696c694986d0 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -899,6 +899,13 @@ int m2p_add_override(unsigned long mfn, struct page *page,
899 "m2p_add_override: pfn %lx not mapped", pfn)) 899 "m2p_add_override: pfn %lx not mapped", pfn))
900 return -EINVAL; 900 return -EINVAL;
901 } 901 }
902 WARN_ON(PagePrivate(page));
903 SetPagePrivate(page);
904 set_page_private(page, mfn);
905 page->index = pfn_to_mfn(pfn);
906
907 if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
908 return -ENOMEM;
902 909
903 if (kmap_op != NULL) { 910 if (kmap_op != NULL) {
904 if (!PageHighMem(page)) { 911 if (!PageHighMem(page)) {
@@ -937,16 +944,19 @@ int m2p_add_override(unsigned long mfn, struct page *page,
937} 944}
938EXPORT_SYMBOL_GPL(m2p_add_override); 945EXPORT_SYMBOL_GPL(m2p_add_override);
939int m2p_remove_override(struct page *page, 946int m2p_remove_override(struct page *page,
940 struct gnttab_map_grant_ref *kmap_op, 947 struct gnttab_map_grant_ref *kmap_op)
941 unsigned long mfn)
942{ 948{
943 unsigned long flags; 949 unsigned long flags;
950 unsigned long mfn;
944 unsigned long pfn; 951 unsigned long pfn;
945 unsigned long uninitialized_var(address); 952 unsigned long uninitialized_var(address);
946 unsigned level; 953 unsigned level;
947 pte_t *ptep = NULL; 954 pte_t *ptep = NULL;
948 955
949 pfn = page_to_pfn(page); 956 pfn = page_to_pfn(page);
957 mfn = get_phys_to_machine(pfn);
958 if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT))
959 return -EINVAL;
950 960
951 if (!PageHighMem(page)) { 961 if (!PageHighMem(page)) {
952 address = (unsigned long)__va(pfn << PAGE_SHIFT); 962 address = (unsigned long)__va(pfn << PAGE_SHIFT);
@@ -960,7 +970,10 @@ int m2p_remove_override(struct page *page,
960 spin_lock_irqsave(&m2p_override_lock, flags); 970 spin_lock_irqsave(&m2p_override_lock, flags);
961 list_del(&page->lru); 971 list_del(&page->lru);
962 spin_unlock_irqrestore(&m2p_override_lock, flags); 972 spin_unlock_irqrestore(&m2p_override_lock, flags);
973 WARN_ON(!PagePrivate(page));
974 ClearPagePrivate(page);
963 975
976 set_phys_to_machine(pfn, page->index);
964 if (kmap_op != NULL) { 977 if (kmap_op != NULL) {
965 if (!PageHighMem(page)) { 978 if (!PageHighMem(page)) {
966 struct multicall_space mcs; 979 struct multicall_space mcs;