Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                      |  15
-rw-r--r--  arch/x86/include/asm/boot.h           |   6
-rw-r--r--  arch/x86/include/asm/pci.h            |   2
-rw-r--r--  arch/x86/include/asm/percpu.h         |  10
-rw-r--r--  arch/x86/include/asm/perf_counter.h   |   3
-rw-r--r--  arch/x86/include/asm/proto.h          |  11
-rw-r--r--  arch/x86/kernel/cpu/amd.c             |   4
-rw-r--r--  arch/x86/kernel/cpu/common.c          |   3
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c      |   4
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c    |  22
-rw-r--r--  arch/x86/kernel/dumpstack.c           |   1
-rw-r--r--  arch/x86/kernel/e820.c                |  16
-rw-r--r--  arch/x86/kernel/pci-dma.c             |   2
-rw-r--r--  arch/x86/kernel/setup.c               |  16
-rw-r--r--  arch/x86/kernel/setup_percpu.c        | 219
-rw-r--r--  arch/x86/kernel/tlb_uv.c              |   9
-rw-r--r--  arch/x86/kernel/traps.c               |   3
-rw-r--r--  arch/x86/kvm/mmu.c                    |   6
-rw-r--r--  arch/x86/kvm/paging_tmpl.h            |   2
-rw-r--r--  arch/x86/kvm/vmx.c                    |  15
-rw-r--r--  arch/x86/kvm/x86.c                    |   1
-rw-r--r--  arch/x86/kvm/x86_emulate.c            |   2
-rw-r--r--  arch/x86/lib/delay.c                  |   3
-rw-r--r--  arch/x86/mm/init.c                    |  17
-rw-r--r--  arch/x86/mm/init_64.c                 |   2
-rw-r--r--  arch/x86/mm/pageattr.c                |  65
-rw-r--r--  arch/x86/power/cpu.c                  |   2
27 files changed, 305 insertions(+), 156 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d1430ef6b4f9..c07f72205909 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1913,25 +1913,14 @@ config DMAR_DEFAULT_ON
	  recommended you say N here while the DMAR code remains
	  experimental.
 
-config DMAR_GFX_WA
-	def_bool y
-	prompt "Support for Graphics workaround"
-	depends on DMAR
-	---help---
-	  Current Graphics drivers tend to use physical address
-	  for DMA and avoid using DMA APIs. Setting this config
-	  option permits the IOMMU driver to set a unity map for
-	  all the OS-visible memory. Hence the driver can continue
-	  to use physical addresses for DMA.
-
 config DMAR_FLOPPY_WA
 	def_bool y
 	depends on DMAR
 	---help---
-	  Floppy disk drivers are know to bypass DMA API calls
+	  Floppy disk drivers are known to bypass DMA API calls
 	  thereby failing to work when IOMMU is enabled. This
 	  workaround will setup a 1:1 mapping for the first
-	  16M to make floppy (an ISA device) work.
+	  16MiB to make floppy (an ISA device) work.
 
 config INTR_REMAP
 	bool "Support for Interrupt Remapping (EXPERIMENTAL)"
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 418e632d4a80..7a1065958ba9 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -8,7 +8,7 @@
 
 #ifdef __KERNEL__
 
-#include <asm/page_types.h>
+#include <asm/pgtable_types.h>
 
 /* Physical address where kernel should be loaded. */
 #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
@@ -16,10 +16,10 @@
 				& ~(CONFIG_PHYSICAL_ALIGN - 1))
 
 /* Minimum kernel alignment, as a power of two */
-#ifdef CONFIG_x86_64
+#ifdef CONFIG_X86_64
 #define MIN_KERNEL_ALIGN_LG2	PMD_SHIFT
 #else
-#define MIN_KERNEL_ALIGN_LG2	(PAGE_SHIFT+1)
+#define MIN_KERNEL_ALIGN_LG2	(PAGE_SHIFT + THREAD_ORDER)
 #endif
 #define MIN_KERNEL_ALIGN	(_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)
 
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 927958d13c19..1ff685ca221c 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -91,7 +91,7 @@ extern void pci_iommu_alloc(void);
 
 #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
 
-#if defined(CONFIG_X86_64) || defined(CONFIG_DMA_API_DEBUG)
+#if defined(CONFIG_X86_64) || defined(CONFIG_DMAR) || defined(CONFIG_DMA_API_DEBUG)
 
 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)	\
 	dma_addr_t ADDR_NAME;
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 02ecb30982a3..103f1ddb0d85 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -42,6 +42,7 @@
 
 #else /* ...!ASSEMBLY */
 
+#include <linux/kernel.h>
 #include <linux/stringify.h>
 
 #ifdef CONFIG_SMP
@@ -155,6 +156,15 @@ do { \
 /* We can use this directly for local CPU (faster). */
 DECLARE_PER_CPU(unsigned long, this_cpu_off);
 
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+void *pcpu_lpage_remapped(void *kaddr);
+#else
+static inline void *pcpu_lpage_remapped(void *kaddr)
+{
+	return NULL;
+}
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 5fb33e160ea0..fa64e401589d 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -87,6 +87,9 @@ union cpuid10_edx {
 #ifdef CONFIG_PERF_COUNTERS
 extern void init_hw_perf_counters(void);
 extern void perf_counters_lapic_init(void);
+
+#define PERF_COUNTER_INDEX_OFFSET	0
+
 #else
 static inline void init_hw_perf_counters(void)		{ }
 static inline void perf_counters_lapic_init(void)	{ }
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 49fb3ecf3bb3..621f56d73121 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -22,7 +22,14 @@ extern int reboot_force;
 
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
 
-#define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1))
-#define round_down(x, y) ((x) & ~((y) - 1))
+/*
+ * This looks more complex than it should be. But we need to
+ * get the type for the ~ right in round_down (it needs to be
+ * as wide as the result!), and we want to evaluate the macro
+ * arguments just once each.
+ */
+#define __round_mask(x,y) ((__typeof__(x))((y)-1))
+#define round_up(x,y) ((((x)-1) | __round_mask(x,y))+1)
+#define round_down(x,y) ((x) & ~__round_mask(x,y))
 
 #endif /* _ASM_X86_PROTO_H */
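
Aside (an illustrative userspace sketch, not part of the patch; the old_/new_ names and values are hypothetical): the comment added above is about operand width. In the old round_down(), ~((y) - 1) is evaluated in y's type, so with a 32-bit y the mask has only 32 significant bits and rounding a 64-bit value silently clears its upper bits:

#include <stdio.h>
#include <stdint.h>

/* old macro: the ~ is applied in y's (here 32-bit) type */
#define old_round_down(x, y)	((x) & ~((y) - 1))

/* new macros: the mask is first cast to x's type */
#define __round_mask(x, y)	((__typeof__(x))((y) - 1))
#define new_round_down(x, y)	((x) & ~__round_mask(x, y))

int main(void)
{
	uint64_t addr = 0x100000123ULL;		/* bit 32 set */
	unsigned int align = 0x1000;		/* 32-bit alignment value */

	/* ~(align - 1) promotes to 0x00000000fffff000, so the old
	 * macro drops bit 32 of addr; the new one keeps it. */
	printf("old: %#llx\n", (unsigned long long)old_round_down(addr, align));
	printf("new: %#llx\n", (unsigned long long)new_round_down(addr, align));
	return 0;
}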
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e5b27d8f1b47..28e5f5956042 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -258,13 +258,15 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_HT
 	unsigned bits;
+	int cpu = smp_processor_id();
 
 	bits = c->x86_coreid_bits;
-
 	/* Low order bits define the core id (index of core in socket) */
 	c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
 	/* Convert the initial APIC ID into the socket ID */
 	c->phys_proc_id = c->initial_apicid >> bits;
+	/* use socket ID also for last level cache */
+	per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
 #endif
 }
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6b26d4deada0..f1961c07af9a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -848,9 +848,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	numa_add_cpu(smp_processor_id());
 #endif
-
-	/* Cap the iomem address space to what is addressable on all CPUs */
-	iomem_resource.end &= (1ULL << c->x86_phys_bits) - 1;
 }
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 284d1de968bc..af425b83202b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1117,7 +1117,7 @@ static void mcheck_timer(unsigned long data)
 	*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
 
 	t->expires = jiffies + *n;
-	add_timer(t);
+	add_timer_on(t, smp_processor_id());
 }
 
 static void mce_do_trigger(struct work_struct *work)
@@ -1321,7 +1321,7 @@ static void mce_init_timer(void)
 		return;
 	setup_timer(t, mcheck_timer, smp_processor_id());
 	t->expires = round_jiffies(jiffies + *n);
-	add_timer(t);
+	add_timer_on(t, smp_processor_id());
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 76dfef23f789..d4cf4ce19aac 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -401,7 +401,7 @@ static const u64 amd_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */
+		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
 		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_PREFETCH) ] = {
@@ -912,6 +912,8 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 	err = checking_wrmsrl(hwc->counter_base + idx,
 			     (u64)(-left) & x86_pmu.counter_mask);
 
+	perf_counter_update_userpage(counter);
+
 	return ret;
 }
 
@@ -969,13 +971,6 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 	if (!x86_pmu.num_counters_fixed)
 		return -1;
 
-	/*
-	 * Quirk, IA32_FIXED_CTRs do not work on current Atom processors:
-	 */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-	    boot_cpu_data.x86_model == 28)
-		return -1;
-
 	event = hwc->config & ARCH_PERFMON_EVENT_MASK;
 
 	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
@@ -1041,6 +1036,8 @@ try_generic:
 	x86_perf_counter_set_period(counter, hwc, idx);
 	x86_pmu.enable(hwc, idx);
 
+	perf_counter_update_userpage(counter);
+
 	return 0;
 }
 
1046 1043
@@ -1133,6 +1130,8 @@ static void x86_pmu_disable(struct perf_counter *counter)
 	x86_perf_counter_update(counter, hwc, idx);
 	cpuc->counters[idx] = NULL;
 	clear_bit(idx, cpuc->used_mask);
+
+	perf_counter_update_userpage(counter);
 }
 
 /*
@@ -1428,8 +1427,6 @@ static int intel_pmu_init(void)
 	 */
 	x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
 
-	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
-
 	/*
 	 * Install the hw-cache-events table:
 	 */
@@ -1499,21 +1496,22 @@ void __init init_hw_perf_counters(void)
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
 	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
-		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
 		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
 		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
+		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
 	}
 	perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
 	perf_max_counters = x86_pmu.num_counters;
 
 	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
-		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
 		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
 		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
+		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
 	}
 
 	perf_counter_mask |=
 		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
+	x86_pmu.intel_ctrl = perf_counter_mask;
 
 	perf_counters_lapic_init();
 	register_die_notifier(&perf_counter_nmi_notifier);
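
Aside (illustrative, not from the patch): the final hunk moves the clipping below the WARN so the message reports the original out-of-range value; with the old order the WARN printed the already-clamped number, i.e. "max > max". A minimal userspace sketch of the corrected ordering, with hypothetical names:

#include <stdio.h>

#define MAX_GENERIC 8	/* stands in for X86_PMC_MAX_GENERIC */

static int clip_counters(int num)
{
	if (num > MAX_GENERIC) {
		/* warn first, while num still holds the bogus value */
		fprintf(stderr, "hw perf counters %d > max(%d), clipping!\n",
			num, MAX_GENERIC);
		num = MAX_GENERIC;	/* clip only after reporting */
	}
	return num;
}

int main(void)
{
	printf("using %d counters\n", clip_counters(12));
	return 0;
}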
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 95ea5fa7d444..c8405718a4c3 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -22,6 +22,7 @@
 #include "dumpstack.h"
 
 int panic_on_unrecovered_nmi;
+int panic_on_io_nmi;
 unsigned int code_bytes = 64;
 int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
 static int die_counter;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7271fa33d791..c4ca89d9aaf4 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1383,6 +1383,8 @@ static unsigned long ram_alignment(resource_size_t pos)
 		return 32*1024*1024;
 }
 
+#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
+
 void __init e820_reserve_resources_late(void)
 {
 	int i;
@@ -1400,17 +1402,19 @@ void __init e820_reserve_resources_late(void)
 	 * avoid stolen RAM:
 	 */
 	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *entry = &e820_saved.map[i];
-		resource_size_t start, end;
+		struct e820entry *entry = &e820.map[i];
+		u64 start, end;
 
 		if (entry->type != E820_RAM)
 			continue;
 		start = entry->addr + entry->size;
-		end = round_up(start, ram_alignment(start));
-		if (start == end)
+		end = round_up(start, ram_alignment(start)) - 1;
+		if (end > MAX_RESOURCE_SIZE)
+			end = MAX_RESOURCE_SIZE;
+		if (start >= end)
 			continue;
-		reserve_region_with_split(&iomem_resource, start,
-					  end - 1, "RAM buffer");
+		reserve_region_with_split(&iomem_resource, start, end,
+					  "RAM buffer");
 	}
 }
 
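Aside (a standalone sketch with hypothetical values, not part of the patch): the new clamp matters because start/end are now computed in u64 while iomem resources hold resource_size_t, which is 32-bit on many 32-bit configurations; rounding a RAM range that ends above 4 GiB would otherwise produce an end no resource can represent:

#include <stdio.h>
#include <stdint.h>

typedef uint32_t resource_size_t;	/* 32-bit resources, as on many 32-bit configs */

#define __round_mask(x, y)	((__typeof__(x))((y) - 1))
#define round_up(x, y)		((((x) - 1) | __round_mask(x, y)) + 1)
#define MAX_RESOURCE_SIZE	((resource_size_t)-1)

int main(void)
{
	uint64_t start = 0x13ff00000ULL;	/* RAM range ends above 4 GiB */
	uint64_t end = round_up(start, 0x2000000ULL) - 1;	/* 32 MiB alignment */

	if (end > MAX_RESOURCE_SIZE)	/* clamp to what a resource can hold */
		end = MAX_RESOURCE_SIZE;
	if (start >= end)		/* nothing representable left to reserve */
		puts("skipped");
	else
		printf("reserve [%#llx, %#llx]\n",
		       (unsigned long long)start, (unsigned long long)end);
	return 0;
}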
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 47630479b067..1a041bcf506b 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -211,11 +211,11 @@ static __init int iommu_setup(char *p)
 #ifdef CONFIG_SWIOTLB
 	if (!strncmp(p, "soft", 4))
 		swiotlb = 1;
+#endif
 	if (!strncmp(p, "pt", 2)) {
 		iommu_pass_through = 1;
 		return 1;
 	}
-#endif
 
 	gart_parse_options(p);
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index be5ae80f897f..de2cab132844 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -289,6 +289,20 @@ void * __init extend_brk(size_t size, size_t align)
 	return ret;
 }
 
+#ifdef CONFIG_X86_64
+static void __init init_gbpages(void)
+{
+	if (direct_gbpages && cpu_has_gbpages)
+		printk(KERN_INFO "Using GB pages for direct mapping\n");
+	else
+		direct_gbpages = 0;
+}
+#else
+static inline void init_gbpages(void)
+{
+}
+#endif
+
 static void __init reserve_brk(void)
 {
 	if (_brk_end > _brk_start)
@@ -871,6 +885,8 @@ void __init setup_arch(char **cmdline_p)
 
 	reserve_brk();
 
+	init_gbpages();
+
 	/* max_pfn_mapped is updated here */
 	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
 	max_pfn_mapped = max_low_pfn_mapped;
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 9c3f0823e6aa..29a3eef7cf4a 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -124,7 +124,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
 }
 
 /*
- * Remap allocator
+ * Large page remap allocator
  *
  * This allocator uses PMD page as unit.  A PMD page is allocated for
  * each cpu and each is remapped into vmalloc area using PMD mapping.
@@ -137,105 +137,185 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
  * better than only using 4k mappings while still being NUMA friendly.
  */
 #ifdef CONFIG_NEED_MULTIPLE_NODES
-static size_t pcpur_size __initdata;
-static void **pcpur_ptrs __initdata;
+struct pcpul_ent {
+	unsigned int	cpu;
+	void		*ptr;
+};
+
+static size_t pcpul_size;
+static struct pcpul_ent *pcpul_map;
+static struct vm_struct pcpul_vm;
 
-static struct page * __init pcpur_get_page(unsigned int cpu, int pageno)
+static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
 {
 	size_t off = (size_t)pageno << PAGE_SHIFT;
 
-	if (off >= pcpur_size)
+	if (off >= pcpul_size)
 		return NULL;
 
-	return virt_to_page(pcpur_ptrs[cpu] + off);
+	return virt_to_page(pcpul_map[cpu].ptr + off);
 }
 
-static ssize_t __init setup_pcpu_remap(size_t static_size)
+static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
 {
-	static struct vm_struct vm;
-	size_t ptrs_size, dyn_size;
+	size_t map_size, dyn_size;
 	unsigned int cpu;
+	int i, j;
 	ssize_t ret;
 
-	/*
-	 * If large page isn't supported, there's no benefit in doing
-	 * this.  Also, on non-NUMA, embedding is better.
-	 *
-	 * NOTE: disabled for now.
-	 */
-	if (true || !cpu_has_pse || !pcpu_need_numa())
+	if (!chosen) {
+		size_t vm_size = VMALLOC_END - VMALLOC_START;
+		size_t tot_size = num_possible_cpus() * PMD_SIZE;
+
+		/* on non-NUMA, embedding is better */
+		if (!pcpu_need_numa())
+			return -EINVAL;
+
+		/* don't consume more than 20% of vmalloc area */
+		if (tot_size > vm_size / 5) {
+			pr_info("PERCPU: too large chunk size %zuMB for "
+				"large page remap\n", tot_size >> 20);
+			return -EINVAL;
+		}
+	}
+
+	/* need PSE */
+	if (!cpu_has_pse) {
+		pr_warning("PERCPU: lpage allocator requires PSE\n");
 		return -EINVAL;
+	}
 
 	/*
 	 * Currently supports only single page.  Supporting multiple
 	 * pages won't be too difficult if it ever becomes necessary.
 	 */
-	pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
+	pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
 			       PERCPU_DYNAMIC_RESERVE);
-	if (pcpur_size > PMD_SIZE) {
+	if (pcpul_size > PMD_SIZE) {
 		pr_warning("PERCPU: static data is larger than large page, "
 			   "can't use large page\n");
 		return -EINVAL;
 	}
-	dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
+	dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
 
 	/* allocate pointer array and alloc large pages */
-	ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0]));
-	pcpur_ptrs = alloc_bootmem(ptrs_size);
+	map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0]));
+	pcpul_map = alloc_bootmem(map_size);
 
 	for_each_possible_cpu(cpu) {
-		pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE);
-		if (!pcpur_ptrs[cpu])
+		pcpul_map[cpu].cpu = cpu;
+		pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE,
+							PMD_SIZE);
+		if (!pcpul_map[cpu].ptr) {
+			pr_warning("PERCPU: failed to allocate large page "
+				   "for cpu%u\n", cpu);
 			goto enomem;
+		}
 
 		/*
-		 * Only use pcpur_size bytes and give back the rest.
+		 * Only use pcpul_size bytes and give back the rest.
 		 *
 		 * Ingo: The 2MB up-rounding bootmem is needed to make
 		 * sure the partial 2MB page is still fully RAM - it's
 		 * not well-specified to have a PAT-incompatible area
 		 * (unmapped RAM, device memory, etc.) in that hole.
 		 */
-		free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size),
-			     PMD_SIZE - pcpur_size);
+		free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size),
+			     PMD_SIZE - pcpul_size);
 
-		memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size);
+		memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size);
 	}
 
 	/* allocate address and map */
-	vm.flags = VM_ALLOC;
-	vm.size = num_possible_cpus() * PMD_SIZE;
-	vm_area_register_early(&vm, PMD_SIZE);
+	pcpul_vm.flags = VM_ALLOC;
+	pcpul_vm.size = num_possible_cpus() * PMD_SIZE;
+	vm_area_register_early(&pcpul_vm, PMD_SIZE);
 
 	for_each_possible_cpu(cpu) {
-		pmd_t *pmd;
+		pmd_t *pmd, pmd_v;
 
-		pmd = populate_extra_pmd((unsigned long)vm.addr
-					 + cpu * PMD_SIZE);
-		set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])),
-				     PAGE_KERNEL_LARGE));
+		pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr +
+					 cpu * PMD_SIZE);
+		pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)),
+				PAGE_KERNEL_LARGE);
+		set_pmd(pmd, pmd_v);
 	}
 
 	/* we're ready, commit */
 	pr_info("PERCPU: Remapped at %p with large pages, static data "
-		"%zu bytes\n", vm.addr, static_size);
+		"%zu bytes\n", pcpul_vm.addr, static_size);
 
-	ret = pcpu_setup_first_chunk(pcpur_get_page, static_size,
+	ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
 				     PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
-				     PMD_SIZE, vm.addr, NULL);
-	goto out_free_ar;
+				     PMD_SIZE, pcpul_vm.addr, NULL);
+
+	/* sort pcpul_map array for pcpu_lpage_remapped() */
+	for (i = 0; i < num_possible_cpus() - 1; i++)
+		for (j = i + 1; j < num_possible_cpus(); j++)
+			if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
+				struct pcpul_ent tmp = pcpul_map[i];
+				pcpul_map[i] = pcpul_map[j];
+				pcpul_map[j] = tmp;
+			}
+
+	return ret;
 
 enomem:
 	for_each_possible_cpu(cpu)
-		if (pcpur_ptrs[cpu])
-			free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE);
-	ret = -ENOMEM;
-out_free_ar:
-	free_bootmem(__pa(pcpur_ptrs), ptrs_size);
-	return ret;
+		if (pcpul_map[cpu].ptr)
+			free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size);
+	free_bootmem(__pa(pcpul_map), map_size);
+	return -ENOMEM;
+}
+
+/**
+ * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
+ * @kaddr: the kernel address in question
+ *
+ * Determine whether @kaddr falls in the pcpul recycled area.  This is
+ * used by pageattr to detect VM aliases and break up the pcpu PMD
+ * mapping such that the same physical page is not mapped under
+ * different attributes.
+ *
+ * The recycled area is always at the tail of a partially used PMD
+ * page.
+ *
+ * RETURNS:
+ * Address of corresponding remapped pcpu address if match is found;
+ * otherwise, NULL.
+ */
+void *pcpu_lpage_remapped(void *kaddr)
+{
+	void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
+	unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
+	int left = 0, right = num_possible_cpus() - 1;
+	int pos;
+
+	/* pcpul in use at all? */
+	if (!pcpul_map)
+		return NULL;
+
+	/* okay, perform binary search */
+	while (left <= right) {
+		pos = (left + right) / 2;
+
+		if (pcpul_map[pos].ptr < pmd_addr)
+			left = pos + 1;
+		else if (pcpul_map[pos].ptr > pmd_addr)
+			right = pos - 1;
+		else {
+			/* it shouldn't be in the area for the first chunk */
+			WARN_ON(offset < pcpul_size);
+
+			return pcpul_vm.addr +
+				pcpul_map[pos].cpu * PMD_SIZE + offset;
+		}
+	}
+
+	return NULL;
 }
 #else
-static ssize_t __init setup_pcpu_remap(size_t static_size)
+static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
 {
 	return -EINVAL;
 }
@@ -249,7 +329,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
  * mapping so that it can use PMD mapping without additional TLB
  * pressure.
  */
-static ssize_t __init setup_pcpu_embed(size_t static_size)
+static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen)
 {
 	size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
 
@@ -258,7 +338,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
 	 * this.  Also, embedding allocation doesn't play well with
 	 * NUMA.
 	 */
-	if (!cpu_has_pse || pcpu_need_numa())
+	if (!chosen && (!cpu_has_pse || pcpu_need_numa()))
 		return -EINVAL;
 
 	return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
@@ -308,8 +388,11 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
 		void *ptr;
 
 		ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE);
-		if (!ptr)
+		if (!ptr) {
+			pr_warning("PERCPU: failed to allocate "
+				   "4k page for cpu%u\n", cpu);
 			goto enomem;
+		}
 
 		memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
 		pcpu4k_pages[j++] = virt_to_page(ptr);
@@ -333,6 +416,16 @@ out_free_ar:
 	return ret;
 }
 
+/* for explicit first chunk allocator selection */
+static char pcpu_chosen_alloc[16] __initdata;
+
+static int __init percpu_alloc_setup(char *str)
+{
+	strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1);
+	return 0;
+}
+early_param("percpu_alloc", percpu_alloc_setup);
+
 static inline void setup_percpu_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
@@ -346,11 +439,6 @@ static inline void setup_percpu_segment(int cpu)
 #endif
 }
 
-/*
- * Great future plan:
- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
- * Always point %gs to its beginning
- */
 void __init setup_per_cpu_areas(void)
 {
 	size_t static_size = __per_cpu_end - __per_cpu_start;
@@ -367,9 +455,26 @@ void __init setup_per_cpu_areas(void)
 	 * of large page mappings.  Please read comments on top of
 	 * each allocator for details.
 	 */
-	ret = setup_pcpu_remap(static_size);
-	if (ret < 0)
-		ret = setup_pcpu_embed(static_size);
+	ret = -EINVAL;
+	if (strlen(pcpu_chosen_alloc)) {
+		if (strcmp(pcpu_chosen_alloc, "4k")) {
+			if (!strcmp(pcpu_chosen_alloc, "lpage"))
+				ret = setup_pcpu_lpage(static_size, true);
+			else if (!strcmp(pcpu_chosen_alloc, "embed"))
+				ret = setup_pcpu_embed(static_size, true);
+			else
+				pr_warning("PERCPU: unknown allocator %s "
+					   "specified\n", pcpu_chosen_alloc);
+			if (ret < 0)
+				pr_warning("PERCPU: %s allocator failed (%zd), "
+					   "falling back to 4k\n",
+					   pcpu_chosen_alloc, ret);
+		}
+	} else {
+		ret = setup_pcpu_lpage(static_size, false);
+		if (ret < 0)
+			ret = setup_pcpu_embed(static_size, false);
+	}
 	if (ret < 0)
 		ret = setup_pcpu_4k(static_size);
 	if (ret < 0)
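
Aside (a userspace model with fabricated addresses, not kernel code): the sort added at the end of setup_pcpu_lpage() is what lets pcpu_lpage_remapped() binary-search the per-cpu PMD pages by kernel address and translate a hit into the equivalent offset in the remapped area:

#include <stdio.h>
#include <stddef.h>

#define PMD_SIZE	(2UL << 20)	/* 2 MiB large page */
#define PMD_MASK	(~(PMD_SIZE - 1))

struct ent { unsigned int cpu; char *ptr; };	/* kept sorted by ptr */

/* find the PMD page containing kaddr; return the aliased address */
static char *lookup(const struct ent *map, int n, char *base, char *kaddr)
{
	char *pmd_addr = (char *)((size_t)kaddr & PMD_MASK);
	size_t offset = (size_t)kaddr & ~PMD_MASK;
	int left = 0, right = n - 1;

	while (left <= right) {
		int pos = (left + right) / 2;

		if (map[pos].ptr < pmd_addr)
			left = pos + 1;
		else if (map[pos].ptr > pmd_addr)
			right = pos - 1;
		else
			return base + map[pos].cpu * PMD_SIZE + offset;
	}
	return NULL;	/* kaddr is not in any per-cpu PMD page */
}

int main(void)
{
	struct ent map[2] = {	/* sorted by ptr, as the hunk's sort ensures */
		{ .cpu = 1, .ptr = (char *)(1 * PMD_SIZE) },
		{ .cpu = 0, .ptr = (char *)(4 * PMD_SIZE) },
	};
	char *alias = lookup(map, 2, (char *)0x40000000UL,
			     (char *)(4 * PMD_SIZE + 0x123));

	printf("alias: %p\n", (void *)alias);	/* base + cpu0 slot + 0x123 */
	return 0;
}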
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 124d40c575df..8ccabb8a2f6a 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -711,7 +711,6 @@ uv_activation_descriptor_init(int node, int pnode)
 	unsigned long pa;
 	unsigned long m;
 	unsigned long n;
-	unsigned long mmr_image;
 	struct bau_desc *adp;
 	struct bau_desc *ad2;
 
@@ -727,12 +726,8 @@ uv_activation_descriptor_init(int node, int pnode)
 	n = pa >> uv_nshift;
 	m = pa & uv_mmask;
 
-	mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE);
-	if (mmr_image) {
-		uv_write_global_mmr64(pnode, (unsigned long)
-				      UVH_LB_BAU_SB_DESCRIPTOR_BASE,
-				      (n << UV_DESC_BASE_PNODE_SHIFT | m));
-	}
+	uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
+			      (n << UV_DESC_BASE_PNODE_SHIFT | m));
 
 	/*
 	 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a0f48f5671c0..5204332f475d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -346,6 +346,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
 	printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
 	show_registers(regs);
 
+	if (panic_on_io_nmi)
+		panic("NMI IOCK error: Not continuing");
+
 	/* Re-enable the IOCK line, wait for a few seconds */
 	reason = (reason & 0xf) | 8;
 	outb(reason, 0x61);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5c3d6e81a7dc..7030b5f911bf 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2157,7 +2157,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
 		else
 			/* 32 bits PSE 4MB page */
 			context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
-		context->rsvd_bits_mask[1][0] = ~0ull;
+		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
 		break;
 	case PT32E_ROOT_LEVEL:
 		context->rsvd_bits_mask[0][2] =
@@ -2170,7 +2170,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
 		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62) |
 			rsvd_bits(13, 20);		/* large page */
-		context->rsvd_bits_mask[1][0] = ~0ull;
+		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
 		break;
 	case PT64_ROOT_LEVEL:
 		context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
@@ -2186,7 +2186,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
 		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51) |
 			rsvd_bits(13, 20);		/* large page */
-		context->rsvd_bits_mask[1][0] = ~0ull;
+		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
 		break;
 	}
 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 258e4591e1ca..67785f635399 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -281,7 +281,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 {
 	unsigned access = gw->pt_access;
 	struct kvm_mmu_page *shadow_page;
-	u64 spte, *sptep;
+	u64 spte, *sptep = NULL;
 	int direct;
 	gfn_t table_gfn;
 	int r;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e770bf349ec4..356a0ce85c68 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3012,6 +3012,12 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return 1;
 }
 
+static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return 1;
+}
+
 static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -3198,6 +3204,15 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
 	[EXIT_REASON_HLT]                     = handle_halt,
 	[EXIT_REASON_INVLPG]                  = handle_invlpg,
 	[EXIT_REASON_VMCALL]                  = handle_vmcall,
+	[EXIT_REASON_VMCLEAR]                 = handle_vmx_insn,
+	[EXIT_REASON_VMLAUNCH]                = handle_vmx_insn,
+	[EXIT_REASON_VMPTRLD]                 = handle_vmx_insn,
+	[EXIT_REASON_VMPTRST]                 = handle_vmx_insn,
+	[EXIT_REASON_VMREAD]                  = handle_vmx_insn,
+	[EXIT_REASON_VMRESUME]                = handle_vmx_insn,
+	[EXIT_REASON_VMWRITE]                 = handle_vmx_insn,
+	[EXIT_REASON_VMOFF]                   = handle_vmx_insn,
+	[EXIT_REASON_VMON]                    = handle_vmx_insn,
 	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 249540f98513..fe5474aec41a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -898,6 +898,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_VM_HSAVE_PA:
 	case MSR_P6_EVNTSEL0:
 	case MSR_P6_EVNTSEL1:
+	case MSR_K7_EVNTSEL0:
 		data = 0;
 		break;
 	case MSR_MTRRcap:
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index c1b6c232e02b..616de4628d60 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1361,7 +1361,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
 	return 0;
 }
 
-void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
+static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
 {
 	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
 	/*
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index f4568605d7d5..ff485d361182 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -55,8 +55,10 @@ static void delay_tsc(unsigned long loops)
 
 	preempt_disable();
 	cpu = smp_processor_id();
+	rdtsc_barrier();
 	rdtscl(bclock);
 	for (;;) {
+		rdtsc_barrier();
 		rdtscl(now);
 		if ((now - bclock) >= loops)
 			break;
@@ -78,6 +80,7 @@ static void delay_tsc(unsigned long loops)
 		if (unlikely(cpu != smp_processor_id())) {
 			loops -= (now - bclock);
 			cpu = smp_processor_id();
+			rdtsc_barrier();
 			rdtscl(bclock);
 		}
 	}
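
Aside (a userspace analogue, not the kernel's delay_tsc(); _mm_lfence() stands in for rdtsc_barrier()): the barriers keep the CPU from reordering RDTSC against neighboring instructions, so each timestamp is taken where program order says it is and the measured interval stays accurate:

#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>		/* __rdtsc(), _mm_lfence() */

/* Busy-wait for roughly 'loops' TSC ticks.  The lfence before each
 * read prevents the read from being speculated past surrounding
 * instructions, mirroring the pattern the hunks above introduce. */
static void spin_tsc(uint64_t loops)
{
	uint64_t bclock, now;

	_mm_lfence();
	bclock = __rdtsc();
	for (;;) {
		_mm_lfence();
		now = __rdtsc();
		if (now - bclock >= loops)
			break;
	}
}

int main(void)
{
	spin_tsc(1000000);	/* ~1M ticks, well under a millisecond */
	puts("done");
	return 0;
}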
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index f53b57e4086f..47ce9a2ce5e7 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -177,20 +177,6 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,
 	return nr_range;
 }
 
-#ifdef CONFIG_X86_64
-static void __init init_gbpages(void)
-{
-	if (direct_gbpages && cpu_has_gbpages)
-		printk(KERN_INFO "Using GB pages for direct mapping\n");
-	else
-		direct_gbpages = 0;
-}
-#else
-static inline void init_gbpages(void)
-{
-}
-#endif
-
 /*
  * Setup the direct mapping of the physical memory at PAGE_OFFSET.
  * This runs before bootmem is initialized and gets pages directly from
@@ -210,9 +196,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 
 	printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
 
-	if (!after_bootmem)
-		init_gbpages();
-
 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
 	/*
 	 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index c4378f4fd4a5..b177652251a4 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -598,6 +598,8 @@ void __init paging_init(void)
 
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
+	/* clear the default setting with node 0 */
+	nodes_clear(node_states[N_NORMAL_MEMORY]);
 	free_area_init_nodes(max_zone_pfns);
 }
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 3cfe9ced8a4c..1b734d7a8966 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -11,6 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/pfn.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -681,8 +682,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
 static int cpa_process_alias(struct cpa_data *cpa)
 {
 	struct cpa_data alias_cpa;
-	int ret = 0;
-	unsigned long temp_cpa_vaddr, vaddr;
+	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
+	unsigned long vaddr, remapped;
+	int ret;
 
 	if (cpa->pfn >= max_pfn_mapped)
 		return 0;
@@ -706,42 +708,55 @@ static int cpa_process_alias(struct cpa_data *cpa)
 		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
 
 		alias_cpa = *cpa;
-		temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
-		alias_cpa.vaddr = &temp_cpa_vaddr;
+		alias_cpa.vaddr = &laddr;
 		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
 
-
 		ret = __change_page_attr_set_clr(&alias_cpa, 0);
+		if (ret)
+			return ret;
 	}
 
 #ifdef CONFIG_X86_64
-	if (ret)
-		return ret;
 	/*
-	 * No need to redo, when the primary call touched the high
-	 * mapping already:
-	 */
-	if (within(vaddr, (unsigned long) _text, _brk_end))
-		return 0;
-
-	/*
-	 * If the physical address is inside the kernel map, we need
+	 * If the primary call didn't touch the high mapping already
+	 * and the physical address is inside the kernel map, we need
 	 * to touch the high mapped kernel as well:
 	 */
-	if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn()))
-		return 0;
+	if (!within(vaddr, (unsigned long)_text, _brk_end) &&
+	    within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) {
+		unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
+					       __START_KERNEL_map - phys_base;
+		alias_cpa = *cpa;
+		alias_cpa.vaddr = &temp_cpa_vaddr;
+		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
 
-	alias_cpa = *cpa;
-	temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
-	alias_cpa.vaddr = &temp_cpa_vaddr;
-	alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+		/*
+		 * The high mapping range is imprecise, so ignore the
+		 * return value.
+		 */
+		__change_page_attr_set_clr(&alias_cpa, 0);
+	}
+#endif
 
 	/*
-	 * The high mapping range is imprecise, so ignore the return value.
+	 * If the PMD page was partially used for per-cpu remapping,
+	 * the recycled area needs to be split and modified.  Because
+	 * the area is always proper subset of a PMD page
+	 * cpa->numpages is guaranteed to be 1 for these areas, so
+	 * there's no need to loop over and check for further remaps.
 	 */
-	__change_page_attr_set_clr(&alias_cpa, 0);
-#endif
-	return ret;
+	remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
+	if (remapped) {
+		WARN_ON(cpa->numpages > 1);
+		alias_cpa = *cpa;
+		alias_cpa.vaddr = &remapped;
+		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+		ret = __change_page_attr_set_clr(&alias_cpa, 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index d277ef1eea51..b3d20b9cac63 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -244,7 +244,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
 	do_fpu_end();
 	mtrr_ap_init();
 
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_OLD_MCE
 	mcheck_init(&boot_cpu_data);
 #endif
 }