aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/include/asm/fixmap.h4
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/paravirt.h10
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h4
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h19
-rw-r--r--arch/x86/include/asm/xen/interface.h6
-rw-r--r--arch/x86/include/asm/xen/interface_32.h5
-rw-r--r--arch/x86/include/asm/xen/interface_64.h13
-rw-r--r--arch/x86/include/asm/xen/page.h7
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c7
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c25
-rw-r--r--arch/x86/kernel/cpu/perf_event.c20
-rw-r--r--arch/x86/kernel/entry_32.S2
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/hw_breakpoint.c4
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c64
-rw-r--r--arch/x86/mm/tlb.c5
-rw-r--r--arch/x86/platform/uv/tlb_uv.c2
-rw-r--r--arch/x86/platform/uv/uv_time.c4
-rw-r--r--arch/x86/xen/enlighten.c21
-rw-r--r--arch/x86/xen/mmu.c86
-rw-r--r--arch/x86/xen/setup.c12
23 files changed, 226 insertions, 100 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e8327686d3c5..e330da21b84f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -21,7 +21,7 @@ config X86
21 select HAVE_UNSTABLE_SCHED_CLOCK 21 select HAVE_UNSTABLE_SCHED_CLOCK
22 select HAVE_IDE 22 select HAVE_IDE
23 select HAVE_OPROFILE 23 select HAVE_OPROFILE
24 select HAVE_PERF_EVENTS if (!M386 && !M486) 24 select HAVE_PERF_EVENTS
25 select HAVE_IRQ_WORK 25 select HAVE_IRQ_WORK
26 select HAVE_IOREMAP_PROT 26 select HAVE_IOREMAP_PROT
27 select HAVE_KPROBES 27 select HAVE_KPROBES
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 4d293dced62f..9479a037419f 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -216,8 +216,8 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
216} 216}
217 217
218/* Return an pointer with offset calculated */ 218/* Return an pointer with offset calculated */
219static inline unsigned long __set_fixmap_offset(enum fixed_addresses idx, 219static __always_inline unsigned long
220 phys_addr_t phys, pgprot_t flags) 220__set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
221{ 221{
222 __set_fixmap(idx, phys, flags); 222 __set_fixmap(idx, phys, flags);
223 return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); 223 return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1));
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3ea3dc487047..6b89f5e86021 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -128,7 +128,7 @@
128#define FAM10H_MMIO_CONF_ENABLE (1<<0) 128#define FAM10H_MMIO_CONF_ENABLE (1<<0)
129#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf 129#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
130#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 130#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
131#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff 131#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
132#define FAM10H_MMIO_CONF_BASE_SHIFT 20 132#define FAM10H_MMIO_CONF_BASE_SHIFT 20
133#define MSR_FAM10H_NODE_ID 0xc001100c 133#define MSR_FAM10H_NODE_ID 0xc001100c
134 134
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 18e3b8a8709f..ef9975812c77 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -824,27 +824,27 @@ static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
824#define __PV_IS_CALLEE_SAVE(func) \ 824#define __PV_IS_CALLEE_SAVE(func) \
825 ((struct paravirt_callee_save) { func }) 825 ((struct paravirt_callee_save) { func })
826 826
827static inline unsigned long arch_local_save_flags(void) 827static inline notrace unsigned long arch_local_save_flags(void)
828{ 828{
829 return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); 829 return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
830} 830}
831 831
832static inline void arch_local_irq_restore(unsigned long f) 832static inline notrace void arch_local_irq_restore(unsigned long f)
833{ 833{
834 PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); 834 PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
835} 835}
836 836
837static inline void arch_local_irq_disable(void) 837static inline notrace void arch_local_irq_disable(void)
838{ 838{
839 PVOP_VCALLEE0(pv_irq_ops.irq_disable); 839 PVOP_VCALLEE0(pv_irq_ops.irq_disable);
840} 840}
841 841
842static inline void arch_local_irq_enable(void) 842static inline notrace void arch_local_irq_enable(void)
843{ 843{
844 PVOP_VCALLEE0(pv_irq_ops.irq_enable); 844 PVOP_VCALLEE0(pv_irq_ops.irq_enable);
845} 845}
846 846
847static inline unsigned long arch_local_irq_save(void) 847static inline notrace unsigned long arch_local_irq_save(void)
848{ 848{
849 unsigned long f; 849 unsigned long f;
850 850
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index e969f691cbfd..a501741c2335 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -199,6 +199,8 @@ union uvh_apicid {
199#define UVH_APICID 0x002D0E00L 199#define UVH_APICID 0x002D0E00L
200#define UV_APIC_PNODE_SHIFT 6 200#define UV_APIC_PNODE_SHIFT 6
201 201
202#define UV_APICID_HIBIT_MASK 0xffff0000
203
202/* Local Bus from cpu's perspective */ 204/* Local Bus from cpu's perspective */
203#define LOCAL_BUS_BASE 0x1c00000 205#define LOCAL_BUS_BASE 0x1c00000
204#define LOCAL_BUS_SIZE (4 * 1024 * 1024) 206#define LOCAL_BUS_SIZE (4 * 1024 * 1024)
@@ -491,8 +493,10 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
491 } 493 }
492} 494}
493 495
496extern unsigned int uv_apicid_hibits;
494static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode) 497static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode)
495{ 498{
499 apicid |= uv_apicid_hibits;
496 return (1UL << UVH_IPI_INT_SEND_SHFT) | 500 return (1UL << UVH_IPI_INT_SEND_SHFT) |
497 ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | 501 ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
498 (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | 502 (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 6d90adf4428a..20cafeac7455 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV MMR definitions 6 * SGI UV MMR definitions
7 * 7 *
8 * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10 10
11#ifndef _ASM_X86_UV_UV_MMRS_H 11#ifndef _ASM_X86_UV_UV_MMRS_H
@@ -754,6 +754,23 @@ union uvh_lb_bau_sb_descriptor_base_u {
754}; 754};
755 755
756/* ========================================================================= */ 756/* ========================================================================= */
757/* UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK */
758/* ========================================================================= */
759#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL
760#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x009f0
761
762#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0
763#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL
764
765union uvh_lb_target_physical_apic_id_mask_u {
766 unsigned long v;
767 struct uvh_lb_target_physical_apic_id_mask_s {
768 unsigned long bit_enables : 32; /* RW */
769 unsigned long rsvd_32_63 : 32; /* */
770 } s;
771};
772
773/* ========================================================================= */
757/* UVH_NODE_ID */ 774/* UVH_NODE_ID */
758/* ========================================================================= */ 775/* ========================================================================= */
759#define UVH_NODE_ID 0x0UL 776#define UVH_NODE_ID 0x0UL
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index e8506c1f0c55..1c10c88ee4e1 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void);
61#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) 61#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
62#endif 62#endif
63 63
64#ifndef machine_to_phys_mapping 64#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
65#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) 65#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
66#endif 66#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT)
67 67
68/* Maximum number of virtual CPUs in multi-processor guests. */ 68/* Maximum number of virtual CPUs in multi-processor guests. */
69#define MAX_VIRT_CPUS 32 69#define MAX_VIRT_CPUS 32
diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h
index 42a7e004ae5c..8413688b2571 100644
--- a/arch/x86/include/asm/xen/interface_32.h
+++ b/arch/x86/include/asm/xen/interface_32.h
@@ -32,6 +32,11 @@
32/* And the trap vector is... */ 32/* And the trap vector is... */
33#define TRAP_INSTR "int $0x82" 33#define TRAP_INSTR "int $0x82"
34 34
35#define __MACH2PHYS_VIRT_START 0xF5800000
36#define __MACH2PHYS_VIRT_END 0xF6800000
37
38#define __MACH2PHYS_SHIFT 2
39
35/* 40/*
36 * Virtual addresses beyond this are not modifiable by guest OSes. The 41 * Virtual addresses beyond this are not modifiable by guest OSes. The
37 * machine->physical mapping table starts at this address, read-only. 42 * machine->physical mapping table starts at this address, read-only.
diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h
index 100d2662b97c..839a4811cf98 100644
--- a/arch/x86/include/asm/xen/interface_64.h
+++ b/arch/x86/include/asm/xen/interface_64.h
@@ -39,18 +39,7 @@
39#define __HYPERVISOR_VIRT_END 0xFFFF880000000000 39#define __HYPERVISOR_VIRT_END 0xFFFF880000000000
40#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 40#define __MACH2PHYS_VIRT_START 0xFFFF800000000000
41#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 41#define __MACH2PHYS_VIRT_END 0xFFFF804000000000
42 42#define __MACH2PHYS_SHIFT 3
43#ifndef HYPERVISOR_VIRT_START
44#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
45#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END)
46#endif
47
48#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
49#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
50#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
51#ifndef machine_to_phys_mapping
52#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
53#endif
54 43
55/* 44/*
56 * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) 45 * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index dd8c1414b3d5..8760cc60a21c 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -5,6 +5,7 @@
5#include <linux/types.h> 5#include <linux/types.h>
6#include <linux/spinlock.h> 6#include <linux/spinlock.h>
7#include <linux/pfn.h> 7#include <linux/pfn.h>
8#include <linux/mm.h>
8 9
9#include <asm/uaccess.h> 10#include <asm/uaccess.h>
10#include <asm/page.h> 11#include <asm/page.h>
@@ -35,6 +36,8 @@ typedef struct xpaddr {
35#define MAX_DOMAIN_PAGES \ 36#define MAX_DOMAIN_PAGES \
36 ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) 37 ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
37 38
39extern unsigned long *machine_to_phys_mapping;
40extern unsigned int machine_to_phys_order;
38 41
39extern unsigned long get_phys_to_machine(unsigned long pfn); 42extern unsigned long get_phys_to_machine(unsigned long pfn);
40extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); 43extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
@@ -69,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
69 if (xen_feature(XENFEAT_auto_translated_physmap)) 72 if (xen_feature(XENFEAT_auto_translated_physmap))
70 return mfn; 73 return mfn;
71 74
72#if 0
73 if (unlikely((mfn >> machine_to_phys_order) != 0)) 75 if (unlikely((mfn >> machine_to_phys_order) != 0))
74 return max_mapnr; 76 return ~0;
75#endif
76 77
77 pfn = 0; 78 pfn = 0;
78 /* 79 /*
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index cefd6942f0e9..62f6e1e55b90 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,15 +17,16 @@
17#include <linux/nmi.h> 17#include <linux/nmi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19 19
20/* For reliability, we're prepared to waste bits here. */
21static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
22
23u64 hw_nmi_get_sample_period(void) 20u64 hw_nmi_get_sample_period(void)
24{ 21{
25 return (u64)(cpu_khz) * 1000 * 60; 22 return (u64)(cpu_khz) * 1000 * 60;
26} 23}
27 24
28#ifdef ARCH_HAS_NMI_WATCHDOG 25#ifdef ARCH_HAS_NMI_WATCHDOG
26
27/* For reliability, we're prepared to waste bits here. */
28static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
29
29void arch_trigger_all_cpu_backtrace(void) 30void arch_trigger_all_cpu_backtrace(void)
30{ 31{
31 int i; 32 int i;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 194539aea175..c1c52c341f40 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -44,6 +44,8 @@ static u64 gru_start_paddr, gru_end_paddr;
44static union uvh_apicid uvh_apicid; 44static union uvh_apicid uvh_apicid;
45int uv_min_hub_revision_id; 45int uv_min_hub_revision_id;
46EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); 46EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
47unsigned int uv_apicid_hibits;
48EXPORT_SYMBOL_GPL(uv_apicid_hibits);
47static DEFINE_SPINLOCK(uv_nmi_lock); 49static DEFINE_SPINLOCK(uv_nmi_lock);
48 50
49static inline bool is_GRU_range(u64 start, u64 end) 51static inline bool is_GRU_range(u64 start, u64 end)
@@ -85,6 +87,23 @@ static void __init early_get_apic_pnode_shift(void)
85 uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; 87 uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
86} 88}
87 89
90/*
91 * Add an extra bit as dictated by bios to the destination apicid of
92 * interrupts potentially passing through the UV HUB. This prevents
93 * a deadlock between interrupts and IO port operations.
94 */
95static void __init uv_set_apicid_hibit(void)
96{
97 union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
98 unsigned long *mmr;
99
100 mmr = early_ioremap(UV_LOCAL_MMR_BASE |
101 UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr));
102 apicid_mask.v = *mmr;
103 early_iounmap(mmr, sizeof(*mmr));
104 uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
105}
106
88static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 107static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
89{ 108{
90 int nodeid; 109 int nodeid;
@@ -102,6 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
102 __get_cpu_var(x2apic_extra_bits) = 121 __get_cpu_var(x2apic_extra_bits) =
103 nodeid << (uvh_apicid.s.pnode_shift - 1); 122 nodeid << (uvh_apicid.s.pnode_shift - 1);
104 uv_system_type = UV_NON_UNIQUE_APIC; 123 uv_system_type = UV_NON_UNIQUE_APIC;
124 uv_set_apicid_hibit();
105 return 1; 125 return 1;
106 } 126 }
107 } 127 }
@@ -155,6 +175,7 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
155 int pnode; 175 int pnode;
156 176
157 pnode = uv_apicid_to_pnode(phys_apicid); 177 pnode = uv_apicid_to_pnode(phys_apicid);
178 phys_apicid |= uv_apicid_hibits;
158 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 179 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
159 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | 180 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
160 ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | 181 ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
@@ -236,7 +257,7 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
236 int cpu = cpumask_first(cpumask); 257 int cpu = cpumask_first(cpumask);
237 258
238 if ((unsigned)cpu < nr_cpu_ids) 259 if ((unsigned)cpu < nr_cpu_ids)
239 return per_cpu(x86_cpu_to_apicid, cpu); 260 return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
240 else 261 else
241 return BAD_APICID; 262 return BAD_APICID;
242} 263}
@@ -255,7 +276,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
255 if (cpumask_test_cpu(cpu, cpu_online_mask)) 276 if (cpumask_test_cpu(cpu, cpu_online_mask))
256 break; 277 break;
257 } 278 }
258 return per_cpu(x86_cpu_to_apicid, cpu); 279 return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
259} 280}
260 281
261static unsigned int x2apic_get_apic_id(unsigned long x) 282static unsigned int x2apic_get_apic_id(unsigned long x)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ed6310183efb..6d75b9145b13 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -381,6 +381,20 @@ static void release_pmc_hardware(void) {}
381 381
382#endif 382#endif
383 383
384static bool check_hw_exists(void)
385{
386 u64 val, val_new = 0;
387 int ret = 0;
388
389 val = 0xabcdUL;
390 ret |= checking_wrmsrl(x86_pmu.perfctr, val);
391 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
392 if (ret || val != val_new)
393 return false;
394
395 return true;
396}
397
384static void reserve_ds_buffers(void); 398static void reserve_ds_buffers(void);
385static void release_ds_buffers(void); 399static void release_ds_buffers(void);
386 400
@@ -1372,6 +1386,12 @@ void __init init_hw_perf_events(void)
1372 1386
1373 pmu_check_apic(); 1387 pmu_check_apic();
1374 1388
1389 /* sanity check that the hardware exists or is emulated */
1390 if (!check_hw_exists()) {
1391 pr_cont("Broken PMU hardware detected, software events only.\n");
1392 return;
1393 }
1394
1375 pr_cont("%s PMU driver.\n", x86_pmu.name); 1395 pr_cont("%s PMU driver.\n", x86_pmu.name);
1376 1396
1377 if (x86_pmu.quirks) 1397 if (x86_pmu.quirks)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 59e175e89599..591e60104278 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -395,7 +395,7 @@ sysenter_past_esp:
395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words 395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
396 * pushed above; +8 corresponds to copy_thread's esp0 setting. 396 * pushed above; +8 corresponds to copy_thread's esp0 setting.
397 */ 397 */
398 pushl_cfi (TI_sysenter_return-THREAD_SIZE_asm+8+4*4)(%esp) 398 pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
399 CFI_REL_OFFSET eip, 0 399 CFI_REL_OFFSET eip, 0
400 400
401 pushl_cfi %eax 401 pushl_cfi %eax
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index fe2690d71c0c..e3ba417e8697 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64)
295 .endm 295 .endm
296 296
297/* save partial stack frame */ 297/* save partial stack frame */
298 .pushsection .kprobes.text, "ax"
298ENTRY(save_args) 299ENTRY(save_args)
299 XCPT_FRAME 300 XCPT_FRAME
300 cld 301 cld
@@ -334,6 +335,7 @@ ENTRY(save_args)
334 ret 335 ret
335 CFI_ENDPROC 336 CFI_ENDPROC
336END(save_args) 337END(save_args)
338 .popsection
337 339
338ENTRY(save_rest) 340ENTRY(save_rest)
339 PARTIAL_FRAME 1 REST_SKIP+8 341 PARTIAL_FRAME 1 REST_SKIP+8
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index ff15c9dcc25d..42c594254507 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
433 dr6_p = (unsigned long *)ERR_PTR(args->err); 433 dr6_p = (unsigned long *)ERR_PTR(args->err);
434 dr6 = *dr6_p; 434 dr6 = *dr6_p;
435 435
436 /* If it's a single step, TRAP bits are random */
437 if (dr6 & DR_STEP)
438 return NOTIFY_DONE;
439
436 /* Do an early return if no trap bits are set in DR6 */ 440 /* Do an early return if no trap bits are set in DR6 */
437 if ((dr6 & DR_TRAP_BITS) == 0) 441 if ((dr6 & DR_TRAP_BITS) == 0)
438 return NOTIFY_DONE; 442 return NOTIFY_DONE;
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 6da143c2a6b8..ac861b8348e2 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -25,7 +25,6 @@ struct pci_hostbridge_probe {
25}; 25};
26 26
27static u64 __cpuinitdata fam10h_pci_mmconf_base; 27static u64 __cpuinitdata fam10h_pci_mmconf_base;
28static int __cpuinitdata fam10h_pci_mmconf_base_status;
29 28
30static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { 29static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = {
31 { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, 30 { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 },
@@ -44,10 +43,12 @@ static int __cpuinit cmp_range(const void *x1, const void *x2)
44 return start1 - start2; 43 return start1 - start2;
45} 44}
46 45
47/*[47:0] */ 46#define MMCONF_UNIT (1ULL << FAM10H_MMIO_CONF_BASE_SHIFT)
48/* need to avoid (0xfd<<32) and (0xfe<<32), ht used space */ 47#define MMCONF_MASK (~(MMCONF_UNIT - 1))
48#define MMCONF_SIZE (MMCONF_UNIT << 8)
49/* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */
49#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32) 50#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32)
50#define BASE_VALID(b) ((b != (0xfdULL << 32)) && (b != (0xfeULL << 32))) 51#define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40))
51static void __cpuinit get_fam10h_pci_mmconf_base(void) 52static void __cpuinit get_fam10h_pci_mmconf_base(void)
52{ 53{
53 int i; 54 int i;
@@ -64,12 +65,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
64 struct range range[8]; 65 struct range range[8];
65 66
66 /* only try to get setting from BSP */ 67 /* only try to get setting from BSP */
67 /* -1 or 1 */ 68 if (fam10h_pci_mmconf_base)
68 if (fam10h_pci_mmconf_base_status)
69 return; 69 return;
70 70
71 if (!early_pci_allowed()) 71 if (!early_pci_allowed())
72 goto fail; 72 return;
73 73
74 found = 0; 74 found = 0;
75 for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { 75 for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
@@ -91,7 +91,7 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
91 } 91 }
92 92
93 if (!found) 93 if (!found)
94 goto fail; 94 return;
95 95
96 /* SYS_CFG */ 96 /* SYS_CFG */
97 address = MSR_K8_SYSCFG; 97 address = MSR_K8_SYSCFG;
@@ -99,16 +99,16 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
99 99
100 /* TOP_MEM2 is not enabled? */ 100 /* TOP_MEM2 is not enabled? */
101 if (!(val & (1<<21))) { 101 if (!(val & (1<<21))) {
102 tom2 = 0; 102 tom2 = 1ULL << 32;
103 } else { 103 } else {
104 /* TOP_MEM2 */ 104 /* TOP_MEM2 */
105 address = MSR_K8_TOP_MEM2; 105 address = MSR_K8_TOP_MEM2;
106 rdmsrl(address, val); 106 rdmsrl(address, val);
107 tom2 = val & (0xffffULL<<32); 107 tom2 = max(val & 0xffffff800000ULL, 1ULL << 32);
108 } 108 }
109 109
110 if (base <= tom2) 110 if (base <= tom2)
111 base = tom2 + (1ULL<<32); 111 base = (tom2 + 2 * MMCONF_UNIT - 1) & MMCONF_MASK;
112 112
113 /* 113 /*
114 * need to check if the range is in the high mmio range that is 114 * need to check if the range is in the high mmio range that is
@@ -123,11 +123,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
123 if (!(reg & 3)) 123 if (!(reg & 3))
124 continue; 124 continue;
125 125
126 start = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ 126 start = (u64)(reg & 0xffffff00) << 8; /* 39:16 on 31:8*/
127 reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); 127 reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3));
128 end = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ 128 end = ((u64)(reg & 0xffffff00) << 8) | 0xffff; /* 39:16 on 31:8*/
129 129
130 if (!end) 130 if (end < tom2)
131 continue; 131 continue;
132 132
133 range[hi_mmio_num].start = start; 133 range[hi_mmio_num].start = start;
@@ -143,32 +143,27 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
143 143
144 if (range[hi_mmio_num - 1].end < base) 144 if (range[hi_mmio_num - 1].end < base)
145 goto out; 145 goto out;
146 if (range[0].start > base) 146 if (range[0].start > base + MMCONF_SIZE)
147 goto out; 147 goto out;
148 148
149 /* need to find one window */ 149 /* need to find one window */
150 base = range[0].start - (1ULL << 32); 150 base = (range[0].start & MMCONF_MASK) - MMCONF_UNIT;
151 if ((base > tom2) && BASE_VALID(base)) 151 if ((base > tom2) && BASE_VALID(base))
152 goto out; 152 goto out;
153 base = range[hi_mmio_num - 1].end + (1ULL << 32); 153 base = (range[hi_mmio_num - 1].end + MMCONF_UNIT) & MMCONF_MASK;
154 if ((base > tom2) && BASE_VALID(base)) 154 if (BASE_VALID(base))
155 goto out; 155 goto out;
156 /* need to find window between ranges */ 156 /* need to find window between ranges */
157 if (hi_mmio_num > 1) 157 for (i = 1; i < hi_mmio_num; i++) {
158 for (i = 0; i < hi_mmio_num - 1; i++) { 158 base = (range[i - 1].end + MMCONF_UNIT) & MMCONF_MASK;
159 if (range[i + 1].start > (range[i].end + (1ULL << 32))) { 159 val = range[i].start & MMCONF_MASK;
160 base = range[i].end + (1ULL << 32); 160 if (val >= base + MMCONF_SIZE && BASE_VALID(base))
161 if ((base > tom2) && BASE_VALID(base)) 161 goto out;
162 goto out;
163 }
164 } 162 }
165
166fail:
167 fam10h_pci_mmconf_base_status = -1;
168 return; 163 return;
164
169out: 165out:
170 fam10h_pci_mmconf_base = base; 166 fam10h_pci_mmconf_base = base;
171 fam10h_pci_mmconf_base_status = 1;
172} 167}
173 168
174void __cpuinit fam10h_check_enable_mmcfg(void) 169void __cpuinit fam10h_check_enable_mmcfg(void)
@@ -190,11 +185,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
190 185
191 /* only trust the one handle 256 buses, if acpi=off */ 186 /* only trust the one handle 256 buses, if acpi=off */
192 if (!acpi_pci_disabled || busnbits >= 8) { 187 if (!acpi_pci_disabled || busnbits >= 8) {
193 u64 base; 188 u64 base = val & MMCONF_MASK;
194 base = val & (0xffffULL << 32); 189
195 if (fam10h_pci_mmconf_base_status <= 0) { 190 if (!fam10h_pci_mmconf_base) {
196 fam10h_pci_mmconf_base = base; 191 fam10h_pci_mmconf_base = base;
197 fam10h_pci_mmconf_base_status = 1;
198 return; 192 return;
199 } else if (fam10h_pci_mmconf_base == base) 193 } else if (fam10h_pci_mmconf_base == base)
200 return; 194 return;
@@ -206,8 +200,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
206 * with 256 buses 200 * with 256 buses
207 */ 201 */
208 get_fam10h_pci_mmconf_base(); 202 get_fam10h_pci_mmconf_base();
209 if (fam10h_pci_mmconf_base_status <= 0) 203 if (!fam10h_pci_mmconf_base) {
204 pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF;
210 return; 205 return;
206 }
211 207
212 printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n"); 208 printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n");
213 val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) | 209 val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 12cdbb17ad18..6acc724d5d8f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -223,7 +223,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
223 223
224static void __cpuinit calculate_tlb_offset(void) 224static void __cpuinit calculate_tlb_offset(void)
225{ 225{
226 int cpu, node, nr_node_vecs; 226 int cpu, node, nr_node_vecs, idx = 0;
227 /* 227 /*
228 * we are changing tlb_vector_offset for each CPU in runtime, but this 228 * we are changing tlb_vector_offset for each CPU in runtime, but this
229 * will not cause inconsistency, as the write is atomic under X86. we 229 * will not cause inconsistency, as the write is atomic under X86. we
@@ -239,7 +239,7 @@ static void __cpuinit calculate_tlb_offset(void)
239 nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; 239 nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
240 240
241 for_each_online_node(node) { 241 for_each_online_node(node) {
242 int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) * 242 int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) *
243 nr_node_vecs; 243 nr_node_vecs;
244 int cpu_offset = 0; 244 int cpu_offset = 0;
245 for_each_cpu(cpu, cpumask_of_node(node)) { 245 for_each_cpu(cpu, cpumask_of_node(node)) {
@@ -248,6 +248,7 @@ static void __cpuinit calculate_tlb_offset(void)
248 cpu_offset++; 248 cpu_offset++;
249 cpu_offset = cpu_offset % nr_node_vecs; 249 cpu_offset = cpu_offset % nr_node_vecs;
250 } 250 }
251 idx++;
251 } 252 }
252} 253}
253 254
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index a318194002b5..ba9caa808a9c 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1455,7 +1455,7 @@ static void __init uv_init_uvhub(int uvhub, int vector)
1455 * the below initialization can't be in firmware because the 1455 * the below initialization can't be in firmware because the
1456 * messaging IRQ will be determined by the OS 1456 * messaging IRQ will be determined by the OS
1457 */ 1457 */
1458 apicid = uvhub_to_first_apicid(uvhub); 1458 apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
1459 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, 1459 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
1460 ((apicid << 32) | vector)); 1460 ((apicid << 32) | vector));
1461} 1461}
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 56e421bc379b..9daf5d1af9f1 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -89,6 +89,7 @@ static void uv_rtc_send_IPI(int cpu)
89 89
90 apicid = cpu_physical_id(cpu); 90 apicid = cpu_physical_id(cpu);
91 pnode = uv_apicid_to_pnode(apicid); 91 pnode = uv_apicid_to_pnode(apicid);
92 apicid |= uv_apicid_hibits;
92 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 93 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
93 (apicid << UVH_IPI_INT_APIC_ID_SHFT) | 94 (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
94 (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); 95 (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
@@ -107,6 +108,7 @@ static int uv_intr_pending(int pnode)
107static int uv_setup_intr(int cpu, u64 expires) 108static int uv_setup_intr(int cpu, u64 expires)
108{ 109{
109 u64 val; 110 u64 val;
111 unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits;
110 int pnode = uv_cpu_to_pnode(cpu); 112 int pnode = uv_cpu_to_pnode(cpu);
111 113
112 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, 114 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
@@ -117,7 +119,7 @@ static int uv_setup_intr(int cpu, u64 expires)
117 UVH_EVENT_OCCURRED0_RTC1_MASK); 119 UVH_EVENT_OCCURRED0_RTC1_MASK);
118 120
119 val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | 121 val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
120 ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); 122 ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
121 123
122 /* Set configuration */ 124 /* Set configuration */
123 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); 125 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 235c0f4d3861..02c710bebf7a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,6 +75,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
75enum xen_domain_type xen_domain_type = XEN_NATIVE; 75enum xen_domain_type xen_domain_type = XEN_NATIVE;
76EXPORT_SYMBOL_GPL(xen_domain_type); 76EXPORT_SYMBOL_GPL(xen_domain_type);
77 77
78unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
79EXPORT_SYMBOL(machine_to_phys_mapping);
80unsigned int machine_to_phys_order;
81EXPORT_SYMBOL(machine_to_phys_order);
82
78struct start_info *xen_start_info; 83struct start_info *xen_start_info;
79EXPORT_SYMBOL_GPL(xen_start_info); 84EXPORT_SYMBOL_GPL(xen_start_info);
80 85
@@ -1090,6 +1095,8 @@ static void __init xen_setup_stackprotector(void)
1090/* First C function to be called on Xen boot */ 1095/* First C function to be called on Xen boot */
1091asmlinkage void __init xen_start_kernel(void) 1096asmlinkage void __init xen_start_kernel(void)
1092{ 1097{
1098 struct physdev_set_iopl set_iopl;
1099 int rc;
1093 pgd_t *pgd; 1100 pgd_t *pgd;
1094 1101
1095 if (!xen_start_info) 1102 if (!xen_start_info)
@@ -1097,6 +1104,8 @@ asmlinkage void __init xen_start_kernel(void)
1097 1104
1098 xen_domain_type = XEN_PV_DOMAIN; 1105 xen_domain_type = XEN_PV_DOMAIN;
1099 1106
1107 xen_setup_machphys_mapping();
1108
1100 /* Install Xen paravirt ops */ 1109 /* Install Xen paravirt ops */
1101 pv_info = xen_info; 1110 pv_info = xen_info;
1102 pv_init_ops = xen_init_ops; 1111 pv_init_ops = xen_init_ops;
@@ -1191,8 +1200,6 @@ asmlinkage void __init xen_start_kernel(void)
1191 /* Allocate and initialize top and mid mfn levels for p2m structure */ 1200 /* Allocate and initialize top and mid mfn levels for p2m structure */
1192 xen_build_mfn_list_list(); 1201 xen_build_mfn_list_list();
1193 1202
1194 init_mm.pgd = pgd;
1195
1196 /* keep using Xen gdt for now; no urgent need to change it */ 1203 /* keep using Xen gdt for now; no urgent need to change it */
1197 1204
1198#ifdef CONFIG_X86_32 1205#ifdef CONFIG_X86_32
@@ -1202,10 +1209,18 @@ asmlinkage void __init xen_start_kernel(void)
1202#else 1209#else
1203 pv_info.kernel_rpl = 0; 1210 pv_info.kernel_rpl = 0;
1204#endif 1211#endif
1205
1206 /* set the limit of our address space */ 1212 /* set the limit of our address space */
1207 xen_reserve_top(); 1213 xen_reserve_top();
1208 1214
1215 /* We used to do this in xen_arch_setup, but that is too late on AMD
1216 * where early_cpu_init (run before ->arch_setup()) calls early_amd_init
1217 * which pokes 0xcf8 port.
1218 */
1219 set_iopl.iopl = 1;
1220 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1221 if (rc != 0)
1222 xen_raw_printk("physdev_op failed %d\n", rc);
1223
1209#ifdef CONFIG_X86_32 1224#ifdef CONFIG_X86_32
1210 /* set up basic CPUID stuff */ 1225 /* set up basic CPUID stuff */
1211 cpu_detect(&new_cpu_data); 1226 cpu_detect(&new_cpu_data);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 21ed8d7f75a5..a1feff9e59b6 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2034,6 +2034,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
2034 set_page_prot(pmd, PAGE_KERNEL_RO); 2034 set_page_prot(pmd, PAGE_KERNEL_RO);
2035} 2035}
2036 2036
2037void __init xen_setup_machphys_mapping(void)
2038{
2039 struct xen_machphys_mapping mapping;
2040 unsigned long machine_to_phys_nr_ents;
2041
2042 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
2043 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
2044 machine_to_phys_nr_ents = mapping.max_mfn + 1;
2045 } else {
2046 machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
2047 }
2048 machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
2049}
2050
2037#ifdef CONFIG_X86_64 2051#ifdef CONFIG_X86_64
2038static void convert_pfn_mfn(void *v) 2052static void convert_pfn_mfn(void *v)
2039{ 2053{
@@ -2119,44 +2133,83 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
2119 return pgd; 2133 return pgd;
2120} 2134}
2121#else /* !CONFIG_X86_64 */ 2135#else /* !CONFIG_X86_64 */
2122static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD); 2136static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
2137static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
2138
2139static __init void xen_write_cr3_init(unsigned long cr3)
2140{
2141 unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
2142
2143 BUG_ON(read_cr3() != __pa(initial_page_table));
2144 BUG_ON(cr3 != __pa(swapper_pg_dir));
2145
2146 /*
2147 * We are switching to swapper_pg_dir for the first time (from
2148 * initial_page_table) and therefore need to mark that page
2149 * read-only and then pin it.
2150 *
2151 * Xen disallows sharing of kernel PMDs for PAE
2152 * guests. Therefore we must copy the kernel PMD from
2153 * initial_page_table into a new kernel PMD to be used in
2154 * swapper_pg_dir.
2155 */
2156 swapper_kernel_pmd =
2157 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
2158 memcpy(swapper_kernel_pmd, initial_kernel_pmd,
2159 sizeof(pmd_t) * PTRS_PER_PMD);
2160 swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
2161 __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
2162 set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
2163
2164 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
2165 xen_write_cr3(cr3);
2166 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
2167
2168 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
2169 PFN_DOWN(__pa(initial_page_table)));
2170 set_page_prot(initial_page_table, PAGE_KERNEL);
2171 set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
2172
2173 pv_mmu_ops.write_cr3 = &xen_write_cr3;
2174}
2123 2175
2124__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, 2176__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
2125 unsigned long max_pfn) 2177 unsigned long max_pfn)
2126{ 2178{
2127 pmd_t *kernel_pmd; 2179 pmd_t *kernel_pmd;
2128 2180
2129 level2_kernel_pgt = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); 2181 initial_kernel_pmd =
2182 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
2130 2183
2131 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + 2184 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
2132 xen_start_info->nr_pt_frames * PAGE_SIZE + 2185 xen_start_info->nr_pt_frames * PAGE_SIZE +
2133 512*1024); 2186 512*1024);
2134 2187
2135 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); 2188 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
2136 memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); 2189 memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
2137 2190
2138 xen_map_identity_early(level2_kernel_pgt, max_pfn); 2191 xen_map_identity_early(initial_kernel_pmd, max_pfn);
2139 2192
2140 memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); 2193 memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
2141 set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], 2194 initial_page_table[KERNEL_PGD_BOUNDARY] =
2142 __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); 2195 __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
2143 2196
2144 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 2197 set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
2145 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); 2198 set_page_prot(initial_page_table, PAGE_KERNEL_RO);
2146 set_page_prot(empty_zero_page, PAGE_KERNEL_RO); 2199 set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
2147 2200
2148 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); 2201 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
2149 2202
2150 xen_write_cr3(__pa(swapper_pg_dir)); 2203 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
2151 2204 PFN_DOWN(__pa(initial_page_table)));
2152 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); 2205 xen_write_cr3(__pa(initial_page_table));
2153 2206
2154 memblock_x86_reserve_range(__pa(xen_start_info->pt_base), 2207 memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
2155 __pa(xen_start_info->pt_base + 2208 __pa(xen_start_info->pt_base +
2156 xen_start_info->nr_pt_frames * PAGE_SIZE), 2209 xen_start_info->nr_pt_frames * PAGE_SIZE),
2157 "XEN PAGETABLES"); 2210 "XEN PAGETABLES");
2158 2211
2159 return swapper_pg_dir; 2212 return initial_page_table;
2160} 2213}
2161#endif /* CONFIG_X86_64 */ 2214#endif /* CONFIG_X86_64 */
2162 2215
@@ -2290,7 +2343,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
2290 .write_cr2 = xen_write_cr2, 2343 .write_cr2 = xen_write_cr2,
2291 2344
2292 .read_cr3 = xen_read_cr3, 2345 .read_cr3 = xen_read_cr3,
2346#ifdef CONFIG_X86_32
2347 .write_cr3 = xen_write_cr3_init,
2348#else
2293 .write_cr3 = xen_write_cr3, 2349 .write_cr3 = xen_write_cr3,
2350#endif
2294 2351
2295 .flush_tlb_user = xen_flush_tlb, 2352 .flush_tlb_user = xen_flush_tlb,
2296 .flush_tlb_kernel = xen_flush_tlb, 2353 .flush_tlb_kernel = xen_flush_tlb,
@@ -2627,7 +2684,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
2627 2684
2628 prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); 2685 prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
2629 2686
2630 vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; 2687 BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
2688 (VM_PFNMAP | VM_RESERVED | VM_IO)));
2631 2689
2632 rmd.mfn = mfn; 2690 rmd.mfn = mfn;
2633 rmd.prot = prot; 2691 rmd.prot = prot;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 769c4b01fa32..01afd8a94607 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -23,7 +23,6 @@
23#include <xen/interface/callback.h> 23#include <xen/interface/callback.h>
24#include <xen/interface/memory.h> 24#include <xen/interface/memory.h>
25#include <xen/interface/physdev.h> 25#include <xen/interface/physdev.h>
26#include <xen/interface/memory.h>
27#include <xen/features.h> 26#include <xen/features.h>
28 27
29#include "xen-ops.h" 28#include "xen-ops.h"
@@ -248,8 +247,7 @@ char * __init xen_memory_setup(void)
248 else 247 else
249 extra_pages = 0; 248 extra_pages = 0;
250 249
251 if (!xen_initial_domain()) 250 xen_add_extra_mem(extra_pages);
252 xen_add_extra_mem(extra_pages);
253 251
254 return "Xen"; 252 return "Xen";
255} 253}
@@ -337,9 +335,6 @@ void __cpuinit xen_enable_syscall(void)
337 335
338void __init xen_arch_setup(void) 336void __init xen_arch_setup(void)
339{ 337{
340 struct physdev_set_iopl set_iopl;
341 int rc;
342
343 xen_panic_handler_init(); 338 xen_panic_handler_init();
344 339
345 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); 340 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
@@ -356,11 +351,6 @@ void __init xen_arch_setup(void)
356 xen_enable_sysenter(); 351 xen_enable_sysenter();
357 xen_enable_syscall(); 352 xen_enable_syscall();
358 353
359 set_iopl.iopl = 1;
360 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
361 if (rc != 0)
362 printk(KERN_INFO "physdev_op failed %d\n", rc);
363
364#ifdef CONFIG_ACPI 354#ifdef CONFIG_ACPI
365 if (!(xen_start_info->flags & SIF_INITDOMAIN)) { 355 if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
366 printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); 356 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");