aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c1
-rw-r--r--arch/x86/ia32/sys_ia32.c1
-rw-r--r--arch/x86/include/asm/apic.h10
-rw-r--r--arch/x86/include/asm/e820.h3
-rw-r--r--arch/x86/include/asm/fixmap.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/paravirt.h10
-rw-r--r--arch/x86/include/asm/pvclock.h1
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h4
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h208
-rw-r--r--arch/x86/include/asm/xen/interface.h6
-rw-r--r--arch/x86/include/asm/xen/interface_32.h5
-rw-r--r--arch/x86/include/asm/xen/interface_64.h13
-rw-r--r--arch/x86/include/asm/xen/page.h7
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/apic/apic.c9
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c7
-rw-r--r--arch/x86/kernel/apic/io_apic.c4
-rw-r--r--arch/x86/kernel/apic/probe_64.c7
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c37
-rw-r--r--arch/x86/kernel/cpu/perf_event.c20
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c4
-rw-r--r--arch/x86/kernel/cpuid.c1
-rw-r--r--arch/x86/kernel/entry_32.S2
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/head_32.S16
-rw-r--r--arch/x86/kernel/hpet.c26
-rw-r--r--arch/x86/kernel/hw_breakpoint.c4
-rw-r--r--arch/x86/kernel/kgdb.c12
-rw-r--r--arch/x86/kernel/microcode_amd.c2
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c71
-rw-r--r--arch/x86/kernel/msr.c1
-rw-r--r--arch/x86/kernel/pvclock.c43
-rw-r--r--arch/x86/kernel/resource.c48
-rw-r--r--arch/x86/kernel/setup.c1
-rw-r--r--arch/x86/kernel/xsave.c3
-rw-r--r--arch/x86/kvm/mmu.c9
-rw-r--r--arch/x86/kvm/svm.c6
-rw-r--r--arch/x86/kvm/vmx.c24
-rw-r--r--arch/x86/kvm/x86.c27
-rw-r--r--arch/x86/kvm/x86.h5
-rw-r--r--arch/x86/lguest/boot.c16
-rw-r--r--arch/x86/lguest/i386_head.S105
-rw-r--r--arch/x86/mm/tlb.c7
-rw-r--r--arch/x86/pci/acpi.c103
-rw-r--r--arch/x86/pci/i386.c18
-rw-r--r--arch/x86/pci/xen.c35
-rw-r--r--arch/x86/platform/uv/tlb_uv.c15
-rw-r--r--arch/x86/platform/uv/uv_time.c4
-rw-r--r--arch/x86/vdso/Makefile4
-rw-r--r--arch/x86/xen/enlighten.c25
-rw-r--r--arch/x86/xen/mmu.c88
-rw-r--r--arch/x86/xen/platform-pci-unplug.c2
-rw-r--r--arch/x86/xen/setup.c71
-rw-r--r--arch/x86/xen/suspend.c1
-rw-r--r--arch/x86/xen/time.c2
-rw-r--r--arch/x86/xen/xen-ops.h2
60 files changed, 752 insertions, 419 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e8327686d3c5..e330da21b84f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -21,7 +21,7 @@ config X86
21 select HAVE_UNSTABLE_SCHED_CLOCK 21 select HAVE_UNSTABLE_SCHED_CLOCK
22 select HAVE_IDE 22 select HAVE_IDE
23 select HAVE_OPROFILE 23 select HAVE_OPROFILE
24 select HAVE_PERF_EVENTS if (!M386 && !M486) 24 select HAVE_PERF_EVENTS
25 select HAVE_IRQ_WORK 25 select HAVE_IRQ_WORK
26 select HAVE_IOREMAP_PROT 26 select HAVE_IOREMAP_PROT
27 select HAVE_KPROBES 27 select HAVE_KPROBES
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 23f315c9f215..325c05294fc4 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -355,7 +355,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
355 if (heap > 0x3fffffffffffUL) 355 if (heap > 0x3fffffffffffUL)
356 error("Destination address too large"); 356 error("Destination address too large");
357#else 357#else
358 if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff)) 358 if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
359 error("Destination address too large"); 359 error("Destination address too large");
360#endif 360#endif
361#ifndef CONFIG_RELOCATABLE 361#ifndef CONFIG_RELOCATABLE
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index cbcc8d8ea93a..7a6e68e4f748 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -10,6 +10,7 @@
10 * by the Free Software Foundation. 10 * by the Free Software Foundation.
11 */ 11 */
12 12
13#include <linux/err.h>
13#include <linux/module.h> 14#include <linux/module.h>
14#include <linux/init.h> 15#include <linux/init.h>
15#include <linux/kernel.h> 16#include <linux/kernel.h>
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 849813f398e7..5852519b2d0f 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -28,7 +28,6 @@
28#include <linux/syscalls.h> 28#include <linux/syscalls.h>
29#include <linux/times.h> 29#include <linux/times.h>
30#include <linux/utsname.h> 30#include <linux/utsname.h>
31#include <linux/smp_lock.h>
32#include <linux/mm.h> 31#include <linux/mm.h>
33#include <linux/uio.h> 32#include <linux/uio.h>
34#include <linux/poll.h> 33#include <linux/poll.h>
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 286de34b0ed6..f6ce0bda3b98 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -141,13 +141,13 @@ static inline void native_apic_msr_write(u32 reg, u32 v)
141 141
142static inline u32 native_apic_msr_read(u32 reg) 142static inline u32 native_apic_msr_read(u32 reg)
143{ 143{
144 u32 low, high; 144 u64 msr;
145 145
146 if (reg == APIC_DFR) 146 if (reg == APIC_DFR)
147 return -1; 147 return -1;
148 148
149 rdmsr(APIC_BASE_MSR + (reg >> 4), low, high); 149 rdmsrl(APIC_BASE_MSR + (reg >> 4), msr);
150 return low; 150 return (u32)msr;
151} 151}
152 152
153static inline void native_x2apic_wait_icr_idle(void) 153static inline void native_x2apic_wait_icr_idle(void)
@@ -181,12 +181,12 @@ extern void enable_x2apic(void);
181extern void x2apic_icr_write(u32 low, u32 id); 181extern void x2apic_icr_write(u32 low, u32 id);
182static inline int x2apic_enabled(void) 182static inline int x2apic_enabled(void)
183{ 183{
184 int msr, msr2; 184 u64 msr;
185 185
186 if (!cpu_has_x2apic) 186 if (!cpu_has_x2apic)
187 return 0; 187 return 0;
188 188
189 rdmsr(MSR_IA32_APICBASE, msr, msr2); 189 rdmsrl(MSR_IA32_APICBASE, msr);
190 if (msr & X2APIC_ENABLE) 190 if (msr & X2APIC_ENABLE)
191 return 1; 191 return 1;
192 return 0; 192 return 0;
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 5be1542fbfaf..e99d55d74df5 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -72,6 +72,9 @@ struct e820map {
72#define BIOS_BEGIN 0x000a0000 72#define BIOS_BEGIN 0x000a0000
73#define BIOS_END 0x00100000 73#define BIOS_END 0x00100000
74 74
75#define BIOS_ROM_BASE 0xffe00000
76#define BIOS_ROM_END 0xffffffff
77
75#ifdef __KERNEL__ 78#ifdef __KERNEL__
76/* see comment in arch/x86/kernel/e820.c */ 79/* see comment in arch/x86/kernel/e820.c */
77extern struct e820map e820; 80extern struct e820map e820;
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 4d293dced62f..9479a037419f 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -216,8 +216,8 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
216} 216}
217 217
218/* Return an pointer with offset calculated */ 218/* Return an pointer with offset calculated */
219static inline unsigned long __set_fixmap_offset(enum fixed_addresses idx, 219static __always_inline unsigned long
220 phys_addr_t phys, pgprot_t flags) 220__set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
221{ 221{
222 __set_fixmap(idx, phys, flags); 222 __set_fixmap(idx, phys, flags);
223 return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); 223 return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1));
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9e6fe391094e..f702f82aa1eb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,7 +79,7 @@
79#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) 79#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
80#define KVM_MIN_FREE_MMU_PAGES 5 80#define KVM_MIN_FREE_MMU_PAGES 5
81#define KVM_REFILL_PAGES 25 81#define KVM_REFILL_PAGES 25
82#define KVM_MAX_CPUID_ENTRIES 40 82#define KVM_MAX_CPUID_ENTRIES 80
83#define KVM_NR_FIXED_MTRR_REGION 88 83#define KVM_NR_FIXED_MTRR_REGION 88
84#define KVM_NR_VAR_MTRR 8 84#define KVM_NR_VAR_MTRR 8
85 85
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3ea3dc487047..6b89f5e86021 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -128,7 +128,7 @@
128#define FAM10H_MMIO_CONF_ENABLE (1<<0) 128#define FAM10H_MMIO_CONF_ENABLE (1<<0)
129#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf 129#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
130#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 130#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
131#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff 131#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
132#define FAM10H_MMIO_CONF_BASE_SHIFT 20 132#define FAM10H_MMIO_CONF_BASE_SHIFT 20
133#define MSR_FAM10H_NODE_ID 0xc001100c 133#define MSR_FAM10H_NODE_ID 0xc001100c
134 134
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 18e3b8a8709f..ef9975812c77 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -824,27 +824,27 @@ static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
824#define __PV_IS_CALLEE_SAVE(func) \ 824#define __PV_IS_CALLEE_SAVE(func) \
825 ((struct paravirt_callee_save) { func }) 825 ((struct paravirt_callee_save) { func })
826 826
827static inline unsigned long arch_local_save_flags(void) 827static inline notrace unsigned long arch_local_save_flags(void)
828{ 828{
829 return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); 829 return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
830} 830}
831 831
832static inline void arch_local_irq_restore(unsigned long f) 832static inline notrace void arch_local_irq_restore(unsigned long f)
833{ 833{
834 PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); 834 PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
835} 835}
836 836
837static inline void arch_local_irq_disable(void) 837static inline notrace void arch_local_irq_disable(void)
838{ 838{
839 PVOP_VCALLEE0(pv_irq_ops.irq_disable); 839 PVOP_VCALLEE0(pv_irq_ops.irq_disable);
840} 840}
841 841
842static inline void arch_local_irq_enable(void) 842static inline notrace void arch_local_irq_enable(void)
843{ 843{
844 PVOP_VCALLEE0(pv_irq_ops.irq_enable); 844 PVOP_VCALLEE0(pv_irq_ops.irq_enable);
845} 845}
846 846
847static inline unsigned long arch_local_irq_save(void) 847static inline notrace unsigned long arch_local_irq_save(void)
848{ 848{
849 unsigned long f; 849 unsigned long f;
850 850
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 7f7e577a0e39..31d84acc1512 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -11,6 +11,7 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
11void pvclock_read_wallclock(struct pvclock_wall_clock *wall, 11void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
12 struct pvclock_vcpu_time_info *vcpu, 12 struct pvclock_vcpu_time_info *vcpu,
13 struct timespec *ts); 13 struct timespec *ts);
14void pvclock_resume(void);
14 15
15/* 16/*
16 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, 17 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index e969f691cbfd..a501741c2335 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -199,6 +199,8 @@ union uvh_apicid {
199#define UVH_APICID 0x002D0E00L 199#define UVH_APICID 0x002D0E00L
200#define UV_APIC_PNODE_SHIFT 6 200#define UV_APIC_PNODE_SHIFT 6
201 201
202#define UV_APICID_HIBIT_MASK 0xffff0000
203
202/* Local Bus from cpu's perspective */ 204/* Local Bus from cpu's perspective */
203#define LOCAL_BUS_BASE 0x1c00000 205#define LOCAL_BUS_BASE 0x1c00000
204#define LOCAL_BUS_SIZE (4 * 1024 * 1024) 206#define LOCAL_BUS_SIZE (4 * 1024 * 1024)
@@ -491,8 +493,10 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
491 } 493 }
492} 494}
493 495
496extern unsigned int uv_apicid_hibits;
494static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode) 497static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode)
495{ 498{
499 apicid |= uv_apicid_hibits;
496 return (1UL << UVH_IPI_INT_SEND_SHFT) | 500 return (1UL << UVH_IPI_INT_SEND_SHFT) |
497 ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | 501 ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
498 (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | 502 (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index b2f2d2e05cec..20cafeac7455 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV MMR definitions 6 * SGI UV MMR definitions
7 * 7 *
8 * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10 10
11#ifndef _ASM_X86_UV_UV_MMRS_H 11#ifndef _ASM_X86_UV_UV_MMRS_H
@@ -754,6 +754,23 @@ union uvh_lb_bau_sb_descriptor_base_u {
754}; 754};
755 755
756/* ========================================================================= */ 756/* ========================================================================= */
757/* UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK */
758/* ========================================================================= */
759#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL
760#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x009f0
761
762#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0
763#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL
764
765union uvh_lb_target_physical_apic_id_mask_u {
766 unsigned long v;
767 struct uvh_lb_target_physical_apic_id_mask_s {
768 unsigned long bit_enables : 32; /* RW */
769 unsigned long rsvd_32_63 : 32; /* */
770 } s;
771};
772
773/* ========================================================================= */
757/* UVH_NODE_ID */ 774/* UVH_NODE_ID */
758/* ========================================================================= */ 775/* ========================================================================= */
759#define UVH_NODE_ID 0x0UL 776#define UVH_NODE_ID 0x0UL
@@ -806,6 +823,78 @@ union uvh_node_present_table_u {
806}; 823};
807 824
808/* ========================================================================= */ 825/* ========================================================================= */
826/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */
827/* ========================================================================= */
828#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
829
830#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
831#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
832#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
833#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
834#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
835#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
836
837union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
838 unsigned long v;
839 struct uvh_rh_gam_alias210_overlay_config_0_mmr_s {
840 unsigned long rsvd_0_23: 24; /* */
841 unsigned long base : 8; /* RW */
842 unsigned long rsvd_32_47: 16; /* */
843 unsigned long m_alias : 5; /* RW */
844 unsigned long rsvd_53_62: 10; /* */
845 unsigned long enable : 1; /* RW */
846 } s;
847};
848
849/* ========================================================================= */
850/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */
851/* ========================================================================= */
852#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
853
854#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
855#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
856#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
857#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
858#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
859#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
860
861union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
862 unsigned long v;
863 struct uvh_rh_gam_alias210_overlay_config_1_mmr_s {
864 unsigned long rsvd_0_23: 24; /* */
865 unsigned long base : 8; /* RW */
866 unsigned long rsvd_32_47: 16; /* */
867 unsigned long m_alias : 5; /* RW */
868 unsigned long rsvd_53_62: 10; /* */
869 unsigned long enable : 1; /* RW */
870 } s;
871};
872
873/* ========================================================================= */
874/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */
875/* ========================================================================= */
876#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
877
878#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
879#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
880#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
881#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
882#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
883#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
884
885union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
886 unsigned long v;
887 struct uvh_rh_gam_alias210_overlay_config_2_mmr_s {
888 unsigned long rsvd_0_23: 24; /* */
889 unsigned long base : 8; /* RW */
890 unsigned long rsvd_32_47: 16; /* */
891 unsigned long m_alias : 5; /* RW */
892 unsigned long rsvd_53_62: 10; /* */
893 unsigned long enable : 1; /* RW */
894 } s;
895};
896
897/* ========================================================================= */
809/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ 898/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */
810/* ========================================================================= */ 899/* ========================================================================= */
811#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL 900#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
@@ -857,6 +946,29 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
857}; 946};
858 947
859/* ========================================================================= */ 948/* ========================================================================= */
949/* UVH_RH_GAM_CONFIG_MMR */
950/* ========================================================================= */
951#define UVH_RH_GAM_CONFIG_MMR 0x1600000UL
952
953#define UVH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
954#define UVH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
955#define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
956#define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
957#define UVH_RH_GAM_CONFIG_MMR_MMIOL_CFG_SHFT 12
958#define UVH_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL
959
960union uvh_rh_gam_config_mmr_u {
961 unsigned long v;
962 struct uvh_rh_gam_config_mmr_s {
963 unsigned long m_skt : 6; /* RW */
964 unsigned long n_skt : 4; /* RW */
965 unsigned long rsvd_10_11: 2; /* */
966 unsigned long mmiol_cfg : 1; /* RW */
967 unsigned long rsvd_13_63: 51; /* */
968 } s;
969};
970
971/* ========================================================================= */
860/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ 972/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */
861/* ========================================================================= */ 973/* ========================================================================= */
862#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL 974#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
@@ -987,97 +1099,5 @@ union uvh_rtc1_int_config_u {
987 } s; 1099 } s;
988}; 1100};
989 1101
990/* ========================================================================= */
991/* UVH_SI_ADDR_MAP_CONFIG */
992/* ========================================================================= */
993#define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL
994
995#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_SHFT 0
996#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL
997#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_SHFT 8
998#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_MASK 0x0000000000000f00UL
999
1000union uvh_si_addr_map_config_u {
1001 unsigned long v;
1002 struct uvh_si_addr_map_config_s {
1003 unsigned long m_skt : 6; /* RW */
1004 unsigned long rsvd_6_7: 2; /* */
1005 unsigned long n_skt : 4; /* RW */
1006 unsigned long rsvd_12_63: 52; /* */
1007 } s;
1008};
1009
1010/* ========================================================================= */
1011/* UVH_SI_ALIAS0_OVERLAY_CONFIG */
1012/* ========================================================================= */
1013#define UVH_SI_ALIAS0_OVERLAY_CONFIG 0xc80008UL
1014
1015#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_SHFT 24
1016#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
1017#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_SHFT 48
1018#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
1019#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_SHFT 63
1020#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
1021
1022union uvh_si_alias0_overlay_config_u {
1023 unsigned long v;
1024 struct uvh_si_alias0_overlay_config_s {
1025 unsigned long rsvd_0_23: 24; /* */
1026 unsigned long base : 8; /* RW */
1027 unsigned long rsvd_32_47: 16; /* */
1028 unsigned long m_alias : 5; /* RW */
1029 unsigned long rsvd_53_62: 10; /* */
1030 unsigned long enable : 1; /* RW */
1031 } s;
1032};
1033
1034/* ========================================================================= */
1035/* UVH_SI_ALIAS1_OVERLAY_CONFIG */
1036/* ========================================================================= */
1037#define UVH_SI_ALIAS1_OVERLAY_CONFIG 0xc80010UL
1038
1039#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_SHFT 24
1040#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
1041#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_SHFT 48
1042#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
1043#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_SHFT 63
1044#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
1045
1046union uvh_si_alias1_overlay_config_u {
1047 unsigned long v;
1048 struct uvh_si_alias1_overlay_config_s {
1049 unsigned long rsvd_0_23: 24; /* */
1050 unsigned long base : 8; /* RW */
1051 unsigned long rsvd_32_47: 16; /* */
1052 unsigned long m_alias : 5; /* RW */
1053 unsigned long rsvd_53_62: 10; /* */
1054 unsigned long enable : 1; /* RW */
1055 } s;
1056};
1057
1058/* ========================================================================= */
1059/* UVH_SI_ALIAS2_OVERLAY_CONFIG */
1060/* ========================================================================= */
1061#define UVH_SI_ALIAS2_OVERLAY_CONFIG 0xc80018UL
1062
1063#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_SHFT 24
1064#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
1065#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_SHFT 48
1066#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
1067#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_SHFT 63
1068#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
1069
1070union uvh_si_alias2_overlay_config_u {
1071 unsigned long v;
1072 struct uvh_si_alias2_overlay_config_s {
1073 unsigned long rsvd_0_23: 24; /* */
1074 unsigned long base : 8; /* RW */
1075 unsigned long rsvd_32_47: 16; /* */
1076 unsigned long m_alias : 5; /* RW */
1077 unsigned long rsvd_53_62: 10; /* */
1078 unsigned long enable : 1; /* RW */
1079 } s;
1080};
1081
1082 1102
1083#endif /* _ASM_X86_UV_UV_MMRS_H */ 1103#endif /* __ASM_UV_MMRS_X86_H__ */
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index e8506c1f0c55..1c10c88ee4e1 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void);
61#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) 61#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
62#endif 62#endif
63 63
64#ifndef machine_to_phys_mapping 64#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
65#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) 65#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
66#endif 66#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT)
67 67
68/* Maximum number of virtual CPUs in multi-processor guests. */ 68/* Maximum number of virtual CPUs in multi-processor guests. */
69#define MAX_VIRT_CPUS 32 69#define MAX_VIRT_CPUS 32
diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h
index 42a7e004ae5c..8413688b2571 100644
--- a/arch/x86/include/asm/xen/interface_32.h
+++ b/arch/x86/include/asm/xen/interface_32.h
@@ -32,6 +32,11 @@
32/* And the trap vector is... */ 32/* And the trap vector is... */
33#define TRAP_INSTR "int $0x82" 33#define TRAP_INSTR "int $0x82"
34 34
35#define __MACH2PHYS_VIRT_START 0xF5800000
36#define __MACH2PHYS_VIRT_END 0xF6800000
37
38#define __MACH2PHYS_SHIFT 2
39
35/* 40/*
36 * Virtual addresses beyond this are not modifiable by guest OSes. The 41 * Virtual addresses beyond this are not modifiable by guest OSes. The
37 * machine->physical mapping table starts at this address, read-only. 42 * machine->physical mapping table starts at this address, read-only.
diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h
index 100d2662b97c..839a4811cf98 100644
--- a/arch/x86/include/asm/xen/interface_64.h
+++ b/arch/x86/include/asm/xen/interface_64.h
@@ -39,18 +39,7 @@
39#define __HYPERVISOR_VIRT_END 0xFFFF880000000000 39#define __HYPERVISOR_VIRT_END 0xFFFF880000000000
40#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 40#define __MACH2PHYS_VIRT_START 0xFFFF800000000000
41#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 41#define __MACH2PHYS_VIRT_END 0xFFFF804000000000
42 42#define __MACH2PHYS_SHIFT 3
43#ifndef HYPERVISOR_VIRT_START
44#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
45#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END)
46#endif
47
48#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
49#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
50#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
51#ifndef machine_to_phys_mapping
52#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
53#endif
54 43
55/* 44/*
56 * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) 45 * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index dd8c1414b3d5..8760cc60a21c 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -5,6 +5,7 @@
5#include <linux/types.h> 5#include <linux/types.h>
6#include <linux/spinlock.h> 6#include <linux/spinlock.h>
7#include <linux/pfn.h> 7#include <linux/pfn.h>
8#include <linux/mm.h>
8 9
9#include <asm/uaccess.h> 10#include <asm/uaccess.h>
10#include <asm/page.h> 11#include <asm/page.h>
@@ -35,6 +36,8 @@ typedef struct xpaddr {
35#define MAX_DOMAIN_PAGES \ 36#define MAX_DOMAIN_PAGES \
36 ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) 37 ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
37 38
39extern unsigned long *machine_to_phys_mapping;
40extern unsigned int machine_to_phys_order;
38 41
39extern unsigned long get_phys_to_machine(unsigned long pfn); 42extern unsigned long get_phys_to_machine(unsigned long pfn);
40extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); 43extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
@@ -69,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
69 if (xen_feature(XENFEAT_auto_translated_physmap)) 72 if (xen_feature(XENFEAT_auto_translated_physmap))
70 return mfn; 73 return mfn;
71 74
72#if 0
73 if (unlikely((mfn >> machine_to_phys_order) != 0)) 75 if (unlikely((mfn >> machine_to_phys_order) != 0))
74 return max_mapnr; 76 return ~0;
75#endif
76 77
77 pfn = 0; 78 pfn = 0;
78 /* 79 /*
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9e13763b6092..1e994754d323 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -45,6 +45,7 @@ obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
45obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o 45obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
46obj-y += tsc.o io_delay.o rtc.o 46obj-y += tsc.o io_delay.o rtc.o
47obj-y += pci-iommu_table.o 47obj-y += pci-iommu_table.o
48obj-y += resource.o
48 49
49obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 50obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
50obj-y += process.o 51obj-y += process.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 850657d1b0ed..78218135b48e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -52,7 +52,6 @@
52#include <asm/mce.h> 52#include <asm/mce.h>
53#include <asm/kvm_para.h> 53#include <asm/kvm_para.h>
54#include <asm/tsc.h> 54#include <asm/tsc.h>
55#include <asm/atomic.h>
56 55
57unsigned int num_processors; 56unsigned int num_processors;
58 57
@@ -1390,6 +1389,14 @@ void __cpuinit end_local_APIC_setup(void)
1390 1389
1391 setup_apic_nmi_watchdog(NULL); 1390 setup_apic_nmi_watchdog(NULL);
1392 apic_pm_activate(); 1391 apic_pm_activate();
1392
1393 /*
1394 * Now that local APIC setup is completed for BP, configure the fault
1395 * handling for interrupt remapping.
1396 */
1397 if (!smp_processor_id() && intr_remapping_enabled)
1398 enable_drhd_fault_handling();
1399
1393} 1400}
1394 1401
1395#ifdef CONFIG_X86_X2APIC 1402#ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index cefd6942f0e9..62f6e1e55b90 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,15 +17,16 @@
17#include <linux/nmi.h> 17#include <linux/nmi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19 19
20/* For reliability, we're prepared to waste bits here. */
21static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
22
23u64 hw_nmi_get_sample_period(void) 20u64 hw_nmi_get_sample_period(void)
24{ 21{
25 return (u64)(cpu_khz) * 1000 * 60; 22 return (u64)(cpu_khz) * 1000 * 60;
26} 23}
27 24
28#ifdef ARCH_HAS_NMI_WATCHDOG 25#ifdef ARCH_HAS_NMI_WATCHDOG
26
27/* For reliability, we're prepared to waste bits here. */
28static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
29
29void arch_trigger_all_cpu_backtrace(void) 30void arch_trigger_all_cpu_backtrace(void)
30{ 31{
31 int i; 32 int i;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7cc0a721f628..fadcd743a74f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2430,13 +2430,12 @@ static void ack_apic_level(struct irq_data *data)
2430{ 2430{
2431 struct irq_cfg *cfg = data->chip_data; 2431 struct irq_cfg *cfg = data->chip_data;
2432 int i, do_unmask_irq = 0, irq = data->irq; 2432 int i, do_unmask_irq = 0, irq = data->irq;
2433 struct irq_desc *desc = irq_to_desc(irq);
2434 unsigned long v; 2433 unsigned long v;
2435 2434
2436 irq_complete_move(cfg); 2435 irq_complete_move(cfg);
2437#ifdef CONFIG_GENERIC_PENDING_IRQ 2436#ifdef CONFIG_GENERIC_PENDING_IRQ
2438 /* If we are moving the irq we need to mask it */ 2437 /* If we are moving the irq we need to mask it */
2439 if (unlikely(desc->status & IRQ_MOVE_PENDING)) { 2438 if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
2440 do_unmask_irq = 1; 2439 do_unmask_irq = 1;
2441 mask_ioapic(cfg); 2440 mask_ioapic(cfg);
2442 } 2441 }
@@ -3413,6 +3412,7 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
3413 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3412 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3414 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3413 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3415 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3414 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3415 msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
3416 3416
3417 dmar_msi_write(irq, &msg); 3417 dmar_msi_write(irq, &msg);
3418 3418
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index f9e4e6a54073..d8c4a6feb286 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -79,13 +79,6 @@ void __init default_setup_apic_routing(void)
79 /* need to update phys_pkg_id */ 79 /* need to update phys_pkg_id */
80 apic->phys_pkg_id = apicid_phys_pkg_id; 80 apic->phys_pkg_id = apicid_phys_pkg_id;
81 } 81 }
82
83 /*
84 * Now that apic routing model is selected, configure the
85 * fault handling for intr remapping.
86 */
87 if (intr_remapping_enabled)
88 enable_drhd_fault_handling();
89} 82}
90 83
91/* Same for both flat and physical. */ 84/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index ed4118de249e..c1c52c341f40 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -44,6 +44,8 @@ static u64 gru_start_paddr, gru_end_paddr;
44static union uvh_apicid uvh_apicid; 44static union uvh_apicid uvh_apicid;
45int uv_min_hub_revision_id; 45int uv_min_hub_revision_id;
46EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); 46EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
47unsigned int uv_apicid_hibits;
48EXPORT_SYMBOL_GPL(uv_apicid_hibits);
47static DEFINE_SPINLOCK(uv_nmi_lock); 49static DEFINE_SPINLOCK(uv_nmi_lock);
48 50
49static inline bool is_GRU_range(u64 start, u64 end) 51static inline bool is_GRU_range(u64 start, u64 end)
@@ -85,6 +87,23 @@ static void __init early_get_apic_pnode_shift(void)
85 uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; 87 uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
86} 88}
87 89
90/*
91 * Add an extra bit as dictated by bios to the destination apicid of
92 * interrupts potentially passing through the UV HUB. This prevents
93 * a deadlock between interrupts and IO port operations.
94 */
95static void __init uv_set_apicid_hibit(void)
96{
97 union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
98 unsigned long *mmr;
99
100 mmr = early_ioremap(UV_LOCAL_MMR_BASE |
101 UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr));
102 apicid_mask.v = *mmr;
103 early_iounmap(mmr, sizeof(*mmr));
104 uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
105}
106
88static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 107static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
89{ 108{
90 int nodeid; 109 int nodeid;
@@ -102,6 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
102 __get_cpu_var(x2apic_extra_bits) = 121 __get_cpu_var(x2apic_extra_bits) =
103 nodeid << (uvh_apicid.s.pnode_shift - 1); 122 nodeid << (uvh_apicid.s.pnode_shift - 1);
104 uv_system_type = UV_NON_UNIQUE_APIC; 123 uv_system_type = UV_NON_UNIQUE_APIC;
124 uv_set_apicid_hibit();
105 return 1; 125 return 1;
106 } 126 }
107 } 127 }
@@ -155,6 +175,7 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
155 int pnode; 175 int pnode;
156 176
157 pnode = uv_apicid_to_pnode(phys_apicid); 177 pnode = uv_apicid_to_pnode(phys_apicid);
178 phys_apicid |= uv_apicid_hibits;
158 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 179 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
159 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | 180 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
160 ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | 181 ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
@@ -236,7 +257,7 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
236 int cpu = cpumask_first(cpumask); 257 int cpu = cpumask_first(cpumask);
237 258
238 if ((unsigned)cpu < nr_cpu_ids) 259 if ((unsigned)cpu < nr_cpu_ids)
239 return per_cpu(x86_cpu_to_apicid, cpu); 260 return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
240 else 261 else
241 return BAD_APICID; 262 return BAD_APICID;
242} 263}
@@ -255,7 +276,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
255 if (cpumask_test_cpu(cpu, cpu_online_mask)) 276 if (cpumask_test_cpu(cpu, cpu_online_mask))
256 break; 277 break;
257 } 278 }
258 return per_cpu(x86_cpu_to_apicid, cpu); 279 return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
259} 280}
260 281
261static unsigned int x2apic_get_apic_id(unsigned long x) 282static unsigned int x2apic_get_apic_id(unsigned long x)
@@ -379,14 +400,14 @@ struct redir_addr {
379#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 400#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
380 401
381static __initdata struct redir_addr redir_addrs[] = { 402static __initdata struct redir_addr redir_addrs[] = {
382 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG}, 403 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR},
383 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG}, 404 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR},
384 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG}, 405 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR},
385}; 406};
386 407
387static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) 408static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
388{ 409{
389 union uvh_si_alias0_overlay_config_u alias; 410 union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias;
390 union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; 411 union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
391 int i; 412 int i;
392 413
@@ -660,7 +681,7 @@ void uv_nmi_init(void)
660 681
661void __init uv_system_init(void) 682void __init uv_system_init(void)
662{ 683{
663 union uvh_si_addr_map_config_u m_n_config; 684 union uvh_rh_gam_config_mmr_u m_n_config;
664 union uvh_node_id_u node_id; 685 union uvh_node_id_u node_id;
665 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; 686 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
666 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; 687 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
@@ -670,7 +691,7 @@ void __init uv_system_init(void)
670 691
671 map_low_mmrs(); 692 map_low_mmrs();
672 693
673 m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); 694 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
674 m_val = m_n_config.s.m_skt; 695 m_val = m_n_config.s.m_skt;
675 n_val = m_n_config.s.n_skt; 696 n_val = m_n_config.s.n_skt;
676 mmr_base = 697 mmr_base =
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ed6310183efb..6d75b9145b13 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -381,6 +381,20 @@ static void release_pmc_hardware(void) {}
381 381
382#endif 382#endif
383 383
384static bool check_hw_exists(void)
385{
386 u64 val, val_new = 0;
387 int ret = 0;
388
389 val = 0xabcdUL;
390 ret |= checking_wrmsrl(x86_pmu.perfctr, val);
391 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
392 if (ret || val != val_new)
393 return false;
394
395 return true;
396}
397
384static void reserve_ds_buffers(void); 398static void reserve_ds_buffers(void);
385static void release_ds_buffers(void); 399static void release_ds_buffers(void);
386 400
@@ -1372,6 +1386,12 @@ void __init init_hw_perf_events(void)
1372 1386
1373 pmu_check_apic(); 1387 pmu_check_apic();
1374 1388
1389 /* sanity check that the hardware exists or is emulated */
1390 if (!check_hw_exists()) {
1391 pr_cont("Broken PMU hardware detected, software events only.\n");
1392 return;
1393 }
1394
1375 pr_cont("%s PMU driver.\n", x86_pmu.name); 1395 pr_cont("%s PMU driver.\n", x86_pmu.name);
1376 1396
1377 if (x86_pmu.quirks) 1397 if (x86_pmu.quirks)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 46d58448c3af..e421b8cd6944 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -280,11 +280,11 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
280 struct amd_nb *nb; 280 struct amd_nb *nb;
281 int i; 281 int i;
282 282
283 nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL); 283 nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO,
284 cpu_to_node(cpu));
284 if (!nb) 285 if (!nb)
285 return NULL; 286 return NULL;
286 287
287 memset(nb, 0, sizeof(*nb));
288 nb->nb_id = nb_id; 288 nb->nb_id = nb_id;
289 289
290 /* 290 /*
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 1b7b31ab7d86..212a6a42527c 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -33,7 +33,6 @@
33#include <linux/init.h> 33#include <linux/init.h>
34#include <linux/poll.h> 34#include <linux/poll.h>
35#include <linux/smp.h> 35#include <linux/smp.h>
36#include <linux/smp_lock.h>
37#include <linux/major.h> 36#include <linux/major.h>
38#include <linux/fs.h> 37#include <linux/fs.h>
39#include <linux/device.h> 38#include <linux/device.h>
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 59e175e89599..591e60104278 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -395,7 +395,7 @@ sysenter_past_esp:
395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words 395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
396 * pushed above; +8 corresponds to copy_thread's esp0 setting. 396 * pushed above; +8 corresponds to copy_thread's esp0 setting.
397 */ 397 */
398 pushl_cfi (TI_sysenter_return-THREAD_SIZE_asm+8+4*4)(%esp) 398 pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
399 CFI_REL_OFFSET eip, 0 399 CFI_REL_OFFSET eip, 0
400 400
401 pushl_cfi %eax 401 pushl_cfi %eax
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index fe2690d71c0c..e3ba417e8697 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64)
295 .endm 295 .endm
296 296
297/* save partial stack frame */ 297/* save partial stack frame */
298 .pushsection .kprobes.text, "ax"
298ENTRY(save_args) 299ENTRY(save_args)
299 XCPT_FRAME 300 XCPT_FRAME
300 cld 301 cld
@@ -334,6 +335,7 @@ ENTRY(save_args)
334 ret 335 ret
335 CFI_ENDPROC 336 CFI_ENDPROC
336END(save_args) 337END(save_args)
338 .popsection
337 339
338ENTRY(save_rest) 340ENTRY(save_rest)
339 PARTIAL_FRAME 1 REST_SKIP+8 341 PARTIAL_FRAME 1 REST_SKIP+8
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index bcece91dd311..c0dbd9ac24f0 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -60,16 +60,18 @@
60#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) 60#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
61#endif 61#endif
62 62
63/* Number of possible pages in the lowmem region */
64LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT)
65
63/* Enough space to fit pagetables for the low memory linear map */ 66/* Enough space to fit pagetables for the low memory linear map */
64MAPPING_BEYOND_END = \ 67MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
65 PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
66 68
67/* 69/*
68 * Worst-case size of the kernel mapping we need to make: 70 * Worst-case size of the kernel mapping we need to make:
69 * the worst-case size of the kernel itself, plus the extra we need 71 * a relocatable kernel can live anywhere in lowmem, so we need to be able
70 * to map for the linear map. 72 * to map all of lowmem.
71 */ 73 */
72KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT 74KERNEL_PAGES = LOWMEM_PAGES
73 75
74INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm 76INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
75RESERVE_BRK(pagetables, INIT_MAP_SIZE) 77RESERVE_BRK(pagetables, INIT_MAP_SIZE)
@@ -620,13 +622,13 @@ ENTRY(initial_code)
620__PAGE_ALIGNED_BSS 622__PAGE_ALIGNED_BSS
621 .align PAGE_SIZE_asm 623 .align PAGE_SIZE_asm
622#ifdef CONFIG_X86_PAE 624#ifdef CONFIG_X86_PAE
623initial_pg_pmd: 625ENTRY(initial_pg_pmd)
624 .fill 1024*KPMDS,4,0 626 .fill 1024*KPMDS,4,0
625#else 627#else
626ENTRY(initial_page_table) 628ENTRY(initial_page_table)
627 .fill 1024,4,0 629 .fill 1024,4,0
628#endif 630#endif
629initial_pg_fixmap: 631ENTRY(initial_pg_fixmap)
630 .fill 1024,4,0 632 .fill 1024,4,0
631ENTRY(empty_zero_page) 633ENTRY(empty_zero_page)
632 .fill 4096,1,0 634 .fill 4096,1,0
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ae03cab4352e..4ff5968f12d2 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -27,6 +27,9 @@
27#define HPET_DEV_FSB_CAP 0x1000 27#define HPET_DEV_FSB_CAP 0x1000
28#define HPET_DEV_PERI_CAP 0x2000 28#define HPET_DEV_PERI_CAP 0x2000
29 29
30#define HPET_MIN_CYCLES 128
31#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
32
30#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) 33#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
31 34
32/* 35/*
@@ -299,8 +302,9 @@ static void hpet_legacy_clockevent_register(void)
299 /* Calculate the min / max delta */ 302 /* Calculate the min / max delta */
300 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, 303 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
301 &hpet_clockevent); 304 &hpet_clockevent);
302 /* 5 usec minimum reprogramming delta. */ 305 /* Setup minimum reprogramming delta. */
303 hpet_clockevent.min_delta_ns = 5000; 306 hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA,
307 &hpet_clockevent);
304 308
305 /* 309 /*
306 * Start hpet with the boot cpu mask and make it 310 * Start hpet with the boot cpu mask and make it
@@ -393,22 +397,24 @@ static int hpet_next_event(unsigned long delta,
393 * the wraparound into account) nor a simple count down event 397 * the wraparound into account) nor a simple count down event
394 * mode. Further the write to the comparator register is 398 * mode. Further the write to the comparator register is
395 * delayed internally up to two HPET clock cycles in certain 399 * delayed internally up to two HPET clock cycles in certain
396 * chipsets (ATI, ICH9,10). We worked around that by reading 400 * chipsets (ATI, ICH9,10). Some newer AMD chipsets have even
397 * back the compare register, but that required another 401 * longer delays. We worked around that by reading back the
398 * workaround for ICH9,10 chips where the first readout after 402 * compare register, but that required another workaround for
399 * write can return the old stale value. We already have a 403 * ICH9,10 chips where the first readout after write can
400 * minimum delta of 5us enforced, but a NMI or SMI hitting 404 * return the old stale value. We already had a minimum
405 * programming delta of 5us enforced, but a NMI or SMI hitting
401 * between the counter readout and the comparator write can 406 * between the counter readout and the comparator write can
402 * move us behind that point easily. Now instead of reading 407 * move us behind that point easily. Now instead of reading
403 * the compare register back several times, we make the ETIME 408 * the compare register back several times, we make the ETIME
404 * decision based on the following: Return ETIME if the 409 * decision based on the following: Return ETIME if the
405 * counter value after the write is less than 8 HPET cycles 410 * counter value after the write is less than HPET_MIN_CYCLES
406 * away from the event or if the counter is already ahead of 411 * away from the event or if the counter is already ahead of
407 * the event. 412 * the event. The minimum programming delta for the generic
413 * clockevents code is set to 1.5 * HPET_MIN_CYCLES.
408 */ 414 */
409 res = (s32)(cnt - hpet_readl(HPET_COUNTER)); 415 res = (s32)(cnt - hpet_readl(HPET_COUNTER));
410 416
411 return res < 8 ? -ETIME : 0; 417 return res < HPET_MIN_CYCLES ? -ETIME : 0;
412} 418}
413 419
414static void hpet_legacy_set_mode(enum clock_event_mode mode, 420static void hpet_legacy_set_mode(enum clock_event_mode mode,
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index ff15c9dcc25d..42c594254507 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
433 dr6_p = (unsigned long *)ERR_PTR(args->err); 433 dr6_p = (unsigned long *)ERR_PTR(args->err);
434 dr6 = *dr6_p; 434 dr6 = *dr6_p;
435 435
436 /* If it's a single step, TRAP bits are random */
437 if (dr6 & DR_STEP)
438 return NOTIFY_DONE;
439
436 /* Do an early return if no trap bits are set in DR6 */ 440 /* Do an early return if no trap bits are set in DR6 */
437 if ((dr6 & DR_TRAP_BITS) == 0) 441 if ((dr6 & DR_TRAP_BITS) == 0)
438 return NOTIFY_DONE; 442 return NOTIFY_DONE;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index ec592caac4b4..cd21b654dec6 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -315,14 +315,18 @@ static void kgdb_remove_all_hw_break(void)
315 if (!breakinfo[i].enabled) 315 if (!breakinfo[i].enabled)
316 continue; 316 continue;
317 bp = *per_cpu_ptr(breakinfo[i].pev, cpu); 317 bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
318 if (bp->attr.disabled == 1) 318 if (!bp->attr.disabled) {
319 arch_uninstall_hw_breakpoint(bp);
320 bp->attr.disabled = 1;
319 continue; 321 continue;
322 }
320 if (dbg_is_early) 323 if (dbg_is_early)
321 early_dr7 &= ~encode_dr7(i, breakinfo[i].len, 324 early_dr7 &= ~encode_dr7(i, breakinfo[i].len,
322 breakinfo[i].type); 325 breakinfo[i].type);
323 else 326 else if (hw_break_release_slot(i))
324 arch_uninstall_hw_breakpoint(bp); 327 printk(KERN_ERR "KGDB: hw bpt remove failed %lx\n",
325 bp->attr.disabled = 1; 328 breakinfo[i].addr);
329 breakinfo[i].enabled = 0;
326 } 330 }
327} 331}
328 332
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index e1af7c055c7d..ce0cb4721c9a 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -212,7 +212,7 @@ static int install_equiv_cpu_table(const u8 *buf)
212 return 0; 212 return 0;
213 } 213 }
214 214
215 equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); 215 equiv_cpu_table = vmalloc(size);
216 if (!equiv_cpu_table) { 216 if (!equiv_cpu_table) {
217 pr_err("failed to allocate equivalent CPU table\n"); 217 pr_err("failed to allocate equivalent CPU table\n");
218 return 0; 218 return 0;
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 71825806cd44..ac861b8348e2 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -25,7 +25,6 @@ struct pci_hostbridge_probe {
25}; 25};
26 26
27static u64 __cpuinitdata fam10h_pci_mmconf_base; 27static u64 __cpuinitdata fam10h_pci_mmconf_base;
28static int __cpuinitdata fam10h_pci_mmconf_base_status;
29 28
30static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { 29static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = {
31 { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, 30 { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 },
@@ -44,10 +43,12 @@ static int __cpuinit cmp_range(const void *x1, const void *x2)
44 return start1 - start2; 43 return start1 - start2;
45} 44}
46 45
47/*[47:0] */ 46#define MMCONF_UNIT (1ULL << FAM10H_MMIO_CONF_BASE_SHIFT)
48/* need to avoid (0xfd<<32) and (0xfe<<32), ht used space */ 47#define MMCONF_MASK (~(MMCONF_UNIT - 1))
48#define MMCONF_SIZE (MMCONF_UNIT << 8)
49/* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */
49#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32) 50#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32)
50#define BASE_VALID(b) ((b != (0xfdULL << 32)) && (b != (0xfeULL << 32))) 51#define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40))
51static void __cpuinit get_fam10h_pci_mmconf_base(void) 52static void __cpuinit get_fam10h_pci_mmconf_base(void)
52{ 53{
53 int i; 54 int i;
@@ -64,12 +65,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
64 struct range range[8]; 65 struct range range[8];
65 66
66 /* only try to get setting from BSP */ 67 /* only try to get setting from BSP */
67 /* -1 or 1 */ 68 if (fam10h_pci_mmconf_base)
68 if (fam10h_pci_mmconf_base_status)
69 return; 69 return;
70 70
71 if (!early_pci_allowed()) 71 if (!early_pci_allowed())
72 goto fail; 72 return;
73 73
74 found = 0; 74 found = 0;
75 for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { 75 for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
@@ -91,7 +91,7 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
91 } 91 }
92 92
93 if (!found) 93 if (!found)
94 goto fail; 94 return;
95 95
96 /* SYS_CFG */ 96 /* SYS_CFG */
97 address = MSR_K8_SYSCFG; 97 address = MSR_K8_SYSCFG;
@@ -99,16 +99,16 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
99 99
100 /* TOP_MEM2 is not enabled? */ 100 /* TOP_MEM2 is not enabled? */
101 if (!(val & (1<<21))) { 101 if (!(val & (1<<21))) {
102 tom2 = 0; 102 tom2 = 1ULL << 32;
103 } else { 103 } else {
104 /* TOP_MEM2 */ 104 /* TOP_MEM2 */
105 address = MSR_K8_TOP_MEM2; 105 address = MSR_K8_TOP_MEM2;
106 rdmsrl(address, val); 106 rdmsrl(address, val);
107 tom2 = val & (0xffffULL<<32); 107 tom2 = max(val & 0xffffff800000ULL, 1ULL << 32);
108 } 108 }
109 109
110 if (base <= tom2) 110 if (base <= tom2)
111 base = tom2 + (1ULL<<32); 111 base = (tom2 + 2 * MMCONF_UNIT - 1) & MMCONF_MASK;
112 112
113 /* 113 /*
114 * need to check if the range is in the high mmio range that is 114 * need to check if the range is in the high mmio range that is
@@ -123,11 +123,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
123 if (!(reg & 3)) 123 if (!(reg & 3))
124 continue; 124 continue;
125 125
126 start = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ 126 start = (u64)(reg & 0xffffff00) << 8; /* 39:16 on 31:8*/
127 reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); 127 reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3));
128 end = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ 128 end = ((u64)(reg & 0xffffff00) << 8) | 0xffff; /* 39:16 on 31:8*/
129 129
130 if (!end) 130 if (end < tom2)
131 continue; 131 continue;
132 132
133 range[hi_mmio_num].start = start; 133 range[hi_mmio_num].start = start;
@@ -143,32 +143,27 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
143 143
144 if (range[hi_mmio_num - 1].end < base) 144 if (range[hi_mmio_num - 1].end < base)
145 goto out; 145 goto out;
146 if (range[0].start > base) 146 if (range[0].start > base + MMCONF_SIZE)
147 goto out; 147 goto out;
148 148
149 /* need to find one window */ 149 /* need to find one window */
150 base = range[0].start - (1ULL << 32); 150 base = (range[0].start & MMCONF_MASK) - MMCONF_UNIT;
151 if ((base > tom2) && BASE_VALID(base)) 151 if ((base > tom2) && BASE_VALID(base))
152 goto out; 152 goto out;
153 base = range[hi_mmio_num - 1].end + (1ULL << 32); 153 base = (range[hi_mmio_num - 1].end + MMCONF_UNIT) & MMCONF_MASK;
154 if ((base > tom2) && BASE_VALID(base)) 154 if (BASE_VALID(base))
155 goto out; 155 goto out;
156 /* need to find window between ranges */ 156 /* need to find window between ranges */
157 if (hi_mmio_num > 1) 157 for (i = 1; i < hi_mmio_num; i++) {
158 for (i = 0; i < hi_mmio_num - 1; i++) { 158 base = (range[i - 1].end + MMCONF_UNIT) & MMCONF_MASK;
159 if (range[i + 1].start > (range[i].end + (1ULL << 32))) { 159 val = range[i].start & MMCONF_MASK;
160 base = range[i].end + (1ULL << 32); 160 if (val >= base + MMCONF_SIZE && BASE_VALID(base))
161 if ((base > tom2) && BASE_VALID(base)) 161 goto out;
162 goto out;
163 }
164 } 162 }
165
166fail:
167 fam10h_pci_mmconf_base_status = -1;
168 return; 163 return;
164
169out: 165out:
170 fam10h_pci_mmconf_base = base; 166 fam10h_pci_mmconf_base = base;
171 fam10h_pci_mmconf_base_status = 1;
172} 167}
173 168
174void __cpuinit fam10h_check_enable_mmcfg(void) 169void __cpuinit fam10h_check_enable_mmcfg(void)
@@ -190,11 +185,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
190 185
191 /* only trust the one handle 256 buses, if acpi=off */ 186 /* only trust the one handle 256 buses, if acpi=off */
192 if (!acpi_pci_disabled || busnbits >= 8) { 187 if (!acpi_pci_disabled || busnbits >= 8) {
193 u64 base; 188 u64 base = val & MMCONF_MASK;
194 base = val & (0xffffULL << 32); 189
195 if (fam10h_pci_mmconf_base_status <= 0) { 190 if (!fam10h_pci_mmconf_base) {
196 fam10h_pci_mmconf_base = base; 191 fam10h_pci_mmconf_base = base;
197 fam10h_pci_mmconf_base_status = 1;
198 return; 192 return;
199 } else if (fam10h_pci_mmconf_base == base) 193 } else if (fam10h_pci_mmconf_base == base)
200 return; 194 return;
@@ -206,8 +200,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
206 * with 256 buses 200 * with 256 buses
207 */ 201 */
208 get_fam10h_pci_mmconf_base(); 202 get_fam10h_pci_mmconf_base();
209 if (fam10h_pci_mmconf_base_status <= 0) 203 if (!fam10h_pci_mmconf_base) {
204 pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF;
210 return; 205 return;
206 }
211 207
212 printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n"); 208 printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n");
213 val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) | 209 val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) |
@@ -217,13 +213,13 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
217 wrmsrl(address, val); 213 wrmsrl(address, val);
218} 214}
219 215
220static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d) 216static int __init set_check_enable_amd_mmconf(const struct dmi_system_id *d)
221{ 217{
222 pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF; 218 pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF;
223 return 0; 219 return 0;
224} 220}
225 221
226static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = { 222static const struct dmi_system_id __initconst mmconf_dmi_table[] = {
227 { 223 {
228 .callback = set_check_enable_amd_mmconf, 224 .callback = set_check_enable_amd_mmconf,
229 .ident = "Sun Microsystems Machine", 225 .ident = "Sun Microsystems Machine",
@@ -234,7 +230,8 @@ static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = {
234 {} 230 {}
235}; 231};
236 232
237void __cpuinit check_enable_amd_mmconf_dmi(void) 233/* Called from a __cpuinit function, but only on the BSP. */
234void __ref check_enable_amd_mmconf_dmi(void)
238{ 235{
239 dmi_check_system(mmconf_dmi_table); 236 dmi_check_system(mmconf_dmi_table);
240} 237}
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 7bf2dc4c8f70..12fcbe2c143e 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -30,7 +30,6 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <linux/poll.h> 31#include <linux/poll.h>
32#include <linux/smp.h> 32#include <linux/smp.h>
33#include <linux/smp_lock.h>
34#include <linux/major.h> 33#include <linux/major.h>
35#include <linux/fs.h> 34#include <linux/fs.h>
36#include <linux/device.h> 35#include <linux/device.h>
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index bab3b9e6f66d..42eb3300dfc6 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -41,44 +41,6 @@ void pvclock_set_flags(u8 flags)
41 valid_flags = flags; 41 valid_flags = flags;
42} 42}
43 43
44/*
45 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
46 * yielding a 64-bit result.
47 */
48static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
49{
50 u64 product;
51#ifdef __i386__
52 u32 tmp1, tmp2;
53#endif
54
55 if (shift < 0)
56 delta >>= -shift;
57 else
58 delta <<= shift;
59
60#ifdef __i386__
61 __asm__ (
62 "mul %5 ; "
63 "mov %4,%%eax ; "
64 "mov %%edx,%4 ; "
65 "mul %5 ; "
66 "xor %5,%5 ; "
67 "add %4,%%eax ; "
68 "adc %5,%%edx ; "
69 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
70 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
71#elif defined(__x86_64__)
72 __asm__ (
73 "mul %%rdx ; shrd $32,%%rdx,%%rax"
74 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
75#else
76#error implement me!
77#endif
78
79 return product;
80}
81
82static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) 44static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
83{ 45{
84 u64 delta = native_read_tsc() - shadow->tsc_timestamp; 46 u64 delta = native_read_tsc() - shadow->tsc_timestamp;
@@ -121,6 +83,11 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
121 83
122static atomic64_t last_value = ATOMIC64_INIT(0); 84static atomic64_t last_value = ATOMIC64_INIT(0);
123 85
86void pvclock_resume(void)
87{
88 atomic64_set(&last_value, 0);
89}
90
124cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) 91cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
125{ 92{
126 struct pvclock_shadow_time shadow; 93 struct pvclock_shadow_time shadow;
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
new file mode 100644
index 000000000000..2a26819bb6a8
--- /dev/null
+++ b/arch/x86/kernel/resource.c
@@ -0,0 +1,48 @@
1#include <linux/ioport.h>
2#include <asm/e820.h>
3
4static void resource_clip(struct resource *res, resource_size_t start,
5 resource_size_t end)
6{
7 resource_size_t low = 0, high = 0;
8
9 if (res->end < start || res->start > end)
10 return; /* no conflict */
11
12 if (res->start < start)
13 low = start - res->start;
14
15 if (res->end > end)
16 high = res->end - end;
17
18 /* Keep the area above or below the conflict, whichever is larger */
19 if (low > high)
20 res->end = start - 1;
21 else
22 res->start = end + 1;
23}
24
25static void remove_e820_regions(struct resource *avail)
26{
27 int i;
28 struct e820entry *entry;
29
30 for (i = 0; i < e820.nr_map; i++) {
31 entry = &e820.map[i];
32
33 resource_clip(avail, entry->addr,
34 entry->addr + entry->size - 1);
35 }
36}
37
38void arch_remove_reservations(struct resource *avail)
39{
40 /* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */
41 if (avail->flags & IORESOURCE_MEM) {
42 if (avail->start < BIOS_END)
43 avail->start = BIOS_END;
44 resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END);
45
46 remove_e820_regions(avail);
47 }
48}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 21c6746338af..85268f8eadf6 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -769,7 +769,6 @@ void __init setup_arch(char **cmdline_p)
769 769
770 x86_init.oem.arch_setup(); 770 x86_init.oem.arch_setup();
771 771
772 resource_alloc_from_bottom = 0;
773 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; 772 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
774 setup_memory_map(); 773 setup_memory_map();
775 parse_setup_data(); 774 parse_setup_data();
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 9c253bd65e24..547128546cc3 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -394,7 +394,8 @@ static void __init setup_xstate_init(void)
394 * Setup init_xstate_buf to represent the init state of 394 * Setup init_xstate_buf to represent the init state of
395 * all the features managed by the xsave 395 * all the features managed by the xsave
396 */ 396 */
397 init_xstate_buf = alloc_bootmem(xstate_size); 397 init_xstate_buf = alloc_bootmem_align(xstate_size,
398 __alignof__(struct xsave_struct));
398 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; 399 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
399 400
400 clts(); 401 clts();
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 908ea5464a51..fb8b376bf28c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -720,7 +720,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
720 } 720 }
721} 721}
722 722
723static void set_spte_track_bits(u64 *sptep, u64 new_spte) 723static int set_spte_track_bits(u64 *sptep, u64 new_spte)
724{ 724{
725 pfn_t pfn; 725 pfn_t pfn;
726 u64 old_spte = *sptep; 726 u64 old_spte = *sptep;
@@ -731,19 +731,20 @@ static void set_spte_track_bits(u64 *sptep, u64 new_spte)
731 old_spte = __xchg_spte(sptep, new_spte); 731 old_spte = __xchg_spte(sptep, new_spte);
732 732
733 if (!is_rmap_spte(old_spte)) 733 if (!is_rmap_spte(old_spte))
734 return; 734 return 0;
735 735
736 pfn = spte_to_pfn(old_spte); 736 pfn = spte_to_pfn(old_spte);
737 if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) 737 if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
738 kvm_set_pfn_accessed(pfn); 738 kvm_set_pfn_accessed(pfn);
739 if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask)) 739 if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
740 kvm_set_pfn_dirty(pfn); 740 kvm_set_pfn_dirty(pfn);
741 return 1;
741} 742}
742 743
743static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte) 744static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte)
744{ 745{
745 set_spte_track_bits(sptep, new_spte); 746 if (set_spte_track_bits(sptep, new_spte))
746 rmap_remove(kvm, sptep); 747 rmap_remove(kvm, sptep);
747} 748}
748 749
749static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) 750static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 82e144a4e514..b81a9b7c2ca4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3395,6 +3395,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3395 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; 3395 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
3396 3396
3397 load_host_msrs(vcpu); 3397 load_host_msrs(vcpu);
3398 kvm_load_ldt(ldt_selector);
3398 loadsegment(fs, fs_selector); 3399 loadsegment(fs, fs_selector);
3399#ifdef CONFIG_X86_64 3400#ifdef CONFIG_X86_64
3400 load_gs_index(gs_selector); 3401 load_gs_index(gs_selector);
@@ -3402,7 +3403,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3402#else 3403#else
3403 loadsegment(gs, gs_selector); 3404 loadsegment(gs, gs_selector);
3404#endif 3405#endif
3405 kvm_load_ldt(ldt_selector);
3406 3406
3407 reload_tss(vcpu); 3407 reload_tss(vcpu);
3408 3408
@@ -3494,6 +3494,10 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
3494static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) 3494static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
3495{ 3495{
3496 switch (func) { 3496 switch (func) {
3497 case 0x00000001:
3498 /* Mask out xsave bit as long as it is not supported by SVM */
3499 entry->ecx &= ~(bit(X86_FEATURE_XSAVE));
3500 break;
3497 case 0x80000001: 3501 case 0x80000001:
3498 if (nested) 3502 if (nested)
3499 entry->ecx |= (1 << 2); /* Set SVM bit */ 3503 entry->ecx |= (1 << 2); /* Set SVM bit */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8da0e45ff7c9..81fcbe9515c5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -821,10 +821,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
821#endif 821#endif
822 822
823#ifdef CONFIG_X86_64 823#ifdef CONFIG_X86_64
824 if (is_long_mode(&vmx->vcpu)) { 824 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
825 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); 825 if (is_long_mode(&vmx->vcpu))
826 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); 826 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
827 }
828#endif 827#endif
829 for (i = 0; i < vmx->save_nmsrs; ++i) 828 for (i = 0; i < vmx->save_nmsrs; ++i)
830 kvm_set_shared_msr(vmx->guest_msrs[i].index, 829 kvm_set_shared_msr(vmx->guest_msrs[i].index,
@@ -839,23 +838,23 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
839 838
840 ++vmx->vcpu.stat.host_state_reload; 839 ++vmx->vcpu.stat.host_state_reload;
841 vmx->host_state.loaded = 0; 840 vmx->host_state.loaded = 0;
842 if (vmx->host_state.fs_reload_needed) 841#ifdef CONFIG_X86_64
843 loadsegment(fs, vmx->host_state.fs_sel); 842 if (is_long_mode(&vmx->vcpu))
843 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
844#endif
844 if (vmx->host_state.gs_ldt_reload_needed) { 845 if (vmx->host_state.gs_ldt_reload_needed) {
845 kvm_load_ldt(vmx->host_state.ldt_sel); 846 kvm_load_ldt(vmx->host_state.ldt_sel);
846#ifdef CONFIG_X86_64 847#ifdef CONFIG_X86_64
847 load_gs_index(vmx->host_state.gs_sel); 848 load_gs_index(vmx->host_state.gs_sel);
848 wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
849#else 849#else
850 loadsegment(gs, vmx->host_state.gs_sel); 850 loadsegment(gs, vmx->host_state.gs_sel);
851#endif 851#endif
852 } 852 }
853 if (vmx->host_state.fs_reload_needed)
854 loadsegment(fs, vmx->host_state.fs_sel);
853 reload_tss(); 855 reload_tss();
854#ifdef CONFIG_X86_64 856#ifdef CONFIG_X86_64
855 if (is_long_mode(&vmx->vcpu)) { 857 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
856 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
857 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
858 }
859#endif 858#endif
860 if (current_thread_info()->status & TS_USEDFPU) 859 if (current_thread_info()->status & TS_USEDFPU)
861 clts(); 860 clts();
@@ -4228,11 +4227,6 @@ static int vmx_get_lpage_level(void)
4228 return PT_PDPE_LEVEL; 4227 return PT_PDPE_LEVEL;
4229} 4228}
4230 4229
4231static inline u32 bit(int bitno)
4232{
4233 return 1 << (bitno & 31);
4234}
4235
4236static void vmx_cpuid_update(struct kvm_vcpu *vcpu) 4230static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
4237{ 4231{
4238 struct kvm_cpuid_entry2 *best; 4232 struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2288ad829b32..b989e1f1e5d3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -155,11 +155,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
155 155
156u64 __read_mostly host_xcr0; 156u64 __read_mostly host_xcr0;
157 157
158static inline u32 bit(int bitno)
159{
160 return 1 << (bitno & 31);
161}
162
163static void kvm_on_user_return(struct user_return_notifier *urn) 158static void kvm_on_user_return(struct user_return_notifier *urn)
164{ 159{
165 unsigned slot; 160 unsigned slot;
@@ -2560,6 +2555,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2560 !kvm_exception_is_soft(vcpu->arch.exception.nr); 2555 !kvm_exception_is_soft(vcpu->arch.exception.nr);
2561 events->exception.nr = vcpu->arch.exception.nr; 2556 events->exception.nr = vcpu->arch.exception.nr;
2562 events->exception.has_error_code = vcpu->arch.exception.has_error_code; 2557 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2558 events->exception.pad = 0;
2563 events->exception.error_code = vcpu->arch.exception.error_code; 2559 events->exception.error_code = vcpu->arch.exception.error_code;
2564 2560
2565 events->interrupt.injected = 2561 events->interrupt.injected =
@@ -2573,12 +2569,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2573 events->nmi.injected = vcpu->arch.nmi_injected; 2569 events->nmi.injected = vcpu->arch.nmi_injected;
2574 events->nmi.pending = vcpu->arch.nmi_pending; 2570 events->nmi.pending = vcpu->arch.nmi_pending;
2575 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); 2571 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
2572 events->nmi.pad = 0;
2576 2573
2577 events->sipi_vector = vcpu->arch.sipi_vector; 2574 events->sipi_vector = vcpu->arch.sipi_vector;
2578 2575
2579 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING 2576 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2580 | KVM_VCPUEVENT_VALID_SIPI_VECTOR 2577 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2581 | KVM_VCPUEVENT_VALID_SHADOW); 2578 | KVM_VCPUEVENT_VALID_SHADOW);
2579 memset(&events->reserved, 0, sizeof(events->reserved));
2582} 2580}
2583 2581
2584static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, 2582static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
@@ -2623,6 +2621,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2623 dbgregs->dr6 = vcpu->arch.dr6; 2621 dbgregs->dr6 = vcpu->arch.dr6;
2624 dbgregs->dr7 = vcpu->arch.dr7; 2622 dbgregs->dr7 = vcpu->arch.dr7;
2625 dbgregs->flags = 0; 2623 dbgregs->flags = 0;
2624 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
2626} 2625}
2627 2626
2628static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, 2627static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
@@ -3106,6 +3105,7 @@ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3106 sizeof(ps->channels)); 3105 sizeof(ps->channels));
3107 ps->flags = kvm->arch.vpit->pit_state.flags; 3106 ps->flags = kvm->arch.vpit->pit_state.flags;
3108 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 3107 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3108 memset(&ps->reserved, 0, sizeof(ps->reserved));
3109 return r; 3109 return r;
3110} 3110}
3111 3111
@@ -3169,10 +3169,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
3169 struct kvm_memslots *slots, *old_slots; 3169 struct kvm_memslots *slots, *old_slots;
3170 unsigned long *dirty_bitmap; 3170 unsigned long *dirty_bitmap;
3171 3171
3172 spin_lock(&kvm->mmu_lock);
3173 kvm_mmu_slot_remove_write_access(kvm, log->slot);
3174 spin_unlock(&kvm->mmu_lock);
3175
3176 r = -ENOMEM; 3172 r = -ENOMEM;
3177 dirty_bitmap = vmalloc(n); 3173 dirty_bitmap = vmalloc(n);
3178 if (!dirty_bitmap) 3174 if (!dirty_bitmap)
@@ -3194,6 +3190,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
3194 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; 3190 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
3195 kfree(old_slots); 3191 kfree(old_slots);
3196 3192
3193 spin_lock(&kvm->mmu_lock);
3194 kvm_mmu_slot_remove_write_access(kvm, log->slot);
3195 spin_unlock(&kvm->mmu_lock);
3196
3197 r = -EFAULT; 3197 r = -EFAULT;
3198 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) { 3198 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) {
3199 vfree(dirty_bitmap); 3199 vfree(dirty_bitmap);
@@ -3486,6 +3486,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
3486 user_ns.clock = kvm->arch.kvmclock_offset + now_ns; 3486 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
3487 local_irq_enable(); 3487 local_irq_enable();
3488 user_ns.flags = 0; 3488 user_ns.flags = 0;
3489 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
3489 3490
3490 r = -EFAULT; 3491 r = -EFAULT;
3491 if (copy_to_user(argp, &user_ns, sizeof(user_ns))) 3492 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
@@ -3972,8 +3973,10 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
3972 return X86EMUL_CONTINUE; 3973 return X86EMUL_CONTINUE;
3973 3974
3974 if (kvm_x86_ops->has_wbinvd_exit()) { 3975 if (kvm_x86_ops->has_wbinvd_exit()) {
3976 preempt_disable();
3975 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, 3977 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
3976 wbinvd_ipi, NULL, 1); 3978 wbinvd_ipi, NULL, 1);
3979 preempt_enable();
3977 cpumask_clear(vcpu->arch.wbinvd_dirty_mask); 3980 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
3978 } 3981 }
3979 wbinvd(); 3982 wbinvd();
@@ -4561,9 +4564,11 @@ static void kvm_timer_init(void)
4561#ifdef CONFIG_CPU_FREQ 4564#ifdef CONFIG_CPU_FREQ
4562 struct cpufreq_policy policy; 4565 struct cpufreq_policy policy;
4563 memset(&policy, 0, sizeof(policy)); 4566 memset(&policy, 0, sizeof(policy));
4564 cpufreq_get_policy(&policy, get_cpu()); 4567 cpu = get_cpu();
4568 cpufreq_get_policy(&policy, cpu);
4565 if (policy.cpuinfo.max_freq) 4569 if (policy.cpuinfo.max_freq)
4566 max_tsc_khz = policy.cpuinfo.max_freq; 4570 max_tsc_khz = policy.cpuinfo.max_freq;
4571 put_cpu();
4567#endif 4572#endif
4568 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, 4573 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
4569 CPUFREQ_TRANSITION_NOTIFIER); 4574 CPUFREQ_TRANSITION_NOTIFIER);
@@ -5514,6 +5519,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
5514 5519
5515 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; 5520 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
5516 kvm_x86_ops->set_cr4(vcpu, sregs->cr4); 5521 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
5522 if (sregs->cr4 & X86_CR4_OSXSAVE)
5523 update_cpuid(vcpu);
5517 if (!is_long_mode(vcpu) && is_pae(vcpu)) { 5524 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
5518 load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); 5525 load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
5519 mmu_reset_needed = 1; 5526 mmu_reset_needed = 1;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 2cea414489f3..c600da830ce0 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -70,6 +70,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
70 return kvm_read_cr0_bits(vcpu, X86_CR0_PG); 70 return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
71} 71}
72 72
73static inline u32 bit(int bitno)
74{
75 return 1 << (bitno & 31);
76}
77
73void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); 78void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
74void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); 79void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
75int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); 80int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 73b1e1a1f489..4996cf5f73a0 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -531,7 +531,10 @@ static void lguest_write_cr3(unsigned long cr3)
531{ 531{
532 lguest_data.pgdir = cr3; 532 lguest_data.pgdir = cr3;
533 lazy_hcall1(LHCALL_NEW_PGTABLE, cr3); 533 lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
534 cr3_changed = true; 534
535 /* These two page tables are simple, linear, and used during boot */
536 if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table))
537 cr3_changed = true;
535} 538}
536 539
537static unsigned long lguest_read_cr3(void) 540static unsigned long lguest_read_cr3(void)
@@ -703,9 +706,9 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
703 * to forget all of them. Fortunately, this is very rare. 706 * to forget all of them. Fortunately, this is very rare.
704 * 707 *
705 * ... except in early boot when the kernel sets up the initial pagetables, 708 * ... except in early boot when the kernel sets up the initial pagetables,
706 * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell 709 * which makes booting astonishingly slow: 48 seconds! So we don't even tell
707 * the Host anything changed until we've done the first page table switch, 710 * the Host anything changed until we've done the first real page table switch,
708 * which brings boot back to 0.25 seconds. 711 * which brings boot back to 4.3 seconds.
709 */ 712 */
710static void lguest_set_pte(pte_t *ptep, pte_t pteval) 713static void lguest_set_pte(pte_t *ptep, pte_t pteval)
711{ 714{
@@ -1002,7 +1005,7 @@ static void lguest_time_init(void)
1002 clockevents_register_device(&lguest_clockevent); 1005 clockevents_register_device(&lguest_clockevent);
1003 1006
1004 /* Finally, we unblock the timer interrupt. */ 1007 /* Finally, we unblock the timer interrupt. */
1005 enable_lguest_irq(0); 1008 clear_bit(0, lguest_data.blocked_interrupts);
1006} 1009}
1007 1010
1008/* 1011/*
@@ -1349,9 +1352,6 @@ __init void lguest_init(void)
1349 */ 1352 */
1350 switch_to_new_gdt(0); 1353 switch_to_new_gdt(0);
1351 1354
1352 /* We actually boot with all memory mapped, but let's say 128MB. */
1353 max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
1354
1355 /* 1355 /*
1356 * The Host<->Guest Switcher lives at the top of our address space, and 1356 * The Host<->Guest Switcher lives at the top of our address space, and
1357 * the Host told us how big it is when we made LGUEST_INIT hypercall: 1357 * the Host told us how big it is when we made LGUEST_INIT hypercall:
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 4f420c2f2d55..e7d5382ef263 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -4,6 +4,7 @@
4#include <asm/asm-offsets.h> 4#include <asm/asm-offsets.h>
5#include <asm/thread_info.h> 5#include <asm/thread_info.h>
6#include <asm/processor-flags.h> 6#include <asm/processor-flags.h>
7#include <asm/pgtable.h>
7 8
8/*G:020 9/*G:020
9 * Our story starts with the kernel booting into startup_32 in 10 * Our story starts with the kernel booting into startup_32 in
@@ -37,9 +38,113 @@ ENTRY(lguest_entry)
37 /* Set up the initial stack so we can run C code. */ 38 /* Set up the initial stack so we can run C code. */
38 movl $(init_thread_union+THREAD_SIZE),%esp 39 movl $(init_thread_union+THREAD_SIZE),%esp
39 40
41 call init_pagetables
42
40 /* Jumps are relative: we're running __PAGE_OFFSET too low. */ 43 /* Jumps are relative: we're running __PAGE_OFFSET too low. */
41 jmp lguest_init+__PAGE_OFFSET 44 jmp lguest_init+__PAGE_OFFSET
42 45
46/*
47 * Initialize page tables. This creates a PDE and a set of page
48 * tables, which are located immediately beyond __brk_base. The variable
49 * _brk_end is set up to point to the first "safe" location.
50 * Mappings are created both at virtual address 0 (identity mapping)
51 * and PAGE_OFFSET for up to _end.
52 *
53 * FIXME: This code is taken verbatim from arch/x86/kernel/head_32.S: they
54 * don't have a stack at this point, so we can't just use call and ret.
55 */
56init_pagetables:
57#if PTRS_PER_PMD > 1
58#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
59#else
60#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
61#endif
62#define pa(X) ((X) - __PAGE_OFFSET)
63
64/* Enough space to fit pagetables for the low memory linear map */
65MAPPING_BEYOND_END = \
66 PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
67#ifdef CONFIG_X86_PAE
68
69 /*
70 * In PAE mode initial_page_table is statically defined to contain
71 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
72 * entries). The identity mapping is handled by pointing two PGD entries
73 * to the first kernel PMD.
74 *
75 * Note the upper half of each PMD or PTE are always zero at this stage.
76 */
77
78#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
79
80 xorl %ebx,%ebx /* %ebx is kept at zero */
81
82 movl $pa(__brk_base), %edi
83 movl $pa(initial_pg_pmd), %edx
84 movl $PTE_IDENT_ATTR, %eax
8510:
86 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */
87 movl %ecx,(%edx) /* Store PMD entry */
88 /* Upper half already zero */
89 addl $8,%edx
90 movl $512,%ecx
9111:
92 stosl
93 xchgl %eax,%ebx
94 stosl
95 xchgl %eax,%ebx
96 addl $0x1000,%eax
97 loop 11b
98
99 /*
100 * End condition: we must map up to the end + MAPPING_BEYOND_END.
101 */
102 movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
103 cmpl %ebp,%eax
104 jb 10b
1051:
106 addl $__PAGE_OFFSET, %edi
107 movl %edi, pa(_brk_end)
108 shrl $12, %eax
109 movl %eax, pa(max_pfn_mapped)
110
111 /* Do early initialization of the fixmap area */
112 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
113 movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
114#else /* Not PAE */
115
116page_pde_offset = (__PAGE_OFFSET >> 20);
117
118 movl $pa(__brk_base), %edi
119 movl $pa(initial_page_table), %edx
120 movl $PTE_IDENT_ATTR, %eax
12110:
122 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
123 movl %ecx,(%edx) /* Store identity PDE entry */
124 movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
125 addl $4,%edx
126 movl $1024, %ecx
12711:
128 stosl
129 addl $0x1000,%eax
130 loop 11b
131 /*
132 * End condition: we must map up to the end + MAPPING_BEYOND_END.
133 */
134 movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
135 cmpl %ebp,%eax
136 jb 10b
137 addl $__PAGE_OFFSET, %edi
138 movl %edi, pa(_brk_end)
139 shrl $12, %eax
140 movl %eax, pa(max_pfn_mapped)
141
142 /* Do early initialization of the fixmap area */
143 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
144 movl %eax,pa(initial_page_table+0xffc)
145#endif
146 ret
147
43/*G:055 148/*G:055
44 * We create a macro which puts the assembler code between lgstart_ and lgend_ 149 * We create a macro which puts the assembler code between lgstart_ and lgend_
45 * markers. These templates are put in the .text section: they can't be 150 * markers. These templates are put in the .text section: they can't be
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 49358481c733..6acc724d5d8f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -223,7 +223,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
223 223
224static void __cpuinit calculate_tlb_offset(void) 224static void __cpuinit calculate_tlb_offset(void)
225{ 225{
226 int cpu, node, nr_node_vecs; 226 int cpu, node, nr_node_vecs, idx = 0;
227 /* 227 /*
228 * we are changing tlb_vector_offset for each CPU in runtime, but this 228 * we are changing tlb_vector_offset for each CPU in runtime, but this
229 * will not cause inconsistency, as the write is atomic under X86. we 229 * will not cause inconsistency, as the write is atomic under X86. we
@@ -239,7 +239,7 @@ static void __cpuinit calculate_tlb_offset(void)
239 nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; 239 nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
240 240
241 for_each_online_node(node) { 241 for_each_online_node(node) {
242 int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) * 242 int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) *
243 nr_node_vecs; 243 nr_node_vecs;
244 int cpu_offset = 0; 244 int cpu_offset = 0;
245 for_each_cpu(cpu, cpumask_of_node(node)) { 245 for_each_cpu(cpu, cpumask_of_node(node)) {
@@ -248,10 +248,11 @@ static void __cpuinit calculate_tlb_offset(void)
248 cpu_offset++; 248 cpu_offset++;
249 cpu_offset = cpu_offset % nr_node_vecs; 249 cpu_offset = cpu_offset % nr_node_vecs;
250 } 250 }
251 idx++;
251 } 252 }
252} 253}
253 254
254static int tlb_cpuhp_notify(struct notifier_block *n, 255static int __cpuinit tlb_cpuhp_notify(struct notifier_block *n,
255 unsigned long action, void *hcpu) 256 unsigned long action, void *hcpu)
256{ 257{
257 switch (action & 0xf) { 258 switch (action & 0xf) {
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 15466c096ba5..0972315c3860 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -138,7 +138,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
138 struct acpi_resource_address64 addr; 138 struct acpi_resource_address64 addr;
139 acpi_status status; 139 acpi_status status;
140 unsigned long flags; 140 unsigned long flags;
141 struct resource *root, *conflict;
142 u64 start, end; 141 u64 start, end;
143 142
144 status = resource_to_addr(acpi_res, &addr); 143 status = resource_to_addr(acpi_res, &addr);
@@ -146,12 +145,10 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
146 return AE_OK; 145 return AE_OK;
147 146
148 if (addr.resource_type == ACPI_MEMORY_RANGE) { 147 if (addr.resource_type == ACPI_MEMORY_RANGE) {
149 root = &iomem_resource;
150 flags = IORESOURCE_MEM; 148 flags = IORESOURCE_MEM;
151 if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY) 149 if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY)
152 flags |= IORESOURCE_PREFETCH; 150 flags |= IORESOURCE_PREFETCH;
153 } else if (addr.resource_type == ACPI_IO_RANGE) { 151 } else if (addr.resource_type == ACPI_IO_RANGE) {
154 root = &ioport_resource;
155 flags = IORESOURCE_IO; 152 flags = IORESOURCE_IO;
156 } else 153 } else
157 return AE_OK; 154 return AE_OK;
@@ -172,25 +169,90 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
172 return AE_OK; 169 return AE_OK;
173 } 170 }
174 171
175 conflict = insert_resource_conflict(root, res); 172 info->res_num++;
176 if (conflict) { 173 if (addr.translation_offset)
177 dev_err(&info->bridge->dev, 174 dev_info(&info->bridge->dev, "host bridge window %pR "
178 "address space collision: host bridge window %pR " 175 "(PCI address [%#llx-%#llx])\n",
179 "conflicts with %s %pR\n", 176 res, res->start - addr.translation_offset,
180 res, conflict->name, conflict); 177 res->end - addr.translation_offset);
181 } else { 178 else
182 pci_bus_add_resource(info->bus, res, 0); 179 dev_info(&info->bridge->dev, "host bridge window %pR\n", res);
183 info->res_num++; 180
184 if (addr.translation_offset) 181 return AE_OK;
185 dev_info(&info->bridge->dev, "host bridge window %pR " 182}
186 "(PCI address [%#llx-%#llx])\n", 183
187 res, res->start - addr.translation_offset, 184static bool resource_contains(struct resource *res, resource_size_t point)
188 res->end - addr.translation_offset); 185{
186 if (res->start <= point && point <= res->end)
187 return true;
188 return false;
189}
190
191static void coalesce_windows(struct pci_root_info *info, int type)
192{
193 int i, j;
194 struct resource *res1, *res2;
195
196 for (i = 0; i < info->res_num; i++) {
197 res1 = &info->res[i];
198 if (!(res1->flags & type))
199 continue;
200
201 for (j = i + 1; j < info->res_num; j++) {
202 res2 = &info->res[j];
203 if (!(res2->flags & type))
204 continue;
205
206 /*
207 * I don't like throwing away windows because then
208 * our resources no longer match the ACPI _CRS, but
209 * the kernel resource tree doesn't allow overlaps.
210 */
211 if (resource_contains(res1, res2->start) ||
212 resource_contains(res1, res2->end) ||
213 resource_contains(res2, res1->start) ||
214 resource_contains(res2, res1->end)) {
215 res1->start = min(res1->start, res2->start);
216 res1->end = max(res1->end, res2->end);
217 dev_info(&info->bridge->dev,
218 "host bridge window expanded to %pR; %pR ignored\n",
219 res1, res2);
220 res2->flags = 0;
221 }
222 }
223 }
224}
225
226static void add_resources(struct pci_root_info *info)
227{
228 int i;
229 struct resource *res, *root, *conflict;
230
231 if (!pci_use_crs)
232 return;
233
234 coalesce_windows(info, IORESOURCE_MEM);
235 coalesce_windows(info, IORESOURCE_IO);
236
237 for (i = 0; i < info->res_num; i++) {
238 res = &info->res[i];
239
240 if (res->flags & IORESOURCE_MEM)
241 root = &iomem_resource;
242 else if (res->flags & IORESOURCE_IO)
243 root = &ioport_resource;
189 else 244 else
190 dev_info(&info->bridge->dev, 245 continue;
191 "host bridge window %pR\n", res); 246
247 conflict = insert_resource_conflict(root, res);
248 if (conflict)
249 dev_err(&info->bridge->dev,
250 "address space collision: host bridge window %pR "
251 "conflicts with %s %pR\n",
252 res, conflict->name, conflict);
253 else
254 pci_bus_add_resource(info->bus, res, 0);
192 } 255 }
193 return AE_OK;
194} 256}
195 257
196static void 258static void
@@ -224,6 +286,7 @@ get_current_resources(struct acpi_device *device, int busnum,
224 acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, 286 acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
225 &info); 287 &info);
226 288
289 add_resources(&info);
227 return; 290 return;
228 291
229name_alloc_fail: 292name_alloc_fail:
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index c4bb261c106e..b1805b78842f 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -65,21 +65,13 @@ pcibios_align_resource(void *data, const struct resource *res,
65 resource_size_t size, resource_size_t align) 65 resource_size_t size, resource_size_t align)
66{ 66{
67 struct pci_dev *dev = data; 67 struct pci_dev *dev = data;
68 resource_size_t start = round_down(res->end - size + 1, align); 68 resource_size_t start = res->start;
69 69
70 if (res->flags & IORESOURCE_IO) { 70 if (res->flags & IORESOURCE_IO) {
71 71 if (skip_isa_ioresource_align(dev))
72 /* 72 return start;
73 * If we're avoiding ISA aliases, the largest contiguous I/O 73 if (start & 0x300)
74 * port space is 256 bytes. Clearing bits 9 and 10 preserves 74 start = (start + 0x3ff) & ~0x3ff;
75 * all 256-byte and smaller alignments, so the result will
76 * still be correctly aligned.
77 */
78 if (!skip_isa_ioresource_align(dev))
79 start &= ~0x300;
80 } else if (res->flags & IORESOURCE_MEM) {
81 if (start < BIOS_END)
82 start = res->end; /* fail; no space */
83 } 75 }
84 return start; 76 return start;
85} 77}
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 117f5b8daf75..25cd4a07d09f 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -70,6 +70,9 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
70struct xen_pci_frontend_ops *xen_pci_frontend; 70struct xen_pci_frontend_ops *xen_pci_frontend;
71EXPORT_SYMBOL_GPL(xen_pci_frontend); 71EXPORT_SYMBOL_GPL(xen_pci_frontend);
72 72
73#define XEN_PIRQ_MSI_DATA (MSI_DATA_TRIGGER_EDGE | \
74 MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0))
75
73static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, 76static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
74 struct msi_msg *msg) 77 struct msi_msg *msg)
75{ 78{
@@ -83,12 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
83 MSI_ADDR_REDIRECTION_CPU | 86 MSI_ADDR_REDIRECTION_CPU |
84 MSI_ADDR_DEST_ID(pirq); 87 MSI_ADDR_DEST_ID(pirq);
85 88
86 msg->data = 89 msg->data = XEN_PIRQ_MSI_DATA;
87 MSI_DATA_TRIGGER_EDGE |
88 MSI_DATA_LEVEL_ASSERT |
89 /* delivery mode reserved */
90 (3 << 8) |
91 MSI_DATA_VECTOR(0);
92} 90}
93 91
94static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 92static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
@@ -98,8 +96,23 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
98 struct msi_msg msg; 96 struct msi_msg msg;
99 97
100 list_for_each_entry(msidesc, &dev->msi_list, list) { 98 list_for_each_entry(msidesc, &dev->msi_list, list) {
99 __read_msi_msg(msidesc, &msg);
100 pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
101 ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
102 if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) {
103 xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
104 "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ);
105 if (irq < 0)
106 goto error;
107 ret = set_irq_msi(irq, msidesc);
108 if (ret < 0)
109 goto error_while;
110 printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d"
111 " pirq=%d\n", irq, pirq);
112 return 0;
113 }
101 xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? 114 xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
102 "msi-x" : "msi", &irq, &pirq); 115 "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ));
103 if (irq < 0 || pirq < 0) 116 if (irq < 0 || pirq < 0)
104 goto error; 117 goto error;
105 printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); 118 printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq);
@@ -147,8 +160,10 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
147 irq = xen_allocate_pirq(v[i], 0, /* not sharable */ 160 irq = xen_allocate_pirq(v[i], 0, /* not sharable */
148 (type == PCI_CAP_ID_MSIX) ? 161 (type == PCI_CAP_ID_MSIX) ?
149 "pcifront-msi-x" : "pcifront-msi"); 162 "pcifront-msi-x" : "pcifront-msi");
150 if (irq < 0) 163 if (irq < 0) {
151 return -1; 164 ret = -1;
165 goto free;
166 }
152 167
153 ret = set_irq_msi(irq, msidesc); 168 ret = set_irq_msi(irq, msidesc);
154 if (ret) 169 if (ret)
@@ -164,7 +179,7 @@ error:
164 if (ret == -ENODEV) 179 if (ret == -ENODEV)
165 dev_err(&dev->dev, "Xen PCI frontend has not registered" \ 180 dev_err(&dev->dev, "Xen PCI frontend has not registered" \
166 " MSI/MSI-X support!\n"); 181 " MSI/MSI-X support!\n");
167 182free:
168 kfree(v); 183 kfree(v);
169 return ret; 184 return ret;
170} 185}
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 20ea20a39e2a..ba9caa808a9c 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1343,8 +1343,8 @@ uv_activation_descriptor_init(int node, int pnode)
1343 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) 1343 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
1344 * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub 1344 * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub
1345 */ 1345 */
1346 bau_desc = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* 1346 bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE
1347 UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); 1347 * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
1348 BUG_ON(!bau_desc); 1348 BUG_ON(!bau_desc);
1349 1349
1350 pa = uv_gpa(bau_desc); /* need the real nasid*/ 1350 pa = uv_gpa(bau_desc); /* need the real nasid*/
@@ -1402,9 +1402,9 @@ uv_payload_queue_init(int node, int pnode)
1402 struct bau_payload_queue_entry *pqp_malloc; 1402 struct bau_payload_queue_entry *pqp_malloc;
1403 struct bau_control *bcp; 1403 struct bau_control *bcp;
1404 1404
1405 pqp = (struct bau_payload_queue_entry *) kmalloc_node( 1405 pqp = kmalloc_node((DEST_Q_SIZE + 1)
1406 (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), 1406 * sizeof(struct bau_payload_queue_entry),
1407 GFP_KERNEL, node); 1407 GFP_KERNEL, node);
1408 BUG_ON(!pqp); 1408 BUG_ON(!pqp);
1409 pqp_malloc = pqp; 1409 pqp_malloc = pqp;
1410 1410
@@ -1455,7 +1455,7 @@ static void __init uv_init_uvhub(int uvhub, int vector)
1455 * the below initialization can't be in firmware because the 1455 * the below initialization can't be in firmware because the
1456 * messaging IRQ will be determined by the OS 1456 * messaging IRQ will be determined by the OS
1457 */ 1457 */
1458 apicid = uvhub_to_first_apicid(uvhub); 1458 apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
1459 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, 1459 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
1460 ((apicid << 32) | vector)); 1460 ((apicid << 32) | vector));
1461} 1461}
@@ -1520,8 +1520,7 @@ static void __init uv_init_per_cpu(int nuvhubs)
1520 1520
1521 timeout_us = calculate_destination_timeout(); 1521 timeout_us = calculate_destination_timeout();
1522 1522
1523 uvhub_descs = (struct uvhub_desc *) 1523 uvhub_descs = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
1524 kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
1525 memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); 1524 memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
1526 uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); 1525 uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
1527 for_each_present_cpu(cpu) { 1526 for_each_present_cpu(cpu) {
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 56e421bc379b..9daf5d1af9f1 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -89,6 +89,7 @@ static void uv_rtc_send_IPI(int cpu)
89 89
90 apicid = cpu_physical_id(cpu); 90 apicid = cpu_physical_id(cpu);
91 pnode = uv_apicid_to_pnode(apicid); 91 pnode = uv_apicid_to_pnode(apicid);
92 apicid |= uv_apicid_hibits;
92 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 93 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
93 (apicid << UVH_IPI_INT_APIC_ID_SHFT) | 94 (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
94 (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); 95 (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
@@ -107,6 +108,7 @@ static int uv_intr_pending(int pnode)
107static int uv_setup_intr(int cpu, u64 expires) 108static int uv_setup_intr(int cpu, u64 expires)
108{ 109{
109 u64 val; 110 u64 val;
111 unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits;
110 int pnode = uv_cpu_to_pnode(cpu); 112 int pnode = uv_cpu_to_pnode(cpu);
111 113
112 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, 114 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
@@ -117,7 +119,7 @@ static int uv_setup_intr(int cpu, u64 expires)
117 UVH_EVENT_OCCURRED0_RTC1_MASK); 119 UVH_EVENT_OCCURRED0_RTC1_MASK);
118 120
119 val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | 121 val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
120 ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); 122 ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
121 123
122 /* Set configuration */ 124 /* Set configuration */
123 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); 125 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 4a2afa1bac51..b6552b189bcd 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -25,7 +25,7 @@ targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
25 25
26export CPPFLAGS_vdso.lds += -P -C 26export CPPFLAGS_vdso.lds += -P -C
27 27
28VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -Wl,-soname=linux-vdso.so.1 \ 28VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
29 -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 29 -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
30 30
31$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so 31$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
@@ -69,7 +69,7 @@ vdso32.so-$(VDSO32-y) += sysenter
69vdso32-images = $(vdso32.so-y:%=vdso32-%.so) 69vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
70 70
71CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) 71CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
72VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1 72VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1
73 73
74# This makes sure the $(obj) subdirectory exists even though vdso32/ 74# This makes sure the $(obj) subdirectory exists even though vdso32/
75# is not a kbuild sub-make subdirectory. 75# is not a kbuild sub-make subdirectory.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 235c0f4d3861..44dcad43989d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,6 +75,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
75enum xen_domain_type xen_domain_type = XEN_NATIVE; 75enum xen_domain_type xen_domain_type = XEN_NATIVE;
76EXPORT_SYMBOL_GPL(xen_domain_type); 76EXPORT_SYMBOL_GPL(xen_domain_type);
77 77
78unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
79EXPORT_SYMBOL(machine_to_phys_mapping);
80unsigned int machine_to_phys_order;
81EXPORT_SYMBOL(machine_to_phys_order);
82
78struct start_info *xen_start_info; 83struct start_info *xen_start_info;
79EXPORT_SYMBOL_GPL(xen_start_info); 84EXPORT_SYMBOL_GPL(xen_start_info);
80 85
@@ -1016,10 +1021,6 @@ static void xen_reboot(int reason)
1016{ 1021{
1017 struct sched_shutdown r = { .reason = reason }; 1022 struct sched_shutdown r = { .reason = reason };
1018 1023
1019#ifdef CONFIG_SMP
1020 stop_other_cpus();
1021#endif
1022
1023 if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) 1024 if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
1024 BUG(); 1025 BUG();
1025} 1026}
@@ -1090,6 +1091,8 @@ static void __init xen_setup_stackprotector(void)
1090/* First C function to be called on Xen boot */ 1091/* First C function to be called on Xen boot */
1091asmlinkage void __init xen_start_kernel(void) 1092asmlinkage void __init xen_start_kernel(void)
1092{ 1093{
1094 struct physdev_set_iopl set_iopl;
1095 int rc;
1093 pgd_t *pgd; 1096 pgd_t *pgd;
1094 1097
1095 if (!xen_start_info) 1098 if (!xen_start_info)
@@ -1097,6 +1100,8 @@ asmlinkage void __init xen_start_kernel(void)
1097 1100
1098 xen_domain_type = XEN_PV_DOMAIN; 1101 xen_domain_type = XEN_PV_DOMAIN;
1099 1102
1103 xen_setup_machphys_mapping();
1104
1100 /* Install Xen paravirt ops */ 1105 /* Install Xen paravirt ops */
1101 pv_info = xen_info; 1106 pv_info = xen_info;
1102 pv_init_ops = xen_init_ops; 1107 pv_init_ops = xen_init_ops;
@@ -1191,8 +1196,6 @@ asmlinkage void __init xen_start_kernel(void)
1191 /* Allocate and initialize top and mid mfn levels for p2m structure */ 1196 /* Allocate and initialize top and mid mfn levels for p2m structure */
1192 xen_build_mfn_list_list(); 1197 xen_build_mfn_list_list();
1193 1198
1194 init_mm.pgd = pgd;
1195
1196 /* keep using Xen gdt for now; no urgent need to change it */ 1199 /* keep using Xen gdt for now; no urgent need to change it */
1197 1200
1198#ifdef CONFIG_X86_32 1201#ifdef CONFIG_X86_32
@@ -1202,10 +1205,18 @@ asmlinkage void __init xen_start_kernel(void)
1202#else 1205#else
1203 pv_info.kernel_rpl = 0; 1206 pv_info.kernel_rpl = 0;
1204#endif 1207#endif
1205
1206 /* set the limit of our address space */ 1208 /* set the limit of our address space */
1207 xen_reserve_top(); 1209 xen_reserve_top();
1208 1210
1211 /* We used to do this in xen_arch_setup, but that is too late on AMD
1212 * were early_cpu_init (run before ->arch_setup()) calls early_amd_init
1213 * which pokes 0xcf8 port.
1214 */
1215 set_iopl.iopl = 1;
1216 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1217 if (rc != 0)
1218 xen_raw_printk("physdev_op failed %d\n", rc);
1219
1209#ifdef CONFIG_X86_32 1220#ifdef CONFIG_X86_32
1210 /* set up basic CPUID stuff */ 1221 /* set up basic CPUID stuff */
1211 cpu_detect(&new_cpu_data); 1222 cpu_detect(&new_cpu_data);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index c237b810b03f..44924e551fde 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2034,6 +2034,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
2034 set_page_prot(pmd, PAGE_KERNEL_RO); 2034 set_page_prot(pmd, PAGE_KERNEL_RO);
2035} 2035}
2036 2036
2037void __init xen_setup_machphys_mapping(void)
2038{
2039 struct xen_machphys_mapping mapping;
2040 unsigned long machine_to_phys_nr_ents;
2041
2042 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
2043 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
2044 machine_to_phys_nr_ents = mapping.max_mfn + 1;
2045 } else {
2046 machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
2047 }
2048 machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
2049}
2050
2037#ifdef CONFIG_X86_64 2051#ifdef CONFIG_X86_64
2038static void convert_pfn_mfn(void *v) 2052static void convert_pfn_mfn(void *v)
2039{ 2053{
@@ -2119,44 +2133,83 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
2119 return pgd; 2133 return pgd;
2120} 2134}
2121#else /* !CONFIG_X86_64 */ 2135#else /* !CONFIG_X86_64 */
2122static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD); 2136static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
2137static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
2138
2139static __init void xen_write_cr3_init(unsigned long cr3)
2140{
2141 unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
2142
2143 BUG_ON(read_cr3() != __pa(initial_page_table));
2144 BUG_ON(cr3 != __pa(swapper_pg_dir));
2145
2146 /*
2147 * We are switching to swapper_pg_dir for the first time (from
2148 * initial_page_table) and therefore need to mark that page
2149 * read-only and then pin it.
2150 *
2151 * Xen disallows sharing of kernel PMDs for PAE
2152 * guests. Therefore we must copy the kernel PMD from
2153 * initial_page_table into a new kernel PMD to be used in
2154 * swapper_pg_dir.
2155 */
2156 swapper_kernel_pmd =
2157 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
2158 memcpy(swapper_kernel_pmd, initial_kernel_pmd,
2159 sizeof(pmd_t) * PTRS_PER_PMD);
2160 swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
2161 __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
2162 set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
2163
2164 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
2165 xen_write_cr3(cr3);
2166 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
2167
2168 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
2169 PFN_DOWN(__pa(initial_page_table)));
2170 set_page_prot(initial_page_table, PAGE_KERNEL);
2171 set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
2172
2173 pv_mmu_ops.write_cr3 = &xen_write_cr3;
2174}
2123 2175
2124__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, 2176__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
2125 unsigned long max_pfn) 2177 unsigned long max_pfn)
2126{ 2178{
2127 pmd_t *kernel_pmd; 2179 pmd_t *kernel_pmd;
2128 2180
2129 level2_kernel_pgt = extend_brk(sizeof(pmd_t *) * PTRS_PER_PMD, PAGE_SIZE); 2181 initial_kernel_pmd =
2182 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
2130 2183
2131 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + 2184 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
2132 xen_start_info->nr_pt_frames * PAGE_SIZE + 2185 xen_start_info->nr_pt_frames * PAGE_SIZE +
2133 512*1024); 2186 512*1024);
2134 2187
2135 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); 2188 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
2136 memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); 2189 memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
2137 2190
2138 xen_map_identity_early(level2_kernel_pgt, max_pfn); 2191 xen_map_identity_early(initial_kernel_pmd, max_pfn);
2139 2192
2140 memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); 2193 memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
2141 set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], 2194 initial_page_table[KERNEL_PGD_BOUNDARY] =
2142 __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); 2195 __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
2143 2196
2144 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 2197 set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
2145 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); 2198 set_page_prot(initial_page_table, PAGE_KERNEL_RO);
2146 set_page_prot(empty_zero_page, PAGE_KERNEL_RO); 2199 set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
2147 2200
2148 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); 2201 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
2149 2202
2150 xen_write_cr3(__pa(swapper_pg_dir)); 2203 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
2151 2204 PFN_DOWN(__pa(initial_page_table)));
2152 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); 2205 xen_write_cr3(__pa(initial_page_table));
2153 2206
2154 memblock_x86_reserve_range(__pa(xen_start_info->pt_base), 2207 memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
2155 __pa(xen_start_info->pt_base + 2208 __pa(xen_start_info->pt_base +
2156 xen_start_info->nr_pt_frames * PAGE_SIZE), 2209 xen_start_info->nr_pt_frames * PAGE_SIZE),
2157 "XEN PAGETABLES"); 2210 "XEN PAGETABLES");
2158 2211
2159 return swapper_pg_dir; 2212 return initial_page_table;
2160} 2213}
2161#endif /* CONFIG_X86_64 */ 2214#endif /* CONFIG_X86_64 */
2162 2215
@@ -2290,7 +2343,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
2290 .write_cr2 = xen_write_cr2, 2343 .write_cr2 = xen_write_cr2,
2291 2344
2292 .read_cr3 = xen_read_cr3, 2345 .read_cr3 = xen_read_cr3,
2346#ifdef CONFIG_X86_32
2347 .write_cr3 = xen_write_cr3_init,
2348#else
2293 .write_cr3 = xen_write_cr3, 2349 .write_cr3 = xen_write_cr3,
2350#endif
2294 2351
2295 .flush_tlb_user = xen_flush_tlb, 2352 .flush_tlb_user = xen_flush_tlb,
2296 .flush_tlb_kernel = xen_flush_tlb, 2353 .flush_tlb_kernel = xen_flush_tlb,
@@ -2358,8 +2415,6 @@ void __init xen_init_mmu_ops(void)
2358 x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; 2415 x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
2359 pv_mmu_ops = xen_mmu_ops; 2416 pv_mmu_ops = xen_mmu_ops;
2360 2417
2361 vmap_lazy_unmap = false;
2362
2363 memset(dummy_mapping, 0xff, PAGE_SIZE); 2418 memset(dummy_mapping, 0xff, PAGE_SIZE);
2364} 2419}
2365 2420
@@ -2627,7 +2682,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
2627 2682
2628 prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); 2683 prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
2629 2684
2630 vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; 2685 BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
2686 (VM_PFNMAP | VM_RESERVED | VM_IO)));
2631 2687
2632 rmd.mfn = mfn; 2688 rmd.mfn = mfn;
2633 rmd.prot = prot; 2689 rmd.prot = prot;
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 0f456386cce5..25c52f94a27c 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -68,7 +68,7 @@ static int __init check_platform_magic(void)
68 return 0; 68 return 0;
69} 69}
70 70
71void __init xen_unplug_emulated_devices(void) 71void xen_unplug_emulated_devices(void)
72{ 72{
73 int r; 73 int r;
74 74
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index b1dbdaa23ecc..b5a7f928234b 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -23,7 +23,6 @@
23#include <xen/interface/callback.h> 23#include <xen/interface/callback.h>
24#include <xen/interface/memory.h> 24#include <xen/interface/memory.h>
25#include <xen/interface/physdev.h> 25#include <xen/interface/physdev.h>
26#include <xen/interface/memory.h>
27#include <xen/features.h> 26#include <xen/features.h>
28 27
29#include "xen-ops.h" 28#include "xen-ops.h"
@@ -118,16 +117,18 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
118 const struct e820map *e820) 117 const struct e820map *e820)
119{ 118{
120 phys_addr_t max_addr = PFN_PHYS(max_pfn); 119 phys_addr_t max_addr = PFN_PHYS(max_pfn);
121 phys_addr_t last_end = 0; 120 phys_addr_t last_end = ISA_END_ADDRESS;
122 unsigned long released = 0; 121 unsigned long released = 0;
123 int i; 122 int i;
124 123
124 /* Free any unused memory above the low 1Mbyte. */
125 for (i = 0; i < e820->nr_map && last_end < max_addr; i++) { 125 for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
126 phys_addr_t end = e820->map[i].addr; 126 phys_addr_t end = e820->map[i].addr;
127 end = min(max_addr, end); 127 end = min(max_addr, end);
128 128
129 released += xen_release_chunk(last_end, end); 129 if (last_end < end)
130 last_end = e820->map[i].addr + e820->map[i].size; 130 released += xen_release_chunk(last_end, end);
131 last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
131 } 132 }
132 133
133 if (last_end < max_addr) 134 if (last_end < max_addr)
@@ -164,6 +165,7 @@ char * __init xen_memory_setup(void)
164 XENMEM_memory_map; 165 XENMEM_memory_map;
165 rc = HYPERVISOR_memory_op(op, &memmap); 166 rc = HYPERVISOR_memory_op(op, &memmap);
166 if (rc == -ENOSYS) { 167 if (rc == -ENOSYS) {
168 BUG_ON(xen_initial_domain());
167 memmap.nr_entries = 1; 169 memmap.nr_entries = 1;
168 map[0].addr = 0ULL; 170 map[0].addr = 0ULL;
169 map[0].size = mem_end; 171 map[0].size = mem_end;
@@ -179,34 +181,32 @@ char * __init xen_memory_setup(void)
179 for (i = 0; i < memmap.nr_entries; i++) { 181 for (i = 0; i < memmap.nr_entries; i++) {
180 unsigned long long end = map[i].addr + map[i].size; 182 unsigned long long end = map[i].addr + map[i].size;
181 183
182 if (map[i].type == E820_RAM) { 184 if (map[i].type == E820_RAM && end > mem_end) {
183 if (map[i].addr < mem_end && end > mem_end) { 185 /* RAM off the end - may be partially included */
184 /* Truncate region to max_mem. */ 186 u64 delta = min(map[i].size, end - mem_end);
185 u64 delta = end - mem_end;
186 187
187 map[i].size -= delta; 188 map[i].size -= delta;
188 extra_pages += PFN_DOWN(delta); 189 end -= delta;
189 190
190 end = mem_end; 191 extra_pages += PFN_DOWN(delta);
191 }
192 } 192 }
193 193
194 if (end > xen_extra_mem_start) 194 if (map[i].size > 0 && end > xen_extra_mem_start)
195 xen_extra_mem_start = end; 195 xen_extra_mem_start = end;
196 196
197 /* If region is non-RAM or below mem_end, add what remains */ 197 /* Add region if any remains */
198 if ((map[i].type != E820_RAM || map[i].addr < mem_end) && 198 if (map[i].size > 0)
199 map[i].size > 0)
200 e820_add_region(map[i].addr, map[i].size, map[i].type); 199 e820_add_region(map[i].addr, map[i].size, map[i].type);
201 } 200 }
202 201
203 /* 202 /*
204 * Even though this is normal, usable memory under Xen, reserve 203 * In domU, the ISA region is normal, usable memory, but we
205 * ISA memory anyway because too many things think they can poke 204 * reserve ISA memory anyway because too many things poke
206 * about in there. 205 * about in there.
207 * 206 *
208 * In a dom0 kernel, this region is identity mapped with the 207 * In Dom0, the host E820 information can leave gaps in the
209 * hardware ISA area, so it really is out of bounds. 208 * ISA range, which would cause us to release those pages. To
209 * avoid this, we unconditionally reserve them here.
210 */ 210 */
211 e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, 211 e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
212 E820_RESERVED); 212 E820_RESERVED);
@@ -244,26 +244,11 @@ char * __init xen_memory_setup(void)
244 else 244 else
245 extra_pages = 0; 245 extra_pages = 0;
246 246
247 if (!xen_initial_domain()) 247 xen_add_extra_mem(extra_pages);
248 xen_add_extra_mem(extra_pages);
249 248
250 return "Xen"; 249 return "Xen";
251} 250}
252 251
253static void xen_idle(void)
254{
255 local_irq_disable();
256
257 if (need_resched())
258 local_irq_enable();
259 else {
260 current_thread_info()->status &= ~TS_POLLING;
261 smp_mb__after_clear_bit();
262 safe_halt();
263 current_thread_info()->status |= TS_POLLING;
264 }
265}
266
267/* 252/*
268 * Set the bit indicating "nosegneg" library variants should be used. 253 * Set the bit indicating "nosegneg" library variants should be used.
269 * We only need to bother in pure 32-bit mode; compat 32-bit processes 254 * We only need to bother in pure 32-bit mode; compat 32-bit processes
@@ -333,9 +318,6 @@ void __cpuinit xen_enable_syscall(void)
333 318
334void __init xen_arch_setup(void) 319void __init xen_arch_setup(void)
335{ 320{
336 struct physdev_set_iopl set_iopl;
337 int rc;
338
339 xen_panic_handler_init(); 321 xen_panic_handler_init();
340 322
341 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); 323 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
@@ -352,11 +334,6 @@ void __init xen_arch_setup(void)
352 xen_enable_sysenter(); 334 xen_enable_sysenter();
353 xen_enable_syscall(); 335 xen_enable_syscall();
354 336
355 set_iopl.iopl = 1;
356 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
357 if (rc != 0)
358 printk(KERN_INFO "physdev_op failed %d\n", rc);
359
360#ifdef CONFIG_ACPI 337#ifdef CONFIG_ACPI
361 if (!(xen_start_info->flags & SIF_INITDOMAIN)) { 338 if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
362 printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); 339 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
@@ -368,7 +345,11 @@ void __init xen_arch_setup(void)
368 MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ? 345 MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
369 COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); 346 COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
370 347
371 pm_idle = xen_idle; 348 /* Set up idle, making sure it calls safe_halt() pvop */
349#ifdef CONFIG_X86_32
350 boot_cpu_data.hlt_works_ok = 1;
351#endif
352 pm_idle = default_idle;
372 353
373 fiddle_vdso(); 354 fiddle_vdso();
374} 355}
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 1d789d56877c..9bbd63a129b5 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -31,6 +31,7 @@ void xen_hvm_post_suspend(int suspend_cancelled)
31 int cpu; 31 int cpu;
32 xen_hvm_init_shared_info(); 32 xen_hvm_init_shared_info();
33 xen_callback_vector(); 33 xen_callback_vector();
34 xen_unplug_emulated_devices();
34 if (xen_feature(XENFEAT_hvm_safe_pvclock)) { 35 if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
35 for_each_online_cpu(cpu) { 36 for_each_online_cpu(cpu) {
36 xen_setup_runstate_info(cpu); 37 xen_setup_runstate_info(cpu);
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index b2bb5aa3b054..5da5e53fb94c 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -426,6 +426,8 @@ void xen_timer_resume(void)
426{ 426{
427 int cpu; 427 int cpu;
428 428
429 pvclock_resume();
430
429 if (xen_clockevent != &xen_vcpuop_clockevent) 431 if (xen_clockevent != &xen_vcpuop_clockevent)
430 return; 432 return;
431 433
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 64044747348e..9d41bf985757 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -43,7 +43,7 @@ void xen_vcpu_restore(void);
43 43
44void xen_callback_vector(void); 44void xen_callback_vector(void);
45void xen_hvm_init_shared_info(void); 45void xen_hvm_init_shared_info(void);
46void __init xen_unplug_emulated_devices(void); 46void xen_unplug_emulated_devices(void);
47 47
48void __init xen_build_dynamic_phys_to_machine(void); 48void __init xen_build_dynamic_phys_to_machine(void);
49 49