Diffstat (limited to 'arch/x86')

 40 files changed, 626 insertions(+), 225 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f70e3e3a9fa7..845ea2b2d487 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -25,6 +25,18 @@ config X86
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
 
+config DEFCONFIG_LIST
+	string
+	depends on X86_32
+	option defconfig_list
+	default "arch/x86/configs/i386_defconfig"
+
+config DEFCONFIG_LIST
+	string
+	depends on X86_64
+	option defconfig_list
+	default "arch/x86/configs/x86_64_defconfig"
+
 
 config GENERIC_LOCKBREAK
 	def_bool n
@@ -180,7 +192,7 @@ config X86_HT
 
 config X86_BIOS_REBOOT
 	bool
-	depends on X86_32 && !(X86_VISWS || X86_VOYAGER)
+	depends on !X86_VISWS && !X86_VOYAGER
 	default y
 
 config X86_TRAMPOLINE
@@ -1161,7 +1173,7 @@ source kernel/Kconfig.hz
 
 config KEXEC
 	bool "kexec system call"
-	depends on X86_64 || X86_BIOS_REBOOT
+	depends on X86_BIOS_REBOOT
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel. It is like a reboot
@@ -1649,6 +1661,7 @@ config GEODE_MFGPT_TIMER
 
 config OLPC
 	bool "One Laptop Per Child support"
+	depends on MGEODE_LX
 	default n
 	help
 	  Add support for detecting the unique features of the OLPC
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 7ef18b01f0bc..2ad6301849a1 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -278,11 +278,6 @@ config GENERIC_CPU
 
 endchoice
 
-config X86_CPU
-	def_bool y
-	select GENERIC_FIND_FIRST_BIT
-	select GENERIC_FIND_NEXT_BIT
-
 config X86_GENERIC
 	bool "Generic x86 support"
 	depends on X86_32
@@ -297,6 +292,11 @@ config X86_GENERIC
 
 endif
 
+config X86_CPU
+	def_bool y
+	select GENERIC_FIND_FIRST_BIT
+	select GENERIC_FIND_NEXT_BIT
+
 #
 # Define implied options from the CPU selection here
 config X86_L1_CACHE_BYTES
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 5b1979a45a1e..ac1e31ba4795 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -118,7 +118,6 @@ config DEBUG_NX_TEST
 config 4KSTACKS
 	bool "Use 4Kb for kernel stacks instead of 8Kb"
 	depends on X86_32
-	default y
 	help
 	  If you say Y here the kernel will use a 4Kb stacksize for the
 	  kernel stack attached to each process/thread. This facilitates
@@ -256,11 +255,9 @@ config CPA_DEBUG
 	help
 	  Do change_page_attr() self-tests every 30 seconds.
 
-endmenu
-
 config OPTIMIZE_INLINING
 	bool "Allow gcc to uninline functions marked 'inline'"
-	default y
+	depends on BROKEN
 	help
 	  This option determines if the kernel forces gcc to inline the functions
 	  developers have marked 'inline'. Doing so takes away freedom from gcc to
@@ -270,3 +267,6 @@ config OPTIMIZE_INLINING
 	  this algorithm is so good that allowing gcc4 to make the decision can
 	  become the default in the future, until then this option is there to
 	  test gcc for this.
+
+endmenu
+
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index d01ea42187e6..edaadea90aaf 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -191,7 +191,7 @@ static void read_ehdr(FILE *fp)
 		die("Cannot read ELF header: %s\n",
 			strerror(errno));
 	}
-	if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) {
+	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) {
 		die("No ELF magic\n");
 	}
 	if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) {
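SELFMAG is the standard <elf.h> name for the length of the ELF magic, so the bare constant 4 becomes self-documenting:

	/* From the standard <elf.h>: */
	#define ELFMAG		"\177ELF"	/* the four magic bytes */
	#define SELFMAG		4		/* number of magic bytes */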
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index bbed3a26ce55..cb3856a18c85 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -128,7 +128,7 @@ asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
 
 	current->state = TASK_INTERRUPTIBLE;
 	schedule();
-	set_thread_flag(TIF_RESTORE_SIGMASK);
+	set_restore_sigmask();
 	return -ERESTARTNOHAND;
 }
 
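set_restore_sigmask() replaces the open-coded TIF flag manipulation here and in signal_32.c/signal_64.c below. A sketch of the x86 helper this series switches to; it keeps the state in thread_info->status and also raises TIF_SIGPENDING, which is why do_notify_resume() below only needs to test _TIF_SIGPENDING:

	static inline void set_restore_sigmask(void)
	{
		struct thread_info *ti = current_thread_info();

		ti->status |= TS_RESTORE_SIGMASK;	/* synchronous, no atomic op needed */
		set_bit(TIF_SIGPENDING, &ti->flags);	/* make sure do_signal() runs */
	}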
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 30d54ed27e55..bbdacb398d48 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -40,7 +40,6 @@ obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
 obj-y				+= acpi/
 obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
-obj-$(CONFIG_X86_64)		+= reboot.o
 obj-$(CONFIG_MCA)		+= mca_32.o
 obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 7335959b6aff..fd5ca97a2ad5 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -10,5 +10,5 @@ endif
 $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin
 
 $(obj)/realmode/wakeup.bin: FORCE
-	$(Q)$(MAKE) $(build)=$(obj)/realmode $@
+	$(Q)$(MAKE) $(build)=$(obj)/realmode
 
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
index 092900854acc..1c31cc0e9def 100644
--- a/arch/x86/kernel/acpi/realmode/Makefile
+++ b/arch/x86/kernel/acpi/realmode/Makefile
@@ -6,7 +6,8 @@
 # for more details.
 #
 
-targets		:= wakeup.bin wakeup.elf
+always		:= wakeup.bin
+targets		:= wakeup.elf wakeup.lds
 
 wakeup-y	+= wakeup.o wakemain.o video-mode.o copy.o
 
@@ -48,7 +49,7 @@ LDFLAGS_wakeup.elf	:= -T
 
 CPPFLAGS_wakeup.lds += -P -C
 
-$(obj)/wakeup.elf: $(src)/wakeup.lds $(WAKEUP_OBJS) FORCE
+$(obj)/wakeup.elf: $(obj)/wakeup.lds $(WAKEUP_OBJS) FORCE
 	$(call if_changed,ld)
 
 OBJCOPYFLAGS_wakeup.bin	:= -O binary
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index e4ea362e8480..bf9290e29013 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1192,19 +1192,6 @@ static int suspend(int vetoable)
 	int err;
 	struct apm_user *as;
 
-	if (pm_send_all(PM_SUSPEND, (void *)3)) {
-		/* Vetoed */
-		if (vetoable) {
-			if (apm_info.connection_version > 0x100)
-				set_system_power_state(APM_STATE_REJECT);
-			err = -EBUSY;
-			ignore_sys_suspend = 0;
-			printk(KERN_WARNING "apm: suspend was vetoed.\n");
-			goto out;
-		}
-		printk(KERN_CRIT "apm: suspend was vetoed, but suspending anyway.\n");
-	}
-
 	device_suspend(PMSG_SUSPEND);
 	local_irq_disable();
 	device_power_down(PMSG_SUSPEND);
@@ -1227,9 +1214,7 @@ static int suspend(int vetoable)
 	device_power_up();
 	local_irq_enable();
 	device_resume();
-	pm_send_all(PM_RESUME, (void *)0);
 	queue_event(APM_NORMAL_RESUME, NULL);
- out:
 	spin_lock(&user_list_lock);
 	for (as = user_list; as != NULL; as = as->next) {
 		as->suspend_wait = 0;
@@ -1340,7 +1325,6 @@ static void check_events(void)
 			if ((event != APM_NORMAL_RESUME)
 			    || (ignore_normal_resume == 0)) {
 				device_resume();
-				pm_send_all(PM_RESUME, (void *)0);
 				queue_event(event, NULL);
 			}
 			ignore_normal_resume = 0;
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 021624c83583..cbaaf69bedb2 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -83,7 +83,7 @@ unsigned int read_apic_id(void)
 {
 	unsigned int id;
 
-	WARN_ON(preemptible());
+	WARN_ON(preemptible() && num_online_cpus() > 1);
 	id = apic_read(APIC_ID);
 	if (uv_system_type >= UV_X2APIC)
 		id |= __get_cpu_var(x2apic_extra_bits);
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 90f038af3adc..b2cc73768a9d 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -656,15 +656,16 @@ int_msg:
 	.asciz "Unknown interrupt or fault at EIP %p %p %p\n"
 
 fault_msg:
-	.asciz \
-/* fault info: */	"BUG: Int %d: CR2 %p\n" \
-/* pusha regs: */	" EDI %p ESI %p EBP %p ESP %p\n" \
-			" EBX %p EDX %p ECX %p EAX %p\n" \
-/* fault frame: */	" err %p EIP %p CS %p flg %p\n" \
-			\
-			"Stack: %p %p %p %p %p %p %p %p\n" \
-			" %p %p %p %p %p %p %p %p\n" \
-			" %p %p %p %p %p %p %p %p\n"
+/* fault info: */
+	.ascii "BUG: Int %d: CR2 %p\n"
+/* pusha regs: */
+	.ascii " EDI %p ESI %p EBP %p ESP %p\n"
+	.ascii " EBX %p EDX %p ECX %p EAX %p\n"
+/* fault frame: */
+	.ascii " err %p EIP %p CS %p flg %p\n"
+	.ascii "Stack: %p %p %p %p %p %p %p %p\n"
+	.ascii " %p %p %p %p %p %p %p %p\n"
+	.asciz " %p %p %p %p %p %p %p %p\n"
 
 #include "../../x86/xen/xen-head.S"
 
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 9007f9ea64ee..9b5cfcdfc426 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -137,9 +137,10 @@ static void hpet_reserve_platform_timers(unsigned long id)
 	hd.hd_irq[0] = HPET_LEGACY_8254;
 	hd.hd_irq[1] = HPET_LEGACY_RTC;
 
-	for (i = 2; i < nrtimers; timer++, i++)
-		hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >>
+	for (i = 2; i < nrtimers; timer++, i++) {
+		hd.hd_irq[i] = (readl(&timer->hpet_config) & Tn_INT_ROUTE_CNF_MASK) >>
 			Tn_INT_ROUTE_CNF_SHIFT;
+	}
 
 	hpet_alloc(&hd);
 
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index ddee04043aeb..4bc1be5d5472 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -133,6 +133,7 @@ static int kvm_register_clock(void)
 	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
+#ifdef CONFIG_X86_LOCAL_APIC
 static void kvm_setup_secondary_clock(void)
 {
 	/*
@@ -143,6 +144,7 @@ static void kvm_setup_secondary_clock(void)
 	/* ok, done with our trickery, call native */
 	setup_secondary_APIC_clock();
 }
+#endif
 
 /*
  * After the clock is registered, the host will keep writing to the
@@ -177,7 +179,9 @@ void __init kvmclock_init(void)
 	pv_time_ops.get_wallclock = kvm_get_wallclock;
 	pv_time_ops.set_wallclock = kvm_set_wallclock;
 	pv_time_ops.sched_clock = kvm_clock_read;
+#ifdef CONFIG_X86_LOCAL_APIC
 	pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+#endif
 	machine_ops.shutdown = kvm_shutdown;
 #ifdef CONFIG_KEXEC
 	machine_ops.crash_shutdown = kvm_crash_shutdown;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 3e2c54dc8b29..404683b94e79 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -794,6 +794,11 @@ void __init find_smp_config(void)
                             ACPI-based MP Configuration
    -------------------------------------------------------------------------- */
 
+/*
+ * Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
+ */
+int es7000_plat;
+
 #ifdef CONFIG_ACPI
 
 #ifdef CONFIG_X86_IO_APIC
@@ -909,8 +914,6 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 	MP_intsrc_info(&intsrc);
 }
 
-int es7000_plat;
-
 void __init mp_config_acpi_legacy_irqs(void)
 {
 	struct mpc_config_intsrc intsrc;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 388b113a7d88..0c37f16b6950 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -14,7 +14,7 @@ EXPORT_SYMBOL(forbid_dac);
 const struct dma_mapping_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
-int iommu_sac_force __read_mostly = 0;
+static int iommu_sac_force __read_mostly;
 
 #ifdef CONFIG_IOMMU_DEBUG
 int panic_on_overflow __read_mostly = 1;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index a4a838306b2c..f6be7d5f82f8 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -9,6 +9,7 @@
 #include <asm/desc.h>
 #include <asm/hpet.h>
 #include <asm/pgtable.h>
+#include <asm/proto.h>
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
 
@@ -148,7 +149,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
-			DMI_MATCH(DMI_BOARD_NAME, "0WF810"),
 		},
 	},
 	{	/* Handle problems with rebooting on Dell Optiplex 745's DFF*/
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c0c68c18a788..cc6f5eb20b24 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -95,7 +95,7 @@ void __init setup_per_cpu_areas(void)
 
 	/* Copy section for each CPU (we discard the original) */
 	size = PERCPU_ENOUGH_ROOM;
-	printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
+	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 	       size);
 
 	for_each_possible_cpu(i) {
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 8e05e7f7bd40..d92373630963 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -57,7 +57,7 @@ sys_sigsuspend(int history0, int history1, old_sigset_t mask)
 
 	current->state = TASK_INTERRUPTIBLE;
 	schedule();
-	set_thread_flag(TIF_RESTORE_SIGMASK);
+	set_restore_sigmask();
 
 	return -ERESTARTNOHAND;
 }
@@ -593,7 +593,7 @@ static void do_signal(struct pt_regs *regs)
 	if (!user_mode(regs))
 		return;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
 		oldset = &current->saved_sigmask;
 	else
 		oldset = &current->blocked;
@@ -612,13 +612,12 @@ static void do_signal(struct pt_regs *regs)
 		/* Whee! Actually deliver the signal. */
 		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
 			/*
-			 * a signal was successfully delivered; the saved
+			 * A signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
 			 * and will be restored by sigreturn, so we can simply
-			 * clear the TIF_RESTORE_SIGMASK flag
+			 * clear the TS_RESTORE_SIGMASK flag.
 			 */
-			if (test_thread_flag(TIF_RESTORE_SIGMASK))
-				clear_thread_flag(TIF_RESTORE_SIGMASK);
+			current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
 		}
 		return;
 	}
@@ -645,8 +644,8 @@ static void do_signal(struct pt_regs *regs)
 	 * If there's no signal to deliver, we just put the saved sigmask
 	 * back.
 	 */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
+		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
 		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
 	}
 }
@@ -665,7 +664,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 	}
 
 	/* deal with pending signal delivery */
-	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
+	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
 
 	if (thread_info_flags & _TIF_HRTICK_RESCHED)
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ccb2a4560c2d..e53b267662e7 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -427,7 +427,7 @@ static void do_signal(struct pt_regs *regs)
 	if (!user_mode(regs))
 		return;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
 		oldset = &current->saved_sigmask;
 	else
 		oldset = &current->blocked;
@@ -444,11 +444,13 @@ static void do_signal(struct pt_regs *regs)
 
 		/* Whee! Actually deliver the signal. */
 		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
-			/* a signal was successfully delivered; the saved
+			/*
+			 * A signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
 			 * and will be restored by sigreturn, so we can simply
-			 * clear the TIF_RESTORE_SIGMASK flag */
-			clear_thread_flag(TIF_RESTORE_SIGMASK);
+			 * clear the TS_RESTORE_SIGMASK flag.
+			 */
+			current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
 		}
 		return;
 	}
@@ -476,8 +478,8 @@ static void do_signal(struct pt_regs *regs)
 	 * If there's no signal to deliver, we just put the saved sigmask
 	 * back.
 	 */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
+		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
 		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
 	}
 }
@@ -498,7 +500,7 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
 #endif /* CONFIG_X86_MCE */
 
 	/* deal with pending signal delivery */
-	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
+	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
 
 	if (thread_info_flags & _TIF_HRTICK_RESCHED)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 84241a256dc8..6b087ab6cd8f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -299,7 +299,7 @@ static void __cpuinit smp_callin(void)
 /*
  * Activate a secondary processor.
  */
-void __cpuinit start_secondary(void *unused)
+static void __cpuinit start_secondary(void *unused)
 {
 	/*
 	 * Don't put *anything* before cpu_init(), SMP booting is too
@@ -1306,7 +1306,7 @@ static void remove_siblinginfo(int cpu)
 	cpu_clear(cpu, cpu_sibling_setup_map);
 }
 
-int additional_cpus __initdata = -1;
+static int additional_cpus __initdata = -1;
 
 static __init int setup_additional_cpus(char *s)
 {
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index a86d26f036e1..d2ab52cc1d6b 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -22,23 +22,6 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
-/*
- * sys_pipe() is the normal C calling standard for creating
- * a pipe. It's not the way Unix traditionally does this, though.
- */
-asmlinkage int sys_pipe(unsigned long __user * fildes)
-{
-	int fd[2];
-	int error;
-
-	error = do_pipe(fd);
-	if (!error) {
-		if (copy_to_user(fildes, fd, 2*sizeof(int)))
-			error = -EFAULT;
-	}
-	return error;
-}
-
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			  unsigned long prot, unsigned long flags,
 			  unsigned long fd, unsigned long pgoff)
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index bd802a5e1aa3..3b360ef33817 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -17,23 +17,6 @@
 #include <asm/uaccess.h>
 #include <asm/ia32.h>
 
-/*
- * sys_pipe() is the normal C calling standard for creating
- * a pipe. It's not the way Unix traditionally does this, though.
- */
-asmlinkage long sys_pipe(int __user *fildes)
-{
-	int fd[2];
-	int error;
-
-	error = do_pipe(fd);
-	if (!error) {
-		if (copy_to_user(fildes, fd, 2*sizeof(int)))
-			error = -EFAULT;
-	}
-	return error;
-}
-
 asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
 	unsigned long fd, unsigned long off)
 {
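Both arch copies of sys_pipe() go away, presumably in favor of a single generic definition shared by all architectures; the consolidated version (in fs/pipe.c) has the same shape as the removed code:

	/* Generic replacement, a sketch of the fs/pipe.c definition: */
	asmlinkage long sys_pipe(int __user *fildes)
	{
		int fd[2];
		int error;

		error = do_pipe(fd);
		if (!error) {
			if (copy_to_user(fildes, fd, sizeof(fd)))
				error = -EFAULT;
		}
		return error;
	}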
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 361e31611276..3324d90038e4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -35,7 +35,7 @@
 #include "i8254.h"
 
 #ifndef CONFIG_X86_64
-#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
+#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
 #else
 #define mod_64(x, y) ((x) % (y))
 #endif
@@ -60,8 +60,8 @@ static u64 muldiv64(u64 a, u32 b, u32 c)
 	rl = (u64)u.l.low * (u64)b;
 	rh = (u64)u.l.high * (u64)b;
 	rh += (rl >> 32);
-	res.l.high = div64_64(rh, c);
-	res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+	res.l.high = div64_u64(rh, c);
+	res.l.low = div64_u64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
 	return res.ll;
 }
 
@@ -288,6 +288,8 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
 	 * mode 1 is one shot, mode 2 is period, otherwise del timer */
 	switch (ps->channels[0].mode) {
 	case 1:
+		/* FIXME: enhance mode 4 precision */
+	case 4:
 		create_pit_timer(&ps->pit_timer, val, 0);
 		break;
 	case 2:
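div64_64() is renamed div64_u64() here (and in lapic.c below) as part of the <linux/math64.h> cleanup; the semantics are unchanged: u64-by-u64 division that also works on 32-bit, where a plain '/' on u64 operands would not link. A minimal usage sketch, variable names illustrative:

	#include <linux/math64.h>

	u64 ticks = div64_u64(elapsed_ns, period_ns);	/* u64 / u64, 32-bit safe */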
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 57ac4e4c556a..36809d79788b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -25,13 +25,13 @@
 #include <linux/hrtimer.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/math64.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include <asm/page.h>
 #include <asm/current.h>
 #include <asm/apicdef.h>
 #include <asm/atomic.h>
-#include <asm/div64.h>
 #include "irq.h"
 
 #define PRId64 "d"
@@ -526,8 +526,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
 	} else
 		passed = ktime_sub(now, apic->timer.last_update);
 
-	counter_passed = div64_64(ktime_to_ns(passed),
+	counter_passed = div64_u64(ktime_to_ns(passed),
 				(APIC_BUS_CYCLE_NS * apic->timer.divide_count));
 
 	if (counter_passed > tmcct) {
 		if (unlikely(!apic_lvtt_period(apic))) {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2ad6f5481671..36c5406b1813 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -79,36 +79,6 @@ static int dbg = 1;
 }
 #endif
 
-#define PT64_PT_BITS 9
-#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
-#define PT32_PT_BITS 10
-#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
-
-#define PT_WRITABLE_SHIFT 1
-
-#define PT_PRESENT_MASK (1ULL << 0)
-#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
-#define PT_USER_MASK (1ULL << 2)
-#define PT_PWT_MASK (1ULL << 3)
-#define PT_PCD_MASK (1ULL << 4)
-#define PT_ACCESSED_MASK (1ULL << 5)
-#define PT_DIRTY_MASK (1ULL << 6)
-#define PT_PAGE_SIZE_MASK (1ULL << 7)
-#define PT_PAT_MASK (1ULL << 7)
-#define PT_GLOBAL_MASK (1ULL << 8)
-#define PT64_NX_SHIFT 63
-#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
-
-#define PT_PAT_SHIFT 7
-#define PT_DIR_PAT_SHIFT 12
-#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
-
-#define PT32_DIR_PSE36_SIZE 4
-#define PT32_DIR_PSE36_SHIFT 13
-#define PT32_DIR_PSE36_MASK \
-	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
-
-
 #define PT_FIRST_AVAIL_BITS_SHIFT 9
 #define PT64_SECOND_AVAIL_BITS_SHIFT 52
 
@@ -154,10 +124,6 @@ static int dbg = 1;
 #define PFERR_USER_MASK (1U << 2)
 #define PFERR_FETCH_MASK (1U << 4)
 
-#define PT64_ROOT_LEVEL 4
-#define PT32_ROOT_LEVEL 2
-#define PT32E_ROOT_LEVEL 3
-
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
@@ -186,6 +152,12 @@ static struct kmem_cache *mmu_page_header_cache;
 
 static u64 __read_mostly shadow_trap_nonpresent_pte;
 static u64 __read_mostly shadow_notrap_nonpresent_pte;
+static u64 __read_mostly shadow_base_present_pte;
+static u64 __read_mostly shadow_nx_mask;
+static u64 __read_mostly shadow_x_mask;	/* mutual exclusive with nx_mask */
+static u64 __read_mostly shadow_user_mask;
+static u64 __read_mostly shadow_accessed_mask;
+static u64 __read_mostly shadow_dirty_mask;
 
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
@@ -194,6 +166,23 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
 
+void kvm_mmu_set_base_ptes(u64 base_pte)
+{
+	shadow_base_present_pte = base_pte;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
+
+void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
+		u64 dirty_mask, u64 nx_mask, u64 x_mask)
+{
+	shadow_user_mask = user_mask;
+	shadow_accessed_mask = accessed_mask;
+	shadow_dirty_mask = dirty_mask;
+	shadow_nx_mask = nx_mask;
+	shadow_x_mask = x_mask;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
+
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.cr0 & X86_CR0_WP;
@@ -232,7 +221,7 @@ static int is_writeble_pte(unsigned long pte)
 
 static int is_dirty_pte(unsigned long pte)
 {
-	return pte & PT_DIRTY_MASK;
+	return pte & shadow_dirty_mask;
 }
 
 static int is_rmap_pte(u64 pte)
@@ -387,7 +376,6 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 
 	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
 	*write_count += 1;
-	WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
 }
 
 static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
@@ -547,7 +535,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 		return;
 	sp = page_header(__pa(spte));
 	pfn = spte_to_pfn(*spte);
-	if (*spte & PT_ACCESSED_MASK)
+	if (*spte & shadow_accessed_mask)
 		kvm_set_pfn_accessed(pfn);
 	if (is_writeble_pte(*spte))
 		kvm_release_pfn_dirty(pfn);
@@ -1073,17 +1061,17 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	 * whether the guest actually used the pte (in order to detect
 	 * demand paging).
	 */
-	spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
+	spte = shadow_base_present_pte | shadow_dirty_mask;
 	if (!speculative)
 		pte_access |= PT_ACCESSED_MASK;
 	if (!dirty)
 		pte_access &= ~ACC_WRITE_MASK;
-	if (!(pte_access & ACC_EXEC_MASK))
-		spte |= PT64_NX_MASK;
-
-	spte |= PT_PRESENT_MASK;
+	if (pte_access & ACC_EXEC_MASK)
+		spte |= shadow_x_mask;
+	else
+		spte |= shadow_nx_mask;
 	if (pte_access & ACC_USER_MASK)
-		spte |= PT_USER_MASK;
+		spte |= shadow_user_mask;
 	if (largepage)
 		spte |= PT_PAGE_SIZE_MASK;
 
@@ -1188,8 +1176,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 				return -ENOMEM;
 			}
 
-			table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
-				| PT_WRITABLE_MASK | PT_USER_MASK;
+			table[index] = __pa(new_table->spt)
+				| PT_PRESENT_MASK | PT_WRITABLE_MASK
+				| shadow_user_mask | shadow_x_mask;
 		}
 		table_addr = table[index] & PT64_BASE_ADDR_MASK;
 	}
@@ -1244,7 +1233,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return;
 	spin_lock(&vcpu->kvm->mmu_lock);
-#ifdef CONFIG_X86_64
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 
@@ -1256,7 +1244,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		return;
 	}
-#endif
 	for (i = 0; i < 4; ++i) {
 		hpa_t root = vcpu->arch.mmu.pae_root[i];
 
@@ -1282,7 +1269,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
 	root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
 
-#ifdef CONFIG_X86_64
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 
@@ -1297,7 +1283,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		vcpu->arch.mmu.root_hpa = root;
 		return;
 	}
-#endif
 	metaphysical = !is_paging(vcpu);
 	if (tdp_enabled)
 		metaphysical = 1;
@@ -1377,7 +1362,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-			 largepage, gfn, pfn, TDP_ROOT_LEVEL);
+			 largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
@@ -1484,7 +1469,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	context->page_fault = tdp_page_fault;
 	context->free = nonpaging_free;
 	context->prefetch_page = nonpaging_prefetch_page;
-	context->shadow_root_level = TDP_ROOT_LEVEL;
+	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
 	context->root_hpa = INVALID_PAGE;
 
 	if (!is_paging(vcpu)) {
@@ -1633,7 +1618,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
 {
 	u64 *spte = vcpu->arch.last_pte_updated;
 
-	return !!(spte && (*spte & PT_ACCESSED_MASK));
+	return !!(spte && (*spte & shadow_accessed_mask));
 }
 
 static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
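The new kvm_mmu_set_base_ptes()/kvm_mmu_set_mask_ptes() hooks let each backend tell the common MMU which hardware bits encode present/user/accessed/dirty/NX, since EPT uses different bit positions than legacy shadow paging. An illustrative caller for the legacy layout, using the masks now exported via mmu.h (a sketch, not necessarily the exact setup code):

	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
			      PT_DIRTY_MASK, PT64_NX_MASK, 0ull);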
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e64e9f56a65e..1730757bbc7a 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -3,11 +3,38 @@
 
 #include <linux/kvm_host.h>
 
-#ifdef CONFIG_X86_64
-#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL
-#else
-#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL
-#endif
+#define PT64_PT_BITS 9
+#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
+#define PT32_PT_BITS 10
+#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
+
+#define PT_WRITABLE_SHIFT 1
+
+#define PT_PRESENT_MASK (1ULL << 0)
+#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
+#define PT_USER_MASK (1ULL << 2)
+#define PT_PWT_MASK (1ULL << 3)
+#define PT_PCD_MASK (1ULL << 4)
+#define PT_ACCESSED_MASK (1ULL << 5)
+#define PT_DIRTY_MASK (1ULL << 6)
+#define PT_PAGE_SIZE_MASK (1ULL << 7)
+#define PT_PAT_MASK (1ULL << 7)
+#define PT_GLOBAL_MASK (1ULL << 8)
+#define PT64_NX_SHIFT 63
+#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
+
+#define PT_PAT_SHIFT 7
+#define PT_DIR_PAT_SHIFT 12
+#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
+
+#define PT32_DIR_PSE36_SIZE 4
+#define PT32_DIR_PSE36_SHIFT 13
+#define PT32_DIR_PSE36_MASK \
+	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
+
+#define PT64_ROOT_LEVEL 4
+#define PT32_ROOT_LEVEL 2
+#define PT32E_ROOT_LEVEL 3
 
 static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 89e0be2c10d0..ab22615eee89 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1863,6 +1863,15 @@ static bool svm_cpu_has_accelerated_tpr(void)
 	return false;
 }
 
+static int get_npt_level(void)
+{
+#ifdef CONFIG_X86_64
+	return PT64_ROOT_LEVEL;
+#else
+	return PT32E_ROOT_LEVEL;
+#endif
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -1920,6 +1929,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.inject_pending_vectors = do_interrupt_requests,
 
 	.set_tss_addr = svm_set_tss_addr,
+	.get_tdp_level = get_npt_level,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8e5d6645b90d..bfe4db11989c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -42,6 +42,9 @@ module_param(enable_vpid, bool, 0); | |||
42 | static int flexpriority_enabled = 1; | 42 | static int flexpriority_enabled = 1; |
43 | module_param(flexpriority_enabled, bool, 0); | 43 | module_param(flexpriority_enabled, bool, 0); |
44 | 44 | ||
45 | static int enable_ept = 1; | ||
46 | module_param(enable_ept, bool, 0); | ||
47 | |||
45 | struct vmcs { | 48 | struct vmcs { |
46 | u32 revision_id; | 49 | u32 revision_id; |
47 | u32 abort; | 50 | u32 abort; |
@@ -84,7 +87,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
84 | return container_of(vcpu, struct vcpu_vmx, vcpu); | 87 | return container_of(vcpu, struct vcpu_vmx, vcpu); |
85 | } | 88 | } |
86 | 89 | ||
87 | static int init_rmode_tss(struct kvm *kvm); | 90 | static int init_rmode(struct kvm *kvm); |
88 | 91 | ||
89 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 92 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
90 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 93 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -107,6 +110,11 @@ static struct vmcs_config { | |||
107 | u32 vmentry_ctrl; | 110 | u32 vmentry_ctrl; |
108 | } vmcs_config; | 111 | } vmcs_config; |
109 | 112 | ||
113 | struct vmx_capability { | ||
114 | u32 ept; | ||
115 | u32 vpid; | ||
116 | } vmx_capability; | ||
117 | |||
110 | #define VMX_SEGMENT_FIELD(seg) \ | 118 | #define VMX_SEGMENT_FIELD(seg) \ |
111 | [VCPU_SREG_##seg] = { \ | 119 | [VCPU_SREG_##seg] = { \ |
112 | .selector = GUEST_##seg##_SELECTOR, \ | 120 | .selector = GUEST_##seg##_SELECTOR, \ |
@@ -214,6 +222,32 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void) | |||
214 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | 222 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); |
215 | } | 223 | } |
216 | 224 | ||
225 | static inline int cpu_has_vmx_invept_individual_addr(void) | ||
226 | { | ||
227 | return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT)); | ||
228 | } | ||
229 | |||
230 | static inline int cpu_has_vmx_invept_context(void) | ||
231 | { | ||
232 | return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT)); | ||
233 | } | ||
234 | |||
235 | static inline int cpu_has_vmx_invept_global(void) | ||
236 | { | ||
237 | return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT)); | ||
238 | } | ||
239 | |||
240 | static inline int cpu_has_vmx_ept(void) | ||
241 | { | ||
242 | return (vmcs_config.cpu_based_2nd_exec_ctrl & | ||
243 | SECONDARY_EXEC_ENABLE_EPT); | ||
244 | } | ||
245 | |||
246 | static inline int vm_need_ept(void) | ||
247 | { | ||
248 | return (cpu_has_vmx_ept() && enable_ept); | ||
249 | } | ||
250 | |||
217 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 251 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) |
218 | { | 252 | { |
219 | return ((cpu_has_vmx_virtualize_apic_accesses()) && | 253 | return ((cpu_has_vmx_virtualize_apic_accesses()) && |
@@ -250,6 +284,18 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva) | |||
250 | : : "a"(&operand), "c"(ext) : "cc", "memory"); | 284 | : : "a"(&operand), "c"(ext) : "cc", "memory"); |
251 | } | 285 | } |
252 | 286 | ||
287 | static inline void __invept(int ext, u64 eptp, gpa_t gpa) | ||
288 | { | ||
289 | struct { | ||
290 | u64 eptp, gpa; | ||
291 | } operand = {eptp, gpa}; | ||
292 | |||
293 | asm volatile (ASM_VMX_INVEPT | ||
294 | /* CF==1 or ZF==1 --> rc = -1 */ | ||
295 | "; ja 1f ; ud2 ; 1:\n" | ||
296 | : : "a" (&operand), "c" (ext) : "cc", "memory"); | ||
297 | } | ||
298 | |||
253 | static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) | 299 | static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) |
254 | { | 300 | { |
255 | int i; | 301 | int i; |
@@ -301,6 +347,33 @@ static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) | |||
301 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); | 347 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); |
302 | } | 348 | } |
303 | 349 | ||
350 | static inline void ept_sync_global(void) | ||
351 | { | ||
352 | if (cpu_has_vmx_invept_global()) | ||
353 | __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0); | ||
354 | } | ||
355 | |||
356 | static inline void ept_sync_context(u64 eptp) | ||
357 | { | ||
358 | if (vm_need_ept()) { | ||
359 | if (cpu_has_vmx_invept_context()) | ||
360 | __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); | ||
361 | else | ||
362 | ept_sync_global(); | ||
363 | } | ||
364 | } | ||
365 | |||
366 | static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) | ||
367 | { | ||
368 | if (vm_need_ept()) { | ||
369 | if (cpu_has_vmx_invept_individual_addr()) | ||
370 | __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR, | ||
371 | eptp, gpa); | ||
372 | else | ||
373 | ept_sync_context(eptp); | ||
374 | } | ||
375 | } | ||
376 | |||
304 | static unsigned long vmcs_readl(unsigned long field) | 377 | static unsigned long vmcs_readl(unsigned long field) |
305 | { | 378 | { |
306 | unsigned long value; | 379 | unsigned long value; |
@@ -388,6 +461,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
388 | eb |= 1u << 1; | 461 | eb |= 1u << 1; |
389 | if (vcpu->arch.rmode.active) | 462 | if (vcpu->arch.rmode.active) |
390 | eb = ~0; | 463 | eb = ~0; |
464 | if (vm_need_ept()) | ||
465 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ | ||
391 | vmcs_write32(EXCEPTION_BITMAP, eb); | 466 | vmcs_write32(EXCEPTION_BITMAP, eb); |
392 | } | 467 | } |
393 | 468 | ||
@@ -985,7 +1060,7 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, | |||
985 | static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | 1060 | static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) |
986 | { | 1061 | { |
987 | u32 vmx_msr_low, vmx_msr_high; | 1062 | u32 vmx_msr_low, vmx_msr_high; |
988 | u32 min, opt; | 1063 | u32 min, opt, min2, opt2; |
989 | u32 _pin_based_exec_control = 0; | 1064 | u32 _pin_based_exec_control = 0; |
990 | u32 _cpu_based_exec_control = 0; | 1065 | u32 _cpu_based_exec_control = 0; |
991 | u32 _cpu_based_2nd_exec_control = 0; | 1066 | u32 _cpu_based_2nd_exec_control = 0; |
@@ -1003,6 +1078,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1003 | CPU_BASED_CR8_LOAD_EXITING | | 1078 | CPU_BASED_CR8_LOAD_EXITING | |
1004 | CPU_BASED_CR8_STORE_EXITING | | 1079 | CPU_BASED_CR8_STORE_EXITING | |
1005 | #endif | 1080 | #endif |
1081 | CPU_BASED_CR3_LOAD_EXITING | | ||
1082 | CPU_BASED_CR3_STORE_EXITING | | ||
1006 | CPU_BASED_USE_IO_BITMAPS | | 1083 | CPU_BASED_USE_IO_BITMAPS | |
1007 | CPU_BASED_MOV_DR_EXITING | | 1084 | CPU_BASED_MOV_DR_EXITING | |
1008 | CPU_BASED_USE_TSC_OFFSETING; | 1085 | CPU_BASED_USE_TSC_OFFSETING; |
@@ -1018,11 +1095,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1018 | ~CPU_BASED_CR8_STORE_EXITING; | 1095 | ~CPU_BASED_CR8_STORE_EXITING; |
1019 | #endif | 1096 | #endif |
1020 | if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { | 1097 | if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { |
1021 | min = 0; | 1098 | min2 = 0; |
1022 | opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 1099 | opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
1023 | SECONDARY_EXEC_WBINVD_EXITING | | 1100 | SECONDARY_EXEC_WBINVD_EXITING | |
1024 | SECONDARY_EXEC_ENABLE_VPID; | 1101 | SECONDARY_EXEC_ENABLE_VPID | |
1025 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2, | 1102 | SECONDARY_EXEC_ENABLE_EPT; |
1103 | if (adjust_vmx_controls(min2, opt2, | ||
1104 | MSR_IA32_VMX_PROCBASED_CTLS2, | ||
1026 | &_cpu_based_2nd_exec_control) < 0) | 1105 | &_cpu_based_2nd_exec_control) < 0) |
1027 | return -EIO; | 1106 | return -EIO; |
1028 | } | 1107 | } |
@@ -1031,6 +1110,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1031 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) | 1110 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) |
1032 | _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; | 1111 | _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; |
1033 | #endif | 1112 | #endif |
1113 | if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { | ||
1114 | /* CR3 accesses don't need to cause VM Exits when EPT enabled */ | ||
1115 | min &= ~(CPU_BASED_CR3_LOAD_EXITING | | ||
1116 | CPU_BASED_CR3_STORE_EXITING); | ||
1117 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, | ||
1118 | &_cpu_based_exec_control) < 0) | ||
1119 | return -EIO; | ||
1120 | rdmsr(MSR_IA32_VMX_EPT_VPID_CAP, | ||
1121 | vmx_capability.ept, vmx_capability.vpid); | ||
1122 | } | ||
1034 | 1123 | ||
1035 | min = 0; | 1124 | min = 0; |
1036 | #ifdef CONFIG_X86_64 | 1125 | #ifdef CONFIG_X86_64 |
@@ -1256,7 +1345,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1256 | fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); | 1345 | fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); |
1257 | 1346 | ||
1258 | kvm_mmu_reset_context(vcpu); | 1347 | kvm_mmu_reset_context(vcpu); |
1259 | init_rmode_tss(vcpu->kvm); | 1348 | init_rmode(vcpu->kvm); |
1260 | } | 1349 | } |
1261 | 1350 | ||
1262 | #ifdef CONFIG_X86_64 | 1351 | #ifdef CONFIG_X86_64 |
@@ -1304,8 +1393,64 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | |||
1304 | vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; | 1393 | vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; |
1305 | } | 1394 | } |
1306 | 1395 | ||
1396 | static void ept_load_pdptrs(struct kvm_vcpu *vcpu) | ||
1397 | { | ||
1398 | if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { | ||
1399 | if (!load_pdptrs(vcpu, vcpu->arch.cr3)) { | ||
1400 | printk(KERN_ERR "EPT: Fail to load pdptrs!\n"); | ||
1401 | return; | ||
1402 | } | ||
1403 | vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]); | ||
1404 | vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]); | ||
1405 | vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]); | ||
1406 | vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]); | ||
1407 | } | ||
1408 | } | ||
1409 | |||
1410 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); | ||
1411 | |||
1412 | static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | ||
1413 | unsigned long cr0, | ||
1414 | struct kvm_vcpu *vcpu) | ||
1415 | { | ||
1416 | if (!(cr0 & X86_CR0_PG)) { | ||
1417 | /* From paging/starting to nonpaging */ | ||
1418 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | ||
1419 | vmcs_config.cpu_based_exec_ctrl | | ||
1420 | (CPU_BASED_CR3_LOAD_EXITING | | ||
1421 | CPU_BASED_CR3_STORE_EXITING)); | ||
1422 | vcpu->arch.cr0 = cr0; | ||
1423 | vmx_set_cr4(vcpu, vcpu->arch.cr4); | ||
1424 | *hw_cr0 |= X86_CR0_PE | X86_CR0_PG; | ||
1425 | *hw_cr0 &= ~X86_CR0_WP; | ||
1426 | } else if (!is_paging(vcpu)) { | ||
1427 | /* From nonpaging to paging */ | ||
1428 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | ||
1429 | vmcs_config.cpu_based_exec_ctrl & | ||
1430 | ~(CPU_BASED_CR3_LOAD_EXITING | | ||
1431 | CPU_BASED_CR3_STORE_EXITING)); | ||
1432 | vcpu->arch.cr0 = cr0; | ||
1433 | vmx_set_cr4(vcpu, vcpu->arch.cr4); | ||
1434 | if (!(vcpu->arch.cr0 & X86_CR0_WP)) | ||
1435 | *hw_cr0 &= ~X86_CR0_WP; | ||
1436 | } | ||
1437 | } | ||
1438 | |||
1439 | static void ept_update_paging_mode_cr4(unsigned long *hw_cr4, | ||
1440 | struct kvm_vcpu *vcpu) | ||
1441 | { | ||
1442 | if (!is_paging(vcpu)) { | ||
1443 | *hw_cr4 &= ~X86_CR4_PAE; | ||
1444 | *hw_cr4 |= X86_CR4_PSE; | ||
1445 | } else if (!(vcpu->arch.cr4 & X86_CR4_PAE)) | ||
1446 | *hw_cr4 &= ~X86_CR4_PAE; | ||
1447 | } | ||
1448 | |||
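The two helpers above re-enable CR3 exiting and rewrite the hardware view of
CR0/CR4 whenever a guest running under EPT turns paging off, since the CPU
itself keeps paging on. A condensed sketch of the CR0 half (hypothetical
helper, same constants as the patch; the real code also masks with
KVM_GUEST_CR0_MASK):

    static unsigned long ept_hw_cr0(unsigned long guest_cr0)
    {
            unsigned long hw_cr0 = guest_cr0 | KVM_VM_CR0_ALWAYS_ON;

            if (!(guest_cr0 & X86_CR0_PG)) {
                    /* nonpaging guest: hardware still pages, so force
                     * PE|PG and drop WP so guest writes aren't blocked */
                    hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
                    hw_cr0 &= ~X86_CR0_WP;
            } else if (!(guest_cr0 & X86_CR0_WP))
                    hw_cr0 &= ~X86_CR0_WP;
            return hw_cr0;
    }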
1307 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 1449 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
1308 | { | 1450 | { |
1451 | unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | | ||
1452 | KVM_VM_CR0_ALWAYS_ON; | ||
1453 | |||
1309 | vmx_fpu_deactivate(vcpu); | 1454 | vmx_fpu_deactivate(vcpu); |
1310 | 1455 | ||
1311 | if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE)) | 1456 | if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE)) |
@@ -1323,29 +1468,61 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1323 | } | 1468 | } |
1324 | #endif | 1469 | #endif |
1325 | 1470 | ||
1471 | if (vm_need_ept()) | ||
1472 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); | ||
1473 | |||
1326 | vmcs_writel(CR0_READ_SHADOW, cr0); | 1474 | vmcs_writel(CR0_READ_SHADOW, cr0); |
1327 | vmcs_writel(GUEST_CR0, | 1475 | vmcs_writel(GUEST_CR0, hw_cr0); |
1328 | (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); | ||
1329 | vcpu->arch.cr0 = cr0; | 1476 | vcpu->arch.cr0 = cr0; |
1330 | 1477 | ||
1331 | if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) | 1478 | if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) |
1332 | vmx_fpu_activate(vcpu); | 1479 | vmx_fpu_activate(vcpu); |
1333 | } | 1480 | } |
1334 | 1481 | ||
1482 | static u64 construct_eptp(unsigned long root_hpa) | ||
1483 | { | ||
1484 | u64 eptp; | ||
1485 | |||
1486 | /* TODO: derive these values by reading the capability MSR */ | ||
1487 | eptp = VMX_EPT_DEFAULT_MT | | ||
1488 | VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; | ||
1489 | eptp |= (root_hpa & PAGE_MASK); | ||
1490 | |||
1491 | return eptp; | ||
1492 | } | ||
1493 | |||
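construct_eptp() packs the EPT memory type into bits 2:0, the guest address
width into bits 5:3 and the page-aligned table root above that. A worked
instance using the vmx.h constants added later in this diff (DEFAULT_MT = 6,
DEFAULT_GAW = 3, GAW_EPTP_SHIFT = 3) and a made-up root address:

    unsigned long root_hpa = 0x12345000ul;  /* hypothetical root */
    u64 eptp = VMX_EPT_DEFAULT_MT |         /* bits 2:0 = 6, write-back */
               VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; /* 3 << 3 */
    eptp |= root_hpa & PAGE_MASK;           /* result: 0x1234501e */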
1335 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 1494 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
1336 | { | 1495 | { |
1496 | unsigned long guest_cr3; | ||
1497 | u64 eptp; | ||
1498 | |||
1499 | guest_cr3 = cr3; | ||
1500 | if (vm_need_ept()) { | ||
1501 | eptp = construct_eptp(cr3); | ||
1502 | vmcs_write64(EPT_POINTER, eptp); | ||
1503 | ept_sync_context(eptp); | ||
1504 | ept_load_pdptrs(vcpu); | ||
1505 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : | ||
1506 | VMX_EPT_IDENTITY_PAGETABLE_ADDR; | ||
1507 | } | ||
1508 | |||
1337 | vmx_flush_tlb(vcpu); | 1509 | vmx_flush_tlb(vcpu); |
1338 | vmcs_writel(GUEST_CR3, cr3); | 1510 | vmcs_writel(GUEST_CR3, guest_cr3); |
1339 | if (vcpu->arch.cr0 & X86_CR0_PE) | 1511 | if (vcpu->arch.cr0 & X86_CR0_PE) |
1340 | vmx_fpu_deactivate(vcpu); | 1512 | vmx_fpu_deactivate(vcpu); |
1341 | } | 1513 | } |
1342 | 1514 | ||
1343 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 1515 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
1344 | { | 1516 | { |
1345 | vmcs_writel(CR4_READ_SHADOW, cr4); | 1517 | unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ? |
1346 | vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ? | 1518 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); |
1347 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON)); | 1519 | |
1348 | vcpu->arch.cr4 = cr4; | 1520 | vcpu->arch.cr4 = cr4; |
1521 | if (vm_need_ept()) | ||
1522 | ept_update_paging_mode_cr4(&hw_cr4, vcpu); | ||
1523 | |||
1524 | vmcs_writel(CR4_READ_SHADOW, cr4); | ||
1525 | vmcs_writel(GUEST_CR4, hw_cr4); | ||
1349 | } | 1526 | } |
1350 | 1527 | ||
1351 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 1528 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
@@ -1530,6 +1707,41 @@ out: | |||
1530 | return ret; | 1707 | return ret; |
1531 | } | 1708 | } |
1532 | 1709 | ||
1710 | static int init_rmode_identity_map(struct kvm *kvm) | ||
1711 | { | ||
1712 | int i, r, ret; | ||
1713 | pfn_t identity_map_pfn; | ||
1714 | u32 tmp; | ||
1715 | |||
1716 | if (!vm_need_ept()) | ||
1717 | return 1; | ||
1718 | if (unlikely(!kvm->arch.ept_identity_pagetable)) { | ||
1719 | printk(KERN_ERR "EPT: identity-mapping pagetable " | ||
1720 | "haven't been allocated!\n"); | ||
1721 | return 0; | ||
1722 | } | ||
1723 | if (likely(kvm->arch.ept_identity_pagetable_done)) | ||
1724 | return 1; | ||
1725 | ret = 0; | ||
1726 | identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT; | ||
1727 | r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); | ||
1728 | if (r < 0) | ||
1729 | goto out; | ||
1730 | /* Set up identity-mapping pagetable for EPT in real mode */ | ||
1731 | for (i = 0; i < PT32_ENT_PER_PAGE; i++) { | ||
1732 | tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | | ||
1733 | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); | ||
1734 | r = kvm_write_guest_page(kvm, identity_map_pfn, | ||
1735 | &tmp, i * sizeof(tmp), sizeof(tmp)); | ||
1736 | if (r < 0) | ||
1737 | goto out; | ||
1738 | } | ||
1739 | kvm->arch.ept_identity_pagetable_done = true; | ||
1740 | ret = 1; | ||
1741 | out: | ||
1742 | return ret; | ||
1743 | } | ||
1744 | |||
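Each of the 1024 entries written by the loop above is a 4MB PSE mapping whose
guest-physical address equals its guest-virtual address, so a real-mode guest
under EPT sees the low 4GB identity-mapped through a single page of
page-directory entries. One entry, spelled out:

    /* entry i = 1 maps 0x00400000-0x007fffff onto itself */
    u32 pde = (1 << 22) | (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
                           _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);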
1533 | static void seg_setup(int seg) | 1745 | static void seg_setup(int seg) |
1534 | { | 1746 | { |
1535 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 1747 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
@@ -1564,6 +1776,31 @@ out: | |||
1564 | return r; | 1776 | return r; |
1565 | } | 1777 | } |
1566 | 1778 | ||
1779 | static int alloc_identity_pagetable(struct kvm *kvm) | ||
1780 | { | ||
1781 | struct kvm_userspace_memory_region kvm_userspace_mem; | ||
1782 | int r = 0; | ||
1783 | |||
1784 | down_write(&kvm->slots_lock); | ||
1785 | if (kvm->arch.ept_identity_pagetable) | ||
1786 | goto out; | ||
1787 | kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; | ||
1788 | kvm_userspace_mem.flags = 0; | ||
1789 | kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; | ||
1790 | kvm_userspace_mem.memory_size = PAGE_SIZE; | ||
1791 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); | ||
1792 | if (r) | ||
1793 | goto out; | ||
1794 | |||
1795 | down_read(¤t->mm->mmap_sem); | ||
1796 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, | ||
1797 | VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT); | ||
1798 | up_read(¤t->mm->mmap_sem); | ||
1799 | out: | ||
1800 | up_write(&kvm->slots_lock); | ||
1801 | return r; | ||
1802 | } | ||
1803 | |||
1567 | static void allocate_vpid(struct vcpu_vmx *vmx) | 1804 | static void allocate_vpid(struct vcpu_vmx *vmx) |
1568 | { | 1805 | { |
1569 | int vpid; | 1806 | int vpid; |
@@ -1638,6 +1875,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
1638 | CPU_BASED_CR8_LOAD_EXITING; | 1875 | CPU_BASED_CR8_LOAD_EXITING; |
1639 | #endif | 1876 | #endif |
1640 | } | 1877 | } |
1878 | if (!vm_need_ept()) | ||
1879 | exec_control |= CPU_BASED_CR3_STORE_EXITING | | ||
1880 | CPU_BASED_CR3_LOAD_EXITING; | ||
1641 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); | 1881 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); |
1642 | 1882 | ||
1643 | if (cpu_has_secondary_exec_ctrls()) { | 1883 | if (cpu_has_secondary_exec_ctrls()) { |
@@ -1647,6 +1887,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
1647 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 1887 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
1648 | if (vmx->vpid == 0) | 1888 | if (vmx->vpid == 0) |
1649 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | 1889 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
1890 | if (!vm_need_ept()) | ||
1891 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | ||
1650 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 1892 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
1651 | } | 1893 | } |
1652 | 1894 | ||
@@ -1722,6 +1964,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
1722 | return 0; | 1964 | return 0; |
1723 | } | 1965 | } |
1724 | 1966 | ||
1967 | static int init_rmode(struct kvm *kvm) | ||
1968 | { | ||
1969 | if (!init_rmode_tss(kvm)) | ||
1970 | return 0; | ||
1971 | if (!init_rmode_identity_map(kvm)) | ||
1972 | return 0; | ||
1973 | return 1; | ||
1974 | } | ||
1975 | |||
1725 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | 1976 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) |
1726 | { | 1977 | { |
1727 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1978 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -1729,7 +1980,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
1729 | int ret; | 1980 | int ret; |
1730 | 1981 | ||
1731 | down_read(&vcpu->kvm->slots_lock); | 1982 | down_read(&vcpu->kvm->slots_lock); |
1732 | if (!init_rmode_tss(vmx->vcpu.kvm)) { | 1983 | if (!init_rmode(vmx->vcpu.kvm)) { |
1733 | ret = -ENOMEM; | 1984 | ret = -ENOMEM; |
1734 | goto out; | 1985 | goto out; |
1735 | } | 1986 | } |
@@ -1994,6 +2245,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1994 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) | 2245 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) |
1995 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 2246 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
1996 | if (is_page_fault(intr_info)) { | 2247 | if (is_page_fault(intr_info)) { |
2248 | /* EPT won't cause a page fault directly */ | ||
2249 | if (vm_need_ept()) | ||
2250 | BUG(); | ||
1997 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 2251 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
1998 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, | 2252 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, |
1999 | (u32)((u64)cr2 >> 32), handler); | 2253 | (u32)((u64)cr2 >> 32), handler); |
@@ -2323,6 +2577,64 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2323 | return kvm_task_switch(vcpu, tss_selector, reason); | 2577 | return kvm_task_switch(vcpu, tss_selector, reason); |
2324 | } | 2578 | } |
2325 | 2579 | ||
2580 | static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
2581 | { | ||
2582 | u64 exit_qualification; | ||
2583 | enum emulation_result er; | ||
2584 | gpa_t gpa; | ||
2585 | unsigned long hva; | ||
2586 | int gla_validity; | ||
2587 | int r; | ||
2588 | |||
2589 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | ||
2590 | |||
2591 | if (exit_qualification & (1 << 6)) { | ||
2592 | printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); | ||
2593 | return -ENOTSUPP; | ||
2594 | } | ||
2595 | |||
2596 | gla_validity = (exit_qualification >> 7) & 0x3; | ||
2597 | if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) { | ||
2598 | printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); | ||
2599 | printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", | ||
2600 | (unsigned long)vmcs_read64(GUEST_PHYSICAL_ADDRESS), | ||
2601 | (unsigned long)vmcs_read64(GUEST_LINEAR_ADDRESS)); | ||
2602 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", | ||
2603 | (unsigned long)exit_qualification); | ||
2604 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | ||
2605 | kvm_run->hw.hardware_exit_reason = 0; | ||
2606 | return -ENOTSUPP; | ||
2607 | } | ||
2608 | |||
2609 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | ||
2610 | hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT); | ||
2611 | if (!kvm_is_error_hva(hva)) { | ||
2612 | r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); | ||
2613 | if (r < 0) { | ||
2614 | printk(KERN_ERR "EPT: Not enough memory!\n"); | ||
2615 | return -ENOMEM; | ||
2616 | } | ||
2617 | return 1; | ||
2618 | } else { | ||
2619 | /* must be MMIO */ | ||
2620 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | ||
2621 | |||
2622 | if (er == EMULATE_FAIL) { | ||
2623 | printk(KERN_ERR | ||
2624 | "EPT: Fail to handle EPT violation vmexit!er is %d\n", | ||
2625 | er); | ||
2626 | printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", | ||
2627 | (unsigned long)vmcs_read64(GUEST_PHYSICAL_ADDRESS), | ||
2628 | (unsigned long)vmcs_read64(GUEST_LINEAR_ADDRESS)); | ||
2629 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", | ||
2630 | (unsigned long)exit_qualification); | ||
2631 | return -ENOTSUPP; | ||
2632 | } else if (er == EMULATE_DO_MMIO) | ||
2633 | return 0; | ||
2634 | } | ||
2635 | return 1; | ||
2636 | } | ||
2637 | |||
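For reference, the two exit-qualification fields the handler above consumes,
extracted the same way the code does (this mirrors the patch's reading of the
bits, not a full decode of the format):

    int gpa_exceeds_gaw = (exit_qualification >> 6) & 0x1; /* treated as fatal */
    int gla_validity    = (exit_qualification >> 7) & 0x3; /* 0, 1, 3 accepted */

A gpa with a valid hva is fed to kvm_mmu_page_fault() to build the EPT
mapping; a gpa with no backing memslot is treated as MMIO and handed to the
instruction emulator.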
2326 | /* | 2638 | /* |
2327 | * The exit handlers return 1 if the exit was handled fully and guest execution | 2639 | * The exit handlers return 1 if the exit was handled fully and guest execution |
2328 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 2640 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
@@ -2346,6 +2658,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
2346 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 2658 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
2347 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 2659 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
2348 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, | 2660 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, |
2661 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | ||
2349 | }; | 2662 | }; |
2350 | 2663 | ||
2351 | static const int kvm_vmx_max_exit_handlers = | 2664 | static const int kvm_vmx_max_exit_handlers = |
@@ -2364,6 +2677,13 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2364 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), | 2677 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), |
2365 | (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); | 2678 | (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); |
2366 | 2679 | ||
2680 | /* CR3 accesses don't cause a VM exit in paging mode, so we need | ||
2681 | * to sync with the guest's real CR3. */ | ||
2682 | if (vm_need_ept() && is_paging(vcpu)) { | ||
2683 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | ||
2684 | ept_load_pdptrs(vcpu); | ||
2685 | } | ||
2686 | |||
2367 | if (unlikely(vmx->fail)) { | 2687 | if (unlikely(vmx->fail)) { |
2368 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2688 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
2369 | kvm_run->fail_entry.hardware_entry_failure_reason | 2689 | kvm_run->fail_entry.hardware_entry_failure_reason |
@@ -2372,7 +2692,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2372 | } | 2692 | } |
2373 | 2693 | ||
2374 | if ((vectoring_info & VECTORING_INFO_VALID_MASK) && | 2694 | if ((vectoring_info & VECTORING_INFO_VALID_MASK) && |
2375 | exit_reason != EXIT_REASON_EXCEPTION_NMI) | 2695 | (exit_reason != EXIT_REASON_EXCEPTION_NMI && |
2696 | exit_reason != EXIT_REASON_EPT_VIOLATION)) | ||
2376 | printk(KERN_WARNING "%s: unexpected, valid vectoring info and " | 2697 | printk(KERN_WARNING "%s: unexpected, valid vectoring info and " |
2377 | "exit reason is 0x%x\n", __func__, exit_reason); | 2698 | "exit reason is 0x%x\n", __func__, exit_reason); |
2378 | if (exit_reason < kvm_vmx_max_exit_handlers | 2699 | if (exit_reason < kvm_vmx_max_exit_handlers |
@@ -2674,6 +2995,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
2674 | return ERR_PTR(-ENOMEM); | 2995 | return ERR_PTR(-ENOMEM); |
2675 | 2996 | ||
2676 | allocate_vpid(vmx); | 2997 | allocate_vpid(vmx); |
2998 | if (id == 0 && vm_need_ept()) { | ||
2999 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | ||
3000 | VMX_EPT_WRITABLE_MASK | | ||
3001 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); | ||
3002 | kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, | ||
3003 | VMX_EPT_FAKE_DIRTY_MASK, 0ull, | ||
3004 | VMX_EPT_EXECUTABLE_MASK); | ||
3005 | kvm_enable_tdp(); | ||
3006 | } | ||
2677 | 3007 | ||
2678 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); | 3008 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); |
2679 | if (err) | 3009 | if (err) |
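With the base and mask bits configured as above, a writable, executable leaf
EPT entry built by the MMU comes out shaped roughly like this (a sketch only;
the pfn is hypothetical):

    u64 pte = VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK |
              VMX_EPT_EXECUTABLE_MASK |
              (VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT) |
              (pfn << PAGE_SHIFT); /* low bits 0x37: R/W/X, WB in 5:3 */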
@@ -2706,6 +3036,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
2706 | if (alloc_apic_access_page(kvm) != 0) | 3036 | if (alloc_apic_access_page(kvm) != 0) |
2707 | goto free_vmcs; | 3037 | goto free_vmcs; |
2708 | 3038 | ||
3039 | if (vm_need_ept()) | ||
3040 | if (alloc_identity_pagetable(kvm) != 0) | ||
3041 | goto free_vmcs; | ||
3042 | |||
2709 | return &vmx->vcpu; | 3043 | return &vmx->vcpu; |
2710 | 3044 | ||
2711 | free_vmcs: | 3045 | free_vmcs: |
@@ -2735,6 +3069,11 @@ static void __init vmx_check_processor_compat(void *rtn) | |||
2735 | } | 3069 | } |
2736 | } | 3070 | } |
2737 | 3071 | ||
3072 | static int get_ept_level(void) | ||
3073 | { | ||
3074 | return VMX_EPT_DEFAULT_GAW + 1; | ||
3075 | } | ||
3076 | |||
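The arithmetic behind get_ept_level(): GAW is encoded as the page-walk length
minus one, so the default GAW of 3 yields a 4-level EPT walk, covering
4 * 9 + 12 = 48 bits of guest-physical address space:

    int levels   = VMX_EPT_DEFAULT_GAW + 1; /* 3 + 1 = 4 */
    int gpa_bits = levels * 9 + 12;         /* 48 */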
2738 | static struct kvm_x86_ops vmx_x86_ops = { | 3077 | static struct kvm_x86_ops vmx_x86_ops = { |
2739 | .cpu_has_kvm_support = cpu_has_kvm_support, | 3078 | .cpu_has_kvm_support = cpu_has_kvm_support, |
2740 | .disabled_by_bios = vmx_disabled_by_bios, | 3079 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -2791,6 +3130,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
2791 | .inject_pending_vectors = do_interrupt_requests, | 3130 | .inject_pending_vectors = do_interrupt_requests, |
2792 | 3131 | ||
2793 | .set_tss_addr = vmx_set_tss_addr, | 3132 | .set_tss_addr = vmx_set_tss_addr, |
3133 | .get_tdp_level = get_ept_level, | ||
2794 | }; | 3134 | }; |
2795 | 3135 | ||
2796 | static int __init vmx_init(void) | 3136 | static int __init vmx_init(void) |
@@ -2843,9 +3183,14 @@ static int __init vmx_init(void) | |||
2843 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); | 3183 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); |
2844 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); | 3184 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); |
2845 | 3185 | ||
3186 | if (cpu_has_vmx_ept()) | ||
3187 | bypass_guest_pf = 0; | ||
3188 | |||
2846 | if (bypass_guest_pf) | 3189 | if (bypass_guest_pf) |
2847 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); | 3190 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); |
2848 | 3191 | ||
3192 | ept_sync_global(); | ||
3193 | |||
2849 | return 0; | 3194 | return 0; |
2850 | 3195 | ||
2851 | out2: | 3196 | out2: |
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 5dff4606b988..79d94c610dfe 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h | |||
@@ -35,6 +35,8 @@ | |||
35 | #define CPU_BASED_MWAIT_EXITING 0x00000400 | 35 | #define CPU_BASED_MWAIT_EXITING 0x00000400 |
36 | #define CPU_BASED_RDPMC_EXITING 0x00000800 | 36 | #define CPU_BASED_RDPMC_EXITING 0x00000800 |
37 | #define CPU_BASED_RDTSC_EXITING 0x00001000 | 37 | #define CPU_BASED_RDTSC_EXITING 0x00001000 |
38 | #define CPU_BASED_CR3_LOAD_EXITING 0x00008000 | ||
39 | #define CPU_BASED_CR3_STORE_EXITING 0x00010000 | ||
38 | #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 | 40 | #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 |
39 | #define CPU_BASED_CR8_STORE_EXITING 0x00100000 | 41 | #define CPU_BASED_CR8_STORE_EXITING 0x00100000 |
40 | #define CPU_BASED_TPR_SHADOW 0x00200000 | 42 | #define CPU_BASED_TPR_SHADOW 0x00200000 |
@@ -49,6 +51,7 @@ | |||
49 | * Definitions of Secondary Processor-Based VM-Execution Controls. | 51 | * Definitions of Secondary Processor-Based VM-Execution Controls. |
50 | */ | 52 | */ |
51 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 | 53 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 |
54 | #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 | ||
52 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 | 55 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 |
53 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 | 56 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 |
54 | 57 | ||
@@ -100,10 +103,22 @@ enum vmcs_field { | |||
100 | VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, | 103 | VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, |
101 | APIC_ACCESS_ADDR = 0x00002014, | 104 | APIC_ACCESS_ADDR = 0x00002014, |
102 | APIC_ACCESS_ADDR_HIGH = 0x00002015, | 105 | APIC_ACCESS_ADDR_HIGH = 0x00002015, |
106 | EPT_POINTER = 0x0000201a, | ||
107 | EPT_POINTER_HIGH = 0x0000201b, | ||
108 | GUEST_PHYSICAL_ADDRESS = 0x00002400, | ||
109 | GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, | ||
103 | VMCS_LINK_POINTER = 0x00002800, | 110 | VMCS_LINK_POINTER = 0x00002800, |
104 | VMCS_LINK_POINTER_HIGH = 0x00002801, | 111 | VMCS_LINK_POINTER_HIGH = 0x00002801, |
105 | GUEST_IA32_DEBUGCTL = 0x00002802, | 112 | GUEST_IA32_DEBUGCTL = 0x00002802, |
106 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, | 113 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, |
114 | GUEST_PDPTR0 = 0x0000280a, | ||
115 | GUEST_PDPTR0_HIGH = 0x0000280b, | ||
116 | GUEST_PDPTR1 = 0x0000280c, | ||
117 | GUEST_PDPTR1_HIGH = 0x0000280d, | ||
118 | GUEST_PDPTR2 = 0x0000280e, | ||
119 | GUEST_PDPTR2_HIGH = 0x0000280f, | ||
120 | GUEST_PDPTR3 = 0x00002810, | ||
121 | GUEST_PDPTR3_HIGH = 0x00002811, | ||
107 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, | 122 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, |
108 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, | 123 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, |
109 | EXCEPTION_BITMAP = 0x00004004, | 124 | EXCEPTION_BITMAP = 0x00004004, |
@@ -226,6 +241,8 @@ enum vmcs_field { | |||
226 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 241 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
227 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 | 242 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 |
228 | #define EXIT_REASON_APIC_ACCESS 44 | 243 | #define EXIT_REASON_APIC_ACCESS 44 |
244 | #define EXIT_REASON_EPT_VIOLATION 48 | ||
245 | #define EXIT_REASON_EPT_MISCONFIG 49 | ||
229 | #define EXIT_REASON_WBINVD 54 | 246 | #define EXIT_REASON_WBINVD 54 |
230 | 247 | ||
231 | /* | 248 | /* |
@@ -316,15 +333,36 @@ enum vmcs_field { | |||
316 | #define MSR_IA32_VMX_CR4_FIXED1 0x489 | 333 | #define MSR_IA32_VMX_CR4_FIXED1 0x489 |
317 | #define MSR_IA32_VMX_VMCS_ENUM 0x48a | 334 | #define MSR_IA32_VMX_VMCS_ENUM 0x48a |
318 | #define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b | 335 | #define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b |
336 | #define MSR_IA32_VMX_EPT_VPID_CAP 0x48c | ||
319 | 337 | ||
320 | #define MSR_IA32_FEATURE_CONTROL 0x3a | 338 | #define MSR_IA32_FEATURE_CONTROL 0x3a |
321 | #define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 | 339 | #define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 |
322 | #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 | 340 | #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 |
323 | 341 | ||
324 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 | 342 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 |
343 | #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 | ||
325 | 344 | ||
326 | #define VMX_NR_VPIDS (1 << 16) | 345 | #define VMX_NR_VPIDS (1 << 16) |
327 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 | 346 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 |
328 | #define VMX_VPID_EXTENT_ALL_CONTEXT 2 | 347 | #define VMX_VPID_EXTENT_ALL_CONTEXT 2 |
329 | 348 | ||
349 | #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 | ||
350 | #define VMX_EPT_EXTENT_CONTEXT 1 | ||
351 | #define VMX_EPT_EXTENT_GLOBAL 2 | ||
352 | #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) | ||
353 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | ||
354 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | ||
355 | #define VMX_EPT_DEFAULT_GAW 3 | ||
356 | #define VMX_EPT_MAX_GAW 0x4 | ||
357 | #define VMX_EPT_MT_EPTE_SHIFT 3 | ||
358 | #define VMX_EPT_GAW_EPTP_SHIFT 3 | ||
359 | #define VMX_EPT_DEFAULT_MT 0x6ull | ||
360 | #define VMX_EPT_READABLE_MASK 0x1ull | ||
361 | #define VMX_EPT_WRITABLE_MASK 0x2ull | ||
362 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull | ||
363 | #define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62) | ||
364 | #define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63) | ||
365 | |||
366 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul | ||
367 | |||
330 | #endif | 368 | #endif |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ce556372a4d..21338bdb28ff 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -2417,6 +2417,9 @@ int kvm_arch_init(void *opaque) | |||
2417 | 2417 | ||
2418 | kvm_x86_ops = ops; | 2418 | kvm_x86_ops = ops; |
2419 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); | 2419 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); |
2420 | kvm_mmu_set_base_ptes(PT_PRESENT_MASK); | ||
2421 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | ||
2422 | PT_DIRTY_MASK, PT64_NX_MASK, 0); | ||
2420 | return 0; | 2423 | return 0; |
2421 | 2424 | ||
2422 | out: | 2425 | out: |
@@ -3019,6 +3022,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
3019 | 3022 | ||
3020 | kvm_x86_ops->decache_regs(vcpu); | 3023 | kvm_x86_ops->decache_regs(vcpu); |
3021 | 3024 | ||
3025 | vcpu->arch.exception.pending = false; | ||
3026 | |||
3022 | vcpu_put(vcpu); | 3027 | vcpu_put(vcpu); |
3023 | 3028 | ||
3024 | return 0; | 3029 | return 0; |
@@ -3481,7 +3486,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3481 | } | 3486 | } |
3482 | 3487 | ||
3483 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | 3488 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { |
3484 | cseg_desc.type &= ~(1 << 8); //clear the B flag | 3489 | cseg_desc.type &= ~(1 << 1); //clear the B flag |
3485 | save_guest_segment_descriptor(vcpu, tr_seg.selector, | 3490 | save_guest_segment_descriptor(vcpu, tr_seg.selector, |
3486 | &cseg_desc); | 3491 | &cseg_desc); |
3487 | } | 3492 | } |
@@ -3507,7 +3512,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3507 | } | 3512 | } |
3508 | 3513 | ||
3509 | if (reason != TASK_SWITCH_IRET) { | 3514 | if (reason != TASK_SWITCH_IRET) { |
3510 | nseg_desc.type |= (1 << 8); | 3515 | nseg_desc.type |= (1 << 1); |
3511 | save_guest_segment_descriptor(vcpu, tss_selector, | 3516 | save_guest_segment_descriptor(vcpu, tss_selector, |
3512 | &nseg_desc); | 3517 | &nseg_desc); |
3513 | } | 3518 | } |
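The (1 << 8) to (1 << 1) change above fixes which bit of the 4-bit descriptor
type field is treated as the TSS busy ("B") flag: a 32-bit TSS descriptor has
type 0x9 when available and 0xb when busy, so the flag is bit 1, and bit 8
falls outside the type field entirely:

    #define TSS_TYPE_AVAILABLE 0x9
    #define TSS_TYPE_BUSY      0xb /* 0x9 | (1 << 1) */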
@@ -3698,10 +3703,19 @@ void fx_init(struct kvm_vcpu *vcpu) | |||
3698 | { | 3703 | { |
3699 | unsigned after_mxcsr_mask; | 3704 | unsigned after_mxcsr_mask; |
3700 | 3705 | ||
3706 | /* | ||
3707 | * Touch the FPU the first time in a non-atomic context: if | ||
3708 | * this is the task's first FPU instruction, the exception | ||
3709 | * handler fires before the instruction returns and has to | ||
3710 | * allocate RAM with GFP_KERNEL. | ||
3711 | */ | ||
3712 | if (!used_math()) | ||
3713 | fx_save(&vcpu->arch.host_fx_image); | ||
3714 | |||
3701 | /* Initialize guest FPU by resetting ours and saving into guest's */ | 3715 | /* Initialize guest FPU by resetting ours and saving into guest's */ |
3702 | preempt_disable(); | 3716 | preempt_disable(); |
3703 | fx_save(&vcpu->arch.host_fx_image); | 3717 | fx_save(&vcpu->arch.host_fx_image); |
3704 | fpu_init(); | 3718 | fx_finit(); |
3705 | fx_save(&vcpu->arch.guest_fx_image); | 3719 | fx_save(&vcpu->arch.guest_fx_image); |
3706 | fx_restore(&vcpu->arch.host_fx_image); | 3720 | fx_restore(&vcpu->arch.host_fx_image); |
3707 | preempt_enable(); | 3721 | preempt_enable(); |
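The new used_math() check makes the ordering explicit: the first fx_save()
runs with preemption still enabled, so a device-not-available fault raised by
a task that has never touched the FPU can allocate its state with GFP_KERNEL.
Annotated, this is the same sequence as above:

    if (!used_math())
            fx_save(&vcpu->arch.host_fx_image); /* may fault and sleep */
    preempt_disable();                          /* no sleeping past here */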
@@ -3906,6 +3920,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
3906 | kvm_free_physmem(kvm); | 3920 | kvm_free_physmem(kvm); |
3907 | if (kvm->arch.apic_access_page) | 3921 | if (kvm->arch.apic_access_page) |
3908 | put_page(kvm->arch.apic_access_page); | 3922 | put_page(kvm->arch.apic_access_page); |
3923 | if (kvm->arch.ept_identity_pagetable) | ||
3924 | put_page(kvm->arch.ept_identity_pagetable); | ||
3909 | kfree(kvm); | 3925 | kfree(kvm); |
3910 | } | 3926 | } |
3911 | 3927 | ||
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 2ca08386f993..f2a696d6a243 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
@@ -1761,6 +1761,7 @@ twobyte_insn: | |||
1761 | case 6: /* lmsw */ | 1761 | case 6: /* lmsw */ |
1762 | realmode_lmsw(ctxt->vcpu, (u16)c->src.val, | 1762 | realmode_lmsw(ctxt->vcpu, (u16)c->src.val, |
1763 | &ctxt->eflags); | 1763 | &ctxt->eflags); |
1764 | c->dst.type = OP_NONE; | ||
1764 | break; | 1765 | break; |
1765 | case 7: /* invlpg*/ | 1766 | case 7: /* invlpg*/ |
1766 | emulate_invlpg(ctxt->vcpu, memop); | 1767 | emulate_invlpg(ctxt->vcpu, memop); |
diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c index ecab9fff0fd1..2ad598c104af 100644 --- a/arch/x86/mach-voyager/voyager_cat.c +++ b/arch/x86/mach-voyager/voyager_cat.c | |||
@@ -877,7 +877,7 @@ void __init voyager_cat_init(void) | |||
877 | request_resource(&iomem_resource, res); | 877 | request_resource(&iomem_resource, res); |
878 | } | 878 | } |
879 | 879 | ||
880 | qic_addr = (unsigned long)ioremap(qic_addr, 0x400); | 880 | qic_addr = (unsigned long)ioremap_cache(qic_addr, 0x400); |
881 | 881 | ||
882 | for (j = 0; j < 4; j++) { | 882 | for (j = 0; j < 4; j++) { |
883 | __u8 cpu; | 883 | __u8 cpu; |
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 18378850e25a..914ccf983687 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c | |||
@@ -476,29 +476,3 @@ int memory_add_physaddr_to_nid(u64 addr) | |||
476 | 476 | ||
477 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | 477 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); |
478 | #endif | 478 | #endif |
479 | |||
480 | #ifndef CONFIG_HAVE_ARCH_PARSE_SRAT | ||
481 | /* | ||
482 | * XXX FIXME: Make SLIT table parsing available to 32-bit NUMA | ||
483 | * | ||
484 | * These stub functions are needed to compile 32-bit NUMA when SRAT is | ||
485 | * not set. There are functions in srat_64.c for parsing this table | ||
486 | * and it may be possible to make them common functions. | ||
487 | */ | ||
488 | void acpi_numa_slit_init (struct acpi_table_slit *slit) | ||
489 | { | ||
490 | printk(KERN_INFO "ACPI: No support for parsing SLIT table\n"); | ||
491 | } | ||
492 | |||
493 | void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa) | ||
494 | { | ||
495 | } | ||
496 | |||
497 | void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma) | ||
498 | { | ||
499 | } | ||
500 | |||
501 | void acpi_numa_arch_fixup(void) | ||
502 | { | ||
503 | } | ||
504 | #endif /* CONFIG_HAVE_ARCH_PARSE_SRAT */ | ||
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 9cf33d3ee5bc..165c871ba9af 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c | |||
@@ -155,4 +155,3 @@ EXPORT_SYMBOL(kmap); | |||
155 | EXPORT_SYMBOL(kunmap); | 155 | EXPORT_SYMBOL(kunmap); |
156 | EXPORT_SYMBOL(kmap_atomic); | 156 | EXPORT_SYMBOL(kmap_atomic); |
157 | EXPORT_SYMBOL(kunmap_atomic); | 157 | EXPORT_SYMBOL(kunmap_atomic); |
158 | EXPORT_SYMBOL(kmap_atomic_to_page); | ||
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 804de18abcc2..71bb3159031a 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -149,7 +149,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
149 | * Don't allow anybody to remap normal RAM that we're using.. | 149 | * Don't allow anybody to remap normal RAM that we're using.. |
150 | */ | 150 | */ |
151 | for (pfn = phys_addr >> PAGE_SHIFT; | 151 | for (pfn = phys_addr >> PAGE_SHIFT; |
152 | (pfn << PAGE_SHIFT) < last_addr; pfn++) { | 152 | (pfn << PAGE_SHIFT) < (last_addr & PAGE_MASK); |
153 | pfn++) { | ||
153 | 154 | ||
154 | int is_ram = page_is_ram(pfn); | 155 | int is_ram = page_is_ram(pfn); |
155 | 156 | ||
@@ -176,11 +177,11 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
176 | /* | 177 | /* |
177 | * Do not fallback to certain memory types with certain | 178 | * Do not fallback to certain memory types with certain |
178 | * requested type: | 179 | * requested type: |
179 | * - request is uncached, return cannot be write-back | 180 | * - request is uc-, return cannot be write-back |
180 | * - request is uncached, return cannot be write-combine | 181 | * - request is uc-, return cannot be write-combine |
181 | * - request is write-combine, return cannot be write-back | 182 | * - request is write-combine, return cannot be write-back |
182 | */ | 183 | */ |
183 | if ((prot_val == _PAGE_CACHE_UC && | 184 | if ((prot_val == _PAGE_CACHE_UC_MINUS && |
184 | (new_prot_val == _PAGE_CACHE_WB || | 185 | (new_prot_val == _PAGE_CACHE_WB || |
185 | new_prot_val == _PAGE_CACHE_WC)) || | 186 | new_prot_val == _PAGE_CACHE_WC)) || |
186 | (prot_val == _PAGE_CACHE_WC && | 187 | (prot_val == _PAGE_CACHE_WC && |
@@ -201,6 +202,9 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
201 | default: | 202 | default: |
202 | prot = PAGE_KERNEL_NOCACHE; | 203 | prot = PAGE_KERNEL_NOCACHE; |
203 | break; | 204 | break; |
205 | case _PAGE_CACHE_UC_MINUS: | ||
206 | prot = PAGE_KERNEL_UC_MINUS; | ||
207 | break; | ||
204 | case _PAGE_CACHE_WC: | 208 | case _PAGE_CACHE_WC: |
205 | prot = PAGE_KERNEL_WC; | 209 | prot = PAGE_KERNEL_WC; |
206 | break; | 210 | break; |
@@ -255,7 +259,16 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
255 | */ | 259 | */ |
256 | void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) | 260 | void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) |
257 | { | 261 | { |
258 | return __ioremap_caller(phys_addr, size, _PAGE_CACHE_UC, | 262 | /* |
263 | * Ideally, this should be: | ||
264 | * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; | ||
265 | * | ||
266 | * Until we fix all X drivers to use ioremap_wc(), we will use | ||
267 | * UC MINUS. | ||
268 | */ | ||
269 | unsigned long val = _PAGE_CACHE_UC_MINUS; | ||
270 | |||
271 | return __ioremap_caller(phys_addr, size, val, | ||
259 | __builtin_return_address(0)); | 272 | __builtin_return_address(0)); |
260 | } | 273 | } |
261 | EXPORT_SYMBOL(ioremap_nocache); | 274 | EXPORT_SYMBOL(ioremap_nocache); |
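Driver-side consequence of the UC MINUS default above (the addresses are
hypothetical):

    void __iomem *regs = ioremap_nocache(0xfebf0000, 0x1000); /* UC- */
    void __iomem *fb   = ioremap_wc(0xd0000000, 8 << 20);     /* WC  */

UC- still allows an MTRR marking the region write-combining to take effect,
which is why it is the safe default until X drivers are converted to
ioremap_wc().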
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index bd5e05c654dc..60bcb5b6a37e 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -777,14 +777,20 @@ static inline int change_page_attr_clear(unsigned long addr, int numpages, | |||
777 | 777 | ||
778 | int _set_memory_uc(unsigned long addr, int numpages) | 778 | int _set_memory_uc(unsigned long addr, int numpages) |
779 | { | 779 | { |
780 | /* | ||
781 | * For now UC MINUS; see the comment in ioremap_nocache(). | ||
782 | */ | ||
780 | return change_page_attr_set(addr, numpages, | 783 | return change_page_attr_set(addr, numpages, |
781 | __pgprot(_PAGE_CACHE_UC)); | 784 | __pgprot(_PAGE_CACHE_UC_MINUS)); |
782 | } | 785 | } |
783 | 786 | ||
784 | int set_memory_uc(unsigned long addr, int numpages) | 787 | int set_memory_uc(unsigned long addr, int numpages) |
785 | { | 788 | { |
789 | /* | ||
790 | * For now UC MINUS; see the comment in ioremap_nocache(). | ||
791 | */ | ||
786 | if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, | 792 | if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, |
787 | _PAGE_CACHE_UC, NULL)) | 793 | _PAGE_CACHE_UC_MINUS, NULL)) |
788 | return -EINVAL; | 794 | return -EINVAL; |
789 | 795 | ||
790 | return _set_memory_uc(addr, numpages); | 796 | return _set_memory_uc(addr, numpages); |
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 index 2a1516efb542..89ec35d00efd 100644 --- a/arch/x86/pci/Makefile_32 +++ b/arch/x86/pci/Makefile_32 | |||
@@ -6,11 +6,19 @@ obj-$(CONFIG_PCI_DIRECT) += direct.o | |||
6 | obj-$(CONFIG_PCI_OLPC) += olpc.o | 6 | obj-$(CONFIG_PCI_OLPC) += olpc.o |
7 | 7 | ||
8 | pci-y := fixup.o | 8 | pci-y := fixup.o |
9 | |||
10 | # Do not change the ordering here. There is a nasty init function | ||
11 | # ordering dependency which breaks when you move acpi.o below | ||
12 | # legacy/irq.o | ||
9 | pci-$(CONFIG_ACPI) += acpi.o | 13 | pci-$(CONFIG_ACPI) += acpi.o |
10 | pci-y += legacy.o irq.o | 14 | pci-y += legacy.o irq.o |
11 | 15 | ||
16 | # Careful: VISWS and NUMAQ overrule the pci-y above. The colons are | ||
17 | # therefore correct. This needs a proper fix by disentangling the code. | ||
12 | pci-$(CONFIG_X86_VISWS) := visws.o fixup.o | 18 | pci-$(CONFIG_X86_VISWS) := visws.o fixup.o |
13 | pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o | 19 | pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o |
20 | |||
21 | # Necessary for NUMAQ as well | ||
14 | pci-$(CONFIG_NUMA) += mp_bus_to_node.o | 22 | pci-$(CONFIG_NUMA) += mp_bus_to_node.o |
15 | 23 | ||
16 | obj-y += $(pci-y) common.o early.o | 24 | obj-y += $(pci-y) common.o early.o |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 94f6c73a53d0..8af0f0bae2af 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -301,6 +301,13 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, | |||
301 | prot = pgprot_val(vma->vm_page_prot); | 301 | prot = pgprot_val(vma->vm_page_prot); |
302 | if (pat_wc_enabled && write_combine) | 302 | if (pat_wc_enabled && write_combine) |
303 | prot |= _PAGE_CACHE_WC; | 303 | prot |= _PAGE_CACHE_WC; |
304 | else if (pat_wc_enabled) | ||
305 | /* | ||
306 | * ioremap() and ioremap_nocache() default to UC MINUS for now. | ||
307 | * To avoid attribute conflicts, request UC MINUS here | ||
308 | * as well. | ||
309 | */ | ||
310 | prot |= _PAGE_CACHE_UC_MINUS; | ||
304 | else if (boot_cpu_data.x86 > 3) | 311 | else if (boot_cpu_data.x86 > 3) |
305 | prot |= _PAGE_CACHE_UC; | 312 | prot |= _PAGE_CACHE_UC; |
306 | 313 | ||
@@ -319,9 +326,8 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, | |||
319 | * - request is uncached, return cannot be write-combine | 326 | * - request is uncached, return cannot be write-combine |
320 | * - request is write-combine, return cannot be write-back | 327 | * - request is write-combine, return cannot be write-back |
321 | */ | 328 | */ |
322 | if ((flags == _PAGE_CACHE_UC && | 329 | if ((flags == _PAGE_CACHE_UC_MINUS && |
323 | (new_flags == _PAGE_CACHE_WB || | 330 | (new_flags == _PAGE_CACHE_WB)) || |
324 | new_flags == _PAGE_CACHE_WC)) || | ||
325 | (flags == _PAGE_CACHE_WC && | 331 | (flags == _PAGE_CACHE_WC && |
326 | new_flags == _PAGE_CACHE_WB)) { | 332 | new_flags == _PAGE_CACHE_WB)) { |
327 | free_memtype(addr, addr+len); | 333 | free_memtype(addr, addr+len); |
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index e2af8eee80e3..cf058fecfcee 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -162,7 +162,7 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr) | |||
162 | Elf32_Shdr *shdr; | 162 | Elf32_Shdr *shdr; |
163 | int i; | 163 | int i; |
164 | 164 | ||
165 | BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || | 165 | BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 || |
166 | !elf_check_arch_ia32(ehdr) || | 166 | !elf_check_arch_ia32(ehdr) || |
167 | ehdr->e_type != ET_DYN); | 167 | ehdr->e_type != ET_DYN); |
168 | 168 | ||
@@ -303,8 +303,6 @@ int __init sysenter_setup(void) | |||
303 | 303 | ||
304 | #ifdef CONFIG_X86_32 | 304 | #ifdef CONFIG_X86_32 |
305 | gate_vma_init(); | 305 | gate_vma_init(); |
306 | |||
307 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); | ||
308 | #endif | 306 | #endif |
309 | 307 | ||
310 | if (!vdso32_sysenter()) { | 308 | if (!vdso32_sysenter()) { |
diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c index 4db42bff8c60..69527688f794 100644 --- a/arch/x86/video/fbdev.c +++ b/arch/x86/video/fbdev.c | |||
@@ -1,5 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * | ||
3 | * Copyright (C) 2007 Antonino Daplas <adaplas@gmail.com> | 2 | * Copyright (C) 2007 Antonino Daplas <adaplas@gmail.com> |
4 | * | 3 | * |
5 | * This file is subject to the terms and conditions of the GNU General Public | 4 | * This file is subject to the terms and conditions of the GNU General Public |
@@ -29,3 +28,4 @@ int fb_is_primary_device(struct fb_info *info) | |||
29 | return retval; | 28 | return retval; |
30 | } | 29 | } |
31 | EXPORT_SYMBOL(fb_is_primary_device); | 30 | EXPORT_SYMBOL(fb_is_primary_device); |
31 | MODULE_LICENSE("GPL"); | ||