Diffstat (limited to 'arch/x86')
30 files changed, 590 insertions, 219 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c3f880902d66..bbcafaa160c0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -18,6 +18,7 @@ config X86_64
 ### Arch settings
 config X86
 	def_bool y
+	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
@@ -1661,6 +1662,7 @@ config GEODE_MFGPT_TIMER
 
 config OLPC
 	bool "One Laptop Per Child support"
+	depends on MGEODE_LX
 	default n
 	help
 	  Add support for detecting the unique features of the OLPC
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index d01ea42187e6..edaadea90aaf 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -191,7 +191,7 @@ static void read_ehdr(FILE *fp)
 		die("Cannot read ELF header: %s\n",
 			strerror(errno));
 	}
-	if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) {
+	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) {
 		die("No ELF magic\n");
 	}
 	if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) {
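The relocs.c change swaps the bare magic-number literal 4 for SELFMAG, the <elf.h> constant for the length of the ELF magic string. A standalone sketch of the same check in hosted userspace (illustrative only, not the kernel build tool itself):

#include <elf.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* <elf.h> defines ELFMAG as "\177ELF" and SELFMAG as 4, so the
	 * named constant matches the literal it replaces -- but it can
	 * never drift out of sync with the magic string. */
	unsigned char ident[EI_NIDENT] = { 0x7f, 'E', 'L', 'F' };

	if (memcmp(ident, ELFMAG, SELFMAG) != 0)
		printf("No ELF magic\n");
	else
		printf("valid ELF ident (SELFMAG == %d)\n", SELFMAG);
	return 0;
}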
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index bbdacb398d48..5e618c3b4720 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -83,9 +83,7 @@ obj-$(CONFIG_KVM_GUEST)		+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
 
-ifdef CONFIG_INPUT_PCSPKR
-obj-y				+= pcspeaker.o
-endif
+obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
 
 obj-$(CONFIG_SCx200)		+= scx200.o
 scx200-y			+= scx200_32.o
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 7335959b6aff..fd5ca97a2ad5 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -10,5 +10,5 @@ endif
 $(obj)/wakeup_rm.o:    $(obj)/realmode/wakeup.bin
 
 $(obj)/realmode/wakeup.bin: FORCE
-	$(Q)$(MAKE) $(build)=$(obj)/realmode $@
+	$(Q)$(MAKE) $(build)=$(obj)/realmode
 
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
index 092900854acc..1c31cc0e9def 100644
--- a/arch/x86/kernel/acpi/realmode/Makefile
+++ b/arch/x86/kernel/acpi/realmode/Makefile
@@ -6,7 +6,8 @@
 # for more details.
 #
 
-targets		:= wakeup.bin wakeup.elf
+always		:= wakeup.bin
+targets		:= wakeup.elf wakeup.lds
 
 wakeup-y	+= wakeup.o wakemain.o video-mode.o copy.o
 
@@ -48,7 +49,7 @@ LDFLAGS_wakeup.elf	:= -T
 
 CPPFLAGS_wakeup.lds += -P -C
 
-$(obj)/wakeup.elf: $(src)/wakeup.lds $(WAKEUP_OBJS) FORCE
+$(obj)/wakeup.elf: $(obj)/wakeup.lds $(WAKEUP_OBJS) FORCE
 	$(call if_changed,ld)
 
 OBJCOPYFLAGS_wakeup.bin	:= -O binary
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index ddee04043aeb..4bc1be5d5472 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -133,6 +133,7 @@ static int kvm_register_clock(void)
 	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
+#ifdef CONFIG_X86_LOCAL_APIC
 static void kvm_setup_secondary_clock(void)
 {
 	/*
@@ -143,6 +144,7 @@ static void kvm_setup_secondary_clock(void)
 	/* ok, done with our trickery, call native */
 	setup_secondary_APIC_clock();
 }
+#endif
 
 /*
  * After the clock is registered, the host will keep writing to the
@@ -177,7 +179,9 @@ void __init kvmclock_init(void)
 	pv_time_ops.get_wallclock = kvm_get_wallclock;
 	pv_time_ops.set_wallclock = kvm_set_wallclock;
 	pv_time_ops.sched_clock = kvm_clock_read;
+#ifdef CONFIG_X86_LOCAL_APIC
 	pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+#endif
 	machine_ops.shutdown = kvm_shutdown;
 #ifdef CONFIG_KEXEC
 	machine_ops.crash_shutdown = kvm_crash_shutdown;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 3e2c54dc8b29..404683b94e79 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -794,6 +794,11 @@ void __init find_smp_config(void)
                             ACPI-based MP Configuration
    -------------------------------------------------------------------------- */
 
+/*
+ * Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
+ */
+int es7000_plat;
+
 #ifdef CONFIG_ACPI
 
 #ifdef CONFIG_X86_IO_APIC
@@ -909,8 +914,6 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 	MP_intsrc_info(&intsrc);
 }
 
-int es7000_plat;
-
 void __init mp_config_acpi_legacy_irqs(void)
 {
 	struct mpc_config_intsrc intsrc;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 07c6d42ab5ff..f6be7d5f82f8 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -149,7 +149,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
-			DMI_MATCH(DMI_BOARD_NAME, "0WF810"),
 		},
 	},
 	{   /* Handle problems with rebooting on Dell Optiplex 745's DFF*/
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c0c68c18a788..cc6f5eb20b24 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -95,7 +95,7 @@ void __init setup_per_cpu_areas(void)
 
 	/* Copy section for each CPU (we discard the original) */
 	size = PERCPU_ENOUGH_ROOM;
-	printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
+	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 			  size);
 
 	for_each_possible_cpu(i) {
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 84241a256dc8..6b087ab6cd8f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -299,7 +299,7 @@ static void __cpuinit smp_callin(void)
 /*
  * Activate a secondary processor.
  */
-void __cpuinit start_secondary(void *unused)
+static void __cpuinit start_secondary(void *unused)
 {
 	/*
 	 * Don't put *anything* before cpu_init(), SMP booting is too
@@ -1306,7 +1306,7 @@ static void remove_siblinginfo(int cpu)
 	cpu_clear(cpu, cpu_sibling_setup_map);
 }
 
-int additional_cpus __initdata = -1;
+static int additional_cpus __initdata = -1;
 
 static __init int setup_additional_cpus(char *s)
 {
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index a86d26f036e1..d2ab52cc1d6b 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -22,23 +22,6 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
-/*
- * sys_pipe() is the normal C calling standard for creating
- * a pipe. It's not the way Unix traditionally does this, though.
- */
-asmlinkage int sys_pipe(unsigned long __user * fildes)
-{
-	int fd[2];
-	int error;
-
-	error = do_pipe(fd);
-	if (!error) {
-		if (copy_to_user(fildes, fd, 2*sizeof(int)))
-			error = -EFAULT;
-	}
-	return error;
-}
-
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			  unsigned long prot, unsigned long flags,
 			  unsigned long fd, unsigned long pgoff)
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index bd802a5e1aa3..3b360ef33817 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -17,23 +17,6 @@
 #include <asm/uaccess.h>
 #include <asm/ia32.h>
 
-/*
- * sys_pipe() is the normal C calling standard for creating
- * a pipe. It's not the way Unix traditionally does this, though.
- */
-asmlinkage long sys_pipe(int __user *fildes)
-{
-	int fd[2];
-	int error;
-
-	error = do_pipe(fd);
-	if (!error) {
-		if (copy_to_user(fildes, fd, 2*sizeof(int)))
-			error = -EFAULT;
-	}
-	return error;
-}
-
 asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
 	unsigned long fd, unsigned long off)
 {
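Both files above delete byte-for-byte identical sys_pipe() copies; this series consolidates them into one generic implementation (in fs/pipe.c). A sketch of what that consolidated syscall looks like, reconstructed from the removed bodies -- the exact upstream version may differ in minor details:

/* Generic sys_pipe(): create both pipe ends, then copy the two
 * file descriptors back to the userspace array. */
asmlinkage long sys_pipe(int __user *fildes)
{
	int fd[2];
	int error;

	error = do_pipe(fd);
	if (!error) {
		if (copy_to_user(fildes, fd, 2*sizeof(int)))
			error = -EFAULT;
	}
	return error;
}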
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4c943eabacc3..3324d90038e4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -288,6 +288,8 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
 	 * mode 1 is one shot, mode 2 is period, otherwise del timer */
 	switch (ps->channels[0].mode) {
 	case 1:
+	/* FIXME: enhance mode 4 precision */
+	case 4:
 		create_pit_timer(&ps->pit_timer, val, 0);
 		break;
 	case 2:
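The i8254 change routes PIT mode 4 (software triggered strobe) through the same one-shot timer path as mode 1 via a deliberate fallthrough. A minimal userspace sketch of that dispatch (names and values are illustrative, not the KVM ones):

#include <stdio.h>

static void create_timer(unsigned int val, int periodic)
{
	printf("timer: count=%u periodic=%d\n", val, periodic);
}

static void load_count(int mode, unsigned int val)
{
	switch (mode) {
	case 1:
		/* fall through: mode 4 is one-shot like mode 1 */
	case 4:
		create_timer(val, 0);
		break;
	case 2:
		create_timer(val, 1);
		break;
	default:
		printf("mode %d: no timer\n", mode);
	}
}

int main(void)
{
	load_count(4, 1193);	/* now behaves like mode 1 */
	load_count(2, 59659);	/* periodic, unchanged */
	return 0;
}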
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2ad6f5481671..36c5406b1813 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -79,36 +79,6 @@ static int dbg = 1;
 }
 #endif
 
-#define PT64_PT_BITS 9
-#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
-#define PT32_PT_BITS 10
-#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
-
-#define PT_WRITABLE_SHIFT 1
-
-#define PT_PRESENT_MASK (1ULL << 0)
-#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
-#define PT_USER_MASK (1ULL << 2)
-#define PT_PWT_MASK (1ULL << 3)
-#define PT_PCD_MASK (1ULL << 4)
-#define PT_ACCESSED_MASK (1ULL << 5)
-#define PT_DIRTY_MASK (1ULL << 6)
-#define PT_PAGE_SIZE_MASK (1ULL << 7)
-#define PT_PAT_MASK (1ULL << 7)
-#define PT_GLOBAL_MASK (1ULL << 8)
-#define PT64_NX_SHIFT 63
-#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
-
-#define PT_PAT_SHIFT 7
-#define PT_DIR_PAT_SHIFT 12
-#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
-
-#define PT32_DIR_PSE36_SIZE 4
-#define PT32_DIR_PSE36_SHIFT 13
-#define PT32_DIR_PSE36_MASK \
-	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
-
-
 #define PT_FIRST_AVAIL_BITS_SHIFT 9
 #define PT64_SECOND_AVAIL_BITS_SHIFT 52
 
@@ -154,10 +124,6 @@ static int dbg = 1;
 #define PFERR_USER_MASK (1U << 2)
 #define PFERR_FETCH_MASK (1U << 4)
 
-#define PT64_ROOT_LEVEL 4
-#define PT32_ROOT_LEVEL 2
-#define PT32E_ROOT_LEVEL 3
-
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
@@ -186,6 +152,12 @@ static struct kmem_cache *mmu_page_header_cache;
 
 static u64 __read_mostly shadow_trap_nonpresent_pte;
 static u64 __read_mostly shadow_notrap_nonpresent_pte;
+static u64 __read_mostly shadow_base_present_pte;
+static u64 __read_mostly shadow_nx_mask;
+static u64 __read_mostly shadow_x_mask;	/* mutual exclusive with nx_mask */
+static u64 __read_mostly shadow_user_mask;
+static u64 __read_mostly shadow_accessed_mask;
+static u64 __read_mostly shadow_dirty_mask;
 
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
@@ -194,6 +166,23 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
 
+void kvm_mmu_set_base_ptes(u64 base_pte)
+{
+	shadow_base_present_pte = base_pte;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
+
+void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
+		u64 dirty_mask, u64 nx_mask, u64 x_mask)
+{
+	shadow_user_mask = user_mask;
+	shadow_accessed_mask = accessed_mask;
+	shadow_dirty_mask = dirty_mask;
+	shadow_nx_mask = nx_mask;
+	shadow_x_mask = x_mask;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
+
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.cr0 & X86_CR0_WP;
@@ -232,7 +221,7 @@ static int is_writeble_pte(unsigned long pte)
 
 static int is_dirty_pte(unsigned long pte)
 {
-	return pte & PT_DIRTY_MASK;
+	return pte & shadow_dirty_mask;
 }
 
 static int is_rmap_pte(u64 pte)
@@ -387,7 +376,6 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 
 	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
 	*write_count += 1;
-	WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
 }
 
 static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
@@ -547,7 +535,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 		return;
 	sp = page_header(__pa(spte));
 	pfn = spte_to_pfn(*spte);
-	if (*spte & PT_ACCESSED_MASK)
+	if (*spte & shadow_accessed_mask)
 		kvm_set_pfn_accessed(pfn);
 	if (is_writeble_pte(*spte))
 		kvm_release_pfn_dirty(pfn);
@@ -1073,17 +1061,17 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	 * whether the guest actually used the pte (in order to detect
 	 * demand paging).
 	 */
-	spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
+	spte = shadow_base_present_pte | shadow_dirty_mask;
 	if (!speculative)
 		pte_access |= PT_ACCESSED_MASK;
 	if (!dirty)
 		pte_access &= ~ACC_WRITE_MASK;
-	if (!(pte_access & ACC_EXEC_MASK))
-		spte |= PT64_NX_MASK;
-
-	spte |= PT_PRESENT_MASK;
+	if (pte_access & ACC_EXEC_MASK)
+		spte |= shadow_x_mask;
+	else
+		spte |= shadow_nx_mask;
 	if (pte_access & ACC_USER_MASK)
-		spte |= PT_USER_MASK;
+		spte |= shadow_user_mask;
 	if (largepage)
 		spte |= PT_PAGE_SIZE_MASK;
 
@@ -1188,8 +1176,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 				return -ENOMEM;
 			}
 
-			table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
-				| PT_WRITABLE_MASK | PT_USER_MASK;
+			table[index] = __pa(new_table->spt)
+				| PT_PRESENT_MASK | PT_WRITABLE_MASK
+				| shadow_user_mask | shadow_x_mask;
 		}
 		table_addr = table[index] & PT64_BASE_ADDR_MASK;
 	}
@@ -1244,7 +1233,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return;
 	spin_lock(&vcpu->kvm->mmu_lock);
-#ifdef CONFIG_X86_64
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 
@@ -1256,7 +1244,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		return;
 	}
-#endif
 	for (i = 0; i < 4; ++i) {
 		hpa_t root = vcpu->arch.mmu.pae_root[i];
 
@@ -1282,7 +1269,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
 	root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
 
-#ifdef CONFIG_X86_64
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 
@@ -1297,7 +1283,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		vcpu->arch.mmu.root_hpa = root;
 		return;
 	}
-#endif
 	metaphysical = !is_paging(vcpu);
 	if (tdp_enabled)
 		metaphysical = 1;
@@ -1377,7 +1362,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-			 largepage, gfn, pfn, TDP_ROOT_LEVEL);
+			 largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
@@ -1484,7 +1469,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	context->page_fault = tdp_page_fault;
 	context->free = nonpaging_free;
 	context->prefetch_page = nonpaging_prefetch_page;
-	context->shadow_root_level = TDP_ROOT_LEVEL;
+	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
 	context->root_hpa = INVALID_PAGE;
 
 	if (!is_paging(vcpu)) {
@@ -1633,7 +1618,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
 {
 	u64 *spte = vcpu->arch.last_pte_updated;
 
-	return !!(spte && (*spte & PT_ACCESSED_MASK));
+	return !!(spte && (*spte & shadow_accessed_mask));
 }
 
 static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
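The mmu.c rework above replaces hard-coded x86 PTE bits with shadow_* masks that each backend installs once at init via kvm_mmu_set_base_ptes()/kvm_mmu_set_mask_ptes(), since EPT entries put R/W/X in the low bits and (in this generation) have no architectural accessed/dirty/NX bits. A userspace sketch of the idea -- the mask values below are illustrative, not the real VMX or SVM ones:

#include <stdint.h>
#include <stdio.h>

/* Masks configured by the backend instead of baked into the MMU. */
static uint64_t shadow_base_present_pte;
static uint64_t shadow_x_mask;	/* mutually exclusive with nx */
static uint64_t shadow_nx_mask;
static uint64_t shadow_dirty_mask;

static void set_masks(uint64_t base, uint64_t x, uint64_t nx, uint64_t dirty)
{
	shadow_base_present_pte = base;
	shadow_x_mask = x;
	shadow_nx_mask = nx;
	shadow_dirty_mask = dirty;
}

/* Mirrors the shape of mmu_set_spte(): compose an spte from the
 * configured masks rather than from fixed PT_* bit positions. */
static uint64_t make_spte(int exec)
{
	uint64_t spte = shadow_base_present_pte | shadow_dirty_mask;

	spte |= exec ? shadow_x_mask : shadow_nx_mask;
	return spte;
}

int main(void)
{
	set_masks(1ull << 0, 0, 1ull << 63, 1ull << 6);	/* x86-style bits */
	printf("x86-style spte: %#llx\n", (unsigned long long)make_spte(0));

	set_masks(0x3ull, 1ull << 2, 0, 0);		/* EPT-style: RWX low bits */
	printf("EPT-style spte: %#llx\n", (unsigned long long)make_spte(1));
	return 0;
}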
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e64e9f56a65e..1730757bbc7a 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -3,11 +3,38 @@
 
 #include <linux/kvm_host.h>
 
-#ifdef CONFIG_X86_64
-#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL
-#else
-#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL
-#endif
+#define PT64_PT_BITS 9
+#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
+#define PT32_PT_BITS 10
+#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
+
+#define PT_WRITABLE_SHIFT 1
+
+#define PT_PRESENT_MASK (1ULL << 0)
+#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
+#define PT_USER_MASK (1ULL << 2)
+#define PT_PWT_MASK (1ULL << 3)
+#define PT_PCD_MASK (1ULL << 4)
+#define PT_ACCESSED_MASK (1ULL << 5)
+#define PT_DIRTY_MASK (1ULL << 6)
+#define PT_PAGE_SIZE_MASK (1ULL << 7)
+#define PT_PAT_MASK (1ULL << 7)
+#define PT_GLOBAL_MASK (1ULL << 8)
+#define PT64_NX_SHIFT 63
+#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
+
+#define PT_PAT_SHIFT 7
+#define PT_DIR_PAT_SHIFT 12
+#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
+
+#define PT32_DIR_PSE36_SIZE 4
+#define PT32_DIR_PSE36_SHIFT 13
+#define PT32_DIR_PSE36_MASK \
+	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
+
+#define PT64_ROOT_LEVEL 4
+#define PT32_ROOT_LEVEL 2
+#define PT32E_ROOT_LEVEL 3
 
 static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 89e0be2c10d0..ab22615eee89 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1863,6 +1863,15 @@ static bool svm_cpu_has_accelerated_tpr(void)
 	return false;
 }
 
+static int get_npt_level(void)
+{
+#ifdef CONFIG_X86_64
+	return PT64_ROOT_LEVEL;
+#else
+	return PT32E_ROOT_LEVEL;
+#endif
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -1920,6 +1929,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.inject_pending_vectors = do_interrupt_requests,
 
 	.set_tss_addr = svm_set_tss_addr,
+	.get_tdp_level = get_npt_level,
};
 
 static int __init svm_init(void)
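svm.c fills in the new get_tdp_level() hook: NPT uses the same table format as the host, so the walk is 4 levels on 64-bit and 3 (PAE) on 32-bit, while VMX (below) derives its depth from the EPT guest address width. A toy sketch of the ops-table indirection that replaces the old compile-time TDP_ROOT_LEVEL -- structure and names are illustrative:

#include <stdio.h>

#define PT64_ROOT_LEVEL  4
#define PT32E_ROOT_LEVEL 3

struct x86_ops {
	int (*get_tdp_level)(void);
};

/* SVM-style: host page-table format, so depth follows the build
 * (pointer width stands in for CONFIG_X86_64 here). */
static int get_npt_level(void)
{
	return sizeof(void *) == 8 ? PT64_ROOT_LEVEL : PT32E_ROOT_LEVEL;
}

/* VMX-style: default guest address width 3 encodes a 4-level walk. */
static int get_ept_level(void)
{
	return 3 + 1;
}

int main(void)
{
	struct x86_ops svm = { .get_tdp_level = get_npt_level };
	struct x86_ops vmx = { .get_tdp_level = get_ept_level };

	printf("svm tdp level: %d\n", svm.get_tdp_level());
	printf("vmx tdp level: %d\n", vmx.get_tdp_level());
	return 0;
}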
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8e5d6645b90d..bfe4db11989c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -42,6 +42,9 @@ module_param(enable_vpid, bool, 0);
 static int flexpriority_enabled = 1;
 module_param(flexpriority_enabled, bool, 0);
 
+static int enable_ept = 1;
+module_param(enable_ept, bool, 0);
+
 struct vmcs {
 	u32 revision_id;
 	u32 abort;
@@ -84,7 +87,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 
-static int init_rmode_tss(struct kvm *kvm);
+static int init_rmode(struct kvm *kvm);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -107,6 +110,11 @@
 	u32 vmentry_ctrl;
 } vmcs_config;
 
+struct vmx_capability {
+	u32 ept;
+	u32 vpid;
+} vmx_capability;
+
 #define VMX_SEGMENT_FIELD(seg)					\
 	[VCPU_SREG_##seg] = {					\
 		.selector = GUEST_##seg##_SELECTOR,		\
@@ -214,6 +222,32 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
 }
 
+static inline int cpu_has_vmx_invept_individual_addr(void)
+{
+	return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT));
+}
+
+static inline int cpu_has_vmx_invept_context(void)
+{
+	return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT));
+}
+
+static inline int cpu_has_vmx_invept_global(void)
+{
+	return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT));
+}
+
+static inline int cpu_has_vmx_ept(void)
+{
+	return (vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_ENABLE_EPT);
+}
+
+static inline int vm_need_ept(void)
+{
+	return (cpu_has_vmx_ept() && enable_ept);
+}
+
 static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
 {
 	return ((cpu_has_vmx_virtualize_apic_accesses()) &&
@@ -250,6 +284,18 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
 		  : : "a"(&operand), "c"(ext) : "cc", "memory");
 }
 
+static inline void __invept(int ext, u64 eptp, gpa_t gpa)
+{
+	struct {
+		u64 eptp, gpa;
+	} operand = {eptp, gpa};
+
+	asm volatile (ASM_VMX_INVEPT
+			/* CF==1 or ZF==1 --> rc = -1 */
+			"; ja 1f ; ud2 ; 1:\n"
+			: : "a" (&operand), "c" (ext) : "cc", "memory");
+}
+
 static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
@@ -301,6 +347,33 @@ static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
 	__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
 }
 
+static inline void ept_sync_global(void)
+{
+	if (cpu_has_vmx_invept_global())
+		__invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
+}
+
+static inline void ept_sync_context(u64 eptp)
+{
+	if (vm_need_ept()) {
+		if (cpu_has_vmx_invept_context())
+			__invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
+		else
+			ept_sync_global();
+	}
+}
+
+static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
+{
+	if (vm_need_ept()) {
+		if (cpu_has_vmx_invept_individual_addr())
+			__invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
+					eptp, gpa);
+		else
+			ept_sync_context(eptp);
+	}
+}
+
 static unsigned long vmcs_readl(unsigned long field)
 {
 	unsigned long value;
@@ -388,6 +461,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 		eb |= 1u << 1;
 	if (vcpu->arch.rmode.active)
 		eb = ~0;
+	if (vm_need_ept())
+		eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
 
@@ -985,7 +1060,7 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
 static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 {
 	u32 vmx_msr_low, vmx_msr_high;
-	u32 min, opt;
+	u32 min, opt, min2, opt2;
 	u32 _pin_based_exec_control = 0;
 	u32 _cpu_based_exec_control = 0;
 	u32 _cpu_based_2nd_exec_control = 0;
@@ -1003,6 +1078,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	      CPU_BASED_CR8_LOAD_EXITING |
 	      CPU_BASED_CR8_STORE_EXITING |
 #endif
+	      CPU_BASED_CR3_LOAD_EXITING |
+	      CPU_BASED_CR3_STORE_EXITING |
 	      CPU_BASED_USE_IO_BITMAPS |
 	      CPU_BASED_MOV_DR_EXITING |
 	      CPU_BASED_USE_TSC_OFFSETING;
@@ -1018,11 +1095,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 					  ~CPU_BASED_CR8_STORE_EXITING;
 #endif
 	if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
-		min = 0;
-		opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+		min2 = 0;
+		opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
			SECONDARY_EXEC_WBINVD_EXITING |
-			SECONDARY_EXEC_ENABLE_VPID;
-		if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2,
+			SECONDARY_EXEC_ENABLE_VPID |
+			SECONDARY_EXEC_ENABLE_EPT;
+		if (adjust_vmx_controls(min2, opt2,
+					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
 			return -EIO;
 	}
@@ -1031,6 +1110,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
 		_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
 #endif
+	if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
+		/* CR3 accesses don't need to cause VM Exits when EPT enabled */
+		min &= ~(CPU_BASED_CR3_LOAD_EXITING |
+			 CPU_BASED_CR3_STORE_EXITING);
+		if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
+					&_cpu_based_exec_control) < 0)
+			return -EIO;
+		rdmsr(MSR_IA32_VMX_EPT_VPID_CAP,
+		      vmx_capability.ept, vmx_capability.vpid);
+	}
 
 	min = 0;
 #ifdef CONFIG_X86_64
@@ -1256,7 +1345,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
 
 	kvm_mmu_reset_context(vcpu);
-	init_rmode_tss(vcpu->kvm);
+	init_rmode(vcpu->kvm);
 }
 
 #ifdef CONFIG_X86_64
@@ -1304,8 +1393,64 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 	vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
 }
 
+static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
+{
+	if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
+		if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
+			printk(KERN_ERR "EPT: Fail to load pdptrs!\n");
+			return;
+		}
+		vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
+		vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
+		vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
+		vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
+	}
+}
+
+static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+
+static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
+					unsigned long cr0,
+					struct kvm_vcpu *vcpu)
+{
+	if (!(cr0 & X86_CR0_PG)) {
+		/* From paging/starting to nonpaging */
+		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
+			     vmcs_config.cpu_based_exec_ctrl |
+			     (CPU_BASED_CR3_LOAD_EXITING |
+			      CPU_BASED_CR3_STORE_EXITING));
+		vcpu->arch.cr0 = cr0;
+		vmx_set_cr4(vcpu, vcpu->arch.cr4);
+		*hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
+		*hw_cr0 &= ~X86_CR0_WP;
+	} else if (!is_paging(vcpu)) {
+		/* From nonpaging to paging */
+		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
+			     vmcs_config.cpu_based_exec_ctrl &
+			     ~(CPU_BASED_CR3_LOAD_EXITING |
+			       CPU_BASED_CR3_STORE_EXITING));
+		vcpu->arch.cr0 = cr0;
+		vmx_set_cr4(vcpu, vcpu->arch.cr4);
+		if (!(vcpu->arch.cr0 & X86_CR0_WP))
+			*hw_cr0 &= ~X86_CR0_WP;
+	}
+}
+
+static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
+					struct kvm_vcpu *vcpu)
+{
+	if (!is_paging(vcpu)) {
+		*hw_cr4 &= ~X86_CR4_PAE;
+		*hw_cr4 |= X86_CR4_PSE;
+	} else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
+		*hw_cr4 &= ~X86_CR4_PAE;
+}
+
 static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
+	unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) |
+				KVM_VM_CR0_ALWAYS_ON;
+
 	vmx_fpu_deactivate(vcpu);
 
 	if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE))
@@ -1323,29 +1468,61 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	}
 #endif
 
+	if (vm_need_ept())
+		ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
+
 	vmcs_writel(CR0_READ_SHADOW, cr0);
-	vmcs_writel(GUEST_CR0,
-		    (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
+	vmcs_writel(GUEST_CR0, hw_cr0);
 	vcpu->arch.cr0 = cr0;
 
 	if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
 		vmx_fpu_activate(vcpu);
 }
 
+static u64 construct_eptp(unsigned long root_hpa)
+{
+	u64 eptp;
+
+	/* TODO write the value reading from MSR */
+	eptp = VMX_EPT_DEFAULT_MT |
+		VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
+	eptp |= (root_hpa & PAGE_MASK);
+
+	return eptp;
+}
+
 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
+	unsigned long guest_cr3;
+	u64 eptp;
+
+	guest_cr3 = cr3;
+	if (vm_need_ept()) {
+		eptp = construct_eptp(cr3);
+		vmcs_write64(EPT_POINTER, eptp);
+		ept_sync_context(eptp);
+		ept_load_pdptrs(vcpu);
+		guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
+			VMX_EPT_IDENTITY_PAGETABLE_ADDR;
+	}
+
 	vmx_flush_tlb(vcpu);
-	vmcs_writel(GUEST_CR3, cr3);
+	vmcs_writel(GUEST_CR3, guest_cr3);
 	if (vcpu->arch.cr0 & X86_CR0_PE)
 		vmx_fpu_deactivate(vcpu);
 }
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-	vmcs_writel(CR4_READ_SHADOW, cr4);
-	vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ?
-		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
+	unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ?
+		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+
 	vcpu->arch.cr4 = cr4;
+	if (vm_need_ept())
+		ept_update_paging_mode_cr4(&hw_cr4, vcpu);
+
+	vmcs_writel(CR4_READ_SHADOW, cr4);
+	vmcs_writel(GUEST_CR4, hw_cr4);
 }
 
 static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -1530,6 +1707,41 @@ out:
 	return ret;
 }
 
+static int init_rmode_identity_map(struct kvm *kvm)
+{
+	int i, r, ret;
+	pfn_t identity_map_pfn;
+	u32 tmp;
+
+	if (!vm_need_ept())
+		return 1;
+	if (unlikely(!kvm->arch.ept_identity_pagetable)) {
+		printk(KERN_ERR "EPT: identity-mapping pagetable "
+			"haven't been allocated!\n");
+		return 0;
+	}
+	if (likely(kvm->arch.ept_identity_pagetable_done))
+		return 1;
+	ret = 0;
+	identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT;
+	r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
+	if (r < 0)
+		goto out;
+	/* Set up identity-mapping pagetable for EPT in real mode */
+	for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
+		tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
+			_PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
+		r = kvm_write_guest_page(kvm, identity_map_pfn,
+				&tmp, i * sizeof(tmp), sizeof(tmp));
+		if (r < 0)
+			goto out;
+	}
+	kvm->arch.ept_identity_pagetable_done = true;
+	ret = 1;
+out:
+	return ret;
+}
+
 static void seg_setup(int seg)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1564,6 +1776,31 @@ out:
 	return r;
 }
 
+static int alloc_identity_pagetable(struct kvm *kvm)
+{
+	struct kvm_userspace_memory_region kvm_userspace_mem;
+	int r = 0;
+
+	down_write(&kvm->slots_lock);
+	if (kvm->arch.ept_identity_pagetable)
+		goto out;
+	kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
+	kvm_userspace_mem.flags = 0;
+	kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
+	kvm_userspace_mem.memory_size = PAGE_SIZE;
+	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
+	if (r)
+		goto out;
+
+	down_read(&current->mm->mmap_sem);
+	kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
+			VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT);
+	up_read(&current->mm->mmap_sem);
+out:
+	up_write(&kvm->slots_lock);
+	return r;
+}
+
 static void allocate_vpid(struct vcpu_vmx *vmx)
 {
 	int vpid;
@@ -1638,6 +1875,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 				CPU_BASED_CR8_LOAD_EXITING;
 #endif
 	}
+	if (!vm_need_ept())
+		exec_control |= CPU_BASED_CR3_STORE_EXITING |
+				CPU_BASED_CR3_LOAD_EXITING;
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
 
 	if (cpu_has_secondary_exec_ctrls()) {
@@ -1647,6 +1887,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 			~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 		if (vmx->vpid == 0)
 			exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
+		if (!vm_need_ept())
+			exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
 	}
 
@@ -1722,6 +1964,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	return 0;
 }
 
+static int init_rmode(struct kvm *kvm)
+{
+	if (!init_rmode_tss(kvm))
+		return 0;
+	if (!init_rmode_identity_map(kvm))
+		return 0;
+	return 1;
+}
+
 static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1729,7 +1980,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	int ret;
 
 	down_read(&vcpu->kvm->slots_lock);
-	if (!init_rmode_tss(vmx->vcpu.kvm)) {
+	if (!init_rmode(vmx->vcpu.kvm)) {
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -1994,6 +2245,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
 	if (is_page_fault(intr_info)) {
+		/* EPT won't cause page fault directly */
+		if (vm_need_ept())
+			BUG();
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
 		KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
 			    (u32)((u64)cr2 >> 32), handler);
@@ -2323,6 +2577,64 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return kvm_task_switch(vcpu, tss_selector, reason);
 }
 
+static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	u64 exit_qualification;
+	enum emulation_result er;
+	gpa_t gpa;
+	unsigned long hva;
+	int gla_validity;
+	int r;
+
+	exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+
+	if (exit_qualification & (1 << 6)) {
+		printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
+		return -ENOTSUPP;
+	}
+
+	gla_validity = (exit_qualification >> 7) & 0x3;
+	if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
+		printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
+		printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
+			(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
+			(long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
+		printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
+			(long unsigned int)exit_qualification);
+		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+		kvm_run->hw.hardware_exit_reason = 0;
+		return -ENOTSUPP;
+	}
+
+	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+	hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
+	if (!kvm_is_error_hva(hva)) {
+		r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
+		if (r < 0) {
+			printk(KERN_ERR "EPT: Not enough memory!\n");
+			return -ENOMEM;
+		}
+		return 1;
+	} else {
+		/* must be MMIO */
+		er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
+
+		if (er == EMULATE_FAIL) {
+			printk(KERN_ERR
+			 "EPT: Fail to handle EPT violation vmexit!er is %d\n",
+			 er);
+			printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
+			 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
+			 (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
+			printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
+				(long unsigned int)exit_qualification);
+			return -ENOTSUPP;
+		} else if (er == EMULATE_DO_MMIO)
+			return 0;
+	}
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -2346,6 +2658,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
 	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
+	[EXIT_REASON_EPT_VIOLATION]           = handle_ept_violation,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -2364,6 +2677,13 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP),
 		    (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit);
 
+	/* Access CR3 don't cause VMExit in paging mode, so we need
+	 * to sync with guest real CR3. */
+	if (vm_need_ept() && is_paging(vcpu)) {
+		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+		ept_load_pdptrs(vcpu);
+	}
+
 	if (unlikely(vmx->fail)) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		kvm_run->fail_entry.hardware_entry_failure_reason
@@ -2372,7 +2692,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	}
 
 	if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
-			exit_reason != EXIT_REASON_EXCEPTION_NMI)
+			(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
+			exit_reason != EXIT_REASON_EPT_VIOLATION))
 		printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
 		       "exit reason is 0x%x\n", __func__, exit_reason);
 	if (exit_reason < kvm_vmx_max_exit_handlers
@@ -2674,6 +2995,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 		return ERR_PTR(-ENOMEM);
 
 	allocate_vpid(vmx);
+	if (id == 0 && vm_need_ept()) {
+		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
+			VMX_EPT_WRITABLE_MASK |
+			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
+		kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
+				VMX_EPT_FAKE_DIRTY_MASK, 0ull,
+				VMX_EPT_EXECUTABLE_MASK);
+		kvm_enable_tdp();
+	}
 
 	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
 	if (err)
@@ -2706,6 +3036,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (alloc_apic_access_page(kvm) != 0)
 		goto free_vmcs;
 
+	if (vm_need_ept())
+		if (alloc_identity_pagetable(kvm) != 0)
+			goto free_vmcs;
+
 	return &vmx->vcpu;
 
 free_vmcs:
@@ -2735,6 +3069,11 @@ static void __init vmx_check_processor_compat(void *rtn)
 	}
 }
 
+static int get_ept_level(void)
+{
+	return VMX_EPT_DEFAULT_GAW + 1;
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -2791,6 +3130,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.inject_pending_vectors = do_interrupt_requests,
 
 	.set_tss_addr = vmx_set_tss_addr,
+	.get_tdp_level = get_ept_level,
 };
 
 static int __init vmx_init(void)
@@ -2843,9 +3183,14 @@ static int __init vmx_init(void)
 	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
 	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
 
+	if (cpu_has_vmx_ept())
+		bypass_guest_pf = 0;
+
 	if (bypass_guest_pf)
 		kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
 
+	ept_sync_global();
+
 	return 0;
 
 out2:
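construct_eptp() above packs the EPT pointer as: memory type in bits 2:0, page-walk length minus one in bits 5:3, and the page-aligned root table address in the upper bits. A standalone sketch of the packing, assuming the constant values this series defines in vmx.h (VMX_EPT_DEFAULT_MT = 6 for write-back, VMX_EPT_DEFAULT_GAW = 3 for a 4-level walk, shift = 3):

#include <stdint.h>
#include <stdio.h>

#define EPT_DEFAULT_MT		6ull	/* write-back */
#define EPT_DEFAULT_GAW		3ull	/* encodes a 4-level walk */
#define EPT_GAW_EPTP_SHIFT	3
#define PAGE_MASK_4K		(~0xfffull)

static uint64_t construct_eptp(uint64_t root_hpa)
{
	uint64_t eptp = EPT_DEFAULT_MT |
			(EPT_DEFAULT_GAW << EPT_GAW_EPTP_SHIFT);

	/* keep only the page-aligned root address */
	return eptp | (root_hpa & PAGE_MASK_4K);
}

int main(void)
{
	/* 0x12345000 root -> 0x1234501e: MT=6, GAW=3, root above */
	printf("eptp = %#llx\n",
	       (unsigned long long)construct_eptp(0x12345000ull));
	return 0;
}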
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 5dff4606b988..79d94c610dfe 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -35,6 +35,8 @@
 #define CPU_BASED_MWAIT_EXITING                 0x00000400
 #define CPU_BASED_RDPMC_EXITING                 0x00000800
 #define CPU_BASED_RDTSC_EXITING                 0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING              0x00008000
+#define CPU_BASED_CR3_STORE_EXITING             0x00010000
 #define CPU_BASED_CR8_LOAD_EXITING              0x00080000
 #define CPU_BASED_CR8_STORE_EXITING             0x00100000
 #define CPU_BASED_TPR_SHADOW                    0x00200000
@@ -49,6 +51,7 @@
  * Definitions of Secondary Processor-Based VM-Execution Controls.
  */
 #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT               0x00000002
 #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
 #define SECONDARY_EXEC_WBINVD_EXITING           0x00000040
 
@@ -100,10 +103,22 @@ enum vmcs_field {
 	VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
 	APIC_ACCESS_ADDR                = 0x00002014,
 	APIC_ACCESS_ADDR_HIGH           = 0x00002015,
+	EPT_POINTER                     = 0x0000201a,
+	EPT_POINTER_HIGH                = 0x0000201b,
+	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
+	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
 	VMCS_LINK_POINTER               = 0x00002800,
 	VMCS_LINK_POINTER_HIGH          = 0x00002801,
 	GUEST_IA32_DEBUGCTL             = 0x00002802,
 	GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
+	GUEST_PDPTR0                    = 0x0000280a,
+	GUEST_PDPTR0_HIGH               = 0x0000280b,
+	GUEST_PDPTR1                    = 0x0000280c,
+	GUEST_PDPTR1_HIGH               = 0x0000280d,
+	GUEST_PDPTR2                    = 0x0000280e,
+	GUEST_PDPTR2_HIGH               = 0x0000280f,
+	GUEST_PDPTR3                    = 0x00002810,
+	GUEST_PDPTR3_HIGH               = 0x00002811,
 	PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
 	CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
 	EXCEPTION_BITMAP                = 0x00004004,
@@ -226,6 +241,8 @@ enum vmcs_field {
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
 #define EXIT_REASON_APIC_ACCESS         44
+#define EXIT_REASON_EPT_VIOLATION       48
+#define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_WBINVD              54
 
 /*
@@ -316,15 +333,36 @@ enum vmcs_field {
 #define MSR_IA32_VMX_CR4_FIXED1         0x489
 #define MSR_IA32_VMX_VMCS_ENUM          0x48a
 #define MSR_IA32_VMX_PROCBASED_CTLS2    0x48b
+#define MSR_IA32_VMX_EPT_VPID_CAP       0x48c
319 | 337 | ||
320 | #define MSR_IA32_FEATURE_CONTROL 0x3a | 338 | #define MSR_IA32_FEATURE_CONTROL 0x3a |
321 | #define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 | 339 | #define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 |
322 | #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 | 340 | #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 |
323 | 341 | ||
324 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 | 342 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 |
343 | #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 | ||
325 | 344 | ||
326 | #define VMX_NR_VPIDS (1 << 16) | 345 | #define VMX_NR_VPIDS (1 << 16) |
327 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 | 346 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 |
328 | #define VMX_VPID_EXTENT_ALL_CONTEXT 2 | 347 | #define VMX_VPID_EXTENT_ALL_CONTEXT 2 |
329 | 348 | ||
349 | #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 | ||
350 | #define VMX_EPT_EXTENT_CONTEXT 1 | ||
351 | #define VMX_EPT_EXTENT_GLOBAL 2 | ||
352 | #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) | ||
353 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | ||
354 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | ||
355 | #define VMX_EPT_DEFAULT_GAW 3 | ||
356 | #define VMX_EPT_MAX_GAW 0x4 | ||
357 | #define VMX_EPT_MT_EPTE_SHIFT 3 | ||
358 | #define VMX_EPT_GAW_EPTP_SHIFT 3 | ||
359 | #define VMX_EPT_DEFAULT_MT 0x6ull | ||
360 | #define VMX_EPT_READABLE_MASK 0x1ull | ||
361 | #define VMX_EPT_WRITABLE_MASK 0x2ull | ||
362 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull | ||
363 | #define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62) | ||
364 | #define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63) | ||
365 | |||
366 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul | ||
367 | |||
330 | #endif | 368 | #endif |
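
The EPT_POINTER field added above holds an EPTP value assembled from these constants. A minimal sketch of that composition (hypothetical helper name; the memory type sits in bits 2:0 and the page-walk length in bits 5:3, hence VMX_EPT_GAW_EPTP_SHIFT):

/* Illustrative only: EPTP = memtype | (walk length - 1) | PML4 address. */
static u64 eptp_sketch(u64 root_hpa)
{
	u64 eptp = VMX_EPT_DEFAULT_MT |
		   (VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT);
	return eptp | (root_hpa & PAGE_MASK);	/* EPT PML4 table */
}

VMX_EPT_DEFAULT_GAW encodes the page-walk length minus one, which is why get_ept_level() in the vmx.c hunk returns VMX_EPT_DEFAULT_GAW + 1, i.e. a 4-level table.
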
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ce556372a4d..21338bdb28ff 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -2417,6 +2417,9 @@ int kvm_arch_init(void *opaque) | |||
2417 | 2417 | ||
2418 | kvm_x86_ops = ops; | 2418 | kvm_x86_ops = ops; |
2419 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); | 2419 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); |
2420 | kvm_mmu_set_base_ptes(PT_PRESENT_MASK); | ||
2421 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | ||
2422 | PT_DIRTY_MASK, PT64_NX_MASK, 0); | ||
2420 | return 0; | 2423 | return 0; |
2421 | 2424 | ||
2422 | out: | 2425 | out: |
@@ -3019,6 +3022,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
3019 | 3022 | ||
3020 | kvm_x86_ops->decache_regs(vcpu); | 3023 | kvm_x86_ops->decache_regs(vcpu); |
3021 | 3024 | ||
3025 | vcpu->arch.exception.pending = false; | ||
3026 | |||
3022 | vcpu_put(vcpu); | 3027 | vcpu_put(vcpu); |
3023 | 3028 | ||
3024 | return 0; | 3029 | return 0; |
@@ -3481,7 +3486,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3481 | } | 3486 | } |
3482 | 3487 | ||
3483 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | 3488 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { |
3484 | cseg_desc.type &= ~(1 << 8); //clear the B flag | 3489 | cseg_desc.type &= ~(1 << 1); //clear the B flag |
3485 | save_guest_segment_descriptor(vcpu, tr_seg.selector, | 3490 | save_guest_segment_descriptor(vcpu, tr_seg.selector, |
3486 | &cseg_desc); | 3491 | &cseg_desc); |
3487 | } | 3492 | } |
@@ -3507,7 +3512,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3507 | } | 3512 | } |
3508 | 3513 | ||
3509 | if (reason != TASK_SWITCH_IRET) { | 3514 | if (reason != TASK_SWITCH_IRET) { |
3510 | nseg_desc.type |= (1 << 8); | 3515 | nseg_desc.type |= (1 << 1); |
3511 | save_guest_segment_descriptor(vcpu, tss_selector, | 3516 | save_guest_segment_descriptor(vcpu, tss_selector, |
3512 | &nseg_desc); | 3517 | &nseg_desc); |
3513 | } | 3518 | } |
@@ -3698,10 +3703,19 @@ void fx_init(struct kvm_vcpu *vcpu) | |||
3698 | { | 3703 | { |
3699 | unsigned after_mxcsr_mask; | 3704 | unsigned after_mxcsr_mask; |
3700 | 3705 | ||
3706 | /* | ||
3707 | * Touch the FPU the first time in a non-atomic context: if | ||
3708 | * this is the first FPU instruction, the exception handler | ||
3709 | * will fire before the instruction returns and will have to | ||
3710 | * allocate RAM with GFP_KERNEL. | ||
3711 | */ | ||
3712 | if (!used_math()) | ||
3713 | fx_save(&vcpu->arch.host_fx_image); | ||
3714 | |||
3701 | /* Initialize guest FPU by resetting ours and saving into guest's */ | 3715 | /* Initialize guest FPU by resetting ours and saving into guest's */ |
3702 | preempt_disable(); | 3716 | preempt_disable(); |
3703 | fx_save(&vcpu->arch.host_fx_image); | 3717 | fx_save(&vcpu->arch.host_fx_image); |
3704 | fpu_init(); | 3718 | fx_finit(); |
3705 | fx_save(&vcpu->arch.guest_fx_image); | 3719 | fx_save(&vcpu->arch.guest_fx_image); |
3706 | fx_restore(&vcpu->arch.host_fx_image); | 3720 | fx_restore(&vcpu->arch.host_fx_image); |
3707 | preempt_enable(); | 3721 | preempt_enable(); |
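
The fpu_init() to fx_finit() switch keeps fx_init() from rerunning full host FPU bring-up just to get a clean register image. A plausible shape for fx_finit() (an assumption; its definition is not part of this hunk) is a bare finit:

/* Assumed definition, not shown in this patch: reset the x87 state
 * with "finit" and nothing else, unlike the heavier fpu_init(). */
static inline void fx_finit(void)
{
	asm("finit");
}
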
@@ -3906,6 +3920,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
3906 | kvm_free_physmem(kvm); | 3920 | kvm_free_physmem(kvm); |
3907 | if (kvm->arch.apic_access_page) | 3921 | if (kvm->arch.apic_access_page) |
3908 | put_page(kvm->arch.apic_access_page); | 3922 | put_page(kvm->arch.apic_access_page); |
3923 | if (kvm->arch.ept_identity_pagetable) | ||
3924 | put_page(kvm->arch.ept_identity_pagetable); | ||
3909 | kfree(kvm); | 3925 | kfree(kvm); |
3910 | } | 3926 | } |
3911 | 3927 | ||
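
The (1 << 8) to (1 << 1) changes in kvm_task_switch() are a genuine bit-position fix: the busy (B) flag is bit 1 of the 4-bit descriptor type, so a 32-bit TSS is type 1001b (0x9) when available and 1011b (0xB) when busy, per the Intel SDM. A small sketch with a hypothetical descriptor struct:

/* Bit 1 of the TSS descriptor type toggles available (0x9) vs busy (0xB). */
#define TSS_DESC_BUSY	(1 << 1)

struct tss_desc_sketch {		/* hypothetical, for illustration */
	unsigned type : 4;
};

static void set_tss_busy_sketch(struct tss_desc_sketch *d, int busy)
{
	if (busy)
		d->type |= TSS_DESC_BUSY;	/* 0x9 -> 0xB */
	else
		d->type &= ~TSS_DESC_BUSY;	/* 0xB -> 0x9 */
}

The old (1 << 8) fell outside the 4-bit type field entirely, so the busy flag was never actually set or cleared.
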
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 2ca08386f993..f2a696d6a243 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
@@ -1761,6 +1761,7 @@ twobyte_insn: | |||
1761 | case 6: /* lmsw */ | 1761 | case 6: /* lmsw */ |
1762 | realmode_lmsw(ctxt->vcpu, (u16)c->src.val, | 1762 | realmode_lmsw(ctxt->vcpu, (u16)c->src.val, |
1763 | &ctxt->eflags); | 1763 | &ctxt->eflags); |
1764 | c->dst.type = OP_NONE; | ||
1764 | break; | 1765 | break; |
1765 | case 7: /* invlpg*/ | 1766 | case 7: /* invlpg*/ |
1766 | emulate_invlpg(ctxt->vcpu, memop); | 1767 | emulate_invlpg(ctxt->vcpu, memop); |
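
Setting c->dst.type = OP_NONE here matters because the emulator runs a common writeback step after every instruction; lmsw has no destination operand, so without this a stale dst descriptor could be committed. Schematically (a self-contained sketch, not the file's exact dispatch):

enum op_type { OP_NONE, OP_REG, OP_MEM };	/* mirrors the emulator's enum */

static void writeback_sketch(enum op_type dst_type)
{
	switch (dst_type) {
	case OP_NONE:
		break;		/* lmsw lands here: nothing to commit */
	case OP_REG:
		/* commit dst.val to the destination register */
		break;
	case OP_MEM:
		/* commit dst.val back to guest memory */
		break;
	}
}
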
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 18378850e25a..914ccf983687 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c | |||
@@ -476,29 +476,3 @@ int memory_add_physaddr_to_nid(u64 addr) | |||
476 | 476 | ||
477 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | 477 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); |
478 | #endif | 478 | #endif |
479 | |||
480 | #ifndef CONFIG_HAVE_ARCH_PARSE_SRAT | ||
481 | /* | ||
482 | * XXX FIXME: Make SLIT table parsing available to 32-bit NUMA | ||
483 | * | ||
484 | * These stub functions are needed to compile 32-bit NUMA when SRAT is | ||
485 | * not set. There are functions in srat_64.c for parsing this table | ||
486 | * and it may be possible to make them common functions. | ||
487 | */ | ||
488 | void acpi_numa_slit_init (struct acpi_table_slit *slit) | ||
489 | { | ||
490 | printk(KERN_INFO "ACPI: No support for parsing SLIT table\n"); | ||
491 | } | ||
492 | |||
493 | void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa) | ||
494 | { | ||
495 | } | ||
496 | |||
497 | void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma) | ||
498 | { | ||
499 | } | ||
500 | |||
501 | void acpi_numa_arch_fixup(void) | ||
502 | { | ||
503 | } | ||
504 | #endif /* CONFIG_HAVE_ARCH_PARSE_SRAT */ | ||
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 9ee007be9142..369cf065b6a4 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -172,10 +172,3 @@ void reserve_top_address(unsigned long reserve) | |||
172 | __FIXADDR_TOP = -reserve - PAGE_SIZE; | 172 | __FIXADDR_TOP = -reserve - PAGE_SIZE; |
173 | __VMALLOC_RESERVE += reserve; | 173 | __VMALLOC_RESERVE += reserve; |
174 | } | 174 | } |
175 | |||
176 | int pmd_bad(pmd_t pmd) | ||
177 | { | ||
178 | WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd)); | ||
179 | |||
180 | return pmd_bad_v1(pmd); | ||
181 | } | ||
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 index 7fa519868d70..89ec35d00efd 100644 --- a/arch/x86/pci/Makefile_32 +++ b/arch/x86/pci/Makefile_32 | |||
@@ -6,11 +6,19 @@ obj-$(CONFIG_PCI_DIRECT) += direct.o | |||
6 | obj-$(CONFIG_PCI_OLPC) += olpc.o | 6 | obj-$(CONFIG_PCI_OLPC) += olpc.o |
7 | 7 | ||
8 | pci-y := fixup.o | 8 | pci-y := fixup.o |
9 | |||
10 | # Do not change the ordering here. There is a nasty init function | ||
11 | # ordering dependency which breaks when you move acpi.o below | ||
12 | # legacy/irq.o | ||
9 | pci-$(CONFIG_ACPI) += acpi.o | 13 | pci-$(CONFIG_ACPI) += acpi.o |
10 | pci-y += legacy.o irq.o | 14 | pci-y += legacy.o irq.o |
11 | 15 | ||
12 | pci-$(CONFIG_X86_VISWS) += visws.o fixup.o | 16 | # Careful: VISWS and NUMAQ overrule the pci-y above. The colons are |
13 | pci-$(CONFIG_X86_NUMAQ) += numa.o irq.o | 17 | # therefore correct. This needs a proper fix by disentangling the code. |
18 | pci-$(CONFIG_X86_VISWS) := visws.o fixup.o | ||
19 | pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o | ||
20 | |||
21 | # Necessary for NUMAQ as well | ||
14 | pci-$(CONFIG_NUMA) += mp_bus_to_node.o | 22 | pci-$(CONFIG_NUMA) += mp_bus_to_node.o |
15 | 23 | ||
16 | obj-y += $(pci-y) common.o early.o | 24 | obj-y += $(pci-y) common.o early.o |
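
The ordering comments added to this Makefile reflect how initcalls work: calls registered at the same level run in link order, and link order follows the object list. A minimal illustration with hypothetical functions:

/* Two initcalls at the same level: whichever object file is linked
 * first runs first, which is why acpi.o must precede legacy.o/irq.o. */
static int __init linked_first_init(void)  { return 0; }
static int __init linked_second_init(void) { return 0; }

arch_initcall(linked_first_init);	/* from the earlier .o file */
arch_initcall(linked_second_init);	/* from the later .o file */
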
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 1a9c0c6a1a18..d95de2f199cd 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -6,45 +6,6 @@ | |||
6 | #include <asm/numa.h> | 6 | #include <asm/numa.h> |
7 | #include "pci.h" | 7 | #include "pci.h" |
8 | 8 | ||
9 | static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) | ||
10 | { | ||
11 | pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; | ||
12 | printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident); | ||
13 | return 0; | ||
14 | } | ||
15 | |||
16 | static struct dmi_system_id acpi_pciprobe_dmi_table[] __devinitdata = { | ||
17 | /* | ||
18 | * Systems where PCI IO resource ISA alignment can be skipped | ||
19 | * when the ISA enable bit in the bridge control is not set | ||
20 | */ | ||
21 | { | ||
22 | .callback = can_skip_ioresource_align, | ||
23 | .ident = "IBM System x3800", | ||
24 | .matches = { | ||
25 | DMI_MATCH(DMI_SYS_VENDOR, "IBM"), | ||
26 | DMI_MATCH(DMI_PRODUCT_NAME, "x3800"), | ||
27 | }, | ||
28 | }, | ||
29 | { | ||
30 | .callback = can_skip_ioresource_align, | ||
31 | .ident = "IBM System x3850", | ||
32 | .matches = { | ||
33 | DMI_MATCH(DMI_SYS_VENDOR, "IBM"), | ||
34 | DMI_MATCH(DMI_PRODUCT_NAME, "x3850"), | ||
35 | }, | ||
36 | }, | ||
37 | { | ||
38 | .callback = can_skip_ioresource_align, | ||
39 | .ident = "IBM System x3950", | ||
40 | .matches = { | ||
41 | DMI_MATCH(DMI_SYS_VENDOR, "IBM"), | ||
42 | DMI_MATCH(DMI_PRODUCT_NAME, "x3950"), | ||
43 | }, | ||
44 | }, | ||
45 | {} | ||
46 | }; | ||
47 | |||
48 | struct pci_root_info { | 9 | struct pci_root_info { |
49 | char *name; | 10 | char *name; |
50 | unsigned int res_num; | 11 | unsigned int res_num; |
@@ -196,8 +157,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do | |||
196 | int pxm; | 157 | int pxm; |
197 | #endif | 158 | #endif |
198 | 159 | ||
199 | dmi_check_system(acpi_pciprobe_dmi_table); | ||
200 | |||
201 | if (domain && !pci_domains_supported) { | 160 | if (domain && !pci_domains_supported) { |
202 | printk(KERN_WARNING "PCI: Multiple domains not supported " | 161 | printk(KERN_WARNING "PCI: Multiple domains not supported " |
203 | "(dom %d, bus %d)\n", domain, busnum); | 162 | "(dom %d, bus %d)\n", domain, busnum); |
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 2a4d751818b7..8545c8a9d107 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -77,17 +77,48 @@ int pcibios_scanned; | |||
77 | */ | 77 | */ |
78 | DEFINE_SPINLOCK(pci_config_lock); | 78 | DEFINE_SPINLOCK(pci_config_lock); |
79 | 79 | ||
80 | static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) | 80 | static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) |
81 | { | 81 | { |
82 | struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE]; | 82 | pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; |
83 | 83 | printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident); | |
84 | if (rom_r->parent) | 84 | return 0; |
85 | return; | 85 | } |
86 | if (rom_r->start) | 86 | |
87 | /* we deal with BIOS assigned ROM later */ | 87 | static struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitdata = { |
88 | return; | 88 | /* |
89 | if (!(pci_probe & PCI_ASSIGN_ROMS)) | 89 | * Systems where PCI IO resource ISA alignment can be skipped |
90 | rom_r->start = rom_r->end = rom_r->flags = 0; | 90 | * when the ISA enable bit in the bridge control is not set |
91 | */ | ||
92 | { | ||
93 | .callback = can_skip_ioresource_align, | ||
94 | .ident = "IBM System x3800", | ||
95 | .matches = { | ||
96 | DMI_MATCH(DMI_SYS_VENDOR, "IBM"), | ||
97 | DMI_MATCH(DMI_PRODUCT_NAME, "x3800"), | ||
98 | }, | ||
99 | }, | ||
100 | { | ||
101 | .callback = can_skip_ioresource_align, | ||
102 | .ident = "IBM System x3850", | ||
103 | .matches = { | ||
104 | DMI_MATCH(DMI_SYS_VENDOR, "IBM"), | ||
105 | DMI_MATCH(DMI_PRODUCT_NAME, "x3850"), | ||
106 | }, | ||
107 | }, | ||
108 | { | ||
109 | .callback = can_skip_ioresource_align, | ||
110 | .ident = "IBM System x3950", | ||
111 | .matches = { | ||
112 | DMI_MATCH(DMI_SYS_VENDOR, "IBM"), | ||
113 | DMI_MATCH(DMI_PRODUCT_NAME, "x3950"), | ||
114 | }, | ||
115 | }, | ||
116 | {} | ||
117 | }; | ||
118 | |||
119 | void __init dmi_check_skip_isa_align(void) | ||
120 | { | ||
121 | dmi_check_system(can_skip_pciprobe_dmi_table); | ||
91 | } | 122 | } |
92 | 123 | ||
93 | /* | 124 | /* |
@@ -97,11 +128,7 @@ static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) | |||
97 | 128 | ||
98 | void __devinit pcibios_fixup_bus(struct pci_bus *b) | 129 | void __devinit pcibios_fixup_bus(struct pci_bus *b) |
99 | { | 130 | { |
100 | struct pci_dev *dev; | ||
101 | |||
102 | pci_read_bridge_bases(b); | 131 | pci_read_bridge_bases(b); |
103 | list_for_each_entry(dev, &b->devices, bus_list) | ||
104 | pcibios_fixup_device_resources(dev); | ||
105 | } | 132 | } |
106 | 133 | ||
107 | /* | 134 | /* |
@@ -318,13 +345,16 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { | |||
318 | {} | 345 | {} |
319 | }; | 346 | }; |
320 | 347 | ||
348 | void __init dmi_check_pciprobe(void) | ||
349 | { | ||
350 | dmi_check_system(pciprobe_dmi_table); | ||
351 | } | ||
352 | |||
321 | struct pci_bus * __devinit pcibios_scan_root(int busnum) | 353 | struct pci_bus * __devinit pcibios_scan_root(int busnum) |
322 | { | 354 | { |
323 | struct pci_bus *bus = NULL; | 355 | struct pci_bus *bus = NULL; |
324 | struct pci_sysdata *sd; | 356 | struct pci_sysdata *sd; |
325 | 357 | ||
326 | dmi_check_system(pciprobe_dmi_table); | ||
327 | |||
328 | while ((bus = pci_find_next_bus(bus)) != NULL) { | 358 | while ((bus = pci_find_next_bus(bus)) != NULL) { |
329 | if (bus->number == busnum) { | 359 | if (bus->number == busnum) { |
330 | /* Already scanned */ | 360 | /* Already scanned */ |
@@ -462,6 +492,9 @@ char * __devinit pcibios_setup(char *str) | |||
462 | } else if (!strcmp(str, "routeirq")) { | 492 | } else if (!strcmp(str, "routeirq")) { |
463 | pci_routeirq = 1; | 493 | pci_routeirq = 1; |
464 | return NULL; | 494 | return NULL; |
495 | } else if (!strcmp(str, "skip_isa_align")) { | ||
496 | pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; | ||
497 | return NULL; | ||
465 | } | 498 | } |
466 | return str; | 499 | return str; |
467 | } | 500 | } |
@@ -489,7 +522,7 @@ void pcibios_disable_device (struct pci_dev *dev) | |||
489 | pcibios_disable_irq(dev); | 522 | pcibios_disable_irq(dev); |
490 | } | 523 | } |
491 | 524 | ||
492 | struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) | 525 | struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) |
493 | { | 526 | { |
494 | struct pci_bus *bus = NULL; | 527 | struct pci_bus *bus = NULL; |
495 | struct pci_sysdata *sd; | 528 | struct pci_sysdata *sd; |
@@ -512,7 +545,7 @@ struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) | |||
512 | return bus; | 545 | return bus; |
513 | } | 546 | } |
514 | 547 | ||
515 | struct pci_bus *pci_scan_bus_with_sysdata(int busno) | 548 | struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno) |
516 | { | 549 | { |
517 | return pci_scan_bus_on_node(busno, &pci_root_ops, -1); | 550 | return pci_scan_bus_on_node(busno, &pci_root_ops, -1); |
518 | } | 551 | } |
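
For context on the table that moved here from acpi.c: dmi_check_system() walks the array until the empty terminator and invokes ->callback for every entry whose DMI_MATCH patterns all match the running machine, returning the number of matches. A self-contained usage sketch with hypothetical names:

#include <linux/dmi.h>

static int __devinit example_callback(const struct dmi_system_id *d)
{
	printk(KERN_INFO "PCI: matched %s\n", d->ident);
	return 0;
}

static struct dmi_system_id example_dmi_table[] __devinitdata = {
	{
		.callback = example_callback,
		.ident = "Example System",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Example Inc."),
		},
	},
	{}	/* terminator: dmi_check_system() stops here */
};

/* dmi_check_system(example_dmi_table) then fires example_callback
 * once per matching entry. */
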
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index b60b2abd480c..ff3a6a336342 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c | |||
@@ -502,7 +502,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, | |||
502 | */ | 502 | */ |
503 | static void fam10h_pci_cfg_space_size(struct pci_dev *dev) | 503 | static void fam10h_pci_cfg_space_size(struct pci_dev *dev) |
504 | { | 504 | { |
505 | dev->cfg_size = pci_cfg_space_size_ext(dev, 0); | 505 | dev->cfg_size = pci_cfg_space_size_ext(dev); |
506 | } | 506 | } |
507 | 507 | ||
508 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); | 508 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); |
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index dd30c6076b5d..e70b9c57b88e 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c | |||
@@ -33,6 +33,10 @@ static __init int pci_access_init(void) | |||
33 | printk(KERN_ERR | 33 | printk(KERN_ERR |
34 | "PCI: Fatal: No config space access function found\n"); | 34 | "PCI: Fatal: No config space access function found\n"); |
35 | 35 | ||
36 | dmi_check_pciprobe(); | ||
37 | |||
38 | dmi_check_skip_isa_align(); | ||
39 | |||
36 | return 0; | 40 | return 0; |
37 | } | 41 | } |
38 | arch_initcall(pci_access_init); | 42 | arch_initcall(pci_access_init); |
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index c58805a92db5..f3972b12c60a 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h | |||
@@ -38,6 +38,9 @@ enum pci_bf_sort_state { | |||
38 | pci_dmi_bf, | 38 | pci_dmi_bf, |
39 | }; | 39 | }; |
40 | 40 | ||
41 | extern void __init dmi_check_pciprobe(void); | ||
42 | extern void __init dmi_check_skip_isa_align(void); | ||
43 | |||
41 | /* pci-i386.c */ | 44 | /* pci-i386.c */ |
42 | 45 | ||
43 | extern unsigned int pcibios_max_latency; | 46 | extern unsigned int pcibios_max_latency; |
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 4dceeb1fc5e0..cf058fecfcee 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -162,7 +162,7 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr) | |||
162 | Elf32_Shdr *shdr; | 162 | Elf32_Shdr *shdr; |
163 | int i; | 163 | int i; |
164 | 164 | ||
165 | BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || | 165 | BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 || |
166 | !elf_check_arch_ia32(ehdr) || | 166 | !elf_check_arch_ia32(ehdr) || |
167 | ehdr->e_type != ET_DYN); | 167 | ehdr->e_type != ET_DYN); |
168 | 168 | ||
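
This hunk and the relocs.c one make the same substitution: per <linux/elf.h>, ELFMAG is the four-byte magic "\177ELF" and SELFMAG is its length, so the comparison length is now tied to the magic rather than a bare literal:

/* The shared idiom after this patch (shown in relocs.c's form): */
if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0)
	die("No ELF magic\n");
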
diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c index 4db42bff8c60..69527688f794 100644 --- a/arch/x86/video/fbdev.c +++ b/arch/x86/video/fbdev.c | |||
@@ -1,5 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * | ||
3 | * Copyright (C) 2007 Antonino Daplas <adaplas@gmail.com> | 2 | * Copyright (C) 2007 Antonino Daplas <adaplas@gmail.com> |
4 | * | 3 | * |
5 | * This file is subject to the terms and conditions of the GNU General Public | 4 | * This file is subject to the terms and conditions of the GNU General Public |
@@ -29,3 +28,4 @@ int fb_is_primary_device(struct fb_info *info) | |||
29 | return retval; | 28 | return retval; |
30 | } | 29 | } |
31 | EXPORT_SYMBOL(fb_is_primary_device); | 30 | EXPORT_SYMBOL(fb_is_primary_device); |
31 | MODULE_LICENSE("GPL"); | ||