Diffstat (limited to 'arch/x86')
56 files changed, 1079 insertions, 390 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 90195235596..3ad653de710 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2125,6 +2125,13 @@ config NET5501 | |||
2125 | ---help--- | 2125 | ---help--- |
2126 | This option enables system support for the Soekris Engineering net5501. | 2126 | This option enables system support for the Soekris Engineering net5501. |
2127 | 2127 | ||
2128 | config GEOS | ||
2129 | bool "Traverse Technologies GEOS System Support (LEDS, GPIO, etc)" | ||
2130 | select GPIOLIB | ||
2131 | depends on DMI | ||
2132 | ---help--- | ||
2133 | This option enables system support for the Traverse Technologies GEOS. | ||
2134 | |||
2128 | endif # X86_32 | 2135 | endif # X86_32 |
2129 | 2136 | ||
2130 | config AMD_NB | 2137 | config AMD_NB |
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
index 36ddec6a41c..4be406abeef 100644
--- a/arch/x86/Makefile.um
+++ b/arch/x86/Makefile.um
@@ -8,15 +8,11 @@ ELF_ARCH := i386 | |||
8 | ELF_FORMAT := elf32-i386 | 8 | ELF_FORMAT := elf32-i386 |
9 | CHECKFLAGS += -D__i386__ | 9 | CHECKFLAGS += -D__i386__ |
10 | 10 | ||
11 | ifeq ("$(origin SUBARCH)", "command line") | ||
12 | ifneq ("$(shell uname -m | sed -e s/i.86/i386/)", "$(SUBARCH)") | ||
13 | KBUILD_CFLAGS += $(call cc-option,-m32) | 11 | KBUILD_CFLAGS += $(call cc-option,-m32) |
14 | KBUILD_AFLAGS += $(call cc-option,-m32) | 12 | KBUILD_AFLAGS += $(call cc-option,-m32) |
15 | LINK-y += $(call cc-option,-m32) | 13 | LINK-y += $(call cc-option,-m32) |
16 | 14 | ||
17 | export LDFLAGS | 15 | export LDFLAGS |
18 | endif | ||
19 | endif | ||
20 | 16 | ||
21 | # First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. | 17 | # First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. |
22 | include $(srctree)/arch/x86/Makefile_32.cpu | 18 | include $(srctree)/arch/x86/Makefile_32.cpu |
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 3e02148bb77..5a747dd884d 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -37,9 +37,9 @@ setup-y += video-bios.o | |||
37 | targets += $(setup-y) | 37 | targets += $(setup-y) |
38 | hostprogs-y := mkcpustr tools/build | 38 | hostprogs-y := mkcpustr tools/build |
39 | 39 | ||
40 | HOSTCFLAGS_mkcpustr.o := -I$(srctree)/arch/$(SRCARCH)/include | 40 | HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(LINUXINCLUDE) \ |
41 | HOST_EXTRACFLAGS += -I$(objtree)/include -I$(srctree)/tools/include \ | 41 | -D__EXPORTED_HEADERS__ |
42 | -include $(srctree)/include/linux/kconfig.h | 42 | |
43 | $(obj)/cpu.o: $(obj)/cpustr.h | 43 | $(obj)/cpu.o: $(obj)/cpustr.h |
44 | 44 | ||
45 | quiet_cmd_cpustr = CPUSTR $@ | 45 | quiet_cmd_cpustr = CPUSTR $@ |
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index b903d5ea394..2d91580bf22 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -78,8 +78,75 @@ | |||
78 | */ | 78 | */ |
79 | #ifdef __KERNEL__ | 79 | #ifdef __KERNEL__ |
80 | 80 | ||
81 | #include <linux/bug.h> | ||
82 | |||
81 | DECLARE_PER_CPU(unsigned long, cpu_dr7); | 83 | DECLARE_PER_CPU(unsigned long, cpu_dr7); |
82 | 84 | ||
85 | #ifndef CONFIG_PARAVIRT | ||
86 | /* | ||
87 | * These special macros can be used to get or set a debugging register | ||
88 | */ | ||
89 | #define get_debugreg(var, register) \ | ||
90 | (var) = native_get_debugreg(register) | ||
91 | #define set_debugreg(value, register) \ | ||
92 | native_set_debugreg(register, value) | ||
93 | #endif | ||
94 | |||
95 | static inline unsigned long native_get_debugreg(int regno) | ||
96 | { | ||
97 | unsigned long val = 0; /* Damn you, gcc! */ | ||
98 | |||
99 | switch (regno) { | ||
100 | case 0: | ||
101 | asm("mov %%db0, %0" :"=r" (val)); | ||
102 | break; | ||
103 | case 1: | ||
104 | asm("mov %%db1, %0" :"=r" (val)); | ||
105 | break; | ||
106 | case 2: | ||
107 | asm("mov %%db2, %0" :"=r" (val)); | ||
108 | break; | ||
109 | case 3: | ||
110 | asm("mov %%db3, %0" :"=r" (val)); | ||
111 | break; | ||
112 | case 6: | ||
113 | asm("mov %%db6, %0" :"=r" (val)); | ||
114 | break; | ||
115 | case 7: | ||
116 | asm("mov %%db7, %0" :"=r" (val)); | ||
117 | break; | ||
118 | default: | ||
119 | BUG(); | ||
120 | } | ||
121 | return val; | ||
122 | } | ||
123 | |||
124 | static inline void native_set_debugreg(int regno, unsigned long value) | ||
125 | { | ||
126 | switch (regno) { | ||
127 | case 0: | ||
128 | asm("mov %0, %%db0" ::"r" (value)); | ||
129 | break; | ||
130 | case 1: | ||
131 | asm("mov %0, %%db1" ::"r" (value)); | ||
132 | break; | ||
133 | case 2: | ||
134 | asm("mov %0, %%db2" ::"r" (value)); | ||
135 | break; | ||
136 | case 3: | ||
137 | asm("mov %0, %%db3" ::"r" (value)); | ||
138 | break; | ||
139 | case 6: | ||
140 | asm("mov %0, %%db6" ::"r" (value)); | ||
141 | break; | ||
142 | case 7: | ||
143 | asm("mov %0, %%db7" ::"r" (value)); | ||
144 | break; | ||
145 | default: | ||
146 | BUG(); | ||
147 | } | ||
148 | } | ||
149 | |||
83 | static inline void hw_breakpoint_disable(void) | 150 | static inline void hw_breakpoint_disable(void) |
84 | { | 151 | { |
85 | /* Zero the control register for HW Breakpoint */ | 152 | /* Zero the control register for HW Breakpoint */ |
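The hunk above moves native_get_debugreg()/native_set_debugreg() into <asm/debugreg.h> (they are deleted from processor.h later in this diff) and keeps the non-paravirt get_debugreg()/set_debugreg() macros next to them. A minimal usage sketch, assuming kernel context; the function name and the DR7 manipulation are illustrative only, not part of the patch:

	#include <asm/debugreg.h>

	static void sketch_clear_local_breakpoints(void)
	{
		unsigned long dr7;

		get_debugreg(dr7, 7);		/* expands to native_get_debugreg(7) without CONFIG_PARAVIRT */
		set_debugreg(dr7 & ~0xffUL, 7);	/* drop the DR7.L0-L3/G0-G3 enable bits */
	}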
diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h
index 77e95f54570..332f98c9111 100644
--- a/arch/x86/include/asm/kgdb.h
+++ b/arch/x86/include/asm/kgdb.h
@@ -64,11 +64,15 @@ enum regnames { | |||
64 | GDB_PS, /* 17 */ | 64 | GDB_PS, /* 17 */ |
65 | GDB_CS, /* 18 */ | 65 | GDB_CS, /* 18 */ |
66 | GDB_SS, /* 19 */ | 66 | GDB_SS, /* 19 */ |
67 | GDB_DS, /* 20 */ | ||
68 | GDB_ES, /* 21 */ | ||
69 | GDB_FS, /* 22 */ | ||
70 | GDB_GS, /* 23 */ | ||
67 | }; | 71 | }; |
68 | #define GDB_ORIG_AX 57 | 72 | #define GDB_ORIG_AX 57 |
69 | #define DBG_MAX_REG_NUM 20 | 73 | #define DBG_MAX_REG_NUM 24 |
70 | /* 17 64 bit regs and 3 32 bit regs */ | 74 | /* 17 64 bit regs and 5 32 bit regs */ |
71 | #define NUMREGBYTES ((17 * 8) + (3 * 4)) | 75 | #define NUMREGBYTES ((17 * 8) + (5 * 4)) |
72 | #endif /* ! CONFIG_X86_32 */ | 76 | #endif /* ! CONFIG_X86_32 */ |
73 | 77 | ||
74 | static inline void arch_kgdb_breakpoint(void) | 78 | static inline void arch_kgdb_breakpoint(void) |
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 4d8dcbdfc12..e7d1c194d27 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -321,4 +321,8 @@ struct kvm_xcrs { | |||
321 | __u64 padding[16]; | 321 | __u64 padding[16]; |
322 | }; | 322 | }; |
323 | 323 | ||
324 | /* definition of registers in kvm_run */ | ||
325 | struct kvm_sync_regs { | ||
326 | }; | ||
327 | |||
324 | #endif /* _ASM_X86_KVM_H */ | 328 | #endif /* _ASM_X86_KVM_H */ |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 7b9cfc4878a..c222e1a1b12 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -176,6 +176,7 @@ struct x86_emulate_ops { | |||
176 | void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); | 176 | void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); |
177 | ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); | 177 | ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); |
178 | int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); | 178 | int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); |
179 | void (*set_rflags)(struct x86_emulate_ctxt *ctxt, ulong val); | ||
179 | int (*cpl)(struct x86_emulate_ctxt *ctxt); | 180 | int (*cpl)(struct x86_emulate_ctxt *ctxt); |
180 | int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); | 181 | int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); |
181 | int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); | 182 | int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); |
@@ -388,7 +389,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); | |||
388 | #define EMULATION_INTERCEPTED 2 | 389 | #define EMULATION_INTERCEPTED 2 |
389 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); | 390 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); |
390 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | 391 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, |
391 | u16 tss_selector, int reason, | 392 | u16 tss_selector, int idt_index, int reason, |
392 | bool has_error_code, u32 error_code); | 393 | bool has_error_code, u32 error_code); |
393 | int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); | 394 | int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); |
394 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ | 395 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 52d6640a5ca..e216ba066e7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -29,7 +29,7 @@ | |||
29 | #include <asm/msr-index.h> | 29 | #include <asm/msr-index.h> |
30 | 30 | ||
31 | #define KVM_MAX_VCPUS 254 | 31 | #define KVM_MAX_VCPUS 254 |
32 | #define KVM_SOFT_MAX_VCPUS 64 | 32 | #define KVM_SOFT_MAX_VCPUS 160 |
33 | #define KVM_MEMORY_SLOTS 32 | 33 | #define KVM_MEMORY_SLOTS 32 |
34 | /* memory slots that does not exposed to userspace */ | 34 | /* memory slots that does not exposed to userspace */ |
35 | #define KVM_PRIVATE_MEM_SLOTS 4 | 35 | #define KVM_PRIVATE_MEM_SLOTS 4 |
@@ -181,13 +181,6 @@ struct kvm_mmu_memory_cache { | |||
181 | void *objects[KVM_NR_MEM_OBJS]; | 181 | void *objects[KVM_NR_MEM_OBJS]; |
182 | }; | 182 | }; |
183 | 183 | ||
184 | #define NR_PTE_CHAIN_ENTRIES 5 | ||
185 | |||
186 | struct kvm_pte_chain { | ||
187 | u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES]; | ||
188 | struct hlist_node link; | ||
189 | }; | ||
190 | |||
191 | /* | 184 | /* |
192 | * kvm_mmu_page_role, below, is defined as: | 185 | * kvm_mmu_page_role, below, is defined as: |
193 | * | 186 | * |
@@ -427,12 +420,16 @@ struct kvm_vcpu_arch { | |||
427 | 420 | ||
428 | u64 last_guest_tsc; | 421 | u64 last_guest_tsc; |
429 | u64 last_kernel_ns; | 422 | u64 last_kernel_ns; |
430 | u64 last_tsc_nsec; | 423 | u64 last_host_tsc; |
431 | u64 last_tsc_write; | 424 | u64 tsc_offset_adjustment; |
432 | u32 virtual_tsc_khz; | 425 | u64 this_tsc_nsec; |
426 | u64 this_tsc_write; | ||
427 | u8 this_tsc_generation; | ||
433 | bool tsc_catchup; | 428 | bool tsc_catchup; |
434 | u32 tsc_catchup_mult; | 429 | bool tsc_always_catchup; |
435 | s8 tsc_catchup_shift; | 430 | s8 virtual_tsc_shift; |
431 | u32 virtual_tsc_mult; | ||
432 | u32 virtual_tsc_khz; | ||
436 | 433 | ||
437 | atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ | 434 | atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ |
438 | unsigned nmi_pending; /* NMI queued after currently running handler */ | 435 | unsigned nmi_pending; /* NMI queued after currently running handler */ |
@@ -478,6 +475,21 @@ struct kvm_vcpu_arch { | |||
478 | u32 id; | 475 | u32 id; |
479 | bool send_user_only; | 476 | bool send_user_only; |
480 | } apf; | 477 | } apf; |
478 | |||
479 | /* OSVW MSRs (AMD only) */ | ||
480 | struct { | ||
481 | u64 length; | ||
482 | u64 status; | ||
483 | } osvw; | ||
484 | }; | ||
485 | |||
486 | struct kvm_lpage_info { | ||
487 | unsigned long rmap_pde; | ||
488 | int write_count; | ||
489 | }; | ||
490 | |||
491 | struct kvm_arch_memory_slot { | ||
492 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; | ||
481 | }; | 493 | }; |
482 | 494 | ||
483 | struct kvm_arch { | 495 | struct kvm_arch { |
@@ -511,8 +523,12 @@ struct kvm_arch { | |||
511 | s64 kvmclock_offset; | 523 | s64 kvmclock_offset; |
512 | raw_spinlock_t tsc_write_lock; | 524 | raw_spinlock_t tsc_write_lock; |
513 | u64 last_tsc_nsec; | 525 | u64 last_tsc_nsec; |
514 | u64 last_tsc_offset; | ||
515 | u64 last_tsc_write; | 526 | u64 last_tsc_write; |
527 | u32 last_tsc_khz; | ||
528 | u64 cur_tsc_nsec; | ||
529 | u64 cur_tsc_write; | ||
530 | u64 cur_tsc_offset; | ||
531 | u8 cur_tsc_generation; | ||
516 | 532 | ||
517 | struct kvm_xen_hvm_config xen_hvm_config; | 533 | struct kvm_xen_hvm_config xen_hvm_config; |
518 | 534 | ||
@@ -644,7 +660,7 @@ struct kvm_x86_ops { | |||
644 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 660 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
645 | int (*get_lpage_level)(void); | 661 | int (*get_lpage_level)(void); |
646 | bool (*rdtscp_supported)(void); | 662 | bool (*rdtscp_supported)(void); |
647 | void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment); | 663 | void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host); |
648 | 664 | ||
649 | void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | 665 | void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
650 | 666 | ||
@@ -652,7 +668,7 @@ struct kvm_x86_ops { | |||
652 | 668 | ||
653 | bool (*has_wbinvd_exit)(void); | 669 | bool (*has_wbinvd_exit)(void); |
654 | 670 | ||
655 | void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); | 671 | void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale); |
656 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); | 672 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); |
657 | 673 | ||
658 | u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); | 674 | u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); |
@@ -674,6 +690,17 @@ struct kvm_arch_async_pf { | |||
674 | 690 | ||
675 | extern struct kvm_x86_ops *kvm_x86_ops; | 691 | extern struct kvm_x86_ops *kvm_x86_ops; |
676 | 692 | ||
693 | static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, | ||
694 | s64 adjustment) | ||
695 | { | ||
696 | kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false); | ||
697 | } | ||
698 | |||
699 | static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) | ||
700 | { | ||
701 | kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true); | ||
702 | } | ||
703 | |||
677 | int kvm_mmu_module_init(void); | 704 | int kvm_mmu_module_init(void); |
678 | void kvm_mmu_module_exit(void); | 705 | void kvm_mmu_module_exit(void); |
679 | 706 | ||
@@ -741,8 +768,8 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); | |||
741 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 768 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
742 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); | 769 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
743 | 770 | ||
744 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | 771 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, |
745 | bool has_error_code, u32 error_code); | 772 | int reason, bool has_error_code, u32 error_code); |
746 | 773 | ||
747 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 774 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
748 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 775 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
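The adjust_tsc_offset_guest()/adjust_tsc_offset_host() wrappers added above let a caller state whether a TSC offset change is guest-visible or host-side compensation; the backend receives that as the new bool argument of ->adjust_tsc_offset(). A hedged sketch of a call site, assuming KVM kernel context with <linux/kvm_host.h>; the function name and deltas are made up, not from the patch:

	static void sketch_tsc_adjust(struct kvm_vcpu *vcpu, s64 guest_delta, s64 host_delta)
	{
		/* guest-initiated change, e.g. after the guest writes its TSC */
		adjust_tsc_offset_guest(vcpu, guest_delta);

		/* host-side compensation, e.g. resuming on a CPU whose TSC went backwards */
		adjust_tsc_offset_host(vcpu, host_delta);
	}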
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c0180fd372d..aa0f9130836 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -10,6 +10,7 @@ | |||
10 | #include <asm/paravirt_types.h> | 10 | #include <asm/paravirt_types.h> |
11 | 11 | ||
12 | #ifndef __ASSEMBLY__ | 12 | #ifndef __ASSEMBLY__ |
13 | #include <linux/bug.h> | ||
13 | #include <linux/types.h> | 14 | #include <linux/types.h> |
14 | #include <linux/cpumask.h> | 15 | #include <linux/cpumask.h> |
15 | 16 | ||
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index e8fb2c7a5f4..2291895b183 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -23,6 +23,7 @@ | |||
23 | #define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) | 23 | #define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) |
24 | #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) | 24 | #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) |
25 | #define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) | 25 | #define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) |
26 | #define ARCH_PERFMON_EVENTSEL_PIN_CONTROL (1ULL << 19) | ||
26 | #define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) | 27 | #define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) |
27 | #define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) | 28 | #define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) |
28 | #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) | 29 | #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 78e30ea492b..a19542c1685 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -484,61 +484,6 @@ struct thread_struct { | |||
484 | unsigned io_bitmap_max; | 484 | unsigned io_bitmap_max; |
485 | }; | 485 | }; |
486 | 486 | ||
487 | static inline unsigned long native_get_debugreg(int regno) | ||
488 | { | ||
489 | unsigned long val = 0; /* Damn you, gcc! */ | ||
490 | |||
491 | switch (regno) { | ||
492 | case 0: | ||
493 | asm("mov %%db0, %0" :"=r" (val)); | ||
494 | break; | ||
495 | case 1: | ||
496 | asm("mov %%db1, %0" :"=r" (val)); | ||
497 | break; | ||
498 | case 2: | ||
499 | asm("mov %%db2, %0" :"=r" (val)); | ||
500 | break; | ||
501 | case 3: | ||
502 | asm("mov %%db3, %0" :"=r" (val)); | ||
503 | break; | ||
504 | case 6: | ||
505 | asm("mov %%db6, %0" :"=r" (val)); | ||
506 | break; | ||
507 | case 7: | ||
508 | asm("mov %%db7, %0" :"=r" (val)); | ||
509 | break; | ||
510 | default: | ||
511 | BUG(); | ||
512 | } | ||
513 | return val; | ||
514 | } | ||
515 | |||
516 | static inline void native_set_debugreg(int regno, unsigned long value) | ||
517 | { | ||
518 | switch (regno) { | ||
519 | case 0: | ||
520 | asm("mov %0, %%db0" ::"r" (value)); | ||
521 | break; | ||
522 | case 1: | ||
523 | asm("mov %0, %%db1" ::"r" (value)); | ||
524 | break; | ||
525 | case 2: | ||
526 | asm("mov %0, %%db2" ::"r" (value)); | ||
527 | break; | ||
528 | case 3: | ||
529 | asm("mov %0, %%db3" ::"r" (value)); | ||
530 | break; | ||
531 | case 6: | ||
532 | asm("mov %0, %%db6" ::"r" (value)); | ||
533 | break; | ||
534 | case 7: | ||
535 | asm("mov %0, %%db7" ::"r" (value)); | ||
536 | break; | ||
537 | default: | ||
538 | BUG(); | ||
539 | } | ||
540 | } | ||
541 | |||
542 | /* | 487 | /* |
543 | * Set IOPL bits in EFLAGS from given mask | 488 | * Set IOPL bits in EFLAGS from given mask |
544 | */ | 489 | */ |
@@ -584,14 +529,6 @@ static inline void native_swapgs(void) | |||
584 | #define __cpuid native_cpuid | 529 | #define __cpuid native_cpuid |
585 | #define paravirt_enabled() 0 | 530 | #define paravirt_enabled() 0 |
586 | 531 | ||
587 | /* | ||
588 | * These special macros can be used to get or set a debugging register | ||
589 | */ | ||
590 | #define get_debugreg(var, register) \ | ||
591 | (var) = native_get_debugreg(register) | ||
592 | #define set_debugreg(value, register) \ | ||
593 | native_set_debugreg(register, value) | ||
594 | |||
595 | static inline void load_sp0(struct tss_struct *tss, | 532 | static inline void load_sp0(struct tss_struct *tss, |
596 | struct thread_struct *thread) | 533 | struct thread_struct *thread) |
597 | { | 534 | { |
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 15d99153a96..c91e8b9d588 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -61,7 +61,7 @@ extern void check_tsc_sync_source(int cpu); | |||
61 | extern void check_tsc_sync_target(void); | 61 | extern void check_tsc_sync_target(void); |
62 | 62 | ||
63 | extern int notsc_setup(char *); | 63 | extern int notsc_setup(char *); |
64 | extern void save_sched_clock_state(void); | 64 | extern void tsc_save_sched_clock_state(void); |
65 | extern void restore_sched_clock_state(void); | 65 | extern void tsc_restore_sched_clock_state(void); |
66 | 66 | ||
67 | #endif /* _ASM_X86_TSC_H */ | 67 | #endif /* _ASM_X86_TSC_H */ |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 517d4767ffd..baaca8defec 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -145,9 +145,11 @@ struct x86_init_ops { | |||
145 | /** | 145 | /** |
146 | * struct x86_cpuinit_ops - platform specific cpu hotplug setups | 146 | * struct x86_cpuinit_ops - platform specific cpu hotplug setups |
147 | * @setup_percpu_clockev: set up the per cpu clock event device | 147 | * @setup_percpu_clockev: set up the per cpu clock event device |
148 | * @early_percpu_clock_init: early init of the per cpu clock event device | ||
148 | */ | 149 | */ |
149 | struct x86_cpuinit_ops { | 150 | struct x86_cpuinit_ops { |
150 | void (*setup_percpu_clockev)(void); | 151 | void (*setup_percpu_clockev)(void); |
152 | void (*early_percpu_clock_init)(void); | ||
151 | void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); | 153 | void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); |
152 | }; | 154 | }; |
153 | 155 | ||
@@ -160,6 +162,8 @@ struct x86_cpuinit_ops { | |||
160 | * @is_untracked_pat_range exclude from PAT logic | 162 | * @is_untracked_pat_range exclude from PAT logic |
161 | * @nmi_init enable NMI on cpus | 163 | * @nmi_init enable NMI on cpus |
162 | * @i8042_detect pre-detect if i8042 controller exists | 164 | * @i8042_detect pre-detect if i8042 controller exists |
165 | * @save_sched_clock_state: save state for sched_clock() on suspend | ||
166 | * @restore_sched_clock_state: restore state for sched_clock() on resume | ||
163 | */ | 167 | */ |
164 | struct x86_platform_ops { | 168 | struct x86_platform_ops { |
165 | unsigned long (*calibrate_tsc)(void); | 169 | unsigned long (*calibrate_tsc)(void); |
@@ -171,6 +175,8 @@ struct x86_platform_ops { | |||
171 | void (*nmi_init)(void); | 175 | void (*nmi_init)(void); |
172 | unsigned char (*get_nmi_reason)(void); | 176 | unsigned char (*get_nmi_reason)(void); |
173 | int (*i8042_detect)(void); | 177 | int (*i8042_detect)(void); |
178 | void (*save_sched_clock_state)(void); | ||
179 | void (*restore_sched_clock_state)(void); | ||
174 | }; | 180 | }; |
175 | 181 | ||
176 | struct pci_dev; | 182 | struct pci_dev; |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ade9c794ed9..e49477444ff 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/archrandom.h> | 18 | #include <asm/archrandom.h> |
19 | #include <asm/hypervisor.h> | 19 | #include <asm/hypervisor.h> |
20 | #include <asm/processor.h> | 20 | #include <asm/processor.h> |
21 | #include <asm/debugreg.h> | ||
21 | #include <asm/sections.h> | 22 | #include <asm/sections.h> |
22 | #include <linux/topology.h> | 23 | #include <linux/topology.h> |
23 | #include <linux/cpumask.h> | 24 | #include <linux/cpumask.h> |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 0a18d16cb58..fa2900c0e39 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -643,14 +643,14 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) | |||
643 | /* Prefer fixed purpose counters */ | 643 | /* Prefer fixed purpose counters */ |
644 | if (x86_pmu.num_counters_fixed) { | 644 | if (x86_pmu.num_counters_fixed) { |
645 | idx = X86_PMC_IDX_FIXED; | 645 | idx = X86_PMC_IDX_FIXED; |
646 | for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { | 646 | for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { |
647 | if (!__test_and_set_bit(idx, sched->state.used)) | 647 | if (!__test_and_set_bit(idx, sched->state.used)) |
648 | goto done; | 648 | goto done; |
649 | } | 649 | } |
650 | } | 650 | } |
651 | /* Grab the first unused counter starting with idx */ | 651 | /* Grab the first unused counter starting with idx */ |
652 | idx = sched->state.counter; | 652 | idx = sched->state.counter; |
653 | for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { | 653 | for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) { |
654 | if (!__test_and_set_bit(idx, sched->state.used)) | 654 | if (!__test_and_set_bit(idx, sched->state.used)) |
655 | goto done; | 655 | goto done; |
656 | } | 656 | } |
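for_each_set_bit_from(), the renamed iterator used above, continues the scan from the iterator's current value rather than from bit 0, which is exactly what the counter-scheduling loop needs. A small illustrative sketch, not taken from the patch; the function and parameter names are invented:

	#include <linux/bitops.h>

	/* claim the first counter allowed by idxmsk and not yet used, starting at 'start' */
	static int sketch_grab_counter(unsigned long *idxmsk, unsigned long *used,
				       int start, int max)
	{
		int idx = start;

		for_each_set_bit_from(idx, idxmsk, max) {
			if (!__test_and_set_bit(idx, used))
				return idx;
		}
		return -1;	/* nothing free */
	}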
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 79d97e68f04..7b784f4ef1e 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -98,12 +98,6 @@ | |||
98 | #endif | 98 | #endif |
99 | .endm | 99 | .endm |
100 | 100 | ||
101 | #ifdef CONFIG_VM86 | ||
102 | #define resume_userspace_sig check_userspace | ||
103 | #else | ||
104 | #define resume_userspace_sig resume_userspace | ||
105 | #endif | ||
106 | |||
107 | /* | 101 | /* |
108 | * User gs save/restore | 102 | * User gs save/restore |
109 | * | 103 | * |
@@ -327,10 +321,19 @@ ret_from_exception: | |||
327 | preempt_stop(CLBR_ANY) | 321 | preempt_stop(CLBR_ANY) |
328 | ret_from_intr: | 322 | ret_from_intr: |
329 | GET_THREAD_INFO(%ebp) | 323 | GET_THREAD_INFO(%ebp) |
330 | check_userspace: | 324 | resume_userspace_sig: |
325 | #ifdef CONFIG_VM86 | ||
331 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS | 326 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS |
332 | movb PT_CS(%esp), %al | 327 | movb PT_CS(%esp), %al |
333 | andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax | 328 | andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax |
329 | #else | ||
330 | /* | ||
331 | * We can be coming here from a syscall done in the kernel space, | ||
332 | * e.g. a failed kernel_execve(). | ||
333 | */ | ||
334 | movl PT_CS(%esp), %eax | ||
335 | andl $SEGMENT_RPL_MASK, %eax | ||
336 | #endif | ||
334 | cmpl $USER_RPL, %eax | 337 | cmpl $USER_RPL, %eax |
335 | jb resume_kernel # not returning to v8086 or userspace | 338 | jb resume_kernel # not returning to v8086 or userspace |
336 | 339 | ||
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 99b85b423bb..6d5fc8cfd5d 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -305,10 +305,10 @@ void __init native_init_IRQ(void) | |||
305 | * us. (some of these will be overridden and become | 305 | * us. (some of these will be overridden and become |
306 | * 'special' SMP interrupts) | 306 | * 'special' SMP interrupts) |
307 | */ | 307 | */ |
308 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { | 308 | i = FIRST_EXTERNAL_VECTOR; |
309 | for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { | ||
309 | /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ | 310 | /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ |
310 | if (!test_bit(i, used_vectors)) | 311 | set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); |
311 | set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); | ||
312 | } | 312 | } |
313 | 313 | ||
314 | if (!acpi_ioapic && !of_ioapic) | 314 | if (!acpi_ioapic && !of_ioapic) |
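The rewritten loop above relies on for_each_clear_bit_from(), which likewise starts from the iterator's current value but visits only bits still clear in used_vectors, so the explicit test_bit() check disappears. A minimal sketch of the idiom (names invented, not from the patch):

	#include <linux/bitops.h>
	#include <linux/printk.h>

	/* report every slot in 'used' that nobody has claimed yet, starting at 'first' */
	static void sketch_list_free_slots(unsigned long *used, int first, int max)
	{
		int i = first;

		for_each_clear_bit_from(i, used, max)
			pr_info("slot %d is free\n", i);
	}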
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 4425a12ece4..db6720edfdd 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -66,8 +66,6 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = | |||
66 | { "ss", 4, offsetof(struct pt_regs, ss) }, | 66 | { "ss", 4, offsetof(struct pt_regs, ss) }, |
67 | { "ds", 4, offsetof(struct pt_regs, ds) }, | 67 | { "ds", 4, offsetof(struct pt_regs, ds) }, |
68 | { "es", 4, offsetof(struct pt_regs, es) }, | 68 | { "es", 4, offsetof(struct pt_regs, es) }, |
69 | { "fs", 4, -1 }, | ||
70 | { "gs", 4, -1 }, | ||
71 | #else | 69 | #else |
72 | { "ax", 8, offsetof(struct pt_regs, ax) }, | 70 | { "ax", 8, offsetof(struct pt_regs, ax) }, |
73 | { "bx", 8, offsetof(struct pt_regs, bx) }, | 71 | { "bx", 8, offsetof(struct pt_regs, bx) }, |
@@ -89,7 +87,11 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = | |||
89 | { "flags", 4, offsetof(struct pt_regs, flags) }, | 87 | { "flags", 4, offsetof(struct pt_regs, flags) }, |
90 | { "cs", 4, offsetof(struct pt_regs, cs) }, | 88 | { "cs", 4, offsetof(struct pt_regs, cs) }, |
91 | { "ss", 4, offsetof(struct pt_regs, ss) }, | 89 | { "ss", 4, offsetof(struct pt_regs, ss) }, |
90 | { "ds", 4, -1 }, | ||
91 | { "es", 4, -1 }, | ||
92 | #endif | 92 | #endif |
93 | { "fs", 4, -1 }, | ||
94 | { "gs", 4, -1 }, | ||
93 | }; | 95 | }; |
94 | 96 | ||
95 | int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) | 97 | int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 44842d756b2..f8492da65bf 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -136,6 +136,15 @@ int kvm_register_clock(char *txt) | |||
136 | return ret; | 136 | return ret; |
137 | } | 137 | } |
138 | 138 | ||
139 | static void kvm_save_sched_clock_state(void) | ||
140 | { | ||
141 | } | ||
142 | |||
143 | static void kvm_restore_sched_clock_state(void) | ||
144 | { | ||
145 | kvm_register_clock("primary cpu clock, resume"); | ||
146 | } | ||
147 | |||
139 | #ifdef CONFIG_X86_LOCAL_APIC | 148 | #ifdef CONFIG_X86_LOCAL_APIC |
140 | static void __cpuinit kvm_setup_secondary_clock(void) | 149 | static void __cpuinit kvm_setup_secondary_clock(void) |
141 | { | 150 | { |
@@ -144,8 +153,6 @@ static void __cpuinit kvm_setup_secondary_clock(void) | |||
144 | * we shouldn't fail. | 153 | * we shouldn't fail. |
145 | */ | 154 | */ |
146 | WARN_ON(kvm_register_clock("secondary cpu clock")); | 155 | WARN_ON(kvm_register_clock("secondary cpu clock")); |
147 | /* ok, done with our trickery, call native */ | ||
148 | setup_secondary_APIC_clock(); | ||
149 | } | 156 | } |
150 | #endif | 157 | #endif |
151 | 158 | ||
@@ -194,9 +201,11 @@ void __init kvmclock_init(void) | |||
194 | x86_platform.get_wallclock = kvm_get_wallclock; | 201 | x86_platform.get_wallclock = kvm_get_wallclock; |
195 | x86_platform.set_wallclock = kvm_set_wallclock; | 202 | x86_platform.set_wallclock = kvm_set_wallclock; |
196 | #ifdef CONFIG_X86_LOCAL_APIC | 203 | #ifdef CONFIG_X86_LOCAL_APIC |
197 | x86_cpuinit.setup_percpu_clockev = | 204 | x86_cpuinit.early_percpu_clock_init = |
198 | kvm_setup_secondary_clock; | 205 | kvm_setup_secondary_clock; |
199 | #endif | 206 | #endif |
207 | x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; | ||
208 | x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; | ||
200 | machine_ops.shutdown = kvm_shutdown; | 209 | machine_ops.shutdown = kvm_shutdown; |
201 | #ifdef CONFIG_KEXEC | 210 | #ifdef CONFIG_KEXEC |
202 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 211 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 2b26485f0c1..ab137605e69 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -26,6 +26,7 @@ | |||
26 | 26 | ||
27 | #include <asm/bug.h> | 27 | #include <asm/bug.h> |
28 | #include <asm/paravirt.h> | 28 | #include <asm/paravirt.h> |
29 | #include <asm/debugreg.h> | ||
29 | #include <asm/desc.h> | 30 | #include <asm/desc.h> |
30 | #include <asm/setup.h> | 31 | #include <asm/setup.h> |
31 | #include <asm/pgtable.h> | 32 | #include <asm/pgtable.h> |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1c4d769e21e..28e5e06fcba 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -262,10 +262,11 @@ rootfs_initcall(pci_iommu_init); | |||
262 | 262 | ||
263 | static __devinit void via_no_dac(struct pci_dev *dev) | 263 | static __devinit void via_no_dac(struct pci_dev *dev) |
264 | { | 264 | { |
265 | if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { | 265 | if (forbid_dac == 0) { |
266 | dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); | 266 | dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); |
267 | forbid_dac = 1; | 267 | forbid_dac = 1; |
268 | } | 268 | } |
269 | } | 269 | } |
270 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); | 270 | DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, |
271 | PCI_CLASS_BRIDGE_PCI, 8, via_no_dac); | ||
271 | #endif | 272 | #endif |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e578a79a309..5104a2b685c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -255,6 +255,7 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
255 | * most necessary things. | 255 | * most necessary things. |
256 | */ | 256 | */ |
257 | cpu_init(); | 257 | cpu_init(); |
258 | x86_cpuinit.early_percpu_clock_init(); | ||
258 | preempt_disable(); | 259 | preempt_disable(); |
259 | smp_callin(); | 260 | smp_callin(); |
260 | 261 | ||
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 183c5925a9f..899a03f2d18 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -630,7 +630,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | |||
630 | 630 | ||
631 | static unsigned long long cyc2ns_suspend; | 631 | static unsigned long long cyc2ns_suspend; |
632 | 632 | ||
633 | void save_sched_clock_state(void) | 633 | void tsc_save_sched_clock_state(void) |
634 | { | 634 | { |
635 | if (!sched_clock_stable) | 635 | if (!sched_clock_stable) |
636 | return; | 636 | return; |
@@ -646,7 +646,7 @@ void save_sched_clock_state(void) | |||
646 | * that sched_clock() continues from the point where it was left off during | 646 | * that sched_clock() continues from the point where it was left off during |
647 | * suspend. | 647 | * suspend. |
648 | */ | 648 | */ |
649 | void restore_sched_clock_state(void) | 649 | void tsc_restore_sched_clock_state(void) |
650 | { | 650 | { |
651 | unsigned long long offset; | 651 | unsigned long long offset; |
652 | unsigned long flags; | 652 | unsigned long flags; |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 947a06ccc67..e9f265fd79a 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -91,6 +91,7 @@ struct x86_init_ops x86_init __initdata = { | |||
91 | }; | 91 | }; |
92 | 92 | ||
93 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { | 93 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { |
94 | .early_percpu_clock_init = x86_init_noop, | ||
94 | .setup_percpu_clockev = setup_secondary_APIC_clock, | 95 | .setup_percpu_clockev = setup_secondary_APIC_clock, |
95 | .fixup_cpu_id = x86_default_fixup_cpu_id, | 96 | .fixup_cpu_id = x86_default_fixup_cpu_id, |
96 | }; | 97 | }; |
@@ -107,7 +108,9 @@ struct x86_platform_ops x86_platform = { | |||
107 | .is_untracked_pat_range = is_ISA_range, | 108 | .is_untracked_pat_range = is_ISA_range, |
108 | .nmi_init = default_nmi_init, | 109 | .nmi_init = default_nmi_init, |
109 | .get_nmi_reason = default_get_nmi_reason, | 110 | .get_nmi_reason = default_get_nmi_reason, |
110 | .i8042_detect = default_i8042_detect | 111 | .i8042_detect = default_i8042_detect, |
112 | .save_sched_clock_state = tsc_save_sched_clock_state, | ||
113 | .restore_sched_clock_state = tsc_restore_sched_clock_state, | ||
111 | }; | 114 | }; |
112 | 115 | ||
113 | EXPORT_SYMBOL_GPL(x86_platform); | 116 | EXPORT_SYMBOL_GPL(x86_platform); |
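With the two new x86_platform hooks defaulting to the renamed TSC helpers, kvmclock (see its hunk above) can override them so a guest re-registers its clock on resume. A hedged sketch of going through the indirection; the wrapper names below are made up and the real suspend/resume callers live outside this diff:

	#include <asm/x86_init.h>

	static void sketch_suspend_clock(void)
	{
		x86_platform.save_sched_clock_state();		/* tsc_save_sched_clock_state() by default */
	}

	static void sketch_resume_clock(void)
	{
		x86_platform.restore_sched_clock_state();	/* kvm_restore_sched_clock_state() under kvmclock */
	}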
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 89b02bfaaca..9fed5bedaad 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -236,7 +236,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
236 | const u32 kvm_supported_word6_x86_features = | 236 | const u32 kvm_supported_word6_x86_features = |
237 | F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | | 237 | F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | |
238 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | | 238 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | |
239 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | | 239 | F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | |
240 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); | 240 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); |
241 | 241 | ||
242 | /* cpuid 0xC0000001.edx */ | 242 | /* cpuid 0xC0000001.edx */ |
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 5b97e1797a6..26d1fb437eb 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -43,4 +43,12 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) | |||
43 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); | 43 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); |
44 | } | 44 | } |
45 | 45 | ||
46 | static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) | ||
47 | { | ||
48 | struct kvm_cpuid_entry2 *best; | ||
49 | |||
50 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
51 | return best && (best->ecx & bit(X86_FEATURE_OSVW)); | ||
52 | } | ||
53 | |||
46 | #endif | 54 | #endif |
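guest_cpuid_has_osvw() follows the pattern of the helpers above it and pairs with the per-vcpu osvw.length/osvw.status fields added in the kvm_host.h hunk: the OSVW MSRs should only be visible to guests whose CPUID advertises the feature. A hedged sketch of such a gate, assuming KVM context; this is not the actual MSR handler from the series:

	static int sketch_get_osvw(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
	{
		if (!guest_cpuid_has_osvw(vcpu))
			return 1;	/* treated as a failing RDMSR */

		*data = (msr == MSR_AMD64_OSVW_ID_LENGTH) ? vcpu->arch.osvw.length
							  : vcpu->arch.osvw.status;
		return 0;
	}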
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0982507b962..83756223f8a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -57,6 +57,7 @@ | |||
57 | #define OpDS 23ull /* DS */ | 57 | #define OpDS 23ull /* DS */ |
58 | #define OpFS 24ull /* FS */ | 58 | #define OpFS 24ull /* FS */ |
59 | #define OpGS 25ull /* GS */ | 59 | #define OpGS 25ull /* GS */ |
60 | #define OpMem8 26ull /* 8-bit zero extended memory operand */ | ||
60 | 61 | ||
61 | #define OpBits 5 /* Width of operand field */ | 62 | #define OpBits 5 /* Width of operand field */ |
62 | #define OpMask ((1ull << OpBits) - 1) | 63 | #define OpMask ((1ull << OpBits) - 1) |
@@ -101,6 +102,7 @@ | |||
101 | #define SrcAcc (OpAcc << SrcShift) | 102 | #define SrcAcc (OpAcc << SrcShift) |
102 | #define SrcImmU16 (OpImmU16 << SrcShift) | 103 | #define SrcImmU16 (OpImmU16 << SrcShift) |
103 | #define SrcDX (OpDX << SrcShift) | 104 | #define SrcDX (OpDX << SrcShift) |
105 | #define SrcMem8 (OpMem8 << SrcShift) | ||
104 | #define SrcMask (OpMask << SrcShift) | 106 | #define SrcMask (OpMask << SrcShift) |
105 | #define BitOp (1<<11) | 107 | #define BitOp (1<<11) |
106 | #define MemAbs (1<<12) /* Memory operand is absolute displacement */ | 108 | #define MemAbs (1<<12) /* Memory operand is absolute displacement */ |
@@ -858,8 +860,7 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, | |||
858 | } | 860 | } |
859 | 861 | ||
860 | static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | 862 | static void decode_register_operand(struct x86_emulate_ctxt *ctxt, |
861 | struct operand *op, | 863 | struct operand *op) |
862 | int inhibit_bytereg) | ||
863 | { | 864 | { |
864 | unsigned reg = ctxt->modrm_reg; | 865 | unsigned reg = ctxt->modrm_reg; |
865 | int highbyte_regs = ctxt->rex_prefix == 0; | 866 | int highbyte_regs = ctxt->rex_prefix == 0; |
@@ -876,7 +877,7 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | |||
876 | } | 877 | } |
877 | 878 | ||
878 | op->type = OP_REG; | 879 | op->type = OP_REG; |
879 | if ((ctxt->d & ByteOp) && !inhibit_bytereg) { | 880 | if (ctxt->d & ByteOp) { |
880 | op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs); | 881 | op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs); |
881 | op->bytes = 1; | 882 | op->bytes = 1; |
882 | } else { | 883 | } else { |
@@ -1151,6 +1152,22 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | |||
1151 | return 1; | 1152 | return 1; |
1152 | } | 1153 | } |
1153 | 1154 | ||
1155 | static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1156 | u16 index, struct desc_struct *desc) | ||
1157 | { | ||
1158 | struct desc_ptr dt; | ||
1159 | ulong addr; | ||
1160 | |||
1161 | ctxt->ops->get_idt(ctxt, &dt); | ||
1162 | |||
1163 | if (dt.size < index * 8 + 7) | ||
1164 | return emulate_gp(ctxt, index << 3 | 0x2); | ||
1165 | |||
1166 | addr = dt.address + index * 8; | ||
1167 | return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc, | ||
1168 | &ctxt->exception); | ||
1169 | } | ||
1170 | |||
1154 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | 1171 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, |
1155 | u16 selector, struct desc_ptr *dt) | 1172 | u16 selector, struct desc_ptr *dt) |
1156 | { | 1173 | { |
@@ -1227,6 +1244,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1227 | seg_desc.type = 3; | 1244 | seg_desc.type = 3; |
1228 | seg_desc.p = 1; | 1245 | seg_desc.p = 1; |
1229 | seg_desc.s = 1; | 1246 | seg_desc.s = 1; |
1247 | if (ctxt->mode == X86EMUL_MODE_VM86) | ||
1248 | seg_desc.dpl = 3; | ||
1230 | goto load; | 1249 | goto load; |
1231 | } | 1250 | } |
1232 | 1251 | ||
@@ -1891,6 +1910,17 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | |||
1891 | ss->p = 1; | 1910 | ss->p = 1; |
1892 | } | 1911 | } |
1893 | 1912 | ||
1913 | static bool vendor_intel(struct x86_emulate_ctxt *ctxt) | ||
1914 | { | ||
1915 | u32 eax, ebx, ecx, edx; | ||
1916 | |||
1917 | eax = ecx = 0; | ||
1918 | return ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx) | ||
1919 | && ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx | ||
1920 | && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx | ||
1921 | && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx; | ||
1922 | } | ||
1923 | |||
1894 | static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) | 1924 | static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) |
1895 | { | 1925 | { |
1896 | struct x86_emulate_ops *ops = ctxt->ops; | 1926 | struct x86_emulate_ops *ops = ctxt->ops; |
@@ -2007,6 +2037,14 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) | |||
2007 | if (ctxt->mode == X86EMUL_MODE_REAL) | 2037 | if (ctxt->mode == X86EMUL_MODE_REAL) |
2008 | return emulate_gp(ctxt, 0); | 2038 | return emulate_gp(ctxt, 0); |
2009 | 2039 | ||
2040 | /* | ||
2041 | * Not recognized on AMD in compat mode (but is recognized in legacy | ||
2042 | * mode). | ||
2043 | */ | ||
2044 | if ((ctxt->mode == X86EMUL_MODE_PROT32) && (efer & EFER_LMA) | ||
2045 | && !vendor_intel(ctxt)) | ||
2046 | return emulate_ud(ctxt); | ||
2047 | |||
2010 | /* XXX sysenter/sysexit have not been tested in 64bit mode. | 2048 | /* XXX sysenter/sysexit have not been tested in 64bit mode. |
2011 | * Therefore, we inject an #UD. | 2049 | * Therefore, we inject an #UD. |
2012 | */ | 2050 | */ |
@@ -2306,6 +2344,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
2306 | return emulate_gp(ctxt, 0); | 2344 | return emulate_gp(ctxt, 0); |
2307 | ctxt->_eip = tss->eip; | 2345 | ctxt->_eip = tss->eip; |
2308 | ctxt->eflags = tss->eflags | 2; | 2346 | ctxt->eflags = tss->eflags | 2; |
2347 | |||
2348 | /* General purpose registers */ | ||
2309 | ctxt->regs[VCPU_REGS_RAX] = tss->eax; | 2349 | ctxt->regs[VCPU_REGS_RAX] = tss->eax; |
2310 | ctxt->regs[VCPU_REGS_RCX] = tss->ecx; | 2350 | ctxt->regs[VCPU_REGS_RCX] = tss->ecx; |
2311 | ctxt->regs[VCPU_REGS_RDX] = tss->edx; | 2351 | ctxt->regs[VCPU_REGS_RDX] = tss->edx; |
@@ -2328,6 +2368,24 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
2328 | set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); | 2368 | set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); |
2329 | 2369 | ||
2330 | /* | 2370 | /* |
2371 | * If we're switching between Protected Mode and VM86, we need to make | ||
2372 | * sure to update the mode before loading the segment descriptors so | ||
2373 | * that the selectors are interpreted correctly. | ||
2374 | * | ||
2375 | * Need to get rflags to the vcpu struct immediately because it | ||
2376 | * influences the CPL which is checked at least when loading the segment | ||
2377 | * descriptors and when pushing an error code to the new kernel stack. | ||
2378 | * | ||
2379 | * TODO Introduce a separate ctxt->ops->set_cpl callback | ||
2380 | */ | ||
2381 | if (ctxt->eflags & X86_EFLAGS_VM) | ||
2382 | ctxt->mode = X86EMUL_MODE_VM86; | ||
2383 | else | ||
2384 | ctxt->mode = X86EMUL_MODE_PROT32; | ||
2385 | |||
2386 | ctxt->ops->set_rflags(ctxt, ctxt->eflags); | ||
2387 | |||
2388 | /* | ||
2331 | * Now load segment descriptors. If fault happenes at this stage | 2389 | * Now load segment descriptors. If fault happenes at this stage |
2332 | * it is handled in a context of new task | 2390 | * it is handled in a context of new task |
2333 | */ | 2391 | */ |
@@ -2401,7 +2459,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2401 | } | 2459 | } |
2402 | 2460 | ||
2403 | static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | 2461 | static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, |
2404 | u16 tss_selector, int reason, | 2462 | u16 tss_selector, int idt_index, int reason, |
2405 | bool has_error_code, u32 error_code) | 2463 | bool has_error_code, u32 error_code) |
2406 | { | 2464 | { |
2407 | struct x86_emulate_ops *ops = ctxt->ops; | 2465 | struct x86_emulate_ops *ops = ctxt->ops; |
@@ -2423,12 +2481,35 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2423 | 2481 | ||
2424 | /* FIXME: check that next_tss_desc is tss */ | 2482 | /* FIXME: check that next_tss_desc is tss */ |
2425 | 2483 | ||
2426 | if (reason != TASK_SWITCH_IRET) { | 2484 | /* |
2427 | if ((tss_selector & 3) > next_tss_desc.dpl || | 2485 | * Check privileges. The three cases are task switch caused by... |
2428 | ops->cpl(ctxt) > next_tss_desc.dpl) | 2486 | * |
2429 | return emulate_gp(ctxt, 0); | 2487 | * 1. jmp/call/int to task gate: Check against DPL of the task gate |
2488 | * 2. Exception/IRQ/iret: No check is performed | ||
2489 | * 3. jmp/call to TSS: Check agains DPL of the TSS | ||
2490 | */ | ||
2491 | if (reason == TASK_SWITCH_GATE) { | ||
2492 | if (idt_index != -1) { | ||
2493 | /* Software interrupts */ | ||
2494 | struct desc_struct task_gate_desc; | ||
2495 | int dpl; | ||
2496 | |||
2497 | ret = read_interrupt_descriptor(ctxt, idt_index, | ||
2498 | &task_gate_desc); | ||
2499 | if (ret != X86EMUL_CONTINUE) | ||
2500 | return ret; | ||
2501 | |||
2502 | dpl = task_gate_desc.dpl; | ||
2503 | if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl) | ||
2504 | return emulate_gp(ctxt, (idt_index << 3) | 0x2); | ||
2505 | } | ||
2506 | } else if (reason != TASK_SWITCH_IRET) { | ||
2507 | int dpl = next_tss_desc.dpl; | ||
2508 | if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl) | ||
2509 | return emulate_gp(ctxt, tss_selector); | ||
2430 | } | 2510 | } |
2431 | 2511 | ||
2512 | |||
2432 | desc_limit = desc_limit_scaled(&next_tss_desc); | 2513 | desc_limit = desc_limit_scaled(&next_tss_desc); |
2433 | if (!next_tss_desc.p || | 2514 | if (!next_tss_desc.p || |
2434 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || | 2515 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || |
@@ -2481,7 +2562,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2481 | } | 2562 | } |
2482 | 2563 | ||
2483 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | 2564 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, |
2484 | u16 tss_selector, int reason, | 2565 | u16 tss_selector, int idt_index, int reason, |
2485 | bool has_error_code, u32 error_code) | 2566 | bool has_error_code, u32 error_code) |
2486 | { | 2567 | { |
2487 | int rc; | 2568 | int rc; |
@@ -2489,7 +2570,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2489 | ctxt->_eip = ctxt->eip; | 2570 | ctxt->_eip = ctxt->eip; |
2490 | ctxt->dst.type = OP_NONE; | 2571 | ctxt->dst.type = OP_NONE; |
2491 | 2572 | ||
2492 | rc = emulator_do_task_switch(ctxt, tss_selector, reason, | 2573 | rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason, |
2493 | has_error_code, error_code); | 2574 | has_error_code, error_code); |
2494 | 2575 | ||
2495 | if (rc == X86EMUL_CONTINUE) | 2576 | if (rc == X86EMUL_CONTINUE) |
@@ -3514,13 +3595,13 @@ static struct opcode twobyte_table[256] = { | |||
3514 | I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), | 3595 | I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), |
3515 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), | 3596 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), |
3516 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), | 3597 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), |
3517 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3598 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
3518 | /* 0xB8 - 0xBF */ | 3599 | /* 0xB8 - 0xBF */ |
3519 | N, N, | 3600 | N, N, |
3520 | G(BitOp, group8), | 3601 | G(BitOp, group8), |
3521 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), | 3602 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), |
3522 | I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), | 3603 | I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), |
3523 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3604 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
3524 | /* 0xC0 - 0xCF */ | 3605 | /* 0xC0 - 0xCF */ |
3525 | D2bv(DstMem | SrcReg | ModRM | Lock), | 3606 | D2bv(DstMem | SrcReg | ModRM | Lock), |
3526 | N, D(DstMem | SrcReg | ModRM | Mov), | 3607 | N, D(DstMem | SrcReg | ModRM | Mov), |
@@ -3602,9 +3683,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
3602 | 3683 | ||
3603 | switch (d) { | 3684 | switch (d) { |
3604 | case OpReg: | 3685 | case OpReg: |
3605 | decode_register_operand(ctxt, op, | 3686 | decode_register_operand(ctxt, op); |
3606 | op == &ctxt->dst && | ||
3607 | ctxt->twobyte && (ctxt->b == 0xb6 || ctxt->b == 0xb7)); | ||
3608 | break; | 3687 | break; |
3609 | case OpImmUByte: | 3688 | case OpImmUByte: |
3610 | rc = decode_imm(ctxt, op, 1, false); | 3689 | rc = decode_imm(ctxt, op, 1, false); |
@@ -3656,6 +3735,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
3656 | case OpImm: | 3735 | case OpImm: |
3657 | rc = decode_imm(ctxt, op, imm_size(ctxt), true); | 3736 | rc = decode_imm(ctxt, op, imm_size(ctxt), true); |
3658 | break; | 3737 | break; |
3738 | case OpMem8: | ||
3739 | ctxt->memop.bytes = 1; | ||
3740 | goto mem_common; | ||
3659 | case OpMem16: | 3741 | case OpMem16: |
3660 | ctxt->memop.bytes = 2; | 3742 | ctxt->memop.bytes = 2; |
3661 | goto mem_common; | 3743 | goto mem_common; |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index b6a73537e1e..81cf4fa4a2b 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -307,6 +307,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
307 | if (val & 0x10) { | 307 | if (val & 0x10) { |
308 | s->init4 = val & 1; | 308 | s->init4 = val & 1; |
309 | s->last_irr = 0; | 309 | s->last_irr = 0; |
310 | s->irr &= s->elcr; | ||
310 | s->imr = 0; | 311 | s->imr = 0; |
311 | s->priority_add = 0; | 312 | s->priority_add = 0; |
312 | s->special_mask = 0; | 313 | s->special_mask = 0; |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 31bfc6927bc..858432287ab 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -433,7 +433,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
433 | break; | 433 | break; |
434 | 434 | ||
435 | case APIC_DM_INIT: | 435 | case APIC_DM_INIT: |
436 | if (level) { | 436 | if (!trig_mode || level) { |
437 | result = 1; | 437 | result = 1; |
438 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | 438 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; |
439 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 439 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
@@ -731,7 +731,7 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
731 | u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; | 731 | u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; |
732 | u64 ns = 0; | 732 | u64 ns = 0; |
733 | struct kvm_vcpu *vcpu = apic->vcpu; | 733 | struct kvm_vcpu *vcpu = apic->vcpu; |
734 | unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu); | 734 | unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; |
735 | unsigned long flags; | 735 | unsigned long flags; |
736 | 736 | ||
737 | if (unlikely(!tscdeadline || !this_tsc_khz)) | 737 | if (unlikely(!tscdeadline || !this_tsc_khz)) |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 224b02c3cda..4cb16426884 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -688,9 +688,8 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, | |||
688 | { | 688 | { |
689 | unsigned long idx; | 689 | unsigned long idx; |
690 | 690 | ||
691 | idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - | 691 | idx = gfn_to_index(gfn, slot->base_gfn, level); |
692 | (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); | 692 | return &slot->arch.lpage_info[level - 2][idx]; |
693 | return &slot->lpage_info[level - 2][idx]; | ||
694 | } | 693 | } |
695 | 694 | ||
696 | static void account_shadowed(struct kvm *kvm, gfn_t gfn) | 695 | static void account_shadowed(struct kvm *kvm, gfn_t gfn) |
@@ -946,7 +945,7 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) | |||
946 | } | 945 | } |
947 | } | 946 | } |
948 | 947 | ||
949 | static unsigned long *__gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level, | 948 | static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, |
950 | struct kvm_memory_slot *slot) | 949 | struct kvm_memory_slot *slot) |
951 | { | 950 | { |
952 | struct kvm_lpage_info *linfo; | 951 | struct kvm_lpage_info *linfo; |
@@ -966,7 +965,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | |||
966 | struct kvm_memory_slot *slot; | 965 | struct kvm_memory_slot *slot; |
967 | 966 | ||
968 | slot = gfn_to_memslot(kvm, gfn); | 967 | slot = gfn_to_memslot(kvm, gfn); |
969 | return __gfn_to_rmap(kvm, gfn, level, slot); | 968 | return __gfn_to_rmap(gfn, level, slot); |
970 | } | 969 | } |
971 | 970 | ||
972 | static bool rmap_can_add(struct kvm_vcpu *vcpu) | 971 | static bool rmap_can_add(struct kvm_vcpu *vcpu) |
@@ -988,7 +987,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
988 | return pte_list_add(vcpu, spte, rmapp); | 987 | return pte_list_add(vcpu, spte, rmapp); |
989 | } | 988 | } |
990 | 989 | ||
991 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | 990 | static u64 *rmap_next(unsigned long *rmapp, u64 *spte) |
992 | { | 991 | { |
993 | return pte_list_next(rmapp, spte); | 992 | return pte_list_next(rmapp, spte); |
994 | } | 993 | } |
@@ -1018,8 +1017,8 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, | |||
1018 | u64 *spte; | 1017 | u64 *spte; |
1019 | int i, write_protected = 0; | 1018 | int i, write_protected = 0; |
1020 | 1019 | ||
1021 | rmapp = __gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL, slot); | 1020 | rmapp = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot); |
1022 | spte = rmap_next(kvm, rmapp, NULL); | 1021 | spte = rmap_next(rmapp, NULL); |
1023 | while (spte) { | 1022 | while (spte) { |
1024 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1023 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
1025 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | 1024 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); |
@@ -1027,14 +1026,14 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, | |||
1027 | mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK); | 1026 | mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK); |
1028 | write_protected = 1; | 1027 | write_protected = 1; |
1029 | } | 1028 | } |
1030 | spte = rmap_next(kvm, rmapp, spte); | 1029 | spte = rmap_next(rmapp, spte); |
1031 | } | 1030 | } |
1032 | 1031 | ||
1033 | /* check for huge page mappings */ | 1032 | /* check for huge page mappings */ |
1034 | for (i = PT_DIRECTORY_LEVEL; | 1033 | for (i = PT_DIRECTORY_LEVEL; |
1035 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 1034 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
1036 | rmapp = __gfn_to_rmap(kvm, gfn, i, slot); | 1035 | rmapp = __gfn_to_rmap(gfn, i, slot); |
1037 | spte = rmap_next(kvm, rmapp, NULL); | 1036 | spte = rmap_next(rmapp, NULL); |
1038 | while (spte) { | 1037 | while (spte) { |
1039 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1038 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
1040 | BUG_ON(!is_large_pte(*spte)); | 1039 | BUG_ON(!is_large_pte(*spte)); |
@@ -1045,7 +1044,7 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, | |||
1045 | spte = NULL; | 1044 | spte = NULL; |
1046 | write_protected = 1; | 1045 | write_protected = 1; |
1047 | } | 1046 | } |
1048 | spte = rmap_next(kvm, rmapp, spte); | 1047 | spte = rmap_next(rmapp, spte); |
1049 | } | 1048 | } |
1050 | } | 1049 | } |
1051 | 1050 | ||
@@ -1066,7 +1065,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1066 | u64 *spte; | 1065 | u64 *spte; |
1067 | int need_tlb_flush = 0; | 1066 | int need_tlb_flush = 0; |
1068 | 1067 | ||
1069 | while ((spte = rmap_next(kvm, rmapp, NULL))) { | 1068 | while ((spte = rmap_next(rmapp, NULL))) { |
1070 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1069 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
1071 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); | 1070 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); |
1072 | drop_spte(kvm, spte); | 1071 | drop_spte(kvm, spte); |
@@ -1085,14 +1084,14 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1085 | 1084 | ||
1086 | WARN_ON(pte_huge(*ptep)); | 1085 | WARN_ON(pte_huge(*ptep)); |
1087 | new_pfn = pte_pfn(*ptep); | 1086 | new_pfn = pte_pfn(*ptep); |
1088 | spte = rmap_next(kvm, rmapp, NULL); | 1087 | spte = rmap_next(rmapp, NULL); |
1089 | while (spte) { | 1088 | while (spte) { |
1090 | BUG_ON(!is_shadow_present_pte(*spte)); | 1089 | BUG_ON(!is_shadow_present_pte(*spte)); |
1091 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); | 1090 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); |
1092 | need_flush = 1; | 1091 | need_flush = 1; |
1093 | if (pte_write(*ptep)) { | 1092 | if (pte_write(*ptep)) { |
1094 | drop_spte(kvm, spte); | 1093 | drop_spte(kvm, spte); |
1095 | spte = rmap_next(kvm, rmapp, NULL); | 1094 | spte = rmap_next(rmapp, NULL); |
1096 | } else { | 1095 | } else { |
1097 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); | 1096 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); |
1098 | new_spte |= (u64)new_pfn << PAGE_SHIFT; | 1097 | new_spte |= (u64)new_pfn << PAGE_SHIFT; |
@@ -1102,7 +1101,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1102 | new_spte &= ~shadow_accessed_mask; | 1101 | new_spte &= ~shadow_accessed_mask; |
1103 | mmu_spte_clear_track_bits(spte); | 1102 | mmu_spte_clear_track_bits(spte); |
1104 | mmu_spte_set(spte, new_spte); | 1103 | mmu_spte_set(spte, new_spte); |
1105 | spte = rmap_next(kvm, rmapp, spte); | 1104 | spte = rmap_next(rmapp, spte); |
1106 | } | 1105 | } |
1107 | } | 1106 | } |
1108 | if (need_flush) | 1107 | if (need_flush) |
@@ -1176,7 +1175,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1176 | if (!shadow_accessed_mask) | 1175 | if (!shadow_accessed_mask) |
1177 | return kvm_unmap_rmapp(kvm, rmapp, data); | 1176 | return kvm_unmap_rmapp(kvm, rmapp, data); |
1178 | 1177 | ||
1179 | spte = rmap_next(kvm, rmapp, NULL); | 1178 | spte = rmap_next(rmapp, NULL); |
1180 | while (spte) { | 1179 | while (spte) { |
1181 | int _young; | 1180 | int _young; |
1182 | u64 _spte = *spte; | 1181 | u64 _spte = *spte; |
@@ -1186,7 +1185,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1186 | young = 1; | 1185 | young = 1; |
1187 | clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); | 1186 | clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); |
1188 | } | 1187 | } |
1189 | spte = rmap_next(kvm, rmapp, spte); | 1188 | spte = rmap_next(rmapp, spte); |
1190 | } | 1189 | } |
1191 | return young; | 1190 | return young; |
1192 | } | 1191 | } |
@@ -1205,7 +1204,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1205 | if (!shadow_accessed_mask) | 1204 | if (!shadow_accessed_mask) |
1206 | goto out; | 1205 | goto out; |
1207 | 1206 | ||
1208 | spte = rmap_next(kvm, rmapp, NULL); | 1207 | spte = rmap_next(rmapp, NULL); |
1209 | while (spte) { | 1208 | while (spte) { |
1210 | u64 _spte = *spte; | 1209 | u64 _spte = *spte; |
1211 | BUG_ON(!(_spte & PT_PRESENT_MASK)); | 1210 | BUG_ON(!(_spte & PT_PRESENT_MASK)); |
@@ -1214,7 +1213,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1214 | young = 1; | 1213 | young = 1; |
1215 | break; | 1214 | break; |
1216 | } | 1215 | } |
1217 | spte = rmap_next(kvm, rmapp, spte); | 1216 | spte = rmap_next(rmapp, spte); |
1218 | } | 1217 | } |
1219 | out: | 1218 | out: |
1220 | return young; | 1219 | return young; |
@@ -1391,11 +1390,6 @@ struct kvm_mmu_pages { | |||
1391 | unsigned int nr; | 1390 | unsigned int nr; |
1392 | }; | 1391 | }; |
1393 | 1392 | ||
1394 | #define for_each_unsync_children(bitmap, idx) \ | ||
1395 | for (idx = find_first_bit(bitmap, 512); \ | ||
1396 | idx < 512; \ | ||
1397 | idx = find_next_bit(bitmap, 512, idx+1)) | ||
1398 | |||
1399 | static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, | 1393 | static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, |
1400 | int idx) | 1394 | int idx) |
1401 | { | 1395 | { |
@@ -1417,7 +1411,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, | |||
1417 | { | 1411 | { |
1418 | int i, ret, nr_unsync_leaf = 0; | 1412 | int i, ret, nr_unsync_leaf = 0; |
1419 | 1413 | ||
1420 | for_each_unsync_children(sp->unsync_child_bitmap, i) { | 1414 | for_each_set_bit(i, sp->unsync_child_bitmap, 512) { |
1421 | struct kvm_mmu_page *child; | 1415 | struct kvm_mmu_page *child; |
1422 | u64 ent = sp->spt[i]; | 1416 | u64 ent = sp->spt[i]; |
1423 | 1417 | ||
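The removed for_each_unsync_children() macro was simply an open-coded form of the generic bit iterator; as a rough equivalence (sketch only, mirroring the definition in linux/bitops.h):

	/* for_each_set_bit(i, sp->unsync_child_bitmap, 512) walks every set
	 * bit index in ascending order, i.e. roughly: */
	for (i = find_first_bit(sp->unsync_child_bitmap, 512);
	     i < 512;
	     i = find_next_bit(sp->unsync_child_bitmap, 512, i + 1)) {
		/* visit unsync child i */
	}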
@@ -1803,6 +1797,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) | |||
1803 | { | 1797 | { |
1804 | if (is_large_pte(*sptep)) { | 1798 | if (is_large_pte(*sptep)) { |
1805 | drop_spte(vcpu->kvm, sptep); | 1799 | drop_spte(vcpu->kvm, sptep); |
1800 | --vcpu->kvm->stat.lpages; | ||
1806 | kvm_flush_remote_tlbs(vcpu->kvm); | 1801 | kvm_flush_remote_tlbs(vcpu->kvm); |
1807 | } | 1802 | } |
1808 | } | 1803 | } |
@@ -3190,15 +3185,14 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, | |||
3190 | #undef PTTYPE | 3185 | #undef PTTYPE |
3191 | 3186 | ||
3192 | static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | 3187 | static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, |
3193 | struct kvm_mmu *context, | 3188 | struct kvm_mmu *context) |
3194 | int level) | ||
3195 | { | 3189 | { |
3196 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | 3190 | int maxphyaddr = cpuid_maxphyaddr(vcpu); |
3197 | u64 exb_bit_rsvd = 0; | 3191 | u64 exb_bit_rsvd = 0; |
3198 | 3192 | ||
3199 | if (!context->nx) | 3193 | if (!context->nx) |
3200 | exb_bit_rsvd = rsvd_bits(63, 63); | 3194 | exb_bit_rsvd = rsvd_bits(63, 63); |
3201 | switch (level) { | 3195 | switch (context->root_level) { |
3202 | case PT32_ROOT_LEVEL: | 3196 | case PT32_ROOT_LEVEL: |
3203 | /* no rsvd bits for 2 level 4K page table entries */ | 3197 | /* no rsvd bits for 2 level 4K page table entries */ |
3204 | context->rsvd_bits_mask[0][1] = 0; | 3198 | context->rsvd_bits_mask[0][1] = 0; |
@@ -3256,8 +3250,9 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
3256 | int level) | 3250 | int level) |
3257 | { | 3251 | { |
3258 | context->nx = is_nx(vcpu); | 3252 | context->nx = is_nx(vcpu); |
3253 | context->root_level = level; | ||
3259 | 3254 | ||
3260 | reset_rsvds_bits_mask(vcpu, context, level); | 3255 | reset_rsvds_bits_mask(vcpu, context); |
3261 | 3256 | ||
3262 | ASSERT(is_pae(vcpu)); | 3257 | ASSERT(is_pae(vcpu)); |
3263 | context->new_cr3 = paging_new_cr3; | 3258 | context->new_cr3 = paging_new_cr3; |
@@ -3267,7 +3262,6 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
3267 | context->invlpg = paging64_invlpg; | 3262 | context->invlpg = paging64_invlpg; |
3268 | context->update_pte = paging64_update_pte; | 3263 | context->update_pte = paging64_update_pte; |
3269 | context->free = paging_free; | 3264 | context->free = paging_free; |
3270 | context->root_level = level; | ||
3271 | context->shadow_root_level = level; | 3265 | context->shadow_root_level = level; |
3272 | context->root_hpa = INVALID_PAGE; | 3266 | context->root_hpa = INVALID_PAGE; |
3273 | context->direct_map = false; | 3267 | context->direct_map = false; |
@@ -3284,8 +3278,9 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, | |||
3284 | struct kvm_mmu *context) | 3278 | struct kvm_mmu *context) |
3285 | { | 3279 | { |
3286 | context->nx = false; | 3280 | context->nx = false; |
3281 | context->root_level = PT32_ROOT_LEVEL; | ||
3287 | 3282 | ||
3288 | reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL); | 3283 | reset_rsvds_bits_mask(vcpu, context); |
3289 | 3284 | ||
3290 | context->new_cr3 = paging_new_cr3; | 3285 | context->new_cr3 = paging_new_cr3; |
3291 | context->page_fault = paging32_page_fault; | 3286 | context->page_fault = paging32_page_fault; |
@@ -3294,7 +3289,6 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, | |||
3294 | context->sync_page = paging32_sync_page; | 3289 | context->sync_page = paging32_sync_page; |
3295 | context->invlpg = paging32_invlpg; | 3290 | context->invlpg = paging32_invlpg; |
3296 | context->update_pte = paging32_update_pte; | 3291 | context->update_pte = paging32_update_pte; |
3297 | context->root_level = PT32_ROOT_LEVEL; | ||
3298 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 3292 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
3299 | context->root_hpa = INVALID_PAGE; | 3293 | context->root_hpa = INVALID_PAGE; |
3300 | context->direct_map = false; | 3294 | context->direct_map = false; |
@@ -3325,7 +3319,6 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
3325 | context->get_cr3 = get_cr3; | 3319 | context->get_cr3 = get_cr3; |
3326 | context->get_pdptr = kvm_pdptr_read; | 3320 | context->get_pdptr = kvm_pdptr_read; |
3327 | context->inject_page_fault = kvm_inject_page_fault; | 3321 | context->inject_page_fault = kvm_inject_page_fault; |
3328 | context->nx = is_nx(vcpu); | ||
3329 | 3322 | ||
3330 | if (!is_paging(vcpu)) { | 3323 | if (!is_paging(vcpu)) { |
3331 | context->nx = false; | 3324 | context->nx = false; |
@@ -3333,19 +3326,19 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
3333 | context->root_level = 0; | 3326 | context->root_level = 0; |
3334 | } else if (is_long_mode(vcpu)) { | 3327 | } else if (is_long_mode(vcpu)) { |
3335 | context->nx = is_nx(vcpu); | 3328 | context->nx = is_nx(vcpu); |
3336 | reset_rsvds_bits_mask(vcpu, context, PT64_ROOT_LEVEL); | ||
3337 | context->gva_to_gpa = paging64_gva_to_gpa; | ||
3338 | context->root_level = PT64_ROOT_LEVEL; | 3329 | context->root_level = PT64_ROOT_LEVEL; |
3330 | reset_rsvds_bits_mask(vcpu, context); | ||
3331 | context->gva_to_gpa = paging64_gva_to_gpa; | ||
3339 | } else if (is_pae(vcpu)) { | 3332 | } else if (is_pae(vcpu)) { |
3340 | context->nx = is_nx(vcpu); | 3333 | context->nx = is_nx(vcpu); |
3341 | reset_rsvds_bits_mask(vcpu, context, PT32E_ROOT_LEVEL); | ||
3342 | context->gva_to_gpa = paging64_gva_to_gpa; | ||
3343 | context->root_level = PT32E_ROOT_LEVEL; | 3334 | context->root_level = PT32E_ROOT_LEVEL; |
3335 | reset_rsvds_bits_mask(vcpu, context); | ||
3336 | context->gva_to_gpa = paging64_gva_to_gpa; | ||
3344 | } else { | 3337 | } else { |
3345 | context->nx = false; | 3338 | context->nx = false; |
3346 | reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL); | ||
3347 | context->gva_to_gpa = paging32_gva_to_gpa; | ||
3348 | context->root_level = PT32_ROOT_LEVEL; | 3339 | context->root_level = PT32_ROOT_LEVEL; |
3340 | reset_rsvds_bits_mask(vcpu, context); | ||
3341 | context->gva_to_gpa = paging32_gva_to_gpa; | ||
3349 | } | 3342 | } |
3350 | 3343 | ||
3351 | return 0; | 3344 | return 0; |
@@ -3408,18 +3401,18 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
3408 | g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; | 3401 | g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; |
3409 | } else if (is_long_mode(vcpu)) { | 3402 | } else if (is_long_mode(vcpu)) { |
3410 | g_context->nx = is_nx(vcpu); | 3403 | g_context->nx = is_nx(vcpu); |
3411 | reset_rsvds_bits_mask(vcpu, g_context, PT64_ROOT_LEVEL); | ||
3412 | g_context->root_level = PT64_ROOT_LEVEL; | 3404 | g_context->root_level = PT64_ROOT_LEVEL; |
3405 | reset_rsvds_bits_mask(vcpu, g_context); | ||
3413 | g_context->gva_to_gpa = paging64_gva_to_gpa_nested; | 3406 | g_context->gva_to_gpa = paging64_gva_to_gpa_nested; |
3414 | } else if (is_pae(vcpu)) { | 3407 | } else if (is_pae(vcpu)) { |
3415 | g_context->nx = is_nx(vcpu); | 3408 | g_context->nx = is_nx(vcpu); |
3416 | reset_rsvds_bits_mask(vcpu, g_context, PT32E_ROOT_LEVEL); | ||
3417 | g_context->root_level = PT32E_ROOT_LEVEL; | 3409 | g_context->root_level = PT32E_ROOT_LEVEL; |
3410 | reset_rsvds_bits_mask(vcpu, g_context); | ||
3418 | g_context->gva_to_gpa = paging64_gva_to_gpa_nested; | 3411 | g_context->gva_to_gpa = paging64_gva_to_gpa_nested; |
3419 | } else { | 3412 | } else { |
3420 | g_context->nx = false; | 3413 | g_context->nx = false; |
3421 | reset_rsvds_bits_mask(vcpu, g_context, PT32_ROOT_LEVEL); | ||
3422 | g_context->root_level = PT32_ROOT_LEVEL; | 3414 | g_context->root_level = PT32_ROOT_LEVEL; |
3415 | reset_rsvds_bits_mask(vcpu, g_context); | ||
3423 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; | 3416 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; |
3424 | } | 3417 | } |
3425 | 3418 | ||
@@ -3555,7 +3548,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, | |||
3555 | * If we're seeing too many writes to a page, it may no longer be a page table, | 3548 | * If we're seeing too many writes to a page, it may no longer be a page table, |
3556 | * or we may be forking, in which case it is better to unmap the page. | 3549 | * or we may be forking, in which case it is better to unmap the page. |
3557 | */ | 3550 | */ |
3558 | static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte) | 3551 | static bool detect_write_flooding(struct kvm_mmu_page *sp) |
3559 | { | 3552 | { |
3560 | /* | 3553 | /* |
3561 | * Skip write-flooding detected for the sp whose level is 1, because | 3554 | * Skip write-flooding detected for the sp whose level is 1, because |
@@ -3664,10 +3657,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3664 | 3657 | ||
3665 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; | 3658 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; |
3666 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { | 3659 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { |
3667 | spte = get_written_sptes(sp, gpa, &npte); | ||
3668 | |||
3669 | if (detect_write_misaligned(sp, gpa, bytes) || | 3660 | if (detect_write_misaligned(sp, gpa, bytes) || |
3670 | detect_write_flooding(sp, spte)) { | 3661 | detect_write_flooding(sp)) { |
3671 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, | 3662 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
3672 | &invalid_list); | 3663 | &invalid_list); |
3673 | ++vcpu->kvm->stat.mmu_flooded; | 3664 | ++vcpu->kvm->stat.mmu_flooded; |
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index ea7b4fd3467..715da5a19a5 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
@@ -200,13 +200,13 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
200 | slot = gfn_to_memslot(kvm, sp->gfn); | 200 | slot = gfn_to_memslot(kvm, sp->gfn); |
201 | rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; | 201 | rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; |
202 | 202 | ||
203 | spte = rmap_next(kvm, rmapp, NULL); | 203 | spte = rmap_next(rmapp, NULL); |
204 | while (spte) { | 204 | while (spte) { |
205 | if (is_writable_pte(*spte)) | 205 | if (is_writable_pte(*spte)) |
206 | audit_printk(kvm, "shadow page has writable " | 206 | audit_printk(kvm, "shadow page has writable " |
207 | "mappings: gfn %llx role %x\n", | 207 | "mappings: gfn %llx role %x\n", |
208 | sp->gfn, sp->role.word); | 208 | sp->gfn, sp->role.word); |
209 | spte = rmap_next(kvm, rmapp, spte); | 209 | spte = rmap_next(rmapp, spte); |
210 | } | 210 | } |
211 | } | 211 | } |
212 | 212 | ||
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 7aad5446f39..a73f0c10481 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -33,10 +33,11 @@ static struct kvm_arch_event_perf_mapping { | |||
33 | [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, | 33 | [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, |
34 | [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | 34 | [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, |
35 | [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, | 35 | [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, |
36 | [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES }, | ||
36 | }; | 37 | }; |
37 | 38 | ||
38 | /* mapping between fixed pmc index and arch_events array */ | 39 | /* mapping between fixed pmc index and arch_events array */ |
39 | int fixed_pmc_events[] = {1, 0, 2}; | 40 | int fixed_pmc_events[] = {1, 0, 7}; |
40 | 41 | ||
41 | static bool pmc_is_gp(struct kvm_pmc *pmc) | 42 | static bool pmc_is_gp(struct kvm_pmc *pmc) |
42 | { | 43 | { |
@@ -210,6 +211,9 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | |||
210 | unsigned config, type = PERF_TYPE_RAW; | 211 | unsigned config, type = PERF_TYPE_RAW; |
211 | u8 event_select, unit_mask; | 212 | u8 event_select, unit_mask; |
212 | 213 | ||
214 | if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) | ||
215 | printk_once("kvm pmu: pin control bit is ignored\n"); | ||
216 | |||
213 | pmc->eventsel = eventsel; | 217 | pmc->eventsel = eventsel; |
214 | 218 | ||
215 | stop_counter(pmc); | 219 | stop_counter(pmc); |
@@ -220,7 +224,7 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | |||
220 | event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; | 224 | event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; |
221 | unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; | 225 | unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; |
222 | 226 | ||
223 | if (!(event_select & (ARCH_PERFMON_EVENTSEL_EDGE | | 227 | if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | |
224 | ARCH_PERFMON_EVENTSEL_INV | | 228 | ARCH_PERFMON_EVENTSEL_INV | |
225 | ARCH_PERFMON_EVENTSEL_CMASK))) { | 229 | ARCH_PERFMON_EVENTSEL_CMASK))) { |
226 | config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, | 230 | config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, |
@@ -413,7 +417,7 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data) | |||
413 | struct kvm_pmc *counters; | 417 | struct kvm_pmc *counters; |
414 | u64 ctr; | 418 | u64 ctr; |
415 | 419 | ||
416 | pmc &= (3u << 30) - 1; | 420 | pmc &= ~(3u << 30); |
417 | if (!fixed && pmc >= pmu->nr_arch_gp_counters) | 421 | if (!fixed && pmc >= pmu->nr_arch_gp_counters) |
418 | return 1; | 422 | return 1; |
419 | if (fixed && pmc >= pmu->nr_arch_fixed_counters) | 423 | if (fixed && pmc >= pmu->nr_arch_fixed_counters) |
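For reference, a small stand-alone sketch of what the mask change above does (selector value is hypothetical): the old expression (3u << 30) - 1 only cleared bit 30 of the RDPMC selector, while ~(3u << 30) clears both high bits before the value is used as a counter index.

	#include <stdio.h>

	int main(void)
	{
		unsigned pmc = (1u << 31) | (1u << 30) | 2;	/* hypothetical guest selector */

		printf("old mask: %#x\n", pmc & ((3u << 30) - 1));	/* 0x80000002, bit 31 leaks through */
		printf("new mask: %#x\n", pmc & ~(3u << 30));		/* 0x2, a plain counter index */
		return 0;
	}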
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e385214711c..e334389e1c7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -111,6 +111,12 @@ struct nested_state { | |||
111 | #define MSRPM_OFFSETS 16 | 111 | #define MSRPM_OFFSETS 16 |
112 | static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; | 112 | static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; |
113 | 113 | ||
114 | /* | ||
115 | * Set osvw_len to higher value when updated Revision Guides | ||
116 | * are published and we know what the new status bits are | ||
117 | */ | ||
118 | static uint64_t osvw_len = 4, osvw_status; | ||
119 | |||
114 | struct vcpu_svm { | 120 | struct vcpu_svm { |
115 | struct kvm_vcpu vcpu; | 121 | struct kvm_vcpu vcpu; |
116 | struct vmcb *vmcb; | 122 | struct vmcb *vmcb; |
@@ -177,11 +183,13 @@ static bool npt_enabled = true; | |||
177 | #else | 183 | #else |
178 | static bool npt_enabled; | 184 | static bool npt_enabled; |
179 | #endif | 185 | #endif |
180 | static int npt = 1; | ||
181 | 186 | ||
187 | /* allow nested paging (virtualized MMU) for all guests */ | ||
188 | static int npt = true; | ||
182 | module_param(npt, int, S_IRUGO); | 189 | module_param(npt, int, S_IRUGO); |
183 | 190 | ||
184 | static int nested = 1; | 191 | /* allow nested virtualization in KVM/SVM */ |
192 | static int nested = true; | ||
185 | module_param(nested, int, S_IRUGO); | 193 | module_param(nested, int, S_IRUGO); |
186 | 194 | ||
187 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); | 195 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); |
@@ -557,6 +565,27 @@ static void svm_init_erratum_383(void) | |||
557 | erratum_383_found = true; | 565 | erratum_383_found = true; |
558 | } | 566 | } |
559 | 567 | ||
568 | static void svm_init_osvw(struct kvm_vcpu *vcpu) | ||
569 | { | ||
570 | /* | ||
571 | * Guests should see errata 400 and 415 as fixed (assuming that | ||
572 | * HLT and IO instructions are intercepted). | ||
573 | */ | ||
574 | vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3; | ||
575 | vcpu->arch.osvw.status = osvw_status & ~(6ULL); | ||
576 | |||
577 | /* | ||
578 | * By increasing VCPU's osvw.length to 3 we are telling the guest that | ||
579 | * all osvw.status bits inside that length, including bit 0 (which is | ||
580 | * reserved for erratum 298), are valid. However, if the host processor's | ||
581 | * osvw_len is 0 then osvw_status[0] carries no information. We need to | ||
582 | * be conservative here and therefore we tell the guest that erratum 298 | ||
583 | * is present (because we really don't know). | ||
584 | */ | ||
585 | if (osvw_len == 0 && boot_cpu_data.x86 == 0x10) | ||
586 | vcpu->arch.osvw.status |= 1; | ||
587 | } | ||
588 | |||
560 | static int has_svm(void) | 589 | static int has_svm(void) |
561 | { | 590 | { |
562 | const char *msg; | 591 | const char *msg; |
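A worked example of the guest-visible values svm_init_osvw() produces, under assumed host values (osvw_len = 2, osvw_status = 0x5):

	/* Hypothetical host values captured at hardware-enable time: */
	uint64_t osvw_len = 2, osvw_status = 0x5;	/* bits 0 and 2 set */

	/* What the guest is given (mirrors svm_init_osvw() above): */
	uint64_t guest_len    = (osvw_len >= 3) ? osvw_len : 3;	/* 3 */
	uint64_t guest_status = osvw_status & ~(6ULL);			/* 0x1: errata 400/415 reported fixed */

	/* Only when osvw_len == 0 on a family 0x10 host is bit 0 forced on,
	 * because an empty table says nothing about erratum 298. */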
@@ -623,6 +652,36 @@ static int svm_hardware_enable(void *garbage) | |||
623 | __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; | 652 | __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; |
624 | } | 653 | } |
625 | 654 | ||
655 | |||
656 | /* | ||
657 | * Get OSVW bits. | ||
658 | * | ||
659 | * Note that it is possible to have a system with mixed processor | ||
660 | * revisions and therefore different OSVW bits. If bits are not the same | ||
661 | * on different processors then choose the worst case (i.e. if erratum | ||
662 | * is present on one processor and not on another then assume that the | ||
663 | * erratum is present everywhere). | ||
664 | */ | ||
665 | if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) { | ||
666 | uint64_t len, status = 0; | ||
667 | int err; | ||
668 | |||
669 | len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err); | ||
670 | if (!err) | ||
671 | status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS, | ||
672 | &err); | ||
673 | |||
674 | if (err) | ||
675 | osvw_status = osvw_len = 0; | ||
676 | else { | ||
677 | if (len < osvw_len) | ||
678 | osvw_len = len; | ||
679 | osvw_status |= status; | ||
680 | osvw_status &= (1ULL << osvw_len) - 1; | ||
681 | } | ||
682 | } else | ||
683 | osvw_status = osvw_len = 0; | ||
684 | |||
626 | svm_init_erratum_383(); | 685 | svm_init_erratum_383(); |
627 | 686 | ||
628 | amd_pmu_enable_virt(); | 687 | amd_pmu_enable_virt(); |
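A minimal sketch of the worst-case merge described in the comment above, with len and status standing for the per-CPU MSR_AMD64_OSVW_ID_LENGTH and MSR_AMD64_OSVW_STATUS reads:

	static uint64_t osvw_len = 4, osvw_status;

	static void merge_osvw(uint64_t len, uint64_t status)
	{
		if (len < osvw_len)			/* shortest register length wins */
			osvw_len = len;
		osvw_status |= status;			/* an erratum on any CPU counts for all */
		osvw_status &= (1ULL << osvw_len) - 1;	/* bits beyond the length carry no info */
	}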
@@ -910,20 +969,25 @@ static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) | |||
910 | return _tsc; | 969 | return _tsc; |
911 | } | 970 | } |
912 | 971 | ||
913 | static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) | 972 | static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) |
914 | { | 973 | { |
915 | struct vcpu_svm *svm = to_svm(vcpu); | 974 | struct vcpu_svm *svm = to_svm(vcpu); |
916 | u64 ratio; | 975 | u64 ratio; |
917 | u64 khz; | 976 | u64 khz; |
918 | 977 | ||
919 | /* TSC scaling supported? */ | 978 | /* Guest TSC same frequency as host TSC? */ |
920 | if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) | 979 | if (!scale) { |
980 | svm->tsc_ratio = TSC_RATIO_DEFAULT; | ||
921 | return; | 981 | return; |
982 | } | ||
922 | 983 | ||
923 | /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */ | 984 | /* TSC scaling supported? */ |
924 | if (user_tsc_khz == 0) { | 985 | if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { |
925 | vcpu->arch.virtual_tsc_khz = 0; | 986 | if (user_tsc_khz > tsc_khz) { |
926 | svm->tsc_ratio = TSC_RATIO_DEFAULT; | 987 | vcpu->arch.tsc_catchup = 1; |
988 | vcpu->arch.tsc_always_catchup = 1; | ||
989 | } else | ||
990 | WARN(1, "user requested TSC rate below hardware speed\n"); | ||
927 | return; | 991 | return; |
928 | } | 992 | } |
929 | 993 | ||
@@ -938,7 +1002,6 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) | |||
938 | user_tsc_khz); | 1002 | user_tsc_khz); |
939 | return; | 1003 | return; |
940 | } | 1004 | } |
941 | vcpu->arch.virtual_tsc_khz = user_tsc_khz; | ||
942 | svm->tsc_ratio = ratio; | 1005 | svm->tsc_ratio = ratio; |
943 | } | 1006 | } |
944 | 1007 | ||
@@ -958,10 +1021,14 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | |||
958 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | 1021 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); |
959 | } | 1022 | } |
960 | 1023 | ||
961 | static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | 1024 | static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) |
962 | { | 1025 | { |
963 | struct vcpu_svm *svm = to_svm(vcpu); | 1026 | struct vcpu_svm *svm = to_svm(vcpu); |
964 | 1027 | ||
1028 | WARN_ON(adjustment < 0); | ||
1029 | if (host) | ||
1030 | adjustment = svm_scale_tsc(vcpu, adjustment); | ||
1031 | |||
965 | svm->vmcb->control.tsc_offset += adjustment; | 1032 | svm->vmcb->control.tsc_offset += adjustment; |
966 | if (is_guest_mode(vcpu)) | 1033 | if (is_guest_mode(vcpu)) |
967 | svm->nested.hsave->control.tsc_offset += adjustment; | 1034 | svm->nested.hsave->control.tsc_offset += adjustment; |
@@ -1191,6 +1258,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
1191 | if (kvm_vcpu_is_bsp(&svm->vcpu)) | 1258 | if (kvm_vcpu_is_bsp(&svm->vcpu)) |
1192 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; | 1259 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; |
1193 | 1260 | ||
1261 | svm_init_osvw(&svm->vcpu); | ||
1262 | |||
1194 | return &svm->vcpu; | 1263 | return &svm->vcpu; |
1195 | 1264 | ||
1196 | free_page4: | 1265 | free_page4: |
@@ -1268,6 +1337,21 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
1268 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 1337 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
1269 | } | 1338 | } |
1270 | 1339 | ||
1340 | static void svm_update_cpl(struct kvm_vcpu *vcpu) | ||
1341 | { | ||
1342 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1343 | int cpl; | ||
1344 | |||
1345 | if (!is_protmode(vcpu)) | ||
1346 | cpl = 0; | ||
1347 | else if (svm->vmcb->save.rflags & X86_EFLAGS_VM) | ||
1348 | cpl = 3; | ||
1349 | else | ||
1350 | cpl = svm->vmcb->save.cs.selector & 0x3; | ||
1351 | |||
1352 | svm->vmcb->save.cpl = cpl; | ||
1353 | } | ||
1354 | |||
1271 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) | 1355 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) |
1272 | { | 1356 | { |
1273 | return to_svm(vcpu)->vmcb->save.rflags; | 1357 | return to_svm(vcpu)->vmcb->save.rflags; |
@@ -1275,7 +1359,11 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) | |||
1275 | 1359 | ||
1276 | static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 1360 | static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
1277 | { | 1361 | { |
1362 | unsigned long old_rflags = to_svm(vcpu)->vmcb->save.rflags; | ||
1363 | |||
1278 | to_svm(vcpu)->vmcb->save.rflags = rflags; | 1364 | to_svm(vcpu)->vmcb->save.rflags = rflags; |
1365 | if ((old_rflags ^ rflags) & X86_EFLAGS_VM) | ||
1366 | svm_update_cpl(vcpu); | ||
1279 | } | 1367 | } |
1280 | 1368 | ||
1281 | static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | 1369 | static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) |
@@ -1543,9 +1631,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, | |||
1543 | s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; | 1631 | s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; |
1544 | } | 1632 | } |
1545 | if (seg == VCPU_SREG_CS) | 1633 | if (seg == VCPU_SREG_CS) |
1546 | svm->vmcb->save.cpl | 1634 | svm_update_cpl(vcpu); |
1547 | = (svm->vmcb->save.cs.attrib | ||
1548 | >> SVM_SELECTOR_DPL_SHIFT) & 3; | ||
1549 | 1635 | ||
1550 | mark_dirty(svm->vmcb, VMCB_SEG); | 1636 | mark_dirty(svm->vmcb, VMCB_SEG); |
1551 | } | 1637 | } |
@@ -2735,7 +2821,10 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
2735 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) | 2821 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) |
2736 | skip_emulated_instruction(&svm->vcpu); | 2822 | skip_emulated_instruction(&svm->vcpu); |
2737 | 2823 | ||
2738 | if (kvm_task_switch(&svm->vcpu, tss_selector, reason, | 2824 | if (int_type != SVM_EXITINTINFO_TYPE_SOFT) |
2825 | int_vec = -1; | ||
2826 | |||
2827 | if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, | ||
2739 | has_error_code, error_code) == EMULATE_FAIL) { | 2828 | has_error_code, error_code) == EMULATE_FAIL) { |
2740 | svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 2829 | svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
2741 | svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 2830 | svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 246490f643b..280751c8472 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -70,9 +70,6 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); | |||
70 | static bool __read_mostly vmm_exclusive = 1; | 70 | static bool __read_mostly vmm_exclusive = 1; |
71 | module_param(vmm_exclusive, bool, S_IRUGO); | 71 | module_param(vmm_exclusive, bool, S_IRUGO); |
72 | 72 | ||
73 | static bool __read_mostly yield_on_hlt = 1; | ||
74 | module_param(yield_on_hlt, bool, S_IRUGO); | ||
75 | |||
76 | static bool __read_mostly fasteoi = 1; | 73 | static bool __read_mostly fasteoi = 1; |
77 | module_param(fasteoi, bool, S_IRUGO); | 74 | module_param(fasteoi, bool, S_IRUGO); |
78 | 75 | ||
@@ -1655,17 +1652,6 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
1655 | vmx_set_interrupt_shadow(vcpu, 0); | 1652 | vmx_set_interrupt_shadow(vcpu, 0); |
1656 | } | 1653 | } |
1657 | 1654 | ||
1658 | static void vmx_clear_hlt(struct kvm_vcpu *vcpu) | ||
1659 | { | ||
1660 | /* Ensure that we clear the HLT state in the VMCS. We don't need to | ||
1661 | * explicitly skip the instruction because if the HLT state is set, then | ||
1662 | * the instruction is already executing and RIP has already been | ||
1663 | * advanced. */ | ||
1664 | if (!yield_on_hlt && | ||
1665 | vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) | ||
1666 | vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); | ||
1667 | } | ||
1668 | |||
1669 | /* | 1655 | /* |
1670 | * KVM wants to inject page-faults which it got to the guest. This function | 1656 | * KVM wants to inject page-faults which it got to the guest. This function |
1671 | * checks whether in a nested guest, we need to inject them to L1 or L2. | 1657 | * checks whether in a nested guest, we need to inject them to L1 or L2. |
@@ -1678,7 +1664,7 @@ static int nested_pf_handled(struct kvm_vcpu *vcpu) | |||
1678 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 1664 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
1679 | 1665 | ||
1680 | /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ | 1666 | /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ |
1681 | if (!(vmcs12->exception_bitmap & PF_VECTOR)) | 1667 | if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR))) |
1682 | return 0; | 1668 | return 0; |
1683 | 1669 | ||
1684 | nested_vmx_vmexit(vcpu); | 1670 | nested_vmx_vmexit(vcpu); |
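The one-liner above fixes a bit-number-versus-bit-mask mix-up; a hedged illustration with PF_VECTOR == 14:

	u32 bitmap = 1u << 14;			/* L1 intercepts only #PF */

	/* old test: bitmap & PF_VECTOR   -> bitmap & 14     == 0 (looks at bits 1-3) */
	/* new test: bitmap & (1u << 14)  -> 0x4000, non-zero, so the fault is
	 * correctly reflected to L1 via nested_vmx_vmexit() */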
@@ -1718,7 +1704,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
1718 | intr_info |= INTR_TYPE_HARD_EXCEPTION; | 1704 | intr_info |= INTR_TYPE_HARD_EXCEPTION; |
1719 | 1705 | ||
1720 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); | 1706 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); |
1721 | vmx_clear_hlt(vcpu); | ||
1722 | } | 1707 | } |
1723 | 1708 | ||
1724 | static bool vmx_rdtscp_supported(void) | 1709 | static bool vmx_rdtscp_supported(void) |
@@ -1817,13 +1802,19 @@ u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu) | |||
1817 | } | 1802 | } |
1818 | 1803 | ||
1819 | /* | 1804 | /* |
1820 | * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ | 1805 | * Engage any workarounds for mis-matched TSC rates. Currently limited to |
1821 | * ioctl. In this case the call-back should update internal vmx state to make | 1806 | * software catchup for faster rates on slower CPUs. |
1822 | * the changes effective. | ||
1823 | */ | 1807 | */ |
1824 | static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) | 1808 | static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) |
1825 | { | 1809 | { |
1826 | /* Nothing to do here */ | 1810 | if (!scale) |
1811 | return; | ||
1812 | |||
1813 | if (user_tsc_khz > tsc_khz) { | ||
1814 | vcpu->arch.tsc_catchup = 1; | ||
1815 | vcpu->arch.tsc_always_catchup = 1; | ||
1816 | } else | ||
1817 | WARN(1, "user requested TSC rate below hardware speed\n"); | ||
1827 | } | 1818 | } |
1828 | 1819 | ||
1829 | /* | 1820 | /* |
@@ -1850,7 +1841,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | |||
1850 | } | 1841 | } |
1851 | } | 1842 | } |
1852 | 1843 | ||
1853 | static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | 1844 | static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) |
1854 | { | 1845 | { |
1855 | u64 offset = vmcs_read64(TSC_OFFSET); | 1846 | u64 offset = vmcs_read64(TSC_OFFSET); |
1856 | vmcs_write64(TSC_OFFSET, offset + adjustment); | 1847 | vmcs_write64(TSC_OFFSET, offset + adjustment); |
@@ -2219,6 +2210,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
2219 | msr = find_msr_entry(vmx, msr_index); | 2210 | msr = find_msr_entry(vmx, msr_index); |
2220 | if (msr) { | 2211 | if (msr) { |
2221 | msr->data = data; | 2212 | msr->data = data; |
2213 | if (msr - vmx->guest_msrs < vmx->save_nmsrs) | ||
2214 | kvm_set_shared_msr(msr->index, msr->data, | ||
2215 | msr->mask); | ||
2222 | break; | 2216 | break; |
2223 | } | 2217 | } |
2224 | ret = kvm_set_msr_common(vcpu, msr_index, data); | 2218 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
@@ -2399,7 +2393,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2399 | &_pin_based_exec_control) < 0) | 2393 | &_pin_based_exec_control) < 0) |
2400 | return -EIO; | 2394 | return -EIO; |
2401 | 2395 | ||
2402 | min = | 2396 | min = CPU_BASED_HLT_EXITING | |
2403 | #ifdef CONFIG_X86_64 | 2397 | #ifdef CONFIG_X86_64 |
2404 | CPU_BASED_CR8_LOAD_EXITING | | 2398 | CPU_BASED_CR8_LOAD_EXITING | |
2405 | CPU_BASED_CR8_STORE_EXITING | | 2399 | CPU_BASED_CR8_STORE_EXITING | |
@@ -2414,9 +2408,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2414 | CPU_BASED_INVLPG_EXITING | | 2408 | CPU_BASED_INVLPG_EXITING | |
2415 | CPU_BASED_RDPMC_EXITING; | 2409 | CPU_BASED_RDPMC_EXITING; |
2416 | 2410 | ||
2417 | if (yield_on_hlt) | ||
2418 | min |= CPU_BASED_HLT_EXITING; | ||
2419 | |||
2420 | opt = CPU_BASED_TPR_SHADOW | | 2411 | opt = CPU_BASED_TPR_SHADOW | |
2421 | CPU_BASED_USE_MSR_BITMAPS | | 2412 | CPU_BASED_USE_MSR_BITMAPS | |
2422 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 2413 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
@@ -4003,7 +3994,6 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) | |||
4003 | } else | 3994 | } else |
4004 | intr |= INTR_TYPE_EXT_INTR; | 3995 | intr |= INTR_TYPE_EXT_INTR; |
4005 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); | 3996 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); |
4006 | vmx_clear_hlt(vcpu); | ||
4007 | } | 3997 | } |
4008 | 3998 | ||
4009 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | 3999 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) |
@@ -4035,7 +4025,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
4035 | } | 4025 | } |
4036 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 4026 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
4037 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | 4027 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); |
4038 | vmx_clear_hlt(vcpu); | ||
4039 | } | 4028 | } |
4040 | 4029 | ||
4041 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | 4030 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) |
@@ -4672,9 +4661,10 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
4672 | bool has_error_code = false; | 4661 | bool has_error_code = false; |
4673 | u32 error_code = 0; | 4662 | u32 error_code = 0; |
4674 | u16 tss_selector; | 4663 | u16 tss_selector; |
4675 | int reason, type, idt_v; | 4664 | int reason, type, idt_v, idt_index; |
4676 | 4665 | ||
4677 | idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); | 4666 | idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); |
4667 | idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK); | ||
4678 | type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); | 4668 | type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); |
4679 | 4669 | ||
4680 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 4670 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
@@ -4712,8 +4702,9 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
4712 | type != INTR_TYPE_NMI_INTR)) | 4702 | type != INTR_TYPE_NMI_INTR)) |
4713 | skip_emulated_instruction(vcpu); | 4703 | skip_emulated_instruction(vcpu); |
4714 | 4704 | ||
4715 | if (kvm_task_switch(vcpu, tss_selector, reason, | 4705 | if (kvm_task_switch(vcpu, tss_selector, |
4716 | has_error_code, error_code) == EMULATE_FAIL) { | 4706 | type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason, |
4707 | has_error_code, error_code) == EMULATE_FAIL) { | ||
4717 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 4708 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
4718 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 4709 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
4719 | vcpu->run->internal.ndata = 0; | 4710 | vcpu->run->internal.ndata = 0; |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 54696b5f844..4044ce0bf7c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -97,6 +97,10 @@ EXPORT_SYMBOL_GPL(kvm_has_tsc_control); | |||
97 | u32 kvm_max_guest_tsc_khz; | 97 | u32 kvm_max_guest_tsc_khz; |
98 | EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); | 98 | EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); |
99 | 99 | ||
100 | /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ | ||
101 | static u32 tsc_tolerance_ppm = 250; | ||
102 | module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); | ||
103 | |||
100 | #define KVM_NR_SHARED_MSRS 16 | 104 | #define KVM_NR_SHARED_MSRS 16 |
101 | 105 | ||
102 | struct kvm_shared_msrs_global { | 106 | struct kvm_shared_msrs_global { |
@@ -969,50 +973,51 @@ static inline u64 get_kernel_ns(void) | |||
969 | static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); | 973 | static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); |
970 | unsigned long max_tsc_khz; | 974 | unsigned long max_tsc_khz; |
971 | 975 | ||
972 | static inline int kvm_tsc_changes_freq(void) | 976 | static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) |
973 | { | 977 | { |
974 | int cpu = get_cpu(); | 978 | return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, |
975 | int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && | 979 | vcpu->arch.virtual_tsc_shift); |
976 | cpufreq_quick_get(cpu) != 0; | ||
977 | put_cpu(); | ||
978 | return ret; | ||
979 | } | 980 | } |
980 | 981 | ||
981 | u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) | 982 | static u32 adjust_tsc_khz(u32 khz, s32 ppm) |
982 | { | 983 | { |
983 | if (vcpu->arch.virtual_tsc_khz) | 984 | u64 v = (u64)khz * (1000000 + ppm); |
984 | return vcpu->arch.virtual_tsc_khz; | 985 | do_div(v, 1000000); |
985 | else | 986 | return v; |
986 | return __this_cpu_read(cpu_tsc_khz); | ||
987 | } | 987 | } |
988 | 988 | ||
989 | static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) | 989 | static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) |
990 | { | 990 | { |
991 | u64 ret; | 991 | u32 thresh_lo, thresh_hi; |
992 | 992 | int use_scaling = 0; | |
993 | WARN_ON(preemptible()); | ||
994 | if (kvm_tsc_changes_freq()) | ||
995 | printk_once(KERN_WARNING | ||
996 | "kvm: unreliable cycle conversion on adjustable rate TSC\n"); | ||
997 | ret = nsec * vcpu_tsc_khz(vcpu); | ||
998 | do_div(ret, USEC_PER_SEC); | ||
999 | return ret; | ||
1000 | } | ||
1001 | 993 | ||
1002 | static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz) | ||
1003 | { | ||
1004 | /* Compute a scale to convert nanoseconds in TSC cycles */ | 994 | /* Compute a scale to convert nanoseconds in TSC cycles */ |
1005 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, | 995 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, |
1006 | &vcpu->arch.tsc_catchup_shift, | 996 | &vcpu->arch.virtual_tsc_shift, |
1007 | &vcpu->arch.tsc_catchup_mult); | 997 | &vcpu->arch.virtual_tsc_mult); |
998 | vcpu->arch.virtual_tsc_khz = this_tsc_khz; | ||
999 | |||
1000 | /* | ||
1001 | * Compute the variation in TSC rate which is acceptable | ||
1002 | * within the range of tolerance and decide if the | ||
1003 | * rate being applied is within that bounds of the hardware | ||
1004 | * rate. If so, no scaling or compensation need be done. | ||
1005 | */ | ||
1006 | thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm); | ||
1007 | thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm); | ||
1008 | if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) { | ||
1009 | pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi); | ||
1010 | use_scaling = 1; | ||
1011 | } | ||
1012 | kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling); | ||
1008 | } | 1013 | } |
1009 | 1014 | ||
1010 | static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) | 1015 | static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) |
1011 | { | 1016 | { |
1012 | u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, | 1017 | u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec, |
1013 | vcpu->arch.tsc_catchup_mult, | 1018 | vcpu->arch.virtual_tsc_mult, |
1014 | vcpu->arch.tsc_catchup_shift); | 1019 | vcpu->arch.virtual_tsc_shift); |
1015 | tsc += vcpu->arch.last_tsc_write; | 1020 | tsc += vcpu->arch.this_tsc_write; |
1016 | return tsc; | 1021 | return tsc; |
1017 | } | 1022 | } |
1018 | 1023 | ||
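A worked example of the tolerance band computed above, assuming a 2 GHz host (tsc_khz = 2000000) and the default tsc_tolerance_ppm of 250:

	u32 thresh_lo = (u64)2000000 * (1000000 - 250) / 1000000;	/* 1999500 kHz */
	u32 thresh_hi = (u64)2000000 * (1000000 + 250) / 1000000;	/* 2000500 kHz */

	/* A requested virtual TSC of 2000400 kHz falls inside [1999500, 2000500],
	 * so no scaling or catchup is needed; a request for 1500000 kHz would
	 * set use_scaling and be handed to the vendor set_tsc_khz hook. */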
@@ -1021,48 +1026,88 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1021 | struct kvm *kvm = vcpu->kvm; | 1026 | struct kvm *kvm = vcpu->kvm; |
1022 | u64 offset, ns, elapsed; | 1027 | u64 offset, ns, elapsed; |
1023 | unsigned long flags; | 1028 | unsigned long flags; |
1024 | s64 sdiff; | 1029 | s64 usdiff; |
1025 | 1030 | ||
1026 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); | 1031 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); |
1027 | offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); | 1032 | offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); |
1028 | ns = get_kernel_ns(); | 1033 | ns = get_kernel_ns(); |
1029 | elapsed = ns - kvm->arch.last_tsc_nsec; | 1034 | elapsed = ns - kvm->arch.last_tsc_nsec; |
1030 | sdiff = data - kvm->arch.last_tsc_write; | 1035 | |
1031 | if (sdiff < 0) | 1036 | /* n.b - signed multiplication and division required */ |
1032 | sdiff = -sdiff; | 1037 | usdiff = data - kvm->arch.last_tsc_write; |
1038 | #ifdef CONFIG_X86_64 | ||
1039 | usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; | ||
1040 | #else | ||
1041 | /* do_div() only does unsigned */ | ||
1042 | asm("idivl %2; xor %%edx, %%edx" | ||
1043 | : "=A"(usdiff) | ||
1044 | : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); | ||
1045 | #endif | ||
1046 | do_div(elapsed, 1000); | ||
1047 | usdiff -= elapsed; | ||
1048 | if (usdiff < 0) | ||
1049 | usdiff = -usdiff; | ||
1033 | 1050 | ||
1034 | /* | 1051 | /* |
1035 | * Special case: close write to TSC within 5 seconds of | 1052 | * Special case: TSC write with a small delta (1 second) of virtual |
1036 | * another CPU is interpreted as an attempt to synchronize | 1053 | * cycle time against real time is interpreted as an attempt to |
1037 | * The 5 seconds is to accommodate host load / swapping as | 1054 | * synchronize the CPU. |
1038 | * well as any reset of TSC during the boot process. | 1055 | * |
1039 | * | 1056 | * For a reliable TSC, we can match TSC offsets, and for an unstable |
1040 | * In that case, for a reliable TSC, we can match TSC offsets, | 1057 | * TSC, we add elapsed time in this computation. We could let the |
1041 | * or make a best guest using elapsed value. | 1058 | * compensation code attempt to catch up if we fall behind, but |
1042 | */ | 1059 | * it's better to try to match offsets from the beginning. |
1043 | if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) && | 1060 | */ |
1044 | elapsed < 5ULL * NSEC_PER_SEC) { | 1061 | if (usdiff < USEC_PER_SEC && |
1062 | vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) { | ||
1045 | if (!check_tsc_unstable()) { | 1063 | if (!check_tsc_unstable()) { |
1046 | offset = kvm->arch.last_tsc_offset; | 1064 | offset = kvm->arch.cur_tsc_offset; |
1047 | pr_debug("kvm: matched tsc offset for %llu\n", data); | 1065 | pr_debug("kvm: matched tsc offset for %llu\n", data); |
1048 | } else { | 1066 | } else { |
1049 | u64 delta = nsec_to_cycles(vcpu, elapsed); | 1067 | u64 delta = nsec_to_cycles(vcpu, elapsed); |
1050 | offset += delta; | 1068 | data += delta; |
1069 | offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); | ||
1051 | pr_debug("kvm: adjusted tsc offset by %llu\n", delta); | 1070 | pr_debug("kvm: adjusted tsc offset by %llu\n", delta); |
1052 | } | 1071 | } |
1053 | ns = kvm->arch.last_tsc_nsec; | 1072 | } else { |
1073 | /* | ||
1074 | * We split periods of matched TSC writes into generations. | ||
1075 | * For each generation, we track the original measured | ||
1076 | * nanosecond time, offset, and write, so if TSCs are in | ||
1077 | * sync, we can match exact offset, and if not, we can match | ||
1078 | * exact software computation in compute_guest_tsc() | ||
1079 | * | ||
1080 | * These values are tracked in kvm->arch.cur_xxx variables. | ||
1081 | */ | ||
1082 | kvm->arch.cur_tsc_generation++; | ||
1083 | kvm->arch.cur_tsc_nsec = ns; | ||
1084 | kvm->arch.cur_tsc_write = data; | ||
1085 | kvm->arch.cur_tsc_offset = offset; | ||
1086 | pr_debug("kvm: new tsc generation %u, clock %llu\n", | ||
1087 | kvm->arch.cur_tsc_generation, data); | ||
1054 | } | 1088 | } |
1089 | |||
1090 | /* | ||
1091 | * We also track the most recent recorded KHZ, write and time to | ||
1092 | * allow the matching interval to be extended at each write. | ||
1093 | */ | ||
1055 | kvm->arch.last_tsc_nsec = ns; | 1094 | kvm->arch.last_tsc_nsec = ns; |
1056 | kvm->arch.last_tsc_write = data; | 1095 | kvm->arch.last_tsc_write = data; |
1057 | kvm->arch.last_tsc_offset = offset; | 1096 | kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; |
1058 | kvm_x86_ops->write_tsc_offset(vcpu, offset); | ||
1059 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | ||
1060 | 1097 | ||
1061 | /* Reset of TSC must disable overshoot protection below */ | 1098 | /* Reset of TSC must disable overshoot protection below */ |
1062 | vcpu->arch.hv_clock.tsc_timestamp = 0; | 1099 | vcpu->arch.hv_clock.tsc_timestamp = 0; |
1063 | vcpu->arch.last_tsc_write = data; | 1100 | vcpu->arch.last_guest_tsc = data; |
1064 | vcpu->arch.last_tsc_nsec = ns; | 1101 | |
1102 | /* Keep track of which generation this VCPU has synchronized to */ | ||
1103 | vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation; | ||
1104 | vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec; | ||
1105 | vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write; | ||
1106 | |||
1107 | kvm_x86_ops->write_tsc_offset(vcpu, offset); | ||
1108 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | ||
1065 | } | 1109 | } |
1110 | |||
1066 | EXPORT_SYMBOL_GPL(kvm_write_tsc); | 1111 | EXPORT_SYMBOL_GPL(kvm_write_tsc); |
1067 | 1112 | ||
1068 | static int kvm_guest_time_update(struct kvm_vcpu *v) | 1113 | static int kvm_guest_time_update(struct kvm_vcpu *v) |
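To make the elapsed-time test above concrete, here is a hedged numeric walk-through (all values hypothetical, virtual_tsc_khz = 2000000):

	s64 cycles  = 1000000;			/* data - kvm->arch.last_tsc_write    */
	s64 usdiff  = cycles * 1000 / 2000000;	/* 500 us of guest TSC time           */
	s64 elapsed = 480;			/* wall-clock us since the last write */

	usdiff -= elapsed;			/* 20 us of apparent skew             */
	if (usdiff < 0)
		usdiff = -usdiff;
	/* 20 < USEC_PER_SEC, so the write counts as a synchronization attempt:
	 * a stable host TSC reuses cur_tsc_offset, an unstable one folds in the
	 * elapsed time.  A larger skew (or a virtual_tsc_khz mismatch) starts a
	 * new TSC generation instead. */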
@@ -1078,7 +1123,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1078 | local_irq_save(flags); | 1123 | local_irq_save(flags); |
1079 | tsc_timestamp = kvm_x86_ops->read_l1_tsc(v); | 1124 | tsc_timestamp = kvm_x86_ops->read_l1_tsc(v); |
1080 | kernel_ns = get_kernel_ns(); | 1125 | kernel_ns = get_kernel_ns(); |
1081 | this_tsc_khz = vcpu_tsc_khz(v); | 1126 | this_tsc_khz = __get_cpu_var(cpu_tsc_khz); |
1082 | if (unlikely(this_tsc_khz == 0)) { | 1127 | if (unlikely(this_tsc_khz == 0)) { |
1083 | local_irq_restore(flags); | 1128 | local_irq_restore(flags); |
1084 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); | 1129 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); |
@@ -1098,7 +1143,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1098 | if (vcpu->tsc_catchup) { | 1143 | if (vcpu->tsc_catchup) { |
1099 | u64 tsc = compute_guest_tsc(v, kernel_ns); | 1144 | u64 tsc = compute_guest_tsc(v, kernel_ns); |
1100 | if (tsc > tsc_timestamp) { | 1145 | if (tsc > tsc_timestamp) { |
1101 | kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp); | 1146 | adjust_tsc_offset_guest(v, tsc - tsc_timestamp); |
1102 | tsc_timestamp = tsc; | 1147 | tsc_timestamp = tsc; |
1103 | } | 1148 | } |
1104 | } | 1149 | } |
@@ -1130,7 +1175,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1130 | * observed by the guest and ensure the new system time is greater. | 1175 | * observed by the guest and ensure the new system time is greater. |
1131 | */ | 1176 | */ |
1132 | max_kernel_ns = 0; | 1177 | max_kernel_ns = 0; |
1133 | if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) { | 1178 | if (vcpu->hv_clock.tsc_timestamp) { |
1134 | max_kernel_ns = vcpu->last_guest_tsc - | 1179 | max_kernel_ns = vcpu->last_guest_tsc - |
1135 | vcpu->hv_clock.tsc_timestamp; | 1180 | vcpu->hv_clock.tsc_timestamp; |
1136 | max_kernel_ns = pvclock_scale_delta(max_kernel_ns, | 1181 | max_kernel_ns = pvclock_scale_delta(max_kernel_ns, |
@@ -1504,6 +1549,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1504 | case MSR_K7_HWCR: | 1549 | case MSR_K7_HWCR: |
1505 | data &= ~(u64)0x40; /* ignore flush filter disable */ | 1550 | data &= ~(u64)0x40; /* ignore flush filter disable */ |
1506 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ | 1551 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ |
1552 | data &= ~(u64)0x8; /* ignore TLB cache disable */ | ||
1507 | if (data != 0) { | 1553 | if (data != 0) { |
1508 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", | 1554 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", |
1509 | data); | 1555 | data); |
@@ -1676,6 +1722,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1676 | */ | 1722 | */ |
1677 | pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); | 1723 | pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); |
1678 | break; | 1724 | break; |
1725 | case MSR_AMD64_OSVW_ID_LENGTH: | ||
1726 | if (!guest_cpuid_has_osvw(vcpu)) | ||
1727 | return 1; | ||
1728 | vcpu->arch.osvw.length = data; | ||
1729 | break; | ||
1730 | case MSR_AMD64_OSVW_STATUS: | ||
1731 | if (!guest_cpuid_has_osvw(vcpu)) | ||
1732 | return 1; | ||
1733 | vcpu->arch.osvw.status = data; | ||
1734 | break; | ||
1679 | default: | 1735 | default: |
1680 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 1736 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
1681 | return xen_hvm_config(vcpu, data); | 1737 | return xen_hvm_config(vcpu, data); |
@@ -1960,6 +2016,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1960 | */ | 2016 | */ |
1961 | data = 0xbe702111; | 2017 | data = 0xbe702111; |
1962 | break; | 2018 | break; |
2019 | case MSR_AMD64_OSVW_ID_LENGTH: | ||
2020 | if (!guest_cpuid_has_osvw(vcpu)) | ||
2021 | return 1; | ||
2022 | data = vcpu->arch.osvw.length; | ||
2023 | break; | ||
2024 | case MSR_AMD64_OSVW_STATUS: | ||
2025 | if (!guest_cpuid_has_osvw(vcpu)) | ||
2026 | return 1; | ||
2027 | data = vcpu->arch.osvw.status; | ||
2028 | break; | ||
1963 | default: | 2029 | default: |
1964 | if (kvm_pmu_msr(vcpu, msr)) | 2030 | if (kvm_pmu_msr(vcpu, msr)) |
1965 | return kvm_pmu_get_msr(vcpu, msr, pdata); | 2031 | return kvm_pmu_get_msr(vcpu, msr, pdata); |
@@ -2080,6 +2146,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2080 | case KVM_CAP_XSAVE: | 2146 | case KVM_CAP_XSAVE: |
2081 | case KVM_CAP_ASYNC_PF: | 2147 | case KVM_CAP_ASYNC_PF: |
2082 | case KVM_CAP_GET_TSC_KHZ: | 2148 | case KVM_CAP_GET_TSC_KHZ: |
2149 | case KVM_CAP_PCI_2_3: | ||
2083 | r = 1; | 2150 | r = 1; |
2084 | break; | 2151 | break; |
2085 | case KVM_CAP_COALESCED_MMIO: | 2152 | case KVM_CAP_COALESCED_MMIO: |
@@ -2214,19 +2281,23 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2214 | } | 2281 | } |
2215 | 2282 | ||
2216 | kvm_x86_ops->vcpu_load(vcpu, cpu); | 2283 | kvm_x86_ops->vcpu_load(vcpu, cpu); |
2217 | if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { | ||
2218 | /* Make sure TSC doesn't go backwards */ | ||
2219 | s64 tsc_delta; | ||
2220 | u64 tsc; | ||
2221 | 2284 | ||
2222 | tsc = kvm_x86_ops->read_l1_tsc(vcpu); | 2285 | /* Apply any externally detected TSC adjustments (due to suspend) */ |
2223 | tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : | 2286 | if (unlikely(vcpu->arch.tsc_offset_adjustment)) { |
2224 | tsc - vcpu->arch.last_guest_tsc; | 2287 | adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); |
2288 | vcpu->arch.tsc_offset_adjustment = 0; | ||
2289 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | ||
2290 | } | ||
2225 | 2291 | ||
2292 | if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { | ||
2293 | s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : | ||
2294 | native_read_tsc() - vcpu->arch.last_host_tsc; | ||
2226 | if (tsc_delta < 0) | 2295 | if (tsc_delta < 0) |
2227 | mark_tsc_unstable("KVM discovered backwards TSC"); | 2296 | mark_tsc_unstable("KVM discovered backwards TSC"); |
2228 | if (check_tsc_unstable()) { | 2297 | if (check_tsc_unstable()) { |
2229 | kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta); | 2298 | u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu, |
2299 | vcpu->arch.last_guest_tsc); | ||
2300 | kvm_x86_ops->write_tsc_offset(vcpu, offset); | ||
2230 | vcpu->arch.tsc_catchup = 1; | 2301 | vcpu->arch.tsc_catchup = 1; |
2231 | } | 2302 | } |
2232 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | 2303 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
@@ -2243,7 +2314,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
2243 | { | 2314 | { |
2244 | kvm_x86_ops->vcpu_put(vcpu); | 2315 | kvm_x86_ops->vcpu_put(vcpu); |
2245 | kvm_put_guest_fpu(vcpu); | 2316 | kvm_put_guest_fpu(vcpu); |
2246 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); | 2317 | vcpu->arch.last_host_tsc = native_read_tsc(); |
2247 | } | 2318 | } |
2248 | 2319 | ||
2249 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | 2320 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, |
@@ -2785,26 +2856,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2785 | u32 user_tsc_khz; | 2856 | u32 user_tsc_khz; |
2786 | 2857 | ||
2787 | r = -EINVAL; | 2858 | r = -EINVAL; |
2788 | if (!kvm_has_tsc_control) | ||
2789 | break; | ||
2790 | |||
2791 | user_tsc_khz = (u32)arg; | 2859 | user_tsc_khz = (u32)arg; |
2792 | 2860 | ||
2793 | if (user_tsc_khz >= kvm_max_guest_tsc_khz) | 2861 | if (user_tsc_khz >= kvm_max_guest_tsc_khz) |
2794 | goto out; | 2862 | goto out; |
2795 | 2863 | ||
2796 | kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); | 2864 | if (user_tsc_khz == 0) |
2865 | user_tsc_khz = tsc_khz; | ||
2866 | |||
2867 | kvm_set_tsc_khz(vcpu, user_tsc_khz); | ||
2797 | 2868 | ||
2798 | r = 0; | 2869 | r = 0; |
2799 | goto out; | 2870 | goto out; |
2800 | } | 2871 | } |
2801 | case KVM_GET_TSC_KHZ: { | 2872 | case KVM_GET_TSC_KHZ: { |
2802 | r = -EIO; | 2873 | r = vcpu->arch.virtual_tsc_khz; |
2803 | if (check_tsc_unstable()) | ||
2804 | goto out; | ||
2805 | |||
2806 | r = vcpu_tsc_khz(vcpu); | ||
2807 | |||
2808 | goto out; | 2874 | goto out; |
2809 | } | 2875 | } |
2810 | default: | 2876 | default: |
@@ -2815,6 +2881,11 @@ out: | |||
2815 | return r; | 2881 | return r; |
2816 | } | 2882 | } |
2817 | 2883 | ||
2884 | int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) | ||
2885 | { | ||
2886 | return VM_FAULT_SIGBUS; | ||
2887 | } | ||
2888 | |||
2818 | static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) | 2889 | static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) |
2819 | { | 2890 | { |
2820 | int ret; | 2891 | int ret; |
@@ -2998,6 +3069,8 @@ static void write_protect_slot(struct kvm *kvm, | |||
2998 | unsigned long *dirty_bitmap, | 3069 | unsigned long *dirty_bitmap, |
2999 | unsigned long nr_dirty_pages) | 3070 | unsigned long nr_dirty_pages) |
3000 | { | 3071 | { |
3072 | spin_lock(&kvm->mmu_lock); | ||
3073 | |||
3001 | /* Not many dirty pages compared to # of shadow pages. */ | 3074 | /* Not many dirty pages compared to # of shadow pages. */ |
3002 | if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) { | 3075 | if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) { |
3003 | unsigned long gfn_offset; | 3076 | unsigned long gfn_offset; |
@@ -3005,16 +3078,13 @@ static void write_protect_slot(struct kvm *kvm, | |||
3005 | for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) { | 3078 | for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) { |
3006 | unsigned long gfn = memslot->base_gfn + gfn_offset; | 3079 | unsigned long gfn = memslot->base_gfn + gfn_offset; |
3007 | 3080 | ||
3008 | spin_lock(&kvm->mmu_lock); | ||
3009 | kvm_mmu_rmap_write_protect(kvm, gfn, memslot); | 3081 | kvm_mmu_rmap_write_protect(kvm, gfn, memslot); |
3010 | spin_unlock(&kvm->mmu_lock); | ||
3011 | } | 3082 | } |
3012 | kvm_flush_remote_tlbs(kvm); | 3083 | kvm_flush_remote_tlbs(kvm); |
3013 | } else { | 3084 | } else |
3014 | spin_lock(&kvm->mmu_lock); | ||
3015 | kvm_mmu_slot_remove_write_access(kvm, memslot->id); | 3085 | kvm_mmu_slot_remove_write_access(kvm, memslot->id); |
3016 | spin_unlock(&kvm->mmu_lock); | 3086 | |
3017 | } | 3087 | spin_unlock(&kvm->mmu_lock); |
3018 | } | 3088 | } |
3019 | 3089 | ||
3020 | /* | 3090 | /* |
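The rewritten write_protect_slot() takes kvm->mmu_lock once around the whole dirty-bitmap walk instead of acquiring and releasing it for every set bit. A stand-alone user-space sketch of that locking shape, with a pthread mutex and a counter standing in for the rmap write-protect work (all names here are made up):

/* User-space illustration of the locking change only, not kernel code. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int protected_count;

static void write_protect_all(const unsigned char *dirty, int nbits)
{
	int i;

	pthread_mutex_lock(&lock);		/* was: lock/unlock per bit */
	for (i = 0; i < nbits; i++)
		if (dirty[i / 8] & (1 << (i % 8)))
			protected_count++;	/* stand-in for write-protecting one gfn */
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	unsigned char dirty[2] = { 0x05, 0x80 };	/* bits 0, 2 and 15 set */

	write_protect_all(dirty, 16);
	printf("write-protected %d pages\n", protected_count);	/* prints 3 */
	return 0;
}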
@@ -3133,6 +3203,9 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3133 | r = -EEXIST; | 3203 | r = -EEXIST; |
3134 | if (kvm->arch.vpic) | 3204 | if (kvm->arch.vpic) |
3135 | goto create_irqchip_unlock; | 3205 | goto create_irqchip_unlock; |
3206 | r = -EINVAL; | ||
3207 | if (atomic_read(&kvm->online_vcpus)) | ||
3208 | goto create_irqchip_unlock; | ||
3136 | r = -ENOMEM; | 3209 | r = -ENOMEM; |
3137 | vpic = kvm_create_pic(kvm); | 3210 | vpic = kvm_create_pic(kvm); |
3138 | if (vpic) { | 3211 | if (vpic) { |
@@ -4063,6 +4136,11 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) | |||
4063 | return res; | 4136 | return res; |
4064 | } | 4137 | } |
4065 | 4138 | ||
4139 | static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val) | ||
4140 | { | ||
4141 | kvm_set_rflags(emul_to_vcpu(ctxt), val); | ||
4142 | } | ||
4143 | |||
4066 | static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) | 4144 | static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) |
4067 | { | 4145 | { |
4068 | return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); | 4146 | return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); |
@@ -4244,6 +4322,7 @@ static struct x86_emulate_ops emulate_ops = { | |||
4244 | .set_idt = emulator_set_idt, | 4322 | .set_idt = emulator_set_idt, |
4245 | .get_cr = emulator_get_cr, | 4323 | .get_cr = emulator_get_cr, |
4246 | .set_cr = emulator_set_cr, | 4324 | .set_cr = emulator_set_cr, |
4325 | .set_rflags = emulator_set_rflags, | ||
4247 | .cpl = emulator_get_cpl, | 4326 | .cpl = emulator_get_cpl, |
4248 | .get_dr = emulator_get_dr, | 4327 | .get_dr = emulator_get_dr, |
4249 | .set_dr = emulator_set_dr, | 4328 | .set_dr = emulator_set_dr, |
@@ -5288,6 +5367,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5288 | profile_hit(KVM_PROFILING, (void *)rip); | 5367 | profile_hit(KVM_PROFILING, (void *)rip); |
5289 | } | 5368 | } |
5290 | 5369 | ||
5370 | if (unlikely(vcpu->arch.tsc_always_catchup)) | ||
5371 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | ||
5291 | 5372 | ||
5292 | kvm_lapic_sync_from_vapic(vcpu); | 5373 | kvm_lapic_sync_from_vapic(vcpu); |
5293 | 5374 | ||
@@ -5587,15 +5668,15 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
5587 | return 0; | 5668 | return 0; |
5588 | } | 5669 | } |
5589 | 5670 | ||
5590 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | 5671 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, |
5591 | bool has_error_code, u32 error_code) | 5672 | int reason, bool has_error_code, u32 error_code) |
5592 | { | 5673 | { |
5593 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | 5674 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
5594 | int ret; | 5675 | int ret; |
5595 | 5676 | ||
5596 | init_emulate_ctxt(vcpu); | 5677 | init_emulate_ctxt(vcpu); |
5597 | 5678 | ||
5598 | ret = emulator_task_switch(ctxt, tss_selector, reason, | 5679 | ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason, |
5599 | has_error_code, error_code); | 5680 | has_error_code, error_code); |
5600 | 5681 | ||
5601 | if (ret) | 5682 | if (ret) |
@@ -5928,13 +6009,88 @@ int kvm_arch_hardware_enable(void *garbage) | |||
5928 | struct kvm *kvm; | 6009 | struct kvm *kvm; |
5929 | struct kvm_vcpu *vcpu; | 6010 | struct kvm_vcpu *vcpu; |
5930 | int i; | 6011 | int i; |
6012 | int ret; | ||
6013 | u64 local_tsc; | ||
6014 | u64 max_tsc = 0; | ||
6015 | bool stable, backwards_tsc = false; | ||
5931 | 6016 | ||
5932 | kvm_shared_msr_cpu_online(); | 6017 | kvm_shared_msr_cpu_online(); |
5933 | list_for_each_entry(kvm, &vm_list, vm_list) | 6018 | ret = kvm_x86_ops->hardware_enable(garbage); |
5934 | kvm_for_each_vcpu(i, vcpu, kvm) | 6019 | if (ret != 0) |
5935 | if (vcpu->cpu == smp_processor_id()) | 6020 | return ret; |
5936 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | 6021 | |
5937 | return kvm_x86_ops->hardware_enable(garbage); | 6022 | local_tsc = native_read_tsc(); |
6023 | stable = !check_tsc_unstable(); | ||
6024 | list_for_each_entry(kvm, &vm_list, vm_list) { | ||
6025 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
6026 | if (!stable && vcpu->cpu == smp_processor_id()) | ||
6027 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | ||
6028 | if (stable && vcpu->arch.last_host_tsc > local_tsc) { | ||
6029 | backwards_tsc = true; | ||
6030 | if (vcpu->arch.last_host_tsc > max_tsc) | ||
6031 | max_tsc = vcpu->arch.last_host_tsc; | ||
6032 | } | ||
6033 | } | ||
6034 | } | ||
6035 | |||
6036 | /* | ||
6037 | * Sometimes, even reliable TSCs go backwards. This happens on | ||
6038 | * platforms that reset TSC during suspend or hibernate actions, but | ||
6039 | * maintain synchronization. We must compensate. Fortunately, we can | ||
6040 | * detect that condition here, which happens early in CPU bringup, | ||
6041 | * before any KVM threads can be running. Unfortunately, we can't | ||
6042 | * bring the TSCs fully up to date with real time, as we aren't yet far | ||
6043 | * enough into CPU bringup that we know how much real time has actually | ||
6044 | * elapsed; our helper function, get_kernel_ns(), will be using boot | ||
6045 | * variables that haven't been updated yet. | ||
6046 | * | ||
6047 | * So we simply find the maximum observed TSC above, then record the | ||
6048 | * adjustment to TSC in each VCPU. When the VCPU later gets loaded, | ||
6049 | * the adjustment will be applied. Note that we accumulate | ||
6050 | * adjustments, in case multiple suspend cycles happen before some VCPU | ||
6051 | * gets a chance to run again. In the event that no KVM threads get a | ||
6052 | * chance to run, we will miss the entire elapsed period, as we'll have | ||
6053 | * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may | ||
6054 | lose cycle time. This isn't too big a deal, since the loss will be | ||
6055 | * uniform across all VCPUs (not to mention the scenario is extremely | ||
6056 | * unlikely). It is possible that a second hibernate recovery happens | ||
6057 | * much faster than a first, causing the observed TSC here to be | ||
6058 | * smaller; this would require additional padding adjustment, which is | ||
6059 | * why we set last_host_tsc to the local tsc observed here. | ||
6060 | * | ||
6061 | * N.B. - this code below runs only on platforms with reliable TSC, | ||
6062 | * as that is the only way backwards_tsc is set above. Also note | ||
6063 | * that this runs for ALL vcpus, which is not a bug; all VCPUs should | ||
6064 | * have the same delta_cyc adjustment applied if backwards_tsc | ||
6065 | * is detected. Note further, this adjustment is only done once, | ||
6066 | * as we reset last_host_tsc on all VCPUs to stop this from being | ||
6067 | * called multiple times (one for each physical CPU bringup). | ||
6068 | * | ||
6069 | * Platforms with unreliable TSCs don't have to deal with this, they | ||
6070 | * will be compensated by the logic in vcpu_load, which sets the TSC to | ||
6071 | * catchup mode. This will catchup all VCPUs to real time, but cannot | ||
6072 | * guarantee that they stay in perfect synchronization. | ||
6073 | */ | ||
6074 | if (backwards_tsc) { | ||
6075 | u64 delta_cyc = max_tsc - local_tsc; | ||
6076 | list_for_each_entry(kvm, &vm_list, vm_list) { | ||
6077 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
6078 | vcpu->arch.tsc_offset_adjustment += delta_cyc; | ||
6079 | vcpu->arch.last_host_tsc = local_tsc; | ||
6080 | } | ||
6081 | |||
6082 | /* | ||
6083 | * We have to disable TSC offset matching; if you were | ||
6084 | * booting a VM while issuing an S4 host suspend, | ||
6085 | * you may have a problem. Solving this issue is | ||
6086 | * left as an exercise to the reader. | ||
6087 | */ | ||
6088 | kvm->arch.last_tsc_nsec = 0; | ||
6089 | kvm->arch.last_tsc_write = 0; | ||
6090 | } | ||
6091 | |||
6092 | } | ||
6093 | return 0; | ||
5938 | } | 6094 | } |
5939 | 6095 | ||
5940 | void kvm_arch_hardware_disable(void *garbage) | 6096 | void kvm_arch_hardware_disable(void *garbage) |
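The compensation described in the comment above reduces to: read the local TSC, find the largest last_host_tsc any VCPU recorded, and if the local value is behind it, add the difference to every VCPU's tsc_offset_adjustment (accumulating across suspend cycles) and reset last_host_tsc. A self-contained model of that arithmetic, with toy structures in place of the real kvm_vcpu; it deliberately omits the stable-TSC check and the per-VM last_tsc_* reset shown in the patch:

/* Toy model of the backwards-TSC compensation; not kernel code. */
#include <stdint.h>
#include <stdio.h>

struct toy_vcpu {
	uint64_t last_host_tsc;		/* TSC recorded at last vcpu_put */
	uint64_t tsc_offset_adjustment;	/* applied at next vcpu_load */
};

static void hardware_enable(struct toy_vcpu *vcpus, int n, uint64_t local_tsc)
{
	uint64_t max_tsc = 0;
	int i, backwards = 0;

	/* Find the largest TSC value any VCPU saw before the suspend. */
	for (i = 0; i < n; i++)
		if (vcpus[i].last_host_tsc > local_tsc) {
			backwards = 1;
			if (vcpus[i].last_host_tsc > max_tsc)
				max_tsc = vcpus[i].last_host_tsc;
		}

	if (backwards) {
		uint64_t delta_cyc = max_tsc - local_tsc;

		for (i = 0; i < n; i++) {
			/* Accumulate, in case of multiple suspend cycles. */
			vcpus[i].tsc_offset_adjustment += delta_cyc;
			vcpus[i].last_host_tsc = local_tsc;
		}
	}
}

int main(void)
{
	struct toy_vcpu v[2] = { { 1000, 0 }, { 1500, 0 } };

	hardware_enable(v, 2, 200);	/* host TSC was reset across suspend */
	printf("adjustment = %llu\n",
	       (unsigned long long)v[0].tsc_offset_adjustment);	/* 1300 */
	return 0;
}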
@@ -5958,6 +6114,11 @@ void kvm_arch_check_processor_compat(void *rtn) | |||
5958 | kvm_x86_ops->check_processor_compatibility(rtn); | 6114 | kvm_x86_ops->check_processor_compatibility(rtn); |
5959 | } | 6115 | } |
5960 | 6116 | ||
6117 | bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) | ||
6118 | { | ||
6119 | return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); | ||
6120 | } | ||
6121 | |||
5961 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | 6122 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) |
5962 | { | 6123 | { |
5963 | struct page *page; | 6124 | struct page *page; |
@@ -5980,7 +6141,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
5980 | } | 6141 | } |
5981 | vcpu->arch.pio_data = page_address(page); | 6142 | vcpu->arch.pio_data = page_address(page); |
5982 | 6143 | ||
5983 | kvm_init_tsc_catchup(vcpu, max_tsc_khz); | 6144 | kvm_set_tsc_khz(vcpu, max_tsc_khz); |
5984 | 6145 | ||
5985 | r = kvm_mmu_create(vcpu); | 6146 | r = kvm_mmu_create(vcpu); |
5986 | if (r < 0) | 6147 | if (r < 0) |
@@ -6032,8 +6193,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
6032 | free_page((unsigned long)vcpu->arch.pio_data); | 6193 | free_page((unsigned long)vcpu->arch.pio_data); |
6033 | } | 6194 | } |
6034 | 6195 | ||
6035 | int kvm_arch_init_vm(struct kvm *kvm) | 6196 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) |
6036 | { | 6197 | { |
6198 | if (type) | ||
6199 | return -EINVAL; | ||
6200 | |||
6037 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 6201 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
6038 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 6202 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
6039 | 6203 | ||
@@ -6093,6 +6257,65 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
6093 | put_page(kvm->arch.ept_identity_pagetable); | 6257 | put_page(kvm->arch.ept_identity_pagetable); |
6094 | } | 6258 | } |
6095 | 6259 | ||
6260 | void kvm_arch_free_memslot(struct kvm_memory_slot *free, | ||
6261 | struct kvm_memory_slot *dont) | ||
6262 | { | ||
6263 | int i; | ||
6264 | |||
6265 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | ||
6266 | if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) { | ||
6267 | vfree(free->arch.lpage_info[i]); | ||
6268 | free->arch.lpage_info[i] = NULL; | ||
6269 | } | ||
6270 | } | ||
6271 | } | ||
6272 | |||
6273 | int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | ||
6274 | { | ||
6275 | int i; | ||
6276 | |||
6277 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | ||
6278 | unsigned long ugfn; | ||
6279 | int lpages; | ||
6280 | int level = i + 2; | ||
6281 | |||
6282 | lpages = gfn_to_index(slot->base_gfn + npages - 1, | ||
6283 | slot->base_gfn, level) + 1; | ||
6284 | |||
6285 | slot->arch.lpage_info[i] = | ||
6286 | vzalloc(lpages * sizeof(*slot->arch.lpage_info[i])); | ||
6287 | if (!slot->arch.lpage_info[i]) | ||
6288 | goto out_free; | ||
6289 | |||
6290 | if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) | ||
6291 | slot->arch.lpage_info[i][0].write_count = 1; | ||
6292 | if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) | ||
6293 | slot->arch.lpage_info[i][lpages - 1].write_count = 1; | ||
6294 | ugfn = slot->userspace_addr >> PAGE_SHIFT; | ||
6295 | /* | ||
6296 | * If the gfn and userspace address are not aligned wrt each | ||
6297 | * other, or if explicitly asked to, disable large page | ||
6298 | * support for this slot | ||
6299 | */ | ||
6300 | if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || | ||
6301 | !kvm_largepages_enabled()) { | ||
6302 | unsigned long j; | ||
6303 | |||
6304 | for (j = 0; j < lpages; ++j) | ||
6305 | slot->arch.lpage_info[i][j].write_count = 1; | ||
6306 | } | ||
6307 | } | ||
6308 | |||
6309 | return 0; | ||
6310 | |||
6311 | out_free: | ||
6312 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | ||
6313 | vfree(slot->arch.lpage_info[i]); | ||
6314 | slot->arch.lpage_info[i] = NULL; | ||
6315 | } | ||
6316 | return -ENOMEM; | ||
6317 | } | ||
6318 | |||
6096 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 6319 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
6097 | struct kvm_memory_slot *memslot, | 6320 | struct kvm_memory_slot *memslot, |
6098 | struct kvm_memory_slot old, | 6321 | struct kvm_memory_slot old, |
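kvm_arch_create_memslot() only allows large pages for a slot when the guest frame number and the host userspace pfn share the same alignment at each level; otherwise every entry's write_count is set to 1 to force small mappings. A small worked example of that alignment test, assuming x86's 4 KB base pages (so level 2 covers 512 pages):

/* Illustrative check only; the macro mirrors KVM_PAGES_PER_HPAGE on x86. */
#include <stdio.h>

#define PAGES_PER_HPAGE(level) (1UL << (((level) - 1) * 9))

/* Large pages are usable only if gfn and host pfn agree in the
 * low-order bits covered by this page-size level. */
static int hugepage_misaligned(unsigned long base_gfn, unsigned long ugfn,
			       int level)
{
	return (base_gfn ^ ugfn) & (PAGES_PER_HPAGE(level) - 1);
}

int main(void)
{
	/* gfn 0x200 maps to a host pfn of 0x203: off by 3 base pages,
	 * so 2 MB (level 2) mappings must be disabled for this slot. */
	printf("%s\n", hugepage_misaligned(0x200, 0x203, 2)
	       ? "misaligned: disable large pages"
	       : "aligned: large pages ok");
	return 0;
}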
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c index 036efbea8b2..aef7140c006 100644 --- a/arch/x86/mm/kmemcheck/selftest.c +++ b/arch/x86/mm/kmemcheck/selftest.c | |||
@@ -1,3 +1,4 @@ | |||
1 | #include <linux/bug.h> | ||
1 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
2 | 3 | ||
3 | #include "opcode.h" | 4 | #include "opcode.h" |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 49a5cb55429..ed2835e148b 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -416,7 +416,12 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) | |||
416 | kfree(sd); | 416 | kfree(sd); |
417 | } else { | 417 | } else { |
418 | get_current_resources(device, busnum, domain, &resources); | 418 | get_current_resources(device, busnum, domain, &resources); |
419 | if (list_empty(&resources)) | 419 | |
420 | /* | ||
421 | * _CRS with no apertures is normal, so only fall back to | ||
422 | * defaults or native bridge info if we're ignoring _CRS. | ||
423 | */ | ||
424 | if (!pci_use_crs) | ||
420 | x86_pci_root_bus_resources(busnum, &resources); | 425 | x86_pci_root_bus_resources(busnum, &resources); |
421 | bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, | 426 | bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, |
422 | &resources); | 427 | &resources); |
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 6dd89555fbf..d0e6e403b4f 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c | |||
@@ -164,11 +164,11 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_ | |||
164 | */ | 164 | */ |
165 | static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev) | 165 | static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev) |
166 | { | 166 | { |
167 | if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && | 167 | if ((dev->device & 0xff00) == 0x2400) |
168 | (dev->device & 0xff00) == 0x2400) | ||
169 | dev->transparent = 1; | 168 | dev->transparent = 1; |
170 | } | 169 | } |
171 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge); | 170 | DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, |
171 | PCI_CLASS_BRIDGE_PCI, 8, pci_fixup_transparent_bridge); | ||
172 | 172 | ||
173 | /* | 173 | /* |
174 | * Fixup for C1 Halt Disconnect problem on nForce2 systems. | 174 | * Fixup for C1 Halt Disconnect problem on nForce2 systems. |
@@ -322,9 +322,6 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev) | |||
322 | struct pci_bus *bus; | 322 | struct pci_bus *bus; |
323 | u16 config; | 323 | u16 config; |
324 | 324 | ||
325 | if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) | ||
326 | return; | ||
327 | |||
328 | /* Is VGA routed to us? */ | 325 | /* Is VGA routed to us? */ |
329 | bus = pdev->bus; | 326 | bus = pdev->bus; |
330 | while (bus) { | 327 | while (bus) { |
@@ -353,7 +350,8 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev) | |||
353 | dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); | 350 | dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); |
354 | } | 351 | } |
355 | } | 352 | } |
356 | DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); | 353 | DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, |
354 | PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); | ||
357 | 355 | ||
358 | 356 | ||
359 | static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = { | 357 | static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = { |
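Both fixup.c changes move the class test out of the fixup body and into the declaration, so the PCI core only invokes the hook for devices whose class matches after the given right-shift. A hypothetical fixup written in the same style (the vendor choice and the body are illustrative only, not part of this patch):

/* Hypothetical class-filtered fixup; IDs and body are made up. */
#include <linux/pci.h>
#include <linux/pci_ids.h>

static void __devinit example_vga_fixup(struct pci_dev *dev)
{
	/* No class check needed here: the core already matched
	 * PCI_CLASS_DISPLAY_VGA using the 8-bit shift given below. */
	dev_info(&dev->dev, "VGA-class device fixed up\n");
}
DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
			      PCI_CLASS_DISPLAY_VGA, 8, example_vga_fixup);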
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 91821a1a0c3..831971e731f 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -39,6 +39,87 @@ | |||
39 | #include <asm/io_apic.h> | 39 | #include <asm/io_apic.h> |
40 | 40 | ||
41 | 41 | ||
42 | /* | ||
43 | * This list of dynamic mappings is for temporarily maintaining | ||
44 | * original BIOS BAR addresses for possible reinstatement. | ||
45 | */ | ||
46 | struct pcibios_fwaddrmap { | ||
47 | struct list_head list; | ||
48 | struct pci_dev *dev; | ||
49 | resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; | ||
50 | }; | ||
51 | |||
52 | static LIST_HEAD(pcibios_fwaddrmappings); | ||
53 | static DEFINE_SPINLOCK(pcibios_fwaddrmap_lock); | ||
54 | |||
55 | /* Must be called with 'pcibios_fwaddrmap_lock' lock held. */ | ||
56 | static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev) | ||
57 | { | ||
58 | struct pcibios_fwaddrmap *map; | ||
59 | |||
60 | WARN_ON(!spin_is_locked(&pcibios_fwaddrmap_lock)); | ||
61 | |||
62 | list_for_each_entry(map, &pcibios_fwaddrmappings, list) | ||
63 | if (map->dev == dev) | ||
64 | return map; | ||
65 | |||
66 | return NULL; | ||
67 | } | ||
68 | |||
69 | static void | ||
70 | pcibios_save_fw_addr(struct pci_dev *dev, int idx, resource_size_t fw_addr) | ||
71 | { | ||
72 | unsigned long flags; | ||
73 | struct pcibios_fwaddrmap *map; | ||
74 | |||
75 | spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); | ||
76 | map = pcibios_fwaddrmap_lookup(dev); | ||
77 | if (!map) { | ||
78 | spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); | ||
79 | map = kzalloc(sizeof(*map), GFP_KERNEL); | ||
80 | if (!map) | ||
81 | return; | ||
82 | |||
83 | map->dev = pci_dev_get(dev); | ||
84 | map->fw_addr[idx] = fw_addr; | ||
85 | INIT_LIST_HEAD(&map->list); | ||
86 | |||
87 | spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); | ||
88 | list_add_tail(&map->list, &pcibios_fwaddrmappings); | ||
89 | } else | ||
90 | map->fw_addr[idx] = fw_addr; | ||
91 | spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); | ||
92 | } | ||
93 | |||
94 | resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx) | ||
95 | { | ||
96 | unsigned long flags; | ||
97 | struct pcibios_fwaddrmap *map; | ||
98 | resource_size_t fw_addr = 0; | ||
99 | |||
100 | spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); | ||
101 | map = pcibios_fwaddrmap_lookup(dev); | ||
102 | if (map) | ||
103 | fw_addr = map->fw_addr[idx]; | ||
104 | spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); | ||
105 | |||
106 | return fw_addr; | ||
107 | } | ||
108 | |||
109 | static void pcibios_fw_addr_list_del(void) | ||
110 | { | ||
111 | unsigned long flags; | ||
112 | struct pcibios_fwaddrmap *entry, *next; | ||
113 | |||
114 | spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); | ||
115 | list_for_each_entry_safe(entry, next, &pcibios_fwaddrmappings, list) { | ||
116 | list_del(&entry->list); | ||
117 | pci_dev_put(entry->dev); | ||
118 | kfree(entry); | ||
119 | } | ||
120 | spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); | ||
121 | } | ||
122 | |||
42 | static int | 123 | static int |
43 | skip_isa_ioresource_align(struct pci_dev *dev) { | 124 | skip_isa_ioresource_align(struct pci_dev *dev) { |
44 | 125 | ||
@@ -182,7 +263,8 @@ static void __init pcibios_allocate_resources(int pass) | |||
182 | idx, r, disabled, pass); | 263 | idx, r, disabled, pass); |
183 | if (pci_claim_resource(dev, idx) < 0) { | 264 | if (pci_claim_resource(dev, idx) < 0) { |
184 | /* We'll assign a new address later */ | 265 | /* We'll assign a new address later */ |
185 | dev->fw_addr[idx] = r->start; | 266 | pcibios_save_fw_addr(dev, |
267 | idx, r->start); | ||
186 | r->end -= r->start; | 268 | r->end -= r->start; |
187 | r->start = 0; | 269 | r->start = 0; |
188 | } | 270 | } |
@@ -228,6 +310,7 @@ static int __init pcibios_assign_resources(void) | |||
228 | } | 310 | } |
229 | 311 | ||
230 | pci_assign_unassigned_resources(); | 312 | pci_assign_unassigned_resources(); |
313 | pcibios_fw_addr_list_del(); | ||
231 | 314 | ||
232 | return 0; | 315 | return 0; |
233 | } | 316 | } |
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index cb29191cee5..140942f66b3 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c | |||
@@ -43,6 +43,8 @@ | |||
43 | #define PCI_FIXED_BAR_4_SIZE 0x14 | 43 | #define PCI_FIXED_BAR_4_SIZE 0x14 |
44 | #define PCI_FIXED_BAR_5_SIZE 0x1c | 44 | #define PCI_FIXED_BAR_5_SIZE 0x1c |
45 | 45 | ||
46 | static int pci_soc_mode = 0; | ||
47 | |||
46 | /** | 48 | /** |
47 | * fixed_bar_cap - return the offset of the fixed BAR cap if found | 49 | * fixed_bar_cap - return the offset of the fixed BAR cap if found |
48 | * @bus: PCI bus | 50 | * @bus: PCI bus |
@@ -148,7 +150,9 @@ static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg) | |||
148 | */ | 150 | */ |
149 | if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE) | 151 | if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE) |
150 | return 0; | 152 | return 0; |
151 | if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0))) | 153 | if (bus == 0 && (devfn == PCI_DEVFN(2, 0) |
154 | || devfn == PCI_DEVFN(0, 0) | ||
155 | || devfn == PCI_DEVFN(3, 0))) | ||
152 | return 1; | 156 | return 1; |
153 | return 0; /* langwell on others */ | 157 | return 0; /* langwell on others */ |
154 | } | 158 | } |
@@ -231,14 +235,43 @@ struct pci_ops pci_mrst_ops = { | |||
231 | */ | 235 | */ |
232 | int __init pci_mrst_init(void) | 236 | int __init pci_mrst_init(void) |
233 | { | 237 | { |
234 | printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n"); | 238 | printk(KERN_INFO "Intel MID platform detected, using MID PCI ops\n"); |
235 | pci_mmcfg_late_init(); | 239 | pci_mmcfg_late_init(); |
236 | pcibios_enable_irq = mrst_pci_irq_enable; | 240 | pcibios_enable_irq = mrst_pci_irq_enable; |
237 | pci_root_ops = pci_mrst_ops; | 241 | pci_root_ops = pci_mrst_ops; |
242 | pci_soc_mode = 1; | ||
238 | /* Continue with standard init */ | 243 | /* Continue with standard init */ |
239 | return 1; | 244 | return 1; |
240 | } | 245 | } |
241 | 246 | ||
247 | /* Langwell devices are not true PCI devices; they are not subject to the 10 ms | ||
248 | * D3-to-D0 delay required by the PCI spec. | ||
249 | */ | ||
250 | static void __devinit pci_d3delay_fixup(struct pci_dev *dev) | ||
251 | { | ||
252 | /* PCI fixups are effectively decided at compile time. If we have a dual | ||
253 | SoC/non-SoC kernel we don't want to mangle d3 on non-SoC devices */ | ||
254 | if (!pci_soc_mode) | ||
255 | return; | ||
256 | /* True PCI devices in Lincroft should allow type 1 access; the rest | ||
257 | * are Langwell fake PCI devices. | ||
258 | */ | ||
259 | if (type1_access_ok(dev->bus->number, dev->devfn, PCI_DEVICE_ID)) | ||
260 | return; | ||
261 | dev->d3_delay = 0; | ||
262 | } | ||
263 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup); | ||
264 | |||
265 | static void __devinit mrst_power_off_unused_dev(struct pci_dev *dev) | ||
266 | { | ||
267 | pci_set_power_state(dev, PCI_D3cold); | ||
268 | } | ||
269 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); | ||
270 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); | ||
271 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev); | ||
272 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0812, mrst_power_off_unused_dev); | ||
273 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev); | ||
274 | |||
242 | /* | 275 | /* |
243 | * Langwell devices reside at fixed offsets, don't try to move them. | 276 | * Langwell devices reside at fixed offsets, don't try to move them. |
244 | */ | 277 | */ |
@@ -248,6 +281,9 @@ static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev) | |||
248 | u32 size; | 281 | u32 size; |
249 | int i; | 282 | int i; |
250 | 283 | ||
284 | if (!pci_soc_mode) | ||
285 | return; | ||
286 | |||
251 | /* Must have extended configuration space */ | 287 | /* Must have extended configuration space */ |
252 | if (dev->cfg_size < PCIE_CAP_OFFSET + 4) | 288 | if (dev->cfg_size < PCIE_CAP_OFFSET + 4) |
253 | return; | 289 | return; |
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts index e70be38ce03..ce874f872cc 100644 --- a/arch/x86/platform/ce4100/falconfalls.dts +++ b/arch/x86/platform/ce4100/falconfalls.dts | |||
@@ -208,16 +208,19 @@ | |||
208 | interrupts = <14 1>; | 208 | interrupts = <14 1>; |
209 | }; | 209 | }; |
210 | 210 | ||
211 | gpio@b,1 { | 211 | pcigpio: gpio@b,1 { |
212 | #gpio-cells = <2>; | ||
213 | #interrupt-cells = <2>; | ||
212 | compatible = "pci8086,2e67.2", | 214 | compatible = "pci8086,2e67.2", |
213 | "pci8086,2e67", | 215 | "pci8086,2e67", |
214 | "pciclassff0000", | 216 | "pciclassff0000", |
215 | "pciclassff00"; | 217 | "pciclassff00"; |
216 | 218 | ||
217 | #gpio-cells = <2>; | ||
218 | reg = <0x15900 0x0 0x0 0x0 0x0>; | 219 | reg = <0x15900 0x0 0x0 0x0 0x0>; |
219 | interrupts = <15 1>; | 220 | interrupts = <15 1>; |
221 | interrupt-controller; | ||
220 | gpio-controller; | 222 | gpio-controller; |
223 | intel,muxctl = <0>; | ||
221 | }; | 224 | }; |
222 | 225 | ||
223 | i2c-controller@b,2 { | 226 | i2c-controller@b,2 { |
diff --git a/arch/x86/platform/geode/Makefile b/arch/x86/platform/geode/Makefile index 246b788847f..5b51194f4c8 100644 --- a/arch/x86/platform/geode/Makefile +++ b/arch/x86/platform/geode/Makefile | |||
@@ -1,2 +1,3 @@ | |||
1 | obj-$(CONFIG_ALIX) += alix.o | 1 | obj-$(CONFIG_ALIX) += alix.o |
2 | obj-$(CONFIG_NET5501) += net5501.o | 2 | obj-$(CONFIG_NET5501) += net5501.o |
3 | obj-$(CONFIG_GEOS) += geos.o | ||
diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c new file mode 100644 index 00000000000..c2e6d53558b --- /dev/null +++ b/arch/x86/platform/geode/geos.c | |||
@@ -0,0 +1,128 @@ | |||
1 | /* | ||
2 | * System Specific setup for Traverse Technologies GEOS. | ||
3 | * At the moment this means setup of GPIO control of LEDs. | ||
4 | * | ||
5 | * Copyright (C) 2008 Constantin Baranov <const@mimas.ru> | ||
6 | * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com> | ||
7 | * and Philip Prindeville <philipp@redfish-solutions.com> | ||
8 | * | ||
9 | * TODO: There are large similarities with leds-net5501.c | ||
10 | * by Alessandro Zummo <a.zummo@towertech.it> | ||
11 | * In the future leds-net5501.c should be migrated over to platform code. | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or modify | ||
14 | * it under the terms of the GNU General Public License version 2 | ||
15 | * as published by the Free Software Foundation. | ||
16 | */ | ||
17 | |||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/io.h> | ||
21 | #include <linux/string.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/leds.h> | ||
24 | #include <linux/platform_device.h> | ||
25 | #include <linux/gpio.h> | ||
26 | #include <linux/input.h> | ||
27 | #include <linux/gpio_keys.h> | ||
28 | #include <linux/dmi.h> | ||
29 | |||
30 | #include <asm/geode.h> | ||
31 | |||
32 | static struct gpio_keys_button geos_gpio_buttons[] = { | ||
33 | { | ||
34 | .code = KEY_RESTART, | ||
35 | .gpio = 3, | ||
36 | .active_low = 1, | ||
37 | .desc = "Reset button", | ||
38 | .type = EV_KEY, | ||
39 | .wakeup = 0, | ||
40 | .debounce_interval = 100, | ||
41 | .can_disable = 0, | ||
42 | } | ||
43 | }; | ||
44 | static struct gpio_keys_platform_data geos_buttons_data = { | ||
45 | .buttons = geos_gpio_buttons, | ||
46 | .nbuttons = ARRAY_SIZE(geos_gpio_buttons), | ||
47 | .poll_interval = 20, | ||
48 | }; | ||
49 | |||
50 | static struct platform_device geos_buttons_dev = { | ||
51 | .name = "gpio-keys-polled", | ||
52 | .id = 1, | ||
53 | .dev = { | ||
54 | .platform_data = &geos_buttons_data, | ||
55 | } | ||
56 | }; | ||
57 | |||
58 | static struct gpio_led geos_leds[] = { | ||
59 | { | ||
60 | .name = "geos:1", | ||
61 | .gpio = 6, | ||
62 | .default_trigger = "default-on", | ||
63 | .active_low = 1, | ||
64 | }, | ||
65 | { | ||
66 | .name = "geos:2", | ||
67 | .gpio = 25, | ||
68 | .default_trigger = "default-off", | ||
69 | .active_low = 1, | ||
70 | }, | ||
71 | { | ||
72 | .name = "geos:3", | ||
73 | .gpio = 27, | ||
74 | .default_trigger = "default-off", | ||
75 | .active_low = 1, | ||
76 | }, | ||
77 | }; | ||
78 | |||
79 | static struct gpio_led_platform_data geos_leds_data = { | ||
80 | .num_leds = ARRAY_SIZE(geos_leds), | ||
81 | .leds = geos_leds, | ||
82 | }; | ||
83 | |||
84 | static struct platform_device geos_leds_dev = { | ||
85 | .name = "leds-gpio", | ||
86 | .id = -1, | ||
87 | .dev.platform_data = &geos_leds_data, | ||
88 | }; | ||
89 | |||
90 | static struct platform_device *geos_devs[] __initdata = { | ||
91 | &geos_buttons_dev, | ||
92 | &geos_leds_dev, | ||
93 | }; | ||
94 | |||
95 | static void __init register_geos(void) | ||
96 | { | ||
97 | /* Setup LED control through leds-gpio driver */ | ||
98 | platform_add_devices(geos_devs, ARRAY_SIZE(geos_devs)); | ||
99 | } | ||
100 | |||
101 | static int __init geos_init(void) | ||
102 | { | ||
103 | const char *vendor, *product; | ||
104 | |||
105 | if (!is_geode()) | ||
106 | return 0; | ||
107 | |||
108 | vendor = dmi_get_system_info(DMI_SYS_VENDOR); | ||
109 | if (!vendor || strcmp(vendor, "Traverse Technologies")) | ||
110 | return 0; | ||
111 | |||
112 | product = dmi_get_system_info(DMI_PRODUCT_NAME); | ||
113 | if (!product || strcmp(product, "Geos")) | ||
114 | return 0; | ||
115 | |||
116 | printk(KERN_INFO "%s: system is recognized as \"%s %s\"\n", | ||
117 | KBUILD_MODNAME, vendor, product); | ||
118 | |||
119 | register_geos(); | ||
120 | |||
121 | return 0; | ||
122 | } | ||
123 | |||
124 | module_init(geos_init); | ||
125 | |||
126 | MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>"); | ||
127 | MODULE_DESCRIPTION("Traverse Technologies Geos System Setup"); | ||
128 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 4889655ba78..47936830968 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -115,7 +115,7 @@ static void __save_processor_state(struct saved_context *ctxt) | |||
115 | void save_processor_state(void) | 115 | void save_processor_state(void) |
116 | { | 116 | { |
117 | __save_processor_state(&saved_context); | 117 | __save_processor_state(&saved_context); |
118 | save_sched_clock_state(); | 118 | x86_platform.save_sched_clock_state(); |
119 | } | 119 | } |
120 | #ifdef CONFIG_X86_32 | 120 | #ifdef CONFIG_X86_32 |
121 | EXPORT_SYMBOL(save_processor_state); | 121 | EXPORT_SYMBOL(save_processor_state); |
@@ -231,8 +231,8 @@ static void __restore_processor_state(struct saved_context *ctxt) | |||
231 | /* Needed by apm.c */ | 231 | /* Needed by apm.c */ |
232 | void restore_processor_state(void) | 232 | void restore_processor_state(void) |
233 | { | 233 | { |
234 | x86_platform.restore_sched_clock_state(); | ||
234 | __restore_processor_state(&saved_context); | 235 | __restore_processor_state(&saved_context); |
235 | restore_sched_clock_state(); | ||
236 | } | 236 | } |
237 | #ifdef CONFIG_X86_32 | 237 | #ifdef CONFIG_X86_32 |
238 | EXPORT_SYMBOL(restore_processor_state); | 238 | EXPORT_SYMBOL(restore_processor_state); |
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index ce98e287c06..e7e67cc3c14 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl | |||
@@ -288,7 +288,7 @@ | |||
288 | 279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend | 288 | 279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend |
289 | 280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive | 289 | 280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive |
290 | 281 i386 mq_notify sys_mq_notify compat_sys_mq_notify | 290 | 281 i386 mq_notify sys_mq_notify compat_sys_mq_notify |
291 | 282 i386 mq_getsetaddr sys_mq_getsetattr compat_sys_mq_getsetattr | 291 | 282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr |
292 | 283 i386 kexec_load sys_kexec_load compat_sys_kexec_load | 292 | 283 i386 kexec_load sys_kexec_load compat_sys_kexec_load |
293 | 284 i386 waitid sys_waitid compat_sys_waitid | 293 | 284 i386 waitid sys_waitid compat_sys_waitid |
294 | # 285 sys_setaltroot | 294 | # 285 sys_setaltroot |
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index b2b54d2edf5..9926e11a772 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig | |||
@@ -15,8 +15,8 @@ config UML_X86 | |||
15 | select GENERIC_FIND_FIRST_BIT | 15 | select GENERIC_FIND_FIRST_BIT |
16 | 16 | ||
17 | config 64BIT | 17 | config 64BIT |
18 | bool | 18 | bool "64-bit kernel" if SUBARCH = "x86" |
19 | default SUBARCH = "x86_64" | 19 | default SUBARCH != "i386" |
20 | 20 | ||
21 | config X86_32 | 21 | config X86_32 |
22 | def_bool !64BIT | 22 | def_bool !64BIT |
diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h index 2c32df6fe23..04f82e020f2 100644 --- a/arch/x86/um/asm/processor.h +++ b/arch/x86/um/asm/processor.h | |||
@@ -17,6 +17,16 @@ | |||
17 | #define ARCH_IS_STACKGROW(address) \ | 17 | #define ARCH_IS_STACKGROW(address) \ |
18 | (address + 65536 + 32 * sizeof(unsigned long) >= UPT_SP(¤t->thread.regs.regs)) | 18 | (address + 65536 + 32 * sizeof(unsigned long) >= UPT_SP(¤t->thread.regs.regs)) |
19 | 19 | ||
20 | #include <asm/user.h> | ||
21 | |||
22 | /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ | ||
23 | static inline void rep_nop(void) | ||
24 | { | ||
25 | __asm__ __volatile__("rep;nop": : :"memory"); | ||
26 | } | ||
27 | |||
28 | #define cpu_relax() rep_nop() | ||
29 | |||
20 | #include <asm/processor-generic.h> | 30 | #include <asm/processor-generic.h> |
21 | 31 | ||
22 | #endif | 32 | #endif |
diff --git a/arch/x86/um/asm/processor_32.h b/arch/x86/um/asm/processor_32.h index 018f732704d..6c6689e574c 100644 --- a/arch/x86/um/asm/processor_32.h +++ b/arch/x86/um/asm/processor_32.h | |||
@@ -45,16 +45,6 @@ static inline void arch_copy_thread(struct arch_thread *from, | |||
45 | memcpy(&to->tls_array, &from->tls_array, sizeof(from->tls_array)); | 45 | memcpy(&to->tls_array, &from->tls_array, sizeof(from->tls_array)); |
46 | } | 46 | } |
47 | 47 | ||
48 | #include <asm/user.h> | ||
49 | |||
50 | /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ | ||
51 | static inline void rep_nop(void) | ||
52 | { | ||
53 | __asm__ __volatile__("rep;nop": : :"memory"); | ||
54 | } | ||
55 | |||
56 | #define cpu_relax() rep_nop() | ||
57 | |||
58 | /* | 48 | /* |
59 | * Default implementation of macro that returns current | 49 | * Default implementation of macro that returns current |
60 | * instruction pointer ("program counter"). Stolen | 50 | * instruction pointer ("program counter"). Stolen |
diff --git a/arch/x86/um/asm/processor_64.h b/arch/x86/um/asm/processor_64.h index 61de92d916c..4b02a8455bd 100644 --- a/arch/x86/um/asm/processor_64.h +++ b/arch/x86/um/asm/processor_64.h | |||
@@ -14,14 +14,6 @@ struct arch_thread { | |||
14 | struct faultinfo faultinfo; | 14 | struct faultinfo faultinfo; |
15 | }; | 15 | }; |
16 | 16 | ||
17 | /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ | ||
18 | static inline void rep_nop(void) | ||
19 | { | ||
20 | __asm__ __volatile__("rep;nop": : :"memory"); | ||
21 | } | ||
22 | |||
23 | #define cpu_relax() rep_nop() | ||
24 | |||
25 | #define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 7 ] = 0 }, \ | 17 | #define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 7 ] = 0 }, \ |
26 | .debugregs_seq = 0, \ | 18 | .debugregs_seq = 0, \ |
27 | .fs = 0, \ | 19 | .fs = 0, \ |
@@ -37,8 +29,6 @@ static inline void arch_copy_thread(struct arch_thread *from, | |||
37 | to->fs = from->fs; | 29 | to->fs = from->fs; |
38 | } | 30 | } |
39 | 31 | ||
40 | #include <asm/user.h> | ||
41 | |||
42 | #define current_text_addr() \ | 32 | #define current_text_addr() \ |
43 | ({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; }) | 33 | ({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; }) |
44 | 34 | ||
diff --git a/arch/x86/um/bugs_32.c b/arch/x86/um/bugs_32.c index a1fba5fb9db..17d88cf2c6c 100644 --- a/arch/x86/um/bugs_32.c +++ b/arch/x86/um/bugs_32.c | |||
@@ -13,8 +13,6 @@ | |||
13 | static int host_has_cmov = 1; | 13 | static int host_has_cmov = 1; |
14 | static jmp_buf cmov_test_return; | 14 | static jmp_buf cmov_test_return; |
15 | 15 | ||
16 | #define TASK_PID(task) *((int *) &(((char *) (task))[HOST_TASK_PID])) | ||
17 | |||
18 | static void cmov_sigill_test_handler(int sig) | 16 | static void cmov_sigill_test_handler(int sig) |
19 | { | 17 | { |
20 | host_has_cmov = 0; | 18 | host_has_cmov = 0; |
@@ -51,7 +49,7 @@ void arch_examine_signal(int sig, struct uml_pt_regs *regs) | |||
51 | * This is testing for a cmov (0x0f 0x4x) instruction causing a | 49 | * This is testing for a cmov (0x0f 0x4x) instruction causing a |
52 | * SIGILL in init. | 50 | * SIGILL in init. |
53 | */ | 51 | */ |
54 | if ((sig != SIGILL) || (TASK_PID(get_current()) != 1)) | 52 | if ((sig != SIGILL) || (get_current_pid() != 1)) |
55 | return; | 53 | return; |
56 | 54 | ||
57 | if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) { | 55 | if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) { |
diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c index 639900a6fde..f40281e5d6a 100644 --- a/arch/x86/um/mem_32.c +++ b/arch/x86/um/mem_32.c | |||
@@ -23,14 +23,6 @@ static int __init gate_vma_init(void) | |||
23 | gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; | 23 | gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; |
24 | gate_vma.vm_page_prot = __P101; | 24 | gate_vma.vm_page_prot = __P101; |
25 | 25 | ||
26 | /* | ||
27 | * Make sure the vDSO gets into every core dump. | ||
28 | * Dumping its contents makes post-mortem fully interpretable later | ||
29 | * without matching up the same kernel and hardware config to see | ||
30 | * what PC values meant. | ||
31 | */ | ||
32 | gate_vma.vm_flags |= VM_ALWAYSDUMP; | ||
33 | |||
34 | return 0; | 26 | return 0; |
35 | } | 27 | } |
36 | __initcall(gate_vma_init); | 28 | __initcall(gate_vma_init); |
diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c index 91f4ec9a0a5..af91901babb 100644 --- a/arch/x86/um/vdso/vma.c +++ b/arch/x86/um/vdso/vma.c | |||
@@ -64,8 +64,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |||
64 | 64 | ||
65 | err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE, | 65 | err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE, |
66 | VM_READ|VM_EXEC| | 66 | VM_READ|VM_EXEC| |
67 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | 67 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, |
68 | VM_ALWAYSDUMP, | ||
69 | vdsop); | 68 | vdsop); |
70 | 69 | ||
71 | up_write(&mm->mmap_sem); | 70 | up_write(&mm->mmap_sem); |
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 468d591dde3..a944020fa85 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -250,13 +250,7 @@ static int __init gate_vma_init(void) | |||
250 | gate_vma.vm_end = FIXADDR_USER_END; | 250 | gate_vma.vm_end = FIXADDR_USER_END; |
251 | gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; | 251 | gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; |
252 | gate_vma.vm_page_prot = __P101; | 252 | gate_vma.vm_page_prot = __P101; |
253 | /* | 253 | |
254 | * Make sure the vDSO gets into every core dump. | ||
255 | * Dumping its contents makes post-mortem fully interpretable later | ||
256 | * without matching up the same kernel and hardware config to see | ||
257 | * what PC values meant. | ||
258 | */ | ||
259 | gate_vma.vm_flags |= VM_ALWAYSDUMP; | ||
260 | return 0; | 254 | return 0; |
261 | } | 255 | } |
262 | 256 | ||
@@ -343,17 +337,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |||
343 | if (compat_uses_vma || !compat) { | 337 | if (compat_uses_vma || !compat) { |
344 | /* | 338 | /* |
345 | * MAYWRITE to allow gdb to COW and set breakpoints | 339 | * MAYWRITE to allow gdb to COW and set breakpoints |
346 | * | ||
347 | * Make sure the vDSO gets into every core dump. | ||
348 | * Dumping its contents makes post-mortem fully | ||
349 | * interpretable later without matching up the same | ||
350 | * kernel and hardware config to see what PC values | ||
351 | * meant. | ||
352 | */ | 340 | */ |
353 | ret = install_special_mapping(mm, addr, PAGE_SIZE, | 341 | ret = install_special_mapping(mm, addr, PAGE_SIZE, |
354 | VM_READ|VM_EXEC| | 342 | VM_READ|VM_EXEC| |
355 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | 343 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, |
356 | VM_ALWAYSDUMP, | ||
357 | vdso32_pages); | 344 | vdso32_pages); |
358 | 345 | ||
359 | if (ret) | 346 | if (ret) |
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 153407c35b7..17e18279649 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c | |||
@@ -124,8 +124,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |||
124 | 124 | ||
125 | ret = install_special_mapping(mm, addr, vdso_size, | 125 | ret = install_special_mapping(mm, addr, vdso_size, |
126 | VM_READ|VM_EXEC| | 126 | VM_READ|VM_EXEC| |
127 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | 127 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, |
128 | VM_ALWAYSDUMP, | ||
129 | vdso_pages); | 128 | vdso_pages); |
130 | if (ret) { | 129 | if (ret) { |
131 | current->mm->context.vdso = NULL; | 130 | current->mm->context.vdso = NULL; |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 12366238d07..1ba8dff2675 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/pm.h> | 10 | #include <linux/pm.h> |
11 | #include <linux/memblock.h> | 11 | #include <linux/memblock.h> |
12 | #include <linux/cpuidle.h> | 12 | #include <linux/cpuidle.h> |
13 | #include <linux/cpufreq.h> | ||
13 | 14 | ||
14 | #include <asm/elf.h> | 15 | #include <asm/elf.h> |
15 | #include <asm/vdso.h> | 16 | #include <asm/vdso.h> |
@@ -420,6 +421,7 @@ void __init xen_arch_setup(void) | |||
420 | boot_cpu_data.hlt_works_ok = 1; | 421 | boot_cpu_data.hlt_works_ok = 1; |
421 | #endif | 422 | #endif |
422 | disable_cpuidle(); | 423 | disable_cpuidle(); |
424 | disable_cpufreq(); | ||
423 | WARN_ON(set_pm_idle_to_default()); | 425 | WARN_ON(set_pm_idle_to_default()); |
424 | fiddle_vdso(); | 426 | fiddle_vdso(); |
425 | } | 427 | } |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 315d8fa0c8f..02900e8ce26 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -75,8 +75,14 @@ static void __cpuinit cpu_bringup(void) | |||
75 | 75 | ||
76 | xen_setup_cpu_clockevents(); | 76 | xen_setup_cpu_clockevents(); |
77 | 77 | ||
78 | notify_cpu_starting(cpu); | ||
79 | |||
80 | ipi_call_lock(); | ||
78 | set_cpu_online(cpu, true); | 81 | set_cpu_online(cpu, true); |
82 | ipi_call_unlock(); | ||
83 | |||
79 | this_cpu_write(cpu_state, CPU_ONLINE); | 84 | this_cpu_write(cpu_state, CPU_ONLINE); |
85 | |||
80 | wmb(); | 86 | wmb(); |
81 | 87 | ||
82 | /* We can take interrupts now: we're officially "up". */ | 88 | /* We can take interrupts now: we're officially "up". */ |