diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-10 12:57:11 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-10 12:57:11 -0500 |
| commit | 3dcf6c1b6b29f327ec24a1986aaa8eba399b463f (patch) | |
| tree | 51e8d0659aa3e4aa50ac94cf13051fef135bda19 | |
| parent | e4e11180dfa545233e5145919b75b7fac88638df (diff) | |
| parent | da69dee073a27ee2d4e50c9b19e9faf8776857eb (diff) | |
Merge branch 'kvm-updates/3.3' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/3.3' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (74 commits)
KVM: PPC: Whitespace fix for kvm.h
KVM: Fix whitespace in kvm_para.h
KVM: PPC: annotate kvm_rma_init as __init
KVM: x86 emulator: implement RDPMC (0F 33)
KVM: x86 emulator: fix RDPMC privilege check
KVM: Expose the architectural performance monitoring CPUID leaf
KVM: VMX: Intercept RDPMC
KVM: SVM: Intercept RDPMC
KVM: Add generic RDPMC support
KVM: Expose a version 2 architectural PMU to a guests
KVM: Expose kvm_lapic_local_deliver()
KVM: x86 emulator: Use opcode::execute for Group 9 instruction
KVM: x86 emulator: Use opcode::execute for Group 4/5 instructions
KVM: x86 emulator: Use opcode::execute for Group 1A instruction
KVM: ensure that debugfs entries have been created
KVM: drop bsp_vcpu pointer from kvm struct
KVM: x86: Consolidate PIT legacy test
KVM: x86: Do not rely on implicit inclusions
KVM: Make KVM_INTEL depend on CPU_SUP_INTEL
KVM: Use memdup_user instead of kmalloc/copy_from_user
...
37 files changed, 2372 insertions, 1752 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 284b44259750..5575759b84ee 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
| @@ -350,15 +350,6 @@ Who: anybody or Florian Mickler <florian@mickler.org> | |||
| 350 | 350 | ||
| 351 | ---------------------------- | 351 | ---------------------------- |
| 352 | 352 | ||
| 353 | What: KVM paravirt mmu host support | ||
| 354 | When: January 2011 | ||
| 355 | Why: The paravirt mmu host support is slower than non-paravirt mmu, both | ||
| 356 | on newer and older hardware. It is already not exposed to the guest, | ||
| 357 | and kept only for live migration purposes. | ||
| 358 | Who: Avi Kivity <avi@redhat.com> | ||
| 359 | |||
| 360 | ---------------------------- | ||
| 361 | |||
| 362 | What: iwlwifi 50XX module parameters | 353 | What: iwlwifi 50XX module parameters |
| 363 | When: 3.0 | 354 | When: 3.0 |
| 364 | Why: The "..50" modules parameters were used to configure 5000 series and | 355 | Why: The "..50" modules parameters were used to configure 5000 series and |
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7b2e5c5eefa6..e69a461a06c2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
| @@ -1178,9 +1178,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
| 1178 | kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. | 1178 | kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. |
| 1179 | Default is 0 (don't ignore, but inject #GP) | 1179 | Default is 0 (don't ignore, but inject #GP) |
| 1180 | 1180 | ||
| 1181 | kvm.oos_shadow= [KVM] Disable out-of-sync shadow paging. | ||
| 1182 | Default is 1 (enabled) | ||
| 1183 | |||
| 1184 | kvm.mmu_audit= [KVM] This is a R/W parameter which allows audit | 1181 | kvm.mmu_audit= [KVM] This is a R/W parameter which allows audit |
| 1185 | KVM MMU at runtime. | 1182 | KVM MMU at runtime. |
| 1186 | Default is 0 (off) | 1183 | Default is 0 (off) |
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e2a4b5287361..e1d94bf4056e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
| @@ -1466,6 +1466,31 @@ is supported; 2 if the processor requires all virtual machines to have | |||
| 1466 | an RMA, or 1 if the processor can use an RMA but doesn't require it, | 1466 | an RMA, or 1 if the processor can use an RMA but doesn't require it, |
| 1467 | because it supports the Virtual RMA (VRMA) facility. | 1467 | because it supports the Virtual RMA (VRMA) facility. |
| 1468 | 1468 | ||
| 1469 | 4.64 KVM_NMI | ||
| 1470 | |||
| 1471 | Capability: KVM_CAP_USER_NMI | ||
| 1472 | Architectures: x86 | ||
| 1473 | Type: vcpu ioctl | ||
| 1474 | Parameters: none | ||
| 1475 | Returns: 0 on success, -1 on error | ||
| 1476 | |||
| 1477 | Queues an NMI on the thread's vcpu. Note this is well defined only | ||
| 1478 | when KVM_CREATE_IRQCHIP has not been called, since this is an interface | ||
| 1479 | between the virtual cpu core and virtual local APIC. After KVM_CREATE_IRQCHIP | ||
| 1480 | has been called, this interface is completely emulated within the kernel. | ||
| 1481 | |||
| 1482 | To use this to emulate the LINT1 input with KVM_CREATE_IRQCHIP, use the | ||
| 1483 | following algorithm: | ||
| 1484 | |||
| 1485 | - pause the vcpu | ||
| 1486 | - read the local APIC's state (KVM_GET_LAPIC) | ||
| 1487 | - check whether changing LINT1 will queue an NMI (see the LVT entry for LINT1) | ||
| 1488 | - if so, issue KVM_NMI | ||
| 1489 | - resume the vcpu | ||
| 1490 | |||
| 1491 | Some guests configure the LINT1 NMI input to cause a panic, aiding in | ||
| 1492 | debugging. | ||
| 1493 | |||
| 1469 | 5. The kvm_run structure | 1494 | 5. The kvm_run structure |
| 1470 | 1495 | ||
| 1471 | Application code obtains a pointer to the kvm_run structure by | 1496 | Application code obtains a pointer to the kvm_run structure by |
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 43f4c92816ef..405052002493 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c | |||
| @@ -774,13 +774,13 @@ struct kvm *kvm_arch_alloc_vm(void) | |||
| 774 | return kvm; | 774 | return kvm; |
| 775 | } | 775 | } |
| 776 | 776 | ||
| 777 | struct kvm_io_range { | 777 | struct kvm_ia64_io_range { |
| 778 | unsigned long start; | 778 | unsigned long start; |
| 779 | unsigned long size; | 779 | unsigned long size; |
| 780 | unsigned long type; | 780 | unsigned long type; |
| 781 | }; | 781 | }; |
| 782 | 782 | ||
| 783 | static const struct kvm_io_range io_ranges[] = { | 783 | static const struct kvm_ia64_io_range io_ranges[] = { |
| 784 | {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER}, | 784 | {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER}, |
| 785 | {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO}, | 785 | {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO}, |
| 786 | {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO}, | 786 | {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO}, |
| @@ -1366,14 +1366,12 @@ static void kvm_release_vm_pages(struct kvm *kvm) | |||
| 1366 | { | 1366 | { |
| 1367 | struct kvm_memslots *slots; | 1367 | struct kvm_memslots *slots; |
| 1368 | struct kvm_memory_slot *memslot; | 1368 | struct kvm_memory_slot *memslot; |
| 1369 | int i, j; | 1369 | int j; |
| 1370 | unsigned long base_gfn; | 1370 | unsigned long base_gfn; |
| 1371 | 1371 | ||
| 1372 | slots = kvm_memslots(kvm); | 1372 | slots = kvm_memslots(kvm); |
| 1373 | for (i = 0; i < slots->nmemslots; i++) { | 1373 | kvm_for_each_memslot(memslot, slots) { |
| 1374 | memslot = &slots->memslots[i]; | ||
| 1375 | base_gfn = memslot->base_gfn; | 1374 | base_gfn = memslot->base_gfn; |
| 1376 | |||
| 1377 | for (j = 0; j < memslot->npages; j++) { | 1375 | for (j = 0; j < memslot->npages; j++) { |
| 1378 | if (memslot->rmap[j]) | 1376 | if (memslot->rmap[j]) |
| 1379 | put_page((struct page *)memslot->rmap[j]); | 1377 | put_page((struct page *)memslot->rmap[j]); |
| @@ -1820,7 +1818,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
| 1820 | if (log->slot >= KVM_MEMORY_SLOTS) | 1818 | if (log->slot >= KVM_MEMORY_SLOTS) |
| 1821 | goto out; | 1819 | goto out; |
| 1822 | 1820 | ||
| 1823 | memslot = &kvm->memslots->memslots[log->slot]; | 1821 | memslot = id_to_memslot(kvm->memslots, log->slot); |
| 1824 | r = -ENOENT; | 1822 | r = -ENOENT; |
| 1825 | if (!memslot->dirty_bitmap) | 1823 | if (!memslot->dirty_bitmap) |
| 1826 | goto out; | 1824 | goto out; |
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 0ad432bc81d6..f7727d91ac6b 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h | |||
| @@ -170,8 +170,8 @@ struct kvm_sregs { | |||
| 170 | } ppc64; | 170 | } ppc64; |
| 171 | struct { | 171 | struct { |
| 172 | __u32 sr[16]; | 172 | __u32 sr[16]; |
| 173 | __u64 ibat[8]; | 173 | __u64 ibat[8]; |
| 174 | __u64 dbat[8]; | 174 | __u64 dbat[8]; |
| 175 | } ppc32; | 175 | } ppc32; |
| 176 | } s; | 176 | } s; |
| 177 | struct { | 177 | struct { |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a459479995c6..e41ac6f7dcf1 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
| @@ -498,7 +498,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
| 498 | 498 | ||
| 499 | /* If nothing is dirty, don't bother messing with page tables. */ | 499 | /* If nothing is dirty, don't bother messing with page tables. */ |
| 500 | if (is_dirty) { | 500 | if (is_dirty) { |
| 501 | memslot = &kvm->memslots->memslots[log->slot]; | 501 | memslot = id_to_memslot(kvm->memslots, log->slot); |
| 502 | 502 | ||
| 503 | ga = memslot->base_gfn << PAGE_SHIFT; | 503 | ga = memslot->base_gfn << PAGE_SHIFT; |
| 504 | ga_end = ga + (memslot->npages << PAGE_SHIFT); | 504 | ga_end = ga + (memslot->npages << PAGE_SHIFT); |
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 286f13d601cf..a795a13f4a70 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c | |||
| @@ -86,7 +86,7 @@ static inline int lpcr_rmls(unsigned long rma_size) | |||
| 86 | * to allocate contiguous physical memory for the real memory | 86 | * to allocate contiguous physical memory for the real memory |
| 87 | * areas for guests. | 87 | * areas for guests. |
| 88 | */ | 88 | */ |
| 89 | void kvm_rma_init(void) | 89 | void __init kvm_rma_init(void) |
| 90 | { | 90 | { |
| 91 | unsigned long i; | 91 | unsigned long i; |
| 92 | unsigned long j, npages; | 92 | unsigned long j, npages; |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index f3444f700f36..17c5d4bdee5e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
| @@ -197,7 +197,10 @@ | |||
| 197 | 197 | ||
| 198 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ | 198 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ |
| 199 | #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ | 199 | #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ |
| 200 | #define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */ | ||
| 201 | #define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ | ||
| 200 | #define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ | 202 | #define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ |
| 203 | #define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */ | ||
| 201 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ | 204 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ |
| 202 | 205 | ||
| 203 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) | 206 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index a026507893e9..ab4092e3214e 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
| @@ -181,6 +181,7 @@ struct x86_emulate_ops { | |||
| 181 | int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); | 181 | int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); |
| 182 | int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); | 182 | int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); |
| 183 | int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); | 183 | int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); |
| 184 | int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata); | ||
| 184 | void (*halt)(struct x86_emulate_ctxt *ctxt); | 185 | void (*halt)(struct x86_emulate_ctxt *ctxt); |
| 185 | void (*wbinvd)(struct x86_emulate_ctxt *ctxt); | 186 | void (*wbinvd)(struct x86_emulate_ctxt *ctxt); |
| 186 | int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); | 187 | int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); |
| @@ -364,6 +365,7 @@ enum x86_intercept { | |||
| 364 | #endif | 365 | #endif |
| 365 | 366 | ||
| 366 | int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); | 367 | int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); |
| 368 | bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); | ||
| 367 | #define EMULATION_FAILED -1 | 369 | #define EMULATION_FAILED -1 |
| 368 | #define EMULATION_OK 0 | 370 | #define EMULATION_OK 0 |
| 369 | #define EMULATION_RESTART 1 | 371 | #define EMULATION_RESTART 1 |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b4973f4dab98..52d6640a5ca1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -16,10 +16,12 @@ | |||
| 16 | #include <linux/mmu_notifier.h> | 16 | #include <linux/mmu_notifier.h> |
| 17 | #include <linux/tracepoint.h> | 17 | #include <linux/tracepoint.h> |
| 18 | #include <linux/cpumask.h> | 18 | #include <linux/cpumask.h> |
| 19 | #include <linux/irq_work.h> | ||
| 19 | 20 | ||
| 20 | #include <linux/kvm.h> | 21 | #include <linux/kvm.h> |
| 21 | #include <linux/kvm_para.h> | 22 | #include <linux/kvm_para.h> |
| 22 | #include <linux/kvm_types.h> | 23 | #include <linux/kvm_types.h> |
| 24 | #include <linux/perf_event.h> | ||
| 23 | 25 | ||
| 24 | #include <asm/pvclock-abi.h> | 26 | #include <asm/pvclock-abi.h> |
| 25 | #include <asm/desc.h> | 27 | #include <asm/desc.h> |
| @@ -31,6 +33,8 @@ | |||
| 31 | #define KVM_MEMORY_SLOTS 32 | 33 | #define KVM_MEMORY_SLOTS 32 |
| 32 | /* memory slots that does not exposed to userspace */ | 34 | /* memory slots that does not exposed to userspace */ |
| 33 | #define KVM_PRIVATE_MEM_SLOTS 4 | 35 | #define KVM_PRIVATE_MEM_SLOTS 4 |
| 36 | #define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) | ||
| 37 | |||
| 34 | #define KVM_MMIO_SIZE 16 | 38 | #define KVM_MMIO_SIZE 16 |
| 35 | 39 | ||
| 36 | #define KVM_PIO_PAGE_OFFSET 1 | 40 | #define KVM_PIO_PAGE_OFFSET 1 |
| @@ -228,7 +232,7 @@ struct kvm_mmu_page { | |||
| 228 | * One bit set per slot which has memory | 232 | * One bit set per slot which has memory |
| 229 | * in this shadow page. | 233 | * in this shadow page. |
| 230 | */ | 234 | */ |
| 231 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 235 | DECLARE_BITMAP(slot_bitmap, KVM_MEM_SLOTS_NUM); |
| 232 | bool unsync; | 236 | bool unsync; |
| 233 | int root_count; /* Currently serving as active root */ | 237 | int root_count; /* Currently serving as active root */ |
| 234 | unsigned int unsync_children; | 238 | unsigned int unsync_children; |
| @@ -239,14 +243,9 @@ struct kvm_mmu_page { | |||
| 239 | int clear_spte_count; | 243 | int clear_spte_count; |
| 240 | #endif | 244 | #endif |
| 241 | 245 | ||
| 242 | struct rcu_head rcu; | 246 | int write_flooding_count; |
| 243 | }; | ||
| 244 | 247 | ||
| 245 | struct kvm_pv_mmu_op_buffer { | 248 | struct rcu_head rcu; |
| 246 | void *ptr; | ||
| 247 | unsigned len; | ||
| 248 | unsigned processed; | ||
| 249 | char buf[512] __aligned(sizeof(long)); | ||
| 250 | }; | 249 | }; |
| 251 | 250 | ||
| 252 | struct kvm_pio_request { | 251 | struct kvm_pio_request { |
| @@ -294,6 +293,37 @@ struct kvm_mmu { | |||
| 294 | u64 pdptrs[4]; /* pae */ | 293 | u64 pdptrs[4]; /* pae */ |
| 295 | }; | 294 | }; |
| 296 | 295 | ||
| 296 | enum pmc_type { | ||
| 297 | KVM_PMC_GP = 0, | ||
| 298 | KVM_PMC_FIXED, | ||
| 299 | }; | ||
| 300 | |||
| 301 | struct kvm_pmc { | ||
| 302 | enum pmc_type type; | ||
| 303 | u8 idx; | ||
| 304 | u64 counter; | ||
| 305 | u64 eventsel; | ||
| 306 | struct perf_event *perf_event; | ||
| 307 | struct kvm_vcpu *vcpu; | ||
| 308 | }; | ||
| 309 | |||
| 310 | struct kvm_pmu { | ||
| 311 | unsigned nr_arch_gp_counters; | ||
| 312 | unsigned nr_arch_fixed_counters; | ||
| 313 | unsigned available_event_types; | ||
| 314 | u64 fixed_ctr_ctrl; | ||
| 315 | u64 global_ctrl; | ||
| 316 | u64 global_status; | ||
| 317 | u64 global_ovf_ctrl; | ||
| 318 | u64 counter_bitmask[2]; | ||
| 319 | u64 global_ctrl_mask; | ||
| 320 | u8 version; | ||
| 321 | struct kvm_pmc gp_counters[X86_PMC_MAX_GENERIC]; | ||
| 322 | struct kvm_pmc fixed_counters[X86_PMC_MAX_FIXED]; | ||
| 323 | struct irq_work irq_work; | ||
| 324 | u64 reprogram_pmi; | ||
| 325 | }; | ||
| 326 | |||
| 297 | struct kvm_vcpu_arch { | 327 | struct kvm_vcpu_arch { |
| 298 | /* | 328 | /* |
| 299 | * rip and regs accesses must go through | 329 | * rip and regs accesses must go through |
| @@ -345,19 +375,10 @@ struct kvm_vcpu_arch { | |||
| 345 | */ | 375 | */ |
| 346 | struct kvm_mmu *walk_mmu; | 376 | struct kvm_mmu *walk_mmu; |
| 347 | 377 | ||
| 348 | /* only needed in kvm_pv_mmu_op() path, but it's hot so | ||
| 349 | * put it here to avoid allocation */ | ||
| 350 | struct kvm_pv_mmu_op_buffer mmu_op_buffer; | ||
| 351 | |||
| 352 | struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; | 378 | struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; |
| 353 | struct kvm_mmu_memory_cache mmu_page_cache; | 379 | struct kvm_mmu_memory_cache mmu_page_cache; |
| 354 | struct kvm_mmu_memory_cache mmu_page_header_cache; | 380 | struct kvm_mmu_memory_cache mmu_page_header_cache; |
| 355 | 381 | ||
| 356 | gfn_t last_pt_write_gfn; | ||
| 357 | int last_pt_write_count; | ||
| 358 | u64 *last_pte_updated; | ||
| 359 | gfn_t last_pte_gfn; | ||
| 360 | |||
| 361 | struct fpu guest_fpu; | 382 | struct fpu guest_fpu; |
| 362 | u64 xcr0; | 383 | u64 xcr0; |
| 363 | 384 | ||
| @@ -436,6 +457,8 @@ struct kvm_vcpu_arch { | |||
| 436 | unsigned access; | 457 | unsigned access; |
| 437 | gfn_t mmio_gfn; | 458 | gfn_t mmio_gfn; |
| 438 | 459 | ||
| 460 | struct kvm_pmu pmu; | ||
| 461 | |||
| 439 | /* used for guest single stepping over the given code position */ | 462 | /* used for guest single stepping over the given code position */ |
| 440 | unsigned long singlestep_rip; | 463 | unsigned long singlestep_rip; |
| 441 | 464 | ||
| @@ -444,6 +467,9 @@ struct kvm_vcpu_arch { | |||
| 444 | 467 | ||
| 445 | cpumask_var_t wbinvd_dirty_mask; | 468 | cpumask_var_t wbinvd_dirty_mask; |
| 446 | 469 | ||
| 470 | unsigned long last_retry_eip; | ||
| 471 | unsigned long last_retry_addr; | ||
| 472 | |||
| 447 | struct { | 473 | struct { |
| 448 | bool halted; | 474 | bool halted; |
| 449 | gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; | 475 | gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; |
| @@ -459,7 +485,6 @@ struct kvm_arch { | |||
| 459 | unsigned int n_requested_mmu_pages; | 485 | unsigned int n_requested_mmu_pages; |
| 460 | unsigned int n_max_mmu_pages; | 486 | unsigned int n_max_mmu_pages; |
| 461 | unsigned int indirect_shadow_pages; | 487 | unsigned int indirect_shadow_pages; |
| 462 | atomic_t invlpg_counter; | ||
| 463 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; | 488 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; |
| 464 | /* | 489 | /* |
| 465 | * Hash table of struct kvm_mmu_page. | 490 | * Hash table of struct kvm_mmu_page. |
| @@ -660,6 +685,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
| 660 | 685 | ||
| 661 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); | 686 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
| 662 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); | 687 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); |
| 688 | int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, | ||
| 689 | struct kvm_memory_slot *slot); | ||
| 663 | void kvm_mmu_zap_all(struct kvm *kvm); | 690 | void kvm_mmu_zap_all(struct kvm *kvm); |
| 664 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); | 691 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); |
| 665 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); | 692 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); |
| @@ -668,8 +695,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); | |||
| 668 | 695 | ||
| 669 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 696 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
| 670 | const void *val, int bytes); | 697 | const void *val, int bytes); |
| 671 | int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | ||
| 672 | gpa_t addr, unsigned long *ret); | ||
| 673 | u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); | 698 | u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); |
| 674 | 699 | ||
| 675 | extern bool tdp_enabled; | 700 | extern bool tdp_enabled; |
| @@ -692,6 +717,7 @@ enum emulation_result { | |||
| 692 | #define EMULTYPE_NO_DECODE (1 << 0) | 717 | #define EMULTYPE_NO_DECODE (1 << 0) |
| 693 | #define EMULTYPE_TRAP_UD (1 << 1) | 718 | #define EMULTYPE_TRAP_UD (1 << 1) |
| 694 | #define EMULTYPE_SKIP (1 << 2) | 719 | #define EMULTYPE_SKIP (1 << 2) |
| 720 | #define EMULTYPE_RETRY (1 << 3) | ||
| 695 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, | 721 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, |
| 696 | int emulation_type, void *insn, int insn_len); | 722 | int emulation_type, void *insn, int insn_len); |
| 697 | 723 | ||
| @@ -734,6 +760,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); | |||
| 734 | 760 | ||
| 735 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); | 761 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); |
| 736 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | 762 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); |
| 763 | bool kvm_rdpmc(struct kvm_vcpu *vcpu); | ||
| 737 | 764 | ||
| 738 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); | 765 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
| 739 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | 766 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); |
| @@ -754,13 +781,14 @@ int fx_init(struct kvm_vcpu *vcpu); | |||
| 754 | 781 | ||
| 755 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); | 782 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); |
| 756 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 783 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
| 757 | const u8 *new, int bytes, | 784 | const u8 *new, int bytes); |
| 758 | bool guest_initiated); | 785 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); |
| 759 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); | 786 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); |
| 760 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | 787 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); |
| 761 | int kvm_mmu_load(struct kvm_vcpu *vcpu); | 788 | int kvm_mmu_load(struct kvm_vcpu *vcpu); |
| 762 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); | 789 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); |
| 763 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); | 790 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); |
| 791 | gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); | ||
| 764 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, | 792 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, |
| 765 | struct x86_exception *exception); | 793 | struct x86_exception *exception); |
| 766 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, | 794 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, |
| @@ -782,6 +810,11 @@ void kvm_disable_tdp(void); | |||
| 782 | int complete_pio(struct kvm_vcpu *vcpu); | 810 | int complete_pio(struct kvm_vcpu *vcpu); |
| 783 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); | 811 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); |
| 784 | 812 | ||
| 813 | static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | ||
| 814 | { | ||
| 815 | return gpa; | ||
| 816 | } | ||
| 817 | |||
| 785 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) | 818 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) |
| 786 | { | 819 | { |
| 787 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); | 820 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); |
| @@ -894,4 +927,17 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); | |||
| 894 | 927 | ||
| 895 | void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); | 928 | void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); |
| 896 | 929 | ||
| 930 | int kvm_is_in_guest(void); | ||
| 931 | |||
| 932 | void kvm_pmu_init(struct kvm_vcpu *vcpu); | ||
| 933 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu); | ||
| 934 | void kvm_pmu_reset(struct kvm_vcpu *vcpu); | ||
| 935 | void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); | ||
| 936 | bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); | ||
| 937 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | ||
| 938 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); | ||
| 939 | int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); | ||
| 940 | void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); | ||
| 941 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu); | ||
| 942 | |||
| 897 | #endif /* _ASM_X86_KVM_HOST_H */ | 943 | #endif /* _ASM_X86_KVM_HOST_H */ |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a9c2116001d6..f0c6fd6f176b 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
| @@ -39,8 +39,6 @@ | |||
| 39 | #include <asm/desc.h> | 39 | #include <asm/desc.h> |
| 40 | #include <asm/tlbflush.h> | 40 | #include <asm/tlbflush.h> |
| 41 | 41 | ||
| 42 | #define MMU_QUEUE_SIZE 1024 | ||
| 43 | |||
| 44 | static int kvmapf = 1; | 42 | static int kvmapf = 1; |
| 45 | 43 | ||
| 46 | static int parse_no_kvmapf(char *arg) | 44 | static int parse_no_kvmapf(char *arg) |
| @@ -60,21 +58,10 @@ static int parse_no_stealacc(char *arg) | |||
| 60 | 58 | ||
| 61 | early_param("no-steal-acc", parse_no_stealacc); | 59 | early_param("no-steal-acc", parse_no_stealacc); |
| 62 | 60 | ||
| 63 | struct kvm_para_state { | ||
| 64 | u8 mmu_queue[MMU_QUEUE_SIZE]; | ||
| 65 | int mmu_queue_len; | ||
| 66 | }; | ||
| 67 | |||
| 68 | static DEFINE_PER_CPU(struct kvm_para_state, para_state); | ||
| 69 | static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); | 61 | static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); |
| 70 | static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); | 62 | static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); |
| 71 | static int has_steal_clock = 0; | 63 | static int has_steal_clock = 0; |
| 72 | 64 | ||
| 73 | static struct kvm_para_state *kvm_para_state(void) | ||
| 74 | { | ||
| 75 | return &per_cpu(para_state, raw_smp_processor_id()); | ||
| 76 | } | ||
| 77 | |||
| 78 | /* | 65 | /* |
| 79 | * No need for any "IO delay" on KVM | 66 | * No need for any "IO delay" on KVM |
| 80 | */ | 67 | */ |
| @@ -271,151 +258,6 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
| 271 | } | 258 | } |
| 272 | } | 259 | } |
| 273 | 260 | ||
| 274 | static void kvm_mmu_op(void *buffer, unsigned len) | ||
| 275 | { | ||
| 276 | int r; | ||
| 277 | unsigned long a1, a2; | ||
| 278 | |||
| 279 | do { | ||
| 280 | a1 = __pa(buffer); | ||
| 281 | a2 = 0; /* on i386 __pa() always returns <4G */ | ||
| 282 | r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2); | ||
| 283 | buffer += r; | ||
| 284 | len -= r; | ||
| 285 | } while (len); | ||
| 286 | } | ||
| 287 | |||
| 288 | static void mmu_queue_flush(struct kvm_para_state *state) | ||
| 289 | { | ||
| 290 | if (state->mmu_queue_len) { | ||
| 291 | kvm_mmu_op(state->mmu_queue, state->mmu_queue_len); | ||
| 292 | state->mmu_queue_len = 0; | ||
| 293 | } | ||
| 294 | } | ||
| 295 | |||
| 296 | static void kvm_deferred_mmu_op(void *buffer, int len) | ||
| 297 | { | ||
| 298 | struct kvm_para_state *state = kvm_para_state(); | ||
| 299 | |||
| 300 | if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) { | ||
| 301 | kvm_mmu_op(buffer, len); | ||
| 302 | return; | ||
| 303 | } | ||
| 304 | if (state->mmu_queue_len + len > sizeof state->mmu_queue) | ||
| 305 | mmu_queue_flush(state); | ||
| 306 | memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len); | ||
| 307 | state->mmu_queue_len += len; | ||
| 308 | } | ||
| 309 | |||
| 310 | static void kvm_mmu_write(void *dest, u64 val) | ||
| 311 | { | ||
| 312 | __u64 pte_phys; | ||
| 313 | struct kvm_mmu_op_write_pte wpte; | ||
| 314 | |||
| 315 | #ifdef CONFIG_HIGHPTE | ||
| 316 | struct page *page; | ||
| 317 | unsigned long dst = (unsigned long) dest; | ||
| 318 | |||
| 319 | page = kmap_atomic_to_page(dest); | ||
| 320 | pte_phys = page_to_pfn(page); | ||
| 321 | pte_phys <<= PAGE_SHIFT; | ||
| 322 | pte_phys += (dst & ~(PAGE_MASK)); | ||
| 323 | #else | ||
| 324 | pte_phys = (unsigned long)__pa(dest); | ||
| 325 | #endif | ||
| 326 | wpte.header.op = KVM_MMU_OP_WRITE_PTE; | ||
| 327 | wpte.pte_val = val; | ||
| 328 | wpte.pte_phys = pte_phys; | ||
| 329 | |||
| 330 | kvm_deferred_mmu_op(&wpte, sizeof wpte); | ||
| 331 | } | ||
| 332 | |||
| 333 | /* | ||
| 334 | * We only need to hook operations that are MMU writes. We hook these so that | ||
| 335 | * we can use lazy MMU mode to batch these operations. We could probably | ||
| 336 | * improve the performance of the host code if we used some of the information | ||
| 337 | * here to simplify processing of batched writes. | ||
| 338 | */ | ||
| 339 | static void kvm_set_pte(pte_t *ptep, pte_t pte) | ||
| 340 | { | ||
| 341 | kvm_mmu_write(ptep, pte_val(pte)); | ||
| 342 | } | ||
| 343 | |||
| 344 | static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
| 345 | pte_t *ptep, pte_t pte) | ||
| 346 | { | ||
| 347 | kvm_mmu_write(ptep, pte_val(pte)); | ||
| 348 | } | ||
| 349 | |||
| 350 | static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd) | ||
| 351 | { | ||
| 352 | kvm_mmu_write(pmdp, pmd_val(pmd)); | ||
| 353 | } | ||
| 354 | |||
| 355 | #if PAGETABLE_LEVELS >= 3 | ||
| 356 | #ifdef CONFIG_X86_PAE | ||
| 357 | static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte) | ||
| 358 | { | ||
| 359 | kvm_mmu_write(ptep, pte_val(pte)); | ||
| 360 | } | ||
| 361 | |||
| 362 | static void kvm_pte_clear(struct mm_struct *mm, | ||
| 363 | unsigned long addr, pte_t *ptep) | ||
| 364 | { | ||
| 365 | kvm_mmu_write(ptep, 0); | ||
| 366 | } | ||
| 367 | |||
| 368 | static void kvm_pmd_clear(pmd_t *pmdp) | ||
| 369 | { | ||
| 370 | kvm_mmu_write(pmdp, 0); | ||
| 371 | } | ||
| 372 | #endif | ||
| 373 | |||
| 374 | static void kvm_set_pud(pud_t *pudp, pud_t pud) | ||
| 375 | { | ||
| 376 | kvm_mmu_write(pudp, pud_val(pud)); | ||
| 377 | } | ||
| 378 | |||
| 379 | #if PAGETABLE_LEVELS == 4 | ||
| 380 | static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd) | ||
| 381 | { | ||
| 382 | kvm_mmu_write(pgdp, pgd_val(pgd)); | ||
| 383 | } | ||
| 384 | #endif | ||
| 385 | #endif /* PAGETABLE_LEVELS >= 3 */ | ||
| 386 | |||
| 387 | static void kvm_flush_tlb(void) | ||
| 388 | { | ||
| 389 | struct kvm_mmu_op_flush_tlb ftlb = { | ||
| 390 | .header.op = KVM_MMU_OP_FLUSH_TLB, | ||
| 391 | }; | ||
| 392 | |||
| 393 | kvm_deferred_mmu_op(&ftlb, sizeof ftlb); | ||
| 394 | } | ||
| 395 | |||
| 396 | static void kvm_release_pt(unsigned long pfn) | ||
| 397 | { | ||
| 398 | struct kvm_mmu_op_release_pt rpt = { | ||
| 399 | .header.op = KVM_MMU_OP_RELEASE_PT, | ||
| 400 | .pt_phys = (u64)pfn << PAGE_SHIFT, | ||
| 401 | }; | ||
| 402 | |||
| 403 | kvm_mmu_op(&rpt, sizeof rpt); | ||
| 404 | } | ||
| 405 | |||
| 406 | static void kvm_enter_lazy_mmu(void) | ||
| 407 | { | ||
| 408 | paravirt_enter_lazy_mmu(); | ||
| 409 | } | ||
| 410 | |||
| 411 | static void kvm_leave_lazy_mmu(void) | ||
| 412 | { | ||
| 413 | struct kvm_para_state *state = kvm_para_state(); | ||
| 414 | |||
| 415 | mmu_queue_flush(state); | ||
| 416 | paravirt_leave_lazy_mmu(); | ||
| 417 | } | ||
| 418 | |||
| 419 | static void __init paravirt_ops_setup(void) | 261 | static void __init paravirt_ops_setup(void) |
| 420 | { | 262 | { |
| 421 | pv_info.name = "KVM"; | 263 | pv_info.name = "KVM"; |
| @@ -424,29 +266,6 @@ static void __init paravirt_ops_setup(void) | |||
| 424 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) | 266 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) |
| 425 | pv_cpu_ops.io_delay = kvm_io_delay; | 267 | pv_cpu_ops.io_delay = kvm_io_delay; |
| 426 | 268 | ||
| 427 | if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) { | ||
| 428 | pv_mmu_ops.set_pte = kvm_set_pte; | ||
| 429 | pv_mmu_ops.set_pte_at = kvm_set_pte_at; | ||
| 430 | pv_mmu_ops.set_pmd = kvm_set_pmd; | ||
| 431 | #if PAGETABLE_LEVELS >= 3 | ||
| 432 | #ifdef CONFIG_X86_PAE | ||
| 433 | pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; | ||
| 434 | pv_mmu_ops.pte_clear = kvm_pte_clear; | ||
| 435 | pv_mmu_ops.pmd_clear = kvm_pmd_clear; | ||
| 436 | #endif | ||
| 437 | pv_mmu_ops.set_pud = kvm_set_pud; | ||
| 438 | #if PAGETABLE_LEVELS == 4 | ||
| 439 | pv_mmu_ops.set_pgd = kvm_set_pgd; | ||
| 440 | #endif | ||
| 441 | #endif | ||
| 442 | pv_mmu_ops.flush_tlb_user = kvm_flush_tlb; | ||
| 443 | pv_mmu_ops.release_pte = kvm_release_pt; | ||
| 444 | pv_mmu_ops.release_pmd = kvm_release_pt; | ||
| 445 | pv_mmu_ops.release_pud = kvm_release_pt; | ||
| 446 | |||
| 447 | pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; | ||
| 448 | pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; | ||
| 449 | } | ||
| 450 | #ifdef CONFIG_X86_IO_APIC | 269 | #ifdef CONFIG_X86_IO_APIC |
| 451 | no_timer_check = 1; | 270 | no_timer_check = 1; |
| 452 | #endif | 271 | #endif |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index ff5790d8e990..1a7fe868f375 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
| @@ -35,6 +35,7 @@ config KVM | |||
| 35 | select KVM_MMIO | 35 | select KVM_MMIO |
| 36 | select TASKSTATS | 36 | select TASKSTATS |
| 37 | select TASK_DELAY_ACCT | 37 | select TASK_DELAY_ACCT |
| 38 | select PERF_EVENTS | ||
| 38 | ---help--- | 39 | ---help--- |
| 39 | Support hosting fully virtualized guest machines using hardware | 40 | Support hosting fully virtualized guest machines using hardware |
| 40 | virtualization extensions. You will need a fairly recent | 41 | virtualization extensions. You will need a fairly recent |
| @@ -52,6 +53,8 @@ config KVM | |||
| 52 | config KVM_INTEL | 53 | config KVM_INTEL |
| 53 | tristate "KVM for Intel processors support" | 54 | tristate "KVM for Intel processors support" |
| 54 | depends on KVM | 55 | depends on KVM |
| 56 | # for perf_guest_get_msrs(): | ||
| 57 | depends on CPU_SUP_INTEL | ||
| 55 | ---help--- | 58 | ---help--- |
| 56 | Provides support for KVM on Intel processors equipped with the VT | 59 | Provides support for KVM on Intel processors equipped with the VT |
| 57 | extensions. | 60 | extensions. |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index f15501f431c8..4f579e8dcacf 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
| @@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) | |||
| 12 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) | 12 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) |
| 13 | 13 | ||
| 14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
| 15 | i8254.o timer.o | 15 | i8254.o timer.o cpuid.o pmu.o |
| 16 | kvm-intel-y += vmx.o | 16 | kvm-intel-y += vmx.o |
| 17 | kvm-amd-y += svm.o | 17 | kvm-amd-y += svm.o |
| 18 | 18 | ||
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c new file mode 100644 index 000000000000..89b02bfaaca5 --- /dev/null +++ b/arch/x86/kvm/cpuid.c | |||
| @@ -0,0 +1,670 @@ | |||
| 1 | /* | ||
| 2 | * Kernel-based Virtual Machine driver for Linux | ||
| 3 | * cpuid support routines | ||
| 4 | * | ||
| 5 | * derived from arch/x86/kvm/x86.c | ||
| 6 | * | ||
| 7 | * Copyright 2011 Red Hat, Inc. and/or its affiliates. | ||
| 8 | * Copyright IBM Corporation, 2008 | ||
| 9 | * | ||
| 10 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 11 | * the COPYING file in the top-level directory. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <linux/kvm_host.h> | ||
| 16 | #include <linux/module.h> | ||
| 17 | #include <linux/vmalloc.h> | ||
| 18 | #include <linux/uaccess.h> | ||
| 19 | #include <asm/user.h> | ||
| 20 | #include <asm/xsave.h> | ||
| 21 | #include "cpuid.h" | ||
| 22 | #include "lapic.h" | ||
| 23 | #include "mmu.h" | ||
| 24 | #include "trace.h" | ||
| 25 | |||
| 26 | void kvm_update_cpuid(struct kvm_vcpu *vcpu) | ||
| 27 | { | ||
| 28 | struct kvm_cpuid_entry2 *best; | ||
| 29 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
| 30 | |||
| 31 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
| 32 | if (!best) | ||
| 33 | return; | ||
| 34 | |||
| 35 | /* Update OSXSAVE bit */ | ||
| 36 | if (cpu_has_xsave && best->function == 0x1) { | ||
| 37 | best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); | ||
| 38 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) | ||
| 39 | best->ecx |= bit(X86_FEATURE_OSXSAVE); | ||
| 40 | } | ||
| 41 | |||
| 42 | if (apic) { | ||
| 43 | if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER)) | ||
| 44 | apic->lapic_timer.timer_mode_mask = 3 << 17; | ||
| 45 | else | ||
| 46 | apic->lapic_timer.timer_mode_mask = 1 << 17; | ||
| 47 | } | ||
| 48 | |||
| 49 | kvm_pmu_cpuid_update(vcpu); | ||
| 50 | } | ||
| 51 | |||
| 52 | static int is_efer_nx(void) | ||
| 53 | { | ||
| 54 | unsigned long long efer = 0; | ||
| 55 | |||
| 56 | rdmsrl_safe(MSR_EFER, &efer); | ||
| 57 | return efer & EFER_NX; | ||
| 58 | } | ||
| 59 | |||
| 60 | static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) | ||
| 61 | { | ||
| 62 | int i; | ||
| 63 | struct kvm_cpuid_entry2 *e, *entry; | ||
| 64 | |||
| 65 | entry = NULL; | ||
| 66 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { | ||
| 67 | e = &vcpu->arch.cpuid_entries[i]; | ||
| 68 | if (e->function == 0x80000001) { | ||
| 69 | entry = e; | ||
| 70 | break; | ||
| 71 | } | ||
| 72 | } | ||
| 73 | if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) { | ||
| 74 | entry->edx &= ~(1 << 20); | ||
| 75 | printk(KERN_INFO "kvm: guest NX capability removed\n"); | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | /* when an old userspace process fills a new kernel module */ | ||
| 80 | int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | ||
| 81 | struct kvm_cpuid *cpuid, | ||
| 82 | struct kvm_cpuid_entry __user *entries) | ||
| 83 | { | ||
| 84 | int r, i; | ||
| 85 | struct kvm_cpuid_entry *cpuid_entries; | ||
| 86 | |||
| 87 | r = -E2BIG; | ||
| 88 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
| 89 | goto out; | ||
| 90 | r = -ENOMEM; | ||
| 91 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent); | ||
| 92 | if (!cpuid_entries) | ||
| 93 | goto out; | ||
| 94 | r = -EFAULT; | ||
| 95 | if (copy_from_user(cpuid_entries, entries, | ||
| 96 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | ||
| 97 | goto out_free; | ||
| 98 | for (i = 0; i < cpuid->nent; i++) { | ||
| 99 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; | ||
| 100 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; | ||
| 101 | vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx; | ||
| 102 | vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx; | ||
| 103 | vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx; | ||
| 104 | vcpu->arch.cpuid_entries[i].index = 0; | ||
| 105 | vcpu->arch.cpuid_entries[i].flags = 0; | ||
| 106 | vcpu->arch.cpuid_entries[i].padding[0] = 0; | ||
| 107 | vcpu->arch.cpuid_entries[i].padding[1] = 0; | ||
| 108 | vcpu->arch.cpuid_entries[i].padding[2] = 0; | ||
| 109 | } | ||
| 110 | vcpu->arch.cpuid_nent = cpuid->nent; | ||
| 111 | cpuid_fix_nx_cap(vcpu); | ||
| 112 | r = 0; | ||
| 113 | kvm_apic_set_version(vcpu); | ||
| 114 | kvm_x86_ops->cpuid_update(vcpu); | ||
| 115 | kvm_update_cpuid(vcpu); | ||
| 116 | |||
| 117 | out_free: | ||
| 118 | vfree(cpuid_entries); | ||
| 119 | out: | ||
| 120 | return r; | ||
| 121 | } | ||
| 122 | |||
| 123 | int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | ||
| 124 | struct kvm_cpuid2 *cpuid, | ||
| 125 | struct kvm_cpuid_entry2 __user *entries) | ||
| 126 | { | ||
| 127 | int r; | ||
| 128 | |||
| 129 | r = -E2BIG; | ||
| 130 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
| 131 | goto out; | ||
| 132 | r = -EFAULT; | ||
| 133 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, | ||
| 134 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) | ||
| 135 | goto out; | ||
| 136 | vcpu->arch.cpuid_nent = cpuid->nent; | ||
| 137 | kvm_apic_set_version(vcpu); | ||
| 138 | kvm_x86_ops->cpuid_update(vcpu); | ||
| 139 | kvm_update_cpuid(vcpu); | ||
| 140 | return 0; | ||
| 141 | |||
| 142 | out: | ||
| 143 | return r; | ||
| 144 | } | ||
| 145 | |||
| 146 | int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | ||
| 147 | struct kvm_cpuid2 *cpuid, | ||
| 148 | struct kvm_cpuid_entry2 __user *entries) | ||
| 149 | { | ||
| 150 | int r; | ||
| 151 | |||
| 152 | r = -E2BIG; | ||
| 153 | if (cpuid->nent < vcpu->arch.cpuid_nent) | ||
| 154 | goto out; | ||
| 155 | r = -EFAULT; | ||
| 156 | if (copy_to_user(entries, &vcpu->arch.cpuid_entries, | ||
| 157 | vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) | ||
| 158 | goto out; | ||
| 159 | return 0; | ||
| 160 | |||
| 161 | out: | ||
| 162 | cpuid->nent = vcpu->arch.cpuid_nent; | ||
| 163 | return r; | ||
| 164 | } | ||
| 165 | |||
| 166 | static void cpuid_mask(u32 *word, int wordnum) | ||
| 167 | { | ||
| 168 | *word &= boot_cpu_data.x86_capability[wordnum]; | ||
| 169 | } | ||
| 170 | |||
| 171 | static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
| 172 | u32 index) | ||
| 173 | { | ||
| 174 | entry->function = function; | ||
| 175 | entry->index = index; | ||
| 176 | cpuid_count(entry->function, entry->index, | ||
| 177 | &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); | ||
| 178 | entry->flags = 0; | ||
| 179 | } | ||
| 180 | |||
| 181 | static bool supported_xcr0_bit(unsigned bit) | ||
| 182 | { | ||
| 183 | u64 mask = ((u64)1 << bit); | ||
| 184 | |||
| 185 | return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0; | ||
| 186 | } | ||
| 187 | |||
| 188 | #define F(x) bit(X86_FEATURE_##x) | ||
| 189 | |||
| 190 | static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
| 191 | u32 index, int *nent, int maxnent) | ||
| 192 | { | ||
| 193 | int r; | ||
| 194 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; | ||
| 195 | #ifdef CONFIG_X86_64 | ||
| 196 | unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL) | ||
| 197 | ? F(GBPAGES) : 0; | ||
| 198 | unsigned f_lm = F(LM); | ||
| 199 | #else | ||
| 200 | unsigned f_gbpages = 0; | ||
| 201 | unsigned f_lm = 0; | ||
| 202 | #endif | ||
| 203 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | ||
| 204 | |||
| 205 | /* cpuid 1.edx */ | ||
| 206 | const u32 kvm_supported_word0_x86_features = | ||
| 207 | F(FPU) | F(VME) | F(DE) | F(PSE) | | ||
| 208 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | ||
| 209 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | | ||
| 210 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
| 211 | F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) | | ||
| 212 | 0 /* Reserved, DS, ACPI */ | F(MMX) | | ||
| 213 | F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | | ||
| 214 | 0 /* HTT, TM, Reserved, PBE */; | ||
| 215 | /* cpuid 0x80000001.edx */ | ||
| 216 | const u32 kvm_supported_word1_x86_features = | ||
| 217 | F(FPU) | F(VME) | F(DE) | F(PSE) | | ||
| 218 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | ||
| 219 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | | ||
| 220 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
| 221 | F(PAT) | F(PSE36) | 0 /* Reserved */ | | ||
| 222 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | | ||
| 223 | F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp | | ||
| 224 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | ||
| 225 | /* cpuid 1.ecx */ | ||
| 226 | const u32 kvm_supported_word4_x86_features = | ||
| 227 | F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | | ||
| 228 | 0 /* DS-CPL, VMX, SMX, EST */ | | ||
| 229 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | | ||
| 230 | F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | | ||
| 231 | 0 /* Reserved, DCA */ | F(XMM4_1) | | ||
| 232 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | | ||
| 233 | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | | ||
| 234 | F(F16C) | F(RDRAND); | ||
| 235 | /* cpuid 0x80000001.ecx */ | ||
| 236 | const u32 kvm_supported_word6_x86_features = | ||
| 237 | F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | | ||
| 238 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | | ||
| 239 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | | ||
| 240 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); | ||
| 241 | |||
| 242 | /* cpuid 0xC0000001.edx */ | ||
| 243 | const u32 kvm_supported_word5_x86_features = | ||
| 244 | F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | | ||
| 245 | F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | | ||
| 246 | F(PMM) | F(PMM_EN); | ||
| 247 | |||
| 248 | /* cpuid 7.0.ebx */ | ||
| 249 | const u32 kvm_supported_word9_x86_features = | ||
| 250 | F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS); | ||
| 251 | |||
| 252 | /* all calls to cpuid_count() should be made on the same cpu */ | ||
| 253 | get_cpu(); | ||
| 254 | |||
| 255 | r = -E2BIG; | ||
| 256 | |||
| 257 | if (*nent >= maxnent) | ||
| 258 | goto out; | ||
| 259 | |||
| 260 | do_cpuid_1_ent(entry, function, index); | ||
| 261 | ++*nent; | ||
| 262 | |||
| 263 | switch (function) { | ||
| 264 | case 0: | ||
| 265 | entry->eax = min(entry->eax, (u32)0xd); | ||
| 266 | break; | ||
| 267 | case 1: | ||
| 268 | entry->edx &= kvm_supported_word0_x86_features; | ||
| 269 | cpuid_mask(&entry->edx, 0); | ||
| 270 | entry->ecx &= kvm_supported_word4_x86_features; | ||
| 271 | cpuid_mask(&entry->ecx, 4); | ||
| 272 | /* we support x2apic emulation even if host does not support | ||
| 273 | * it since we emulate x2apic in software */ | ||
| 274 | entry->ecx |= F(X2APIC); | ||
| 275 | break; | ||
| 276 | /* function 2 entries are STATEFUL. That is, repeated cpuid commands | ||
| 277 | * may return different values. This forces us to get_cpu() before | ||
| 278 | * issuing the first command, and also to emulate this annoying behavior | ||
| 279 | * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */ | ||
| 280 | case 2: { | ||
| 281 | int t, times = entry->eax & 0xff; | ||
| 282 | |||
| 283 | entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
| 284 | entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
| 285 | for (t = 1; t < times; ++t) { | ||
| 286 | if (*nent >= maxnent) | ||
| 287 | goto out; | ||
| 288 | |||
| 289 | do_cpuid_1_ent(&entry[t], function, 0); | ||
| 290 | entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
| 291 | ++*nent; | ||
| 292 | } | ||
| 293 | break; | ||
| 294 | } | ||
| 295 | /* function 4 has additional index. */ | ||
| 296 | case 4: { | ||
| 297 | int i, cache_type; | ||
| 298 | |||
| 299 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 300 | /* read more entries until cache_type is zero */ | ||
| 301 | for (i = 1; ; ++i) { | ||
| 302 | if (*nent >= maxnent) | ||
| 303 | goto out; | ||
| 304 | |||
| 305 | cache_type = entry[i - 1].eax & 0x1f; | ||
| 306 | if (!cache_type) | ||
| 307 | break; | ||
| 308 | do_cpuid_1_ent(&entry[i], function, i); | ||
| 309 | entry[i].flags |= | ||
| 310 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 311 | ++*nent; | ||
| 312 | } | ||
| 313 | break; | ||
| 314 | } | ||
| 315 | case 7: { | ||
| 316 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 317 | /* Mask ebx against host capbability word 9 */ | ||
| 318 | if (index == 0) { | ||
| 319 | entry->ebx &= kvm_supported_word9_x86_features; | ||
| 320 | cpuid_mask(&entry->ebx, 9); | ||
| 321 | } else | ||
| 322 | entry->ebx = 0; | ||
| 323 | entry->eax = 0; | ||
| 324 | entry->ecx = 0; | ||
| 325 | entry->edx = 0; | ||
| 326 | break; | ||
| 327 | } | ||
| 328 | case 9: | ||
| 329 | break; | ||
| 330 | case 0xa: { /* Architectural Performance Monitoring */ | ||
| 331 | struct x86_pmu_capability cap; | ||
| 332 | union cpuid10_eax eax; | ||
| 333 | union cpuid10_edx edx; | ||
| 334 | |||
| 335 | perf_get_x86_pmu_capability(&cap); | ||
| 336 | |||
| 337 | /* | ||
| 338 | * Only support guest architectural pmu on a host | ||
| 339 | * with architectural pmu. | ||
| 340 | */ | ||
| 341 | if (!cap.version) | ||
| 342 | memset(&cap, 0, sizeof(cap)); | ||
| 343 | |||
| 344 | eax.split.version_id = min(cap.version, 2); | ||
| 345 | eax.split.num_counters = cap.num_counters_gp; | ||
| 346 | eax.split.bit_width = cap.bit_width_gp; | ||
| 347 | eax.split.mask_length = cap.events_mask_len; | ||
| 348 | |||
| 349 | edx.split.num_counters_fixed = cap.num_counters_fixed; | ||
| 350 | edx.split.bit_width_fixed = cap.bit_width_fixed; | ||
| 351 | edx.split.reserved = 0; | ||
| 352 | |||
| 353 | entry->eax = eax.full; | ||
| 354 | entry->ebx = cap.events_mask; | ||
| 355 | entry->ecx = 0; | ||
| 356 | entry->edx = edx.full; | ||
| 357 | break; | ||
| 358 | } | ||
| 359 | /* function 0xb has additional index. */ | ||
| 360 | case 0xb: { | ||
| 361 | int i, level_type; | ||
| 362 | |||
| 363 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 364 | /* read more entries until level_type is zero */ | ||
| 365 | for (i = 1; ; ++i) { | ||
| 366 | if (*nent >= maxnent) | ||
| 367 | goto out; | ||
| 368 | |||
| 369 | level_type = entry[i - 1].ecx & 0xff00; | ||
| 370 | if (!level_type) | ||
| 371 | break; | ||
| 372 | do_cpuid_1_ent(&entry[i], function, i); | ||
| 373 | entry[i].flags |= | ||
| 374 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 375 | ++*nent; | ||
| 376 | } | ||
| 377 | break; | ||
| 378 | } | ||
| 379 | case 0xd: { | ||
| 380 | int idx, i; | ||
| 381 | |||
| 382 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 383 | for (idx = 1, i = 1; idx < 64; ++idx) { | ||
| 384 | if (*nent >= maxnent) | ||
| 385 | goto out; | ||
| 386 | |||
| 387 | do_cpuid_1_ent(&entry[i], function, idx); | ||
| 388 | if (entry[i].eax == 0 || !supported_xcr0_bit(idx)) | ||
| 389 | continue; | ||
| 390 | entry[i].flags |= | ||
| 391 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 392 | ++*nent; | ||
| 393 | ++i; | ||
| 394 | } | ||
| 395 | break; | ||
| 396 | } | ||
| 397 | case KVM_CPUID_SIGNATURE: { | ||
| 398 | char signature[12] = "KVMKVMKVM\0\0"; | ||
| 399 | u32 *sigptr = (u32 *)signature; | ||
| 400 | entry->eax = 0; | ||
| 401 | entry->ebx = sigptr[0]; | ||
| 402 | entry->ecx = sigptr[1]; | ||
| 403 | entry->edx = sigptr[2]; | ||
| 404 | break; | ||
| 405 | } | ||
| 406 | case KVM_CPUID_FEATURES: | ||
| 407 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | | ||
| 408 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | ||
| 409 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | ||
| 410 | (1 << KVM_FEATURE_ASYNC_PF) | | ||
| 411 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | ||
| 412 | |||
| 413 | if (sched_info_on()) | ||
| 414 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); | ||
| 415 | |||
| 416 | entry->ebx = 0; | ||
| 417 | entry->ecx = 0; | ||
| 418 | entry->edx = 0; | ||
| 419 | break; | ||
| 420 | case 0x80000000: | ||
| 421 | entry->eax = min(entry->eax, 0x8000001a); | ||
| 422 | break; | ||
| 423 | case 0x80000001: | ||
| 424 | entry->edx &= kvm_supported_word1_x86_features; | ||
| 425 | cpuid_mask(&entry->edx, 1); | ||
| 426 | entry->ecx &= kvm_supported_word6_x86_features; | ||
| 427 | cpuid_mask(&entry->ecx, 6); | ||
| 428 | break; | ||
| 429 | case 0x80000008: { | ||
| 430 | unsigned g_phys_as = (entry->eax >> 16) & 0xff; | ||
| 431 | unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); | ||
| 432 | unsigned phys_as = entry->eax & 0xff; | ||
| 433 | |||
| 434 | if (!g_phys_as) | ||
| 435 | g_phys_as = phys_as; | ||
| 436 | entry->eax = g_phys_as | (virt_as << 8); | ||
| 437 | entry->ebx = entry->edx = 0; | ||
| 438 | break; | ||
| 439 | } | ||
| 440 | case 0x80000019: | ||
| 441 | entry->ecx = entry->edx = 0; | ||
| 442 | break; | ||
| 443 | case 0x8000001a: | ||
| 444 | break; | ||
| 445 | case 0x8000001d: | ||
| 446 | break; | ||
| 447 | /*Add support for Centaur's CPUID instruction*/ | ||
| 448 | case 0xC0000000: | ||
| 449 | /*Just support up to 0xC0000004 now*/ | ||
| 450 | entry->eax = min(entry->eax, 0xC0000004); | ||
| 451 | break; | ||
| 452 | case 0xC0000001: | ||
| 453 | entry->edx &= kvm_supported_word5_x86_features; | ||
| 454 | cpuid_mask(&entry->edx, 5); | ||
| 455 | break; | ||
| 456 | case 3: /* Processor serial number */ | ||
| 457 | case 5: /* MONITOR/MWAIT */ | ||
| 458 | case 6: /* Thermal management */ | ||
| 459 | case 0x80000007: /* Advanced power management */ | ||
| 460 | case 0xC0000002: | ||
| 461 | case 0xC0000003: | ||
| 462 | case 0xC0000004: | ||
| 463 | default: | ||
| 464 | entry->eax = entry->ebx = entry->ecx = entry->edx = 0; | ||
| 465 | break; | ||
| 466 | } | ||
| 467 | |||
| 468 | kvm_x86_ops->set_supported_cpuid(function, entry); | ||
| 469 | |||
| 470 | r = 0; | ||
| 471 | |||
| 472 | out: | ||
| 473 | put_cpu(); | ||
| 474 | |||
| 475 | return r; | ||
| 476 | } | ||
| 477 | |||
| 478 | #undef F | ||
| 479 | |||
| 480 | struct kvm_cpuid_param { | ||
| 481 | u32 func; | ||
| 482 | u32 idx; | ||
| 483 | bool has_leaf_count; | ||
| 484 | bool (*qualifier)(struct kvm_cpuid_param *param); | ||
| 485 | }; | ||
| 486 | |||
| 487 | static bool is_centaur_cpu(struct kvm_cpuid_param *param) | ||
| 488 | { | ||
| 489 | return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; | ||
| 490 | } | ||
| 491 | |||
| 492 | int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | ||
| 493 | struct kvm_cpuid_entry2 __user *entries) | ||
| 494 | { | ||
| 495 | struct kvm_cpuid_entry2 *cpuid_entries; | ||
| 496 | int limit, nent = 0, r = -E2BIG, i; | ||
| 497 | u32 func; | ||
| 498 | static struct kvm_cpuid_param param[] = { | ||
| 499 | { .func = 0, .has_leaf_count = true }, | ||
| 500 | { .func = 0x80000000, .has_leaf_count = true }, | ||
| 501 | { .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true }, | ||
| 502 | { .func = KVM_CPUID_SIGNATURE }, | ||
| 503 | { .func = KVM_CPUID_FEATURES }, | ||
| 504 | }; | ||
| 505 | |||
| 506 | if (cpuid->nent < 1) | ||
| 507 | goto out; | ||
| 508 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
| 509 | cpuid->nent = KVM_MAX_CPUID_ENTRIES; | ||
| 510 | r = -ENOMEM; | ||
| 511 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); | ||
| 512 | if (!cpuid_entries) | ||
| 513 | goto out; | ||
| 514 | |||
| 515 | r = 0; | ||
| 516 | for (i = 0; i < ARRAY_SIZE(param); i++) { | ||
| 517 | struct kvm_cpuid_param *ent = ¶m[i]; | ||
| 518 | |||
| 519 | if (ent->qualifier && !ent->qualifier(ent)) | ||
| 520 | continue; | ||
| 521 | |||
| 522 | r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, | ||
| 523 | &nent, cpuid->nent); | ||
| 524 | |||
| 525 | if (r) | ||
| 526 | goto out_free; | ||
| 527 | |||
| 528 | if (!ent->has_leaf_count) | ||
| 529 | continue; | ||
| 530 | |||
| 531 | limit = cpuid_entries[nent - 1].eax; | ||
| 532 | for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) | ||
| 533 | r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, | ||
| 534 | &nent, cpuid->nent); | ||
| 535 | |||
| 536 | if (r) | ||
| 537 | goto out_free; | ||
| 538 | } | ||
| 539 | |||
| 540 | r = -EFAULT; | ||
| 541 | if (copy_to_user(entries, cpuid_entries, | ||
| 542 | nent * sizeof(struct kvm_cpuid_entry2))) | ||
| 543 | goto out_free; | ||
| 544 | cpuid->nent = nent; | ||
| 545 | r = 0; | ||
| 546 | |||
| 547 | out_free: | ||
| 548 | vfree(cpuid_entries); | ||
| 549 | out: | ||
| 550 | return r; | ||
| 551 | } | ||
| 552 | |||
| 553 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | ||
| 554 | { | ||
| 555 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; | ||
| 556 | int j, nent = vcpu->arch.cpuid_nent; | ||
| 557 | |||
| 558 | e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
| 559 | /* when no next entry is found, the current entry[i] is reselected */ | ||
| 560 | for (j = i + 1; ; j = (j + 1) % nent) { | ||
| 561 | struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; | ||
| 562 | if (ej->function == e->function) { | ||
| 563 | ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
| 564 | return j; | ||
| 565 | } | ||
| 566 | } | ||
| 567 | return 0; /* silence gcc, even though control never reaches here */ | ||
| 568 | } | ||
| 569 | |||
| 570 | /* find an entry with matching function, matching index (if needed), and that | ||
| 571 | * should be read next (if it's stateful) */ | ||
| 572 | static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, | ||
| 573 | u32 function, u32 index) | ||
| 574 | { | ||
| 575 | if (e->function != function) | ||
| 576 | return 0; | ||
| 577 | if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) | ||
| 578 | return 0; | ||
| 579 | if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && | ||
| 580 | !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) | ||
| 581 | return 0; | ||
| 582 | return 1; | ||
| 583 | } | ||
| 584 | |||
| 585 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | ||
| 586 | u32 function, u32 index) | ||
| 587 | { | ||
| 588 | int i; | ||
| 589 | struct kvm_cpuid_entry2 *best = NULL; | ||
| 590 | |||
| 591 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { | ||
| 592 | struct kvm_cpuid_entry2 *e; | ||
| 593 | |||
| 594 | e = &vcpu->arch.cpuid_entries[i]; | ||
| 595 | if (is_matching_cpuid_entry(e, function, index)) { | ||
| 596 | if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) | ||
| 597 | move_to_next_stateful_cpuid_entry(vcpu, i); | ||
| 598 | best = e; | ||
| 599 | break; | ||
| 600 | } | ||
| 601 | } | ||
| 602 | return best; | ||
| 603 | } | ||
| 604 | EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); | ||
| 605 | |||
| 606 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | ||
| 607 | { | ||
| 608 | struct kvm_cpuid_entry2 *best; | ||
| 609 | |||
| 610 | best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); | ||
| 611 | if (!best || best->eax < 0x80000008) | ||
| 612 | goto not_found; | ||
| 613 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | ||
| 614 | if (best) | ||
| 615 | return best->eax & 0xff; | ||
| 616 | not_found: | ||
| 617 | return 36; | ||
| 618 | } | ||
| 619 | |||
| 620 | /* | ||
| 621 | * If no match is found, check whether we exceed the vCPU's limit | ||
| 622 | * and return the content of the highest valid _standard_ leaf instead. | ||
| 623 | * This is to satisfy the CPUID specification. | ||
| 624 | */ | ||
| 625 | static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, | ||
| 626 | u32 function, u32 index) | ||
| 627 | { | ||
| 628 | struct kvm_cpuid_entry2 *maxlevel; | ||
| 629 | |||
| 630 | maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); | ||
| 631 | if (!maxlevel || maxlevel->eax >= function) | ||
| 632 | return NULL; | ||
| 633 | if (function & 0x80000000) { | ||
| 634 | maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0); | ||
| 635 | if (!maxlevel) | ||
| 636 | return NULL; | ||
| 637 | } | ||
| 638 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); | ||
| 639 | } | ||
| 640 | |||
| 641 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | ||
| 642 | { | ||
| 643 | u32 function, index; | ||
| 644 | struct kvm_cpuid_entry2 *best; | ||
| 645 | |||
| 646 | function = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
| 647 | index = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
| 648 | kvm_register_write(vcpu, VCPU_REGS_RAX, 0); | ||
| 649 | kvm_register_write(vcpu, VCPU_REGS_RBX, 0); | ||
| 650 | kvm_register_write(vcpu, VCPU_REGS_RCX, 0); | ||
| 651 | kvm_register_write(vcpu, VCPU_REGS_RDX, 0); | ||
| 652 | best = kvm_find_cpuid_entry(vcpu, function, index); | ||
| 653 | |||
| 654 | if (!best) | ||
| 655 | best = check_cpuid_limit(vcpu, function, index); | ||
| 656 | |||
| 657 | if (best) { | ||
| 658 | kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); | ||
| 659 | kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); | ||
| 660 | kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx); | ||
| 661 | kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); | ||
| 662 | } | ||
| 663 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
| 664 | trace_kvm_cpuid(function, | ||
| 665 | kvm_register_read(vcpu, VCPU_REGS_RAX), | ||
| 666 | kvm_register_read(vcpu, VCPU_REGS_RBX), | ||
| 667 | kvm_register_read(vcpu, VCPU_REGS_RCX), | ||
| 668 | kvm_register_read(vcpu, VCPU_REGS_RDX)); | ||
| 669 | } | ||
| 670 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | ||
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h new file mode 100644 index 000000000000..5b97e1797a6d --- /dev/null +++ b/arch/x86/kvm/cpuid.h | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | #ifndef ARCH_X86_KVM_CPUID_H | ||
| 2 | #define ARCH_X86_KVM_CPUID_H | ||
| 3 | |||
| 4 | #include "x86.h" | ||
| 5 | |||
| 6 | void kvm_update_cpuid(struct kvm_vcpu *vcpu); | ||
| 7 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | ||
| 8 | u32 function, u32 index); | ||
| 9 | int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | ||
| 10 | struct kvm_cpuid_entry2 __user *entries); | ||
| 11 | int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | ||
| 12 | struct kvm_cpuid *cpuid, | ||
| 13 | struct kvm_cpuid_entry __user *entries); | ||
| 14 | int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | ||
| 15 | struct kvm_cpuid2 *cpuid, | ||
| 16 | struct kvm_cpuid_entry2 __user *entries); | ||
| 17 | int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | ||
| 18 | struct kvm_cpuid2 *cpuid, | ||
| 19 | struct kvm_cpuid_entry2 __user *entries); | ||
| 20 | |||
| 21 | |||
| 22 | static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | ||
| 23 | { | ||
| 24 | struct kvm_cpuid_entry2 *best; | ||
| 25 | |||
| 26 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
| 27 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); | ||
| 28 | } | ||
| 29 | |||
| 30 | static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu) | ||
| 31 | { | ||
| 32 | struct kvm_cpuid_entry2 *best; | ||
| 33 | |||
| 34 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
| 35 | return best && (best->ebx & bit(X86_FEATURE_SMEP)); | ||
| 36 | } | ||
| 37 | |||
| 38 | static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) | ||
| 39 | { | ||
| 40 | struct kvm_cpuid_entry2 *best; | ||
| 41 | |||
| 42 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
| 43 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); | ||
| 44 | } | ||
| 45 | |||
| 46 | #endif | ||
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index f1e3be18a08f..05a562b85025 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
| @@ -125,8 +125,9 @@ | |||
| 125 | #define Lock (1<<26) /* lock prefix is allowed for the instruction */ | 125 | #define Lock (1<<26) /* lock prefix is allowed for the instruction */ |
| 126 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ | 126 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ |
| 127 | #define No64 (1<<28) | 127 | #define No64 (1<<28) |
| 128 | #define PageTable (1 << 29) /* instruction used to write page table */ | ||
| 128 | /* Source 2 operand type */ | 129 | /* Source 2 operand type */ |
| 129 | #define Src2Shift (29) | 130 | #define Src2Shift (30) |
| 130 | #define Src2None (OpNone << Src2Shift) | 131 | #define Src2None (OpNone << Src2Shift) |
| 131 | #define Src2CL (OpCL << Src2Shift) | 132 | #define Src2CL (OpCL << Src2Shift) |
| 132 | #define Src2ImmByte (OpImmByte << Src2Shift) | 133 | #define Src2ImmByte (OpImmByte << Src2Shift) |
| @@ -1674,11 +1675,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) | |||
| 1674 | return X86EMUL_CONTINUE; | 1675 | return X86EMUL_CONTINUE; |
| 1675 | } | 1676 | } |
| 1676 | 1677 | ||
| 1677 | static int em_grp1a(struct x86_emulate_ctxt *ctxt) | ||
| 1678 | { | ||
| 1679 | return emulate_pop(ctxt, &ctxt->dst.val, ctxt->dst.bytes); | ||
| 1680 | } | ||
| 1681 | |||
| 1682 | static int em_grp2(struct x86_emulate_ctxt *ctxt) | 1678 | static int em_grp2(struct x86_emulate_ctxt *ctxt) |
| 1683 | { | 1679 | { |
| 1684 | switch (ctxt->modrm_reg) { | 1680 | switch (ctxt->modrm_reg) { |
| @@ -1788,7 +1784,7 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt) | |||
| 1788 | return rc; | 1784 | return rc; |
| 1789 | } | 1785 | } |
| 1790 | 1786 | ||
| 1791 | static int em_grp9(struct x86_emulate_ctxt *ctxt) | 1787 | static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) |
| 1792 | { | 1788 | { |
| 1793 | u64 old = ctxt->dst.orig_val64; | 1789 | u64 old = ctxt->dst.orig_val64; |
| 1794 | 1790 | ||
| @@ -1831,6 +1827,24 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) | |||
| 1831 | return rc; | 1827 | return rc; |
| 1832 | } | 1828 | } |
| 1833 | 1829 | ||
| 1830 | static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) | ||
| 1831 | { | ||
| 1832 | /* Save real source value, then compare EAX against destination. */ | ||
| 1833 | ctxt->src.orig_val = ctxt->src.val; | ||
| 1834 | ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; | ||
| 1835 | emulate_2op_SrcV(ctxt, "cmp"); | ||
| 1836 | |||
| 1837 | if (ctxt->eflags & EFLG_ZF) { | ||
| 1838 | /* Success: write back to memory. */ | ||
| 1839 | ctxt->dst.val = ctxt->src.orig_val; | ||
| 1840 | } else { | ||
| 1841 | /* Failure: write the value we saw to EAX. */ | ||
| 1842 | ctxt->dst.type = OP_REG; | ||
| 1843 | ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; | ||
| 1844 | } | ||
| 1845 | return X86EMUL_CONTINUE; | ||
| 1846 | } | ||
| 1847 | |||
| 1834 | static int em_lseg(struct x86_emulate_ctxt *ctxt) | 1848 | static int em_lseg(struct x86_emulate_ctxt *ctxt) |
| 1835 | { | 1849 | { |
| 1836 | int seg = ctxt->src2.val; | 1850 | int seg = ctxt->src2.val; |
| @@ -2481,6 +2495,15 @@ static int em_das(struct x86_emulate_ctxt *ctxt) | |||
| 2481 | return X86EMUL_CONTINUE; | 2495 | return X86EMUL_CONTINUE; |
| 2482 | } | 2496 | } |
| 2483 | 2497 | ||
| 2498 | static int em_call(struct x86_emulate_ctxt *ctxt) | ||
| 2499 | { | ||
| 2500 | long rel = ctxt->src.val; | ||
| 2501 | |||
| 2502 | ctxt->src.val = (unsigned long)ctxt->_eip; | ||
| 2503 | jmp_rel(ctxt, rel); | ||
| 2504 | return em_push(ctxt); | ||
| 2505 | } | ||
| 2506 | |||
| 2484 | static int em_call_far(struct x86_emulate_ctxt *ctxt) | 2507 | static int em_call_far(struct x86_emulate_ctxt *ctxt) |
| 2485 | { | 2508 | { |
| 2486 | u16 sel, old_cs; | 2509 | u16 sel, old_cs; |
| @@ -2622,12 +2645,75 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt) | |||
| 2622 | return X86EMUL_CONTINUE; | 2645 | return X86EMUL_CONTINUE; |
| 2623 | } | 2646 | } |
| 2624 | 2647 | ||
| 2648 | static int em_rdpmc(struct x86_emulate_ctxt *ctxt) | ||
| 2649 | { | ||
| 2650 | u64 pmc; | ||
| 2651 | |||
| 2652 | if (ctxt->ops->read_pmc(ctxt, ctxt->regs[VCPU_REGS_RCX], &pmc)) | ||
| 2653 | return emulate_gp(ctxt, 0); | ||
| 2654 | ctxt->regs[VCPU_REGS_RAX] = (u32)pmc; | ||
| 2655 | ctxt->regs[VCPU_REGS_RDX] = pmc >> 32; | ||
| 2656 | return X86EMUL_CONTINUE; | ||
| 2657 | } | ||
| 2658 | |||
| 2625 | static int em_mov(struct x86_emulate_ctxt *ctxt) | 2659 | static int em_mov(struct x86_emulate_ctxt *ctxt) |
| 2626 | { | 2660 | { |
| 2627 | ctxt->dst.val = ctxt->src.val; | 2661 | ctxt->dst.val = ctxt->src.val; |
| 2628 | return X86EMUL_CONTINUE; | 2662 | return X86EMUL_CONTINUE; |
| 2629 | } | 2663 | } |
| 2630 | 2664 | ||
| 2665 | static int em_cr_write(struct x86_emulate_ctxt *ctxt) | ||
| 2666 | { | ||
| 2667 | if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) | ||
| 2668 | return emulate_gp(ctxt, 0); | ||
| 2669 | |||
| 2670 | /* Disable writeback. */ | ||
| 2671 | ctxt->dst.type = OP_NONE; | ||
| 2672 | return X86EMUL_CONTINUE; | ||
| 2673 | } | ||
| 2674 | |||
| 2675 | static int em_dr_write(struct x86_emulate_ctxt *ctxt) | ||
| 2676 | { | ||
| 2677 | unsigned long val; | ||
| 2678 | |||
| 2679 | if (ctxt->mode == X86EMUL_MODE_PROT64) | ||
| 2680 | val = ctxt->src.val & ~0ULL; | ||
| 2681 | else | ||
| 2682 | val = ctxt->src.val & ~0U; | ||
| 2683 | |||
| 2684 | /* #UD condition is already handled. */ | ||
| 2685 | if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0) | ||
| 2686 | return emulate_gp(ctxt, 0); | ||
| 2687 | |||
| 2688 | /* Disable writeback. */ | ||
| 2689 | ctxt->dst.type = OP_NONE; | ||
| 2690 | return X86EMUL_CONTINUE; | ||
| 2691 | } | ||
| 2692 | |||
| 2693 | static int em_wrmsr(struct x86_emulate_ctxt *ctxt) | ||
| 2694 | { | ||
| 2695 | u64 msr_data; | ||
| 2696 | |||
| 2697 | msr_data = (u32)ctxt->regs[VCPU_REGS_RAX] | ||
| 2698 | | ((u64)ctxt->regs[VCPU_REGS_RDX] << 32); | ||
| 2699 | if (ctxt->ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data)) | ||
| 2700 | return emulate_gp(ctxt, 0); | ||
| 2701 | |||
| 2702 | return X86EMUL_CONTINUE; | ||
| 2703 | } | ||
| 2704 | |||
| 2705 | static int em_rdmsr(struct x86_emulate_ctxt *ctxt) | ||
| 2706 | { | ||
| 2707 | u64 msr_data; | ||
| 2708 | |||
| 2709 | if (ctxt->ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data)) | ||
| 2710 | return emulate_gp(ctxt, 0); | ||
| 2711 | |||
| 2712 | ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data; | ||
| 2713 | ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32; | ||
| 2714 | return X86EMUL_CONTINUE; | ||
| 2715 | } | ||
| 2716 | |||
| 2631 | static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt) | 2717 | static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt) |
| 2632 | { | 2718 | { |
| 2633 | if (ctxt->modrm_reg > VCPU_SREG_GS) | 2719 | if (ctxt->modrm_reg > VCPU_SREG_GS) |
| @@ -2775,6 +2861,24 @@ static int em_jcxz(struct x86_emulate_ctxt *ctxt) | |||
| 2775 | return X86EMUL_CONTINUE; | 2861 | return X86EMUL_CONTINUE; |
| 2776 | } | 2862 | } |
| 2777 | 2863 | ||
| 2864 | static int em_in(struct x86_emulate_ctxt *ctxt) | ||
| 2865 | { | ||
| 2866 | if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val, | ||
| 2867 | &ctxt->dst.val)) | ||
| 2868 | return X86EMUL_IO_NEEDED; | ||
| 2869 | |||
| 2870 | return X86EMUL_CONTINUE; | ||
| 2871 | } | ||
| 2872 | |||
| 2873 | static int em_out(struct x86_emulate_ctxt *ctxt) | ||
| 2874 | { | ||
| 2875 | ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val, | ||
| 2876 | &ctxt->src.val, 1); | ||
| 2877 | /* Disable writeback. */ | ||
| 2878 | ctxt->dst.type = OP_NONE; | ||
| 2879 | return X86EMUL_CONTINUE; | ||
| 2880 | } | ||
| 2881 | |||
| 2778 | static int em_cli(struct x86_emulate_ctxt *ctxt) | 2882 | static int em_cli(struct x86_emulate_ctxt *ctxt) |
| 2779 | { | 2883 | { |
| 2780 | if (emulator_bad_iopl(ctxt)) | 2884 | if (emulator_bad_iopl(ctxt)) |
| @@ -2794,6 +2898,69 @@ static int em_sti(struct x86_emulate_ctxt *ctxt) | |||
| 2794 | return X86EMUL_CONTINUE; | 2898 | return X86EMUL_CONTINUE; |
| 2795 | } | 2899 | } |
| 2796 | 2900 | ||
| 2901 | static int em_bt(struct x86_emulate_ctxt *ctxt) | ||
| 2902 | { | ||
| 2903 | /* Disable writeback. */ | ||
| 2904 | ctxt->dst.type = OP_NONE; | ||
| 2905 | /* only subword offset */ | ||
| 2906 | ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; | ||
| 2907 | |||
| 2908 | emulate_2op_SrcV_nobyte(ctxt, "bt"); | ||
| 2909 | return X86EMUL_CONTINUE; | ||
| 2910 | } | ||
| 2911 | |||
| 2912 | static int em_bts(struct x86_emulate_ctxt *ctxt) | ||
| 2913 | { | ||
| 2914 | emulate_2op_SrcV_nobyte(ctxt, "bts"); | ||
| 2915 | return X86EMUL_CONTINUE; | ||
| 2916 | } | ||
| 2917 | |||
| 2918 | static int em_btr(struct x86_emulate_ctxt *ctxt) | ||
| 2919 | { | ||
| 2920 | emulate_2op_SrcV_nobyte(ctxt, "btr"); | ||
| 2921 | return X86EMUL_CONTINUE; | ||
| 2922 | } | ||
| 2923 | |||
| 2924 | static int em_btc(struct x86_emulate_ctxt *ctxt) | ||
| 2925 | { | ||
| 2926 | emulate_2op_SrcV_nobyte(ctxt, "btc"); | ||
| 2927 | return X86EMUL_CONTINUE; | ||
| 2928 | } | ||
| 2929 | |||
| 2930 | static int em_bsf(struct x86_emulate_ctxt *ctxt) | ||
| 2931 | { | ||
| 2932 | u8 zf; | ||
| 2933 | |||
| 2934 | __asm__ ("bsf %2, %0; setz %1" | ||
| 2935 | : "=r"(ctxt->dst.val), "=q"(zf) | ||
| 2936 | : "r"(ctxt->src.val)); | ||
| 2937 | |||
| 2938 | ctxt->eflags &= ~X86_EFLAGS_ZF; | ||
| 2939 | if (zf) { | ||
| 2940 | ctxt->eflags |= X86_EFLAGS_ZF; | ||
| 2941 | /* Disable writeback. */ | ||
| 2942 | ctxt->dst.type = OP_NONE; | ||
| 2943 | } | ||
| 2944 | return X86EMUL_CONTINUE; | ||
| 2945 | } | ||
| 2946 | |||
| 2947 | static int em_bsr(struct x86_emulate_ctxt *ctxt) | ||
| 2948 | { | ||
| 2949 | u8 zf; | ||
| 2950 | |||
| 2951 | __asm__ ("bsr %2, %0; setz %1" | ||
| 2952 | : "=r"(ctxt->dst.val), "=q"(zf) | ||
| 2953 | : "r"(ctxt->src.val)); | ||
| 2954 | |||
| 2955 | ctxt->eflags &= ~X86_EFLAGS_ZF; | ||
| 2956 | if (zf) { | ||
| 2957 | ctxt->eflags |= X86_EFLAGS_ZF; | ||
| 2958 | /* Disable writeback. */ | ||
| 2959 | ctxt->dst.type = OP_NONE; | ||
| 2960 | } | ||
| 2961 | return X86EMUL_CONTINUE; | ||
| 2962 | } | ||
| 2963 | |||
| 2797 | static bool valid_cr(int nr) | 2964 | static bool valid_cr(int nr) |
| 2798 | { | 2965 | { |
| 2799 | switch (nr) { | 2966 | switch (nr) { |
| @@ -2867,9 +3034,6 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) | |||
| 2867 | break; | 3034 | break; |
| 2868 | } | 3035 | } |
| 2869 | case 4: { | 3036 | case 4: { |
| 2870 | u64 cr4; | ||
| 2871 | |||
| 2872 | cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
| 2873 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | 3037 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); |
| 2874 | 3038 | ||
| 2875 | if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) | 3039 | if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) |
| @@ -3003,6 +3167,8 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
| 3003 | #define D2bv(_f) D((_f) | ByteOp), D(_f) | 3167 | #define D2bv(_f) D((_f) | ByteOp), D(_f) |
| 3004 | #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) | 3168 | #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) |
| 3005 | #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) | 3169 | #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) |
| 3170 | #define I2bvIP(_f, _e, _i, _p) \ | ||
| 3171 | IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) | ||
| 3006 | 3172 | ||
| 3007 | #define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ | 3173 | #define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ |
| 3008 | I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ | 3174 | I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ |
| @@ -3033,17 +3199,17 @@ static struct opcode group7_rm7[] = { | |||
| 3033 | 3199 | ||
| 3034 | static struct opcode group1[] = { | 3200 | static struct opcode group1[] = { |
| 3035 | I(Lock, em_add), | 3201 | I(Lock, em_add), |
| 3036 | I(Lock, em_or), | 3202 | I(Lock | PageTable, em_or), |
| 3037 | I(Lock, em_adc), | 3203 | I(Lock, em_adc), |
| 3038 | I(Lock, em_sbb), | 3204 | I(Lock, em_sbb), |
| 3039 | I(Lock, em_and), | 3205 | I(Lock | PageTable, em_and), |
| 3040 | I(Lock, em_sub), | 3206 | I(Lock, em_sub), |
| 3041 | I(Lock, em_xor), | 3207 | I(Lock, em_xor), |
| 3042 | I(0, em_cmp), | 3208 | I(0, em_cmp), |
| 3043 | }; | 3209 | }; |
| 3044 | 3210 | ||
| 3045 | static struct opcode group1A[] = { | 3211 | static struct opcode group1A[] = { |
| 3046 | D(DstMem | SrcNone | ModRM | Mov | Stack), N, N, N, N, N, N, N, | 3212 | I(DstMem | SrcNone | ModRM | Mov | Stack, em_pop), N, N, N, N, N, N, N, |
| 3047 | }; | 3213 | }; |
| 3048 | 3214 | ||
| 3049 | static struct opcode group3[] = { | 3215 | static struct opcode group3[] = { |
| @@ -3058,16 +3224,19 @@ static struct opcode group3[] = { | |||
| 3058 | }; | 3224 | }; |
| 3059 | 3225 | ||
| 3060 | static struct opcode group4[] = { | 3226 | static struct opcode group4[] = { |
| 3061 | D(ByteOp | DstMem | SrcNone | ModRM | Lock), D(ByteOp | DstMem | SrcNone | ModRM | Lock), | 3227 | I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45), |
| 3228 | I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45), | ||
| 3062 | N, N, N, N, N, N, | 3229 | N, N, N, N, N, N, |
| 3063 | }; | 3230 | }; |
| 3064 | 3231 | ||
| 3065 | static struct opcode group5[] = { | 3232 | static struct opcode group5[] = { |
| 3066 | D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock), | 3233 | I(DstMem | SrcNone | ModRM | Lock, em_grp45), |
| 3067 | D(SrcMem | ModRM | Stack), | 3234 | I(DstMem | SrcNone | ModRM | Lock, em_grp45), |
| 3235 | I(SrcMem | ModRM | Stack, em_grp45), | ||
| 3068 | I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far), | 3236 | I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far), |
| 3069 | D(SrcMem | ModRM | Stack), D(SrcMemFAddr | ModRM | ImplicitOps), | 3237 | I(SrcMem | ModRM | Stack, em_grp45), |
| 3070 | D(SrcMem | ModRM | Stack), N, | 3238 | I(SrcMemFAddr | ModRM | ImplicitOps, em_grp45), |
| 3239 | I(SrcMem | ModRM | Stack, em_grp45), N, | ||
| 3071 | }; | 3240 | }; |
| 3072 | 3241 | ||
| 3073 | static struct opcode group6[] = { | 3242 | static struct opcode group6[] = { |
| @@ -3096,18 +3265,21 @@ static struct group_dual group7 = { { | |||
| 3096 | 3265 | ||
| 3097 | static struct opcode group8[] = { | 3266 | static struct opcode group8[] = { |
| 3098 | N, N, N, N, | 3267 | N, N, N, N, |
| 3099 | D(DstMem | SrcImmByte | ModRM), D(DstMem | SrcImmByte | ModRM | Lock), | 3268 | I(DstMem | SrcImmByte | ModRM, em_bt), |
| 3100 | D(DstMem | SrcImmByte | ModRM | Lock), D(DstMem | SrcImmByte | ModRM | Lock), | 3269 | I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_bts), |
| 3270 | I(DstMem | SrcImmByte | ModRM | Lock, em_btr), | ||
| 3271 | I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_btc), | ||
| 3101 | }; | 3272 | }; |
| 3102 | 3273 | ||
| 3103 | static struct group_dual group9 = { { | 3274 | static struct group_dual group9 = { { |
| 3104 | N, D(DstMem64 | ModRM | Lock), N, N, N, N, N, N, | 3275 | N, I(DstMem64 | ModRM | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, |
| 3105 | }, { | 3276 | }, { |
| 3106 | N, N, N, N, N, N, N, N, | 3277 | N, N, N, N, N, N, N, N, |
| 3107 | } }; | 3278 | } }; |
| 3108 | 3279 | ||
| 3109 | static struct opcode group11[] = { | 3280 | static struct opcode group11[] = { |
| 3110 | I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)), | 3281 | I(DstMem | SrcImm | ModRM | Mov | PageTable, em_mov), |
| 3282 | X7(D(Undefined)), | ||
| 3111 | }; | 3283 | }; |
| 3112 | 3284 | ||
| 3113 | static struct gprefix pfx_0f_6f_0f_7f = { | 3285 | static struct gprefix pfx_0f_6f_0f_7f = { |
| @@ -3120,7 +3292,7 @@ static struct opcode opcode_table[256] = { | |||
| 3120 | I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), | 3292 | I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), |
| 3121 | I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), | 3293 | I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), |
| 3122 | /* 0x08 - 0x0F */ | 3294 | /* 0x08 - 0x0F */ |
| 3123 | I6ALU(Lock, em_or), | 3295 | I6ALU(Lock | PageTable, em_or), |
| 3124 | I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), | 3296 | I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), |
| 3125 | N, | 3297 | N, |
| 3126 | /* 0x10 - 0x17 */ | 3298 | /* 0x10 - 0x17 */ |
| @@ -3132,7 +3304,7 @@ static struct opcode opcode_table[256] = { | |||
| 3132 | I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), | 3304 | I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), |
| 3133 | I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), | 3305 | I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), |
| 3134 | /* 0x20 - 0x27 */ | 3306 | /* 0x20 - 0x27 */ |
| 3135 | I6ALU(Lock, em_and), N, N, | 3307 | I6ALU(Lock | PageTable, em_and), N, N, |
| 3136 | /* 0x28 - 0x2F */ | 3308 | /* 0x28 - 0x2F */ |
| 3137 | I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), | 3309 | I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), |
| 3138 | /* 0x30 - 0x37 */ | 3310 | /* 0x30 - 0x37 */ |
| @@ -3155,8 +3327,8 @@ static struct opcode opcode_table[256] = { | |||
| 3155 | I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), | 3327 | I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), |
| 3156 | I(SrcImmByte | Mov | Stack, em_push), | 3328 | I(SrcImmByte | Mov | Stack, em_push), |
| 3157 | I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), | 3329 | I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), |
| 3158 | D2bvIP(DstDI | SrcDX | Mov | String, ins, check_perm_in), /* insb, insw/insd */ | 3330 | I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */ |
| 3159 | D2bvIP(SrcSI | DstDX | String, outs, check_perm_out), /* outsb, outsw/outsd */ | 3331 | I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */ |
| 3160 | /* 0x70 - 0x7F */ | 3332 | /* 0x70 - 0x7F */ |
| 3161 | X16(D(SrcImmByte)), | 3333 | X16(D(SrcImmByte)), |
| 3162 | /* 0x80 - 0x87 */ | 3334 | /* 0x80 - 0x87 */ |
| @@ -3165,11 +3337,11 @@ static struct opcode opcode_table[256] = { | |||
| 3165 | G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1), | 3337 | G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1), |
| 3166 | G(DstMem | SrcImmByte | ModRM | Group, group1), | 3338 | G(DstMem | SrcImmByte | ModRM | Group, group1), |
| 3167 | I2bv(DstMem | SrcReg | ModRM, em_test), | 3339 | I2bv(DstMem | SrcReg | ModRM, em_test), |
| 3168 | I2bv(DstMem | SrcReg | ModRM | Lock, em_xchg), | 3340 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), |
| 3169 | /* 0x88 - 0x8F */ | 3341 | /* 0x88 - 0x8F */ |
| 3170 | I2bv(DstMem | SrcReg | ModRM | Mov, em_mov), | 3342 | I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), |
| 3171 | I2bv(DstReg | SrcMem | ModRM | Mov, em_mov), | 3343 | I2bv(DstReg | SrcMem | ModRM | Mov, em_mov), |
| 3172 | I(DstMem | SrcNone | ModRM | Mov, em_mov_rm_sreg), | 3344 | I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg), |
| 3173 | D(ModRM | SrcMem | NoAccess | DstReg), | 3345 | D(ModRM | SrcMem | NoAccess | DstReg), |
| 3174 | I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm), | 3346 | I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm), |
| 3175 | G(0, group1A), | 3347 | G(0, group1A), |
| @@ -3182,7 +3354,7 @@ static struct opcode opcode_table[256] = { | |||
| 3182 | II(ImplicitOps | Stack, em_popf, popf), N, N, | 3354 | II(ImplicitOps | Stack, em_popf, popf), N, N, |
| 3183 | /* 0xA0 - 0xA7 */ | 3355 | /* 0xA0 - 0xA7 */ |
| 3184 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), | 3356 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), |
| 3185 | I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), | 3357 | I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), |
| 3186 | I2bv(SrcSI | DstDI | Mov | String, em_mov), | 3358 | I2bv(SrcSI | DstDI | Mov | String, em_mov), |
| 3187 | I2bv(SrcSI | DstDI | String, em_cmp), | 3359 | I2bv(SrcSI | DstDI | String, em_cmp), |
| 3188 | /* 0xA8 - 0xAF */ | 3360 | /* 0xA8 - 0xAF */ |
| @@ -3213,13 +3385,13 @@ static struct opcode opcode_table[256] = { | |||
| 3213 | /* 0xE0 - 0xE7 */ | 3385 | /* 0xE0 - 0xE7 */ |
| 3214 | X3(I(SrcImmByte, em_loop)), | 3386 | X3(I(SrcImmByte, em_loop)), |
| 3215 | I(SrcImmByte, em_jcxz), | 3387 | I(SrcImmByte, em_jcxz), |
| 3216 | D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in), | 3388 | I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in), |
| 3217 | D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out), | 3389 | I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out), |
| 3218 | /* 0xE8 - 0xEF */ | 3390 | /* 0xE8 - 0xEF */ |
| 3219 | D(SrcImm | Stack), D(SrcImm | ImplicitOps), | 3391 | I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps), |
| 3220 | I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps), | 3392 | I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps), |
| 3221 | D2bvIP(SrcDX | DstAcc, in, check_perm_in), | 3393 | I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in), |
| 3222 | D2bvIP(SrcAcc | DstDX, out, check_perm_out), | 3394 | I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out), |
| 3223 | /* 0xF0 - 0xF7 */ | 3395 | /* 0xF0 - 0xF7 */ |
| 3224 | N, DI(ImplicitOps, icebp), N, N, | 3396 | N, DI(ImplicitOps, icebp), N, N, |
| 3225 | DI(ImplicitOps | Priv, hlt), D(ImplicitOps), | 3397 | DI(ImplicitOps | Priv, hlt), D(ImplicitOps), |
| @@ -3242,15 +3414,15 @@ static struct opcode twobyte_table[256] = { | |||
| 3242 | /* 0x20 - 0x2F */ | 3414 | /* 0x20 - 0x2F */ |
| 3243 | DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), | 3415 | DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), |
| 3244 | DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), | 3416 | DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), |
| 3245 | DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write), | 3417 | IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write), |
| 3246 | DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write), | 3418 | IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write), |
| 3247 | N, N, N, N, | 3419 | N, N, N, N, |
| 3248 | N, N, N, N, N, N, N, N, | 3420 | N, N, N, N, N, N, N, N, |
| 3249 | /* 0x30 - 0x3F */ | 3421 | /* 0x30 - 0x3F */ |
| 3250 | DI(ImplicitOps | Priv, wrmsr), | 3422 | II(ImplicitOps | Priv, em_wrmsr, wrmsr), |
| 3251 | IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), | 3423 | IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), |
| 3252 | DI(ImplicitOps | Priv, rdmsr), | 3424 | II(ImplicitOps | Priv, em_rdmsr, rdmsr), |
| 3253 | DIP(ImplicitOps | Priv, rdpmc, check_rdpmc), | 3425 | IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc), |
| 3254 | I(ImplicitOps | VendorSpecific, em_sysenter), | 3426 | I(ImplicitOps | VendorSpecific, em_sysenter), |
| 3255 | I(ImplicitOps | Priv | VendorSpecific, em_sysexit), | 3427 | I(ImplicitOps | Priv | VendorSpecific, em_sysexit), |
| 3256 | N, N, | 3428 | N, N, |
| @@ -3275,26 +3447,28 @@ static struct opcode twobyte_table[256] = { | |||
| 3275 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), | 3447 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), |
| 3276 | /* 0xA0 - 0xA7 */ | 3448 | /* 0xA0 - 0xA7 */ |
| 3277 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), | 3449 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), |
| 3278 | DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), | 3450 | DI(ImplicitOps, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), |
| 3279 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3451 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
| 3280 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, | 3452 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, |
| 3281 | /* 0xA8 - 0xAF */ | 3453 | /* 0xA8 - 0xAF */ |
| 3282 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), | 3454 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), |
| 3283 | DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3455 | DI(ImplicitOps, rsm), |
| 3456 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), | ||
| 3284 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3457 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
| 3285 | D(DstMem | SrcReg | Src2CL | ModRM), | 3458 | D(DstMem | SrcReg | Src2CL | ModRM), |
| 3286 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), | 3459 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), |
| 3287 | /* 0xB0 - 0xB7 */ | 3460 | /* 0xB0 - 0xB7 */ |
| 3288 | D2bv(DstMem | SrcReg | ModRM | Lock), | 3461 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), |
| 3289 | I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), | 3462 | I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), |
| 3290 | D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3463 | I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), |
| 3291 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), | 3464 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), |
| 3292 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), | 3465 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), |
| 3293 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3466 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
| 3294 | /* 0xB8 - 0xBF */ | 3467 | /* 0xB8 - 0xBF */ |
| 3295 | N, N, | 3468 | N, N, |
| 3296 | G(BitOp, group8), D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3469 | G(BitOp, group8), |
| 3297 | D(DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM), | 3470 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), |
| 3471 | I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), | ||
| 3298 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3472 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
| 3299 | /* 0xC0 - 0xCF */ | 3473 | /* 0xC0 - 0xCF */ |
| 3300 | D2bv(DstMem | SrcReg | ModRM | Lock), | 3474 | D2bv(DstMem | SrcReg | ModRM | Lock), |
| @@ -3320,6 +3494,7 @@ static struct opcode twobyte_table[256] = { | |||
| 3320 | #undef D2bv | 3494 | #undef D2bv |
| 3321 | #undef D2bvIP | 3495 | #undef D2bvIP |
| 3322 | #undef I2bv | 3496 | #undef I2bv |
| 3497 | #undef I2bvIP | ||
| 3323 | #undef I6ALU | 3498 | #undef I6ALU |
| 3324 | 3499 | ||
| 3325 | static unsigned imm_size(struct x86_emulate_ctxt *ctxt) | 3500 | static unsigned imm_size(struct x86_emulate_ctxt *ctxt) |
| @@ -3697,6 +3872,11 @@ done: | |||
| 3697 | return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; | 3872 | return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; |
| 3698 | } | 3873 | } |
| 3699 | 3874 | ||
| 3875 | bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt) | ||
| 3876 | { | ||
| 3877 | return ctxt->d & PageTable; | ||
| 3878 | } | ||
| 3879 | |||
| 3700 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) | 3880 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) |
| 3701 | { | 3881 | { |
| 3702 | /* The second termination condition only applies for REPE | 3882 | /* The second termination condition only applies for REPE |
| @@ -3720,7 +3900,6 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) | |||
| 3720 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | 3900 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) |
| 3721 | { | 3901 | { |
| 3722 | struct x86_emulate_ops *ops = ctxt->ops; | 3902 | struct x86_emulate_ops *ops = ctxt->ops; |
| 3723 | u64 msr_data; | ||
| 3724 | int rc = X86EMUL_CONTINUE; | 3903 | int rc = X86EMUL_CONTINUE; |
| 3725 | int saved_dst_type = ctxt->dst.type; | 3904 | int saved_dst_type = ctxt->dst.type; |
| 3726 | 3905 | ||
| @@ -3854,15 +4033,6 @@ special_insn: | |||
| 3854 | goto cannot_emulate; | 4033 | goto cannot_emulate; |
| 3855 | ctxt->dst.val = (s32) ctxt->src.val; | 4034 | ctxt->dst.val = (s32) ctxt->src.val; |
| 3856 | break; | 4035 | break; |
| 3857 | case 0x6c: /* insb */ | ||
| 3858 | case 0x6d: /* insw/insd */ | ||
| 3859 | ctxt->src.val = ctxt->regs[VCPU_REGS_RDX]; | ||
| 3860 | goto do_io_in; | ||
| 3861 | case 0x6e: /* outsb */ | ||
| 3862 | case 0x6f: /* outsw/outsd */ | ||
| 3863 | ctxt->dst.val = ctxt->regs[VCPU_REGS_RDX]; | ||
| 3864 | goto do_io_out; | ||
| 3865 | break; | ||
| 3866 | case 0x70 ... 0x7f: /* jcc (short) */ | 4036 | case 0x70 ... 0x7f: /* jcc (short) */ |
| 3867 | if (test_cc(ctxt->b, ctxt->eflags)) | 4037 | if (test_cc(ctxt->b, ctxt->eflags)) |
| 3868 | jmp_rel(ctxt, ctxt->src.val); | 4038 | jmp_rel(ctxt, ctxt->src.val); |
| @@ -3870,9 +4040,6 @@ special_insn: | |||
| 3870 | case 0x8d: /* lea r16/r32, m */ | 4040 | case 0x8d: /* lea r16/r32, m */ |
| 3871 | ctxt->dst.val = ctxt->src.addr.mem.ea; | 4041 | ctxt->dst.val = ctxt->src.addr.mem.ea; |
| 3872 | break; | 4042 | break; |
| 3873 | case 0x8f: /* pop (sole member of Grp1a) */ | ||
| 3874 | rc = em_grp1a(ctxt); | ||
| 3875 | break; | ||
| 3876 | case 0x90 ... 0x97: /* nop / xchg reg, rax */ | 4043 | case 0x90 ... 0x97: /* nop / xchg reg, rax */ |
| 3877 | if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX]) | 4044 | if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX]) |
| 3878 | break; | 4045 | break; |
| @@ -3905,38 +4072,11 @@ special_insn: | |||
| 3905 | ctxt->src.val = ctxt->regs[VCPU_REGS_RCX]; | 4072 | ctxt->src.val = ctxt->regs[VCPU_REGS_RCX]; |
| 3906 | rc = em_grp2(ctxt); | 4073 | rc = em_grp2(ctxt); |
| 3907 | break; | 4074 | break; |
| 3908 | case 0xe4: /* inb */ | ||
| 3909 | case 0xe5: /* in */ | ||
| 3910 | goto do_io_in; | ||
| 3911 | case 0xe6: /* outb */ | ||
| 3912 | case 0xe7: /* out */ | ||
| 3913 | goto do_io_out; | ||
| 3914 | case 0xe8: /* call (near) */ { | ||
| 3915 | long int rel = ctxt->src.val; | ||
| 3916 | ctxt->src.val = (unsigned long) ctxt->_eip; | ||
| 3917 | jmp_rel(ctxt, rel); | ||
| 3918 | rc = em_push(ctxt); | ||
| 3919 | break; | ||
| 3920 | } | ||
| 3921 | case 0xe9: /* jmp rel */ | 4075 | case 0xe9: /* jmp rel */ |
| 3922 | case 0xeb: /* jmp rel short */ | 4076 | case 0xeb: /* jmp rel short */ |
| 3923 | jmp_rel(ctxt, ctxt->src.val); | 4077 | jmp_rel(ctxt, ctxt->src.val); |
| 3924 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ | 4078 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ |
| 3925 | break; | 4079 | break; |
| 3926 | case 0xec: /* in al,dx */ | ||
| 3927 | case 0xed: /* in (e/r)ax,dx */ | ||
| 3928 | do_io_in: | ||
| 3929 | if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val, | ||
| 3930 | &ctxt->dst.val)) | ||
| 3931 | goto done; /* IO is needed */ | ||
| 3932 | break; | ||
| 3933 | case 0xee: /* out dx,al */ | ||
| 3934 | case 0xef: /* out dx,(e/r)ax */ | ||
| 3935 | do_io_out: | ||
| 3936 | ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val, | ||
| 3937 | &ctxt->src.val, 1); | ||
| 3938 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ | ||
| 3939 | break; | ||
| 3940 | case 0xf4: /* hlt */ | 4080 | case 0xf4: /* hlt */ |
| 3941 | ctxt->ops->halt(ctxt); | 4081 | ctxt->ops->halt(ctxt); |
| 3942 | break; | 4082 | break; |
| @@ -3956,12 +4096,6 @@ special_insn: | |||
| 3956 | case 0xfd: /* std */ | 4096 | case 0xfd: /* std */ |
| 3957 | ctxt->eflags |= EFLG_DF; | 4097 | ctxt->eflags |= EFLG_DF; |
| 3958 | break; | 4098 | break; |
| 3959 | case 0xfe: /* Grp4 */ | ||
| 3960 | rc = em_grp45(ctxt); | ||
| 3961 | break; | ||
| 3962 | case 0xff: /* Grp5 */ | ||
| 3963 | rc = em_grp45(ctxt); | ||
| 3964 | break; | ||
| 3965 | default: | 4099 | default: |
| 3966 | goto cannot_emulate; | 4100 | goto cannot_emulate; |
| 3967 | } | 4101 | } |
| @@ -4036,49 +4170,6 @@ twobyte_insn: | |||
| 4036 | case 0x21: /* mov from dr to reg */ | 4170 | case 0x21: /* mov from dr to reg */ |
| 4037 | ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val); | 4171 | ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val); |
| 4038 | break; | 4172 | break; |
| 4039 | case 0x22: /* mov reg, cr */ | ||
| 4040 | if (ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) { | ||
| 4041 | emulate_gp(ctxt, 0); | ||
| 4042 | rc = X86EMUL_PROPAGATE_FAULT; | ||
| 4043 | goto done; | ||
| 4044 | } | ||
| 4045 | ctxt->dst.type = OP_NONE; | ||
| 4046 | break; | ||
| 4047 | case 0x23: /* mov from reg to dr */ | ||
| 4048 | if (ops->set_dr(ctxt, ctxt->modrm_reg, ctxt->src.val & | ||
| 4049 | ((ctxt->mode == X86EMUL_MODE_PROT64) ? | ||
| 4050 | ~0ULL : ~0U)) < 0) { | ||
| 4051 | /* #UD condition is already handled by the code above */ | ||
| 4052 | emulate_gp(ctxt, 0); | ||
| 4053 | rc = X86EMUL_PROPAGATE_FAULT; | ||
| 4054 | goto done; | ||
| 4055 | } | ||
| 4056 | |||
| 4057 | ctxt->dst.type = OP_NONE; /* no writeback */ | ||
| 4058 | break; | ||
| 4059 | case 0x30: | ||
| 4060 | /* wrmsr */ | ||
| 4061 | msr_data = (u32)ctxt->regs[VCPU_REGS_RAX] | ||
| 4062 | | ((u64)ctxt->regs[VCPU_REGS_RDX] << 32); | ||
| 4063 | if (ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data)) { | ||
| 4064 | emulate_gp(ctxt, 0); | ||
| 4065 | rc = X86EMUL_PROPAGATE_FAULT; | ||
| 4066 | goto done; | ||
| 4067 | } | ||
| 4068 | rc = X86EMUL_CONTINUE; | ||
| 4069 | break; | ||
| 4070 | case 0x32: | ||
| 4071 | /* rdmsr */ | ||
| 4072 | if (ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data)) { | ||
| 4073 | emulate_gp(ctxt, 0); | ||
| 4074 | rc = X86EMUL_PROPAGATE_FAULT; | ||
| 4075 | goto done; | ||
| 4076 | } else { | ||
| 4077 | ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data; | ||
| 4078 | ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32; | ||
| 4079 | } | ||
| 4080 | rc = X86EMUL_CONTINUE; | ||
| 4081 | break; | ||
| 4082 | case 0x40 ... 0x4f: /* cmov */ | 4173 | case 0x40 ... 0x4f: /* cmov */ |
| 4083 | ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val; | 4174 | ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val; |
| 4084 | if (!test_cc(ctxt->b, ctxt->eflags)) | 4175 | if (!test_cc(ctxt->b, ctxt->eflags)) |
| @@ -4091,93 +4182,21 @@ twobyte_insn: | |||
| 4091 | case 0x90 ... 0x9f: /* setcc r/m8 */ | 4182 | case 0x90 ... 0x9f: /* setcc r/m8 */ |
| 4092 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); | 4183 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); |
| 4093 | break; | 4184 | break; |
| 4094 | case 0xa3: | ||
| 4095 | bt: /* bt */ | ||
| 4096 | ctxt->dst.type = OP_NONE; | ||
| 4097 | /* only subword offset */ | ||
| 4098 | ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; | ||
| 4099 | emulate_2op_SrcV_nobyte(ctxt, "bt"); | ||
| 4100 | break; | ||
| 4101 | case 0xa4: /* shld imm8, r, r/m */ | 4185 | case 0xa4: /* shld imm8, r, r/m */ |
| 4102 | case 0xa5: /* shld cl, r, r/m */ | 4186 | case 0xa5: /* shld cl, r, r/m */ |
| 4103 | emulate_2op_cl(ctxt, "shld"); | 4187 | emulate_2op_cl(ctxt, "shld"); |
| 4104 | break; | 4188 | break; |
| 4105 | case 0xab: | ||
| 4106 | bts: /* bts */ | ||
| 4107 | emulate_2op_SrcV_nobyte(ctxt, "bts"); | ||
| 4108 | break; | ||
| 4109 | case 0xac: /* shrd imm8, r, r/m */ | 4189 | case 0xac: /* shrd imm8, r, r/m */ |
| 4110 | case 0xad: /* shrd cl, r, r/m */ | 4190 | case 0xad: /* shrd cl, r, r/m */ |
| 4111 | emulate_2op_cl(ctxt, "shrd"); | 4191 | emulate_2op_cl(ctxt, "shrd"); |
| 4112 | break; | 4192 | break; |
| 4113 | case 0xae: /* clflush */ | 4193 | case 0xae: /* clflush */ |
| 4114 | break; | 4194 | break; |
| 4115 | case 0xb0 ... 0xb1: /* cmpxchg */ | ||
| 4116 | /* | ||
| 4117 | * Save real source value, then compare EAX against | ||
| 4118 | * destination. | ||
| 4119 | */ | ||
| 4120 | ctxt->src.orig_val = ctxt->src.val; | ||
| 4121 | ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; | ||
| 4122 | emulate_2op_SrcV(ctxt, "cmp"); | ||
| 4123 | if (ctxt->eflags & EFLG_ZF) { | ||
| 4124 | /* Success: write back to memory. */ | ||
| 4125 | ctxt->dst.val = ctxt->src.orig_val; | ||
| 4126 | } else { | ||
| 4127 | /* Failure: write the value we saw to EAX. */ | ||
| 4128 | ctxt->dst.type = OP_REG; | ||
| 4129 | ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; | ||
| 4130 | } | ||
| 4131 | break; | ||
| 4132 | case 0xb3: | ||
| 4133 | btr: /* btr */ | ||
| 4134 | emulate_2op_SrcV_nobyte(ctxt, "btr"); | ||
| 4135 | break; | ||
| 4136 | case 0xb6 ... 0xb7: /* movzx */ | 4195 | case 0xb6 ... 0xb7: /* movzx */ |
| 4137 | ctxt->dst.bytes = ctxt->op_bytes; | 4196 | ctxt->dst.bytes = ctxt->op_bytes; |
| 4138 | ctxt->dst.val = (ctxt->d & ByteOp) ? (u8) ctxt->src.val | 4197 | ctxt->dst.val = (ctxt->d & ByteOp) ? (u8) ctxt->src.val |
| 4139 | : (u16) ctxt->src.val; | 4198 | : (u16) ctxt->src.val; |
| 4140 | break; | 4199 | break; |
| 4141 | case 0xba: /* Grp8 */ | ||
| 4142 | switch (ctxt->modrm_reg & 3) { | ||
| 4143 | case 0: | ||
| 4144 | goto bt; | ||
| 4145 | case 1: | ||
| 4146 | goto bts; | ||
| 4147 | case 2: | ||
| 4148 | goto btr; | ||
| 4149 | case 3: | ||
| 4150 | goto btc; | ||
| 4151 | } | ||
| 4152 | break; | ||
| 4153 | case 0xbb: | ||
| 4154 | btc: /* btc */ | ||
| 4155 | emulate_2op_SrcV_nobyte(ctxt, "btc"); | ||
| 4156 | break; | ||
| 4157 | case 0xbc: { /* bsf */ | ||
| 4158 | u8 zf; | ||
| 4159 | __asm__ ("bsf %2, %0; setz %1" | ||
| 4160 | : "=r"(ctxt->dst.val), "=q"(zf) | ||
| 4161 | : "r"(ctxt->src.val)); | ||
| 4162 | ctxt->eflags &= ~X86_EFLAGS_ZF; | ||
| 4163 | if (zf) { | ||
| 4164 | ctxt->eflags |= X86_EFLAGS_ZF; | ||
| 4165 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ | ||
| 4166 | } | ||
| 4167 | break; | ||
| 4168 | } | ||
| 4169 | case 0xbd: { /* bsr */ | ||
| 4170 | u8 zf; | ||
| 4171 | __asm__ ("bsr %2, %0; setz %1" | ||
| 4172 | : "=r"(ctxt->dst.val), "=q"(zf) | ||
| 4173 | : "r"(ctxt->src.val)); | ||
| 4174 | ctxt->eflags &= ~X86_EFLAGS_ZF; | ||
| 4175 | if (zf) { | ||
| 4176 | ctxt->eflags |= X86_EFLAGS_ZF; | ||
| 4177 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ | ||
| 4178 | } | ||
| 4179 | break; | ||
| 4180 | } | ||
| 4181 | case 0xbe ... 0xbf: /* movsx */ | 4200 | case 0xbe ... 0xbf: /* movsx */ |
| 4182 | ctxt->dst.bytes = ctxt->op_bytes; | 4201 | ctxt->dst.bytes = ctxt->op_bytes; |
| 4183 | ctxt->dst.val = (ctxt->d & ByteOp) ? (s8) ctxt->src.val : | 4202 | ctxt->dst.val = (ctxt->d & ByteOp) ? (s8) ctxt->src.val : |
| @@ -4194,9 +4213,6 @@ twobyte_insn: | |||
| 4194 | ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : | 4213 | ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : |
| 4195 | (u64) ctxt->src.val; | 4214 | (u64) ctxt->src.val; |
| 4196 | break; | 4215 | break; |
| 4197 | case 0xc7: /* Grp9 (cmpxchg8b) */ | ||
| 4198 | rc = em_grp9(ctxt); | ||
| 4199 | break; | ||
| 4200 | default: | 4216 | default: |
| 4201 | goto cannot_emulate; | 4217 | goto cannot_emulate; |
| 4202 | } | 4218 | } |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 405f2620392f..d68f99df690c 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
| @@ -344,7 +344,7 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) | |||
| 344 | struct kvm_timer *pt = &ps->pit_timer; | 344 | struct kvm_timer *pt = &ps->pit_timer; |
| 345 | s64 interval; | 345 | s64 interval; |
| 346 | 346 | ||
| 347 | if (!irqchip_in_kernel(kvm)) | 347 | if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) |
| 348 | return; | 348 | return; |
| 349 | 349 | ||
| 350 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); | 350 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); |
| @@ -397,15 +397,11 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
| 397 | case 1: | 397 | case 1: |
| 398 | /* FIXME: enhance mode 4 precision */ | 398 | /* FIXME: enhance mode 4 precision */ |
| 399 | case 4: | 399 | case 4: |
| 400 | if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) { | 400 | create_pit_timer(kvm, val, 0); |
| 401 | create_pit_timer(kvm, val, 0); | ||
| 402 | } | ||
| 403 | break; | 401 | break; |
| 404 | case 2: | 402 | case 2: |
| 405 | case 3: | 403 | case 3: |
| 406 | if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){ | 404 | create_pit_timer(kvm, val, 1); |
| 407 | create_pit_timer(kvm, val, 1); | ||
| 408 | } | ||
| 409 | break; | 405 | break; |
| 410 | default: | 406 | default: |
| 411 | destroy_pit_timer(kvm->arch.vpit); | 407 | destroy_pit_timer(kvm->arch.vpit); |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index cac4746d7ffb..b6a73537e1ef 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
| @@ -262,9 +262,10 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
| 262 | 262 | ||
| 263 | void kvm_pic_reset(struct kvm_kpic_state *s) | 263 | void kvm_pic_reset(struct kvm_kpic_state *s) |
| 264 | { | 264 | { |
| 265 | int irq; | 265 | int irq, i; |
| 266 | struct kvm_vcpu *vcpu0 = s->pics_state->kvm->bsp_vcpu; | 266 | struct kvm_vcpu *vcpu; |
| 267 | u8 irr = s->irr, isr = s->imr; | 267 | u8 irr = s->irr, isr = s->imr; |
| 268 | bool found = false; | ||
| 268 | 269 | ||
| 269 | s->last_irr = 0; | 270 | s->last_irr = 0; |
| 270 | s->irr = 0; | 271 | s->irr = 0; |
| @@ -281,12 +282,19 @@ void kvm_pic_reset(struct kvm_kpic_state *s) | |||
| 281 | s->special_fully_nested_mode = 0; | 282 | s->special_fully_nested_mode = 0; |
| 282 | s->init4 = 0; | 283 | s->init4 = 0; |
| 283 | 284 | ||
| 284 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | 285 | kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm) |
| 285 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | 286 | if (kvm_apic_accept_pic_intr(vcpu)) { |
| 286 | if (irr & (1 << irq) || isr & (1 << irq)) { | 287 | found = true; |
| 287 | pic_clear_isr(s, irq); | 288 | break; |
| 288 | } | 289 | } |
| 289 | } | 290 | |
| 291 | |||
| 292 | if (!found) | ||
| 293 | return; | ||
| 294 | |||
| 295 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) | ||
| 296 | if (irr & (1 << irq) || isr & (1 << irq)) | ||
| 297 | pic_clear_isr(s, irq); | ||
| 290 | } | 298 | } |
| 291 | 299 | ||
| 292 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) | 300 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 54abb40199d6..cfdc6e0ef002 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | #include "irq.h" | 38 | #include "irq.h" |
| 39 | #include "trace.h" | 39 | #include "trace.h" |
| 40 | #include "x86.h" | 40 | #include "x86.h" |
| 41 | #include "cpuid.h" | ||
| 41 | 42 | ||
| 42 | #ifndef CONFIG_X86_64 | 43 | #ifndef CONFIG_X86_64 |
| 43 | #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) | 44 | #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) |
| @@ -1120,7 +1121,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) | |||
| 1120 | return 0; | 1121 | return 0; |
| 1121 | } | 1122 | } |
| 1122 | 1123 | ||
| 1123 | static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) | 1124 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) |
| 1124 | { | 1125 | { |
| 1125 | u32 reg = apic_get_reg(apic, lvt_type); | 1126 | u32 reg = apic_get_reg(apic, lvt_type); |
| 1126 | int vector, mode, trig_mode; | 1127 | int vector, mode, trig_mode; |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 138e8cc6fea6..6f4ce2575d09 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
| @@ -34,6 +34,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu); | |||
| 34 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); | 34 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); |
| 35 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); | 35 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); |
| 36 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); | 36 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); |
| 37 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); | ||
| 37 | 38 | ||
| 38 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); | 39 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); |
| 39 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); | 40 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f1b36cf3e3d0..2a2a9b40db19 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -59,15 +59,6 @@ enum { | |||
| 59 | AUDIT_POST_SYNC | 59 | AUDIT_POST_SYNC |
| 60 | }; | 60 | }; |
| 61 | 61 | ||
| 62 | char *audit_point_name[] = { | ||
| 63 | "pre page fault", | ||
| 64 | "post page fault", | ||
| 65 | "pre pte write", | ||
| 66 | "post pte write", | ||
| 67 | "pre sync", | ||
| 68 | "post sync" | ||
| 69 | }; | ||
| 70 | |||
| 71 | #undef MMU_DEBUG | 62 | #undef MMU_DEBUG |
| 72 | 63 | ||
| 73 | #ifdef MMU_DEBUG | 64 | #ifdef MMU_DEBUG |
| @@ -87,9 +78,6 @@ static int dbg = 0; | |||
| 87 | module_param(dbg, bool, 0644); | 78 | module_param(dbg, bool, 0644); |
| 88 | #endif | 79 | #endif |
| 89 | 80 | ||
| 90 | static int oos_shadow = 1; | ||
| 91 | module_param(oos_shadow, bool, 0644); | ||
| 92 | |||
| 93 | #ifndef MMU_DEBUG | 81 | #ifndef MMU_DEBUG |
| 94 | #define ASSERT(x) do { } while (0) | 82 | #define ASSERT(x) do { } while (0) |
| 95 | #else | 83 | #else |
| @@ -593,6 +581,11 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | |||
| 593 | return 0; | 581 | return 0; |
| 594 | } | 582 | } |
| 595 | 583 | ||
| 584 | static int mmu_memory_cache_free_objects(struct kvm_mmu_memory_cache *cache) | ||
| 585 | { | ||
| 586 | return cache->nobjs; | ||
| 587 | } | ||
| 588 | |||
| 596 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, | 589 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, |
| 597 | struct kmem_cache *cache) | 590 | struct kmem_cache *cache) |
| 598 | { | 591 | { |
| @@ -953,21 +946,35 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) | |||
| 953 | } | 946 | } |
| 954 | } | 947 | } |
| 955 | 948 | ||
| 949 | static unsigned long *__gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level, | ||
| 950 | struct kvm_memory_slot *slot) | ||
| 951 | { | ||
| 952 | struct kvm_lpage_info *linfo; | ||
| 953 | |||
| 954 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | ||
| 955 | return &slot->rmap[gfn - slot->base_gfn]; | ||
| 956 | |||
| 957 | linfo = lpage_info_slot(gfn, slot, level); | ||
| 958 | return &linfo->rmap_pde; | ||
| 959 | } | ||
| 960 | |||
| 956 | /* | 961 | /* |
| 957 | * Take gfn and return the reverse mapping to it. | 962 | * Take gfn and return the reverse mapping to it. |
| 958 | */ | 963 | */ |
| 959 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | 964 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) |
| 960 | { | 965 | { |
| 961 | struct kvm_memory_slot *slot; | 966 | struct kvm_memory_slot *slot; |
| 962 | struct kvm_lpage_info *linfo; | ||
| 963 | 967 | ||
| 964 | slot = gfn_to_memslot(kvm, gfn); | 968 | slot = gfn_to_memslot(kvm, gfn); |
| 965 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | 969 | return __gfn_to_rmap(kvm, gfn, level, slot); |
| 966 | return &slot->rmap[gfn - slot->base_gfn]; | 970 | } |
| 967 | 971 | ||
| 968 | linfo = lpage_info_slot(gfn, slot, level); | 972 | static bool rmap_can_add(struct kvm_vcpu *vcpu) |
| 973 | { | ||
| 974 | struct kvm_mmu_memory_cache *cache; | ||
| 969 | 975 | ||
| 970 | return &linfo->rmap_pde; | 976 | cache = &vcpu->arch.mmu_pte_list_desc_cache; |
| 977 | return mmu_memory_cache_free_objects(cache); | ||
| 971 | } | 978 | } |
| 972 | 979 | ||
| 973 | static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | 980 | static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) |
| @@ -1004,17 +1011,16 @@ static void drop_spte(struct kvm *kvm, u64 *sptep) | |||
| 1004 | rmap_remove(kvm, sptep); | 1011 | rmap_remove(kvm, sptep); |
| 1005 | } | 1012 | } |
| 1006 | 1013 | ||
| 1007 | static int rmap_write_protect(struct kvm *kvm, u64 gfn) | 1014 | int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, |
| 1015 | struct kvm_memory_slot *slot) | ||
| 1008 | { | 1016 | { |
| 1009 | unsigned long *rmapp; | 1017 | unsigned long *rmapp; |
| 1010 | u64 *spte; | 1018 | u64 *spte; |
| 1011 | int i, write_protected = 0; | 1019 | int i, write_protected = 0; |
| 1012 | 1020 | ||
| 1013 | rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL); | 1021 | rmapp = __gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL, slot); |
| 1014 | |||
| 1015 | spte = rmap_next(kvm, rmapp, NULL); | 1022 | spte = rmap_next(kvm, rmapp, NULL); |
| 1016 | while (spte) { | 1023 | while (spte) { |
| 1017 | BUG_ON(!spte); | ||
| 1018 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1024 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
| 1019 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | 1025 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); |
| 1020 | if (is_writable_pte(*spte)) { | 1026 | if (is_writable_pte(*spte)) { |
| @@ -1027,12 +1033,11 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
| 1027 | /* check for huge page mappings */ | 1033 | /* check for huge page mappings */ |
| 1028 | for (i = PT_DIRECTORY_LEVEL; | 1034 | for (i = PT_DIRECTORY_LEVEL; |
| 1029 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 1035 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
| 1030 | rmapp = gfn_to_rmap(kvm, gfn, i); | 1036 | rmapp = __gfn_to_rmap(kvm, gfn, i, slot); |
| 1031 | spte = rmap_next(kvm, rmapp, NULL); | 1037 | spte = rmap_next(kvm, rmapp, NULL); |
| 1032 | while (spte) { | 1038 | while (spte) { |
| 1033 | BUG_ON(!spte); | ||
| 1034 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1039 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
| 1035 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); | 1040 | BUG_ON(!is_large_pte(*spte)); |
| 1036 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); | 1041 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); |
| 1037 | if (is_writable_pte(*spte)) { | 1042 | if (is_writable_pte(*spte)) { |
| 1038 | drop_spte(kvm, spte); | 1043 | drop_spte(kvm, spte); |
| @@ -1047,6 +1052,14 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
| 1047 | return write_protected; | 1052 | return write_protected; |
| 1048 | } | 1053 | } |
| 1049 | 1054 | ||
| 1055 | static int rmap_write_protect(struct kvm *kvm, u64 gfn) | ||
| 1056 | { | ||
| 1057 | struct kvm_memory_slot *slot; | ||
| 1058 | |||
| 1059 | slot = gfn_to_memslot(kvm, gfn); | ||
| 1060 | return kvm_mmu_rmap_write_protect(kvm, gfn, slot); | ||
| 1061 | } | ||
| 1062 | |||
| 1050 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1063 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, |
| 1051 | unsigned long data) | 1064 | unsigned long data) |
| 1052 | { | 1065 | { |
| @@ -1103,15 +1116,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
| 1103 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | 1116 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, |
| 1104 | unsigned long data)) | 1117 | unsigned long data)) |
| 1105 | { | 1118 | { |
| 1106 | int i, j; | 1119 | int j; |
| 1107 | int ret; | 1120 | int ret; |
| 1108 | int retval = 0; | 1121 | int retval = 0; |
| 1109 | struct kvm_memslots *slots; | 1122 | struct kvm_memslots *slots; |
| 1123 | struct kvm_memory_slot *memslot; | ||
| 1110 | 1124 | ||
| 1111 | slots = kvm_memslots(kvm); | 1125 | slots = kvm_memslots(kvm); |
| 1112 | 1126 | ||
| 1113 | for (i = 0; i < slots->nmemslots; i++) { | 1127 | kvm_for_each_memslot(memslot, slots) { |
| 1114 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | ||
| 1115 | unsigned long start = memslot->userspace_addr; | 1128 | unsigned long start = memslot->userspace_addr; |
| 1116 | unsigned long end; | 1129 | unsigned long end; |
| 1117 | 1130 | ||
| @@ -1324,7 +1337,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
| 1324 | PAGE_SIZE); | 1337 | PAGE_SIZE); |
| 1325 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 1338 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
| 1326 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 1339 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
| 1327 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 1340 | bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM); |
| 1328 | sp->parent_ptes = 0; | 1341 | sp->parent_ptes = 0; |
| 1329 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1342 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
| 1330 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); | 1343 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); |
| @@ -1511,6 +1524,13 @@ static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, | |||
| 1511 | return ret; | 1524 | return ret; |
| 1512 | } | 1525 | } |
| 1513 | 1526 | ||
| 1527 | #ifdef CONFIG_KVM_MMU_AUDIT | ||
| 1528 | #include "mmu_audit.c" | ||
| 1529 | #else | ||
| 1530 | static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { } | ||
| 1531 | static void mmu_audit_disable(void) { } | ||
| 1532 | #endif | ||
| 1533 | |||
| 1514 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1534 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
| 1515 | struct list_head *invalid_list) | 1535 | struct list_head *invalid_list) |
| 1516 | { | 1536 | { |
| @@ -1640,6 +1660,18 @@ static void init_shadow_page_table(struct kvm_mmu_page *sp) | |||
| 1640 | sp->spt[i] = 0ull; | 1660 | sp->spt[i] = 0ull; |
| 1641 | } | 1661 | } |
| 1642 | 1662 | ||
| 1663 | static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) | ||
| 1664 | { | ||
| 1665 | sp->write_flooding_count = 0; | ||
| 1666 | } | ||
| 1667 | |||
| 1668 | static void clear_sp_write_flooding_count(u64 *spte) | ||
| 1669 | { | ||
| 1670 | struct kvm_mmu_page *sp = page_header(__pa(spte)); | ||
| 1671 | |||
| 1672 | __clear_sp_write_flooding_count(sp); | ||
| 1673 | } | ||
| 1674 | |||
| 1643 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | 1675 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, |
| 1644 | gfn_t gfn, | 1676 | gfn_t gfn, |
| 1645 | gva_t gaddr, | 1677 | gva_t gaddr, |
| @@ -1683,6 +1715,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 1683 | } else if (sp->unsync) | 1715 | } else if (sp->unsync) |
| 1684 | kvm_mmu_mark_parents_unsync(sp); | 1716 | kvm_mmu_mark_parents_unsync(sp); |
| 1685 | 1717 | ||
| 1718 | __clear_sp_write_flooding_count(sp); | ||
| 1686 | trace_kvm_mmu_get_page(sp, false); | 1719 | trace_kvm_mmu_get_page(sp, false); |
| 1687 | return sp; | 1720 | return sp; |
| 1688 | } | 1721 | } |
| @@ -1796,7 +1829,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
| 1796 | } | 1829 | } |
| 1797 | } | 1830 | } |
| 1798 | 1831 | ||
| 1799 | static void mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, | 1832 | static bool mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, |
| 1800 | u64 *spte) | 1833 | u64 *spte) |
| 1801 | { | 1834 | { |
| 1802 | u64 pte; | 1835 | u64 pte; |
| @@ -1804,17 +1837,21 @@ static void mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
| 1804 | 1837 | ||
| 1805 | pte = *spte; | 1838 | pte = *spte; |
| 1806 | if (is_shadow_present_pte(pte)) { | 1839 | if (is_shadow_present_pte(pte)) { |
| 1807 | if (is_last_spte(pte, sp->role.level)) | 1840 | if (is_last_spte(pte, sp->role.level)) { |
| 1808 | drop_spte(kvm, spte); | 1841 | drop_spte(kvm, spte); |
| 1809 | else { | 1842 | if (is_large_pte(pte)) |
| 1843 | --kvm->stat.lpages; | ||
| 1844 | } else { | ||
| 1810 | child = page_header(pte & PT64_BASE_ADDR_MASK); | 1845 | child = page_header(pte & PT64_BASE_ADDR_MASK); |
| 1811 | drop_parent_pte(child, spte); | 1846 | drop_parent_pte(child, spte); |
| 1812 | } | 1847 | } |
| 1813 | } else if (is_mmio_spte(pte)) | 1848 | return true; |
| 1849 | } | ||
| 1850 | |||
| 1851 | if (is_mmio_spte(pte)) | ||
| 1814 | mmu_spte_clear_no_track(spte); | 1852 | mmu_spte_clear_no_track(spte); |
| 1815 | 1853 | ||
| 1816 | if (is_large_pte(pte)) | 1854 | return false; |
| 1817 | --kvm->stat.lpages; | ||
| 1818 | } | 1855 | } |
| 1819 | 1856 | ||
| 1820 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, | 1857 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, |
| @@ -1831,15 +1868,6 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) | |||
| 1831 | mmu_page_remove_parent_pte(sp, parent_pte); | 1868 | mmu_page_remove_parent_pte(sp, parent_pte); |
| 1832 | } | 1869 | } |
| 1833 | 1870 | ||
| 1834 | static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm) | ||
| 1835 | { | ||
| 1836 | int i; | ||
| 1837 | struct kvm_vcpu *vcpu; | ||
| 1838 | |||
| 1839 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
| 1840 | vcpu->arch.last_pte_updated = NULL; | ||
| 1841 | } | ||
| 1842 | |||
| 1843 | static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) | 1871 | static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) |
| 1844 | { | 1872 | { |
| 1845 | u64 *parent_pte; | 1873 | u64 *parent_pte; |
| @@ -1899,7 +1927,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
| 1899 | } | 1927 | } |
| 1900 | 1928 | ||
| 1901 | sp->role.invalid = 1; | 1929 | sp->role.invalid = 1; |
| 1902 | kvm_mmu_reset_last_pte_updated(kvm); | ||
| 1903 | return ret; | 1930 | return ret; |
| 1904 | } | 1931 | } |
| 1905 | 1932 | ||
| @@ -1985,7 +2012,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) | |||
| 1985 | kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; | 2012 | kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; |
| 1986 | } | 2013 | } |
| 1987 | 2014 | ||
| 1988 | static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | 2015 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) |
| 1989 | { | 2016 | { |
| 1990 | struct kvm_mmu_page *sp; | 2017 | struct kvm_mmu_page *sp; |
| 1991 | struct hlist_node *node; | 2018 | struct hlist_node *node; |
| @@ -1994,7 +2021,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
| 1994 | 2021 | ||
| 1995 | pgprintk("%s: looking for gfn %llx\n", __func__, gfn); | 2022 | pgprintk("%s: looking for gfn %llx\n", __func__, gfn); |
| 1996 | r = 0; | 2023 | r = 0; |
| 1997 | 2024 | spin_lock(&kvm->mmu_lock); | |
| 1998 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { | 2025 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { |
| 1999 | pgprintk("%s: gfn %llx role %x\n", __func__, gfn, | 2026 | pgprintk("%s: gfn %llx role %x\n", __func__, gfn, |
| 2000 | sp->role.word); | 2027 | sp->role.word); |
| @@ -2002,22 +2029,11 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
| 2002 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); | 2029 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); |
| 2003 | } | 2030 | } |
| 2004 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 2031 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
| 2005 | return r; | 2032 | spin_unlock(&kvm->mmu_lock); |
| 2006 | } | ||
| 2007 | |||
| 2008 | static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | ||
| 2009 | { | ||
| 2010 | struct kvm_mmu_page *sp; | ||
| 2011 | struct hlist_node *node; | ||
| 2012 | LIST_HEAD(invalid_list); | ||
| 2013 | 2033 | ||
| 2014 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { | 2034 | return r; |
| 2015 | pgprintk("%s: zap %llx %x\n", | ||
| 2016 | __func__, gfn, sp->role.word); | ||
| 2017 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); | ||
| 2018 | } | ||
| 2019 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
| 2020 | } | 2035 | } |
| 2036 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); | ||
| 2021 | 2037 | ||
| 2022 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | 2038 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) |
| 2023 | { | 2039 | { |
| @@ -2169,8 +2185,6 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
| 2169 | return 1; | 2185 | return 1; |
| 2170 | 2186 | ||
| 2171 | if (!need_unsync && !s->unsync) { | 2187 | if (!need_unsync && !s->unsync) { |
| 2172 | if (!oos_shadow) | ||
| 2173 | return 1; | ||
| 2174 | need_unsync = true; | 2188 | need_unsync = true; |
| 2175 | } | 2189 | } |
| 2176 | } | 2190 | } |
| @@ -2191,11 +2205,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
| 2191 | if (set_mmio_spte(sptep, gfn, pfn, pte_access)) | 2205 | if (set_mmio_spte(sptep, gfn, pfn, pte_access)) |
| 2192 | return 0; | 2206 | return 0; |
| 2193 | 2207 | ||
| 2194 | /* | ||
| 2195 | * We don't set the accessed bit, since we sometimes want to see | ||
| 2196 | * whether the guest actually used the pte (in order to detect | ||
| 2197 | * demand paging). | ||
| 2198 | */ | ||
| 2199 | spte = PT_PRESENT_MASK; | 2208 | spte = PT_PRESENT_MASK; |
| 2200 | if (!speculative) | 2209 | if (!speculative) |
| 2201 | spte |= shadow_accessed_mask; | 2210 | spte |= shadow_accessed_mask; |
| @@ -2346,10 +2355,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
| 2346 | } | 2355 | } |
| 2347 | } | 2356 | } |
| 2348 | kvm_release_pfn_clean(pfn); | 2357 | kvm_release_pfn_clean(pfn); |
| 2349 | if (speculative) { | ||
| 2350 | vcpu->arch.last_pte_updated = sptep; | ||
| 2351 | vcpu->arch.last_pte_gfn = gfn; | ||
| 2352 | } | ||
| 2353 | } | 2358 | } |
| 2354 | 2359 | ||
| 2355 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | 2360 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) |
| @@ -2840,12 +2845,12 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
| 2840 | return; | 2845 | return; |
| 2841 | 2846 | ||
| 2842 | vcpu_clear_mmio_info(vcpu, ~0ul); | 2847 | vcpu_clear_mmio_info(vcpu, ~0ul); |
| 2843 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); | 2848 | kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); |
| 2844 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { | 2849 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { |
| 2845 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2850 | hpa_t root = vcpu->arch.mmu.root_hpa; |
| 2846 | sp = page_header(root); | 2851 | sp = page_header(root); |
| 2847 | mmu_sync_children(vcpu, sp); | 2852 | mmu_sync_children(vcpu, sp); |
| 2848 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); | 2853 | kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); |
| 2849 | return; | 2854 | return; |
| 2850 | } | 2855 | } |
| 2851 | for (i = 0; i < 4; ++i) { | 2856 | for (i = 0; i < 4; ++i) { |
| @@ -2857,7 +2862,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
| 2857 | mmu_sync_children(vcpu, sp); | 2862 | mmu_sync_children(vcpu, sp); |
| 2858 | } | 2863 | } |
| 2859 | } | 2864 | } |
| 2860 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); | 2865 | kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); |
| 2861 | } | 2866 | } |
| 2862 | 2867 | ||
| 2863 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | 2868 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) |
| @@ -3510,28 +3515,119 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, | |||
| 3510 | kvm_mmu_flush_tlb(vcpu); | 3515 | kvm_mmu_flush_tlb(vcpu); |
| 3511 | } | 3516 | } |
| 3512 | 3517 | ||
| 3513 | static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) | 3518 | static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, |
| 3519 | const u8 *new, int *bytes) | ||
| 3514 | { | 3520 | { |
| 3515 | u64 *spte = vcpu->arch.last_pte_updated; | 3521 | u64 gentry; |
| 3522 | int r; | ||
| 3523 | |||
| 3524 | /* | ||
| 3525 | * Assume that the pte write on a page table of the same type | ||
| 3526 | * as the current vcpu paging mode since we update the sptes only | ||
| 3527 | * when they have the same mode. | ||
| 3528 | */ | ||
| 3529 | if (is_pae(vcpu) && *bytes == 4) { | ||
| 3530 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
| 3531 | *gpa &= ~(gpa_t)7; | ||
| 3532 | *bytes = 8; | ||
| 3533 | r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8)); | ||
| 3534 | if (r) | ||
| 3535 | gentry = 0; | ||
| 3536 | new = (const u8 *)&gentry; | ||
| 3537 | } | ||
| 3516 | 3538 | ||
| 3517 | return !!(spte && (*spte & shadow_accessed_mask)); | 3539 | switch (*bytes) { |
| 3540 | case 4: | ||
| 3541 | gentry = *(const u32 *)new; | ||
| 3542 | break; | ||
| 3543 | case 8: | ||
| 3544 | gentry = *(const u64 *)new; | ||
| 3545 | break; | ||
| 3546 | default: | ||
| 3547 | gentry = 0; | ||
| 3548 | break; | ||
| 3549 | } | ||
| 3550 | |||
| 3551 | return gentry; | ||
| 3518 | } | 3552 | } |
| 3519 | 3553 | ||
| 3520 | static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) | 3554 | /* |
| 3555 | * If we're seeing too many writes to a page, it may no longer be a page table, | ||
| 3556 | * or we may be forking, in which case it is better to unmap the page. | ||
| 3557 | */ | ||
| 3558 | static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte) | ||
| 3521 | { | 3559 | { |
| 3522 | u64 *spte = vcpu->arch.last_pte_updated; | 3560 | /* |
| 3561 | * Skip write-flooding detected for the sp whose level is 1, because | ||
| 3562 | * it can become unsync, then the guest page is not write-protected. | ||
| 3563 | */ | ||
| 3564 | if (sp->role.level == 1) | ||
| 3565 | return false; | ||
| 3523 | 3566 | ||
| 3524 | if (spte | 3567 | return ++sp->write_flooding_count >= 3; |
| 3525 | && vcpu->arch.last_pte_gfn == gfn | 3568 | } |
| 3526 | && shadow_accessed_mask | 3569 | |
| 3527 | && !(*spte & shadow_accessed_mask) | 3570 | /* |
| 3528 | && is_shadow_present_pte(*spte)) | 3571 | * Misaligned accesses are too much trouble to fix up; also, they usually |
| 3529 | set_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); | 3572 | * indicate a page is not used as a page table. |
| 3573 | */ | ||
| 3574 | static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa, | ||
| 3575 | int bytes) | ||
| 3576 | { | ||
| 3577 | unsigned offset, pte_size, misaligned; | ||
| 3578 | |||
| 3579 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | ||
| 3580 | gpa, bytes, sp->role.word); | ||
| 3581 | |||
| 3582 | offset = offset_in_page(gpa); | ||
| 3583 | pte_size = sp->role.cr4_pae ? 8 : 4; | ||
| 3584 | |||
| 3585 | /* | ||
| 3586 | * Sometimes, the OS only writes the last one bytes to update status | ||
| 3587 | * bits, for example, in linux, andb instruction is used in clear_bit(). | ||
| 3588 | */ | ||
| 3589 | if (!(offset & (pte_size - 1)) && bytes == 1) | ||
| 3590 | return false; | ||
| 3591 | |||
| 3592 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | ||
| 3593 | misaligned |= bytes < 4; | ||
| 3594 | |||
| 3595 | return misaligned; | ||
| 3596 | } | ||
| 3597 | |||
| 3598 | static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte) | ||
| 3599 | { | ||
| 3600 | unsigned page_offset, quadrant; | ||
| 3601 | u64 *spte; | ||
| 3602 | int level; | ||
| 3603 | |||
| 3604 | page_offset = offset_in_page(gpa); | ||
| 3605 | level = sp->role.level; | ||
| 3606 | *nspte = 1; | ||
| 3607 | if (!sp->role.cr4_pae) { | ||
| 3608 | page_offset <<= 1; /* 32->64 */ | ||
| 3609 | /* | ||
| 3610 | * A 32-bit pde maps 4MB while the shadow pdes map | ||
| 3611 | * only 2MB. So we need to double the offset again | ||
| 3612 | * and zap two pdes instead of one. | ||
| 3613 | */ | ||
| 3614 | if (level == PT32_ROOT_LEVEL) { | ||
| 3615 | page_offset &= ~7; /* kill rounding error */ | ||
| 3616 | page_offset <<= 1; | ||
| 3617 | *nspte = 2; | ||
| 3618 | } | ||
| 3619 | quadrant = page_offset >> PAGE_SHIFT; | ||
| 3620 | page_offset &= ~PAGE_MASK; | ||
| 3621 | if (quadrant != sp->role.quadrant) | ||
| 3622 | return NULL; | ||
| 3623 | } | ||
| 3624 | |||
| 3625 | spte = &sp->spt[page_offset / sizeof(*spte)]; | ||
| 3626 | return spte; | ||
| 3530 | } | 3627 | } |
| 3531 | 3628 | ||
| 3532 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 3629 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
| 3533 | const u8 *new, int bytes, | 3630 | const u8 *new, int bytes) |
| 3534 | bool guest_initiated) | ||
| 3535 | { | 3631 | { |
| 3536 | gfn_t gfn = gpa >> PAGE_SHIFT; | 3632 | gfn_t gfn = gpa >> PAGE_SHIFT; |
| 3537 | union kvm_mmu_page_role mask = { .word = 0 }; | 3633 | union kvm_mmu_page_role mask = { .word = 0 }; |
| @@ -3539,8 +3635,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 3539 | struct hlist_node *node; | 3635 | struct hlist_node *node; |
| 3540 | LIST_HEAD(invalid_list); | 3636 | LIST_HEAD(invalid_list); |
| 3541 | u64 entry, gentry, *spte; | 3637 | u64 entry, gentry, *spte; |
| 3542 | unsigned pte_size, page_offset, misaligned, quadrant, offset; | 3638 | int npte; |
| 3543 | int level, npte, invlpg_counter, r, flooded = 0; | ||
| 3544 | bool remote_flush, local_flush, zap_page; | 3639 | bool remote_flush, local_flush, zap_page; |
| 3545 | 3640 | ||
| 3546 | /* | 3641 | /* |
| @@ -3551,112 +3646,45 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 3551 | return; | 3646 | return; |
| 3552 | 3647 | ||
| 3553 | zap_page = remote_flush = local_flush = false; | 3648 | zap_page = remote_flush = local_flush = false; |
| 3554 | offset = offset_in_page(gpa); | ||
| 3555 | 3649 | ||
| 3556 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 3650 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
| 3557 | 3651 | ||
| 3558 | invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); | 3652 | gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes); |
| 3559 | 3653 | ||
| 3560 | /* | 3654 | /* |
| 3561 | * Assume that the pte write on a page table of the same type | 3655 | * No need to care whether allocation memory is successful |
| 3562 | * as the current vcpu paging mode since we update the sptes only | 3656 | * or not since pte prefetch is skiped if it does not have |
| 3563 | * when they have the same mode. | 3657 | * enough objects in the cache. |
| 3564 | */ | 3658 | */ |
| 3565 | if ((is_pae(vcpu) && bytes == 4) || !new) { | 3659 | mmu_topup_memory_caches(vcpu); |
| 3566 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
| 3567 | if (is_pae(vcpu)) { | ||
| 3568 | gpa &= ~(gpa_t)7; | ||
| 3569 | bytes = 8; | ||
| 3570 | } | ||
| 3571 | r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); | ||
| 3572 | if (r) | ||
| 3573 | gentry = 0; | ||
| 3574 | new = (const u8 *)&gentry; | ||
| 3575 | } | ||
| 3576 | |||
| 3577 | switch (bytes) { | ||
| 3578 | case 4: | ||
| 3579 | gentry = *(const u32 *)new; | ||
| 3580 | break; | ||
| 3581 | case 8: | ||
| 3582 | gentry = *(const u64 *)new; | ||
| 3583 | break; | ||
| 3584 | default: | ||
| 3585 | gentry = 0; | ||
| 3586 | break; | ||
| 3587 | } | ||
| 3588 | 3660 | ||
| 3589 | spin_lock(&vcpu->kvm->mmu_lock); | 3661 | spin_lock(&vcpu->kvm->mmu_lock); |
| 3590 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | ||
| 3591 | gentry = 0; | ||
| 3592 | kvm_mmu_free_some_pages(vcpu); | ||
| 3593 | ++vcpu->kvm->stat.mmu_pte_write; | 3662 | ++vcpu->kvm->stat.mmu_pte_write; |
| 3594 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); | 3663 | kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); |
| 3595 | if (guest_initiated) { | ||
| 3596 | kvm_mmu_access_page(vcpu, gfn); | ||
| 3597 | if (gfn == vcpu->arch.last_pt_write_gfn | ||
| 3598 | && !last_updated_pte_accessed(vcpu)) { | ||
| 3599 | ++vcpu->arch.last_pt_write_count; | ||
| 3600 | if (vcpu->arch.last_pt_write_count >= 3) | ||
| 3601 | flooded = 1; | ||
| 3602 | } else { | ||
| 3603 | vcpu->arch.last_pt_write_gfn = gfn; | ||
| 3604 | vcpu->arch.last_pt_write_count = 1; | ||
| 3605 | vcpu->arch.last_pte_updated = NULL; | ||
| 3606 | } | ||
| 3607 | } | ||
| 3608 | 3664 | ||
| 3609 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; | 3665 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; |
| 3610 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { | 3666 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { |
| 3611 | pte_size = sp->role.cr4_pae ? 8 : 4; | 3667 | spte = get_written_sptes(sp, gpa, &npte); |
| 3612 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 3668 | |
| 3613 | misaligned |= bytes < 4; | 3669 | if (detect_write_misaligned(sp, gpa, bytes) || |
| 3614 | if (misaligned || flooded) { | 3670 | detect_write_flooding(sp, spte)) { |
| 3615 | /* | ||
| 3616 | * Misaligned accesses are too much trouble to fix | ||
| 3617 | * up; also, they usually indicate a page is not used | ||
| 3618 | * as a page table. | ||
| 3619 | * | ||
| 3620 | * If we're seeing too many writes to a page, | ||
| 3621 | * it may no longer be a page table, or we may be | ||
| 3622 | * forking, in which case it is better to unmap the | ||
| 3623 | * page. | ||
| 3624 | */ | ||
| 3625 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | ||
| 3626 | gpa, bytes, sp->role.word); | ||
| 3627 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, | 3671 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
| 3628 | &invalid_list); | 3672 | &invalid_list); |
| 3629 | ++vcpu->kvm->stat.mmu_flooded; | 3673 | ++vcpu->kvm->stat.mmu_flooded; |
| 3630 | continue; | 3674 | continue; |
| 3631 | } | 3675 | } |
| 3632 | page_offset = offset; | 3676 | |
| 3633 | level = sp->role.level; | 3677 | spte = get_written_sptes(sp, gpa, &npte); |
| 3634 | npte = 1; | 3678 | if (!spte) |
| 3635 | if (!sp->role.cr4_pae) { | 3679 | continue; |
| 3636 | page_offset <<= 1; /* 32->64 */ | 3680 | |
| 3637 | /* | ||
| 3638 | * A 32-bit pde maps 4MB while the shadow pdes map | ||
| 3639 | * only 2MB. So we need to double the offset again | ||
| 3640 | * and zap two pdes instead of one. | ||
| 3641 | */ | ||
| 3642 | if (level == PT32_ROOT_LEVEL) { | ||
| 3643 | page_offset &= ~7; /* kill rounding error */ | ||
| 3644 | page_offset <<= 1; | ||
| 3645 | npte = 2; | ||
| 3646 | } | ||
| 3647 | quadrant = page_offset >> PAGE_SHIFT; | ||
| 3648 | page_offset &= ~PAGE_MASK; | ||
| 3649 | if (quadrant != sp->role.quadrant) | ||
| 3650 | continue; | ||
| 3651 | } | ||
| 3652 | local_flush = true; | 3681 | local_flush = true; |
| 3653 | spte = &sp->spt[page_offset / sizeof(*spte)]; | ||
| 3654 | while (npte--) { | 3682 | while (npte--) { |
| 3655 | entry = *spte; | 3683 | entry = *spte; |
| 3656 | mmu_page_zap_pte(vcpu->kvm, sp, spte); | 3684 | mmu_page_zap_pte(vcpu->kvm, sp, spte); |
| 3657 | if (gentry && | 3685 | if (gentry && |
| 3658 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | 3686 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) |
| 3659 | & mask.word)) | 3687 | & mask.word) && rmap_can_add(vcpu)) |
| 3660 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); | 3688 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
| 3661 | if (!remote_flush && need_remote_flush(entry, *spte)) | 3689 | if (!remote_flush && need_remote_flush(entry, *spte)) |
| 3662 | remote_flush = true; | 3690 | remote_flush = true; |
| @@ -3665,7 +3693,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 3665 | } | 3693 | } |
| 3666 | mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); | 3694 | mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); |
| 3667 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 3695 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
| 3668 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); | 3696 | kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); |
| 3669 | spin_unlock(&vcpu->kvm->mmu_lock); | 3697 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 3670 | } | 3698 | } |
| 3671 | 3699 | ||
| @@ -3679,9 +3707,8 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 3679 | 3707 | ||
| 3680 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); | 3708 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); |
| 3681 | 3709 | ||
| 3682 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 3683 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 3710 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
| 3684 | spin_unlock(&vcpu->kvm->mmu_lock); | 3711 | |
| 3685 | return r; | 3712 | return r; |
| 3686 | } | 3713 | } |
| 3687 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | 3714 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); |
| @@ -3702,10 +3729,18 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | |||
| 3702 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 3729 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
| 3703 | } | 3730 | } |
| 3704 | 3731 | ||
| 3732 | static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr) | ||
| 3733 | { | ||
| 3734 | if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu)) | ||
| 3735 | return vcpu_match_mmio_gpa(vcpu, addr); | ||
| 3736 | |||
| 3737 | return vcpu_match_mmio_gva(vcpu, addr); | ||
| 3738 | } | ||
| 3739 | |||
| 3705 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, | 3740 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, |
| 3706 | void *insn, int insn_len) | 3741 | void *insn, int insn_len) |
| 3707 | { | 3742 | { |
| 3708 | int r; | 3743 | int r, emulation_type = EMULTYPE_RETRY; |
| 3709 | enum emulation_result er; | 3744 | enum emulation_result er; |
| 3710 | 3745 | ||
| 3711 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); | 3746 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); |
| @@ -3717,11 +3752,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, | |||
| 3717 | goto out; | 3752 | goto out; |
| 3718 | } | 3753 | } |
| 3719 | 3754 | ||
| 3720 | r = mmu_topup_memory_caches(vcpu); | 3755 | if (is_mmio_page_fault(vcpu, cr2)) |
| 3721 | if (r) | 3756 | emulation_type = 0; |
| 3722 | goto out; | ||
| 3723 | 3757 | ||
| 3724 | er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len); | 3758 | er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); |
| 3725 | 3759 | ||
| 3726 | switch (er) { | 3760 | switch (er) { |
| 3727 | case EMULATE_DONE: | 3761 | case EMULATE_DONE: |
| @@ -3792,7 +3826,11 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) | |||
| 3792 | int kvm_mmu_create(struct kvm_vcpu *vcpu) | 3826 | int kvm_mmu_create(struct kvm_vcpu *vcpu) |
| 3793 | { | 3827 | { |
| 3794 | ASSERT(vcpu); | 3828 | ASSERT(vcpu); |
| 3795 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 3829 | |
| 3830 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; | ||
| 3831 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | ||
| 3832 | vcpu->arch.mmu.translate_gpa = translate_gpa; | ||
| 3833 | vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; | ||
| 3796 | 3834 | ||
| 3797 | return alloc_mmu_pages(vcpu); | 3835 | return alloc_mmu_pages(vcpu); |
| 3798 | } | 3836 | } |
| @@ -3852,14 +3890,14 @@ restart: | |||
| 3852 | spin_unlock(&kvm->mmu_lock); | 3890 | spin_unlock(&kvm->mmu_lock); |
| 3853 | } | 3891 | } |
| 3854 | 3892 | ||
| 3855 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, | 3893 | static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, |
| 3856 | struct list_head *invalid_list) | 3894 | struct list_head *invalid_list) |
| 3857 | { | 3895 | { |
| 3858 | struct kvm_mmu_page *page; | 3896 | struct kvm_mmu_page *page; |
| 3859 | 3897 | ||
| 3860 | page = container_of(kvm->arch.active_mmu_pages.prev, | 3898 | page = container_of(kvm->arch.active_mmu_pages.prev, |
| 3861 | struct kvm_mmu_page, link); | 3899 | struct kvm_mmu_page, link); |
| 3862 | return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); | 3900 | kvm_mmu_prepare_zap_page(kvm, page, invalid_list); |
| 3863 | } | 3901 | } |
| 3864 | 3902 | ||
| 3865 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | 3903 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) |
| @@ -3874,15 +3912,15 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
| 3874 | raw_spin_lock(&kvm_lock); | 3912 | raw_spin_lock(&kvm_lock); |
| 3875 | 3913 | ||
| 3876 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3914 | list_for_each_entry(kvm, &vm_list, vm_list) { |
| 3877 | int idx, freed_pages; | 3915 | int idx; |
| 3878 | LIST_HEAD(invalid_list); | 3916 | LIST_HEAD(invalid_list); |
| 3879 | 3917 | ||
| 3880 | idx = srcu_read_lock(&kvm->srcu); | 3918 | idx = srcu_read_lock(&kvm->srcu); |
| 3881 | spin_lock(&kvm->mmu_lock); | 3919 | spin_lock(&kvm->mmu_lock); |
| 3882 | if (!kvm_freed && nr_to_scan > 0 && | 3920 | if (!kvm_freed && nr_to_scan > 0 && |
| 3883 | kvm->arch.n_used_mmu_pages > 0) { | 3921 | kvm->arch.n_used_mmu_pages > 0) { |
| 3884 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm, | 3922 | kvm_mmu_remove_some_alloc_mmu_pages(kvm, |
| 3885 | &invalid_list); | 3923 | &invalid_list); |
| 3886 | kvm_freed = kvm; | 3924 | kvm_freed = kvm; |
| 3887 | } | 3925 | } |
| 3888 | nr_to_scan--; | 3926 | nr_to_scan--; |
| @@ -3944,15 +3982,15 @@ nomem: | |||
| 3944 | */ | 3982 | */ |
| 3945 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | 3983 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) |
| 3946 | { | 3984 | { |
| 3947 | int i; | ||
| 3948 | unsigned int nr_mmu_pages; | 3985 | unsigned int nr_mmu_pages; |
| 3949 | unsigned int nr_pages = 0; | 3986 | unsigned int nr_pages = 0; |
| 3950 | struct kvm_memslots *slots; | 3987 | struct kvm_memslots *slots; |
| 3988 | struct kvm_memory_slot *memslot; | ||
| 3951 | 3989 | ||
| 3952 | slots = kvm_memslots(kvm); | 3990 | slots = kvm_memslots(kvm); |
| 3953 | 3991 | ||
| 3954 | for (i = 0; i < slots->nmemslots; i++) | 3992 | kvm_for_each_memslot(memslot, slots) |
| 3955 | nr_pages += slots->memslots[i].npages; | 3993 | nr_pages += memslot->npages; |
| 3956 | 3994 | ||
| 3957 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; | 3995 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; |
| 3958 | nr_mmu_pages = max(nr_mmu_pages, | 3996 | nr_mmu_pages = max(nr_mmu_pages, |
| @@ -3961,127 +3999,6 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | |||
| 3961 | return nr_mmu_pages; | 3999 | return nr_mmu_pages; |
| 3962 | } | 4000 | } |
| 3963 | 4001 | ||
| 3964 | static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer, | ||
| 3965 | unsigned len) | ||
| 3966 | { | ||
| 3967 | if (len > buffer->len) | ||
| 3968 | return NULL; | ||
| 3969 | return buffer->ptr; | ||
| 3970 | } | ||
| 3971 | |||
| 3972 | static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer, | ||
| 3973 | unsigned len) | ||
| 3974 | { | ||
| 3975 | void *ret; | ||
| 3976 | |||
| 3977 | ret = pv_mmu_peek_buffer(buffer, len); | ||
| 3978 | if (!ret) | ||
| 3979 | return ret; | ||
| 3980 | buffer->ptr += len; | ||
| 3981 | buffer->len -= len; | ||
| 3982 | buffer->processed += len; | ||
| 3983 | return ret; | ||
| 3984 | } | ||
| 3985 | |||
| 3986 | static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, | ||
| 3987 | gpa_t addr, gpa_t value) | ||
| 3988 | { | ||
| 3989 | int bytes = 8; | ||
| 3990 | int r; | ||
| 3991 | |||
| 3992 | if (!is_long_mode(vcpu) && !is_pae(vcpu)) | ||
| 3993 | bytes = 4; | ||
| 3994 | |||
| 3995 | r = mmu_topup_memory_caches(vcpu); | ||
| 3996 | if (r) | ||
| 3997 | return r; | ||
| 3998 | |||
| 3999 | if (!emulator_write_phys(vcpu, addr, &value, bytes)) | ||
| 4000 | return -EFAULT; | ||
| 4001 | |||
| 4002 | return 1; | ||
| 4003 | } | ||
| 4004 | |||
| 4005 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) | ||
| 4006 | { | ||
| 4007 | (void)kvm_set_cr3(vcpu, kvm_read_cr3(vcpu)); | ||
| 4008 | return 1; | ||
| 4009 | } | ||
| 4010 | |||
| 4011 | static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr) | ||
| 4012 | { | ||
| 4013 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 4014 | mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT); | ||
| 4015 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 4016 | return 1; | ||
| 4017 | } | ||
| 4018 | |||
| 4019 | static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu, | ||
| 4020 | struct kvm_pv_mmu_op_buffer *buffer) | ||
| 4021 | { | ||
| 4022 | struct kvm_mmu_op_header *header; | ||
| 4023 | |||
| 4024 | header = pv_mmu_peek_buffer(buffer, sizeof *header); | ||
| 4025 | if (!header) | ||
| 4026 | return 0; | ||
| 4027 | switch (header->op) { | ||
| 4028 | case KVM_MMU_OP_WRITE_PTE: { | ||
| 4029 | struct kvm_mmu_op_write_pte *wpte; | ||
| 4030 | |||
| 4031 | wpte = pv_mmu_read_buffer(buffer, sizeof *wpte); | ||
| 4032 | if (!wpte) | ||
| 4033 | return 0; | ||
| 4034 | return kvm_pv_mmu_write(vcpu, wpte->pte_phys, | ||
| 4035 | wpte->pte_val); | ||
| 4036 | } | ||
| 4037 | case KVM_MMU_OP_FLUSH_TLB: { | ||
| 4038 | struct kvm_mmu_op_flush_tlb *ftlb; | ||
| 4039 | |||
| 4040 | ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb); | ||
| 4041 | if (!ftlb) | ||
| 4042 | return 0; | ||
| 4043 | return kvm_pv_mmu_flush_tlb(vcpu); | ||
| 4044 | } | ||
| 4045 | case KVM_MMU_OP_RELEASE_PT: { | ||
| 4046 | struct kvm_mmu_op_release_pt *rpt; | ||
| 4047 | |||
| 4048 | rpt = pv_mmu_read_buffer(buffer, sizeof *rpt); | ||
| 4049 | if (!rpt) | ||
| 4050 | return 0; | ||
| 4051 | return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys); | ||
| 4052 | } | ||
| 4053 | default: return 0; | ||
| 4054 | } | ||
| 4055 | } | ||
| 4056 | |||
| 4057 | int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | ||
| 4058 | gpa_t addr, unsigned long *ret) | ||
| 4059 | { | ||
| 4060 | int r; | ||
| 4061 | struct kvm_pv_mmu_op_buffer *buffer = &vcpu->arch.mmu_op_buffer; | ||
| 4062 | |||
| 4063 | buffer->ptr = buffer->buf; | ||
| 4064 | buffer->len = min_t(unsigned long, bytes, sizeof buffer->buf); | ||
| 4065 | buffer->processed = 0; | ||
| 4066 | |||
| 4067 | r = kvm_read_guest(vcpu->kvm, addr, buffer->buf, buffer->len); | ||
| 4068 | if (r) | ||
| 4069 | goto out; | ||
| 4070 | |||
| 4071 | while (buffer->len) { | ||
| 4072 | r = kvm_pv_mmu_op_one(vcpu, buffer); | ||
| 4073 | if (r < 0) | ||
| 4074 | goto out; | ||
| 4075 | if (r == 0) | ||
| 4076 | break; | ||
| 4077 | } | ||
| 4078 | |||
| 4079 | r = 1; | ||
| 4080 | out: | ||
| 4081 | *ret = buffer->processed; | ||
| 4082 | return r; | ||
| 4083 | } | ||
| 4084 | |||
| 4085 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) | 4002 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) |
| 4086 | { | 4003 | { |
| 4087 | struct kvm_shadow_walk_iterator iterator; | 4004 | struct kvm_shadow_walk_iterator iterator; |
| @@ -4110,12 +4027,6 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu) | |||
| 4110 | mmu_free_memory_caches(vcpu); | 4027 | mmu_free_memory_caches(vcpu); |
| 4111 | } | 4028 | } |
| 4112 | 4029 | ||
| 4113 | #ifdef CONFIG_KVM_MMU_AUDIT | ||
| 4114 | #include "mmu_audit.c" | ||
| 4115 | #else | ||
| 4116 | static void mmu_audit_disable(void) { } | ||
| 4117 | #endif | ||
| 4118 | |||
| 4119 | void kvm_mmu_module_exit(void) | 4030 | void kvm_mmu_module_exit(void) |
| 4120 | { | 4031 | { |
| 4121 | mmu_destroy_caches(); | 4032 | mmu_destroy_caches(); |
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 746ec259d024..fe15dcc07a6b 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
| @@ -19,6 +19,15 @@ | |||
| 19 | 19 | ||
| 20 | #include <linux/ratelimit.h> | 20 | #include <linux/ratelimit.h> |
| 21 | 21 | ||
| 22 | char const *audit_point_name[] = { | ||
| 23 | "pre page fault", | ||
| 24 | "post page fault", | ||
| 25 | "pre pte write", | ||
| 26 | "post pte write", | ||
| 27 | "pre sync", | ||
| 28 | "post sync" | ||
| 29 | }; | ||
| 30 | |||
| 22 | #define audit_printk(kvm, fmt, args...) \ | 31 | #define audit_printk(kvm, fmt, args...) \ |
| 23 | printk(KERN_ERR "audit: (%s) error: " \ | 32 | printk(KERN_ERR "audit: (%s) error: " \ |
| 24 | fmt, audit_point_name[kvm->arch.audit_point], ##args) | 33 | fmt, audit_point_name[kvm->arch.audit_point], ##args) |
| @@ -224,7 +233,10 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu) | |||
| 224 | mmu_spte_walk(vcpu, audit_spte); | 233 | mmu_spte_walk(vcpu, audit_spte); |
| 225 | } | 234 | } |
| 226 | 235 | ||
| 227 | static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int point) | 236 | static bool mmu_audit; |
| 237 | static struct jump_label_key mmu_audit_key; | ||
| 238 | |||
| 239 | static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) | ||
| 228 | { | 240 | { |
| 229 | static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); | 241 | static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); |
| 230 | 242 | ||
| @@ -236,18 +248,18 @@ static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int point) | |||
| 236 | audit_vcpu_spte(vcpu); | 248 | audit_vcpu_spte(vcpu); |
| 237 | } | 249 | } |
| 238 | 250 | ||
| 239 | static bool mmu_audit; | 251 | static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) |
| 252 | { | ||
| 253 | if (static_branch((&mmu_audit_key))) | ||
| 254 | __kvm_mmu_audit(vcpu, point); | ||
| 255 | } | ||
| 240 | 256 | ||
| 241 | static void mmu_audit_enable(void) | 257 | static void mmu_audit_enable(void) |
| 242 | { | 258 | { |
| 243 | int ret; | ||
| 244 | |||
| 245 | if (mmu_audit) | 259 | if (mmu_audit) |
| 246 | return; | 260 | return; |
| 247 | 261 | ||
| 248 | ret = register_trace_kvm_mmu_audit(kvm_mmu_audit, NULL); | 262 | jump_label_inc(&mmu_audit_key); |
| 249 | WARN_ON(ret); | ||
| 250 | |||
| 251 | mmu_audit = true; | 263 | mmu_audit = true; |
| 252 | } | 264 | } |
| 253 | 265 | ||
| @@ -256,8 +268,7 @@ static void mmu_audit_disable(void) | |||
| 256 | if (!mmu_audit) | 268 | if (!mmu_audit) |
| 257 | return; | 269 | return; |
| 258 | 270 | ||
| 259 | unregister_trace_kvm_mmu_audit(kvm_mmu_audit, NULL); | 271 | jump_label_dec(&mmu_audit_key); |
| 260 | tracepoint_synchronize_unregister(); | ||
| 261 | mmu_audit = false; | 272 | mmu_audit = false; |
| 262 | } | 273 | } |
| 263 | 274 | ||
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index eed67f34146d..89fb0e81322a 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
| @@ -243,25 +243,6 @@ TRACE_EVENT( | |||
| 243 | TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn, | 243 | TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn, |
| 244 | __entry->access) | 244 | __entry->access) |
| 245 | ); | 245 | ); |
| 246 | |||
| 247 | TRACE_EVENT( | ||
| 248 | kvm_mmu_audit, | ||
| 249 | TP_PROTO(struct kvm_vcpu *vcpu, int audit_point), | ||
| 250 | TP_ARGS(vcpu, audit_point), | ||
| 251 | |||
| 252 | TP_STRUCT__entry( | ||
| 253 | __field(struct kvm_vcpu *, vcpu) | ||
| 254 | __field(int, audit_point) | ||
| 255 | ), | ||
| 256 | |||
| 257 | TP_fast_assign( | ||
| 258 | __entry->vcpu = vcpu; | ||
| 259 | __entry->audit_point = audit_point; | ||
| 260 | ), | ||
| 261 | |||
| 262 | TP_printk("vcpu:%d %s", __entry->vcpu->cpu, | ||
| 263 | audit_point_name[__entry->audit_point]) | ||
| 264 | ); | ||
| 265 | #endif /* _TRACE_KVMMMU_H */ | 246 | #endif /* _TRACE_KVMMMU_H */ |
| 266 | 247 | ||
| 267 | #undef TRACE_INCLUDE_PATH | 248 | #undef TRACE_INCLUDE_PATH |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 92994100638b..15610285ebb6 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
| @@ -497,6 +497,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 497 | shadow_walk_next(&it)) { | 497 | shadow_walk_next(&it)) { |
| 498 | gfn_t table_gfn; | 498 | gfn_t table_gfn; |
| 499 | 499 | ||
| 500 | clear_sp_write_flooding_count(it.sptep); | ||
| 500 | drop_large_spte(vcpu, it.sptep); | 501 | drop_large_spte(vcpu, it.sptep); |
| 501 | 502 | ||
| 502 | sp = NULL; | 503 | sp = NULL; |
| @@ -522,6 +523,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 522 | shadow_walk_next(&it)) { | 523 | shadow_walk_next(&it)) { |
| 523 | gfn_t direct_gfn; | 524 | gfn_t direct_gfn; |
| 524 | 525 | ||
| 526 | clear_sp_write_flooding_count(it.sptep); | ||
| 525 | validate_direct_spte(vcpu, it.sptep, direct_access); | 527 | validate_direct_spte(vcpu, it.sptep, direct_access); |
| 526 | 528 | ||
| 527 | drop_large_spte(vcpu, it.sptep); | 529 | drop_large_spte(vcpu, it.sptep); |
| @@ -536,6 +538,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 536 | link_shadow_page(it.sptep, sp); | 538 | link_shadow_page(it.sptep, sp); |
| 537 | } | 539 | } |
| 538 | 540 | ||
| 541 | clear_sp_write_flooding_count(it.sptep); | ||
| 539 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, | 542 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, |
| 540 | user_fault, write_fault, emulate, it.level, | 543 | user_fault, write_fault, emulate, it.level, |
| 541 | gw->gfn, pfn, prefault, map_writable); | 544 | gw->gfn, pfn, prefault, map_writable); |
| @@ -599,11 +602,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
| 599 | */ | 602 | */ |
| 600 | if (!r) { | 603 | if (!r) { |
| 601 | pgprintk("%s: guest page fault\n", __func__); | 604 | pgprintk("%s: guest page fault\n", __func__); |
| 602 | if (!prefault) { | 605 | if (!prefault) |
| 603 | inject_page_fault(vcpu, &walker.fault); | 606 | inject_page_fault(vcpu, &walker.fault); |
| 604 | /* reset fork detector */ | 607 | |
| 605 | vcpu->arch.last_pt_write_count = 0; | ||
| 606 | } | ||
| 607 | return 0; | 608 | return 0; |
| 608 | } | 609 | } |
| 609 | 610 | ||
| @@ -631,7 +632,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
| 631 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 632 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
| 632 | goto out_unlock; | 633 | goto out_unlock; |
| 633 | 634 | ||
| 634 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); | 635 | kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); |
| 635 | kvm_mmu_free_some_pages(vcpu); | 636 | kvm_mmu_free_some_pages(vcpu); |
| 636 | if (!force_pt_level) | 637 | if (!force_pt_level) |
| 637 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | 638 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); |
| @@ -641,11 +642,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
| 641 | pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__, | 642 | pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__, |
| 642 | sptep, *sptep, emulate); | 643 | sptep, *sptep, emulate); |
| 643 | 644 | ||
| 644 | if (!emulate) | ||
| 645 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ | ||
| 646 | |||
| 647 | ++vcpu->stat.pf_fixed; | 645 | ++vcpu->stat.pf_fixed; |
| 648 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); | 646 | kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); |
| 649 | spin_unlock(&vcpu->kvm->mmu_lock); | 647 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 650 | 648 | ||
| 651 | return emulate; | 649 | return emulate; |
| @@ -656,65 +654,66 @@ out_unlock: | |||
| 656 | return 0; | 654 | return 0; |
| 657 | } | 655 | } |
| 658 | 656 | ||
| 657 | static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) | ||
| 658 | { | ||
| 659 | int offset = 0; | ||
| 660 | |||
| 661 | WARN_ON(sp->role.level != 1); | ||
| 662 | |||
| 663 | if (PTTYPE == 32) | ||
| 664 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | ||
| 665 | |||
| 666 | return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); | ||
| 667 | } | ||
| 668 | |||
| 659 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 669 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
| 660 | { | 670 | { |
| 661 | struct kvm_shadow_walk_iterator iterator; | 671 | struct kvm_shadow_walk_iterator iterator; |
| 662 | struct kvm_mmu_page *sp; | 672 | struct kvm_mmu_page *sp; |
| 663 | gpa_t pte_gpa = -1; | ||
| 664 | int level; | 673 | int level; |
| 665 | u64 *sptep; | 674 | u64 *sptep; |
| 666 | int need_flush = 0; | ||
| 667 | 675 | ||
| 668 | vcpu_clear_mmio_info(vcpu, gva); | 676 | vcpu_clear_mmio_info(vcpu, gva); |
| 669 | 677 | ||
| 670 | spin_lock(&vcpu->kvm->mmu_lock); | 678 | /* |
| 679 | * No need to check return value here, rmap_can_add() can | ||
| 680 | * help us to skip pte prefetch later. | ||
| 681 | */ | ||
| 682 | mmu_topup_memory_caches(vcpu); | ||
| 671 | 683 | ||
| 684 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 672 | for_each_shadow_entry(vcpu, gva, iterator) { | 685 | for_each_shadow_entry(vcpu, gva, iterator) { |
| 673 | level = iterator.level; | 686 | level = iterator.level; |
| 674 | sptep = iterator.sptep; | 687 | sptep = iterator.sptep; |
| 675 | 688 | ||
| 676 | sp = page_header(__pa(sptep)); | 689 | sp = page_header(__pa(sptep)); |
| 677 | if (is_last_spte(*sptep, level)) { | 690 | if (is_last_spte(*sptep, level)) { |
| 678 | int offset, shift; | 691 | pt_element_t gpte; |
| 692 | gpa_t pte_gpa; | ||
| 679 | 693 | ||
| 680 | if (!sp->unsync) | 694 | if (!sp->unsync) |
| 681 | break; | 695 | break; |
| 682 | 696 | ||
| 683 | shift = PAGE_SHIFT - | 697 | pte_gpa = FNAME(get_level1_sp_gpa)(sp); |
| 684 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; | ||
| 685 | offset = sp->role.quadrant << shift; | ||
| 686 | |||
| 687 | pte_gpa = (sp->gfn << PAGE_SHIFT) + offset; | ||
| 688 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | 698 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); |
| 689 | 699 | ||
| 690 | if (is_shadow_present_pte(*sptep)) { | 700 | if (mmu_page_zap_pte(vcpu->kvm, sp, sptep)) |
| 691 | if (is_large_pte(*sptep)) | 701 | kvm_flush_remote_tlbs(vcpu->kvm); |
| 692 | --vcpu->kvm->stat.lpages; | ||
| 693 | drop_spte(vcpu->kvm, sptep); | ||
| 694 | need_flush = 1; | ||
| 695 | } else if (is_mmio_spte(*sptep)) | ||
| 696 | mmu_spte_clear_no_track(sptep); | ||
| 697 | 702 | ||
| 698 | break; | 703 | if (!rmap_can_add(vcpu)) |
| 704 | break; | ||
| 705 | |||
| 706 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | ||
| 707 | sizeof(pt_element_t))) | ||
| 708 | break; | ||
| 709 | |||
| 710 | FNAME(update_pte)(vcpu, sp, sptep, &gpte); | ||
| 699 | } | 711 | } |
| 700 | 712 | ||
| 701 | if (!is_shadow_present_pte(*sptep) || !sp->unsync_children) | 713 | if (!is_shadow_present_pte(*sptep) || !sp->unsync_children) |
| 702 | break; | 714 | break; |
| 703 | } | 715 | } |
| 704 | |||
| 705 | if (need_flush) | ||
| 706 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
| 707 | |||
| 708 | atomic_inc(&vcpu->kvm->arch.invlpg_counter); | ||
| 709 | |||
| 710 | spin_unlock(&vcpu->kvm->mmu_lock); | 716 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 711 | |||
| 712 | if (pte_gpa == -1) | ||
| 713 | return; | ||
| 714 | |||
| 715 | if (mmu_topup_memory_caches(vcpu)) | ||
| 716 | return; | ||
| 717 | kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0); | ||
| 718 | } | 717 | } |
| 719 | 718 | ||
| 720 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | 719 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, |
| @@ -769,19 +768,14 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | |||
| 769 | */ | 768 | */ |
| 770 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 769 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
| 771 | { | 770 | { |
| 772 | int i, offset, nr_present; | 771 | int i, nr_present = 0; |
| 773 | bool host_writable; | 772 | bool host_writable; |
| 774 | gpa_t first_pte_gpa; | 773 | gpa_t first_pte_gpa; |
| 775 | 774 | ||
| 776 | offset = nr_present = 0; | ||
| 777 | |||
| 778 | /* direct kvm_mmu_page can not be unsync. */ | 775 | /* direct kvm_mmu_page can not be unsync. */ |
| 779 | BUG_ON(sp->role.direct); | 776 | BUG_ON(sp->role.direct); |
| 780 | 777 | ||
| 781 | if (PTTYPE == 32) | 778 | first_pte_gpa = FNAME(get_level1_sp_gpa)(sp); |
| 782 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | ||
| 783 | |||
| 784 | first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); | ||
| 785 | 779 | ||
| 786 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { | 780 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { |
| 787 | unsigned pte_access; | 781 | unsigned pte_access; |
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c new file mode 100644 index 000000000000..7aad5446f393 --- /dev/null +++ b/arch/x86/kvm/pmu.c | |||
| @@ -0,0 +1,533 @@ | |||
| 1 | /* | ||
| 2 | * Kernel-based Virtual Machine -- Performance Monitoring Unit support | ||
| 3 | * | ||
| 4 | * Copyright 2011 Red Hat, Inc. and/or its affiliates. | ||
| 5 | * | ||
| 6 | * Authors: | ||
| 7 | * Avi Kivity <avi@redhat.com> | ||
| 8 | * Gleb Natapov <gleb@redhat.com> | ||
| 9 | * | ||
| 10 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 11 | * the COPYING file in the top-level directory. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <linux/types.h> | ||
| 16 | #include <linux/kvm_host.h> | ||
| 17 | #include <linux/perf_event.h> | ||
| 18 | #include "x86.h" | ||
| 19 | #include "cpuid.h" | ||
| 20 | #include "lapic.h" | ||
| 21 | |||
| 22 | static struct kvm_arch_event_perf_mapping { | ||
| 23 | u8 eventsel; | ||
| 24 | u8 unit_mask; | ||
| 25 | unsigned event_type; | ||
| 26 | bool inexact; | ||
| 27 | } arch_events[] = { | ||
| 28 | /* Index must match CPUID 0x0A.EBX bit vector */ | ||
| 29 | [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES }, | ||
| 30 | [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, | ||
| 31 | [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES }, | ||
| 32 | [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES }, | ||
| 33 | [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, | ||
| 34 | [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | ||
| 35 | [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, | ||
| 36 | }; | ||
| 37 | |||
| 38 | /* mapping between fixed pmc index and arch_events array */ | ||
| 39 | int fixed_pmc_events[] = {1, 0, 2}; | ||
| 40 | |||
| 41 | static bool pmc_is_gp(struct kvm_pmc *pmc) | ||
| 42 | { | ||
| 43 | return pmc->type == KVM_PMC_GP; | ||
| 44 | } | ||
| 45 | |||
| 46 | static inline u64 pmc_bitmask(struct kvm_pmc *pmc) | ||
| 47 | { | ||
| 48 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | ||
| 49 | |||
| 50 | return pmu->counter_bitmask[pmc->type]; | ||
| 51 | } | ||
| 52 | |||
| 53 | static inline bool pmc_enabled(struct kvm_pmc *pmc) | ||
| 54 | { | ||
| 55 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | ||
| 56 | return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl); | ||
| 57 | } | ||
| 58 | |||
| 59 | static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr, | ||
| 60 | u32 base) | ||
| 61 | { | ||
| 62 | if (msr >= base && msr < base + pmu->nr_arch_gp_counters) | ||
| 63 | return &pmu->gp_counters[msr - base]; | ||
| 64 | return NULL; | ||
| 65 | } | ||
| 66 | |||
| 67 | static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr) | ||
| 68 | { | ||
| 69 | int base = MSR_CORE_PERF_FIXED_CTR0; | ||
| 70 | if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) | ||
| 71 | return &pmu->fixed_counters[msr - base]; | ||
| 72 | return NULL; | ||
| 73 | } | ||
| 74 | |||
| 75 | static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx) | ||
| 76 | { | ||
| 77 | return get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + idx); | ||
| 78 | } | ||
| 79 | |||
| 80 | static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx) | ||
| 81 | { | ||
| 82 | if (idx < X86_PMC_IDX_FIXED) | ||
| 83 | return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0); | ||
| 84 | else | ||
| 85 | return get_fixed_pmc_idx(pmu, idx - X86_PMC_IDX_FIXED); | ||
| 86 | } | ||
| 87 | |||
| 88 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu) | ||
| 89 | { | ||
| 90 | if (vcpu->arch.apic) | ||
| 91 | kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC); | ||
| 92 | } | ||
| 93 | |||
| 94 | static void trigger_pmi(struct irq_work *irq_work) | ||
| 95 | { | ||
| 96 | struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, | ||
| 97 | irq_work); | ||
| 98 | struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu, | ||
| 99 | arch.pmu); | ||
| 100 | |||
| 101 | kvm_deliver_pmi(vcpu); | ||
| 102 | } | ||
| 103 | |||
| 104 | static void kvm_perf_overflow(struct perf_event *perf_event, | ||
| 105 | struct perf_sample_data *data, | ||
| 106 | struct pt_regs *regs) | ||
| 107 | { | ||
| 108 | struct kvm_pmc *pmc = perf_event->overflow_handler_context; | ||
| 109 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | ||
| 110 | __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); | ||
| 111 | } | ||
| 112 | |||
| 113 | static void kvm_perf_overflow_intr(struct perf_event *perf_event, | ||
| 114 | struct perf_sample_data *data, struct pt_regs *regs) | ||
| 115 | { | ||
| 116 | struct kvm_pmc *pmc = perf_event->overflow_handler_context; | ||
| 117 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | ||
| 118 | if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) { | ||
| 119 | kvm_perf_overflow(perf_event, data, regs); | ||
| 120 | kvm_make_request(KVM_REQ_PMU, pmc->vcpu); | ||
| 121 | /* | ||
| 122 | * Inject PMI. If vcpu was in a guest mode during NMI PMI | ||
| 123 | * can be ejected on a guest mode re-entry. Otherwise we can't | ||
| 124 | * be sure that vcpu wasn't executing hlt instruction at the | ||
| 125 | * time of vmexit and is not going to re-enter guest mode until | ||
| 126 | * woken up. So we should wake it, but this is impossible from | ||
| 127 | * NMI context. Do it from irq work instead. | ||
| 128 | */ | ||
| 129 | if (!kvm_is_in_guest()) | ||
| 130 | irq_work_queue(&pmc->vcpu->arch.pmu.irq_work); | ||
| 131 | else | ||
| 132 | kvm_make_request(KVM_REQ_PMI, pmc->vcpu); | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 136 | static u64 read_pmc(struct kvm_pmc *pmc) | ||
| 137 | { | ||
| 138 | u64 counter, enabled, running; | ||
| 139 | |||
| 140 | counter = pmc->counter; | ||
| 141 | |||
| 142 | if (pmc->perf_event) | ||
| 143 | counter += perf_event_read_value(pmc->perf_event, | ||
| 144 | &enabled, &running); | ||
| 145 | |||
| 146 | /* FIXME: Scaling needed? */ | ||
| 147 | |||
| 148 | return counter & pmc_bitmask(pmc); | ||
| 149 | } | ||
| 150 | |||
| 151 | static void stop_counter(struct kvm_pmc *pmc) | ||
| 152 | { | ||
| 153 | if (pmc->perf_event) { | ||
| 154 | pmc->counter = read_pmc(pmc); | ||
| 155 | perf_event_release_kernel(pmc->perf_event); | ||
| 156 | pmc->perf_event = NULL; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | static void reprogram_counter(struct kvm_pmc *pmc, u32 type, | ||
| 161 | unsigned config, bool exclude_user, bool exclude_kernel, | ||
| 162 | bool intr) | ||
| 163 | { | ||
| 164 | struct perf_event *event; | ||
| 165 | struct perf_event_attr attr = { | ||
| 166 | .type = type, | ||
| 167 | .size = sizeof(attr), | ||
| 168 | .pinned = true, | ||
| 169 | .exclude_idle = true, | ||
| 170 | .exclude_host = 1, | ||
| 171 | .exclude_user = exclude_user, | ||
| 172 | .exclude_kernel = exclude_kernel, | ||
| 173 | .config = config, | ||
| 174 | }; | ||
| 175 | |||
| 176 | attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); | ||
| 177 | |||
| 178 | event = perf_event_create_kernel_counter(&attr, -1, current, | ||
| 179 | intr ? kvm_perf_overflow_intr : | ||
| 180 | kvm_perf_overflow, pmc); | ||
| 181 | if (IS_ERR(event)) { | ||
| 182 | printk_once("kvm: pmu event creation failed %ld\n", | ||
| 183 | PTR_ERR(event)); | ||
| 184 | return; | ||
| 185 | } | ||
| 186 | |||
| 187 | pmc->perf_event = event; | ||
| 188 | clear_bit(pmc->idx, (unsigned long*)&pmc->vcpu->arch.pmu.reprogram_pmi); | ||
| 189 | } | ||
| 190 | |||
| 191 | static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select, | ||
| 192 | u8 unit_mask) | ||
| 193 | { | ||
| 194 | int i; | ||
| 195 | |||
| 196 | for (i = 0; i < ARRAY_SIZE(arch_events); i++) | ||
| 197 | if (arch_events[i].eventsel == event_select | ||
| 198 | && arch_events[i].unit_mask == unit_mask | ||
| 199 | && (pmu->available_event_types & (1 << i))) | ||
| 200 | break; | ||
| 201 | |||
| 202 | if (i == ARRAY_SIZE(arch_events)) | ||
| 203 | return PERF_COUNT_HW_MAX; | ||
| 204 | |||
| 205 | return arch_events[i].event_type; | ||
| 206 | } | ||
| 207 | |||
| 208 | static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | ||
| 209 | { | ||
| 210 | unsigned config, type = PERF_TYPE_RAW; | ||
| 211 | u8 event_select, unit_mask; | ||
| 212 | |||
| 213 | pmc->eventsel = eventsel; | ||
| 214 | |||
| 215 | stop_counter(pmc); | ||
| 216 | |||
| 217 | if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc)) | ||
| 218 | return; | ||
| 219 | |||
| 220 | event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; | ||
| 221 | unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; | ||
| 222 | |||
| 223 | if (!(event_select & (ARCH_PERFMON_EVENTSEL_EDGE | | ||
| 224 | ARCH_PERFMON_EVENTSEL_INV | | ||
| 225 | ARCH_PERFMON_EVENTSEL_CMASK))) { | ||
| 226 | config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, | ||
| 227 | unit_mask); | ||
| 228 | if (config != PERF_COUNT_HW_MAX) | ||
| 229 | type = PERF_TYPE_HARDWARE; | ||
| 230 | } | ||
| 231 | |||
| 232 | if (type == PERF_TYPE_RAW) | ||
| 233 | config = eventsel & X86_RAW_EVENT_MASK; | ||
| 234 | |||
| 235 | reprogram_counter(pmc, type, config, | ||
| 236 | !(eventsel & ARCH_PERFMON_EVENTSEL_USR), | ||
| 237 | !(eventsel & ARCH_PERFMON_EVENTSEL_OS), | ||
| 238 | eventsel & ARCH_PERFMON_EVENTSEL_INT); | ||
| 239 | } | ||
| 240 | |||
| 241 | static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) | ||
| 242 | { | ||
| 243 | unsigned en = en_pmi & 0x3; | ||
| 244 | bool pmi = en_pmi & 0x8; | ||
| 245 | |||
| 246 | stop_counter(pmc); | ||
| 247 | |||
| 248 | if (!en || !pmc_enabled(pmc)) | ||
| 249 | return; | ||
| 250 | |||
| 251 | reprogram_counter(pmc, PERF_TYPE_HARDWARE, | ||
| 252 | arch_events[fixed_pmc_events[idx]].event_type, | ||
| 253 | !(en & 0x2), /* exclude user */ | ||
| 254 | !(en & 0x1), /* exclude kernel */ | ||
| 255 | pmi); | ||
| 256 | } | ||
| 257 | |||
| 258 | static inline u8 fixed_en_pmi(u64 ctrl, int idx) | ||
| 259 | { | ||
| 260 | return (ctrl >> (idx * 4)) & 0xf; | ||
| 261 | } | ||
| 262 | |||
| 263 | static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) | ||
| 264 | { | ||
| 265 | int i; | ||
| 266 | |||
| 267 | for (i = 0; i < pmu->nr_arch_fixed_counters; i++) { | ||
| 268 | u8 en_pmi = fixed_en_pmi(data, i); | ||
| 269 | struct kvm_pmc *pmc = get_fixed_pmc_idx(pmu, i); | ||
| 270 | |||
| 271 | if (fixed_en_pmi(pmu->fixed_ctr_ctrl, i) == en_pmi) | ||
| 272 | continue; | ||
| 273 | |||
| 274 | reprogram_fixed_counter(pmc, en_pmi, i); | ||
| 275 | } | ||
| 276 | |||
| 277 | pmu->fixed_ctr_ctrl = data; | ||
| 278 | } | ||
| 279 | |||
| 280 | static void reprogram_idx(struct kvm_pmu *pmu, int idx) | ||
| 281 | { | ||
| 282 | struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx); | ||
| 283 | |||
| 284 | if (!pmc) | ||
| 285 | return; | ||
| 286 | |||
| 287 | if (pmc_is_gp(pmc)) | ||
| 288 | reprogram_gp_counter(pmc, pmc->eventsel); | ||
| 289 | else { | ||
| 290 | int fidx = idx - X86_PMC_IDX_FIXED; | ||
| 291 | reprogram_fixed_counter(pmc, | ||
| 292 | fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx); | ||
| 293 | } | ||
| 294 | } | ||
| 295 | |||
| 296 | static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data) | ||
| 297 | { | ||
| 298 | int bit; | ||
| 299 | u64 diff = pmu->global_ctrl ^ data; | ||
| 300 | |||
| 301 | pmu->global_ctrl = data; | ||
| 302 | |||
| 303 | for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) | ||
| 304 | reprogram_idx(pmu, bit); | ||
| 305 | } | ||
| 306 | |||
| 307 | bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr) | ||
| 308 | { | ||
| 309 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
| 310 | int ret; | ||
| 311 | |||
| 312 | switch (msr) { | ||
| 313 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
| 314 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
| 315 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
| 316 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
| 317 | ret = pmu->version > 1; | ||
| 318 | break; | ||
| 319 | default: | ||
| 320 | ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) | ||
| 321 | || get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) | ||
| 322 | || get_fixed_pmc(pmu, msr); | ||
| 323 | break; | ||
| 324 | } | ||
| 325 | return ret; | ||
| 326 | } | ||
| 327 | |||
| 328 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) | ||
| 329 | { | ||
| 330 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
| 331 | struct kvm_pmc *pmc; | ||
| 332 | |||
| 333 | switch (index) { | ||
| 334 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
| 335 | *data = pmu->fixed_ctr_ctrl; | ||
| 336 | return 0; | ||
| 337 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
| 338 | *data = pmu->global_status; | ||
| 339 | return 0; | ||
| 340 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
| 341 | *data = pmu->global_ctrl; | ||
| 342 | return 0; | ||
| 343 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
| 344 | *data = pmu->global_ovf_ctrl; | ||
| 345 | return 0; | ||
| 346 | default: | ||
| 347 | if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || | ||
| 348 | (pmc = get_fixed_pmc(pmu, index))) { | ||
| 349 | *data = read_pmc(pmc); | ||
| 350 | return 0; | ||
| 351 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { | ||
| 352 | *data = pmc->eventsel; | ||
| 353 | return 0; | ||
| 354 | } | ||
| 355 | } | ||
| 356 | return 1; | ||
| 357 | } | ||
| 358 | |||
| 359 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | ||
| 360 | { | ||
| 361 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
| 362 | struct kvm_pmc *pmc; | ||
| 363 | |||
| 364 | switch (index) { | ||
| 365 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
| 366 | if (pmu->fixed_ctr_ctrl == data) | ||
| 367 | return 0; | ||
| 368 | if (!(data & 0xfffffffffffff444)) { | ||
| 369 | reprogram_fixed_counters(pmu, data); | ||
| 370 | return 0; | ||
| 371 | } | ||
| 372 | break; | ||
| 373 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
| 374 | break; /* RO MSR */ | ||
| 375 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
| 376 | if (pmu->global_ctrl == data) | ||
| 377 | return 0; | ||
| 378 | if (!(data & pmu->global_ctrl_mask)) { | ||
| 379 | global_ctrl_changed(pmu, data); | ||
| 380 | return 0; | ||
| 381 | } | ||
| 382 | break; | ||
| 383 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
| 384 | if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { | ||
| 385 | pmu->global_status &= ~data; | ||
| 386 | pmu->global_ovf_ctrl = data; | ||
| 387 | return 0; | ||
| 388 | } | ||
| 389 | break; | ||
| 390 | default: | ||
| 391 | if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || | ||
| 392 | (pmc = get_fixed_pmc(pmu, index))) { | ||
| 393 | data = (s64)(s32)data; | ||
| 394 | pmc->counter += data - read_pmc(pmc); | ||
| 395 | return 0; | ||
| 396 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { | ||
| 397 | if (data == pmc->eventsel) | ||
| 398 | return 0; | ||
| 399 | if (!(data & 0xffffffff00200000ull)) { | ||
| 400 | reprogram_gp_counter(pmc, data); | ||
| 401 | return 0; | ||
| 402 | } | ||
| 403 | } | ||
| 404 | } | ||
| 405 | return 1; | ||
| 406 | } | ||
| 407 | |||
| 408 | int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data) | ||
| 409 | { | ||
| 410 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
| 411 | bool fast_mode = pmc & (1u << 31); | ||
| 412 | bool fixed = pmc & (1u << 30); | ||
| 413 | struct kvm_pmc *counters; | ||
| 414 | u64 ctr; | ||
| 415 | |||
| 416 | pmc &= (3u << 30) - 1; | ||
| 417 | if (!fixed && pmc >= pmu->nr_arch_gp_counters) | ||
| 418 | return 1; | ||
| 419 | if (fixed && pmc >= pmu->nr_arch_fixed_counters) | ||
| 420 | return 1; | ||
| 421 | counters = fixed ? pmu->fixed_counters : pmu->gp_counters; | ||
| 422 | ctr = read_pmc(&counters[pmc]); | ||
| 423 | if (fast_mode) | ||
| 424 | ctr = (u32)ctr; | ||
| 425 | *data = ctr; | ||
| 426 | |||
| 427 | return 0; | ||
| 428 | } | ||
| 429 | |||
| 430 | void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | ||
| 431 | { | ||
| 432 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
| 433 | struct kvm_cpuid_entry2 *entry; | ||
| 434 | unsigned bitmap_len; | ||
| 435 | |||
| 436 | pmu->nr_arch_gp_counters = 0; | ||
| 437 | pmu->nr_arch_fixed_counters = 0; | ||
| 438 | pmu->counter_bitmask[KVM_PMC_GP] = 0; | ||
| 439 | pmu->counter_bitmask[KVM_PMC_FIXED] = 0; | ||
| 440 | pmu->version = 0; | ||
| 441 | |||
| 442 | entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); | ||
| 443 | if (!entry) | ||
| 444 | return; | ||
| 445 | |||
| 446 | pmu->version = entry->eax & 0xff; | ||
| 447 | if (!pmu->version) | ||
| 448 | return; | ||
| 449 | |||
| 450 | pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff, | ||
| 451 | X86_PMC_MAX_GENERIC); | ||
| 452 | pmu->counter_bitmask[KVM_PMC_GP] = | ||
| 453 | ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1; | ||
| 454 | bitmap_len = (entry->eax >> 24) & 0xff; | ||
| 455 | pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1); | ||
| 456 | |||
| 457 | if (pmu->version == 1) { | ||
| 458 | pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1; | ||
| 459 | return; | ||
| 460 | } | ||
| 461 | |||
| 462 | pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), | ||
| 463 | X86_PMC_MAX_FIXED); | ||
| 464 | pmu->counter_bitmask[KVM_PMC_FIXED] = | ||
| 465 | ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; | ||
| 466 | pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1) | ||
| 467 | | (((1ull << pmu->nr_arch_fixed_counters) - 1) | ||
| 468 | << X86_PMC_IDX_FIXED)); | ||
| 469 | } | ||
| 470 | |||
| 471 | void kvm_pmu_init(struct kvm_vcpu *vcpu) | ||
| 472 | { | ||
| 473 | int i; | ||
| 474 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
| 475 | |||
| 476 | memset(pmu, 0, sizeof(*pmu)); | ||
| 477 | for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { | ||
| 478 | pmu->gp_counters[i].type = KVM_PMC_GP; | ||
| 479 | pmu->gp_counters[i].vcpu = vcpu; | ||
| 480 | pmu->gp_counters[i].idx = i; | ||
| 481 | } | ||
| 482 | for (i = 0; i < X86_PMC_MAX_FIXED; i++) { | ||
| 483 | pmu->fixed_counters[i].type = KVM_PMC_FIXED; | ||
| 484 | pmu->fixed_counters[i].vcpu = vcpu; | ||
| 485 | pmu->fixed_counters[i].idx = i + X86_PMC_IDX_FIXED; | ||
| 486 | } | ||
| 487 | init_irq_work(&pmu->irq_work, trigger_pmi); | ||
| 488 | kvm_pmu_cpuid_update(vcpu); | ||
| 489 | } | ||
| 490 | |||
| 491 | void kvm_pmu_reset(struct kvm_vcpu *vcpu) | ||
| 492 | { | ||
| 493 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
| 494 | int i; | ||
| 495 | |||
| 496 | irq_work_sync(&pmu->irq_work); | ||
| 497 | for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { | ||
| 498 | struct kvm_pmc *pmc = &pmu->gp_counters[i]; | ||
| 499 | stop_counter(pmc); | ||
| 500 | pmc->counter = pmc->eventsel = 0; | ||
| 501 | } | ||
| 502 | |||
| 503 | for (i = 0; i < X86_PMC_MAX_FIXED; i++) | ||
| 504 | stop_counter(&pmu->fixed_counters[i]); | ||
| 505 | |||
| 506 | pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = | ||
| 507 | pmu->global_ovf_ctrl = 0; | ||
| 508 | } | ||
| 509 | |||
| 510 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu) | ||
| 511 | { | ||
| 512 | kvm_pmu_reset(vcpu); | ||
| 513 | } | ||
| 514 | |||
| 515 | void kvm_handle_pmu_event(struct kvm_vcpu *vcpu) | ||
| 516 | { | ||
| 517 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
| 518 | u64 bitmask; | ||
| 519 | int bit; | ||
| 520 | |||
| 521 | bitmask = pmu->reprogram_pmi; | ||
| 522 | |||
| 523 | for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) { | ||
| 524 | struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit); | ||
| 525 | |||
| 526 | if (unlikely(!pmc || !pmc->perf_event)) { | ||
| 527 | clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi); | ||
| 528 | continue; | ||
| 529 | } | ||
| 530 | |||
| 531 | reprogram_idx(pmu, bit); | ||
| 532 | } | ||
| 533 | } | ||
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e32243eac2f4..5fa553babe56 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -1014,6 +1014,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 1014 | set_intercept(svm, INTERCEPT_NMI); | 1014 | set_intercept(svm, INTERCEPT_NMI); |
| 1015 | set_intercept(svm, INTERCEPT_SMI); | 1015 | set_intercept(svm, INTERCEPT_SMI); |
| 1016 | set_intercept(svm, INTERCEPT_SELECTIVE_CR0); | 1016 | set_intercept(svm, INTERCEPT_SELECTIVE_CR0); |
| 1017 | set_intercept(svm, INTERCEPT_RDPMC); | ||
| 1017 | set_intercept(svm, INTERCEPT_CPUID); | 1018 | set_intercept(svm, INTERCEPT_CPUID); |
| 1018 | set_intercept(svm, INTERCEPT_INVD); | 1019 | set_intercept(svm, INTERCEPT_INVD); |
| 1019 | set_intercept(svm, INTERCEPT_HLT); | 1020 | set_intercept(svm, INTERCEPT_HLT); |
| @@ -2770,6 +2771,19 @@ static int emulate_on_interception(struct vcpu_svm *svm) | |||
| 2770 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; | 2771 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; |
| 2771 | } | 2772 | } |
| 2772 | 2773 | ||
| 2774 | static int rdpmc_interception(struct vcpu_svm *svm) | ||
| 2775 | { | ||
| 2776 | int err; | ||
| 2777 | |||
| 2778 | if (!static_cpu_has(X86_FEATURE_NRIPS)) | ||
| 2779 | return emulate_on_interception(svm); | ||
| 2780 | |||
| 2781 | err = kvm_rdpmc(&svm->vcpu); | ||
| 2782 | kvm_complete_insn_gp(&svm->vcpu, err); | ||
| 2783 | |||
| 2784 | return 1; | ||
| 2785 | } | ||
| 2786 | |||
| 2773 | bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) | 2787 | bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) |
| 2774 | { | 2788 | { |
| 2775 | unsigned long cr0 = svm->vcpu.arch.cr0; | 2789 | unsigned long cr0 = svm->vcpu.arch.cr0; |
| @@ -3190,6 +3204,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
| 3190 | [SVM_EXIT_SMI] = nop_on_interception, | 3204 | [SVM_EXIT_SMI] = nop_on_interception, |
| 3191 | [SVM_EXIT_INIT] = nop_on_interception, | 3205 | [SVM_EXIT_INIT] = nop_on_interception, |
| 3192 | [SVM_EXIT_VINTR] = interrupt_window_interception, | 3206 | [SVM_EXIT_VINTR] = interrupt_window_interception, |
| 3207 | [SVM_EXIT_RDPMC] = rdpmc_interception, | ||
| 3193 | [SVM_EXIT_CPUID] = cpuid_interception, | 3208 | [SVM_EXIT_CPUID] = cpuid_interception, |
| 3194 | [SVM_EXIT_IRET] = iret_interception, | 3209 | [SVM_EXIT_IRET] = iret_interception, |
| 3195 | [SVM_EXIT_INVD] = emulate_on_interception, | 3210 | [SVM_EXIT_INVD] = emulate_on_interception, |
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index ae432ea1cd83..6b85cc647f34 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c | |||
| @@ -18,9 +18,10 @@ | |||
| 18 | #include <linux/atomic.h> | 18 | #include <linux/atomic.h> |
| 19 | #include "kvm_timer.h" | 19 | #include "kvm_timer.h" |
| 20 | 20 | ||
| 21 | static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | 21 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data) |
| 22 | { | 22 | { |
| 23 | int restart_timer = 0; | 23 | struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); |
| 24 | struct kvm_vcpu *vcpu = ktimer->vcpu; | ||
| 24 | wait_queue_head_t *q = &vcpu->wq; | 25 | wait_queue_head_t *q = &vcpu->wq; |
| 25 | 26 | ||
| 26 | /* | 27 | /* |
| @@ -40,26 +41,7 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | |||
| 40 | 41 | ||
| 41 | if (ktimer->t_ops->is_periodic(ktimer)) { | 42 | if (ktimer->t_ops->is_periodic(ktimer)) { |
| 42 | hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); | 43 | hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); |
| 43 | restart_timer = 1; | ||
| 44 | } | ||
| 45 | |||
| 46 | return restart_timer; | ||
| 47 | } | ||
| 48 | |||
| 49 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data) | ||
| 50 | { | ||
| 51 | int restart_timer; | ||
| 52 | struct kvm_vcpu *vcpu; | ||
| 53 | struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); | ||
| 54 | |||
| 55 | vcpu = ktimer->vcpu; | ||
| 56 | if (!vcpu) | ||
| 57 | return HRTIMER_NORESTART; | ||
| 58 | |||
| 59 | restart_timer = __kvm_timer_fn(vcpu, ktimer); | ||
| 60 | if (restart_timer) | ||
| 61 | return HRTIMER_RESTART; | 44 | return HRTIMER_RESTART; |
| 62 | else | 45 | } else |
| 63 | return HRTIMER_NORESTART; | 46 | return HRTIMER_NORESTART; |
| 64 | } | 47 | } |
| 65 | |||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 579a0b51696a..906a7e84200f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | 18 | ||
| 19 | #include "irq.h" | 19 | #include "irq.h" |
| 20 | #include "mmu.h" | 20 | #include "mmu.h" |
| 21 | #include "cpuid.h" | ||
| 21 | 22 | ||
| 22 | #include <linux/kvm_host.h> | 23 | #include <linux/kvm_host.h> |
| 23 | #include <linux/module.h> | 24 | #include <linux/module.h> |
| @@ -1747,7 +1748,6 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
| 1747 | int save_nmsrs, index; | 1748 | int save_nmsrs, index; |
| 1748 | unsigned long *msr_bitmap; | 1749 | unsigned long *msr_bitmap; |
| 1749 | 1750 | ||
| 1750 | vmx_load_host_state(vmx); | ||
| 1751 | save_nmsrs = 0; | 1751 | save_nmsrs = 0; |
| 1752 | #ifdef CONFIG_X86_64 | 1752 | #ifdef CONFIG_X86_64 |
| 1753 | if (is_long_mode(&vmx->vcpu)) { | 1753 | if (is_long_mode(&vmx->vcpu)) { |
| @@ -1956,6 +1956,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
| 1956 | #endif | 1956 | #endif |
| 1957 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | | 1957 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | |
| 1958 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | | 1958 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | |
| 1959 | CPU_BASED_RDPMC_EXITING | | ||
| 1959 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 1960 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
| 1960 | /* | 1961 | /* |
| 1961 | * We can allow some features even when not supported by the | 1962 | * We can allow some features even when not supported by the |
| @@ -2142,12 +2143,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
| 2142 | return 1; | 2143 | return 1; |
| 2143 | /* Otherwise falls through */ | 2144 | /* Otherwise falls through */ |
| 2144 | default: | 2145 | default: |
| 2145 | vmx_load_host_state(to_vmx(vcpu)); | ||
| 2146 | if (vmx_get_vmx_msr(vcpu, msr_index, pdata)) | 2146 | if (vmx_get_vmx_msr(vcpu, msr_index, pdata)) |
| 2147 | return 0; | 2147 | return 0; |
| 2148 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 2148 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
| 2149 | if (msr) { | 2149 | if (msr) { |
| 2150 | vmx_load_host_state(to_vmx(vcpu)); | ||
| 2151 | data = msr->data; | 2150 | data = msr->data; |
| 2152 | break; | 2151 | break; |
| 2153 | } | 2152 | } |
| @@ -2171,7 +2170,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
| 2171 | 2170 | ||
| 2172 | switch (msr_index) { | 2171 | switch (msr_index) { |
| 2173 | case MSR_EFER: | 2172 | case MSR_EFER: |
| 2174 | vmx_load_host_state(vmx); | ||
| 2175 | ret = kvm_set_msr_common(vcpu, msr_index, data); | 2173 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
| 2176 | break; | 2174 | break; |
| 2177 | #ifdef CONFIG_X86_64 | 2175 | #ifdef CONFIG_X86_64 |
| @@ -2220,7 +2218,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
| 2220 | break; | 2218 | break; |
| 2221 | msr = find_msr_entry(vmx, msr_index); | 2219 | msr = find_msr_entry(vmx, msr_index); |
| 2222 | if (msr) { | 2220 | if (msr) { |
| 2223 | vmx_load_host_state(vmx); | ||
| 2224 | msr->data = data; | 2221 | msr->data = data; |
| 2225 | break; | 2222 | break; |
| 2226 | } | 2223 | } |
| @@ -2414,7 +2411,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 2414 | CPU_BASED_USE_TSC_OFFSETING | | 2411 | CPU_BASED_USE_TSC_OFFSETING | |
| 2415 | CPU_BASED_MWAIT_EXITING | | 2412 | CPU_BASED_MWAIT_EXITING | |
| 2416 | CPU_BASED_MONITOR_EXITING | | 2413 | CPU_BASED_MONITOR_EXITING | |
| 2417 | CPU_BASED_INVLPG_EXITING; | 2414 | CPU_BASED_INVLPG_EXITING | |
| 2415 | CPU_BASED_RDPMC_EXITING; | ||
| 2418 | 2416 | ||
| 2419 | if (yield_on_hlt) | 2417 | if (yield_on_hlt) |
| 2420 | min |= CPU_BASED_HLT_EXITING; | 2418 | min |= CPU_BASED_HLT_EXITING; |
| @@ -2716,11 +2714,13 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
| 2716 | { | 2714 | { |
| 2717 | if (!kvm->arch.tss_addr) { | 2715 | if (!kvm->arch.tss_addr) { |
| 2718 | struct kvm_memslots *slots; | 2716 | struct kvm_memslots *slots; |
| 2717 | struct kvm_memory_slot *slot; | ||
| 2719 | gfn_t base_gfn; | 2718 | gfn_t base_gfn; |
| 2720 | 2719 | ||
| 2721 | slots = kvm_memslots(kvm); | 2720 | slots = kvm_memslots(kvm); |
| 2722 | base_gfn = slots->memslots[0].base_gfn + | 2721 | slot = id_to_memslot(slots, 0); |
| 2723 | kvm->memslots->memslots[0].npages - 3; | 2722 | base_gfn = slot->base_gfn + slot->npages - 3; |
| 2723 | |||
| 2724 | return base_gfn << PAGE_SHIFT; | 2724 | return base_gfn << PAGE_SHIFT; |
| 2725 | } | 2725 | } |
| 2726 | return kvm->arch.tss_addr; | 2726 | return kvm->arch.tss_addr; |
| @@ -3945,12 +3945,15 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) | |||
| 3945 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 3945 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
| 3946 | { | 3946 | { |
| 3947 | u32 cpu_based_vm_exec_control; | 3947 | u32 cpu_based_vm_exec_control; |
| 3948 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) | 3948 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { |
| 3949 | /* We can get here when nested_run_pending caused | 3949 | /* |
| 3950 | * vmx_interrupt_allowed() to return false. In this case, do | 3950 | * We get here if vmx_interrupt_allowed() said we can't |
| 3951 | * nothing - the interrupt will be injected later. | 3951 | * inject to L1 now because L2 must run. Ask L2 to exit |
| 3952 | * right after entry, so we can inject to L1 more promptly. | ||
| 3952 | */ | 3953 | */ |
| 3954 | kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); | ||
| 3953 | return; | 3955 | return; |
| 3956 | } | ||
| 3954 | 3957 | ||
| 3955 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 3958 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
| 3956 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | 3959 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; |
| @@ -4077,11 +4080,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
| 4077 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 4080 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
| 4078 | { | 4081 | { |
| 4079 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { | 4082 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { |
| 4080 | struct vmcs12 *vmcs12; | 4083 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
| 4081 | if (to_vmx(vcpu)->nested.nested_run_pending) | 4084 | if (to_vmx(vcpu)->nested.nested_run_pending || |
| 4085 | (vmcs12->idt_vectoring_info_field & | ||
| 4086 | VECTORING_INFO_VALID_MASK)) | ||
| 4082 | return 0; | 4087 | return 0; |
| 4083 | nested_vmx_vmexit(vcpu); | 4088 | nested_vmx_vmexit(vcpu); |
| 4084 | vmcs12 = get_vmcs12(vcpu); | ||
| 4085 | vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; | 4089 | vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; |
| 4086 | vmcs12->vm_exit_intr_info = 0; | 4090 | vmcs12->vm_exit_intr_info = 0; |
| 4087 | /* fall through to normal code, but now in L1, not L2 */ | 4091 | /* fall through to normal code, but now in L1, not L2 */ |
| @@ -4611,6 +4615,16 @@ static int handle_invlpg(struct kvm_vcpu *vcpu) | |||
| 4611 | return 1; | 4615 | return 1; |
| 4612 | } | 4616 | } |
| 4613 | 4617 | ||
| 4618 | static int handle_rdpmc(struct kvm_vcpu *vcpu) | ||
| 4619 | { | ||
| 4620 | int err; | ||
| 4621 | |||
| 4622 | err = kvm_rdpmc(vcpu); | ||
| 4623 | kvm_complete_insn_gp(vcpu, err); | ||
| 4624 | |||
| 4625 | return 1; | ||
| 4626 | } | ||
| 4627 | |||
| 4614 | static int handle_wbinvd(struct kvm_vcpu *vcpu) | 4628 | static int handle_wbinvd(struct kvm_vcpu *vcpu) |
| 4615 | { | 4629 | { |
| 4616 | skip_emulated_instruction(vcpu); | 4630 | skip_emulated_instruction(vcpu); |
| @@ -5561,6 +5575,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
| 5561 | [EXIT_REASON_HLT] = handle_halt, | 5575 | [EXIT_REASON_HLT] = handle_halt, |
| 5562 | [EXIT_REASON_INVD] = handle_invd, | 5576 | [EXIT_REASON_INVD] = handle_invd, |
| 5563 | [EXIT_REASON_INVLPG] = handle_invlpg, | 5577 | [EXIT_REASON_INVLPG] = handle_invlpg, |
| 5578 | [EXIT_REASON_RDPMC] = handle_rdpmc, | ||
| 5564 | [EXIT_REASON_VMCALL] = handle_vmcall, | 5579 | [EXIT_REASON_VMCALL] = handle_vmcall, |
| 5565 | [EXIT_REASON_VMCLEAR] = handle_vmclear, | 5580 | [EXIT_REASON_VMCLEAR] = handle_vmclear, |
| 5566 | [EXIT_REASON_VMLAUNCH] = handle_vmlaunch, | 5581 | [EXIT_REASON_VMLAUNCH] = handle_vmlaunch, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4c938da2ba00..1171def5f96b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include "tss.h" | 26 | #include "tss.h" |
| 27 | #include "kvm_cache_regs.h" | 27 | #include "kvm_cache_regs.h" |
| 28 | #include "x86.h" | 28 | #include "x86.h" |
| 29 | #include "cpuid.h" | ||
| 29 | 30 | ||
| 30 | #include <linux/clocksource.h> | 31 | #include <linux/clocksource.h> |
| 31 | #include <linux/interrupt.h> | 32 | #include <linux/interrupt.h> |
| @@ -82,8 +83,6 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); | |||
| 82 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU | 83 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU |
| 83 | 84 | ||
| 84 | static void update_cr8_intercept(struct kvm_vcpu *vcpu); | 85 | static void update_cr8_intercept(struct kvm_vcpu *vcpu); |
| 85 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | ||
| 86 | struct kvm_cpuid_entry2 __user *entries); | ||
| 87 | static void process_nmi(struct kvm_vcpu *vcpu); | 86 | static void process_nmi(struct kvm_vcpu *vcpu); |
| 88 | 87 | ||
| 89 | struct kvm_x86_ops *kvm_x86_ops; | 88 | struct kvm_x86_ops *kvm_x86_ops; |
| @@ -574,54 +573,6 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | |||
| 574 | } | 573 | } |
| 575 | EXPORT_SYMBOL_GPL(kvm_set_xcr); | 574 | EXPORT_SYMBOL_GPL(kvm_set_xcr); |
| 576 | 575 | ||
| 577 | static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | ||
| 578 | { | ||
| 579 | struct kvm_cpuid_entry2 *best; | ||
| 580 | |||
| 581 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
| 582 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); | ||
| 583 | } | ||
| 584 | |||
| 585 | static bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu) | ||
| 586 | { | ||
| 587 | struct kvm_cpuid_entry2 *best; | ||
| 588 | |||
| 589 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
| 590 | return best && (best->ebx & bit(X86_FEATURE_SMEP)); | ||
| 591 | } | ||
| 592 | |||
| 593 | static bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) | ||
| 594 | { | ||
| 595 | struct kvm_cpuid_entry2 *best; | ||
| 596 | |||
| 597 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
| 598 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); | ||
| 599 | } | ||
| 600 | |||
| 601 | static void update_cpuid(struct kvm_vcpu *vcpu) | ||
| 602 | { | ||
| 603 | struct kvm_cpuid_entry2 *best; | ||
| 604 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
| 605 | |||
| 606 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
| 607 | if (!best) | ||
| 608 | return; | ||
| 609 | |||
| 610 | /* Update OSXSAVE bit */ | ||
| 611 | if (cpu_has_xsave && best->function == 0x1) { | ||
| 612 | best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); | ||
| 613 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) | ||
| 614 | best->ecx |= bit(X86_FEATURE_OSXSAVE); | ||
| 615 | } | ||
| 616 | |||
| 617 | if (apic) { | ||
| 618 | if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER)) | ||
| 619 | apic->lapic_timer.timer_mode_mask = 3 << 17; | ||
| 620 | else | ||
| 621 | apic->lapic_timer.timer_mode_mask = 1 << 17; | ||
| 622 | } | ||
| 623 | } | ||
| 624 | |||
| 625 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 576 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
| 626 | { | 577 | { |
| 627 | unsigned long old_cr4 = kvm_read_cr4(vcpu); | 578 | unsigned long old_cr4 = kvm_read_cr4(vcpu); |
| @@ -655,7 +606,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
| 655 | kvm_mmu_reset_context(vcpu); | 606 | kvm_mmu_reset_context(vcpu); |
| 656 | 607 | ||
| 657 | if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) | 608 | if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) |
| 658 | update_cpuid(vcpu); | 609 | kvm_update_cpuid(vcpu); |
| 659 | 610 | ||
| 660 | return 0; | 611 | return 0; |
| 661 | } | 612 | } |
| @@ -809,6 +760,21 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | |||
| 809 | } | 760 | } |
| 810 | EXPORT_SYMBOL_GPL(kvm_get_dr); | 761 | EXPORT_SYMBOL_GPL(kvm_get_dr); |
| 811 | 762 | ||
| 763 | bool kvm_rdpmc(struct kvm_vcpu *vcpu) | ||
| 764 | { | ||
| 765 | u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
| 766 | u64 data; | ||
| 767 | int err; | ||
| 768 | |||
| 769 | err = kvm_pmu_read_pmc(vcpu, ecx, &data); | ||
| 770 | if (err) | ||
| 771 | return err; | ||
| 772 | kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data); | ||
| 773 | kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32); | ||
| 774 | return err; | ||
| 775 | } | ||
| 776 | EXPORT_SYMBOL_GPL(kvm_rdpmc); | ||
| 777 | |||
| 812 | /* | 778 | /* |
| 813 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS | 779 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS |
| 814 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. | 780 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. |
| @@ -1358,12 +1324,11 @@ static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) | |||
| 1358 | if (page_num >= blob_size) | 1324 | if (page_num >= blob_size) |
| 1359 | goto out; | 1325 | goto out; |
| 1360 | r = -ENOMEM; | 1326 | r = -ENOMEM; |
| 1361 | page = kzalloc(PAGE_SIZE, GFP_KERNEL); | 1327 | page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE); |
| 1362 | if (!page) | 1328 | if (IS_ERR(page)) { |
| 1329 | r = PTR_ERR(page); | ||
| 1363 | goto out; | 1330 | goto out; |
| 1364 | r = -EFAULT; | 1331 | } |
| 1365 | if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE)) | ||
| 1366 | goto out_free; | ||
| 1367 | if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) | 1332 | if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) |
| 1368 | goto out_free; | 1333 | goto out_free; |
| 1369 | r = 0; | 1334 | r = 0; |
| @@ -1652,8 +1617,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 1652 | * which we perfectly emulate ;-). Any other value should be at least | 1617 | * which we perfectly emulate ;-). Any other value should be at least |
| 1653 | * reported, some guests depend on them. | 1618 | * reported, some guests depend on them. |
| 1654 | */ | 1619 | */ |
| 1655 | case MSR_P6_EVNTSEL0: | ||
| 1656 | case MSR_P6_EVNTSEL1: | ||
| 1657 | case MSR_K7_EVNTSEL0: | 1620 | case MSR_K7_EVNTSEL0: |
| 1658 | case MSR_K7_EVNTSEL1: | 1621 | case MSR_K7_EVNTSEL1: |
| 1659 | case MSR_K7_EVNTSEL2: | 1622 | case MSR_K7_EVNTSEL2: |
| @@ -1665,8 +1628,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 1665 | /* at least RHEL 4 unconditionally writes to the perfctr registers, | 1628 | /* at least RHEL 4 unconditionally writes to the perfctr registers, |
| 1666 | * so we ignore writes to make it happy. | 1629 | * so we ignore writes to make it happy. |
| 1667 | */ | 1630 | */ |
| 1668 | case MSR_P6_PERFCTR0: | ||
| 1669 | case MSR_P6_PERFCTR1: | ||
| 1670 | case MSR_K7_PERFCTR0: | 1631 | case MSR_K7_PERFCTR0: |
| 1671 | case MSR_K7_PERFCTR1: | 1632 | case MSR_K7_PERFCTR1: |
| 1672 | case MSR_K7_PERFCTR2: | 1633 | case MSR_K7_PERFCTR2: |
| @@ -1703,6 +1664,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 1703 | default: | 1664 | default: |
| 1704 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 1665 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
| 1705 | return xen_hvm_config(vcpu, data); | 1666 | return xen_hvm_config(vcpu, data); |
| 1667 | if (kvm_pmu_msr(vcpu, msr)) | ||
| 1668 | return kvm_pmu_set_msr(vcpu, msr, data); | ||
| 1706 | if (!ignore_msrs) { | 1669 | if (!ignore_msrs) { |
| 1707 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", | 1670 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", |
| 1708 | msr, data); | 1671 | msr, data); |
| @@ -1865,10 +1828,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 1865 | case MSR_K8_SYSCFG: | 1828 | case MSR_K8_SYSCFG: |
| 1866 | case MSR_K7_HWCR: | 1829 | case MSR_K7_HWCR: |
| 1867 | case MSR_VM_HSAVE_PA: | 1830 | case MSR_VM_HSAVE_PA: |
| 1868 | case MSR_P6_PERFCTR0: | ||
| 1869 | case MSR_P6_PERFCTR1: | ||
| 1870 | case MSR_P6_EVNTSEL0: | ||
| 1871 | case MSR_P6_EVNTSEL1: | ||
| 1872 | case MSR_K7_EVNTSEL0: | 1831 | case MSR_K7_EVNTSEL0: |
| 1873 | case MSR_K7_PERFCTR0: | 1832 | case MSR_K7_PERFCTR0: |
| 1874 | case MSR_K8_INT_PENDING_MSG: | 1833 | case MSR_K8_INT_PENDING_MSG: |
| @@ -1979,6 +1938,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 1979 | data = 0xbe702111; | 1938 | data = 0xbe702111; |
| 1980 | break; | 1939 | break; |
| 1981 | default: | 1940 | default: |
| 1941 | if (kvm_pmu_msr(vcpu, msr)) | ||
| 1942 | return kvm_pmu_get_msr(vcpu, msr, pdata); | ||
| 1982 | if (!ignore_msrs) { | 1943 | if (!ignore_msrs) { |
| 1983 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 1944 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
| 1984 | return 1; | 1945 | return 1; |
| @@ -2037,15 +1998,12 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, | |||
| 2037 | if (msrs.nmsrs >= MAX_IO_MSRS) | 1998 | if (msrs.nmsrs >= MAX_IO_MSRS) |
| 2038 | goto out; | 1999 | goto out; |
| 2039 | 2000 | ||
| 2040 | r = -ENOMEM; | ||
| 2041 | size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; | 2001 | size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; |
| 2042 | entries = kmalloc(size, GFP_KERNEL); | 2002 | entries = memdup_user(user_msrs->entries, size); |
| 2043 | if (!entries) | 2003 | if (IS_ERR(entries)) { |
| 2004 | r = PTR_ERR(entries); | ||
| 2044 | goto out; | 2005 | goto out; |
| 2045 | 2006 | } | |
| 2046 | r = -EFAULT; | ||
| 2047 | if (copy_from_user(entries, user_msrs->entries, size)) | ||
| 2048 | goto out_free; | ||
| 2049 | 2007 | ||
| 2050 | r = n = __msr_io(vcpu, &msrs, entries, do_msr); | 2008 | r = n = __msr_io(vcpu, &msrs, entries, do_msr); |
| 2051 | if (r < 0) | 2009 | if (r < 0) |
| @@ -2265,466 +2223,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
| 2265 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); | 2223 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); |
| 2266 | } | 2224 | } |
| 2267 | 2225 | ||
| 2268 | static int is_efer_nx(void) | ||
| 2269 | { | ||
| 2270 | unsigned long long efer = 0; | ||
| 2271 | |||
| 2272 | rdmsrl_safe(MSR_EFER, &efer); | ||
| 2273 | return efer & EFER_NX; | ||
| 2274 | } | ||
| 2275 | |||
| 2276 | static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) | ||
| 2277 | { | ||
| 2278 | int i; | ||
| 2279 | struct kvm_cpuid_entry2 *e, *entry; | ||
| 2280 | |||
| 2281 | entry = NULL; | ||
| 2282 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { | ||
| 2283 | e = &vcpu->arch.cpuid_entries[i]; | ||
| 2284 | if (e->function == 0x80000001) { | ||
| 2285 | entry = e; | ||
| 2286 | break; | ||
| 2287 | } | ||
| 2288 | } | ||
| 2289 | if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) { | ||
| 2290 | entry->edx &= ~(1 << 20); | ||
| 2291 | printk(KERN_INFO "kvm: guest NX capability removed\n"); | ||
| 2292 | } | ||
| 2293 | } | ||
| 2294 | |||
| 2295 | /* when an old userspace process fills a new kernel module */ | ||
| 2296 | static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | ||
| 2297 | struct kvm_cpuid *cpuid, | ||
| 2298 | struct kvm_cpuid_entry __user *entries) | ||
| 2299 | { | ||
| 2300 | int r, i; | ||
| 2301 | struct kvm_cpuid_entry *cpuid_entries; | ||
| 2302 | |||
| 2303 | r = -E2BIG; | ||
| 2304 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
| 2305 | goto out; | ||
| 2306 | r = -ENOMEM; | ||
| 2307 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent); | ||
| 2308 | if (!cpuid_entries) | ||
| 2309 | goto out; | ||
| 2310 | r = -EFAULT; | ||
| 2311 | if (copy_from_user(cpuid_entries, entries, | ||
| 2312 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | ||
| 2313 | goto out_free; | ||
| 2314 | for (i = 0; i < cpuid->nent; i++) { | ||
| 2315 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; | ||
| 2316 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; | ||
| 2317 | vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx; | ||
| 2318 | vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx; | ||
| 2319 | vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx; | ||
| 2320 | vcpu->arch.cpuid_entries[i].index = 0; | ||
| 2321 | vcpu->arch.cpuid_entries[i].flags = 0; | ||
| 2322 | vcpu->arch.cpuid_entries[i].padding[0] = 0; | ||
| 2323 | vcpu->arch.cpuid_entries[i].padding[1] = 0; | ||
| 2324 | vcpu->arch.cpuid_entries[i].padding[2] = 0; | ||
| 2325 | } | ||
| 2326 | vcpu->arch.cpuid_nent = cpuid->nent; | ||
| 2327 | cpuid_fix_nx_cap(vcpu); | ||
| 2328 | r = 0; | ||
| 2329 | kvm_apic_set_version(vcpu); | ||
| 2330 | kvm_x86_ops->cpuid_update(vcpu); | ||
| 2331 | update_cpuid(vcpu); | ||
| 2332 | |||
| 2333 | out_free: | ||
| 2334 | vfree(cpuid_entries); | ||
| 2335 | out: | ||
| 2336 | return r; | ||
| 2337 | } | ||
| 2338 | |||
| 2339 | static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | ||
| 2340 | struct kvm_cpuid2 *cpuid, | ||
| 2341 | struct kvm_cpuid_entry2 __user *entries) | ||
| 2342 | { | ||
| 2343 | int r; | ||
| 2344 | |||
| 2345 | r = -E2BIG; | ||
| 2346 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
| 2347 | goto out; | ||
| 2348 | r = -EFAULT; | ||
| 2349 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, | ||
| 2350 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) | ||
| 2351 | goto out; | ||
| 2352 | vcpu->arch.cpuid_nent = cpuid->nent; | ||
| 2353 | kvm_apic_set_version(vcpu); | ||
| 2354 | kvm_x86_ops->cpuid_update(vcpu); | ||
| 2355 | update_cpuid(vcpu); | ||
| 2356 | return 0; | ||
| 2357 | |||
| 2358 | out: | ||
| 2359 | return r; | ||
| 2360 | } | ||
| 2361 | |||
| 2362 | static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | ||
| 2363 | struct kvm_cpuid2 *cpuid, | ||
| 2364 | struct kvm_cpuid_entry2 __user *entries) | ||
| 2365 | { | ||
| 2366 | int r; | ||
| 2367 | |||
| 2368 | r = -E2BIG; | ||
| 2369 | if (cpuid->nent < vcpu->arch.cpuid_nent) | ||
| 2370 | goto out; | ||
| 2371 | r = -EFAULT; | ||
| 2372 | if (copy_to_user(entries, &vcpu->arch.cpuid_entries, | ||
| 2373 | vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) | ||
| 2374 | goto out; | ||
| 2375 | return 0; | ||
| 2376 | |||
| 2377 | out: | ||
| 2378 | cpuid->nent = vcpu->arch.cpuid_nent; | ||
| 2379 | return r; | ||
| 2380 | } | ||
| 2381 | |||
| 2382 | static void cpuid_mask(u32 *word, int wordnum) | ||
| 2383 | { | ||
| 2384 | *word &= boot_cpu_data.x86_capability[wordnum]; | ||
| 2385 | } | ||
| 2386 | |||
| 2387 | static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
| 2388 | u32 index) | ||
| 2389 | { | ||
| 2390 | entry->function = function; | ||
| 2391 | entry->index = index; | ||
| 2392 | cpuid_count(entry->function, entry->index, | ||
| 2393 | &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); | ||
| 2394 | entry->flags = 0; | ||
| 2395 | } | ||
| 2396 | |||
| 2397 | static bool supported_xcr0_bit(unsigned bit) | ||
| 2398 | { | ||
| 2399 | u64 mask = ((u64)1 << bit); | ||
| 2400 | |||
| 2401 | return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0; | ||
| 2402 | } | ||
| 2403 | |||
| 2404 | #define F(x) bit(X86_FEATURE_##x) | ||
| 2405 | |||
| 2406 | static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
| 2407 | u32 index, int *nent, int maxnent) | ||
| 2408 | { | ||
| 2409 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; | ||
| 2410 | #ifdef CONFIG_X86_64 | ||
| 2411 | unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL) | ||
| 2412 | ? F(GBPAGES) : 0; | ||
| 2413 | unsigned f_lm = F(LM); | ||
| 2414 | #else | ||
| 2415 | unsigned f_gbpages = 0; | ||
| 2416 | unsigned f_lm = 0; | ||
| 2417 | #endif | ||
| 2418 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | ||
| 2419 | |||
| 2420 | /* cpuid 1.edx */ | ||
| 2421 | const u32 kvm_supported_word0_x86_features = | ||
| 2422 | F(FPU) | F(VME) | F(DE) | F(PSE) | | ||
| 2423 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | ||
| 2424 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | | ||
| 2425 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
| 2426 | F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) | | ||
| 2427 | 0 /* Reserved, DS, ACPI */ | F(MMX) | | ||
| 2428 | F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | | ||
| 2429 | 0 /* HTT, TM, Reserved, PBE */; | ||
| 2430 | /* cpuid 0x80000001.edx */ | ||
| 2431 | const u32 kvm_supported_word1_x86_features = | ||
| 2432 | F(FPU) | F(VME) | F(DE) | F(PSE) | | ||
| 2433 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | ||
| 2434 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | | ||
| 2435 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
| 2436 | F(PAT) | F(PSE36) | 0 /* Reserved */ | | ||
| 2437 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | | ||
| 2438 | F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp | | ||
| 2439 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | ||
| 2440 | /* cpuid 1.ecx */ | ||
| 2441 | const u32 kvm_supported_word4_x86_features = | ||
| 2442 | F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | | ||
| 2443 | 0 /* DS-CPL, VMX, SMX, EST */ | | ||
| 2444 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | | ||
| 2445 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | | ||
| 2446 | 0 /* Reserved, DCA */ | F(XMM4_1) | | ||
| 2447 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | | ||
| 2448 | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | | ||
| 2449 | F(F16C) | F(RDRAND); | ||
| 2450 | /* cpuid 0x80000001.ecx */ | ||
| 2451 | const u32 kvm_supported_word6_x86_features = | ||
| 2452 | F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | | ||
| 2453 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | | ||
| 2454 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | | ||
| 2455 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); | ||
| 2456 | |||
| 2457 | /* cpuid 0xC0000001.edx */ | ||
| 2458 | const u32 kvm_supported_word5_x86_features = | ||
| 2459 | F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | | ||
| 2460 | F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | | ||
| 2461 | F(PMM) | F(PMM_EN); | ||
| 2462 | |||
| 2463 | /* cpuid 7.0.ebx */ | ||
| 2464 | const u32 kvm_supported_word9_x86_features = | ||
| 2465 | F(SMEP) | F(FSGSBASE) | F(ERMS); | ||
| 2466 | |||
| 2467 | /* all calls to cpuid_count() should be made on the same cpu */ | ||
| 2468 | get_cpu(); | ||
| 2469 | do_cpuid_1_ent(entry, function, index); | ||
| 2470 | ++*nent; | ||
| 2471 | |||
| 2472 | switch (function) { | ||
| 2473 | case 0: | ||
| 2474 | entry->eax = min(entry->eax, (u32)0xd); | ||
| 2475 | break; | ||
| 2476 | case 1: | ||
| 2477 | entry->edx &= kvm_supported_word0_x86_features; | ||
| 2478 | cpuid_mask(&entry->edx, 0); | ||
| 2479 | entry->ecx &= kvm_supported_word4_x86_features; | ||
| 2480 | cpuid_mask(&entry->ecx, 4); | ||
| 2481 | /* we support x2apic emulation even if host does not support | ||
| 2482 | * it since we emulate x2apic in software */ | ||
| 2483 | entry->ecx |= F(X2APIC); | ||
| 2484 | break; | ||
| 2485 | /* function 2 entries are STATEFUL. That is, repeated cpuid commands | ||
| 2486 | * may return different values. This forces us to get_cpu() before | ||
| 2487 | * issuing the first command, and also to emulate this annoying behavior | ||
| 2488 | * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */ | ||
| 2489 | case 2: { | ||
| 2490 | int t, times = entry->eax & 0xff; | ||
| 2491 | |||
| 2492 | entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
| 2493 | entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
| 2494 | for (t = 1; t < times && *nent < maxnent; ++t) { | ||
| 2495 | do_cpuid_1_ent(&entry[t], function, 0); | ||
| 2496 | entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
| 2497 | ++*nent; | ||
| 2498 | } | ||
| 2499 | break; | ||
| 2500 | } | ||
| 2501 | /* function 4 has additional index. */ | ||
| 2502 | case 4: { | ||
| 2503 | int i, cache_type; | ||
| 2504 | |||
| 2505 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 2506 | /* read more entries until cache_type is zero */ | ||
| 2507 | for (i = 1; *nent < maxnent; ++i) { | ||
| 2508 | cache_type = entry[i - 1].eax & 0x1f; | ||
| 2509 | if (!cache_type) | ||
| 2510 | break; | ||
| 2511 | do_cpuid_1_ent(&entry[i], function, i); | ||
| 2512 | entry[i].flags |= | ||
| 2513 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 2514 | ++*nent; | ||
| 2515 | } | ||
| 2516 | break; | ||
| 2517 | } | ||
| 2518 | case 7: { | ||
| 2519 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 2520 | /* Mask ebx against host capbability word 9 */ | ||
| 2521 | if (index == 0) { | ||
| 2522 | entry->ebx &= kvm_supported_word9_x86_features; | ||
| 2523 | cpuid_mask(&entry->ebx, 9); | ||
| 2524 | } else | ||
| 2525 | entry->ebx = 0; | ||
| 2526 | entry->eax = 0; | ||
| 2527 | entry->ecx = 0; | ||
| 2528 | entry->edx = 0; | ||
| 2529 | break; | ||
| 2530 | } | ||
| 2531 | case 9: | ||
| 2532 | break; | ||
| 2533 | /* function 0xb has additional index. */ | ||
| 2534 | case 0xb: { | ||
| 2535 | int i, level_type; | ||
| 2536 | |||
| 2537 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 2538 | /* read more entries until level_type is zero */ | ||
| 2539 | for (i = 1; *nent < maxnent; ++i) { | ||
| 2540 | level_type = entry[i - 1].ecx & 0xff00; | ||
| 2541 | if (!level_type) | ||
| 2542 | break; | ||
| 2543 | do_cpuid_1_ent(&entry[i], function, i); | ||
| 2544 | entry[i].flags |= | ||
| 2545 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 2546 | ++*nent; | ||
| 2547 | } | ||
| 2548 | break; | ||
| 2549 | } | ||
| 2550 | case 0xd: { | ||
| 2551 | int idx, i; | ||
| 2552 | |||
| 2553 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 2554 | for (idx = 1, i = 1; *nent < maxnent && idx < 64; ++idx) { | ||
| 2555 | do_cpuid_1_ent(&entry[i], function, idx); | ||
| 2556 | if (entry[i].eax == 0 || !supported_xcr0_bit(idx)) | ||
| 2557 | continue; | ||
| 2558 | entry[i].flags |= | ||
| 2559 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 2560 | ++*nent; | ||
| 2561 | ++i; | ||
| 2562 | } | ||
| 2563 | break; | ||
| 2564 | } | ||
| 2565 | case KVM_CPUID_SIGNATURE: { | ||
| 2566 | char signature[12] = "KVMKVMKVM\0\0"; | ||
| 2567 | u32 *sigptr = (u32 *)signature; | ||
| 2568 | entry->eax = 0; | ||
| 2569 | entry->ebx = sigptr[0]; | ||
| 2570 | entry->ecx = sigptr[1]; | ||
| 2571 | entry->edx = sigptr[2]; | ||
| 2572 | break; | ||
| 2573 | } | ||
| 2574 | case KVM_CPUID_FEATURES: | ||
| 2575 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | | ||
| 2576 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | ||
| 2577 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | ||
| 2578 | (1 << KVM_FEATURE_ASYNC_PF) | | ||
| 2579 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | ||
| 2580 | |||
| 2581 | if (sched_info_on()) | ||
| 2582 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); | ||
| 2583 | |||
| 2584 | entry->ebx = 0; | ||
| 2585 | entry->ecx = 0; | ||
| 2586 | entry->edx = 0; | ||
| 2587 | break; | ||
| 2588 | case 0x80000000: | ||
| 2589 | entry->eax = min(entry->eax, 0x8000001a); | ||
| 2590 | break; | ||
| 2591 | case 0x80000001: | ||
| 2592 | entry->edx &= kvm_supported_word1_x86_features; | ||
| 2593 | cpuid_mask(&entry->edx, 1); | ||
| 2594 | entry->ecx &= kvm_supported_word6_x86_features; | ||
| 2595 | cpuid_mask(&entry->ecx, 6); | ||
| 2596 | break; | ||
| 2597 | case 0x80000008: { | ||
| 2598 | unsigned g_phys_as = (entry->eax >> 16) & 0xff; | ||
| 2599 | unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); | ||
| 2600 | unsigned phys_as = entry->eax & 0xff; | ||
| 2601 | |||
| 2602 | if (!g_phys_as) | ||
| 2603 | g_phys_as = phys_as; | ||
| 2604 | entry->eax = g_phys_as | (virt_as << 8); | ||
| 2605 | entry->ebx = entry->edx = 0; | ||
| 2606 | break; | ||
| 2607 | } | ||
| 2608 | case 0x80000019: | ||
| 2609 | entry->ecx = entry->edx = 0; | ||
| 2610 | break; | ||
| 2611 | case 0x8000001a: | ||
| 2612 | break; | ||
| 2613 | case 0x8000001d: | ||
| 2614 | break; | ||
| 2615 | /*Add support for Centaur's CPUID instruction*/ | ||
| 2616 | case 0xC0000000: | ||
| 2617 | /*Just support up to 0xC0000004 now*/ | ||
| 2618 | entry->eax = min(entry->eax, 0xC0000004); | ||
| 2619 | break; | ||
| 2620 | case 0xC0000001: | ||
| 2621 | entry->edx &= kvm_supported_word5_x86_features; | ||
| 2622 | cpuid_mask(&entry->edx, 5); | ||
| 2623 | break; | ||
| 2624 | case 3: /* Processor serial number */ | ||
| 2625 | case 5: /* MONITOR/MWAIT */ | ||
| 2626 | case 6: /* Thermal management */ | ||
| 2627 | case 0xA: /* Architectural Performance Monitoring */ | ||
| 2628 | case 0x80000007: /* Advanced power management */ | ||
| 2629 | case 0xC0000002: | ||
| 2630 | case 0xC0000003: | ||
| 2631 | case 0xC0000004: | ||
| 2632 | default: | ||
| 2633 | entry->eax = entry->ebx = entry->ecx = entry->edx = 0; | ||
| 2634 | break; | ||
| 2635 | } | ||
| 2636 | |||
| 2637 | kvm_x86_ops->set_supported_cpuid(function, entry); | ||
| 2638 | |||
| 2639 | put_cpu(); | ||
| 2640 | } | ||
| 2641 | |||
| 2642 | #undef F | ||
| 2643 | |||
| 2644 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | ||
| 2645 | struct kvm_cpuid_entry2 __user *entries) | ||
| 2646 | { | ||
| 2647 | struct kvm_cpuid_entry2 *cpuid_entries; | ||
| 2648 | int limit, nent = 0, r = -E2BIG; | ||
| 2649 | u32 func; | ||
| 2650 | |||
| 2651 | if (cpuid->nent < 1) | ||
| 2652 | goto out; | ||
| 2653 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
| 2654 | cpuid->nent = KVM_MAX_CPUID_ENTRIES; | ||
| 2655 | r = -ENOMEM; | ||
| 2656 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); | ||
| 2657 | if (!cpuid_entries) | ||
| 2658 | goto out; | ||
| 2659 | |||
| 2660 | do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent); | ||
| 2661 | limit = cpuid_entries[0].eax; | ||
| 2662 | for (func = 1; func <= limit && nent < cpuid->nent; ++func) | ||
| 2663 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | ||
| 2664 | &nent, cpuid->nent); | ||
| 2665 | r = -E2BIG; | ||
| 2666 | if (nent >= cpuid->nent) | ||
| 2667 | goto out_free; | ||
| 2668 | |||
| 2669 | do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent); | ||
| 2670 | limit = cpuid_entries[nent - 1].eax; | ||
| 2671 | for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) | ||
| 2672 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | ||
| 2673 | &nent, cpuid->nent); | ||
| 2674 | |||
| 2675 | |||
| 2676 | |||
| 2677 | r = -E2BIG; | ||
| 2678 | if (nent >= cpuid->nent) | ||
| 2679 | goto out_free; | ||
| 2680 | |||
| 2681 | /* Add support for Centaur's CPUID instruction. */ | ||
| 2682 | if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) { | ||
| 2683 | do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0, | ||
| 2684 | &nent, cpuid->nent); | ||
| 2685 | |||
| 2686 | r = -E2BIG; | ||
| 2687 | if (nent >= cpuid->nent) | ||
| 2688 | goto out_free; | ||
| 2689 | |||
| 2690 | limit = cpuid_entries[nent - 1].eax; | ||
| 2691 | for (func = 0xC0000001; | ||
| 2692 | func <= limit && nent < cpuid->nent; ++func) | ||
| 2693 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | ||
| 2694 | &nent, cpuid->nent); | ||
| 2695 | |||
| 2696 | r = -E2BIG; | ||
| 2697 | if (nent >= cpuid->nent) | ||
| 2698 | goto out_free; | ||
| 2699 | } | ||
| 2700 | |||
| 2701 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, | ||
| 2702 | cpuid->nent); | ||
| 2703 | |||
| 2704 | r = -E2BIG; | ||
| 2705 | if (nent >= cpuid->nent) | ||
| 2706 | goto out_free; | ||
| 2707 | |||
| 2708 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent, | ||
| 2709 | cpuid->nent); | ||
| 2710 | |||
| 2711 | r = -E2BIG; | ||
| 2712 | if (nent >= cpuid->nent) | ||
| 2713 | goto out_free; | ||
| 2714 | |||
| 2715 | r = -EFAULT; | ||
| 2716 | if (copy_to_user(entries, cpuid_entries, | ||
| 2717 | nent * sizeof(struct kvm_cpuid_entry2))) | ||
| 2718 | goto out_free; | ||
| 2719 | cpuid->nent = nent; | ||
| 2720 | r = 0; | ||
| 2721 | |||
| 2722 | out_free: | ||
| 2723 | vfree(cpuid_entries); | ||
| 2724 | out: | ||
| 2725 | return r; | ||
| 2726 | } | ||
| 2727 | |||
| 2728 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | 2226 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, |
| 2729 | struct kvm_lapic_state *s) | 2227 | struct kvm_lapic_state *s) |
| 2730 | { | 2228 | { |
| @@ -3042,13 +2540,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 3042 | r = -EINVAL; | 2540 | r = -EINVAL; |
| 3043 | if (!vcpu->arch.apic) | 2541 | if (!vcpu->arch.apic) |
| 3044 | goto out; | 2542 | goto out; |
| 3045 | u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 2543 | u.lapic = memdup_user(argp, sizeof(*u.lapic)); |
| 3046 | r = -ENOMEM; | 2544 | if (IS_ERR(u.lapic)) { |
| 3047 | if (!u.lapic) | 2545 | r = PTR_ERR(u.lapic); |
| 3048 | goto out; | ||
| 3049 | r = -EFAULT; | ||
| 3050 | if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state))) | ||
| 3051 | goto out; | 2546 | goto out; |
| 2547 | } | ||
| 2548 | |||
| 3052 | r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); | 2549 | r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); |
| 3053 | if (r) | 2550 | if (r) |
| 3054 | goto out; | 2551 | goto out; |
| @@ -3227,14 +2724,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 3227 | break; | 2724 | break; |
| 3228 | } | 2725 | } |
| 3229 | case KVM_SET_XSAVE: { | 2726 | case KVM_SET_XSAVE: { |
| 3230 | u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); | 2727 | u.xsave = memdup_user(argp, sizeof(*u.xsave)); |
| 3231 | r = -ENOMEM; | 2728 | if (IS_ERR(u.xsave)) { |
| 3232 | if (!u.xsave) | 2729 | r = PTR_ERR(u.xsave); |
| 3233 | break; | 2730 | goto out; |
| 3234 | 2731 | } | |
| 3235 | r = -EFAULT; | ||
| 3236 | if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave))) | ||
| 3237 | break; | ||
| 3238 | 2732 | ||
| 3239 | r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); | 2733 | r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); |
| 3240 | break; | 2734 | break; |
| @@ -3255,15 +2749,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 3255 | break; | 2749 | break; |
| 3256 | } | 2750 | } |
| 3257 | case KVM_SET_XCRS: { | 2751 | case KVM_SET_XCRS: { |
| 3258 | u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); | 2752 | u.xcrs = memdup_user(argp, sizeof(*u.xcrs)); |
| 3259 | r = -ENOMEM; | 2753 | if (IS_ERR(u.xcrs)) { |
| 3260 | if (!u.xcrs) | 2754 | r = PTR_ERR(u.xcrs); |
| 3261 | break; | 2755 | goto out; |
| 3262 | 2756 | } | |
| 3263 | r = -EFAULT; | ||
| 3264 | if (copy_from_user(u.xcrs, argp, | ||
| 3265 | sizeof(struct kvm_xcrs))) | ||
| 3266 | break; | ||
| 3267 | 2757 | ||
| 3268 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); | 2758 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); |
| 3269 | break; | 2759 | break; |
| @@ -3460,16 +2950,59 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, | |||
| 3460 | return 0; | 2950 | return 0; |
| 3461 | } | 2951 | } |
| 3462 | 2952 | ||
| 2953 | /** | ||
| 2954 | * write_protect_slot - write protect a slot for dirty logging | ||
| 2955 | * @kvm: the kvm instance | ||
| 2956 | * @memslot: the slot we protect | ||
| 2957 | * @dirty_bitmap: the bitmap indicating which pages are dirty | ||
| 2958 | * @nr_dirty_pages: the number of dirty pages | ||
| 2959 | * | ||
| 2960 | * We have two ways to find all sptes to protect: | ||
| 2961 | * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and | ||
| 2962 | * checks ones that have a spte mapping a page in the slot. | ||
| 2963 | * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap. | ||
| 2964 | * | ||
| 2965 | * Generally speaking, if there are not so many dirty pages compared to the | ||
| 2966 | * number of shadow pages, we should use the latter. | ||
| 2967 | * | ||
| 2968 | * Note that letting others write into a page marked dirty in the old bitmap | ||
| 2969 | * by using the remaining tlb entry is not a problem. That page will become | ||
| 2970 | * write protected again when we flush the tlb and then be reported dirty to | ||
| 2971 | * the user space by copying the old bitmap. | ||
| 2972 | */ | ||
| 2973 | static void write_protect_slot(struct kvm *kvm, | ||
| 2974 | struct kvm_memory_slot *memslot, | ||
| 2975 | unsigned long *dirty_bitmap, | ||
| 2976 | unsigned long nr_dirty_pages) | ||
| 2977 | { | ||
| 2978 | /* Not many dirty pages compared to # of shadow pages. */ | ||
| 2979 | if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) { | ||
| 2980 | unsigned long gfn_offset; | ||
| 2981 | |||
| 2982 | for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) { | ||
| 2983 | unsigned long gfn = memslot->base_gfn + gfn_offset; | ||
| 2984 | |||
| 2985 | spin_lock(&kvm->mmu_lock); | ||
| 2986 | kvm_mmu_rmap_write_protect(kvm, gfn, memslot); | ||
| 2987 | spin_unlock(&kvm->mmu_lock); | ||
| 2988 | } | ||
| 2989 | kvm_flush_remote_tlbs(kvm); | ||
| 2990 | } else { | ||
| 2991 | spin_lock(&kvm->mmu_lock); | ||
| 2992 | kvm_mmu_slot_remove_write_access(kvm, memslot->id); | ||
| 2993 | spin_unlock(&kvm->mmu_lock); | ||
| 2994 | } | ||
| 2995 | } | ||
| 2996 | |||
| 3463 | /* | 2997 | /* |
| 3464 | * Get (and clear) the dirty memory log for a memory slot. | 2998 | * Get (and clear) the dirty memory log for a memory slot. |
| 3465 | */ | 2999 | */ |
| 3466 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | 3000 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, |
| 3467 | struct kvm_dirty_log *log) | 3001 | struct kvm_dirty_log *log) |
| 3468 | { | 3002 | { |
| 3469 | int r, i; | 3003 | int r; |
| 3470 | struct kvm_memory_slot *memslot; | 3004 | struct kvm_memory_slot *memslot; |
| 3471 | unsigned long n; | 3005 | unsigned long n, nr_dirty_pages; |
| 3472 | unsigned long is_dirty = 0; | ||
| 3473 | 3006 | ||
| 3474 | mutex_lock(&kvm->slots_lock); | 3007 | mutex_lock(&kvm->slots_lock); |
| 3475 | 3008 | ||
| @@ -3477,43 +3010,41 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
| 3477 | if (log->slot >= KVM_MEMORY_SLOTS) | 3010 | if (log->slot >= KVM_MEMORY_SLOTS) |
| 3478 | goto out; | 3011 | goto out; |
| 3479 | 3012 | ||
| 3480 | memslot = &kvm->memslots->memslots[log->slot]; | 3013 | memslot = id_to_memslot(kvm->memslots, log->slot); |
| 3481 | r = -ENOENT; | 3014 | r = -ENOENT; |
| 3482 | if (!memslot->dirty_bitmap) | 3015 | if (!memslot->dirty_bitmap) |
| 3483 | goto out; | 3016 | goto out; |
| 3484 | 3017 | ||
| 3485 | n = kvm_dirty_bitmap_bytes(memslot); | 3018 | n = kvm_dirty_bitmap_bytes(memslot); |
| 3486 | 3019 | nr_dirty_pages = memslot->nr_dirty_pages; | |
| 3487 | for (i = 0; !is_dirty && i < n/sizeof(long); i++) | ||
| 3488 | is_dirty = memslot->dirty_bitmap[i]; | ||
| 3489 | 3020 | ||
| 3490 | /* If nothing is dirty, don't bother messing with page tables. */ | 3021 | /* If nothing is dirty, don't bother messing with page tables. */ |
| 3491 | if (is_dirty) { | 3022 | if (nr_dirty_pages) { |
| 3492 | struct kvm_memslots *slots, *old_slots; | 3023 | struct kvm_memslots *slots, *old_slots; |
| 3493 | unsigned long *dirty_bitmap; | 3024 | unsigned long *dirty_bitmap, *dirty_bitmap_head; |
| 3494 | 3025 | ||
| 3495 | dirty_bitmap = memslot->dirty_bitmap_head; | 3026 | dirty_bitmap = memslot->dirty_bitmap; |
| 3496 | if (memslot->dirty_bitmap == dirty_bitmap) | 3027 | dirty_bitmap_head = memslot->dirty_bitmap_head; |
| 3497 | dirty_bitmap += n / sizeof(long); | 3028 | if (dirty_bitmap == dirty_bitmap_head) |
| 3498 | memset(dirty_bitmap, 0, n); | 3029 | dirty_bitmap_head += n / sizeof(long); |
| 3030 | memset(dirty_bitmap_head, 0, n); | ||
| 3499 | 3031 | ||
| 3500 | r = -ENOMEM; | 3032 | r = -ENOMEM; |
| 3501 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 3033 | slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL); |
| 3502 | if (!slots) | 3034 | if (!slots) |
| 3503 | goto out; | 3035 | goto out; |
| 3504 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 3036 | |
| 3505 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; | 3037 | memslot = id_to_memslot(slots, log->slot); |
| 3506 | slots->generation++; | 3038 | memslot->nr_dirty_pages = 0; |
| 3039 | memslot->dirty_bitmap = dirty_bitmap_head; | ||
| 3040 | update_memslots(slots, NULL); | ||
| 3507 | 3041 | ||
| 3508 | old_slots = kvm->memslots; | 3042 | old_slots = kvm->memslots; |
| 3509 | rcu_assign_pointer(kvm->memslots, slots); | 3043 | rcu_assign_pointer(kvm->memslots, slots); |
| 3510 | synchronize_srcu_expedited(&kvm->srcu); | 3044 | synchronize_srcu_expedited(&kvm->srcu); |
| 3511 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; | ||
| 3512 | kfree(old_slots); | 3045 | kfree(old_slots); |
| 3513 | 3046 | ||
| 3514 | spin_lock(&kvm->mmu_lock); | 3047 | write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages); |
| 3515 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | ||
| 3516 | spin_unlock(&kvm->mmu_lock); | ||
| 3517 | 3048 | ||
| 3518 | r = -EFAULT; | 3049 | r = -EFAULT; |
| 3519 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) | 3050 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) |
| @@ -3658,14 +3189,14 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 3658 | } | 3189 | } |
| 3659 | case KVM_GET_IRQCHIP: { | 3190 | case KVM_GET_IRQCHIP: { |
| 3660 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | 3191 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ |
| 3661 | struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL); | 3192 | struct kvm_irqchip *chip; |
| 3662 | 3193 | ||
| 3663 | r = -ENOMEM; | 3194 | chip = memdup_user(argp, sizeof(*chip)); |
| 3664 | if (!chip) | 3195 | if (IS_ERR(chip)) { |
| 3196 | r = PTR_ERR(chip); | ||
| 3665 | goto out; | 3197 | goto out; |
| 3666 | r = -EFAULT; | 3198 | } |
| 3667 | if (copy_from_user(chip, argp, sizeof *chip)) | 3199 | |
| 3668 | goto get_irqchip_out; | ||
| 3669 | r = -ENXIO; | 3200 | r = -ENXIO; |
| 3670 | if (!irqchip_in_kernel(kvm)) | 3201 | if (!irqchip_in_kernel(kvm)) |
| 3671 | goto get_irqchip_out; | 3202 | goto get_irqchip_out; |
| @@ -3684,14 +3215,14 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 3684 | } | 3215 | } |
| 3685 | case KVM_SET_IRQCHIP: { | 3216 | case KVM_SET_IRQCHIP: { |
| 3686 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | 3217 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ |
| 3687 | struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL); | 3218 | struct kvm_irqchip *chip; |
| 3688 | 3219 | ||
| 3689 | r = -ENOMEM; | 3220 | chip = memdup_user(argp, sizeof(*chip)); |
| 3690 | if (!chip) | 3221 | if (IS_ERR(chip)) { |
| 3222 | r = PTR_ERR(chip); | ||
| 3691 | goto out; | 3223 | goto out; |
| 3692 | r = -EFAULT; | 3224 | } |
| 3693 | if (copy_from_user(chip, argp, sizeof *chip)) | 3225 | |
| 3694 | goto set_irqchip_out; | ||
| 3695 | r = -ENXIO; | 3226 | r = -ENXIO; |
| 3696 | if (!irqchip_in_kernel(kvm)) | 3227 | if (!irqchip_in_kernel(kvm)) |
| 3697 | goto set_irqchip_out; | 3228 | goto set_irqchip_out; |
| @@ -3898,12 +3429,7 @@ void kvm_get_segment(struct kvm_vcpu *vcpu, | |||
| 3898 | kvm_x86_ops->get_segment(vcpu, var, seg); | 3429 | kvm_x86_ops->get_segment(vcpu, var, seg); |
| 3899 | } | 3430 | } |
| 3900 | 3431 | ||
| 3901 | static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | 3432 | gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) |
| 3902 | { | ||
| 3903 | return gpa; | ||
| 3904 | } | ||
| 3905 | |||
| 3906 | static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | ||
| 3907 | { | 3433 | { |
| 3908 | gpa_t t_gpa; | 3434 | gpa_t t_gpa; |
| 3909 | struct x86_exception exception; | 3435 | struct x86_exception exception; |
| @@ -4087,7 +3613,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 4087 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); | 3613 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); |
| 4088 | if (ret < 0) | 3614 | if (ret < 0) |
| 4089 | return 0; | 3615 | return 0; |
| 4090 | kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); | 3616 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); |
| 4091 | return 1; | 3617 | return 1; |
| 4092 | } | 3618 | } |
| 4093 | 3619 | ||
| @@ -4324,7 +3850,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, | |||
| 4324 | if (!exchanged) | 3850 | if (!exchanged) |
| 4325 | return X86EMUL_CMPXCHG_FAILED; | 3851 | return X86EMUL_CMPXCHG_FAILED; |
| 4326 | 3852 | ||
| 4327 | kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1); | 3853 | kvm_mmu_pte_write(vcpu, gpa, new, bytes); |
| 4328 | 3854 | ||
| 4329 | return X86EMUL_CONTINUE; | 3855 | return X86EMUL_CONTINUE; |
| 4330 | 3856 | ||
| @@ -4349,32 +3875,24 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | |||
| 4349 | return r; | 3875 | return r; |
| 4350 | } | 3876 | } |
| 4351 | 3877 | ||
| 4352 | 3878 | static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, | |
| 4353 | static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, | 3879 | unsigned short port, void *val, |
| 4354 | int size, unsigned short port, void *val, | 3880 | unsigned int count, bool in) |
| 4355 | unsigned int count) | ||
| 4356 | { | 3881 | { |
| 4357 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | 3882 | trace_kvm_pio(!in, port, size, count); |
| 4358 | |||
| 4359 | if (vcpu->arch.pio.count) | ||
| 4360 | goto data_avail; | ||
| 4361 | |||
| 4362 | trace_kvm_pio(0, port, size, count); | ||
| 4363 | 3883 | ||
| 4364 | vcpu->arch.pio.port = port; | 3884 | vcpu->arch.pio.port = port; |
| 4365 | vcpu->arch.pio.in = 1; | 3885 | vcpu->arch.pio.in = in; |
| 4366 | vcpu->arch.pio.count = count; | 3886 | vcpu->arch.pio.count = count; |
| 4367 | vcpu->arch.pio.size = size; | 3887 | vcpu->arch.pio.size = size; |
| 4368 | 3888 | ||
| 4369 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | 3889 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { |
| 4370 | data_avail: | ||
| 4371 | memcpy(val, vcpu->arch.pio_data, size * count); | ||
| 4372 | vcpu->arch.pio.count = 0; | 3890 | vcpu->arch.pio.count = 0; |
| 4373 | return 1; | 3891 | return 1; |
| 4374 | } | 3892 | } |
| 4375 | 3893 | ||
| 4376 | vcpu->run->exit_reason = KVM_EXIT_IO; | 3894 | vcpu->run->exit_reason = KVM_EXIT_IO; |
| 4377 | vcpu->run->io.direction = KVM_EXIT_IO_IN; | 3895 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
| 4378 | vcpu->run->io.size = size; | 3896 | vcpu->run->io.size = size; |
| 4379 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | 3897 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; |
| 4380 | vcpu->run->io.count = count; | 3898 | vcpu->run->io.count = count; |
| @@ -4383,36 +3901,37 @@ static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, | |||
| 4383 | return 0; | 3901 | return 0; |
| 4384 | } | 3902 | } |
| 4385 | 3903 | ||
| 4386 | static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, | 3904 | static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, |
| 4387 | int size, unsigned short port, | 3905 | int size, unsigned short port, void *val, |
| 4388 | const void *val, unsigned int count) | 3906 | unsigned int count) |
| 4389 | { | 3907 | { |
| 4390 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | 3908 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); |
| 3909 | int ret; | ||
| 4391 | 3910 | ||
| 4392 | trace_kvm_pio(1, port, size, count); | 3911 | if (vcpu->arch.pio.count) |
| 4393 | 3912 | goto data_avail; | |
| 4394 | vcpu->arch.pio.port = port; | ||
| 4395 | vcpu->arch.pio.in = 0; | ||
| 4396 | vcpu->arch.pio.count = count; | ||
| 4397 | vcpu->arch.pio.size = size; | ||
| 4398 | |||
| 4399 | memcpy(vcpu->arch.pio_data, val, size * count); | ||
| 4400 | 3913 | ||
| 4401 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | 3914 | ret = emulator_pio_in_out(vcpu, size, port, val, count, true); |
| 3915 | if (ret) { | ||
| 3916 | data_avail: | ||
| 3917 | memcpy(val, vcpu->arch.pio_data, size * count); | ||
| 4402 | vcpu->arch.pio.count = 0; | 3918 | vcpu->arch.pio.count = 0; |
| 4403 | return 1; | 3919 | return 1; |
| 4404 | } | 3920 | } |
| 4405 | 3921 | ||
| 4406 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
| 4407 | vcpu->run->io.direction = KVM_EXIT_IO_OUT; | ||
| 4408 | vcpu->run->io.size = size; | ||
| 4409 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
| 4410 | vcpu->run->io.count = count; | ||
| 4411 | vcpu->run->io.port = port; | ||
| 4412 | |||
| 4413 | return 0; | 3922 | return 0; |
| 4414 | } | 3923 | } |
| 4415 | 3924 | ||
| 3925 | static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, | ||
| 3926 | int size, unsigned short port, | ||
| 3927 | const void *val, unsigned int count) | ||
| 3928 | { | ||
| 3929 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
| 3930 | |||
| 3931 | memcpy(vcpu->arch.pio_data, val, size * count); | ||
| 3932 | return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false); | ||
| 3933 | } | ||
| 3934 | |||
| 4416 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | 3935 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) |
| 4417 | { | 3936 | { |
| 4418 | return kvm_x86_ops->get_segment_base(vcpu, seg); | 3937 | return kvm_x86_ops->get_segment_base(vcpu, seg); |
| @@ -4627,6 +4146,12 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, | |||
| 4627 | return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); | 4146 | return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); |
| 4628 | } | 4147 | } |
| 4629 | 4148 | ||
| 4149 | static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, | ||
| 4150 | u32 pmc, u64 *pdata) | ||
| 4151 | { | ||
| 4152 | return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata); | ||
| 4153 | } | ||
| 4154 | |||
| 4630 | static void emulator_halt(struct x86_emulate_ctxt *ctxt) | 4155 | static void emulator_halt(struct x86_emulate_ctxt *ctxt) |
| 4631 | { | 4156 | { |
| 4632 | emul_to_vcpu(ctxt)->arch.halt_request = 1; | 4157 | emul_to_vcpu(ctxt)->arch.halt_request = 1; |
| @@ -4679,6 +4204,7 @@ static struct x86_emulate_ops emulate_ops = { | |||
| 4679 | .set_dr = emulator_set_dr, | 4204 | .set_dr = emulator_set_dr, |
| 4680 | .set_msr = emulator_set_msr, | 4205 | .set_msr = emulator_set_msr, |
| 4681 | .get_msr = emulator_get_msr, | 4206 | .get_msr = emulator_get_msr, |
| 4207 | .read_pmc = emulator_read_pmc, | ||
| 4682 | .halt = emulator_halt, | 4208 | .halt = emulator_halt, |
| 4683 | .wbinvd = emulator_wbinvd, | 4209 | .wbinvd = emulator_wbinvd, |
| 4684 | .fix_hypercall = emulator_fix_hypercall, | 4210 | .fix_hypercall = emulator_fix_hypercall, |
| @@ -4836,6 +4362,50 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 4836 | return false; | 4362 | return false; |
| 4837 | } | 4363 | } |
| 4838 | 4364 | ||
| 4365 | static bool retry_instruction(struct x86_emulate_ctxt *ctxt, | ||
| 4366 | unsigned long cr2, int emulation_type) | ||
| 4367 | { | ||
| 4368 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
| 4369 | unsigned long last_retry_eip, last_retry_addr, gpa = cr2; | ||
| 4370 | |||
| 4371 | last_retry_eip = vcpu->arch.last_retry_eip; | ||
| 4372 | last_retry_addr = vcpu->arch.last_retry_addr; | ||
| 4373 | |||
| 4374 | /* | ||
| 4375 | * If the emulation is caused by #PF and it is non-page_table | ||
| 4376 | * writing instruction, it means the VM-EXIT is caused by shadow | ||
| 4377 | * page protected, we can zap the shadow page and retry this | ||
| 4378 | * instruction directly. | ||
| 4379 | * | ||
| 4380 | * Note: if the guest uses a non-page-table modifying instruction | ||
| 4381 | * on the PDE that points to the instruction, then we will unmap | ||
| 4382 | * the instruction and go to an infinite loop. So, we cache the | ||
| 4383 | * last retried eip and the last fault address, if we meet the eip | ||
| 4384 | * and the address again, we can break out of the potential infinite | ||
| 4385 | * loop. | ||
| 4386 | */ | ||
| 4387 | vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0; | ||
| 4388 | |||
| 4389 | if (!(emulation_type & EMULTYPE_RETRY)) | ||
| 4390 | return false; | ||
| 4391 | |||
| 4392 | if (x86_page_table_writing_insn(ctxt)) | ||
| 4393 | return false; | ||
| 4394 | |||
| 4395 | if (ctxt->eip == last_retry_eip && last_retry_addr == cr2) | ||
| 4396 | return false; | ||
| 4397 | |||
| 4398 | vcpu->arch.last_retry_eip = ctxt->eip; | ||
| 4399 | vcpu->arch.last_retry_addr = cr2; | ||
| 4400 | |||
| 4401 | if (!vcpu->arch.mmu.direct_map) | ||
| 4402 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); | ||
| 4403 | |||
| 4404 | kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | ||
| 4405 | |||
| 4406 | return true; | ||
| 4407 | } | ||
| 4408 | |||
| 4839 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, | 4409 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, |
| 4840 | unsigned long cr2, | 4410 | unsigned long cr2, |
| 4841 | int emulation_type, | 4411 | int emulation_type, |
| @@ -4877,6 +4447,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 4877 | return EMULATE_DONE; | 4447 | return EMULATE_DONE; |
| 4878 | } | 4448 | } |
| 4879 | 4449 | ||
| 4450 | if (retry_instruction(ctxt, cr2, emulation_type)) | ||
| 4451 | return EMULATE_DONE; | ||
| 4452 | |||
| 4880 | /* this is needed for vmware backdoor interface to work since it | 4453 | /* this is needed for vmware backdoor interface to work since it |
| 4881 | changes registers values during IO operation */ | 4454 | changes registers values during IO operation */ |
| 4882 | if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { | 4455 | if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { |
| @@ -5095,17 +4668,17 @@ static void kvm_timer_init(void) | |||
| 5095 | 4668 | ||
| 5096 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); | 4669 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); |
| 5097 | 4670 | ||
| 5098 | static int kvm_is_in_guest(void) | 4671 | int kvm_is_in_guest(void) |
| 5099 | { | 4672 | { |
| 5100 | return percpu_read(current_vcpu) != NULL; | 4673 | return __this_cpu_read(current_vcpu) != NULL; |
| 5101 | } | 4674 | } |
| 5102 | 4675 | ||
| 5103 | static int kvm_is_user_mode(void) | 4676 | static int kvm_is_user_mode(void) |
| 5104 | { | 4677 | { |
| 5105 | int user_mode = 3; | 4678 | int user_mode = 3; |
| 5106 | 4679 | ||
| 5107 | if (percpu_read(current_vcpu)) | 4680 | if (__this_cpu_read(current_vcpu)) |
| 5108 | user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu)); | 4681 | user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu)); |
| 5109 | 4682 | ||
| 5110 | return user_mode != 0; | 4683 | return user_mode != 0; |
| 5111 | } | 4684 | } |
| @@ -5114,8 +4687,8 @@ static unsigned long kvm_get_guest_ip(void) | |||
| 5114 | { | 4687 | { |
| 5115 | unsigned long ip = 0; | 4688 | unsigned long ip = 0; |
| 5116 | 4689 | ||
| 5117 | if (percpu_read(current_vcpu)) | 4690 | if (__this_cpu_read(current_vcpu)) |
| 5118 | ip = kvm_rip_read(percpu_read(current_vcpu)); | 4691 | ip = kvm_rip_read(__this_cpu_read(current_vcpu)); |
| 5119 | 4692 | ||
| 5120 | return ip; | 4693 | return ip; |
| 5121 | } | 4694 | } |
| @@ -5128,13 +4701,13 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { | |||
| 5128 | 4701 | ||
| 5129 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) | 4702 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) |
| 5130 | { | 4703 | { |
| 5131 | percpu_write(current_vcpu, vcpu); | 4704 | __this_cpu_write(current_vcpu, vcpu); |
| 5132 | } | 4705 | } |
| 5133 | EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); | 4706 | EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); |
| 5134 | 4707 | ||
| 5135 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) | 4708 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) |
| 5136 | { | 4709 | { |
| 5137 | percpu_write(current_vcpu, NULL); | 4710 | __this_cpu_write(current_vcpu, NULL); |
| 5138 | } | 4711 | } |
| 5139 | EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); | 4712 | EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); |
| 5140 | 4713 | ||
| @@ -5233,15 +4806,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) | |||
| 5233 | } | 4806 | } |
| 5234 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); | 4807 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); |
| 5235 | 4808 | ||
| 5236 | static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0, | ||
| 5237 | unsigned long a1) | ||
| 5238 | { | ||
| 5239 | if (is_long_mode(vcpu)) | ||
| 5240 | return a0; | ||
| 5241 | else | ||
| 5242 | return a0 | ((gpa_t)a1 << 32); | ||
| 5243 | } | ||
| 5244 | |||
| 5245 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | 4809 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) |
| 5246 | { | 4810 | { |
| 5247 | u64 param, ingpa, outgpa, ret; | 4811 | u64 param, ingpa, outgpa, ret; |
| @@ -5337,9 +4901,6 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
| 5337 | case KVM_HC_VAPIC_POLL_IRQ: | 4901 | case KVM_HC_VAPIC_POLL_IRQ: |
| 5338 | ret = 0; | 4902 | ret = 0; |
| 5339 | break; | 4903 | break; |
| 5340 | case KVM_HC_MMU_OP: | ||
| 5341 | r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret); | ||
| 5342 | break; | ||
| 5343 | default: | 4904 | default: |
| 5344 | ret = -KVM_ENOSYS; | 4905 | ret = -KVM_ENOSYS; |
| 5345 | break; | 4906 | break; |
| @@ -5369,125 +4930,6 @@ int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) | |||
| 5369 | return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); | 4930 | return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); |
| 5370 | } | 4931 | } |
| 5371 | 4932 | ||
| 5372 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | ||
| 5373 | { | ||
| 5374 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; | ||
| 5375 | int j, nent = vcpu->arch.cpuid_nent; | ||
| 5376 | |||
| 5377 | e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
| 5378 | /* when no next entry is found, the current entry[i] is reselected */ | ||
| 5379 | for (j = i + 1; ; j = (j + 1) % nent) { | ||
| 5380 | struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; | ||
| 5381 | if (ej->function == e->function) { | ||
| 5382 | ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
| 5383 | return j; | ||
| 5384 | } | ||
| 5385 | } | ||
| 5386 | return 0; /* silence gcc, even though control never reaches here */ | ||
| 5387 | } | ||
| 5388 | |||
| 5389 | /* find an entry with matching function, matching index (if needed), and that | ||
| 5390 | * should be read next (if it's stateful) */ | ||
| 5391 | static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, | ||
| 5392 | u32 function, u32 index) | ||
| 5393 | { | ||
| 5394 | if (e->function != function) | ||
| 5395 | return 0; | ||
| 5396 | if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) | ||
| 5397 | return 0; | ||
| 5398 | if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && | ||
| 5399 | !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) | ||
| 5400 | return 0; | ||
| 5401 | return 1; | ||
| 5402 | } | ||
| 5403 | |||
| 5404 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | ||
| 5405 | u32 function, u32 index) | ||
| 5406 | { | ||
| 5407 | int i; | ||
| 5408 | struct kvm_cpuid_entry2 *best = NULL; | ||
| 5409 | |||
| 5410 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { | ||
| 5411 | struct kvm_cpuid_entry2 *e; | ||
| 5412 | |||
| 5413 | e = &vcpu->arch.cpuid_entries[i]; | ||
| 5414 | if (is_matching_cpuid_entry(e, function, index)) { | ||
| 5415 | if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) | ||
| 5416 | move_to_next_stateful_cpuid_entry(vcpu, i); | ||
| 5417 | best = e; | ||
| 5418 | break; | ||
| 5419 | } | ||
| 5420 | } | ||
| 5421 | return best; | ||
| 5422 | } | ||
| 5423 | EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); | ||
| 5424 | |||
| 5425 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | ||
| 5426 | { | ||
| 5427 | struct kvm_cpuid_entry2 *best; | ||
| 5428 | |||
| 5429 | best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); | ||
| 5430 | if (!best || best->eax < 0x80000008) | ||
| 5431 | goto not_found; | ||
| 5432 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | ||
| 5433 | if (best) | ||
| 5434 | return best->eax & 0xff; | ||
| 5435 | not_found: | ||
| 5436 | return 36; | ||
| 5437 | } | ||
| 5438 | |||
| 5439 | /* | ||
| 5440 | * If no match is found, check whether we exceed the vCPU's limit | ||
| 5441 | * and return the content of the highest valid _standard_ leaf instead. | ||
| 5442 | * This is to satisfy the CPUID specification. | ||
| 5443 | */ | ||
| 5444 | static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, | ||
| 5445 | u32 function, u32 index) | ||
| 5446 | { | ||
| 5447 | struct kvm_cpuid_entry2 *maxlevel; | ||
| 5448 | |||
| 5449 | maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); | ||
| 5450 | if (!maxlevel || maxlevel->eax >= function) | ||
| 5451 | return NULL; | ||
| 5452 | if (function & 0x80000000) { | ||
| 5453 | maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0); | ||
| 5454 | if (!maxlevel) | ||
| 5455 | return NULL; | ||
| 5456 | } | ||
| 5457 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); | ||
| 5458 | } | ||
| 5459 | |||
| 5460 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | ||
| 5461 | { | ||
| 5462 | u32 function, index; | ||
| 5463 | struct kvm_cpuid_entry2 *best; | ||
| 5464 | |||
| 5465 | function = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
| 5466 | index = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
| 5467 | kvm_register_write(vcpu, VCPU_REGS_RAX, 0); | ||
| 5468 | kvm_register_write(vcpu, VCPU_REGS_RBX, 0); | ||
| 5469 | kvm_register_write(vcpu, VCPU_REGS_RCX, 0); | ||
| 5470 | kvm_register_write(vcpu, VCPU_REGS_RDX, 0); | ||
| 5471 | best = kvm_find_cpuid_entry(vcpu, function, index); | ||
| 5472 | |||
| 5473 | if (!best) | ||
| 5474 | best = check_cpuid_limit(vcpu, function, index); | ||
| 5475 | |||
| 5476 | if (best) { | ||
| 5477 | kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); | ||
| 5478 | kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); | ||
| 5479 | kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx); | ||
| 5480 | kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); | ||
| 5481 | } | ||
| 5482 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
| 5483 | trace_kvm_cpuid(function, | ||
| 5484 | kvm_register_read(vcpu, VCPU_REGS_RAX), | ||
| 5485 | kvm_register_read(vcpu, VCPU_REGS_RBX), | ||
| 5486 | kvm_register_read(vcpu, VCPU_REGS_RCX), | ||
| 5487 | kvm_register_read(vcpu, VCPU_REGS_RDX)); | ||
| 5488 | } | ||
| 5489 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | ||
| 5490 | |||
| 5491 | /* | 4933 | /* |
| 5492 | * Check if userspace requested an interrupt window, and that the | 4934 | * Check if userspace requested an interrupt window, and that the |
| 5493 | * interrupt window is open. | 4935 | * interrupt window is open. |
| @@ -5648,6 +5090,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 5648 | int r; | 5090 | int r; |
| 5649 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 5091 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
| 5650 | vcpu->run->request_interrupt_window; | 5092 | vcpu->run->request_interrupt_window; |
| 5093 | bool req_immediate_exit = 0; | ||
| 5651 | 5094 | ||
| 5652 | if (vcpu->requests) { | 5095 | if (vcpu->requests) { |
| 5653 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) | 5096 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) |
| @@ -5687,7 +5130,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 5687 | record_steal_time(vcpu); | 5130 | record_steal_time(vcpu); |
| 5688 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) | 5131 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) |
| 5689 | process_nmi(vcpu); | 5132 | process_nmi(vcpu); |
| 5690 | 5133 | req_immediate_exit = | |
| 5134 | kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); | ||
| 5135 | if (kvm_check_request(KVM_REQ_PMU, vcpu)) | ||
| 5136 | kvm_handle_pmu_event(vcpu); | ||
| 5137 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) | ||
| 5138 | kvm_deliver_pmi(vcpu); | ||
| 5691 | } | 5139 | } |
| 5692 | 5140 | ||
| 5693 | r = kvm_mmu_reload(vcpu); | 5141 | r = kvm_mmu_reload(vcpu); |
| @@ -5738,6 +5186,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 5738 | 5186 | ||
| 5739 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 5187 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
| 5740 | 5188 | ||
| 5189 | if (req_immediate_exit) | ||
| 5190 | smp_send_reschedule(vcpu->cpu); | ||
| 5191 | |||
| 5741 | kvm_guest_enter(); | 5192 | kvm_guest_enter(); |
| 5742 | 5193 | ||
| 5743 | if (unlikely(vcpu->arch.switch_db_regs)) { | 5194 | if (unlikely(vcpu->arch.switch_db_regs)) { |
| @@ -5943,10 +5394,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 5943 | if (r <= 0) | 5394 | if (r <= 0) |
| 5944 | goto out; | 5395 | goto out; |
| 5945 | 5396 | ||
| 5946 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) | ||
| 5947 | kvm_register_write(vcpu, VCPU_REGS_RAX, | ||
| 5948 | kvm_run->hypercall.ret); | ||
| 5949 | |||
| 5950 | r = __vcpu_run(vcpu); | 5397 | r = __vcpu_run(vcpu); |
| 5951 | 5398 | ||
| 5952 | out: | 5399 | out: |
| @@ -6148,7 +5595,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 6148 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; | 5595 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; |
| 6149 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 5596 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
| 6150 | if (sregs->cr4 & X86_CR4_OSXSAVE) | 5597 | if (sregs->cr4 & X86_CR4_OSXSAVE) |
| 6151 | update_cpuid(vcpu); | 5598 | kvm_update_cpuid(vcpu); |
| 6152 | 5599 | ||
| 6153 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 5600 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
| 6154 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { | 5601 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
| @@ -6425,6 +5872,8 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 6425 | kvm_async_pf_hash_reset(vcpu); | 5872 | kvm_async_pf_hash_reset(vcpu); |
| 6426 | vcpu->arch.apf.halted = false; | 5873 | vcpu->arch.apf.halted = false; |
| 6427 | 5874 | ||
| 5875 | kvm_pmu_reset(vcpu); | ||
| 5876 | |||
| 6428 | return kvm_x86_ops->vcpu_reset(vcpu); | 5877 | return kvm_x86_ops->vcpu_reset(vcpu); |
| 6429 | } | 5878 | } |
| 6430 | 5879 | ||
| @@ -6473,10 +5922,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
| 6473 | kvm = vcpu->kvm; | 5922 | kvm = vcpu->kvm; |
| 6474 | 5923 | ||
| 6475 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; | 5924 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; |
| 6476 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; | ||
| 6477 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | ||
| 6478 | vcpu->arch.mmu.translate_gpa = translate_gpa; | ||
| 6479 | vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; | ||
| 6480 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) | 5925 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) |
| 6481 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 5926 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
| 6482 | else | 5927 | else |
| @@ -6513,6 +5958,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
| 6513 | goto fail_free_mce_banks; | 5958 | goto fail_free_mce_banks; |
| 6514 | 5959 | ||
| 6515 | kvm_async_pf_hash_reset(vcpu); | 5960 | kvm_async_pf_hash_reset(vcpu); |
| 5961 | kvm_pmu_init(vcpu); | ||
| 6516 | 5962 | ||
| 6517 | return 0; | 5963 | return 0; |
| 6518 | fail_free_mce_banks: | 5964 | fail_free_mce_banks: |
| @@ -6531,6 +5977,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
| 6531 | { | 5977 | { |
| 6532 | int idx; | 5978 | int idx; |
| 6533 | 5979 | ||
| 5980 | kvm_pmu_destroy(vcpu); | ||
| 6534 | kfree(vcpu->arch.mce_banks); | 5981 | kfree(vcpu->arch.mce_banks); |
| 6535 | kvm_free_lapic(vcpu); | 5982 | kvm_free_lapic(vcpu); |
| 6536 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 5983 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index d36fe237c665..cb80c293cdd8 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
| @@ -33,9 +33,6 @@ static inline bool kvm_exception_is_soft(unsigned int nr) | |||
| 33 | return (nr == BP_VECTOR) || (nr == OF_VECTOR); | 33 | return (nr == BP_VECTOR) || (nr == OF_VECTOR); |
| 34 | } | 34 | } |
| 35 | 35 | ||
| 36 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | ||
| 37 | u32 function, u32 index); | ||
| 38 | |||
| 39 | static inline bool is_protmode(struct kvm_vcpu *vcpu) | 36 | static inline bool is_protmode(struct kvm_vcpu *vcpu) |
| 40 | { | 37 | { |
| 41 | return kvm_read_cr0_bits(vcpu, X86_CR0_PE); | 38 | return kvm_read_cr0_bits(vcpu, X86_CR0_PE); |
| @@ -125,4 +122,6 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
| 125 | gva_t addr, void *val, unsigned int bytes, | 122 | gva_t addr, void *val, unsigned int bytes, |
| 126 | struct x86_exception *exception); | 123 | struct x86_exception *exception); |
| 127 | 124 | ||
| 125 | extern u64 host_xcr0; | ||
| 126 | |||
| 128 | #endif | 127 | #endif |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d52623199978..900c76337e8f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/signal.h> | 14 | #include <linux/signal.h> |
| 15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
| 16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
| 17 | #include <linux/mmu_notifier.h> | ||
| 17 | #include <linux/preempt.h> | 18 | #include <linux/preempt.h> |
| 18 | #include <linux/msi.h> | 19 | #include <linux/msi.h> |
| 19 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
| @@ -50,6 +51,9 @@ | |||
| 50 | #define KVM_REQ_APF_HALT 12 | 51 | #define KVM_REQ_APF_HALT 12 |
| 51 | #define KVM_REQ_STEAL_UPDATE 13 | 52 | #define KVM_REQ_STEAL_UPDATE 13 |
| 52 | #define KVM_REQ_NMI 14 | 53 | #define KVM_REQ_NMI 14 |
| 54 | #define KVM_REQ_IMMEDIATE_EXIT 15 | ||
| 55 | #define KVM_REQ_PMU 16 | ||
| 56 | #define KVM_REQ_PMI 17 | ||
| 53 | 57 | ||
| 54 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 58 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
| 55 | 59 | ||
| @@ -179,6 +183,7 @@ struct kvm_memory_slot { | |||
| 179 | unsigned long *rmap; | 183 | unsigned long *rmap; |
| 180 | unsigned long *dirty_bitmap; | 184 | unsigned long *dirty_bitmap; |
| 181 | unsigned long *dirty_bitmap_head; | 185 | unsigned long *dirty_bitmap_head; |
| 186 | unsigned long nr_dirty_pages; | ||
| 182 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; | 187 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; |
| 183 | unsigned long userspace_addr; | 188 | unsigned long userspace_addr; |
| 184 | int user_alloc; | 189 | int user_alloc; |
| @@ -224,11 +229,20 @@ struct kvm_irq_routing_table {}; | |||
| 224 | 229 | ||
| 225 | #endif | 230 | #endif |
| 226 | 231 | ||
| 232 | #ifndef KVM_MEM_SLOTS_NUM | ||
| 233 | #define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) | ||
| 234 | #endif | ||
| 235 | |||
| 236 | /* | ||
| 237 | * Note: | ||
| 238 | * memslots are not sorted by id anymore, please use id_to_memslot() | ||
| 239 | * to get the memslot by its id. | ||
| 240 | */ | ||
| 227 | struct kvm_memslots { | 241 | struct kvm_memslots { |
| 228 | int nmemslots; | ||
| 229 | u64 generation; | 242 | u64 generation; |
| 230 | struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + | 243 | struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM]; |
| 231 | KVM_PRIVATE_MEM_SLOTS]; | 244 | /* The mapping table from slot id to the index in memslots[]. */ |
| 245 | int id_to_index[KVM_MEM_SLOTS_NUM]; | ||
| 232 | }; | 246 | }; |
| 233 | 247 | ||
| 234 | struct kvm { | 248 | struct kvm { |
| @@ -239,7 +253,6 @@ struct kvm { | |||
| 239 | struct srcu_struct srcu; | 253 | struct srcu_struct srcu; |
| 240 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | 254 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE |
| 241 | u32 bsp_vcpu_id; | 255 | u32 bsp_vcpu_id; |
| 242 | struct kvm_vcpu *bsp_vcpu; | ||
| 243 | #endif | 256 | #endif |
| 244 | struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; | 257 | struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; |
| 245 | atomic_t online_vcpus; | 258 | atomic_t online_vcpus; |
| @@ -302,6 +315,11 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) | |||
| 302 | (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ | 315 | (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ |
| 303 | idx++) | 316 | idx++) |
| 304 | 317 | ||
| 318 | #define kvm_for_each_memslot(memslot, slots) \ | ||
| 319 | for (memslot = &slots->memslots[0]; \ | ||
| 320 | memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\ | ||
| 321 | memslot++) | ||
| 322 | |||
| 305 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); | 323 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); |
| 306 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); | 324 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); |
| 307 | 325 | ||
| @@ -314,6 +332,7 @@ void kvm_exit(void); | |||
| 314 | 332 | ||
| 315 | void kvm_get_kvm(struct kvm *kvm); | 333 | void kvm_get_kvm(struct kvm *kvm); |
| 316 | void kvm_put_kvm(struct kvm *kvm); | 334 | void kvm_put_kvm(struct kvm *kvm); |
| 335 | void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new); | ||
| 317 | 336 | ||
| 318 | static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) | 337 | static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) |
| 319 | { | 338 | { |
| @@ -322,6 +341,18 @@ static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) | |||
| 322 | || lockdep_is_held(&kvm->slots_lock)); | 341 | || lockdep_is_held(&kvm->slots_lock)); |
| 323 | } | 342 | } |
| 324 | 343 | ||
| 344 | static inline struct kvm_memory_slot * | ||
| 345 | id_to_memslot(struct kvm_memslots *slots, int id) | ||
| 346 | { | ||
| 347 | int index = slots->id_to_index[id]; | ||
| 348 | struct kvm_memory_slot *slot; | ||
| 349 | |||
| 350 | slot = &slots->memslots[index]; | ||
| 351 | |||
| 352 | WARN_ON(slot->id != id); | ||
| 353 | return slot; | ||
| 354 | } | ||
| 355 | |||
| 325 | #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) | 356 | #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) |
| 326 | #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) | 357 | #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) |
| 327 | static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } | 358 | static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } |
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index 47a070b0520e..ff476ddaf310 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h | |||
| @@ -35,4 +35,3 @@ static inline int kvm_para_has_feature(unsigned int feature) | |||
| 35 | } | 35 | } |
| 36 | #endif /* __KERNEL__ */ | 36 | #endif /* __KERNEL__ */ |
| 37 | #endif /* __LINUX_KVM_PARA_H */ | 37 | #endif /* __LINUX_KVM_PARA_H */ |
| 38 | |||
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 30c3c7708132..01d3b70fc98a 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
| @@ -71,6 +71,7 @@ void jump_label_inc(struct jump_label_key *key) | |||
| 71 | atomic_inc(&key->enabled); | 71 | atomic_inc(&key->enabled); |
| 72 | jump_label_unlock(); | 72 | jump_label_unlock(); |
| 73 | } | 73 | } |
| 74 | EXPORT_SYMBOL_GPL(jump_label_inc); | ||
| 74 | 75 | ||
| 75 | static void __jump_label_dec(struct jump_label_key *key, | 76 | static void __jump_label_dec(struct jump_label_key *key, |
| 76 | unsigned long rate_limit, struct delayed_work *work) | 77 | unsigned long rate_limit, struct delayed_work *work) |
| @@ -86,6 +87,7 @@ static void __jump_label_dec(struct jump_label_key *key, | |||
| 86 | 87 | ||
| 87 | jump_label_unlock(); | 88 | jump_label_unlock(); |
| 88 | } | 89 | } |
| 90 | EXPORT_SYMBOL_GPL(jump_label_dec); | ||
| 89 | 91 | ||
| 90 | static void jump_label_update_timeout(struct work_struct *work) | 92 | static void jump_label_update_timeout(struct work_struct *work) |
| 91 | { | 93 | { |
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index a6ec206f36ba..88b2fe3ddf42 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c | |||
| @@ -28,9 +28,15 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, | |||
| 28 | * (addr,len) is fully included in | 28 | * (addr,len) is fully included in |
| 29 | * (zone->addr, zone->size) | 29 | * (zone->addr, zone->size) |
| 30 | */ | 30 | */ |
| 31 | 31 | if (len < 0) | |
| 32 | return (dev->zone.addr <= addr && | 32 | return 0; |
| 33 | addr + len <= dev->zone.addr + dev->zone.size); | 33 | if (addr + len < addr) |
| 34 | return 0; | ||
| 35 | if (addr < dev->zone.addr) | ||
| 36 | return 0; | ||
| 37 | if (addr + len > dev->zone.addr + dev->zone.size) | ||
| 38 | return 0; | ||
| 39 | return 1; | ||
| 34 | } | 40 | } |
| 35 | 41 | ||
| 36 | static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev) | 42 | static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev) |
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 3eed61eb4867..dcaf272c26c0 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
| @@ -185,7 +185,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) | |||
| 185 | irqe.dest_mode = 0; /* Physical mode. */ | 185 | irqe.dest_mode = 0; /* Physical mode. */ |
| 186 | /* need to read apic_id from apic regiest since | 186 | /* need to read apic_id from apic regiest since |
| 187 | * it can be rewritten */ | 187 | * it can be rewritten */ |
| 188 | irqe.dest_id = ioapic->kvm->bsp_vcpu->vcpu_id; | 188 | irqe.dest_id = ioapic->kvm->bsp_vcpu_id; |
| 189 | } | 189 | } |
| 190 | #endif | 190 | #endif |
| 191 | return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); | 191 | return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); |
| @@ -332,9 +332,18 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | |||
| 332 | (void*)addr, len, val); | 332 | (void*)addr, len, val); |
| 333 | ASSERT(!(addr & 0xf)); /* check alignment */ | 333 | ASSERT(!(addr & 0xf)); /* check alignment */ |
| 334 | 334 | ||
| 335 | if (len == 4 || len == 8) | 335 | switch (len) { |
| 336 | case 8: | ||
| 337 | case 4: | ||
| 336 | data = *(u32 *) val; | 338 | data = *(u32 *) val; |
| 337 | else { | 339 | break; |
| 340 | case 2: | ||
| 341 | data = *(u16 *) val; | ||
| 342 | break; | ||
| 343 | case 1: | ||
| 344 | data = *(u8 *) val; | ||
| 345 | break; | ||
| 346 | default: | ||
| 338 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); | 347 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); |
| 339 | return 0; | 348 | return 0; |
| 340 | } | 349 | } |
| @@ -343,7 +352,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | |||
| 343 | spin_lock(&ioapic->lock); | 352 | spin_lock(&ioapic->lock); |
| 344 | switch (addr) { | 353 | switch (addr) { |
| 345 | case IOAPIC_REG_SELECT: | 354 | case IOAPIC_REG_SELECT: |
| 346 | ioapic->ioregsel = data; | 355 | ioapic->ioregsel = data & 0xFF; /* 8-bit register */ |
| 347 | break; | 356 | break; |
| 348 | 357 | ||
| 349 | case IOAPIC_REG_WINDOW: | 358 | case IOAPIC_REG_WINDOW: |
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index a195c07fa829..4e5f7b7f1d2b 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c | |||
| @@ -134,14 +134,15 @@ unmap_pages: | |||
| 134 | 134 | ||
| 135 | static int kvm_iommu_map_memslots(struct kvm *kvm) | 135 | static int kvm_iommu_map_memslots(struct kvm *kvm) |
| 136 | { | 136 | { |
| 137 | int i, idx, r = 0; | 137 | int idx, r = 0; |
| 138 | struct kvm_memslots *slots; | 138 | struct kvm_memslots *slots; |
| 139 | struct kvm_memory_slot *memslot; | ||
| 139 | 140 | ||
| 140 | idx = srcu_read_lock(&kvm->srcu); | 141 | idx = srcu_read_lock(&kvm->srcu); |
| 141 | slots = kvm_memslots(kvm); | 142 | slots = kvm_memslots(kvm); |
| 142 | 143 | ||
| 143 | for (i = 0; i < slots->nmemslots; i++) { | 144 | kvm_for_each_memslot(memslot, slots) { |
| 144 | r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); | 145 | r = kvm_iommu_map_pages(kvm, memslot); |
| 145 | if (r) | 146 | if (r) |
| 146 | break; | 147 | break; |
| 147 | } | 148 | } |
| @@ -311,16 +312,16 @@ static void kvm_iommu_put_pages(struct kvm *kvm, | |||
| 311 | 312 | ||
| 312 | static int kvm_iommu_unmap_memslots(struct kvm *kvm) | 313 | static int kvm_iommu_unmap_memslots(struct kvm *kvm) |
| 313 | { | 314 | { |
| 314 | int i, idx; | 315 | int idx; |
| 315 | struct kvm_memslots *slots; | 316 | struct kvm_memslots *slots; |
| 317 | struct kvm_memory_slot *memslot; | ||
| 316 | 318 | ||
| 317 | idx = srcu_read_lock(&kvm->srcu); | 319 | idx = srcu_read_lock(&kvm->srcu); |
| 318 | slots = kvm_memslots(kvm); | 320 | slots = kvm_memslots(kvm); |
| 319 | 321 | ||
| 320 | for (i = 0; i < slots->nmemslots; i++) { | 322 | kvm_for_each_memslot(memslot, slots) |
| 321 | kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, | 323 | kvm_iommu_put_pages(kvm, memslot->base_gfn, memslot->npages); |
| 322 | slots->memslots[i].npages); | 324 | |
| 323 | } | ||
| 324 | srcu_read_unlock(&kvm->srcu, idx); | 325 | srcu_read_unlock(&kvm->srcu, idx); |
| 325 | 326 | ||
| 326 | return 0; | 327 | return 0; |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d9cfb782cb81..7287bf5d1c9e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -440,6 +440,15 @@ static int kvm_init_mmu_notifier(struct kvm *kvm) | |||
| 440 | 440 | ||
| 441 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ | 441 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ |
| 442 | 442 | ||
| 443 | static void kvm_init_memslots_id(struct kvm *kvm) | ||
| 444 | { | ||
| 445 | int i; | ||
| 446 | struct kvm_memslots *slots = kvm->memslots; | ||
| 447 | |||
| 448 | for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) | ||
| 449 | slots->id_to_index[i] = slots->memslots[i].id = i; | ||
| 450 | } | ||
| 451 | |||
| 443 | static struct kvm *kvm_create_vm(void) | 452 | static struct kvm *kvm_create_vm(void) |
| 444 | { | 453 | { |
| 445 | int r, i; | 454 | int r, i; |
| @@ -465,6 +474,7 @@ static struct kvm *kvm_create_vm(void) | |||
| 465 | kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 474 | kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
| 466 | if (!kvm->memslots) | 475 | if (!kvm->memslots) |
| 467 | goto out_err_nosrcu; | 476 | goto out_err_nosrcu; |
| 477 | kvm_init_memslots_id(kvm); | ||
| 468 | if (init_srcu_struct(&kvm->srcu)) | 478 | if (init_srcu_struct(&kvm->srcu)) |
| 469 | goto out_err_nosrcu; | 479 | goto out_err_nosrcu; |
| 470 | for (i = 0; i < KVM_NR_BUSES; i++) { | 480 | for (i = 0; i < KVM_NR_BUSES; i++) { |
| @@ -547,11 +557,11 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | |||
| 547 | 557 | ||
| 548 | void kvm_free_physmem(struct kvm *kvm) | 558 | void kvm_free_physmem(struct kvm *kvm) |
| 549 | { | 559 | { |
| 550 | int i; | ||
| 551 | struct kvm_memslots *slots = kvm->memslots; | 560 | struct kvm_memslots *slots = kvm->memslots; |
| 561 | struct kvm_memory_slot *memslot; | ||
| 552 | 562 | ||
| 553 | for (i = 0; i < slots->nmemslots; ++i) | 563 | kvm_for_each_memslot(memslot, slots) |
| 554 | kvm_free_physmem_slot(&slots->memslots[i], NULL); | 564 | kvm_free_physmem_slot(memslot, NULL); |
| 555 | 565 | ||
| 556 | kfree(kvm->memslots); | 566 | kfree(kvm->memslots); |
| 557 | } | 567 | } |
| @@ -625,10 +635,69 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) | |||
| 625 | return -ENOMEM; | 635 | return -ENOMEM; |
| 626 | 636 | ||
| 627 | memslot->dirty_bitmap_head = memslot->dirty_bitmap; | 637 | memslot->dirty_bitmap_head = memslot->dirty_bitmap; |
| 638 | memslot->nr_dirty_pages = 0; | ||
| 628 | return 0; | 639 | return 0; |
| 629 | } | 640 | } |
| 630 | #endif /* !CONFIG_S390 */ | 641 | #endif /* !CONFIG_S390 */ |
| 631 | 642 | ||
| 643 | static struct kvm_memory_slot * | ||
| 644 | search_memslots(struct kvm_memslots *slots, gfn_t gfn) | ||
| 645 | { | ||
| 646 | struct kvm_memory_slot *memslot; | ||
| 647 | |||
| 648 | kvm_for_each_memslot(memslot, slots) | ||
| 649 | if (gfn >= memslot->base_gfn && | ||
| 650 | gfn < memslot->base_gfn + memslot->npages) | ||
| 651 | return memslot; | ||
| 652 | |||
| 653 | return NULL; | ||
| 654 | } | ||
| 655 | |||
| 656 | static int cmp_memslot(const void *slot1, const void *slot2) | ||
| 657 | { | ||
| 658 | struct kvm_memory_slot *s1, *s2; | ||
| 659 | |||
| 660 | s1 = (struct kvm_memory_slot *)slot1; | ||
| 661 | s2 = (struct kvm_memory_slot *)slot2; | ||
| 662 | |||
| 663 | if (s1->npages < s2->npages) | ||
| 664 | return 1; | ||
| 665 | if (s1->npages > s2->npages) | ||
| 666 | return -1; | ||
| 667 | |||
| 668 | return 0; | ||
| 669 | } | ||
| 670 | |||
| 671 | /* | ||
| 672 | * Sort the memslots base on its size, so the larger slots | ||
| 673 | * will get better fit. | ||
| 674 | */ | ||
| 675 | static void sort_memslots(struct kvm_memslots *slots) | ||
| 676 | { | ||
| 677 | int i; | ||
| 678 | |||
| 679 | sort(slots->memslots, KVM_MEM_SLOTS_NUM, | ||
| 680 | sizeof(struct kvm_memory_slot), cmp_memslot, NULL); | ||
| 681 | |||
| 682 | for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) | ||
| 683 | slots->id_to_index[slots->memslots[i].id] = i; | ||
| 684 | } | ||
| 685 | |||
| 686 | void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) | ||
| 687 | { | ||
| 688 | if (new) { | ||
| 689 | int id = new->id; | ||
| 690 | struct kvm_memory_slot *old = id_to_memslot(slots, id); | ||
| 691 | unsigned long npages = old->npages; | ||
| 692 | |||
| 693 | *old = *new; | ||
| 694 | if (new->npages != npages) | ||
| 695 | sort_memslots(slots); | ||
| 696 | } | ||
| 697 | |||
| 698 | slots->generation++; | ||
| 699 | } | ||
| 700 | |||
| 632 | /* | 701 | /* |
| 633 | * Allocate some memory and give it an address in the guest physical address | 702 | * Allocate some memory and give it an address in the guest physical address |
| 634 | * space. | 703 | * space. |
| @@ -662,12 +731,12 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 662 | (void __user *)(unsigned long)mem->userspace_addr, | 731 | (void __user *)(unsigned long)mem->userspace_addr, |
| 663 | mem->memory_size))) | 732 | mem->memory_size))) |
| 664 | goto out; | 733 | goto out; |
| 665 | if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) | 734 | if (mem->slot >= KVM_MEM_SLOTS_NUM) |
| 666 | goto out; | 735 | goto out; |
| 667 | if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) | 736 | if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) |
| 668 | goto out; | 737 | goto out; |
| 669 | 738 | ||
| 670 | memslot = &kvm->memslots->memslots[mem->slot]; | 739 | memslot = id_to_memslot(kvm->memslots, mem->slot); |
| 671 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; | 740 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; |
| 672 | npages = mem->memory_size >> PAGE_SHIFT; | 741 | npages = mem->memory_size >> PAGE_SHIFT; |
| 673 | 742 | ||
| @@ -774,15 +843,17 @@ skip_lpage: | |||
| 774 | #endif /* not defined CONFIG_S390 */ | 843 | #endif /* not defined CONFIG_S390 */ |
| 775 | 844 | ||
| 776 | if (!npages) { | 845 | if (!npages) { |
| 846 | struct kvm_memory_slot *slot; | ||
| 847 | |||
| 777 | r = -ENOMEM; | 848 | r = -ENOMEM; |
| 778 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 849 | slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), |
| 850 | GFP_KERNEL); | ||
| 779 | if (!slots) | 851 | if (!slots) |
| 780 | goto out_free; | 852 | goto out_free; |
| 781 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 853 | slot = id_to_memslot(slots, mem->slot); |
| 782 | if (mem->slot >= slots->nmemslots) | 854 | slot->flags |= KVM_MEMSLOT_INVALID; |
| 783 | slots->nmemslots = mem->slot + 1; | 855 | |
| 784 | slots->generation++; | 856 | update_memslots(slots, NULL); |
| 785 | slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; | ||
| 786 | 857 | ||
| 787 | old_memslots = kvm->memslots; | 858 | old_memslots = kvm->memslots; |
| 788 | rcu_assign_pointer(kvm->memslots, slots); | 859 | rcu_assign_pointer(kvm->memslots, slots); |
| @@ -810,13 +881,10 @@ skip_lpage: | |||
| 810 | } | 881 | } |
| 811 | 882 | ||
| 812 | r = -ENOMEM; | 883 | r = -ENOMEM; |
| 813 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 884 | slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), |
| 885 | GFP_KERNEL); | ||
| 814 | if (!slots) | 886 | if (!slots) |
| 815 | goto out_free; | 887 | goto out_free; |
| 816 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | ||
| 817 | if (mem->slot >= slots->nmemslots) | ||
| 818 | slots->nmemslots = mem->slot + 1; | ||
| 819 | slots->generation++; | ||
| 820 | 888 | ||
| 821 | /* actual memory is freed via old in kvm_free_physmem_slot below */ | 889 | /* actual memory is freed via old in kvm_free_physmem_slot below */ |
| 822 | if (!npages) { | 890 | if (!npages) { |
| @@ -826,7 +894,7 @@ skip_lpage: | |||
| 826 | new.lpage_info[i] = NULL; | 894 | new.lpage_info[i] = NULL; |
| 827 | } | 895 | } |
| 828 | 896 | ||
| 829 | slots->memslots[mem->slot] = new; | 897 | update_memslots(slots, &new); |
| 830 | old_memslots = kvm->memslots; | 898 | old_memslots = kvm->memslots; |
| 831 | rcu_assign_pointer(kvm->memslots, slots); | 899 | rcu_assign_pointer(kvm->memslots, slots); |
| 832 | synchronize_srcu_expedited(&kvm->srcu); | 900 | synchronize_srcu_expedited(&kvm->srcu); |
| @@ -888,7 +956,7 @@ int kvm_get_dirty_log(struct kvm *kvm, | |||
| 888 | if (log->slot >= KVM_MEMORY_SLOTS) | 956 | if (log->slot >= KVM_MEMORY_SLOTS) |
| 889 | goto out; | 957 | goto out; |
| 890 | 958 | ||
| 891 | memslot = &kvm->memslots->memslots[log->slot]; | 959 | memslot = id_to_memslot(kvm->memslots, log->slot); |
| 892 | r = -ENOENT; | 960 | r = -ENOENT; |
| 893 | if (!memslot->dirty_bitmap) | 961 | if (!memslot->dirty_bitmap) |
| 894 | goto out; | 962 | goto out; |
| @@ -966,16 +1034,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva); | |||
| 966 | static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots, | 1034 | static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots, |
| 967 | gfn_t gfn) | 1035 | gfn_t gfn) |
| 968 | { | 1036 | { |
| 969 | int i; | 1037 | return search_memslots(slots, gfn); |
| 970 | |||
| 971 | for (i = 0; i < slots->nmemslots; ++i) { | ||
| 972 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | ||
| 973 | |||
| 974 | if (gfn >= memslot->base_gfn | ||
| 975 | && gfn < memslot->base_gfn + memslot->npages) | ||
| 976 | return memslot; | ||
| 977 | } | ||
| 978 | return NULL; | ||
| 979 | } | 1038 | } |
| 980 | 1039 | ||
| 981 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | 1040 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) |
| @@ -986,20 +1045,13 @@ EXPORT_SYMBOL_GPL(gfn_to_memslot); | |||
| 986 | 1045 | ||
| 987 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) | 1046 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) |
| 988 | { | 1047 | { |
| 989 | int i; | 1048 | struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); |
| 990 | struct kvm_memslots *slots = kvm_memslots(kvm); | ||
| 991 | 1049 | ||
| 992 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 1050 | if (!memslot || memslot->id >= KVM_MEMORY_SLOTS || |
| 993 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | 1051 | memslot->flags & KVM_MEMSLOT_INVALID) |
| 994 | 1052 | return 0; | |
| 995 | if (memslot->flags & KVM_MEMSLOT_INVALID) | ||
| 996 | continue; | ||
| 997 | 1053 | ||
| 998 | if (gfn >= memslot->base_gfn | 1054 | return 1; |
| 999 | && gfn < memslot->base_gfn + memslot->npages) | ||
| 1000 | return 1; | ||
| 1001 | } | ||
| 1002 | return 0; | ||
| 1003 | } | 1055 | } |
| 1004 | EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); | 1056 | EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); |
| 1005 | 1057 | ||
| @@ -1491,7 +1543,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, | |||
| 1491 | if (memslot && memslot->dirty_bitmap) { | 1543 | if (memslot && memslot->dirty_bitmap) { |
| 1492 | unsigned long rel_gfn = gfn - memslot->base_gfn; | 1544 | unsigned long rel_gfn = gfn - memslot->base_gfn; |
| 1493 | 1545 | ||
| 1494 | __set_bit_le(rel_gfn, memslot->dirty_bitmap); | 1546 | if (!__test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap)) |
| 1547 | memslot->nr_dirty_pages++; | ||
| 1495 | } | 1548 | } |
| 1496 | } | 1549 | } |
| 1497 | 1550 | ||
| @@ -1690,10 +1743,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) | |||
| 1690 | smp_wmb(); | 1743 | smp_wmb(); |
| 1691 | atomic_inc(&kvm->online_vcpus); | 1744 | atomic_inc(&kvm->online_vcpus); |
| 1692 | 1745 | ||
| 1693 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | ||
| 1694 | if (kvm->bsp_vcpu_id == id) | ||
| 1695 | kvm->bsp_vcpu = vcpu; | ||
| 1696 | #endif | ||
| 1697 | mutex_unlock(&kvm->lock); | 1746 | mutex_unlock(&kvm->lock); |
| 1698 | return r; | 1747 | return r; |
| 1699 | 1748 | ||
| @@ -1768,12 +1817,11 @@ out_free1: | |||
| 1768 | struct kvm_regs *kvm_regs; | 1817 | struct kvm_regs *kvm_regs; |
| 1769 | 1818 | ||
| 1770 | r = -ENOMEM; | 1819 | r = -ENOMEM; |
| 1771 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); | 1820 | kvm_regs = memdup_user(argp, sizeof(*kvm_regs)); |
| 1772 | if (!kvm_regs) | 1821 | if (IS_ERR(kvm_regs)) { |
| 1822 | r = PTR_ERR(kvm_regs); | ||
| 1773 | goto out; | 1823 | goto out; |
| 1774 | r = -EFAULT; | 1824 | } |
| 1775 | if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) | ||
| 1776 | goto out_free2; | ||
| 1777 | r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); | 1825 | r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); |
| 1778 | if (r) | 1826 | if (r) |
| 1779 | goto out_free2; | 1827 | goto out_free2; |
| @@ -1797,13 +1845,11 @@ out_free2: | |||
| 1797 | break; | 1845 | break; |
| 1798 | } | 1846 | } |
| 1799 | case KVM_SET_SREGS: { | 1847 | case KVM_SET_SREGS: { |
| 1800 | kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); | 1848 | kvm_sregs = memdup_user(argp, sizeof(*kvm_sregs)); |
| 1801 | r = -ENOMEM; | 1849 | if (IS_ERR(kvm_sregs)) { |
| 1802 | if (!kvm_sregs) | 1850 | r = PTR_ERR(kvm_sregs); |
| 1803 | goto out; | ||
| 1804 | r = -EFAULT; | ||
| 1805 | if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) | ||
| 1806 | goto out; | 1851 | goto out; |
| 1852 | } | ||
| 1807 | r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); | 1853 | r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); |
| 1808 | if (r) | 1854 | if (r) |
| 1809 | goto out; | 1855 | goto out; |
| @@ -1899,13 +1945,11 @@ out_free2: | |||
| 1899 | break; | 1945 | break; |
| 1900 | } | 1946 | } |
| 1901 | case KVM_SET_FPU: { | 1947 | case KVM_SET_FPU: { |
| 1902 | fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); | 1948 | fpu = memdup_user(argp, sizeof(*fpu)); |
| 1903 | r = -ENOMEM; | 1949 | if (IS_ERR(fpu)) { |
| 1904 | if (!fpu) | 1950 | r = PTR_ERR(fpu); |
| 1905 | goto out; | ||
| 1906 | r = -EFAULT; | ||
| 1907 | if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu))) | ||
| 1908 | goto out; | 1951 | goto out; |
| 1952 | } | ||
| 1909 | r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); | 1953 | r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); |
| 1910 | if (r) | 1954 | if (r) |
| 1911 | goto out; | 1955 | goto out; |
| @@ -2520,10 +2564,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
| 2520 | if (bus->dev_count > NR_IOBUS_DEVS-1) | 2564 | if (bus->dev_count > NR_IOBUS_DEVS-1) |
| 2521 | return -ENOSPC; | 2565 | return -ENOSPC; |
| 2522 | 2566 | ||
| 2523 | new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); | 2567 | new_bus = kmemdup(bus, sizeof(struct kvm_io_bus), GFP_KERNEL); |
| 2524 | if (!new_bus) | 2568 | if (!new_bus) |
| 2525 | return -ENOMEM; | 2569 | return -ENOMEM; |
| 2526 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); | ||
| 2527 | kvm_io_bus_insert_dev(new_bus, dev, addr, len); | 2570 | kvm_io_bus_insert_dev(new_bus, dev, addr, len); |
| 2528 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | 2571 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); |
| 2529 | synchronize_srcu_expedited(&kvm->srcu); | 2572 | synchronize_srcu_expedited(&kvm->srcu); |
| @@ -2539,13 +2582,12 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | |||
| 2539 | int i, r; | 2582 | int i, r; |
| 2540 | struct kvm_io_bus *new_bus, *bus; | 2583 | struct kvm_io_bus *new_bus, *bus; |
| 2541 | 2584 | ||
| 2542 | new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); | 2585 | bus = kvm->buses[bus_idx]; |
| 2586 | |||
| 2587 | new_bus = kmemdup(bus, sizeof(*bus), GFP_KERNEL); | ||
| 2543 | if (!new_bus) | 2588 | if (!new_bus) |
| 2544 | return -ENOMEM; | 2589 | return -ENOMEM; |
| 2545 | 2590 | ||
| 2546 | bus = kvm->buses[bus_idx]; | ||
| 2547 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); | ||
| 2548 | |||
| 2549 | r = -ENOENT; | 2591 | r = -ENOENT; |
| 2550 | for (i = 0; i < new_bus->dev_count; i++) | 2592 | for (i = 0; i < new_bus->dev_count; i++) |
| 2551 | if (new_bus->range[i].dev == dev) { | 2593 | if (new_bus->range[i].dev == dev) { |
| @@ -2612,15 +2654,29 @@ static const struct file_operations *stat_fops[] = { | |||
| 2612 | [KVM_STAT_VM] = &vm_stat_fops, | 2654 | [KVM_STAT_VM] = &vm_stat_fops, |
| 2613 | }; | 2655 | }; |
| 2614 | 2656 | ||
| 2615 | static void kvm_init_debug(void) | 2657 | static int kvm_init_debug(void) |
| 2616 | { | 2658 | { |
| 2659 | int r = -EFAULT; | ||
| 2617 | struct kvm_stats_debugfs_item *p; | 2660 | struct kvm_stats_debugfs_item *p; |
| 2618 | 2661 | ||
| 2619 | kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); | 2662 | kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); |
| 2620 | for (p = debugfs_entries; p->name; ++p) | 2663 | if (kvm_debugfs_dir == NULL) |
| 2664 | goto out; | ||
| 2665 | |||
| 2666 | for (p = debugfs_entries; p->name; ++p) { | ||
| 2621 | p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, | 2667 | p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, |
| 2622 | (void *)(long)p->offset, | 2668 | (void *)(long)p->offset, |
| 2623 | stat_fops[p->kind]); | 2669 | stat_fops[p->kind]); |
| 2670 | if (p->dentry == NULL) | ||
| 2671 | goto out_dir; | ||
| 2672 | } | ||
| 2673 | |||
| 2674 | return 0; | ||
| 2675 | |||
| 2676 | out_dir: | ||
| 2677 | debugfs_remove_recursive(kvm_debugfs_dir); | ||
| 2678 | out: | ||
| 2679 | return r; | ||
| 2624 | } | 2680 | } |
| 2625 | 2681 | ||
| 2626 | static void kvm_exit_debug(void) | 2682 | static void kvm_exit_debug(void) |
| @@ -2764,10 +2820,16 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | |||
| 2764 | kvm_preempt_ops.sched_in = kvm_sched_in; | 2820 | kvm_preempt_ops.sched_in = kvm_sched_in; |
| 2765 | kvm_preempt_ops.sched_out = kvm_sched_out; | 2821 | kvm_preempt_ops.sched_out = kvm_sched_out; |
| 2766 | 2822 | ||
| 2767 | kvm_init_debug(); | 2823 | r = kvm_init_debug(); |
| 2824 | if (r) { | ||
| 2825 | printk(KERN_ERR "kvm: create debugfs files failed\n"); | ||
| 2826 | goto out_undebugfs; | ||
| 2827 | } | ||
| 2768 | 2828 | ||
| 2769 | return 0; | 2829 | return 0; |
| 2770 | 2830 | ||
| 2831 | out_undebugfs: | ||
| 2832 | unregister_syscore_ops(&kvm_syscore_ops); | ||
| 2771 | out_unreg: | 2833 | out_unreg: |
| 2772 | kvm_async_pf_deinit(); | 2834 | kvm_async_pf_deinit(); |
| 2773 | out_free: | 2835 | out_free: |
