author     Linus Torvalds <torvalds@linux-foundation.org>  2012-05-24 19:17:30 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-05-24 19:17:30 -0400
commit     07acfc2a9349a8ce45b236c2624dad452001966b (patch)
tree       c40f3eaac18a8320e65af220979223b5cd632b1b
parent     b5f4035adfffbcc6b478de5b8c44b618b3124aff (diff)
parent     322728e55aa7834e2fab2786b76df183c4843a12 (diff)
Merge branch 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM changes from Avi Kivity:
 "Changes include additional instruction emulation, page-crossing MMIO,
  faster dirty logging, preventing the watchdog from killing a stopped
  guest, module autoload, a new MSI ABI, and some minor optimizations
  and fixes.  Outside x86 we have a small s390 and a very large ppc
  update.

  Regarding the new (for kvm) rebaseless workflow, some of the patches
  that were merged before we switch trees had to be rebased, while
  others are true pulls.  In either case the signoffs should be correct
  now."

Fix up trivial conflicts in Documentation/feature-removal-schedule.txt
arch/powerpc/kvm/book3s_segment.S and arch/x86/include/asm/kvm_para.h.

I suspect the kvm_para.h resolution ends up doing the "do I have cpuid"
check effectively twice (it was done differently in two different
commits), but better safe than sorry ;)

* 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (125 commits)
  KVM: make asm-generic/kvm_para.h have an ifdef __KERNEL__ block
  KVM: s390: onereg for timer related registers
  KVM: s390: epoch difference and TOD programmable field
  KVM: s390: KVM_GET/SET_ONEREG for s390
  KVM: s390: add capability indicating COW support
  KVM: Fix mmu_reload() clash with nested vmx event injection
  KVM: MMU: Don't use RCU for lockless shadow walking
  KVM: VMX: Optimize %ds, %es reload
  KVM: VMX: Fix %ds/%es clobber
  KVM: x86 emulator: convert bsf/bsr instructions to emulate_2op_SrcV_nobyte()
  KVM: VMX: unlike vmcs on fail path
  KVM: PPC: Emulator: clean up SPR reads and writes
  KVM: PPC: Emulator: clean up instruction parsing
  kvm/powerpc: Add new ioctl to retreive server MMU infos
  kvm/book3s: Make kernel emulated H_PUT_TCE available for "PR" KVM
  KVM: PPC: bookehv: Fix r8/r13 storing in level exception handler
  KVM: PPC: Book3S: Enable IRQs during exit handling
  KVM: PPC: Fix PR KVM on POWER7 bare metal
  KVM: PPC: Fix stbux emulation
  KVM: PPC: bookehv: Use lwz/stw instead of PPC_LL/PPC_STL for 32-bit fields
  ...
-rw-r--r--Documentation/feature-removal-schedule.txt7
-rw-r--r--Documentation/virtual/kvm/api.txt281
-rw-r--r--Documentation/virtual/kvm/cpuid.txt6
-rw-r--r--Documentation/virtual/kvm/msr.txt4
-rw-r--r--arch/alpha/include/asm/kvm_para.h1
-rw-r--r--arch/arm/include/asm/kvm_para.h1
-rw-r--r--arch/avr32/include/asm/kvm_para.h1
-rw-r--r--arch/blackfin/include/asm/kvm_para.h1
-rw-r--r--arch/c6x/include/asm/kvm_para.h1
-rw-r--r--arch/frv/include/asm/kvm_para.h1
-rw-r--r--arch/h8300/include/asm/kvm_para.h1
-rw-r--r--arch/hexagon/include/asm/kvm_para.h1
-rw-r--r--arch/ia64/include/asm/kvm_host.h3
-rw-r--r--arch/ia64/include/asm/kvm_para.h5
-rw-r--r--arch/ia64/kvm/kvm-ia64.c30
-rw-r--r--arch/m68k/include/asm/kvm_para.h1
-rw-r--r--arch/microblaze/include/asm/kvm_para.h1
-rw-r--r--arch/mips/include/asm/kvm_para.h1
-rw-r--r--arch/mn10300/include/asm/kvm_para.h1
-rw-r--r--arch/openrisc/include/asm/kvm_para.h1
-rw-r--r--arch/parisc/include/asm/kvm_para.h1
-rw-r--r--arch/powerpc/include/asm/cputable.h23
-rw-r--r--arch/powerpc/include/asm/dbell.h3
-rw-r--r--arch/powerpc/include/asm/hvcall.h10
-rw-r--r--arch/powerpc/include/asm/hw_irq.h1
-rw-r--r--arch/powerpc/include/asm/kvm.h1
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h18
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h3
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_asm.h8
-rw-r--r--arch/powerpc/include/asm/kvm_booke.h3
-rw-r--r--arch/powerpc/include/asm/kvm_booke_hv_asm.h49
-rw-r--r--arch/powerpc/include/asm/kvm_e500.h96
-rw-r--r--arch/powerpc/include/asm/kvm_host.h60
-rw-r--r--arch/powerpc/include/asm/kvm_para.h5
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h20
-rw-r--r--arch/powerpc/include/asm/mmu-book3e.h6
-rw-r--r--arch/powerpc/include/asm/processor.h3
-rw-r--r--arch/powerpc/include/asm/reg.h2
-rw-r--r--arch/powerpc/include/asm/reg_booke.h34
-rw-r--r--arch/powerpc/include/asm/switch_to.h1
-rw-r--r--arch/powerpc/include/asm/time.h1
-rw-r--r--arch/powerpc/kernel/asm-offsets.c19
-rw-r--r--arch/powerpc/kernel/cpu_setup_fsl_booke.S1
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S12
-rw-r--r--arch/powerpc/kernel/head_44x.S23
-rw-r--r--arch/powerpc/kernel/head_booke.h69
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S98
-rw-r--r--arch/powerpc/kernel/idle_power7.S7
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c4
-rw-r--r--arch/powerpc/kernel/time.c3
-rw-r--r--arch/powerpc/kvm/44x.c12
-rw-r--r--arch/powerpc/kvm/44x_emulate.c51
-rw-r--r--arch/powerpc/kvm/Kconfig28
-rw-r--r--arch/powerpc/kvm/Makefile17
-rw-r--r--arch/powerpc/kvm/book3s.c7
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c31
-rw-r--r--arch/powerpc/kvm/book3s_64_slb.S2
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c150
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c3
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c106
-rw-r--r--arch/powerpc/kvm/book3s_hv.c467
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S9
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S185
-rw-r--r--arch/powerpc/kvm/book3s_pr.c59
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c101
-rw-r--r--arch/powerpc/kvm/book3s_segment.S13
-rw-r--r--arch/powerpc/kvm/booke.c471
-rw-r--r--arch/powerpc/kvm/booke.h62
-rw-r--r--arch/powerpc/kvm/booke_emulate.c118
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S8
-rw-r--r--arch/powerpc/kvm/bookehv_interrupts.S597
-rw-r--r--arch/powerpc/kvm/e500.c372
-rw-r--r--arch/powerpc/kvm/e500.h306
-rw-r--r--arch/powerpc/kvm/e500_emulate.c210
-rw-r--r--arch/powerpc/kvm/e500_tlb.c666
-rw-r--r--arch/powerpc/kvm/e500_tlb.h174
-rw-r--r--arch/powerpc/kvm/e500mc.c342
-rw-r--r--arch/powerpc/kvm/emulate.c197
-rw-r--r--arch/powerpc/kvm/powerpc.c94
-rw-r--r--arch/powerpc/kvm/timing.h6
-rw-r--r--arch/s390/include/asm/kvm.h5
-rw-r--r--arch/s390/include/asm/kvm_host.h1
-rw-r--r--arch/s390/include/asm/kvm_para.h5
-rw-r--r--arch/s390/include/asm/sclp.h1
-rw-r--r--arch/s390/kvm/diag.c29
-rw-r--r--arch/s390/kvm/intercept.c1
-rw-r--r--arch/s390/kvm/kvm-s390.c87
-rw-r--r--arch/s390/kvm/kvm-s390.h1
-rw-r--r--arch/s390/kvm/priv.c31
-rw-r--r--arch/score/include/asm/kvm_para.h1
-rw-r--r--arch/sh/include/asm/kvm_para.h1
-rw-r--r--arch/sparc/include/asm/kvm_para.h1
-rw-r--r--arch/tile/include/asm/kvm_para.h1
-rw-r--r--arch/um/include/asm/kvm_para.h1
-rw-r--r--arch/unicore32/include/asm/kvm_para.h1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h13
-rw-r--r--arch/x86/include/asm/kvm_para.h24
-rw-r--r--arch/x86/include/asm/pvclock-abi.h1
-rw-r--r--arch/x86/kernel/kvmclock.c20
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/cpuid.c5
-rw-r--r--arch/x86/kvm/emulate.c293
-rw-r--r--arch/x86/kvm/i8254.c31
-rw-r--r--arch/x86/kvm/i8254.h7
-rw-r--r--arch/x86/kvm/lapic.c31
-rw-r--r--arch/x86/kvm/mmu.c345
-rw-r--r--arch/x86/kvm/mmu_audit.c10
-rw-r--r--arch/x86/kvm/paging_tmpl.h2
-rw-r--r--arch/x86/kvm/svm.c9
-rw-r--r--arch/x86/kvm/vmx.c41
-rw-r--r--arch/x86/kvm/x86.c280
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--arch/xtensa/include/asm/kvm_para.h1
-rw-r--r--drivers/s390/char/sclp_cmd.c12
-rw-r--r--include/asm-generic/kvm_para.h22
-rw-r--r--include/linux/kvm.h42
-rw-r--r--include/linux/kvm_host.h55
-rw-r--r--kernel/watchdog.c12
-rw-r--r--virt/kvm/Kconfig3
-rw-r--r--virt/kvm/ioapic.c10
-rw-r--r--virt/kvm/ioapic.h1
-rw-r--r--virt/kvm/irq_comm.c14
-rw-r--r--virt/kvm/kvm_main.c132
124 files changed, 5419 insertions, 1968 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 50d82ae09e2a..4ba1eb7590a7 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -588,3 +588,10 @@ Why: Remount currently allows changing bound subsystems and
 	replaced with conventional fsnotify.
 
 ----------------------------
+
+What:	KVM debugfs statistics
+When:	2013
+Why:	KVM tracepoints provide mostly equivalent information in a much more
+	flexible fashion.
+
+----------------------------
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 6386f8c0482e..930126698a0f 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2,6 +2,7 @@ The Definitive KVM (Kernel-based Virtual Machine) API Documentation
 ===================================================================
 
 1. General description
+----------------------
 
 The kvm API is a set of ioctls that are issued to control various aspects
 of a virtual machine. The ioctls belong to three classes
@@ -23,7 +24,9 @@ of a virtual machine. The ioctls belong to three classes
23 Only run vcpu ioctls from the same thread that was used to create the 24 Only run vcpu ioctls from the same thread that was used to create the
24 vcpu. 25 vcpu.
25 26
27
262. File descriptors 282. File descriptors
29-------------------
27 30
28The kvm API is centered around file descriptors. An initial 31The kvm API is centered around file descriptors. An initial
29open("/dev/kvm") obtains a handle to the kvm subsystem; this handle 32open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
@@ -41,7 +44,9 @@ not cause harm to the host, their actual behavior is not guaranteed by
41the API. The only supported use is one virtual machine per process, 44the API. The only supported use is one virtual machine per process,
42and one vcpu per thread. 45and one vcpu per thread.
43 46
47
443. Extensions 483. Extensions
49-------------
45 50
46As of Linux 2.6.22, the KVM ABI has been stabilized: no backward 51As of Linux 2.6.22, the KVM ABI has been stabilized: no backward
47incompatible change are allowed. However, there is an extension 52incompatible change are allowed. However, there is an extension
@@ -53,7 +58,9 @@ Instead, kvm defines extension identifiers and a facility to query
53whether a particular extension identifier is available. If it is, a 58whether a particular extension identifier is available. If it is, a
54set of ioctls is available for application use. 59set of ioctls is available for application use.
55 60
61
564. API description 624. API description
63------------------
57 64
58This section describes ioctls that can be used to control kvm guests. 65This section describes ioctls that can be used to control kvm guests.
59For each ioctl, the following information is provided along with a 66For each ioctl, the following information is provided along with a
@@ -75,6 +82,7 @@ description:
75 Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL) 82 Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL)
76 are not detailed, but errors with specific meanings are. 83 are not detailed, but errors with specific meanings are.
77 84
85
784.1 KVM_GET_API_VERSION 864.1 KVM_GET_API_VERSION
79 87
80Capability: basic 88Capability: basic
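
Taken together, sections 2 through 4.2 describe the usual bring-up flow.
Below is a minimal userspace sketch of that flow, not part of this patch;
the variable names are illustrative and error handling is abbreviated.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

/* Hypothetical setup sketch: open /dev/kvm, verify the ABI version,
 * create a VM and one vcpu, and map the shared kvm_run area. */
int main(void)
{
	int kvm, vm, vcpu, mmap_size;
	struct kvm_run *run;

	kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	if (kvm < 0 || ioctl(kvm, KVM_GET_API_VERSION, 0) != 12) {
		fprintf(stderr, "no usable KVM\n");
		return 1;
	}

	vm = ioctl(kvm, KVM_CREATE_VM, 0);	/* machine type 0 */
	vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);	/* vcpu id 0 */

	/* The run structure (section 5) is mmap()ed from the vcpu fd. */
	mmap_size = ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, 0);
	run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu, 0);
	if (run == MAP_FAILED)
		return 1;

	/* ...set up memory and registers, then loop on ioctl(vcpu, KVM_RUN, 0)... */
	return 0;
}
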
@@ -90,6 +98,7 @@ supported. Applications should refuse to run if KVM_GET_API_VERSION
90returns a value other than 12. If this check passes, all ioctls 98returns a value other than 12. If this check passes, all ioctls
91described as 'basic' will be available. 99described as 'basic' will be available.
92 100
101
934.2 KVM_CREATE_VM 1024.2 KVM_CREATE_VM
94 103
95Capability: basic 104Capability: basic
@@ -109,6 +118,7 @@ In order to create user controlled virtual machines on S390, check
109KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as 118KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
110privileged user (CAP_SYS_ADMIN). 119privileged user (CAP_SYS_ADMIN).
111 120
121
1124.3 KVM_GET_MSR_INDEX_LIST 1224.3 KVM_GET_MSR_INDEX_LIST
113 123
114Capability: basic 124Capability: basic
@@ -135,6 +145,7 @@ Note: if kvm indicates supports MCE (KVM_CAP_MCE), then the MCE bank MSRs are
135not returned in the MSR list, as different vcpus can have a different number 145not returned in the MSR list, as different vcpus can have a different number
136of banks, as set via the KVM_X86_SETUP_MCE ioctl. 146of banks, as set via the KVM_X86_SETUP_MCE ioctl.
137 147
148
1384.4 KVM_CHECK_EXTENSION 1494.4 KVM_CHECK_EXTENSION
139 150
140Capability: basic 151Capability: basic
@@ -149,6 +160,7 @@ receives an integer that describes the extension availability.
149Generally 0 means no and 1 means yes, but some extensions may report 160Generally 0 means no and 1 means yes, but some extensions may report
150additional information in the integer return value. 161additional information in the integer return value.
151 162
163
1524.5 KVM_GET_VCPU_MMAP_SIZE 1644.5 KVM_GET_VCPU_MMAP_SIZE
153 165
154Capability: basic 166Capability: basic
@@ -161,6 +173,7 @@ The KVM_RUN ioctl (cf.) communicates with userspace via a shared
161memory region. This ioctl returns the size of that region. See the 173memory region. This ioctl returns the size of that region. See the
162KVM_RUN documentation for details. 174KVM_RUN documentation for details.
163 175
176
1644.6 KVM_SET_MEMORY_REGION 1774.6 KVM_SET_MEMORY_REGION
165 178
166Capability: basic 179Capability: basic
@@ -171,6 +184,7 @@ Returns: 0 on success, -1 on error
171 184
172This ioctl is obsolete and has been removed. 185This ioctl is obsolete and has been removed.
173 186
187
1744.7 KVM_CREATE_VCPU 1884.7 KVM_CREATE_VCPU
175 189
176Capability: basic 190Capability: basic
@@ -223,6 +237,7 @@ machines, the resulting vcpu fd can be memory mapped at page offset
223KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual 237KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual
224cpu's hardware control block. 238cpu's hardware control block.
225 239
240
2264.8 KVM_GET_DIRTY_LOG (vm ioctl) 2414.8 KVM_GET_DIRTY_LOG (vm ioctl)
227 242
228Capability: basic 243Capability: basic
@@ -246,6 +261,7 @@ since the last call to this ioctl. Bit 0 is the first page in the
246memory slot. Ensure the entire structure is cleared to avoid padding 261memory slot. Ensure the entire structure is cleared to avoid padding
247issues. 262issues.
248 263
264
2494.9 KVM_SET_MEMORY_ALIAS 2654.9 KVM_SET_MEMORY_ALIAS
250 266
251Capability: basic 267Capability: basic
@@ -256,6 +272,7 @@ Returns: 0 (success), -1 (error)
256 272
257This ioctl is obsolete and has been removed. 273This ioctl is obsolete and has been removed.
258 274
275
2594.10 KVM_RUN 2764.10 KVM_RUN
260 277
261Capability: basic 278Capability: basic
@@ -272,6 +289,7 @@ obtained by mmap()ing the vcpu fd at offset 0, with the size given by
272KVM_GET_VCPU_MMAP_SIZE. The parameter block is formatted as a 'struct 289KVM_GET_VCPU_MMAP_SIZE. The parameter block is formatted as a 'struct
273kvm_run' (see below). 290kvm_run' (see below).
274 291
292
2754.11 KVM_GET_REGS 2934.11 KVM_GET_REGS
276 294
277Capability: basic 295Capability: basic
@@ -292,6 +310,7 @@ struct kvm_regs {
292 __u64 rip, rflags; 310 __u64 rip, rflags;
293}; 311};
294 312
313
2954.12 KVM_SET_REGS 3144.12 KVM_SET_REGS
296 315
297Capability: basic 316Capability: basic
@@ -304,6 +323,7 @@ Writes the general purpose registers into the vcpu.
304 323
305See KVM_GET_REGS for the data structure. 324See KVM_GET_REGS for the data structure.
306 325
326
3074.13 KVM_GET_SREGS 3274.13 KVM_GET_SREGS
308 328
309Capability: basic 329Capability: basic
@@ -331,6 +351,7 @@ interrupt_bitmap is a bitmap of pending external interrupts. At most
331one bit may be set. This interrupt has been acknowledged by the APIC 351one bit may be set. This interrupt has been acknowledged by the APIC
332but not yet injected into the cpu core. 352but not yet injected into the cpu core.
333 353
354
3344.14 KVM_SET_SREGS 3554.14 KVM_SET_SREGS
335 356
336Capability: basic 357Capability: basic
@@ -342,6 +363,7 @@ Returns: 0 on success, -1 on error
342Writes special registers into the vcpu. See KVM_GET_SREGS for the 363Writes special registers into the vcpu. See KVM_GET_SREGS for the
343data structures. 364data structures.
344 365
366
3454.15 KVM_TRANSLATE 3674.15 KVM_TRANSLATE
346 368
347Capability: basic 369Capability: basic
@@ -365,6 +387,7 @@ struct kvm_translation {
365 __u8 pad[5]; 387 __u8 pad[5];
366}; 388};
367 389
390
3684.16 KVM_INTERRUPT 3914.16 KVM_INTERRUPT
369 392
370Capability: basic 393Capability: basic
@@ -413,6 +436,7 @@ c) KVM_INTERRUPT_SET_LEVEL
413Note that any value for 'irq' other than the ones stated above is invalid 436Note that any value for 'irq' other than the ones stated above is invalid
414and incurs unexpected behavior. 437and incurs unexpected behavior.
415 438
439
4164.17 KVM_DEBUG_GUEST 4404.17 KVM_DEBUG_GUEST
417 441
418Capability: basic 442Capability: basic
@@ -423,6 +447,7 @@ Returns: -1 on error
423 447
424Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead. 448Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead.
425 449
450
4264.18 KVM_GET_MSRS 4514.18 KVM_GET_MSRS
427 452
428Capability: basic 453Capability: basic
@@ -451,6 +476,7 @@ Application code should set the 'nmsrs' member (which indicates the
451size of the entries array) and the 'index' member of each array entry. 476size of the entries array) and the 'index' member of each array entry.
452kvm will fill in the 'data' member. 477kvm will fill in the 'data' member.
453 478
479
4544.19 KVM_SET_MSRS 4804.19 KVM_SET_MSRS
455 481
456Capability: basic 482Capability: basic
@@ -466,6 +492,7 @@ Application code should set the 'nmsrs' member (which indicates the
466size of the entries array), and the 'index' and 'data' members of each 492size of the entries array), and the 'index' and 'data' members of each
467array entry. 493array entry.
468 494
495
4694.20 KVM_SET_CPUID 4964.20 KVM_SET_CPUID
470 497
471Capability: basic 498Capability: basic
@@ -494,6 +521,7 @@ struct kvm_cpuid {
494 struct kvm_cpuid_entry entries[0]; 521 struct kvm_cpuid_entry entries[0];
495}; 522};
496 523
524
4974.21 KVM_SET_SIGNAL_MASK 5254.21 KVM_SET_SIGNAL_MASK
498 526
499Capability: basic 527Capability: basic
@@ -516,6 +544,7 @@ struct kvm_signal_mask {
516 __u8 sigset[0]; 544 __u8 sigset[0];
517}; 545};
518 546
547
5194.22 KVM_GET_FPU 5484.22 KVM_GET_FPU
520 549
521Capability: basic 550Capability: basic
@@ -541,6 +570,7 @@ struct kvm_fpu {
541 __u32 pad2; 570 __u32 pad2;
542}; 571};
543 572
573
5444.23 KVM_SET_FPU 5744.23 KVM_SET_FPU
545 575
546Capability: basic 576Capability: basic
@@ -566,6 +596,7 @@ struct kvm_fpu {
566 __u32 pad2; 596 __u32 pad2;
567}; 597};
568 598
599
5694.24 KVM_CREATE_IRQCHIP 6004.24 KVM_CREATE_IRQCHIP
570 601
571Capability: KVM_CAP_IRQCHIP 602Capability: KVM_CAP_IRQCHIP
@@ -579,6 +610,7 @@ ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
579local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 610local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
580only go to the IOAPIC. On ia64, a IOSAPIC is created. 611only go to the IOAPIC. On ia64, a IOSAPIC is created.
581 612
613
5824.25 KVM_IRQ_LINE 6144.25 KVM_IRQ_LINE
583 615
584Capability: KVM_CAP_IRQCHIP 616Capability: KVM_CAP_IRQCHIP
@@ -600,6 +632,7 @@ struct kvm_irq_level {
600 __u32 level; /* 0 or 1 */ 632 __u32 level; /* 0 or 1 */
601}; 633};
602 634
635
6034.26 KVM_GET_IRQCHIP 6364.26 KVM_GET_IRQCHIP
604 637
605Capability: KVM_CAP_IRQCHIP 638Capability: KVM_CAP_IRQCHIP
@@ -621,6 +654,7 @@ struct kvm_irqchip {
621 } chip; 654 } chip;
622}; 655};
623 656
657
6244.27 KVM_SET_IRQCHIP 6584.27 KVM_SET_IRQCHIP
625 659
626Capability: KVM_CAP_IRQCHIP 660Capability: KVM_CAP_IRQCHIP
@@ -642,6 +676,7 @@ struct kvm_irqchip {
642 } chip; 676 } chip;
643}; 677};
644 678
679
6454.28 KVM_XEN_HVM_CONFIG 6804.28 KVM_XEN_HVM_CONFIG
646 681
647Capability: KVM_CAP_XEN_HVM 682Capability: KVM_CAP_XEN_HVM
@@ -666,6 +701,7 @@ struct kvm_xen_hvm_config {
666 __u8 pad2[30]; 701 __u8 pad2[30];
667}; 702};
668 703
704
6694.29 KVM_GET_CLOCK 7054.29 KVM_GET_CLOCK
670 706
671Capability: KVM_CAP_ADJUST_CLOCK 707Capability: KVM_CAP_ADJUST_CLOCK
@@ -684,6 +720,7 @@ struct kvm_clock_data {
684 __u32 pad[9]; 720 __u32 pad[9];
685}; 721};
686 722
723
6874.30 KVM_SET_CLOCK 7244.30 KVM_SET_CLOCK
688 725
689Capability: KVM_CAP_ADJUST_CLOCK 726Capability: KVM_CAP_ADJUST_CLOCK
@@ -702,6 +739,7 @@ struct kvm_clock_data {
702 __u32 pad[9]; 739 __u32 pad[9];
703}; 740};
704 741
742
7054.31 KVM_GET_VCPU_EVENTS 7434.31 KVM_GET_VCPU_EVENTS
706 744
707Capability: KVM_CAP_VCPU_EVENTS 745Capability: KVM_CAP_VCPU_EVENTS
@@ -741,6 +779,7 @@ struct kvm_vcpu_events {
741KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that 779KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that
742interrupt.shadow contains a valid state. Otherwise, this field is undefined. 780interrupt.shadow contains a valid state. Otherwise, this field is undefined.
743 781
782
7444.32 KVM_SET_VCPU_EVENTS 7834.32 KVM_SET_VCPU_EVENTS
745 784
746Capability: KVM_CAP_VCPU_EVENTS 785Capability: KVM_CAP_VCPU_EVENTS
@@ -767,6 +806,7 @@ If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in
767the flags field to signal that interrupt.shadow contains a valid state and 806the flags field to signal that interrupt.shadow contains a valid state and
768shall be written into the VCPU. 807shall be written into the VCPU.
769 808
809
7704.33 KVM_GET_DEBUGREGS 8104.33 KVM_GET_DEBUGREGS
771 811
772Capability: KVM_CAP_DEBUGREGS 812Capability: KVM_CAP_DEBUGREGS
@@ -785,6 +825,7 @@ struct kvm_debugregs {
785 __u64 reserved[9]; 825 __u64 reserved[9];
786}; 826};
787 827
828
7884.34 KVM_SET_DEBUGREGS 8294.34 KVM_SET_DEBUGREGS
789 830
790Capability: KVM_CAP_DEBUGREGS 831Capability: KVM_CAP_DEBUGREGS
@@ -798,6 +839,7 @@ Writes debug registers into the vcpu.
798See KVM_GET_DEBUGREGS for the data structure. The flags field is unused 839See KVM_GET_DEBUGREGS for the data structure. The flags field is unused
799yet and must be cleared on entry. 840yet and must be cleared on entry.
800 841
842
8014.35 KVM_SET_USER_MEMORY_REGION 8434.35 KVM_SET_USER_MEMORY_REGION
802 844
803Capability: KVM_CAP_USER_MEM 845Capability: KVM_CAP_USER_MEM
@@ -844,6 +886,7 @@ It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl.
844The KVM_SET_MEMORY_REGION does not allow fine grained control over memory 886The KVM_SET_MEMORY_REGION does not allow fine grained control over memory
845allocation and is deprecated. 887allocation and is deprecated.
846 888
889
8474.36 KVM_SET_TSS_ADDR 8904.36 KVM_SET_TSS_ADDR
848 891
849Capability: KVM_CAP_SET_TSS_ADDR 892Capability: KVM_CAP_SET_TSS_ADDR
@@ -862,6 +905,7 @@ This ioctl is required on Intel-based hosts. This is needed on Intel hardware
862because of a quirk in the virtualization implementation (see the internals 905because of a quirk in the virtualization implementation (see the internals
863documentation when it pops into existence). 906documentation when it pops into existence).
864 907
908
8654.37 KVM_ENABLE_CAP 9094.37 KVM_ENABLE_CAP
866 910
867Capability: KVM_CAP_ENABLE_CAP 911Capability: KVM_CAP_ENABLE_CAP
@@ -897,6 +941,7 @@ function properly, this is the place to put them.
897 __u8 pad[64]; 941 __u8 pad[64];
898}; 942};
899 943
944
9004.38 KVM_GET_MP_STATE 9454.38 KVM_GET_MP_STATE
901 946
902Capability: KVM_CAP_MP_STATE 947Capability: KVM_CAP_MP_STATE
@@ -927,6 +972,7 @@ Possible values are:
927This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel 972This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel
928irqchip, the multiprocessing state must be maintained by userspace. 973irqchip, the multiprocessing state must be maintained by userspace.
929 974
975
9304.39 KVM_SET_MP_STATE 9764.39 KVM_SET_MP_STATE
931 977
932Capability: KVM_CAP_MP_STATE 978Capability: KVM_CAP_MP_STATE
@@ -941,6 +987,7 @@ arguments.
941This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel 987This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel
942irqchip, the multiprocessing state must be maintained by userspace. 988irqchip, the multiprocessing state must be maintained by userspace.
943 989
990
9444.40 KVM_SET_IDENTITY_MAP_ADDR 9914.40 KVM_SET_IDENTITY_MAP_ADDR
945 992
946Capability: KVM_CAP_SET_IDENTITY_MAP_ADDR 993Capability: KVM_CAP_SET_IDENTITY_MAP_ADDR
@@ -959,6 +1006,7 @@ This ioctl is required on Intel-based hosts. This is needed on Intel hardware
959because of a quirk in the virtualization implementation (see the internals 1006because of a quirk in the virtualization implementation (see the internals
960documentation when it pops into existence). 1007documentation when it pops into existence).
961 1008
1009
9624.41 KVM_SET_BOOT_CPU_ID 10104.41 KVM_SET_BOOT_CPU_ID
963 1011
964Capability: KVM_CAP_SET_BOOT_CPU_ID 1012Capability: KVM_CAP_SET_BOOT_CPU_ID
@@ -971,6 +1019,7 @@ Define which vcpu is the Bootstrap Processor (BSP). Values are the same
971as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default 1019as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default
972is vcpu 0. 1020is vcpu 0.
973 1021
1022
9744.42 KVM_GET_XSAVE 10234.42 KVM_GET_XSAVE
975 1024
976Capability: KVM_CAP_XSAVE 1025Capability: KVM_CAP_XSAVE
@@ -985,6 +1034,7 @@ struct kvm_xsave {
985 1034
986This ioctl would copy current vcpu's xsave struct to the userspace. 1035This ioctl would copy current vcpu's xsave struct to the userspace.
987 1036
1037
9884.43 KVM_SET_XSAVE 10384.43 KVM_SET_XSAVE
989 1039
990Capability: KVM_CAP_XSAVE 1040Capability: KVM_CAP_XSAVE
@@ -999,6 +1049,7 @@ struct kvm_xsave {
999 1049
1000This ioctl would copy userspace's xsave struct to the kernel. 1050This ioctl would copy userspace's xsave struct to the kernel.
1001 1051
1052
10024.44 KVM_GET_XCRS 10534.44 KVM_GET_XCRS
1003 1054
1004Capability: KVM_CAP_XCRS 1055Capability: KVM_CAP_XCRS
@@ -1022,6 +1073,7 @@ struct kvm_xcrs {
1022 1073
1023This ioctl would copy current vcpu's xcrs to the userspace. 1074This ioctl would copy current vcpu's xcrs to the userspace.
1024 1075
1076
10254.45 KVM_SET_XCRS 10774.45 KVM_SET_XCRS
1026 1078
1027Capability: KVM_CAP_XCRS 1079Capability: KVM_CAP_XCRS
@@ -1045,6 +1097,7 @@ struct kvm_xcrs {
1045 1097
1046This ioctl would set vcpu's xcr to the value userspace specified. 1098This ioctl would set vcpu's xcr to the value userspace specified.
1047 1099
1100
10484.46 KVM_GET_SUPPORTED_CPUID 11014.46 KVM_GET_SUPPORTED_CPUID
1049 1102
1050Capability: KVM_CAP_EXT_CPUID 1103Capability: KVM_CAP_EXT_CPUID
@@ -1119,6 +1172,7 @@ support. Instead it is reported via
1119if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the 1172if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the
1120feature in userspace, then you can enable the feature for KVM_SET_CPUID2. 1173feature in userspace, then you can enable the feature for KVM_SET_CPUID2.
1121 1174
1175
11224.47 KVM_PPC_GET_PVINFO 11764.47 KVM_PPC_GET_PVINFO
1123 1177
1124Capability: KVM_CAP_PPC_GET_PVINFO 1178Capability: KVM_CAP_PPC_GET_PVINFO
@@ -1142,6 +1196,7 @@ of 4 instructions that make up a hypercall.
1142If any additional field gets added to this structure later on, a bit for that 1196If any additional field gets added to this structure later on, a bit for that
1143additional piece of information will be set in the flags bitmap. 1197additional piece of information will be set in the flags bitmap.
1144 1198
1199
11454.48 KVM_ASSIGN_PCI_DEVICE 12004.48 KVM_ASSIGN_PCI_DEVICE
1146 1201
1147Capability: KVM_CAP_DEVICE_ASSIGNMENT 1202Capability: KVM_CAP_DEVICE_ASSIGNMENT
@@ -1185,6 +1240,7 @@ Only PCI header type 0 devices with PCI BAR resources are supported by
1185device assignment. The user requesting this ioctl must have read/write 1240device assignment. The user requesting this ioctl must have read/write
1186access to the PCI sysfs resource files associated with the device. 1241access to the PCI sysfs resource files associated with the device.
1187 1242
1243
11884.49 KVM_DEASSIGN_PCI_DEVICE 12444.49 KVM_DEASSIGN_PCI_DEVICE
1189 1245
1190Capability: KVM_CAP_DEVICE_DEASSIGNMENT 1246Capability: KVM_CAP_DEVICE_DEASSIGNMENT
@@ -1198,6 +1254,7 @@ Ends PCI device assignment, releasing all associated resources.
1198See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is 1254See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is
1199used in kvm_assigned_pci_dev to identify the device. 1255used in kvm_assigned_pci_dev to identify the device.
1200 1256
1257
12014.50 KVM_ASSIGN_DEV_IRQ 12584.50 KVM_ASSIGN_DEV_IRQ
1202 1259
1203Capability: KVM_CAP_ASSIGN_DEV_IRQ 1260Capability: KVM_CAP_ASSIGN_DEV_IRQ
@@ -1231,6 +1288,7 @@ The following flags are defined:
1231It is not valid to specify multiple types per host or guest IRQ. However, the 1288It is not valid to specify multiple types per host or guest IRQ. However, the
1232IRQ type of host and guest can differ or can even be null. 1289IRQ type of host and guest can differ or can even be null.
1233 1290
1291
12344.51 KVM_DEASSIGN_DEV_IRQ 12924.51 KVM_DEASSIGN_DEV_IRQ
1235 1293
1236Capability: KVM_CAP_ASSIGN_DEV_IRQ 1294Capability: KVM_CAP_ASSIGN_DEV_IRQ
@@ -1245,6 +1303,7 @@ See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
1245by assigned_dev_id, flags must correspond to the IRQ type specified on 1303by assigned_dev_id, flags must correspond to the IRQ type specified on
1246KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. 1304KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.
1247 1305
1306
12484.52 KVM_SET_GSI_ROUTING 13074.52 KVM_SET_GSI_ROUTING
1249 1308
1250Capability: KVM_CAP_IRQ_ROUTING 1309Capability: KVM_CAP_IRQ_ROUTING
@@ -1293,6 +1352,7 @@ struct kvm_irq_routing_msi {
1293 __u32 pad; 1352 __u32 pad;
1294}; 1353};
1295 1354
1355
12964.53 KVM_ASSIGN_SET_MSIX_NR 13564.53 KVM_ASSIGN_SET_MSIX_NR
1297 1357
1298Capability: KVM_CAP_DEVICE_MSIX 1358Capability: KVM_CAP_DEVICE_MSIX
@@ -1314,6 +1374,7 @@ struct kvm_assigned_msix_nr {
1314 1374
1315#define KVM_MAX_MSIX_PER_DEV 256 1375#define KVM_MAX_MSIX_PER_DEV 256
1316 1376
1377
13174.54 KVM_ASSIGN_SET_MSIX_ENTRY 13784.54 KVM_ASSIGN_SET_MSIX_ENTRY
1318 1379
1319Capability: KVM_CAP_DEVICE_MSIX 1380Capability: KVM_CAP_DEVICE_MSIX
@@ -1332,7 +1393,8 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
-4.54 KVM_SET_TSC_KHZ
+
+4.55 KVM_SET_TSC_KHZ
 
 Capability: KVM_CAP_TSC_CONTROL
 Architectures: x86
@@ -1343,7 +1405,8 @@ Returns: 0 on success, -1 on error
 Specifies the tsc frequency for the virtual machine. The unit of the
 frequency is KHz.
 
-4.55 KVM_GET_TSC_KHZ
+
+4.56 KVM_GET_TSC_KHZ
 
 Capability: KVM_CAP_GET_TSC_KHZ
 Architectures: x86
@@ -1355,7 +1418,8 @@ Returns the tsc frequency of the guest. The unit of the return value is
 KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
 error.
 
-4.56 KVM_GET_LAPIC
+
+4.57 KVM_GET_LAPIC
 
 Capability: KVM_CAP_IRQCHIP
 Architectures: x86
@@ -1371,7 +1435,8 @@ struct kvm_lapic_state {
 Reads the Local APIC registers and copies them into the input argument. The
 data format and layout are the same as documented in the architecture manual.
 
-4.57 KVM_SET_LAPIC
+
+4.58 KVM_SET_LAPIC
 
 Capability: KVM_CAP_IRQCHIP
 Architectures: x86
@@ -1387,7 +1452,8 @@ struct kvm_lapic_state {
 Copies the input argument into the the Local APIC registers. The data format
 and layout are the same as documented in the architecture manual.
 
-4.58 KVM_IOEVENTFD
+
+4.59 KVM_IOEVENTFD
 
 Capability: KVM_CAP_IOEVENTFD
 Architectures: all
@@ -1417,7 +1483,8 @@ The following flags are defined:
 If datamatch flag is set, the event will be signaled only if the written value
 to the registered address is equal to datamatch in struct kvm_ioeventfd.
 
-4.59 KVM_DIRTY_TLB
+
+4.60 KVM_DIRTY_TLB
 
 Capability: KVM_CAP_SW_TLB
 Architectures: ppc
@@ -1449,7 +1516,8 @@ The "num_dirty" field is a performance hint for KVM to determine whether it
 should skip processing the bitmap and just invalidate everything. It must
 be set to the number of set bits in the bitmap.
 
-4.60 KVM_ASSIGN_SET_INTX_MASK
+
+4.61 KVM_ASSIGN_SET_INTX_MASK
 
 Capability: KVM_CAP_PCI_2_3
 Architectures: x86
@@ -1482,6 +1550,7 @@ See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
1482by assigned_dev_id. In the flags field, only KVM_DEV_ASSIGN_MASK_INTX is 1550by assigned_dev_id. In the flags field, only KVM_DEV_ASSIGN_MASK_INTX is
1483evaluated. 1551evaluated.
1484 1552
1553
14854.62 KVM_CREATE_SPAPR_TCE 15544.62 KVM_CREATE_SPAPR_TCE
1486 1555
1487Capability: KVM_CAP_SPAPR_TCE 1556Capability: KVM_CAP_SPAPR_TCE
@@ -1517,6 +1586,7 @@ the entries written by kernel-handled H_PUT_TCE calls, and also lets
1517userspace update the TCE table directly which is useful in some 1586userspace update the TCE table directly which is useful in some
1518circumstances. 1587circumstances.
1519 1588
1589
15204.63 KVM_ALLOCATE_RMA 15904.63 KVM_ALLOCATE_RMA
1521 1591
1522Capability: KVM_CAP_PPC_RMA 1592Capability: KVM_CAP_PPC_RMA
@@ -1549,6 +1619,7 @@ is supported; 2 if the processor requires all virtual machines to have
1549an RMA, or 1 if the processor can use an RMA but doesn't require it, 1619an RMA, or 1 if the processor can use an RMA but doesn't require it,
1550because it supports the Virtual RMA (VRMA) facility. 1620because it supports the Virtual RMA (VRMA) facility.
1551 1621
1622
15524.64 KVM_NMI 16234.64 KVM_NMI
1553 1624
1554Capability: KVM_CAP_USER_NMI 1625Capability: KVM_CAP_USER_NMI
@@ -1574,6 +1645,7 @@ following algorithm:
1574Some guests configure the LINT1 NMI input to cause a panic, aiding in 1645Some guests configure the LINT1 NMI input to cause a panic, aiding in
1575debugging. 1646debugging.
1576 1647
1648
15774.65 KVM_S390_UCAS_MAP 16494.65 KVM_S390_UCAS_MAP
1578 1650
1579Capability: KVM_CAP_S390_UCONTROL 1651Capability: KVM_CAP_S390_UCONTROL
@@ -1593,6 +1665,7 @@ This ioctl maps the memory at "user_addr" with the length "length" to
1593the vcpu's address space starting at "vcpu_addr". All parameters need to 1665the vcpu's address space starting at "vcpu_addr". All parameters need to
1594be alligned by 1 megabyte. 1666be alligned by 1 megabyte.
1595 1667
1668
15964.66 KVM_S390_UCAS_UNMAP 16694.66 KVM_S390_UCAS_UNMAP
1597 1670
1598Capability: KVM_CAP_S390_UCONTROL 1671Capability: KVM_CAP_S390_UCONTROL
@@ -1612,6 +1685,7 @@ This ioctl unmaps the memory in the vcpu's address space starting at
1612"vcpu_addr" with the length "length". The field "user_addr" is ignored. 1685"vcpu_addr" with the length "length". The field "user_addr" is ignored.
1613All parameters need to be alligned by 1 megabyte. 1686All parameters need to be alligned by 1 megabyte.
1614 1687
1688
16154.67 KVM_S390_VCPU_FAULT 16894.67 KVM_S390_VCPU_FAULT
1616 1690
1617Capability: KVM_CAP_S390_UCONTROL 1691Capability: KVM_CAP_S390_UCONTROL
@@ -1628,6 +1702,7 @@ table upfront. This is useful to handle validity intercepts for user
1628controlled virtual machines to fault in the virtual cpu's lowcore pages 1702controlled virtual machines to fault in the virtual cpu's lowcore pages
1629prior to calling the KVM_RUN ioctl. 1703prior to calling the KVM_RUN ioctl.
1630 1704
1705
16314.68 KVM_SET_ONE_REG 17064.68 KVM_SET_ONE_REG
1632 1707
1633Capability: KVM_CAP_ONE_REG 1708Capability: KVM_CAP_ONE_REG
@@ -1653,6 +1728,7 @@ registers, find a list below:
1653 | | 1728 | |
1654 PPC | KVM_REG_PPC_HIOR | 64 1729 PPC | KVM_REG_PPC_HIOR | 64
1655 1730
1731
16564.69 KVM_GET_ONE_REG 17324.69 KVM_GET_ONE_REG
1657 1733
1658Capability: KVM_CAP_ONE_REG 1734Capability: KVM_CAP_ONE_REG
@@ -1669,7 +1745,193 @@ at the memory location pointed to by "addr".
 The list of registers accessible using this interface is identical to the
 list in 4.64.
 
+
+4.70 KVM_KVMCLOCK_CTRL
+
+Capability: KVM_CAP_KVMCLOCK_CTRL
+Architectures: Any that implement pvclocks (currently x86 only)
+Type: vcpu ioctl
+Parameters: None
+Returns: 0 on success, -1 on error
+
+This signals to the host kernel that the specified guest is being paused by
+userspace. The host will set a flag in the pvclock structure that is checked
+from the soft lockup watchdog. The flag is part of the pvclock structure that
+is shared between guest and host, specifically the second bit of the flags
+field of the pvclock_vcpu_time_info structure. It will be set exclusively by
+the host and read/cleared exclusively by the guest. The guest operation of
+checking and clearing the flag must be an atomic operation, so
+load-link/store-conditional, or equivalent must be used. There are two cases
+where the guest will clear the flag: when the soft lockup watchdog timer resets
+itself or when a soft lockup is detected. This ioctl can be called any time
+after pausing the vcpu, but before it is resumed.
+
+
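A minimal userspace sketch of using this ioctl, not taken from the patch;
the helper name is made up, vcpu_fd is assumed to come from KVM_CREATE_VCPU,
and error handling is left to the caller.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical helper: call between pausing and resuming the vcpu so the
 * host sets the "guest stopped" flag in the shared pvclock page. */
static int notify_guest_paused(int vcpu_fd)
{
	/* KVM_KVMCLOCK_CTRL takes no argument; returns 0 on success. */
	return ioctl(vcpu_fd, KVM_KVMCLOCK_CTRL, 0);
}
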
+4.71 KVM_SIGNAL_MSI
+
+Capability: KVM_CAP_SIGNAL_MSI
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_msi (in)
+Returns: >0 on delivery, 0 if guest blocked the MSI, and -1 on error
+
+Directly inject an MSI message. Only valid with an in-kernel irqchip that
+handles MSI messages.
+
+struct kvm_msi {
+	__u32 address_lo;
+	__u32 address_hi;
+	__u32 data;
+	__u32 flags;
+	__u8  pad[16];
+};
+
+No flags are defined so far. The corresponding field must be 0.
+
+
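A userspace sketch of the new MSI ABI, not part of the patch; the helper
name is illustrative and vm_fd is assumed to be a VM descriptor with an
in-kernel irqchip already created.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical helper: inject one MSI into the guest. */
static int inject_msi(int vm_fd, __u64 addr, __u32 data)
{
	struct kvm_msi msi;

	memset(&msi, 0, sizeof(msi));	/* flags and pad must be zero */
	msi.address_lo = (__u32)addr;
	msi.address_hi = (__u32)(addr >> 32);
	msi.data = data;

	/* >0 delivered, 0 blocked by the guest, -1 error */
	return ioctl(vm_fd, KVM_SIGNAL_MSI, &msi);
}
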
+4.71 KVM_CREATE_PIT2
+
+Capability: KVM_CAP_PIT2
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pit_config (in)
+Returns: 0 on success, -1 on error
+
+Creates an in-kernel device model for the i8254 PIT. This call is only valid
+after enabling in-kernel irqchip support via KVM_CREATE_IRQCHIP. The following
+parameters have to be passed:
+
+struct kvm_pit_config {
+	__u32 flags;
+	__u32 pad[15];
+};
+
+Valid flags are:
+
+#define KVM_PIT_SPEAKER_DUMMY     1 /* emulate speaker port stub */
+
+PIT timer interrupts may use a per-VM kernel thread for injection. If it
+exists, this thread will have a name of the following pattern:
+
+kvm-pit/<owner-process-pid>
+
+When running a guest with elevated priorities, the scheduling parameters of
+this thread may have to be adjusted accordingly.
+
+This IOCTL replaces the obsolete KVM_CREATE_PIT.
+
+
+4.72 KVM_GET_PIT2
+
+Capability: KVM_CAP_PIT_STATE2
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pit_state2 (out)
+Returns: 0 on success, -1 on error
+
+Retrieves the state of the in-kernel PIT model. Only valid after
+KVM_CREATE_PIT2. The state is returned in the following structure:
+
+struct kvm_pit_state2 {
+	struct kvm_pit_channel_state channels[3];
+	__u32 flags;
+	__u32 reserved[9];
+};
+
+Valid flags are:
+
+/* disable PIT in HPET legacy mode */
+#define KVM_PIT_FLAGS_HPET_LEGACY  0x00000001
+
+This IOCTL replaces the obsolete KVM_GET_PIT.
+
+
+4.73 KVM_SET_PIT2
+
+Capability: KVM_CAP_PIT_STATE2
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pit_state2 (in)
+Returns: 0 on success, -1 on error
+
+Sets the state of the in-kernel PIT model. Only valid after KVM_CREATE_PIT2.
+See KVM_GET_PIT2 for details on struct kvm_pit_state2.
+
+This IOCTL replaces the obsolete KVM_SET_PIT.
+
+
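A sketch of a PIT setup path that exercises the three calls above; it is
not from the patch, the function name is made up, vm_fd is assumed to
already have an in-kernel irqchip, and the HPET-legacy tweak is only an
example of flipping the state flag.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical helper: create the PIT, then read, tweak and write back
 * its state. Assumes KVM_CREATE_IRQCHIP has already succeeded. */
static int setup_pit(int vm_fd)
{
	struct kvm_pit_config cfg;
	struct kvm_pit_state2 state;

	memset(&cfg, 0, sizeof(cfg));
	cfg.flags = KVM_PIT_SPEAKER_DUMMY;	/* emulate the speaker port stub */
	if (ioctl(vm_fd, KVM_CREATE_PIT2, &cfg) < 0)
		return -1;

	if (ioctl(vm_fd, KVM_GET_PIT2, &state) < 0)
		return -1;

	/* Example tweak: park the PIT while HPET legacy routing is active. */
	state.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
	return ioctl(vm_fd, KVM_SET_PIT2, &state);
}
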
+4.74 KVM_PPC_GET_SMMU_INFO
+
+Capability: KVM_CAP_PPC_GET_SMMU_INFO
+Architectures: powerpc
+Type: vm ioctl
+Parameters: None
+Returns: 0 on success, -1 on error
+
+This populates and returns a structure describing the features of
+the "Server" class MMU emulation supported by KVM.
+This can in turn be used by userspace to generate the appropriate
+device-tree properties for the guest operating system.
+
+The structure contains some global information, followed by an
+array of supported segment page sizes:
+
+      struct kvm_ppc_smmu_info {
+	     __u64 flags;
+	     __u32 slb_size;
+	     __u32 pad;
+	     struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
+      };
+
+The supported flags are:
+
+    - KVM_PPC_PAGE_SIZES_REAL:
+        When that flag is set, guest page sizes must "fit" the backing
+        store page sizes. When not set, any page size in the list can
+        be used regardless of how they are backed by userspace.
+
+    - KVM_PPC_1T_SEGMENTS
+        The emulated MMU supports 1T segments in addition to the
+        standard 256M ones.
+
+The "slb_size" field indicates how many SLB entries are supported.
+
+The "sps" array contains 8 entries indicating the supported base
+page sizes for a segment in increasing order. Each entry is defined
+as follows:
+
+   struct kvm_ppc_one_seg_page_size {
+	__u32 page_shift;	/* Base page shift of segment (or 0) */
+	__u32 slb_enc;		/* SLB encoding for BookS */
+	struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
+   };
+
+An entry with a "page_shift" of 0 is unused. Because the array is
+organized in increasing order, a lookup can stop when encountering
+such an entry.
+
+The "slb_enc" field provides the encoding to use in the SLB for the
+page size. The bits are in positions such that the value can directly
+be OR'ed into the "vsid" argument of the slbmte instruction.
+
+The "enc" array is a list which, for each of those segment base page
+sizes, provides the list of supported actual page sizes (which can
+only be larger than or equal to the base page size), along with the
+corresponding encoding in the hash PTE. Similarly, the array is
+8 entries sorted by increasing sizes and an entry with a "0" shift
+is an empty entry and a terminator:
+
+   struct kvm_ppc_one_page_size {
+	__u32 page_shift;	/* Page shift (or 0) */
+	__u32 pte_enc;		/* Encoding in the HPTE (>>12) */
+   };
+
+The "pte_enc" field provides a value that can be OR'ed into the hash
+PTE's RPN field (ie, it needs to be shifted left by 12 to OR it
+into the hash PTE second double word).
+
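A userspace sketch of querying and walking this information on a powerpc
host; it is not part of the patch, the helper name and output format are
arbitrary, and it only compiles where the powerpc KVM structures are
available.

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical helper: dump the supported segment/page size combinations.
 * Entries with page_shift == 0 terminate each array, as described above. */
static int dump_smmu_info(int vm_fd)
{
	struct kvm_ppc_smmu_info info;
	int i, j;

	if (ioctl(vm_fd, KVM_PPC_GET_SMMU_INFO, &info) < 0)
		return -1;

	printf("slb_size=%u flags=0x%llx\n", info.slb_size,
	       (unsigned long long)info.flags);

	for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
		if (!info.sps[i].page_shift)
			break;		/* unused entry terminates the list */
		printf("base shift %u (slb_enc 0x%x):",
		       info.sps[i].page_shift, info.sps[i].slb_enc);
		for (j = 0; j < KVM_PPC_PAGE_SIZES_MAX_SZ; j++) {
			if (!info.sps[i].enc[j].page_shift)
				break;
			printf(" %u/0x%x", info.sps[i].enc[j].page_shift,
			       info.sps[i].enc[j].pte_enc);
		}
		printf("\n");
	}
	return 0;
}
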
 5. The kvm_run structure
+------------------------
 
 Application code obtains a pointer to the kvm_run structure by
 mmap()ing a vcpu fd. From that point, application code can control
@@ -1910,7 +2172,9 @@ and usually define the validity of a groups of registers. (e.g. one bit
1910 2172
1911}; 2173};
1912 2174
2175
19136. Capabilities that can be enabled 21766. Capabilities that can be enabled
2177-----------------------------------
1914 2178
1915There are certain capabilities that change the behavior of the virtual CPU when 2179There are certain capabilities that change the behavior of the virtual CPU when
1916enabled. To enable them, please see section 4.37. Below you can find a list of 2180enabled. To enable them, please see section 4.37. Below you can find a list of
@@ -1926,6 +2190,7 @@ The following information is provided along with the description:
1926 Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL) 2190 Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL)
1927 are not detailed, but errors with specific meanings are. 2191 are not detailed, but errors with specific meanings are.
1928 2192
2193
19296.1 KVM_CAP_PPC_OSI 21946.1 KVM_CAP_PPC_OSI
1930 2195
1931Architectures: ppc 2196Architectures: ppc
@@ -1939,6 +2204,7 @@ between the guest and the host.
1939 2204
1940When this capability is enabled, KVM_EXIT_OSI can occur. 2205When this capability is enabled, KVM_EXIT_OSI can occur.
1941 2206
2207
19426.2 KVM_CAP_PPC_PAPR 22086.2 KVM_CAP_PPC_PAPR
1943 2209
1944Architectures: ppc 2210Architectures: ppc
@@ -1957,6 +2223,7 @@ HTAB invisible to the guest.
1957 2223
1958When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur. 2224When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur.
1959 2225
2226
19606.3 KVM_CAP_SW_TLB 22276.3 KVM_CAP_SW_TLB
1961 2228
1962Architectures: ppc 2229Architectures: ppc
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index 882068538c9c..83afe65d4966 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -10,11 +10,15 @@ a guest.
 KVM cpuid functions are:
 
 function: KVM_CPUID_SIGNATURE (0x40000000)
-returns : eax = 0,
+returns : eax = 0x40000001,
           ebx = 0x4b4d564b,
           ecx = 0x564b4d56,
           edx = 0x4d.
 Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM".
+The value in eax corresponds to the maximum cpuid function present in this leaf,
+and will be updated if more functions are added in the future.
+Note also that old hosts set eax value to 0x0. This should
+be interpreted as if the value was 0x40000001.
 This function queries the presence of KVM cpuid leafs.
 
 
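A minimal guest-side sketch of probing this leaf with GCC's <cpuid.h>,
not part of the patch; the function name is made up and the 0x40000001
fallback for old hosts follows the note above.

#include <cpuid.h>

/* Hypothetical probe: return the maximum KVM cpuid function, or 0 if the
 * KVM signature is not present at leaf 0x40000000. */
static unsigned int kvm_cpuid_base_max(void)
{
	unsigned int eax, ebx, ecx, edx;

	__cpuid(0x40000000, eax, ebx, ecx, edx);
	if (ebx != 0x4b4d564b || ecx != 0x564b4d56 || edx != 0x4d)
		return 0;		/* not "KVMKVMKVM" */
	if (eax == 0)
		return 0x40000001;	/* old hosts report 0; treat as 0x40000001 */
	return eax;
}
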
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 50317809113d..96b41bd97523 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -109,6 +109,10 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
 	0 |     24    | multiple cpus are guaranteed to
 	  |           | be monotonic
 	-------------------------------------------------------------
+	  |           | guest vcpu has been paused by
+	1 |    N/A    | the host
+	  |           | See 4.70 in api.txt
+	-------------------------------------------------------------
 
 	Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
 	leaf prior to usage.
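A hedged guest-side sketch of consuming this bit; it is not from the patch.
PVCLOCK_GUEST_STOPPED is the flag name this series adds in
arch/x86/include/asm/pvclock-abi.h, the helper name is illustrative, and the
atomic test-and-clear requirement from section 4.70 of api.txt applies.

#include <linux/types.h>

#define PVCLOCK_GUEST_STOPPED	(1 << 1)	/* bit 1 of the pvclock flags byte */

/* Hypothetical consumer: test and atomically clear the "host paused us"
 * bit in the shared pvclock flags byte; returns 1 if it was set. */
static int check_and_clear_guest_paused(volatile __u8 *pvclock_flags)
{
	if (!(*pvclock_flags & PVCLOCK_GUEST_STOPPED))
		return 0;
	__sync_fetch_and_and(pvclock_flags, (__u8)~PVCLOCK_GUEST_STOPPED);
	return 1;
}
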
diff --git a/arch/alpha/include/asm/kvm_para.h b/arch/alpha/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/alpha/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/arm/include/asm/kvm_para.h b/arch/arm/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/arm/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/avr32/include/asm/kvm_para.h b/arch/avr32/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/avr32/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/blackfin/include/asm/kvm_para.h b/arch/blackfin/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/blackfin/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/c6x/include/asm/kvm_para.h b/arch/c6x/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/c6x/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/frv/include/asm/kvm_para.h b/arch/frv/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/frv/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/h8300/include/asm/kvm_para.h b/arch/h8300/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/h8300/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/hexagon/include/asm/kvm_para.h b/arch/hexagon/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/hexagon/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index e35b3a84a40b..6d6a5ac48d85 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -365,6 +365,7 @@ struct thash_cb {
365}; 365};
366 366
367struct kvm_vcpu_stat { 367struct kvm_vcpu_stat {
368 u32 halt_wakeup;
368}; 369};
369 370
370struct kvm_vcpu_arch { 371struct kvm_vcpu_arch {
@@ -448,6 +449,8 @@ struct kvm_vcpu_arch {
448 char log_buf[VMM_LOG_LEN]; 449 char log_buf[VMM_LOG_LEN];
449 union context host; 450 union context host;
450 union context guest; 451 union context guest;
452
453 char mmio_data[8];
451}; 454};
452 455
453struct kvm_vm_stat { 456struct kvm_vm_stat {
diff --git a/arch/ia64/include/asm/kvm_para.h b/arch/ia64/include/asm/kvm_para.h
index 1588aee781a2..2019cb99335e 100644
--- a/arch/ia64/include/asm/kvm_para.h
+++ b/arch/ia64/include/asm/kvm_para.h
@@ -26,6 +26,11 @@ static inline unsigned int kvm_arch_para_features(void)
26 return 0; 26 return 0;
27} 27}
28 28
29static inline bool kvm_check_and_clear_guest_paused(void)
30{
31 return false;
32}
33
29#endif 34#endif
30 35
31#endif 36#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 463fb3bbe11e..bd77cb507c1c 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -232,12 +232,12 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
232 if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS) 232 if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
233 goto mmio; 233 goto mmio;
234 vcpu->mmio_needed = 1; 234 vcpu->mmio_needed = 1;
235 vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr; 235 vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr;
236 vcpu->mmio_size = kvm_run->mmio.len = p->size; 236 vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size;
237 vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir; 237 vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
238 238
239 if (vcpu->mmio_is_write) 239 if (vcpu->mmio_is_write)
240 memcpy(vcpu->mmio_data, &p->data, p->size); 240 memcpy(vcpu->arch.mmio_data, &p->data, p->size);
241 memcpy(kvm_run->mmio.data, &p->data, p->size); 241 memcpy(kvm_run->mmio.data, &p->data, p->size);
242 kvm_run->exit_reason = KVM_EXIT_MMIO; 242 kvm_run->exit_reason = KVM_EXIT_MMIO;
243 return 0; 243 return 0;
@@ -719,7 +719,7 @@ static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
719 struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu); 719 struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
720 720
721 if (!vcpu->mmio_is_write) 721 if (!vcpu->mmio_is_write)
722 memcpy(&p->data, vcpu->mmio_data, 8); 722 memcpy(&p->data, vcpu->arch.mmio_data, 8);
723 p->state = STATE_IORESP_READY; 723 p->state = STATE_IORESP_READY;
724} 724}
725 725
@@ -739,7 +739,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
739 } 739 }
740 740
741 if (vcpu->mmio_needed) { 741 if (vcpu->mmio_needed) {
742 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 742 memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8);
743 kvm_set_mmio_data(vcpu); 743 kvm_set_mmio_data(vcpu);
744 vcpu->mmio_read_completed = 1; 744 vcpu->mmio_read_completed = 1;
745 vcpu->mmio_needed = 0; 745 vcpu->mmio_needed = 0;
@@ -1872,21 +1872,6 @@ void kvm_arch_hardware_unsetup(void)
1872{ 1872{
1873} 1873}
1874 1874
1875void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
1876{
1877 int me;
1878 int cpu = vcpu->cpu;
1879
1880 if (waitqueue_active(&vcpu->wq))
1881 wake_up_interruptible(&vcpu->wq);
1882
1883 me = get_cpu();
1884 if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu))
1885 if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
1886 smp_send_reschedule(cpu);
1887 put_cpu();
1888}
1889
1890int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) 1875int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
1891{ 1876{
1892 return __apic_accept_irq(vcpu, irq->vector); 1877 return __apic_accept_irq(vcpu, irq->vector);
@@ -1956,6 +1941,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1956 (kvm_highest_pending_irq(vcpu) != -1); 1941 (kvm_highest_pending_irq(vcpu) != -1);
1957} 1942}
1958 1943
1944int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1945{
1946 return (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests));
1947}
1948
1959int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 1949int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1960 struct kvm_mp_state *mp_state) 1950 struct kvm_mp_state *mp_state)
1961{ 1951{
diff --git a/arch/m68k/include/asm/kvm_para.h b/arch/m68k/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/m68k/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/microblaze/include/asm/kvm_para.h b/arch/microblaze/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/microblaze/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/mips/include/asm/kvm_para.h b/arch/mips/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/mips/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/mn10300/include/asm/kvm_para.h b/arch/mn10300/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/mn10300/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/openrisc/include/asm/kvm_para.h b/arch/openrisc/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/openrisc/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/parisc/include/asm/kvm_para.h b/arch/parisc/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/parisc/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index b9219e99bd2a..50d82c8a037f 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -168,6 +168,7 @@ extern const char *powerpc_base_platform;
168#define CPU_FTR_LWSYNC ASM_CONST(0x0000000008000000) 168#define CPU_FTR_LWSYNC ASM_CONST(0x0000000008000000)
169#define CPU_FTR_NOEXECUTE ASM_CONST(0x0000000010000000) 169#define CPU_FTR_NOEXECUTE ASM_CONST(0x0000000010000000)
170#define CPU_FTR_INDEXED_DCR ASM_CONST(0x0000000020000000) 170#define CPU_FTR_INDEXED_DCR ASM_CONST(0x0000000020000000)
171#define CPU_FTR_EMB_HV ASM_CONST(0x0000000040000000)
171 172
172/* 173/*
173 * Add the 64-bit processor unique features in the top half of the word; 174 * Add the 64-bit processor unique features in the top half of the word;
@@ -376,7 +377,8 @@ extern const char *powerpc_base_platform;
376#define CPU_FTRS_47X (CPU_FTRS_440x6) 377#define CPU_FTRS_47X (CPU_FTRS_440x6)
377#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \ 378#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \
378 CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \ 379 CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
379 CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE) 380 CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \
381 CPU_FTR_DEBUG_LVL_EXC)
380#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \ 382#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
381 CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \ 383 CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
382 CPU_FTR_NOEXECUTE) 384 CPU_FTR_NOEXECUTE)
@@ -385,15 +387,15 @@ extern const char *powerpc_base_platform;
385 CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) 387 CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
386#define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ 388#define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
387 CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ 389 CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
388 CPU_FTR_DBELL) 390 CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
389#define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ 391#define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
390 CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ 392 CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
391 CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ 393 CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
392 CPU_FTR_DEBUG_LVL_EXC) 394 CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
393#define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ 395#define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
394 CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ 396 CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
395 CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ 397 CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
396 CPU_FTR_DEBUG_LVL_EXC) 398 CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
397#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) 399#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
398 400
399/* 64-bit CPUs */ 401/* 64-bit CPUs */
@@ -486,8 +488,10 @@ enum {
486 CPU_FTRS_E200 | 488 CPU_FTRS_E200 |
487#endif 489#endif
488#ifdef CONFIG_E500 490#ifdef CONFIG_E500
489 CPU_FTRS_E500 | CPU_FTRS_E500_2 | CPU_FTRS_E500MC | 491 CPU_FTRS_E500 | CPU_FTRS_E500_2 |
490 CPU_FTRS_E5500 | CPU_FTRS_E6500 | 492#endif
493#ifdef CONFIG_PPC_E500MC
494 CPU_FTRS_E500MC | CPU_FTRS_E5500 | CPU_FTRS_E6500 |
491#endif 495#endif
492 0, 496 0,
493}; 497};
@@ -531,9 +535,12 @@ enum {
531 CPU_FTRS_E200 & 535 CPU_FTRS_E200 &
532#endif 536#endif
533#ifdef CONFIG_E500 537#ifdef CONFIG_E500
534 CPU_FTRS_E500 & CPU_FTRS_E500_2 & CPU_FTRS_E500MC & 538 CPU_FTRS_E500 & CPU_FTRS_E500_2 &
535 CPU_FTRS_E5500 & CPU_FTRS_E6500 & 539#endif
540#ifdef CONFIG_PPC_E500MC
541 CPU_FTRS_E500MC & CPU_FTRS_E5500 & CPU_FTRS_E6500 &
536#endif 542#endif
543 ~CPU_FTR_EMB_HV & /* can be removed at runtime */
537 CPU_FTRS_POSSIBLE, 544 CPU_FTRS_POSSIBLE,
538}; 545};
539#endif /* __powerpc64__ */ 546#endif /* __powerpc64__ */
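CPU_FTR_EMB_HV, added above, is set for e500mc/e5500/e6500 but explicitly masked out of the always-present set ("can be removed at runtime"), so nothing may assume it at compile time. C code tests it dynamically with the usual cputable helper, while assembly relies on the feature-section patching visible later in this diff (END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)). A minimal sketch of the C-side check, assuming the standard cpu_has_feature() accessor:

	if (cpu_has_feature(CPU_FTR_EMB_HV))
		pr_info("embedded hypervisor (E.HV) category present\n");
	else
		pr_info("E.HV not present; guest-state (GS) support unavailable\n");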
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index efa74ac44a35..154c067761b1 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -19,6 +19,9 @@
19 19
20#define PPC_DBELL_MSG_BRDCAST (0x04000000) 20#define PPC_DBELL_MSG_BRDCAST (0x04000000)
21#define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36)) 21#define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36))
22#define PPC_DBELL_TYPE_MASK PPC_DBELL_TYPE(0xf)
23#define PPC_DBELL_LPID(x) ((x) << (63 - 49))
24#define PPC_DBELL_PIR_MASK 0x3fff
22enum ppc_dbell { 25enum ppc_dbell {
23 PPC_DBELL = 0, /* doorbell */ 26 PPC_DBELL = 0, /* doorbell */
24 PPC_DBELL_CRIT = 1, /* critical doorbell */ 27 PPC_DBELL_CRIT = 1, /* critical doorbell */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 612252388190..423cf9eaf4a4 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -133,6 +133,16 @@
133#define H_PP1 (1UL<<(63-62)) 133#define H_PP1 (1UL<<(63-62))
134#define H_PP2 (1UL<<(63-63)) 134#define H_PP2 (1UL<<(63-63))
135 135
136/* Flags for H_REGISTER_VPA subfunction field */
137#define H_VPA_FUNC_SHIFT (63-18) /* Bit posn of subfunction code */
138#define H_VPA_FUNC_MASK 7UL
139#define H_VPA_REG_VPA 1UL /* Register Virtual Processor Area */
140#define H_VPA_REG_DTL 2UL /* Register Dispatch Trace Log */
141#define H_VPA_REG_SLB 3UL /* Register SLB shadow buffer */
142#define H_VPA_DEREG_VPA 5UL /* Deregister Virtual Processor Area */
143#define H_VPA_DEREG_DTL 6UL /* Deregister Dispatch Trace Log */
144#define H_VPA_DEREG_SLB 7UL /* Deregister SLB shadow buffer */
145
136/* VASI States */ 146/* VASI States */
137#define H_VASI_INVALID 0 147#define H_VASI_INVALID 0
138#define H_VASI_ENABLED 1 148#define H_VASI_ENABLED 1
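The new H_VPA_* constants describe the flags argument of H_REGISTER_VPA: a 3-bit subfunction code at bit position 63-18 selects which of the three areas (VPA, dispatch trace log, SLB shadow buffer) is being registered or deregistered. A self-contained illustration of the bit layout (user-space C, assuming 64-bit flag words; the constants are copied from the hunk above):

#include <stdio.h>
#include <stdint.h>

#define H_VPA_FUNC_SHIFT	(63 - 18)	/* bit position of subfunction code */
#define H_VPA_FUNC_MASK		UINT64_C(7)
#define H_VPA_REG_VPA		UINT64_C(1)
#define H_VPA_DEREG_DTL		UINT64_C(6)

static unsigned int h_vpa_subfunc(uint64_t flags)
{
	return (unsigned int)((flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK);
}

int main(void)
{
	uint64_t reg_vpa   = H_VPA_REG_VPA << H_VPA_FUNC_SHIFT;
	uint64_t dereg_dtl = H_VPA_DEREG_DTL << H_VPA_FUNC_SHIFT;

	printf("register VPA   -> subfunction %u\n", h_vpa_subfunc(reg_vpa));	/* prints 1 */
	printf("deregister DTL -> subfunction %u\n", h_vpa_subfunc(dereg_dtl));	/* prints 6 */
	return 0;
}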
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 51010bfc792e..c9aac24b02e2 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -33,6 +33,7 @@
33extern void __replay_interrupt(unsigned int vector); 33extern void __replay_interrupt(unsigned int vector);
34 34
35extern void timer_interrupt(struct pt_regs *); 35extern void timer_interrupt(struct pt_regs *);
36extern void performance_monitor_exception(struct pt_regs *regs);
36 37
37#ifdef CONFIG_PPC64 38#ifdef CONFIG_PPC64
38#include <asm/paca.h> 39#include <asm/paca.h>
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index b921c3f48928..1bea4d8ea6f4 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -277,6 +277,7 @@ struct kvm_sync_regs {
277#define KVM_CPU_E500V2 2 277#define KVM_CPU_E500V2 2
278#define KVM_CPU_3S_32 3 278#define KVM_CPU_3S_32 3
279#define KVM_CPU_3S_64 4 279#define KVM_CPU_3S_64 4
280#define KVM_CPU_E500MC 5
280 281
281/* for KVM_CAP_SPAPR_TCE */ 282/* for KVM_CAP_SPAPR_TCE */
282struct kvm_create_spapr_tce { 283struct kvm_create_spapr_tce {
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 7b1f0e0fc653..76fdcfef0889 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -20,6 +20,16 @@
20#ifndef __POWERPC_KVM_ASM_H__ 20#ifndef __POWERPC_KVM_ASM_H__
21#define __POWERPC_KVM_ASM_H__ 21#define __POWERPC_KVM_ASM_H__
22 22
23#ifdef __ASSEMBLY__
24#ifdef CONFIG_64BIT
25#define PPC_STD(sreg, offset, areg) std sreg, (offset)(areg)
26#define PPC_LD(treg, offset, areg) ld treg, (offset)(areg)
27#else
28#define PPC_STD(sreg, offset, areg) stw sreg, (offset+4)(areg)
29#define PPC_LD(treg, offset, areg) lwz treg, (offset+4)(areg)
30#endif
31#endif
32
23/* IVPR must be 64KiB-aligned. */ 33/* IVPR must be 64KiB-aligned. */
24#define VCPU_SIZE_ORDER 4 34#define VCPU_SIZE_ORDER 4
25#define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12) 35#define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12)
@@ -48,6 +58,14 @@
48#define BOOKE_INTERRUPT_SPE_FP_DATA 33 58#define BOOKE_INTERRUPT_SPE_FP_DATA 33
49#define BOOKE_INTERRUPT_SPE_FP_ROUND 34 59#define BOOKE_INTERRUPT_SPE_FP_ROUND 34
50#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35 60#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
61#define BOOKE_INTERRUPT_DOORBELL 36
62#define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37
63
64/* booke_hv */
65#define BOOKE_INTERRUPT_GUEST_DBELL 38
66#define BOOKE_INTERRUPT_GUEST_DBELL_CRIT 39
67#define BOOKE_INTERRUPT_HV_SYSCALL 40
68#define BOOKE_INTERRUPT_HV_PRIV 41
51 69
52/* book3s */ 70/* book3s */
53 71
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index fd07f43d6622..f0e0c6a66d97 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -453,4 +453,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
453 453
454#define INS_DCBZ 0x7c0007ec 454#define INS_DCBZ 0x7c0007ec
455 455
456/* LPIDs we support with this build -- runtime limit may be lower */
457#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
458
456#endif /* __ASM_KVM_BOOK3S_H__ */ 459#endif /* __ASM_KVM_BOOK3S_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 1f2f5b6156bd..88609b23b775 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -79,6 +79,9 @@ struct kvmppc_host_state {
79 u8 napping; 79 u8 napping;
80 80
81#ifdef CONFIG_KVM_BOOK3S_64_HV 81#ifdef CONFIG_KVM_BOOK3S_64_HV
82 u8 hwthread_req;
83 u8 hwthread_state;
84
82 struct kvm_vcpu *kvm_vcpu; 85 struct kvm_vcpu *kvm_vcpu;
83 struct kvmppc_vcore *kvm_vcore; 86 struct kvmppc_vcore *kvm_vcore;
84 unsigned long xics_phys; 87 unsigned long xics_phys;
@@ -122,4 +125,9 @@ struct kvmppc_book3s_shadow_vcpu {
122 125
123#endif /*__ASSEMBLY__ */ 126#endif /*__ASSEMBLY__ */
124 127
128/* Values for kvm_state */
129#define KVM_HWTHREAD_IN_KERNEL 0
130#define KVM_HWTHREAD_IN_NAP 1
131#define KVM_HWTHREAD_IN_KVM 2
132
125#endif /* __ASM_KVM_BOOK3S_ASM_H__ */ 133#endif /* __ASM_KVM_BOOK3S_ASM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index a90e09188777..b7cd3356a532 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -23,6 +23,9 @@
23#include <linux/types.h> 23#include <linux/types.h>
24#include <linux/kvm_host.h> 24#include <linux/kvm_host.h>
25 25
26/* LPIDs we support with this build -- runtime limit may be lower */
27#define KVMPPC_NR_LPIDS 64
28
26static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 29static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
27{ 30{
28 vcpu->arch.gpr[num] = val; 31 vcpu->arch.gpr[num] = val;
diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
new file mode 100644
index 000000000000..30a600fa1b6a
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
@@ -0,0 +1,49 @@
1/*
2 * Copyright 2010-2011 Freescale Semiconductor, Inc.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2, as
6 * published by the Free Software Foundation.
7 */
8
9#ifndef ASM_KVM_BOOKE_HV_ASM_H
10#define ASM_KVM_BOOKE_HV_ASM_H
11
12#ifdef __ASSEMBLY__
13
14/*
15 * All exceptions from guest state must go through KVM
16 * (except for those which are delivered directly to the guest) --
17 * there are no exceptions for which we fall through directly to
18 * the normal host handler.
19 *
20 * Expected inputs (normal exceptions):
21 * SCRATCH0 = saved r10
22 * r10 = thread struct
23 * r11 = appropriate SRR1 variant (currently used as scratch)
24 * r13 = saved CR
25 * *(r10 + THREAD_NORMSAVE(0)) = saved r11
26 * *(r10 + THREAD_NORMSAVE(2)) = saved r13
27 *
28 * Expected inputs (crit/mcheck/debug exceptions):
29 * appropriate SCRATCH = saved r8
30 * r8 = exception level stack frame
31 * r9 = *(r8 + _CCR) = saved CR
32 * r11 = appropriate SRR1 variant (currently used as scratch)
33 * *(r8 + GPR9) = saved r9
34 * *(r8 + GPR10) = saved r10 (r10 not yet clobbered)
35 * *(r8 + GPR11) = saved r11
36 */
37.macro DO_KVM intno srr1
38#ifdef CONFIG_KVM_BOOKE_HV
39BEGIN_FTR_SECTION
40 mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */
41 bf 3, kvmppc_resume_\intno\()_\srr1
42 b kvmppc_handler_\intno\()_\srr1
43kvmppc_resume_\intno\()_\srr1:
44END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
45#endif
46.endm
47
48#endif /*__ASSEMBLY__ */
49#endif /* ASM_KVM_BOOKE_HV_ASM_H */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
deleted file mode 100644
index 8cd50a514271..000000000000
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ /dev/null
@@ -1,96 +0,0 @@
1/*
2 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
3 *
4 * Author: Yu Liu, <yu.liu@freescale.com>
5 *
6 * Description:
7 * This file is derived from arch/powerpc/include/asm/kvm_44x.h,
8 * by Hollis Blanchard <hollisb@us.ibm.com>.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License, version 2, as
12 * published by the Free Software Foundation.
13 */
14
15#ifndef __ASM_KVM_E500_H__
16#define __ASM_KVM_E500_H__
17
18#include <linux/kvm_host.h>
19
20#define BOOKE_INTERRUPT_SIZE 36
21
22#define E500_PID_NUM 3
23#define E500_TLB_NUM 2
24
25#define E500_TLB_VALID 1
26#define E500_TLB_DIRTY 2
27
28struct tlbe_ref {
29 pfn_t pfn;
30 unsigned int flags; /* E500_TLB_* */
31};
32
33struct tlbe_priv {
34 struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */
35};
36
37struct vcpu_id_table;
38
39struct kvmppc_e500_tlb_params {
40 int entries, ways, sets;
41};
42
43struct kvmppc_vcpu_e500 {
44 /* Unmodified copy of the guest's TLB -- shared with host userspace. */
45 struct kvm_book3e_206_tlb_entry *gtlb_arch;
46
47 /* Starting entry number in gtlb_arch[] */
48 int gtlb_offset[E500_TLB_NUM];
49
50 /* KVM internal information associated with each guest TLB entry */
51 struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
52
53 struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM];
54
55 unsigned int gtlb_nv[E500_TLB_NUM];
56
57 /*
58 * information associated with each host TLB entry --
59 * TLB1 only for now. If/when guest TLB1 entries can be
60 * mapped with host TLB0, this will be used for that too.
61 *
62 * We don't want to use this for guest TLB0 because then we'd
63 * have the overhead of doing the translation again even if
64 * the entry is still in the guest TLB (e.g. we swapped out
65 * and back, and our host TLB entries got evicted).
66 */
67 struct tlbe_ref *tlb_refs[E500_TLB_NUM];
68 unsigned int host_tlb1_nv;
69
70 u32 host_pid[E500_PID_NUM];
71 u32 pid[E500_PID_NUM];
72 u32 svr;
73
74 /* vcpu id table */
75 struct vcpu_id_table *idt;
76
77 u32 l1csr0;
78 u32 l1csr1;
79 u32 hid0;
80 u32 hid1;
81 u32 tlb0cfg;
82 u32 tlb1cfg;
83 u64 mcar;
84
85 struct page **shared_tlb_pages;
86 int num_shared_tlb_pages;
87
88 struct kvm_vcpu vcpu;
89};
90
91static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
92{
93 return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu);
94}
95
96#endif /* __ASM_KVM_E500_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 52eb9c1f4fe0..d848cdc49715 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -82,7 +82,7 @@ struct kvm_vcpu;
82 82
83struct lppaca; 83struct lppaca;
84struct slb_shadow; 84struct slb_shadow;
85struct dtl; 85struct dtl_entry;
86 86
87struct kvm_vm_stat { 87struct kvm_vm_stat {
88 u32 remote_tlb_flush; 88 u32 remote_tlb_flush;
@@ -106,6 +106,8 @@ struct kvm_vcpu_stat {
106 u32 dec_exits; 106 u32 dec_exits;
107 u32 ext_intr_exits; 107 u32 ext_intr_exits;
108 u32 halt_wakeup; 108 u32 halt_wakeup;
109 u32 dbell_exits;
110 u32 gdbell_exits;
109#ifdef CONFIG_PPC_BOOK3S 111#ifdef CONFIG_PPC_BOOK3S
110 u32 pf_storage; 112 u32 pf_storage;
111 u32 pf_instruc; 113 u32 pf_instruc;
@@ -140,6 +142,7 @@ enum kvm_exit_types {
140 EMULATED_TLBSX_EXITS, 142 EMULATED_TLBSX_EXITS,
141 EMULATED_TLBWE_EXITS, 143 EMULATED_TLBWE_EXITS,
142 EMULATED_RFI_EXITS, 144 EMULATED_RFI_EXITS,
145 EMULATED_RFCI_EXITS,
143 DEC_EXITS, 146 DEC_EXITS,
144 EXT_INTR_EXITS, 147 EXT_INTR_EXITS,
145 HALT_WAKEUP, 148 HALT_WAKEUP,
@@ -147,6 +150,8 @@ enum kvm_exit_types {
147 FP_UNAVAIL, 150 FP_UNAVAIL,
148 DEBUG_EXITS, 151 DEBUG_EXITS,
149 TIMEINGUEST, 152 TIMEINGUEST,
153 DBELL_EXITS,
154 GDBELL_EXITS,
150 __NUMBER_OF_KVM_EXIT_TYPES 155 __NUMBER_OF_KVM_EXIT_TYPES
151}; 156};
152 157
@@ -217,10 +222,10 @@ struct kvm_arch_memory_slot {
217}; 222};
218 223
219struct kvm_arch { 224struct kvm_arch {
225 unsigned int lpid;
220#ifdef CONFIG_KVM_BOOK3S_64_HV 226#ifdef CONFIG_KVM_BOOK3S_64_HV
221 unsigned long hpt_virt; 227 unsigned long hpt_virt;
222 struct revmap_entry *revmap; 228 struct revmap_entry *revmap;
223 unsigned int lpid;
224 unsigned int host_lpid; 229 unsigned int host_lpid;
225 unsigned long host_lpcr; 230 unsigned long host_lpcr;
226 unsigned long sdr1; 231 unsigned long sdr1;
@@ -232,7 +237,6 @@ struct kvm_arch {
232 unsigned long vrma_slb_v; 237 unsigned long vrma_slb_v;
233 int rma_setup_done; 238 int rma_setup_done;
234 int using_mmu_notifiers; 239 int using_mmu_notifiers;
235 struct list_head spapr_tce_tables;
236 spinlock_t slot_phys_lock; 240 spinlock_t slot_phys_lock;
237 unsigned long *slot_phys[KVM_MEM_SLOTS_NUM]; 241 unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
238 int slot_npages[KVM_MEM_SLOTS_NUM]; 242 int slot_npages[KVM_MEM_SLOTS_NUM];
@@ -240,6 +244,9 @@ struct kvm_arch {
240 struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; 244 struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
241 struct kvmppc_linear_info *hpt_li; 245 struct kvmppc_linear_info *hpt_li;
242#endif /* CONFIG_KVM_BOOK3S_64_HV */ 246#endif /* CONFIG_KVM_BOOK3S_64_HV */
247#ifdef CONFIG_PPC_BOOK3S_64
248 struct list_head spapr_tce_tables;
249#endif
243}; 250};
244 251
245/* 252/*
@@ -263,6 +270,9 @@ struct kvmppc_vcore {
263 struct list_head runnable_threads; 270 struct list_head runnable_threads;
264 spinlock_t lock; 271 spinlock_t lock;
265 wait_queue_head_t wq; 272 wait_queue_head_t wq;
273 u64 stolen_tb;
274 u64 preempt_tb;
275 struct kvm_vcpu *runner;
266}; 276};
267 277
268#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) 278#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff)
@@ -274,6 +284,19 @@ struct kvmppc_vcore {
274#define VCORE_EXITING 2 284#define VCORE_EXITING 2
275#define VCORE_SLEEPING 3 285#define VCORE_SLEEPING 3
276 286
287/*
288 * Struct used to manage memory for a virtual processor area
289 * registered by a PAPR guest. There are three types of area
290 * that a guest can register.
291 */
292struct kvmppc_vpa {
293 void *pinned_addr; /* Address in kernel linear mapping */
294 void *pinned_end; /* End of region */
295 unsigned long next_gpa; /* Guest phys addr for update */
296 unsigned long len; /* Number of bytes required */
297 u8 update_pending; /* 1 => update pinned_addr from next_gpa */
298};
299
277struct kvmppc_pte { 300struct kvmppc_pte {
278 ulong eaddr; 301 ulong eaddr;
279 u64 vpage; 302 u64 vpage;
@@ -345,6 +368,17 @@ struct kvm_vcpu_arch {
345 u64 vsr[64]; 368 u64 vsr[64];
346#endif 369#endif
347 370
371#ifdef CONFIG_KVM_BOOKE_HV
372 u32 host_mas4;
373 u32 host_mas6;
374 u32 shadow_epcr;
375 u32 epcr;
376 u32 shadow_msrp;
377 u32 eplc;
378 u32 epsc;
379 u32 oldpir;
380#endif
381
348#ifdef CONFIG_PPC_BOOK3S 382#ifdef CONFIG_PPC_BOOK3S
349 /* For Gekko paired singles */ 383 /* For Gekko paired singles */
350 u32 qpr[32]; 384 u32 qpr[32];
@@ -370,6 +404,7 @@ struct kvm_vcpu_arch {
370#endif 404#endif
371 u32 vrsave; /* also USPRG0 */ 405 u32 vrsave; /* also USPRG0 */
372 u32 mmucr; 406 u32 mmucr;
407 /* shadow_msr is unused for BookE HV */
373 ulong shadow_msr; 408 ulong shadow_msr;
374 ulong csrr0; 409 ulong csrr0;
375 ulong csrr1; 410 ulong csrr1;
@@ -426,8 +461,12 @@ struct kvm_vcpu_arch {
426 ulong fault_esr; 461 ulong fault_esr;
427 ulong queued_dear; 462 ulong queued_dear;
428 ulong queued_esr; 463 ulong queued_esr;
464 u32 tlbcfg[4];
465 u32 mmucfg;
466 u32 epr;
429#endif 467#endif
430 gpa_t paddr_accessed; 468 gpa_t paddr_accessed;
469 gva_t vaddr_accessed;
431 470
432 u8 io_gpr; /* GPR used as IO source/target */ 471 u8 io_gpr; /* GPR used as IO source/target */
433 u8 mmio_is_bigendian; 472 u8 mmio_is_bigendian;
@@ -453,11 +492,6 @@ struct kvm_vcpu_arch {
453 u8 prodded; 492 u8 prodded;
454 u32 last_inst; 493 u32 last_inst;
455 494
456 struct lppaca *vpa;
457 struct slb_shadow *slb_shadow;
458 struct dtl *dtl;
459 struct dtl *dtl_end;
460
461 wait_queue_head_t *wqp; 495 wait_queue_head_t *wqp;
462 struct kvmppc_vcore *vcore; 496 struct kvmppc_vcore *vcore;
463 int ret; 497 int ret;
@@ -482,6 +516,14 @@ struct kvm_vcpu_arch {
482 struct task_struct *run_task; 516 struct task_struct *run_task;
483 struct kvm_run *kvm_run; 517 struct kvm_run *kvm_run;
484 pgd_t *pgdir; 518 pgd_t *pgdir;
519
520 spinlock_t vpa_update_lock;
521 struct kvmppc_vpa vpa;
522 struct kvmppc_vpa dtl;
523 struct dtl_entry *dtl_ptr;
524 unsigned long dtl_index;
525 u64 stolen_logged;
526 struct kvmppc_vpa slb_shadow;
485#endif 527#endif
486}; 528};
487 529
@@ -498,4 +540,6 @@ struct kvm_vcpu_arch {
498#define KVM_MMIO_REG_QPR 0x0040 540#define KVM_MMIO_REG_QPR 0x0040
499#define KVM_MMIO_REG_FQPR 0x0060 541#define KVM_MMIO_REG_FQPR 0x0060
500 542
543#define __KVM_HAVE_ARCH_WQP
544
501#endif /* __POWERPC_KVM_HOST_H__ */ 545#endif /* __POWERPC_KVM_HOST_H__ */
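In the kvm_host.h changes above, the raw lppaca/slb_shadow/dtl pointers are replaced by struct kvmppc_vpa instances guarded by vpa_update_lock; update_pending marks that pinned_addr should be refreshed from next_gpa the next time it is safe to do so. A rough sketch of that handshake; pin_guest_area()/unpin_guest_area() are hypothetical placeholders, not the helpers the real book3s_hv code uses:

static void apply_vpa_update(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v)
{
	/* caller is assumed to hold vcpu->arch.vpa_update_lock */
	if (!v->update_pending)
		return;

	if (v->pinned_addr)
		unpin_guest_area(vcpu->kvm, v->pinned_addr);	/* hypothetical helper */

	v->pinned_addr = v->len ?
		pin_guest_area(vcpu->kvm, v->next_gpa, v->len) : NULL; /* hypothetical helper */
	v->pinned_end = v->pinned_addr ?
		(char *)v->pinned_addr + v->len : NULL;
	v->update_pending = 0;
}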
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 7b754e743003..c18916bff689 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -206,6 +206,11 @@ static inline unsigned int kvm_arch_para_features(void)
206 return r; 206 return r;
207} 207}
208 208
209static inline bool kvm_check_and_clear_guest_paused(void)
210{
211 return false;
212}
213
209#endif /* __KERNEL__ */ 214#endif /* __KERNEL__ */
210 215
211#endif /* __POWERPC_KVM_PARA_H__ */ 216#endif /* __POWERPC_KVM_PARA_H__ */
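kvm_check_and_clear_guest_paused() is stubbed to return false on powerpc for now; providing the symbol lets architecture-independent callers use a single pattern everywhere, typically a lockup watchdog asking whether a missed deadline was really the host pausing the guest. A hedged illustration of that caller pattern (report_lockup() is a placeholder, not a real kernel function):

	if (kvm_check_and_clear_guest_paused())
		return;			/* host paused us: the stall is not a real lockup */
	report_lockup();		/* placeholder for the genuine-stall path */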
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 9d6dee0f7d48..f68c22fa2fce 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -95,7 +95,7 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
95extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); 95extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
96extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); 96extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
97 97
98extern void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); 98extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
99extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); 99extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
100extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); 100extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
101extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); 101extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
@@ -107,8 +107,10 @@ extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
107 107
108extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 108extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
109 unsigned int op, int *advance); 109 unsigned int op, int *advance);
110extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); 110extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn,
111extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); 111 ulong val);
112extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn,
113 ulong *val);
112 114
113extern int kvmppc_booke_init(void); 115extern int kvmppc_booke_init(void);
114extern void kvmppc_booke_exit(void); 116extern void kvmppc_booke_exit(void);
@@ -126,6 +128,8 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
126extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); 128extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
127extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, 129extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
128 struct kvm_create_spapr_tce *args); 130 struct kvm_create_spapr_tce *args);
131extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
132 unsigned long ioba, unsigned long tce);
129extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, 133extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
130 struct kvm_allocate_rma *rma); 134 struct kvm_allocate_rma *rma);
131extern struct kvmppc_linear_info *kvm_alloc_rma(void); 135extern struct kvmppc_linear_info *kvm_alloc_rma(void);
@@ -138,6 +142,11 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
138 struct kvm_userspace_memory_region *mem); 142 struct kvm_userspace_memory_region *mem);
139extern void kvmppc_core_commit_memory_region(struct kvm *kvm, 143extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
140 struct kvm_userspace_memory_region *mem); 144 struct kvm_userspace_memory_region *mem);
145extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
146 struct kvm_ppc_smmu_info *info);
147
148extern int kvmppc_bookehv_init(void);
149extern void kvmppc_bookehv_exit(void);
141 150
142/* 151/*
143 * Cuts out inst bits with ordering according to spec. 152 * Cuts out inst bits with ordering according to spec.
@@ -204,4 +213,9 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
204int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, 213int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
205 struct kvm_dirty_tlb *cfg); 214 struct kvm_dirty_tlb *cfg);
206 215
216long kvmppc_alloc_lpid(void);
217void kvmppc_claim_lpid(long lpid);
218void kvmppc_free_lpid(long lpid);
219void kvmppc_init_lpid(unsigned long nr_lpids);
220
207#endif /* __POWERPC_KVM_PPC_H__ */ 221#endif /* __POWERPC_KVM_PPC_H__ */
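The kvmppc_*_lpid() prototypes added above imply a single global LPID allocator seeded with the number of usable LPIDs at init time. A plausible bitmap-based sketch (KVMPPC_NR_LPIDS comes from the kvm_book3s.h/kvm_booke.h hunks earlier; the real arch/powerpc/kvm implementation may differ in detail):

static unsigned long lpid_inuse[BITS_TO_LONGS(KVMPPC_NR_LPIDS)];
static unsigned long nr_lpids;

void kvmppc_init_lpid(unsigned long nr_lpids_param)
{
	/* the runtime limit may be lower than the compile-time maximum */
	nr_lpids = min_t(unsigned long, nr_lpids_param, KVMPPC_NR_LPIDS);
	memset(lpid_inuse, 0, sizeof(lpid_inuse));
}

void kvmppc_claim_lpid(long lpid)
{
	set_bit(lpid, lpid_inuse);	/* e.g. reserve the host's own LPID */
}

long kvmppc_alloc_lpid(void)
{
	long lpid;

	do {
		lpid = find_first_zero_bit(lpid_inuse, KVMPPC_NR_LPIDS);
		if (lpid >= nr_lpids)
			return -ENOMEM;
	} while (test_and_set_bit(lpid, lpid_inuse));

	return lpid;
}

void kvmppc_free_lpid(long lpid)
{
	clear_bit(lpid, lpid_inuse);
}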
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index cdb5421877e2..eeabcdbc30f7 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -104,6 +104,8 @@
104#define MAS4_TSIZED_MASK 0x00000f80 /* Default TSIZE */ 104#define MAS4_TSIZED_MASK 0x00000f80 /* Default TSIZE */
105#define MAS4_TSIZED_SHIFT 7 105#define MAS4_TSIZED_SHIFT 7
106 106
107#define MAS5_SGS 0x80000000
108
107#define MAS6_SPID0 0x3FFF0000 109#define MAS6_SPID0 0x3FFF0000
108#define MAS6_SPID1 0x00007FFE 110#define MAS6_SPID1 0x00007FFE
109#define MAS6_ISIZE(x) MAS1_TSIZE(x) 111#define MAS6_ISIZE(x) MAS1_TSIZE(x)
@@ -118,6 +120,10 @@
118 120
119#define MAS7_RPN 0xFFFFFFFF 121#define MAS7_RPN 0xFFFFFFFF
120 122
123#define MAS8_TGS 0x80000000 /* Guest space */
124#define MAS8_VF 0x40000000 /* Virtualization Fault */
125#define MAS8_TLPID 0x000000ff
126
121/* Bit definitions for MMUCFG */ 127/* Bit definitions for MMUCFG */
122#define MMUCFG_MAVN 0x00000003 /* MMU Architecture Version Number */ 128#define MMUCFG_MAVN 0x00000003 /* MMU Architecture Version Number */
123#define MMUCFG_MAVN_V1 0x00000000 /* v1.0 */ 129#define MMUCFG_MAVN_V1 0x00000000 /* v1.0 */
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 55e85631c42e..413a5eaef56c 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -240,6 +240,9 @@ struct thread_struct {
240#ifdef CONFIG_KVM_BOOK3S_32_HANDLER 240#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
241 void* kvm_shadow_vcpu; /* KVM internal data */ 241 void* kvm_shadow_vcpu; /* KVM internal data */
242#endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ 242#endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
243#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
244 struct kvm_vcpu *kvm_vcpu;
245#endif
243#ifdef CONFIG_PPC64 246#ifdef CONFIG_PPC64
244 unsigned long dscr; 247 unsigned long dscr;
245 int dscr_inherit; 248 int dscr_inherit;
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 9d7f0fb69028..f0cb7f461b9d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -257,7 +257,9 @@
257#define LPCR_LPES_SH 2 257#define LPCR_LPES_SH 2
258#define LPCR_RMI 0x00000002 /* real mode is cache inhibit */ 258#define LPCR_RMI 0x00000002 /* real mode is cache inhibit */
259#define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */ 259#define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */
260#ifndef SPRN_LPID
260#define SPRN_LPID 0x13F /* Logical Partition Identifier */ 261#define SPRN_LPID 0x13F /* Logical Partition Identifier */
262#endif
261#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ 263#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
262#define SPRN_HMER 0x150 /* Hardware m? error recovery */ 264#define SPRN_HMER 0x150 /* Hardware m? error recovery */
263#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ 265#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 8a97aa7289d3..2d916c4982c5 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -56,18 +56,30 @@
56#define SPRN_SPRG7W 0x117 /* Special Purpose Register General 7 Write */ 56#define SPRN_SPRG7W 0x117 /* Special Purpose Register General 7 Write */
57#define SPRN_EPCR 0x133 /* Embedded Processor Control Register */ 57#define SPRN_EPCR 0x133 /* Embedded Processor Control Register */
58#define SPRN_DBCR2 0x136 /* Debug Control Register 2 */ 58#define SPRN_DBCR2 0x136 /* Debug Control Register 2 */
59#define SPRN_MSRP 0x137 /* MSR Protect Register */
59#define SPRN_IAC3 0x13A /* Instruction Address Compare 3 */ 60#define SPRN_IAC3 0x13A /* Instruction Address Compare 3 */
60#define SPRN_IAC4 0x13B /* Instruction Address Compare 4 */ 61#define SPRN_IAC4 0x13B /* Instruction Address Compare 4 */
61#define SPRN_DVC1 0x13E /* Data Value Compare Register 1 */ 62#define SPRN_DVC1 0x13E /* Data Value Compare Register 1 */
62#define SPRN_DVC2 0x13F /* Data Value Compare Register 2 */ 63#define SPRN_DVC2 0x13F /* Data Value Compare Register 2 */
64#define SPRN_LPID 0x152 /* Logical Partition ID */
63#define SPRN_MAS8 0x155 /* MMU Assist Register 8 */ 65#define SPRN_MAS8 0x155 /* MMU Assist Register 8 */
64#define SPRN_TLB0PS 0x158 /* TLB 0 Page Size Register */ 66#define SPRN_TLB0PS 0x158 /* TLB 0 Page Size Register */
65#define SPRN_TLB1PS 0x159 /* TLB 1 Page Size Register */ 67#define SPRN_TLB1PS 0x159 /* TLB 1 Page Size Register */
66#define SPRN_MAS5_MAS6 0x15c /* MMU Assist Register 5 || 6 */ 68#define SPRN_MAS5_MAS6 0x15c /* MMU Assist Register 5 || 6 */
67#define SPRN_MAS8_MAS1 0x15d /* MMU Assist Register 8 || 1 */ 69#define SPRN_MAS8_MAS1 0x15d /* MMU Assist Register 8 || 1 */
68#define SPRN_EPTCFG 0x15e /* Embedded Page Table Config */ 70#define SPRN_EPTCFG 0x15e /* Embedded Page Table Config */
71#define SPRN_GSPRG0 0x170 /* Guest SPRG0 */
72#define SPRN_GSPRG1 0x171 /* Guest SPRG1 */
73#define SPRN_GSPRG2 0x172 /* Guest SPRG2 */
74#define SPRN_GSPRG3 0x173 /* Guest SPRG3 */
69#define SPRN_MAS7_MAS3 0x174 /* MMU Assist Register 7 || 3 */ 75#define SPRN_MAS7_MAS3 0x174 /* MMU Assist Register 7 || 3 */
70#define SPRN_MAS0_MAS1 0x175 /* MMU Assist Register 0 || 1 */ 76#define SPRN_MAS0_MAS1 0x175 /* MMU Assist Register 0 || 1 */
77#define SPRN_GSRR0 0x17A /* Guest SRR0 */
78#define SPRN_GSRR1 0x17B /* Guest SRR1 */
79#define SPRN_GEPR 0x17C /* Guest EPR */
80#define SPRN_GDEAR 0x17D /* Guest DEAR */
81#define SPRN_GPIR 0x17E /* Guest PIR */
82#define SPRN_GESR 0x17F /* Guest Exception Syndrome Register */
71#define SPRN_IVOR0 0x190 /* Interrupt Vector Offset Register 0 */ 83#define SPRN_IVOR0 0x190 /* Interrupt Vector Offset Register 0 */
72#define SPRN_IVOR1 0x191 /* Interrupt Vector Offset Register 1 */ 84#define SPRN_IVOR1 0x191 /* Interrupt Vector Offset Register 1 */
73#define SPRN_IVOR2 0x192 /* Interrupt Vector Offset Register 2 */ 85#define SPRN_IVOR2 0x192 /* Interrupt Vector Offset Register 2 */
@@ -88,6 +100,13 @@
88#define SPRN_IVOR39 0x1B1 /* Interrupt Vector Offset Register 39 */ 100#define SPRN_IVOR39 0x1B1 /* Interrupt Vector Offset Register 39 */
89#define SPRN_IVOR40 0x1B2 /* Interrupt Vector Offset Register 40 */ 101#define SPRN_IVOR40 0x1B2 /* Interrupt Vector Offset Register 40 */
90#define SPRN_IVOR41 0x1B3 /* Interrupt Vector Offset Register 41 */ 102#define SPRN_IVOR41 0x1B3 /* Interrupt Vector Offset Register 41 */
103#define SPRN_GIVOR2 0x1B8 /* Guest IVOR2 */
104#define SPRN_GIVOR3 0x1B9 /* Guest IVOR3 */
105#define SPRN_GIVOR4 0x1BA /* Guest IVOR4 */
106#define SPRN_GIVOR8 0x1BB /* Guest IVOR8 */
107#define SPRN_GIVOR13 0x1BC /* Guest IVOR13 */
108#define SPRN_GIVOR14 0x1BD /* Guest IVOR14 */
109#define SPRN_GIVPR 0x1BF /* Guest IVPR */
91#define SPRN_SPEFSCR 0x200 /* SPE & Embedded FP Status & Control */ 110#define SPRN_SPEFSCR 0x200 /* SPE & Embedded FP Status & Control */
92#define SPRN_BBEAR 0x201 /* Branch Buffer Entry Address Register */ 111#define SPRN_BBEAR 0x201 /* Branch Buffer Entry Address Register */
93#define SPRN_BBTAR 0x202 /* Branch Buffer Target Address Register */ 112#define SPRN_BBTAR 0x202 /* Branch Buffer Target Address Register */
@@ -240,6 +259,10 @@
240#define MCSR_LDG 0x00002000UL /* Guarded Load */ 259#define MCSR_LDG 0x00002000UL /* Guarded Load */
241#define MCSR_TLBSYNC 0x00000002UL /* Multiple tlbsyncs detected */ 260#define MCSR_TLBSYNC 0x00000002UL /* Multiple tlbsyncs detected */
242#define MCSR_BSL2_ERR 0x00000001UL /* Backside L2 cache error */ 261#define MCSR_BSL2_ERR 0x00000001UL /* Backside L2 cache error */
262
263#define MSRP_UCLEP 0x04000000 /* Protect MSR[UCLE] */
264#define MSRP_DEP 0x00000200 /* Protect MSR[DE] */
265#define MSRP_PMMP 0x00000004 /* Protect MSR[PMM] */
243#endif 266#endif
244 267
245#ifdef CONFIG_E200 268#ifdef CONFIG_E200
@@ -594,6 +617,17 @@
594#define SPRN_EPCR_DMIUH 0x00400000 /* Disable MAS Interrupt updates 617#define SPRN_EPCR_DMIUH 0x00400000 /* Disable MAS Interrupt updates
595 * for hypervisor */ 618 * for hypervisor */
596 619
620/* Bit definitions for EPLC/EPSC */
621#define EPC_EPR 0x80000000 /* 1 = user, 0 = kernel */
622#define EPC_EPR_SHIFT 31
623#define EPC_EAS 0x40000000 /* Address Space */
624#define EPC_EAS_SHIFT 30
625#define EPC_EGS 0x20000000 /* 1 = guest, 0 = hypervisor */
626#define EPC_EGS_SHIFT 29
627#define EPC_ELPID 0x00ff0000
628#define EPC_ELPID_SHIFT 16
629#define EPC_EPID 0x00003fff
630#define EPC_EPID_SHIFT 0
597 631
598/* 632/*
599 * The IBM-403 is an even more odd special case, as it is much 633 * The IBM-403 is an even more odd special case, as it is much
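The EPC_* definitions above describe EPLC/EPSC, the external-PID load/store contexts that let the host access guest memory using guest translation. A hedged sketch of composing an EPLC value for a guest address space; the eplc/epsc fields added to the vcpu in the kvm_host.h hunk earlier would cache such a value, and SPRN_EPLC is assumed to be the existing reg_booke.h definition:

static inline void set_guest_eplc(unsigned int lpid, unsigned int pid)
{
	u32 eplc = EPC_EGS |			/* translate as guest, not hypervisor */
		   (lpid << EPC_ELPID_SHIFT) |	/* logical partition the guest runs in */
		   (pid << EPC_EPID_SHIFT);	/* guest process ID used for translation */

	mtspr(SPRN_EPLC, eplc);
}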
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 1a6320290d26..200d763a0a67 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -17,6 +17,7 @@ extern struct task_struct *_switch(struct thread_struct *prev,
17 struct thread_struct *next); 17 struct thread_struct *next);
18 18
19extern void giveup_fpu(struct task_struct *); 19extern void giveup_fpu(struct task_struct *);
20extern void load_up_fpu(void);
20extern void disable_kernel_fp(void); 21extern void disable_kernel_fp(void);
21extern void enable_kernel_fp(void); 22extern void enable_kernel_fp(void);
22extern void flush_fp_to_thread(struct task_struct *); 23extern void flush_fp_to_thread(struct task_struct *);
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 2136f58a54e8..3b4b4a8da922 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -23,6 +23,7 @@
23extern unsigned long tb_ticks_per_jiffy; 23extern unsigned long tb_ticks_per_jiffy;
24extern unsigned long tb_ticks_per_usec; 24extern unsigned long tb_ticks_per_usec;
25extern unsigned long tb_ticks_per_sec; 25extern unsigned long tb_ticks_per_sec;
26extern struct clock_event_device decrementer_clockevent;
26 27
27struct rtc_time; 28struct rtc_time;
28extern void to_tm(int tim, struct rtc_time * tm); 29extern void to_tm(int tim, struct rtc_time * tm);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4554dc2fe857..52c7ad78242e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -116,6 +116,9 @@ int main(void)
116#ifdef CONFIG_KVM_BOOK3S_32_HANDLER 116#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
117 DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); 117 DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
118#endif 118#endif
119#ifdef CONFIG_KVM_BOOKE_HV
120 DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu));
121#endif
119 122
120 DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); 123 DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
121 DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); 124 DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
@@ -383,6 +386,7 @@ int main(void)
383#ifdef CONFIG_KVM 386#ifdef CONFIG_KVM
384 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); 387 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
385 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); 388 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
389 DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid));
386 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); 390 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
387 DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); 391 DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
388 DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr)); 392 DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr));
@@ -425,9 +429,11 @@ int main(void)
425 DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4)); 429 DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4));
426 DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6)); 430 DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6));
427 431
432 DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
433 DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
434
428 /* book3s */ 435 /* book3s */
429#ifdef CONFIG_KVM_BOOK3S_64_HV 436#ifdef CONFIG_KVM_BOOK3S_64_HV
430 DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
431 DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); 437 DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
432 DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); 438 DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
433 DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); 439 DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
@@ -440,9 +446,9 @@ int main(void)
440 DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); 446 DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
441 DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); 447 DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
442 DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); 448 DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
449 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
443#endif 450#endif
444#ifdef CONFIG_PPC_BOOK3S 451#ifdef CONFIG_PPC_BOOK3S
445 DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
446 DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); 452 DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
447 DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); 453 DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
448 DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); 454 DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
@@ -457,7 +463,6 @@ int main(void)
457 DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); 463 DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
458 DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded)); 464 DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
459 DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); 465 DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
460 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa));
461 DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); 466 DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
462 DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); 467 DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
463 DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); 468 DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
@@ -533,6 +538,8 @@ int main(void)
533 HSTATE_FIELD(HSTATE_NAPPING, napping); 538 HSTATE_FIELD(HSTATE_NAPPING, napping);
534 539
535#ifdef CONFIG_KVM_BOOK3S_64_HV 540#ifdef CONFIG_KVM_BOOK3S_64_HV
541 HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req);
542 HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
536 HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); 543 HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
537 HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); 544 HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
538 HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); 545 HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
@@ -593,6 +600,12 @@ int main(void)
593 DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr)); 600 DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr));
594#endif 601#endif
595 602
603#ifdef CONFIG_KVM_BOOKE_HV
604 DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4));
605 DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6));
606 DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc));
607#endif
608
596#ifdef CONFIG_KVM_EXIT_TIMING 609#ifdef CONFIG_KVM_EXIT_TIMING
597 DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, 610 DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
598 arch.timing_exit.tv32.tbu)); 611 arch.timing_exit.tv32.tbu));
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 8053db02b85e..69fdd2322a66 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -73,6 +73,7 @@ _GLOBAL(__setup_cpu_e500v2)
73 mtlr r4 73 mtlr r4
74 blr 74 blr
75_GLOBAL(__setup_cpu_e500mc) 75_GLOBAL(__setup_cpu_e500mc)
76 mr r5, r4
76 mflr r4 77 mflr r4
77 bl __e500_icache_setup 78 bl __e500_icache_setup
78 bl __e500_dcache_setup 79 bl __e500_dcache_setup
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f7bed44ee165..1c06d2971545 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -63,11 +63,13 @@ BEGIN_FTR_SECTION
63 GET_PACA(r13) 63 GET_PACA(r13)
64 64
65#ifdef CONFIG_KVM_BOOK3S_64_HV 65#ifdef CONFIG_KVM_BOOK3S_64_HV
66 lbz r0,PACAPROCSTART(r13) 66 li r0,KVM_HWTHREAD_IN_KERNEL
67 cmpwi r0,0x80 67 stb r0,HSTATE_HWTHREAD_STATE(r13)
68 bne 1f 68 /* Order setting hwthread_state vs. testing hwthread_req */
69 li r0,1 69 sync
70 stb r0,PACAPROCSTART(r13) 70 lbz r0,HSTATE_HWTHREAD_REQ(r13)
71 cmpwi r0,0
72 beq 1f
71 b kvm_start_guest 73 b kvm_start_guest
721: 741:
73#endif 75#endif
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 22d608e8bb7d..7a2e5e421abf 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -248,10 +248,11 @@ _ENTRY(_start);
248 248
249interrupt_base: 249interrupt_base:
250 /* Critical Input Interrupt */ 250 /* Critical Input Interrupt */
251 CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) 251 CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
252 252
253 /* Machine Check Interrupt */ 253 /* Machine Check Interrupt */
254 CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) 254 CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
255 machine_check_exception)
255 MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception) 256 MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception)
256 257
257 /* Data Storage Interrupt */ 258 /* Data Storage Interrupt */
@@ -261,7 +262,8 @@ interrupt_base:
261 INSTRUCTION_STORAGE_EXCEPTION 262 INSTRUCTION_STORAGE_EXCEPTION
262 263
263 /* External Input Interrupt */ 264 /* External Input Interrupt */
264 EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) 265 EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \
266 do_IRQ, EXC_XFER_LITE)
265 267
266 /* Alignment Interrupt */ 268 /* Alignment Interrupt */
267 ALIGNMENT_EXCEPTION 269 ALIGNMENT_EXCEPTION
@@ -273,29 +275,32 @@ interrupt_base:
273#ifdef CONFIG_PPC_FPU 275#ifdef CONFIG_PPC_FPU
274 FP_UNAVAILABLE_EXCEPTION 276 FP_UNAVAILABLE_EXCEPTION
275#else 277#else
276 EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) 278 EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
279 FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
277#endif 280#endif
278 /* System Call Interrupt */ 281 /* System Call Interrupt */
279 START_EXCEPTION(SystemCall) 282 START_EXCEPTION(SystemCall)
280 NORMAL_EXCEPTION_PROLOG 283 NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL)
281 EXC_XFER_EE_LITE(0x0c00, DoSyscall) 284 EXC_XFER_EE_LITE(0x0c00, DoSyscall)
282 285
283 /* Auxiliary Processor Unavailable Interrupt */ 286 /* Auxiliary Processor Unavailable Interrupt */
284 EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) 287 EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
288 AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
285 289
286 /* Decrementer Interrupt */ 290 /* Decrementer Interrupt */
287 DECREMENTER_EXCEPTION 291 DECREMENTER_EXCEPTION
288 292
289 /* Fixed Internal Timer Interrupt */ 293 /* Fixed Internal Timer Interrupt */
290 /* TODO: Add FIT support */ 294 /* TODO: Add FIT support */
291 EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) 295 EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
296 unknown_exception, EXC_XFER_EE)
292 297
293 /* Watchdog Timer Interrupt */ 298 /* Watchdog Timer Interrupt */
294 /* TODO: Add watchdog support */ 299 /* TODO: Add watchdog support */
295#ifdef CONFIG_BOOKE_WDT 300#ifdef CONFIG_BOOKE_WDT
296 CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException) 301 CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, WatchdogException)
297#else 302#else
298 CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception) 303 CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, unknown_exception)
299#endif 304#endif
300 305
301 /* Data TLB Error Interrupt */ 306 /* Data TLB Error Interrupt */
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 0e4175388f47..5f051eeb93a2 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -2,6 +2,9 @@
2#define __HEAD_BOOKE_H__ 2#define __HEAD_BOOKE_H__
3 3
4#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */ 4#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
5#include <asm/kvm_asm.h>
6#include <asm/kvm_booke_hv_asm.h>
7
5/* 8/*
6 * Macros used for common Book-e exception handling 9 * Macros used for common Book-e exception handling
7 */ 10 */
@@ -28,14 +31,15 @@
28 */ 31 */
29#define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4)) 32#define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4))
30 33
31#define NORMAL_EXCEPTION_PROLOG \ 34#define NORMAL_EXCEPTION_PROLOG(intno) \
32 mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \ 35 mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
33 mfspr r10, SPRN_SPRG_THREAD; \ 36 mfspr r10, SPRN_SPRG_THREAD; \
34 stw r11, THREAD_NORMSAVE(0)(r10); \ 37 stw r11, THREAD_NORMSAVE(0)(r10); \
35 stw r13, THREAD_NORMSAVE(2)(r10); \ 38 stw r13, THREAD_NORMSAVE(2)(r10); \
36 mfcr r13; /* save CR in r13 for now */\ 39 mfcr r13; /* save CR in r13 for now */\
37 mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ 40 mfspr r11, SPRN_SRR1; \
38 andi. r11,r11,MSR_PR; \ 41 DO_KVM BOOKE_INTERRUPT_##intno SPRN_SRR1; \
42 andi. r11, r11, MSR_PR; /* check whether user or kernel */\
39 mr r11, r1; \ 43 mr r11, r1; \
40 beq 1f; \ 44 beq 1f; \
41 /* if from user, start at top of this thread's kernel stack */ \ 45 /* if from user, start at top of this thread's kernel stack */ \
@@ -113,7 +117,7 @@
113 * registers as the normal prolog above. Instead we use a portion of the 117 * registers as the normal prolog above. Instead we use a portion of the
114 * critical/machine check exception stack at low physical addresses. 118 * critical/machine check exception stack at low physical addresses.
115 */ 119 */
116#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \ 120#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \
117 mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \ 121 mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \
118 BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \ 122 BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
119 stw r9,GPR9(r8); /* save various registers */\ 123 stw r9,GPR9(r8); /* save various registers */\
@@ -121,8 +125,9 @@
121 stw r10,GPR10(r8); \ 125 stw r10,GPR10(r8); \
122 stw r11,GPR11(r8); \ 126 stw r11,GPR11(r8); \
123 stw r9,_CCR(r8); /* save CR on stack */\ 127 stw r9,_CCR(r8); /* save CR on stack */\
124 mfspr r10,exc_level_srr1; /* check whether user or kernel */\ 128 mfspr r11,exc_level_srr1; /* check whether user or kernel */\
125 andi. r10,r10,MSR_PR; \ 129 DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \
130 andi. r11,r11,MSR_PR; \
126 mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ 131 mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
127 lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ 132 lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
128 addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ 133 addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\
@@ -162,12 +167,30 @@
162 SAVE_4GPRS(3, r11); \ 167 SAVE_4GPRS(3, r11); \
163 SAVE_2GPRS(7, r11) 168 SAVE_2GPRS(7, r11)
164 169
165#define CRITICAL_EXCEPTION_PROLOG \ 170#define CRITICAL_EXCEPTION_PROLOG(intno) \
166 EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1) 171 EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1)
167#define DEBUG_EXCEPTION_PROLOG \ 172#define DEBUG_EXCEPTION_PROLOG \
168 EXC_LEVEL_EXCEPTION_PROLOG(DBG, SPRN_DSRR0, SPRN_DSRR1) 173 EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
169#define MCHECK_EXCEPTION_PROLOG \ 174#define MCHECK_EXCEPTION_PROLOG \
170 EXC_LEVEL_EXCEPTION_PROLOG(MC, SPRN_MCSRR0, SPRN_MCSRR1) 175 EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \
176 SPRN_MCSRR0, SPRN_MCSRR1)
177
178/*
179 * Guest Doorbell -- this is a bit odd in that it uses GSRR0/1 despite
180 * being delivered to the host. This exception can only happen
181 * inside a KVM guest -- so we just handle up to the DO_KVM rather
182 * than try to fit this into one of the existing prolog macros.
183 */
184#define GUEST_DOORBELL_EXCEPTION \
185 START_EXCEPTION(GuestDoorbell); \
186 mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
187 mfspr r10, SPRN_SPRG_THREAD; \
188 stw r11, THREAD_NORMSAVE(0)(r10); \
189 mfspr r11, SPRN_SRR1; \
190 stw r13, THREAD_NORMSAVE(2)(r10); \
191 mfcr r13; /* save CR in r13 for now */\
192 DO_KVM BOOKE_INTERRUPT_GUEST_DBELL SPRN_GSRR1; \
193 trap
171 194
172/* 195/*
173 * Exception vectors. 196 * Exception vectors.
@@ -181,16 +204,16 @@ label:
181 .long func; \ 204 .long func; \
182 .long ret_from_except_full 205 .long ret_from_except_full
183 206
184#define EXCEPTION(n, label, hdlr, xfer) \ 207#define EXCEPTION(n, intno, label, hdlr, xfer) \
185 START_EXCEPTION(label); \ 208 START_EXCEPTION(label); \
186 NORMAL_EXCEPTION_PROLOG; \ 209 NORMAL_EXCEPTION_PROLOG(intno); \
187 addi r3,r1,STACK_FRAME_OVERHEAD; \ 210 addi r3,r1,STACK_FRAME_OVERHEAD; \
188 xfer(n, hdlr) 211 xfer(n, hdlr)
189 212
190#define CRITICAL_EXCEPTION(n, label, hdlr) \ 213#define CRITICAL_EXCEPTION(n, intno, label, hdlr) \
191 START_EXCEPTION(label); \ 214 START_EXCEPTION(label); \
192 CRITICAL_EXCEPTION_PROLOG; \ 215 CRITICAL_EXCEPTION_PROLOG(intno); \
193 addi r3,r1,STACK_FRAME_OVERHEAD; \ 216 addi r3,r1,STACK_FRAME_OVERHEAD; \
194 EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ 217 EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
195 NOCOPY, crit_transfer_to_handler, \ 218 NOCOPY, crit_transfer_to_handler, \
196 ret_from_crit_exc) 219 ret_from_crit_exc)
@@ -302,7 +325,7 @@ label:
302 325
303#define DEBUG_CRIT_EXCEPTION \ 326#define DEBUG_CRIT_EXCEPTION \
304 START_EXCEPTION(DebugCrit); \ 327 START_EXCEPTION(DebugCrit); \
305 CRITICAL_EXCEPTION_PROLOG; \ 328 CRITICAL_EXCEPTION_PROLOG(DEBUG); \
306 \ 329 \
307 /* \ 330 /* \
308 * If there is a single step or branch-taken exception in an \ 331 * If there is a single step or branch-taken exception in an \
@@ -355,7 +378,7 @@ label:
355 378
356#define DATA_STORAGE_EXCEPTION \ 379#define DATA_STORAGE_EXCEPTION \
357 START_EXCEPTION(DataStorage) \ 380 START_EXCEPTION(DataStorage) \
358 NORMAL_EXCEPTION_PROLOG; \ 381 NORMAL_EXCEPTION_PROLOG(DATA_STORAGE); \
359 mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ 382 mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
360 stw r5,_ESR(r11); \ 383 stw r5,_ESR(r11); \
361 mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ 384 mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \
@@ -363,7 +386,7 @@ label:
363 386
364#define INSTRUCTION_STORAGE_EXCEPTION \ 387#define INSTRUCTION_STORAGE_EXCEPTION \
365 START_EXCEPTION(InstructionStorage) \ 388 START_EXCEPTION(InstructionStorage) \
366 NORMAL_EXCEPTION_PROLOG; \ 389 NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \
367 mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ 390 mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
368 stw r5,_ESR(r11); \ 391 stw r5,_ESR(r11); \
369 mr r4,r12; /* Pass SRR0 as arg2 */ \ 392 mr r4,r12; /* Pass SRR0 as arg2 */ \
@@ -372,7 +395,7 @@ label:
372 395
373#define ALIGNMENT_EXCEPTION \ 396#define ALIGNMENT_EXCEPTION \
374 START_EXCEPTION(Alignment) \ 397 START_EXCEPTION(Alignment) \
375 NORMAL_EXCEPTION_PROLOG; \ 398 NORMAL_EXCEPTION_PROLOG(ALIGNMENT); \
376 mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ 399 mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \
377 stw r4,_DEAR(r11); \ 400 stw r4,_DEAR(r11); \
378 addi r3,r1,STACK_FRAME_OVERHEAD; \ 401 addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -380,7 +403,7 @@ label:
380 403
381#define PROGRAM_EXCEPTION \ 404#define PROGRAM_EXCEPTION \
382 START_EXCEPTION(Program) \ 405 START_EXCEPTION(Program) \
383 NORMAL_EXCEPTION_PROLOG; \ 406 NORMAL_EXCEPTION_PROLOG(PROGRAM); \
384 mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \ 407 mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \
385 stw r4,_ESR(r11); \ 408 stw r4,_ESR(r11); \
386 addi r3,r1,STACK_FRAME_OVERHEAD; \ 409 addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -388,7 +411,7 @@ label:
388 411
389#define DECREMENTER_EXCEPTION \ 412#define DECREMENTER_EXCEPTION \
390 START_EXCEPTION(Decrementer) \ 413 START_EXCEPTION(Decrementer) \
391 NORMAL_EXCEPTION_PROLOG; \ 414 NORMAL_EXCEPTION_PROLOG(DECREMENTER); \
392 lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \ 415 lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \
393 mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \ 416 mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \
394 addi r3,r1,STACK_FRAME_OVERHEAD; \ 417 addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -396,7 +419,7 @@ label:
396 419
397#define FP_UNAVAILABLE_EXCEPTION \ 420#define FP_UNAVAILABLE_EXCEPTION \
398 START_EXCEPTION(FloatingPointUnavailable) \ 421 START_EXCEPTION(FloatingPointUnavailable) \
399 NORMAL_EXCEPTION_PROLOG; \ 422 NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL); \
400 beq 1f; \ 423 beq 1f; \
401 bl load_up_fpu; /* if from user, just load it up */ \ 424 bl load_up_fpu; /* if from user, just load it up */ \
402 b fast_exception_return; \ 425 b fast_exception_return; \
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index de80e0f9a2bd..1f4434a38608 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -301,19 +301,20 @@ _ENTRY(__early_start)
301 301
302interrupt_base: 302interrupt_base:
303 /* Critical Input Interrupt */ 303 /* Critical Input Interrupt */
304 CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) 304 CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
305 305
306 /* Machine Check Interrupt */ 306 /* Machine Check Interrupt */
307#ifdef CONFIG_E200 307#ifdef CONFIG_E200
308 /* no RFMCI, MCSRRs on E200 */ 308 /* no RFMCI, MCSRRs on E200 */
309 CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) 309 CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
310 machine_check_exception)
310#else 311#else
311 MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) 312 MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
312#endif 313#endif
313 314
314 /* Data Storage Interrupt */ 315 /* Data Storage Interrupt */
315 START_EXCEPTION(DataStorage) 316 START_EXCEPTION(DataStorage)
316 NORMAL_EXCEPTION_PROLOG 317 NORMAL_EXCEPTION_PROLOG(DATA_STORAGE)
317 mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ 318 mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
318 stw r5,_ESR(r11) 319 stw r5,_ESR(r11)
319 mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ 320 mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
@@ -328,7 +329,7 @@ interrupt_base:
328 INSTRUCTION_STORAGE_EXCEPTION 329 INSTRUCTION_STORAGE_EXCEPTION
329 330
330 /* External Input Interrupt */ 331 /* External Input Interrupt */
331 EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) 332 EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE)
332 333
333 /* Alignment Interrupt */ 334 /* Alignment Interrupt */
334 ALIGNMENT_EXCEPTION 335 ALIGNMENT_EXCEPTION
@@ -342,32 +343,36 @@ interrupt_base:
342#else 343#else
343#ifdef CONFIG_E200 344#ifdef CONFIG_E200
344 /* E200 treats 'normal' floating point instructions as FP Unavail exception */ 345 /* E200 treats 'normal' floating point instructions as FP Unavail exception */
345 EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE) 346 EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
347 program_check_exception, EXC_XFER_EE)
346#else 348#else
347 EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) 349 EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
350 unknown_exception, EXC_XFER_EE)
348#endif 351#endif
349#endif 352#endif
350 353
351 /* System Call Interrupt */ 354 /* System Call Interrupt */
352 START_EXCEPTION(SystemCall) 355 START_EXCEPTION(SystemCall)
353 NORMAL_EXCEPTION_PROLOG 356 NORMAL_EXCEPTION_PROLOG(SYSCALL)
354 EXC_XFER_EE_LITE(0x0c00, DoSyscall) 357 EXC_XFER_EE_LITE(0x0c00, DoSyscall)
355 358
356 /* Auxiliary Processor Unavailable Interrupt */ 359 /* Auxiliary Processor Unavailable Interrupt */
357 EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) 360 EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
361 unknown_exception, EXC_XFER_EE)
358 362
359 /* Decrementer Interrupt */ 363 /* Decrementer Interrupt */
360 DECREMENTER_EXCEPTION 364 DECREMENTER_EXCEPTION
361 365
362 /* Fixed Internal Timer Interrupt */ 366 /* Fixed Internal Timer Interrupt */
363 /* TODO: Add FIT support */ 367 /* TODO: Add FIT support */
364 EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) 368 EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
369 unknown_exception, EXC_XFER_EE)
365 370
366 /* Watchdog Timer Interrupt */ 371 /* Watchdog Timer Interrupt */
367#ifdef CONFIG_BOOKE_WDT 372#ifdef CONFIG_BOOKE_WDT
368 CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException) 373 CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, WatchdogException)
369#else 374#else
370 CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception) 375 CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, unknown_exception)
371#endif 376#endif
372 377
373 /* Data TLB Error Interrupt */ 378 /* Data TLB Error Interrupt */
@@ -375,10 +380,16 @@ interrupt_base:
375 mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ 380 mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
376 mfspr r10, SPRN_SPRG_THREAD 381 mfspr r10, SPRN_SPRG_THREAD
377 stw r11, THREAD_NORMSAVE(0)(r10) 382 stw r11, THREAD_NORMSAVE(0)(r10)
383#ifdef CONFIG_KVM_BOOKE_HV
384BEGIN_FTR_SECTION
385 mfspr r11, SPRN_SRR1
386END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
387#endif
378 stw r12, THREAD_NORMSAVE(1)(r10) 388 stw r12, THREAD_NORMSAVE(1)(r10)
379 stw r13, THREAD_NORMSAVE(2)(r10) 389 stw r13, THREAD_NORMSAVE(2)(r10)
380 mfcr r13 390 mfcr r13
381 stw r13, THREAD_NORMSAVE(3)(r10) 391 stw r13, THREAD_NORMSAVE(3)(r10)
392 DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1
382 mfspr r10, SPRN_DEAR /* Get faulting address */ 393 mfspr r10, SPRN_DEAR /* Get faulting address */
383 394
384 /* If we are faulting a kernel address, we have to use the 395 /* If we are faulting a kernel address, we have to use the
@@ -463,10 +474,16 @@ interrupt_base:
463 mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ 474 mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
464 mfspr r10, SPRN_SPRG_THREAD 475 mfspr r10, SPRN_SPRG_THREAD
465 stw r11, THREAD_NORMSAVE(0)(r10) 476 stw r11, THREAD_NORMSAVE(0)(r10)
477#ifdef CONFIG_KVM_BOOKE_HV
478BEGIN_FTR_SECTION
479 mfspr r11, SPRN_SRR1
480END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
481#endif
466 stw r12, THREAD_NORMSAVE(1)(r10) 482 stw r12, THREAD_NORMSAVE(1)(r10)
467 stw r13, THREAD_NORMSAVE(2)(r10) 483 stw r13, THREAD_NORMSAVE(2)(r10)
468 mfcr r13 484 mfcr r13
469 stw r13, THREAD_NORMSAVE(3)(r10) 485 stw r13, THREAD_NORMSAVE(3)(r10)
486 DO_KVM BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1
470 mfspr r10, SPRN_SRR0 /* Get faulting address */ 487 mfspr r10, SPRN_SRR0 /* Get faulting address */
471 488
472 /* If we are faulting a kernel address, we have to use the 489 /* If we are faulting a kernel address, we have to use the
@@ -538,36 +555,54 @@ interrupt_base:
538#ifdef CONFIG_SPE 555#ifdef CONFIG_SPE
539 /* SPE Unavailable */ 556 /* SPE Unavailable */
540 START_EXCEPTION(SPEUnavailable) 557 START_EXCEPTION(SPEUnavailable)
541 NORMAL_EXCEPTION_PROLOG 558 NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
542 bne load_up_spe 559 bne load_up_spe
543 addi r3,r1,STACK_FRAME_OVERHEAD 560 addi r3,r1,STACK_FRAME_OVERHEAD
544 EXC_XFER_EE_LITE(0x2010, KernelSPE) 561 EXC_XFER_EE_LITE(0x2010, KernelSPE)
545#else 562#else
546 EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE) 563 EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
564 unknown_exception, EXC_XFER_EE)
547#endif /* CONFIG_SPE */ 565#endif /* CONFIG_SPE */
548 566
549 /* SPE Floating Point Data */ 567 /* SPE Floating Point Data */
550#ifdef CONFIG_SPE 568#ifdef CONFIG_SPE
551 EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE); 569 EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, \
570 SPEFloatingPointException, EXC_XFER_EE);
552 571
553 /* SPE Floating Point Round */ 572 /* SPE Floating Point Round */
554 EXCEPTION(0x2050, SPEFloatingPointRound, SPEFloatingPointRoundException, EXC_XFER_EE) 573 EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
574 SPEFloatingPointRoundException, EXC_XFER_EE)
555#else 575#else
556 EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) 576 EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, \
557 EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE) 577 unknown_exception, EXC_XFER_EE)
578 EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
579 unknown_exception, EXC_XFER_EE)
558#endif /* CONFIG_SPE */ 580#endif /* CONFIG_SPE */
559 581
560 /* Performance Monitor */ 582 /* Performance Monitor */
561 EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD) 583 EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
584 performance_monitor_exception, EXC_XFER_STD)
562 585
563 EXCEPTION(0x2070, Doorbell, doorbell_exception, EXC_XFER_STD) 586 EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD)
564 587
565 CRITICAL_EXCEPTION(0x2080, CriticalDoorbell, unknown_exception) 588 CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \
589 CriticalDoorbell, unknown_exception)
566 590
567 /* Debug Interrupt */ 591 /* Debug Interrupt */
568 DEBUG_DEBUG_EXCEPTION 592 DEBUG_DEBUG_EXCEPTION
569 DEBUG_CRIT_EXCEPTION 593 DEBUG_CRIT_EXCEPTION
570 594
595 GUEST_DOORBELL_EXCEPTION
596
597 CRITICAL_EXCEPTION(0, GUEST_DBELL_CRIT, CriticalGuestDoorbell, \
598 unknown_exception)
599
600 /* Hypercall */
601 EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE)
602
603 /* Embedded Hypervisor Privilege */
604 EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE)
605
571/* 606/*
572 * Local functions 607 * Local functions
573 */ 608 */
@@ -871,8 +906,31 @@ _GLOBAL(__setup_e500mc_ivors)
871 mtspr SPRN_IVOR36,r3 906 mtspr SPRN_IVOR36,r3
872 li r3,CriticalDoorbell@l 907 li r3,CriticalDoorbell@l
873 mtspr SPRN_IVOR37,r3 908 mtspr SPRN_IVOR37,r3
909
910 /*
911 * We only want to touch IVOR38-41 if we're running on hardware
912 * that supports category E.HV. The architectural way to determine
913 * this is MMUCFG[LPIDSIZE].
914 */
915 mfspr r3, SPRN_MMUCFG
916 andis. r3, r3, MMUCFG_LPIDSIZE@h
917 beq no_hv
918 li r3,GuestDoorbell@l
919 mtspr SPRN_IVOR38,r3
920 li r3,CriticalGuestDoorbell@l
921 mtspr SPRN_IVOR39,r3
922 li r3,Hypercall@l
923 mtspr SPRN_IVOR40,r3
924 li r3,Ehvpriv@l
925 mtspr SPRN_IVOR41,r3
926skip_hv_ivors:
874 sync 927 sync
875 blr 928 blr
929no_hv:
930 lwz r3, CPU_SPEC_FEATURES(r5)
931 rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV
932 stw r3, CPU_SPEC_FEATURES(r5)
933 b skip_hv_ivors
876 934
877#ifdef CONFIG_SPE 935#ifdef CONFIG_SPE
878/* 936/*
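
The __setup_e500mc_ivors hunk above keys the new IVOR38-41 setup off MMUCFG[LPIDSIZE]: a non-zero LPID size means the core implements category E.HV (e500mc/e5500), so the guest doorbell, critical guest doorbell, hypercall and embedded-hypervisor-privilege vectors exist; otherwise CPU_FTR_EMB_HV is stripped from the feature word. A C-level sketch of the same check, assuming the MMUCFG_LPIDSIZE mask from asm/reg_booke.h and a made-up helper name:

	#include <linux/types.h>
	#include <asm/reg.h>		/* mfspr() */
	#include <asm/reg_booke.h>	/* SPRN_MMUCFG, MMUCFG_LPIDSIZE */

	/* Hypothetical helper: true if this core implements category E.HV. */
	static inline bool cpu_has_embedded_hv(void)
	{
		/* Only E.HV-capable cores report a non-zero LPIDSIZE field. */
		return (mfspr(SPRN_MMUCFG) & MMUCFG_LPIDSIZE) != 0;
	}

On cores where this is false the hypervisor IVORs are never written and CPU_FTR_EMB_HV is cleared, which is what the no_hv path in the assembly does.
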
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 0cdc9a392839..7140d838339e 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -16,6 +16,7 @@
16#include <asm/asm-offsets.h> 16#include <asm/asm-offsets.h>
17#include <asm/ppc-opcode.h> 17#include <asm/ppc-opcode.h>
18#include <asm/hw_irq.h> 18#include <asm/hw_irq.h>
19#include <asm/kvm_book3s_asm.h>
19 20
20#undef DEBUG 21#undef DEBUG
21 22
@@ -81,6 +82,12 @@ _GLOBAL(power7_idle)
81 std r9,_MSR(r1) 82 std r9,_MSR(r1)
82 std r1,PACAR1(r13) 83 std r1,PACAR1(r13)
83 84
85#ifdef CONFIG_KVM_BOOK3S_64_HV
86 /* Tell KVM we're napping */
87 li r4,KVM_HWTHREAD_IN_NAP
88 stb r4,HSTATE_HWTHREAD_STATE(r13)
89#endif
90
84 /* Magic NAP mode enter sequence */ 91 /* Magic NAP mode enter sequence */
85 std r0,0(r1) 92 std r0,0(r1)
86 ptesync 93 ptesync
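
The idle_power7.S change records, just before the thread naps, that this hardware thread is parked and available to KVM; kvmppc_grab_hwthread() in book3s_hv.c (further down in this diff) polls the same per-paca byte and refuses to use a thread that is still marked as being in the kernel. In C terms the two added instructions amount to something like this sketch (illustrative only, mirroring the assembly):

	#include <asm/paca.h>
	#include <asm/kvm_book3s_asm.h>	/* KVM_HWTHREAD_IN_NAP */

	static inline void note_hwthread_napping(void)	/* hypothetical name */
	{
	#ifdef CONFIG_KVM_BOOK3S_64_HV
		/* Lets kvmppc_grab_hwthread() know this thread has left the
		 * kernel and can be pointed at a guest vcpu. */
		local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_NAP;
	#endif
	}
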
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 786a2700ec2d..d1f2aafcbe8c 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -190,3 +190,7 @@ EXPORT_SYMBOL(__arch_hweight16);
190EXPORT_SYMBOL(__arch_hweight32); 190EXPORT_SYMBOL(__arch_hweight32);
191EXPORT_SYMBOL(__arch_hweight64); 191EXPORT_SYMBOL(__arch_hweight64);
192#endif 192#endif
193
194#ifdef CONFIG_PPC_BOOK3S_64
195EXPORT_SYMBOL_GPL(mmu_psize_defs);
196#endif
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 2c42cd72d0f5..99a995c2a3f2 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -100,7 +100,7 @@ static int decrementer_set_next_event(unsigned long evt,
100static void decrementer_set_mode(enum clock_event_mode mode, 100static void decrementer_set_mode(enum clock_event_mode mode,
101 struct clock_event_device *dev); 101 struct clock_event_device *dev);
102 102
103static struct clock_event_device decrementer_clockevent = { 103struct clock_event_device decrementer_clockevent = {
104 .name = "decrementer", 104 .name = "decrementer",
105 .rating = 200, 105 .rating = 200,
106 .irq = 0, 106 .irq = 0,
@@ -108,6 +108,7 @@ static struct clock_event_device decrementer_clockevent = {
108 .set_mode = decrementer_set_mode, 108 .set_mode = decrementer_set_mode,
109 .features = CLOCK_EVT_FEAT_ONESHOT, 109 .features = CLOCK_EVT_FEAT_ONESHOT,
110}; 110};
111EXPORT_SYMBOL(decrementer_clockevent);
111 112
112DEFINE_PER_CPU(u64, decrementers_next_tb); 113DEFINE_PER_CPU(u64, decrementers_next_tb);
113static DEFINE_PER_CPU(struct clock_event_device, decrementers); 114static DEFINE_PER_CPU(struct clock_event_device, decrementers);
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 7b612a76c701..50e7dbc7356c 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -29,15 +29,18 @@
29#include <asm/kvm_ppc.h> 29#include <asm/kvm_ppc.h>
30 30
31#include "44x_tlb.h" 31#include "44x_tlb.h"
32#include "booke.h"
32 33
33void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 34void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
34{ 35{
36 kvmppc_booke_vcpu_load(vcpu, cpu);
35 kvmppc_44x_tlb_load(vcpu); 37 kvmppc_44x_tlb_load(vcpu);
36} 38}
37 39
38void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 40void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
39{ 41{
40 kvmppc_44x_tlb_put(vcpu); 42 kvmppc_44x_tlb_put(vcpu);
43 kvmppc_booke_vcpu_put(vcpu);
41} 44}
42 45
43int kvmppc_core_check_processor_compat(void) 46int kvmppc_core_check_processor_compat(void)
@@ -160,6 +163,15 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
160 kmem_cache_free(kvm_vcpu_cache, vcpu_44x); 163 kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
161} 164}
162 165
166int kvmppc_core_init_vm(struct kvm *kvm)
167{
168 return 0;
169}
170
171void kvmppc_core_destroy_vm(struct kvm *kvm)
172{
173}
174
163static int __init kvmppc_44x_init(void) 175static int __init kvmppc_44x_init(void)
164{ 176{
165 int r; 177 int r;
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 549bb2c9a47a..c8c61578fdfc 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -37,22 +37,19 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
37 unsigned int inst, int *advance) 37 unsigned int inst, int *advance)
38{ 38{
39 int emulated = EMULATE_DONE; 39 int emulated = EMULATE_DONE;
40 int dcrn; 40 int dcrn = get_dcrn(inst);
41 int ra; 41 int ra = get_ra(inst);
42 int rb; 42 int rb = get_rb(inst);
43 int rc; 43 int rc = get_rc(inst);
44 int rs; 44 int rs = get_rs(inst);
45 int rt; 45 int rt = get_rt(inst);
46 int ws; 46 int ws = get_ws(inst);
47 47
48 switch (get_op(inst)) { 48 switch (get_op(inst)) {
49 case 31: 49 case 31:
50 switch (get_xop(inst)) { 50 switch (get_xop(inst)) {
51 51
52 case XOP_MFDCR: 52 case XOP_MFDCR:
53 dcrn = get_dcrn(inst);
54 rt = get_rt(inst);
55
56 /* The guest may access CPR0 registers to determine the timebase 53 /* The guest may access CPR0 registers to determine the timebase
57 * frequency, and it must know the real host frequency because it 54 * frequency, and it must know the real host frequency because it
58 * can directly access the timebase registers. 55 * can directly access the timebase registers.
@@ -88,9 +85,6 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
88 break; 85 break;
89 86
90 case XOP_MTDCR: 87 case XOP_MTDCR:
91 dcrn = get_dcrn(inst);
92 rs = get_rs(inst);
93
94 /* emulate some access in kernel */ 88 /* emulate some access in kernel */
95 switch (dcrn) { 89 switch (dcrn) {
96 case DCRN_CPR0_CONFIG_ADDR: 90 case DCRN_CPR0_CONFIG_ADDR:
@@ -108,17 +102,10 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
108 break; 102 break;
109 103
110 case XOP_TLBWE: 104 case XOP_TLBWE:
111 ra = get_ra(inst);
112 rs = get_rs(inst);
113 ws = get_ws(inst);
114 emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); 105 emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
115 break; 106 break;
116 107
117 case XOP_TLBSX: 108 case XOP_TLBSX:
118 rt = get_rt(inst);
119 ra = get_ra(inst);
120 rb = get_rb(inst);
121 rc = get_rc(inst);
122 emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); 109 emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
123 break; 110 break;
124 111
@@ -141,41 +128,41 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
141 return emulated; 128 return emulated;
142} 129}
143 130
144int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) 131int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
145{ 132{
146 int emulated = EMULATE_DONE; 133 int emulated = EMULATE_DONE;
147 134
148 switch (sprn) { 135 switch (sprn) {
149 case SPRN_PID: 136 case SPRN_PID:
150 kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break; 137 kvmppc_set_pid(vcpu, spr_val); break;
151 case SPRN_MMUCR: 138 case SPRN_MMUCR:
152 vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break; 139 vcpu->arch.mmucr = spr_val; break;
153 case SPRN_CCR0: 140 case SPRN_CCR0:
154 vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break; 141 vcpu->arch.ccr0 = spr_val; break;
155 case SPRN_CCR1: 142 case SPRN_CCR1:
156 vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break; 143 vcpu->arch.ccr1 = spr_val; break;
157 default: 144 default:
158 emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); 145 emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val);
159 } 146 }
160 147
161 return emulated; 148 return emulated;
162} 149}
163 150
164int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) 151int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
165{ 152{
166 int emulated = EMULATE_DONE; 153 int emulated = EMULATE_DONE;
167 154
168 switch (sprn) { 155 switch (sprn) {
169 case SPRN_PID: 156 case SPRN_PID:
170 kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break; 157 *spr_val = vcpu->arch.pid; break;
171 case SPRN_MMUCR: 158 case SPRN_MMUCR:
172 kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break; 159 *spr_val = vcpu->arch.mmucr; break;
173 case SPRN_CCR0: 160 case SPRN_CCR0:
174 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break; 161 *spr_val = vcpu->arch.ccr0; break;
175 case SPRN_CCR1: 162 case SPRN_CCR1:
176 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break; 163 *spr_val = vcpu->arch.ccr1; break;
177 default: 164 default:
178 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); 165 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val);
179 } 166 }
180 167
181 return emulated; 168 return emulated;
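
The 44x emulation rework hoists every operand decode to the top of kvmppc_core_emulate_op(), so each case no longer re-derives its own register numbers. The get_*() helpers simply extract fixed bit fields from the 32-bit instruction word; a sketch of typical definitions, following the standard PowerPC D/X-form layout (the real helpers live in the KVM PPC headers):

	#include <linux/types.h>

	static inline unsigned int get_op(u32 inst)  { return inst >> 26; }
	static inline unsigned int get_xop(u32 inst) { return (inst >> 1) & 0x3ff; }
	static inline unsigned int get_rt(u32 inst)  { return (inst >> 21) & 0x1f; }
	static inline unsigned int get_rs(u32 inst)  { return (inst >> 21) & 0x1f; }
	static inline unsigned int get_ra(u32 inst)  { return (inst >> 16) & 0x1f; }
	static inline unsigned int get_rb(u32 inst)  { return (inst >> 11) & 0x1f; }

get_dcrn(), get_rc() and get_ws() follow the same pattern for the DCR number and the tlbwe/tlbsx-specific fields. Decoding everything up front costs a handful of shifts but keeps the switch bodies free of bookkeeping.
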
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 8f64709ae331..f4dacb9c57fa 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -90,6 +90,9 @@ config KVM_BOOK3S_64_PR
90 depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV 90 depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV
91 select KVM_BOOK3S_PR 91 select KVM_BOOK3S_PR
92 92
93config KVM_BOOKE_HV
94 bool
95
93config KVM_440 96config KVM_440
94 bool "KVM support for PowerPC 440 processors" 97 bool "KVM support for PowerPC 440 processors"
95 depends on EXPERIMENTAL && 44x 98 depends on EXPERIMENTAL && 44x
@@ -106,7 +109,7 @@ config KVM_440
106 109
107config KVM_EXIT_TIMING 110config KVM_EXIT_TIMING
108 bool "Detailed exit timing" 111 bool "Detailed exit timing"
109 depends on KVM_440 || KVM_E500 112 depends on KVM_440 || KVM_E500V2 || KVM_E500MC
110 ---help--- 113 ---help---
111 Calculate elapsed time for every exit/enter cycle. A per-vcpu 114 Calculate elapsed time for every exit/enter cycle. A per-vcpu
112 report is available in debugfs kvm/vm#_vcpu#_timing. 115 report is available in debugfs kvm/vm#_vcpu#_timing.
@@ -115,14 +118,29 @@ config KVM_EXIT_TIMING
115 118
116 If unsure, say N. 119 If unsure, say N.
117 120
118config KVM_E500 121config KVM_E500V2
119 bool "KVM support for PowerPC E500 processors" 122 bool "KVM support for PowerPC E500v2 processors"
120 depends on EXPERIMENTAL && E500 123 depends on EXPERIMENTAL && E500 && !PPC_E500MC
121 select KVM 124 select KVM
122 select KVM_MMIO 125 select KVM_MMIO
123 ---help--- 126 ---help---
124 Support running unmodified E500 guest kernels in virtual machines on 127 Support running unmodified E500 guest kernels in virtual machines on
125 E500 host processors. 128 E500v2 host processors.
129
130 This module provides access to the hardware capabilities through
131 a character device node named /dev/kvm.
132
133 If unsure, say N.
134
135config KVM_E500MC
136 bool "KVM support for PowerPC E500MC/E5500 processors"
137 depends on EXPERIMENTAL && PPC_E500MC
138 select KVM
139 select KVM_MMIO
140 select KVM_BOOKE_HV
141 ---help---
142 Support running unmodified E500MC/E5500 (32-bit) guest kernels in
143 virtual machines on E500MC/E5500 host processors.
126 144
127 This module provides access to the hardware capabilities through 145 This module provides access to the hardware capabilities through
128 a character device node named /dev/kvm. 146 a character device node named /dev/kvm.
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 3688aeecc4b2..c2a08636e6d4 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -36,7 +36,17 @@ kvm-e500-objs := \
36 e500.o \ 36 e500.o \
37 e500_tlb.o \ 37 e500_tlb.o \
38 e500_emulate.o 38 e500_emulate.o
39kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs) 39kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs)
40
41kvm-e500mc-objs := \
42 $(common-objs-y) \
43 booke.o \
44 booke_emulate.o \
45 bookehv_interrupts.o \
46 e500mc.o \
47 e500_tlb.o \
48 e500_emulate.o
49kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
40 50
41kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ 51kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
42 ../../../virt/kvm/coalesced_mmio.o \ 52 ../../../virt/kvm/coalesced_mmio.o \
@@ -44,6 +54,7 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
44 book3s_paired_singles.o \ 54 book3s_paired_singles.o \
45 book3s_pr.o \ 55 book3s_pr.o \
46 book3s_pr_papr.o \ 56 book3s_pr_papr.o \
57 book3s_64_vio_hv.o \
47 book3s_emulate.o \ 58 book3s_emulate.o \
48 book3s_interrupts.o \ 59 book3s_interrupts.o \
49 book3s_mmu_hpte.o \ 60 book3s_mmu_hpte.o \
@@ -68,6 +79,7 @@ kvm-book3s_64-module-objs := \
68 powerpc.o \ 79 powerpc.o \
69 emulate.o \ 80 emulate.o \
70 book3s.o \ 81 book3s.o \
82 book3s_64_vio.o \
71 $(kvm-book3s_64-objs-y) 83 $(kvm-book3s_64-objs-y)
72 84
73kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) 85kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
@@ -88,7 +100,8 @@ kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
88kvm-objs := $(kvm-objs-m) $(kvm-objs-y) 100kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
89 101
90obj-$(CONFIG_KVM_440) += kvm.o 102obj-$(CONFIG_KVM_440) += kvm.o
91obj-$(CONFIG_KVM_E500) += kvm.o 103obj-$(CONFIG_KVM_E500V2) += kvm.o
104obj-$(CONFIG_KVM_E500MC) += kvm.o
92obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o 105obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
93obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o 106obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o
94 107
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 7d54f4ed6d96..3f2a8360c857 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -258,7 +258,7 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
258 return true; 258 return true;
259} 259}
260 260
261void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) 261int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
262{ 262{
263 unsigned long *pending = &vcpu->arch.pending_exceptions; 263 unsigned long *pending = &vcpu->arch.pending_exceptions;
264 unsigned long old_pending = vcpu->arch.pending_exceptions; 264 unsigned long old_pending = vcpu->arch.pending_exceptions;
@@ -283,12 +283,17 @@ void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
283 283
284 /* Tell the guest about our interrupt status */ 284 /* Tell the guest about our interrupt status */
285 kvmppc_update_int_pending(vcpu, *pending, old_pending); 285 kvmppc_update_int_pending(vcpu, *pending, old_pending);
286
287 return 0;
286} 288}
287 289
288pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) 290pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
289{ 291{
290 ulong mp_pa = vcpu->arch.magic_page_pa; 292 ulong mp_pa = vcpu->arch.magic_page_pa;
291 293
294 if (!(vcpu->arch.shared->msr & MSR_SF))
295 mp_pa = (uint32_t)mp_pa;
296
292 /* Magic page override */ 297 /* Magic page override */
293 if (unlikely(mp_pa) && 298 if (unlikely(mp_pa) &&
294 unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) == 299 unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) ==
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index c3beaeef3f60..80a577517584 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -36,13 +36,11 @@
36 36
37/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ 37/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
38#define MAX_LPID_970 63 38#define MAX_LPID_970 63
39#define NR_LPIDS (LPID_RSVD + 1)
40unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)];
41 39
42long kvmppc_alloc_hpt(struct kvm *kvm) 40long kvmppc_alloc_hpt(struct kvm *kvm)
43{ 41{
44 unsigned long hpt; 42 unsigned long hpt;
45 unsigned long lpid; 43 long lpid;
46 struct revmap_entry *rev; 44 struct revmap_entry *rev;
47 struct kvmppc_linear_info *li; 45 struct kvmppc_linear_info *li;
48 46
@@ -72,14 +70,9 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
72 } 70 }
73 kvm->arch.revmap = rev; 71 kvm->arch.revmap = rev;
74 72
75 /* Allocate the guest's logical partition ID */ 73 lpid = kvmppc_alloc_lpid();
76 do { 74 if (lpid < 0)
77 lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS); 75 goto out_freeboth;
78 if (lpid >= NR_LPIDS) {
79 pr_err("kvm_alloc_hpt: No LPIDs free\n");
80 goto out_freeboth;
81 }
82 } while (test_and_set_bit(lpid, lpid_inuse));
83 76
84 kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18); 77 kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
85 kvm->arch.lpid = lpid; 78 kvm->arch.lpid = lpid;
@@ -96,7 +89,7 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
96 89
97void kvmppc_free_hpt(struct kvm *kvm) 90void kvmppc_free_hpt(struct kvm *kvm)
98{ 91{
99 clear_bit(kvm->arch.lpid, lpid_inuse); 92 kvmppc_free_lpid(kvm->arch.lpid);
100 vfree(kvm->arch.revmap); 93 vfree(kvm->arch.revmap);
101 if (kvm->arch.hpt_li) 94 if (kvm->arch.hpt_li)
102 kvm_release_hpt(kvm->arch.hpt_li); 95 kvm_release_hpt(kvm->arch.hpt_li);
@@ -171,8 +164,7 @@ int kvmppc_mmu_hv_init(void)
171 if (!cpu_has_feature(CPU_FTR_HVMODE)) 164 if (!cpu_has_feature(CPU_FTR_HVMODE))
172 return -EINVAL; 165 return -EINVAL;
173 166
174 memset(lpid_inuse, 0, sizeof(lpid_inuse)); 167 /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */
175
176 if (cpu_has_feature(CPU_FTR_ARCH_206)) { 168 if (cpu_has_feature(CPU_FTR_ARCH_206)) {
177 host_lpid = mfspr(SPRN_LPID); /* POWER7 */ 169 host_lpid = mfspr(SPRN_LPID); /* POWER7 */
178 rsvd_lpid = LPID_RSVD; 170 rsvd_lpid = LPID_RSVD;
@@ -181,9 +173,11 @@ int kvmppc_mmu_hv_init(void)
181 rsvd_lpid = MAX_LPID_970; 173 rsvd_lpid = MAX_LPID_970;
182 } 174 }
183 175
184 set_bit(host_lpid, lpid_inuse); 176 kvmppc_init_lpid(rsvd_lpid + 1);
177
178 kvmppc_claim_lpid(host_lpid);
185 /* rsvd_lpid is reserved for use in partition switching */ 179 /* rsvd_lpid is reserved for use in partition switching */
186 set_bit(rsvd_lpid, lpid_inuse); 180 kvmppc_claim_lpid(rsvd_lpid);
187 181
188 return 0; 182 return 0;
189} 183}
@@ -452,7 +446,7 @@ static int instruction_is_store(unsigned int instr)
452} 446}
453 447
454static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, 448static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
455 unsigned long gpa, int is_store) 449 unsigned long gpa, gva_t ea, int is_store)
456{ 450{
457 int ret; 451 int ret;
458 u32 last_inst; 452 u32 last_inst;
@@ -499,6 +493,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
499 */ 493 */
500 494
501 vcpu->arch.paddr_accessed = gpa; 495 vcpu->arch.paddr_accessed = gpa;
496 vcpu->arch.vaddr_accessed = ea;
502 return kvmppc_emulate_mmio(run, vcpu); 497 return kvmppc_emulate_mmio(run, vcpu);
503} 498}
504 499
@@ -552,7 +547,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
552 /* No memslot means it's an emulated MMIO region */ 547 /* No memslot means it's an emulated MMIO region */
553 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { 548 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
554 unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1)); 549 unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
555 return kvmppc_hv_emulate_mmio(run, vcpu, gpa, 550 return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
556 dsisr & DSISR_ISSTORE); 551 dsisr & DSISR_ISSTORE);
557 } 552 }
558 553
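
book3s_64_mmu_hv.c drops its private lpid_inuse bitmap in favour of the shared kvmppc_init_lpid()/kvmppc_claim_lpid()/kvmppc_alloc_lpid()/kvmppc_free_lpid() helpers, so Book3S HV and the new Booke HV code can allocate guest LPIDs from one pool. The helpers themselves are added elsewhere in this series; a minimal sketch of such an allocator, assuming a single global bitmap sized at init time:

	#include <linux/kernel.h>
	#include <linux/string.h>
	#include <linux/errno.h>
	#include <linux/bitops.h>
	#include <linux/printk.h>

	#define KVMPPC_NR_LPIDS		64	/* upper bound for this sketch */

	static unsigned long lpid_inuse[BITS_TO_LONGS(KVMPPC_NR_LPIDS)];
	static unsigned long nr_lpids;

	void kvmppc_init_lpid(unsigned long nr)		/* e.g. rsvd_lpid + 1 */
	{
		nr_lpids = min_t(unsigned long, nr, KVMPPC_NR_LPIDS);
		memset(lpid_inuse, 0, sizeof(lpid_inuse));
	}

	void kvmppc_claim_lpid(long lpid)		/* host LPID, reserved LPID */
	{
		set_bit(lpid, lpid_inuse);
	}

	long kvmppc_alloc_lpid(void)
	{
		long lpid;

		do {
			lpid = find_first_zero_bit(lpid_inuse, nr_lpids);
			if (lpid >= nr_lpids) {
				pr_err("%s: No LPIDs free\n", __func__);
				return -ENOMEM;
			}
		} while (test_and_set_bit(lpid, lpid_inuse));

		return lpid;
	}

	void kvmppc_free_lpid(long lpid)
	{
		clear_bit(lpid, lpid_inuse);
	}

kvmppc_alloc_hpt() above then only has to check for a negative return, and kvmppc_mmu_hv_init() claims the host and reserved LPIDs instead of setting bits by hand.
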
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index f2e6e48ea463..56b983e7b738 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -90,8 +90,6 @@ slb_exit_skip_ ## num:
90 or r10, r10, r12 90 or r10, r10, r12
91 slbie r10 91 slbie r10
92 92
93 isync
94
95 /* Fill SLB with our shadow */ 93 /* Fill SLB with our shadow */
96 94
97 lbz r12, SVCPU_SLB_MAX(r3) 95 lbz r12, SVCPU_SLB_MAX(r3)
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
new file mode 100644
index 000000000000..72ffc899c082
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -0,0 +1,150 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
17 */
18
19#include <linux/types.h>
20#include <linux/string.h>
21#include <linux/kvm.h>
22#include <linux/kvm_host.h>
23#include <linux/highmem.h>
24#include <linux/gfp.h>
25#include <linux/slab.h>
26#include <linux/hugetlb.h>
27#include <linux/list.h>
28#include <linux/anon_inodes.h>
29
30#include <asm/tlbflush.h>
31#include <asm/kvm_ppc.h>
32#include <asm/kvm_book3s.h>
33#include <asm/mmu-hash64.h>
34#include <asm/hvcall.h>
35#include <asm/synch.h>
36#include <asm/ppc-opcode.h>
37#include <asm/kvm_host.h>
38#include <asm/udbg.h>
39
40#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
41
42static long kvmppc_stt_npages(unsigned long window_size)
43{
44 return ALIGN((window_size >> SPAPR_TCE_SHIFT)
45 * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
46}
47
48static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
49{
50 struct kvm *kvm = stt->kvm;
51 int i;
52
53 mutex_lock(&kvm->lock);
54 list_del(&stt->list);
55 for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
56 __free_page(stt->pages[i]);
57 kfree(stt);
58 mutex_unlock(&kvm->lock);
59
60 kvm_put_kvm(kvm);
61}
62
63static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
64{
65 struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
66 struct page *page;
67
68 if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
69 return VM_FAULT_SIGBUS;
70
71 page = stt->pages[vmf->pgoff];
72 get_page(page);
73 vmf->page = page;
74 return 0;
75}
76
77static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
78 .fault = kvm_spapr_tce_fault,
79};
80
81static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
82{
83 vma->vm_ops = &kvm_spapr_tce_vm_ops;
84 return 0;
85}
86
87static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
88{
89 struct kvmppc_spapr_tce_table *stt = filp->private_data;
90
91 release_spapr_tce_table(stt);
92 return 0;
93}
94
95static struct file_operations kvm_spapr_tce_fops = {
96 .mmap = kvm_spapr_tce_mmap,
97 .release = kvm_spapr_tce_release,
98};
99
100long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
101 struct kvm_create_spapr_tce *args)
102{
103 struct kvmppc_spapr_tce_table *stt = NULL;
104 long npages;
105 int ret = -ENOMEM;
106 int i;
107
108 /* Check this LIOBN hasn't been previously allocated */
109 list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
110 if (stt->liobn == args->liobn)
111 return -EBUSY;
112 }
113
114 npages = kvmppc_stt_npages(args->window_size);
115
116 stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
117 GFP_KERNEL);
118 if (!stt)
119 goto fail;
120
121 stt->liobn = args->liobn;
122 stt->window_size = args->window_size;
123 stt->kvm = kvm;
124
125 for (i = 0; i < npages; i++) {
126 stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
127 if (!stt->pages[i])
128 goto fail;
129 }
130
131 kvm_get_kvm(kvm);
132
133 mutex_lock(&kvm->lock);
134 list_add(&stt->list, &kvm->arch.spapr_tce_tables);
135
136 mutex_unlock(&kvm->lock);
137
138 return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
139 stt, O_RDWR);
140
141fail:
142 if (stt) {
143 for (i = 0; i < npages; i++)
144 if (stt->pages[i])
145 __free_page(stt->pages[i]);
146
147 kfree(stt);
148 }
149 return ret;
150}
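
The new book3s_64_vio.c backs the KVM_CREATE_SPAPR_TCE vm ioctl: userspace hands in a LIOBN and a DMA window size, gets a file descriptor back, and can mmap that descriptor to see the TCE table that the H_PUT_TCE handler fills in. A hedged userspace sketch of that flow (error handling trimmed; vm_fd is assumed to be an already-created KVM VM descriptor, and the one-TCE-per-4KiB sizing mirrors kvmppc_stt_npages() above):

	#include <stdint.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <linux/kvm.h>

	static uint64_t *map_tce_window(int vm_fd, uint64_t liobn,
					uint32_t window_size)
	{
		struct kvm_create_spapr_tce args = {
			.liobn = liobn,
			.window_size = window_size,
		};
		/* One 64-bit TCE per 4 KiB page of the DMA window. */
		size_t len = (size_t)(window_size >> 12) * sizeof(uint64_t);
		void *tbl;
		int tce_fd;

		tce_fd = ioctl(vm_fd, KVM_CREATE_SPAPR_TCE, &args);
		if (tce_fd < 0) {
			perror("KVM_CREATE_SPAPR_TCE");
			return NULL;
		}
		tbl = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
			   tce_fd, 0);
		return tbl == MAP_FAILED ? NULL : tbl;
	}

Registering the same LIOBN twice fails with -EBUSY, and the table pages are freed when the last reference to the descriptor goes away (see release_spapr_tce_table() above).
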
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index ea0f8c537c28..30c2f3b134c6 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -38,6 +38,9 @@
38 38
39#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) 39#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
40 40
41/* WARNING: This will be called in real-mode on HV KVM and virtual
42 * mode on PR KVM
43 */
41long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 44long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
42 unsigned long ioba, unsigned long tce) 45 unsigned long ioba, unsigned long tce)
43{ 46{
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 135663a3e4fc..b9a989dc76cc 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -87,6 +87,10 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
87 unsigned int inst, int *advance) 87 unsigned int inst, int *advance)
88{ 88{
89 int emulated = EMULATE_DONE; 89 int emulated = EMULATE_DONE;
90 int rt = get_rt(inst);
91 int rs = get_rs(inst);
92 int ra = get_ra(inst);
93 int rb = get_rb(inst);
90 94
91 switch (get_op(inst)) { 95 switch (get_op(inst)) {
92 case 19: 96 case 19:
@@ -106,21 +110,22 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
106 case 31: 110 case 31:
107 switch (get_xop(inst)) { 111 switch (get_xop(inst)) {
108 case OP_31_XOP_MFMSR: 112 case OP_31_XOP_MFMSR:
109 kvmppc_set_gpr(vcpu, get_rt(inst), 113 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr);
110 vcpu->arch.shared->msr);
111 break; 114 break;
112 case OP_31_XOP_MTMSRD: 115 case OP_31_XOP_MTMSRD:
113 { 116 {
114 ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst)); 117 ulong rs_val = kvmppc_get_gpr(vcpu, rs);
115 if (inst & 0x10000) { 118 if (inst & 0x10000) {
116 vcpu->arch.shared->msr &= ~(MSR_RI | MSR_EE); 119 ulong new_msr = vcpu->arch.shared->msr;
117 vcpu->arch.shared->msr |= rs & (MSR_RI | MSR_EE); 120 new_msr &= ~(MSR_RI | MSR_EE);
121 new_msr |= rs_val & (MSR_RI | MSR_EE);
122 vcpu->arch.shared->msr = new_msr;
118 } else 123 } else
119 kvmppc_set_msr(vcpu, rs); 124 kvmppc_set_msr(vcpu, rs_val);
120 break; 125 break;
121 } 126 }
122 case OP_31_XOP_MTMSR: 127 case OP_31_XOP_MTMSR:
123 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst))); 128 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
124 break; 129 break;
125 case OP_31_XOP_MFSR: 130 case OP_31_XOP_MFSR:
126 { 131 {
@@ -130,7 +135,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
130 if (vcpu->arch.mmu.mfsrin) { 135 if (vcpu->arch.mmu.mfsrin) {
131 u32 sr; 136 u32 sr;
132 sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); 137 sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
133 kvmppc_set_gpr(vcpu, get_rt(inst), sr); 138 kvmppc_set_gpr(vcpu, rt, sr);
134 } 139 }
135 break; 140 break;
136 } 141 }
@@ -138,29 +143,29 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
138 { 143 {
139 int srnum; 144 int srnum;
140 145
141 srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf; 146 srnum = (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf;
142 if (vcpu->arch.mmu.mfsrin) { 147 if (vcpu->arch.mmu.mfsrin) {
143 u32 sr; 148 u32 sr;
144 sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); 149 sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
145 kvmppc_set_gpr(vcpu, get_rt(inst), sr); 150 kvmppc_set_gpr(vcpu, rt, sr);
146 } 151 }
147 break; 152 break;
148 } 153 }
149 case OP_31_XOP_MTSR: 154 case OP_31_XOP_MTSR:
150 vcpu->arch.mmu.mtsrin(vcpu, 155 vcpu->arch.mmu.mtsrin(vcpu,
151 (inst >> 16) & 0xf, 156 (inst >> 16) & 0xf,
152 kvmppc_get_gpr(vcpu, get_rs(inst))); 157 kvmppc_get_gpr(vcpu, rs));
153 break; 158 break;
154 case OP_31_XOP_MTSRIN: 159 case OP_31_XOP_MTSRIN:
155 vcpu->arch.mmu.mtsrin(vcpu, 160 vcpu->arch.mmu.mtsrin(vcpu,
156 (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf, 161 (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf,
157 kvmppc_get_gpr(vcpu, get_rs(inst))); 162 kvmppc_get_gpr(vcpu, rs));
158 break; 163 break;
159 case OP_31_XOP_TLBIE: 164 case OP_31_XOP_TLBIE:
160 case OP_31_XOP_TLBIEL: 165 case OP_31_XOP_TLBIEL:
161 { 166 {
162 bool large = (inst & 0x00200000) ? true : false; 167 bool large = (inst & 0x00200000) ? true : false;
163 ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst)); 168 ulong addr = kvmppc_get_gpr(vcpu, rb);
164 vcpu->arch.mmu.tlbie(vcpu, addr, large); 169 vcpu->arch.mmu.tlbie(vcpu, addr, large);
165 break; 170 break;
166 } 171 }
@@ -171,15 +176,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
171 return EMULATE_FAIL; 176 return EMULATE_FAIL;
172 177
173 vcpu->arch.mmu.slbmte(vcpu, 178 vcpu->arch.mmu.slbmte(vcpu,
174 kvmppc_get_gpr(vcpu, get_rs(inst)), 179 kvmppc_get_gpr(vcpu, rs),
175 kvmppc_get_gpr(vcpu, get_rb(inst))); 180 kvmppc_get_gpr(vcpu, rb));
176 break; 181 break;
177 case OP_31_XOP_SLBIE: 182 case OP_31_XOP_SLBIE:
178 if (!vcpu->arch.mmu.slbie) 183 if (!vcpu->arch.mmu.slbie)
179 return EMULATE_FAIL; 184 return EMULATE_FAIL;
180 185
181 vcpu->arch.mmu.slbie(vcpu, 186 vcpu->arch.mmu.slbie(vcpu,
182 kvmppc_get_gpr(vcpu, get_rb(inst))); 187 kvmppc_get_gpr(vcpu, rb));
183 break; 188 break;
184 case OP_31_XOP_SLBIA: 189 case OP_31_XOP_SLBIA:
185 if (!vcpu->arch.mmu.slbia) 190 if (!vcpu->arch.mmu.slbia)
@@ -191,22 +196,22 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
191 if (!vcpu->arch.mmu.slbmfee) { 196 if (!vcpu->arch.mmu.slbmfee) {
192 emulated = EMULATE_FAIL; 197 emulated = EMULATE_FAIL;
193 } else { 198 } else {
194 ulong t, rb; 199 ulong t, rb_val;
195 200
196 rb = kvmppc_get_gpr(vcpu, get_rb(inst)); 201 rb_val = kvmppc_get_gpr(vcpu, rb);
197 t = vcpu->arch.mmu.slbmfee(vcpu, rb); 202 t = vcpu->arch.mmu.slbmfee(vcpu, rb_val);
198 kvmppc_set_gpr(vcpu, get_rt(inst), t); 203 kvmppc_set_gpr(vcpu, rt, t);
199 } 204 }
200 break; 205 break;
201 case OP_31_XOP_SLBMFEV: 206 case OP_31_XOP_SLBMFEV:
202 if (!vcpu->arch.mmu.slbmfev) { 207 if (!vcpu->arch.mmu.slbmfev) {
203 emulated = EMULATE_FAIL; 208 emulated = EMULATE_FAIL;
204 } else { 209 } else {
205 ulong t, rb; 210 ulong t, rb_val;
206 211
207 rb = kvmppc_get_gpr(vcpu, get_rb(inst)); 212 rb_val = kvmppc_get_gpr(vcpu, rb);
208 t = vcpu->arch.mmu.slbmfev(vcpu, rb); 213 t = vcpu->arch.mmu.slbmfev(vcpu, rb_val);
209 kvmppc_set_gpr(vcpu, get_rt(inst), t); 214 kvmppc_set_gpr(vcpu, rt, t);
210 } 215 }
211 break; 216 break;
212 case OP_31_XOP_DCBA: 217 case OP_31_XOP_DCBA:
@@ -214,17 +219,17 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
214 break; 219 break;
215 case OP_31_XOP_DCBZ: 220 case OP_31_XOP_DCBZ:
216 { 221 {
217 ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst)); 222 ulong rb_val = kvmppc_get_gpr(vcpu, rb);
218 ulong ra = 0; 223 ulong ra_val = 0;
219 ulong addr, vaddr; 224 ulong addr, vaddr;
220 u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; 225 u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
221 u32 dsisr; 226 u32 dsisr;
222 int r; 227 int r;
223 228
224 if (get_ra(inst)) 229 if (ra)
225 ra = kvmppc_get_gpr(vcpu, get_ra(inst)); 230 ra_val = kvmppc_get_gpr(vcpu, ra);
226 231
227 addr = (ra + rb) & ~31ULL; 232 addr = (ra_val + rb_val) & ~31ULL;
228 if (!(vcpu->arch.shared->msr & MSR_SF)) 233 if (!(vcpu->arch.shared->msr & MSR_SF))
229 addr &= 0xffffffff; 234 addr &= 0xffffffff;
230 vaddr = addr; 235 vaddr = addr;
@@ -313,10 +318,9 @@ static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn)
313 return bat; 318 return bat;
314} 319}
315 320
316int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) 321int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
317{ 322{
318 int emulated = EMULATE_DONE; 323 int emulated = EMULATE_DONE;
319 ulong spr_val = kvmppc_get_gpr(vcpu, rs);
320 324
321 switch (sprn) { 325 switch (sprn) {
322 case SPRN_SDR1: 326 case SPRN_SDR1:
@@ -428,7 +432,7 @@ unprivileged:
428 return emulated; 432 return emulated;
429} 433}
430 434
431int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) 435int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
432{ 436{
433 int emulated = EMULATE_DONE; 437 int emulated = EMULATE_DONE;
434 438
@@ -441,46 +445,46 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
441 struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn); 445 struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn);
442 446
443 if (sprn % 2) 447 if (sprn % 2)
444 kvmppc_set_gpr(vcpu, rt, bat->raw >> 32); 448 *spr_val = bat->raw >> 32;
445 else 449 else
446 kvmppc_set_gpr(vcpu, rt, bat->raw); 450 *spr_val = bat->raw;
447 451
448 break; 452 break;
449 } 453 }
450 case SPRN_SDR1: 454 case SPRN_SDR1:
451 if (!spr_allowed(vcpu, PRIV_HYPER)) 455 if (!spr_allowed(vcpu, PRIV_HYPER))
452 goto unprivileged; 456 goto unprivileged;
453 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); 457 *spr_val = to_book3s(vcpu)->sdr1;
454 break; 458 break;
455 case SPRN_DSISR: 459 case SPRN_DSISR:
456 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr); 460 *spr_val = vcpu->arch.shared->dsisr;
457 break; 461 break;
458 case SPRN_DAR: 462 case SPRN_DAR:
459 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); 463 *spr_val = vcpu->arch.shared->dar;
460 break; 464 break;
461 case SPRN_HIOR: 465 case SPRN_HIOR:
462 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior); 466 *spr_val = to_book3s(vcpu)->hior;
463 break; 467 break;
464 case SPRN_HID0: 468 case SPRN_HID0:
465 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]); 469 *spr_val = to_book3s(vcpu)->hid[0];
466 break; 470 break;
467 case SPRN_HID1: 471 case SPRN_HID1:
468 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]); 472 *spr_val = to_book3s(vcpu)->hid[1];
469 break; 473 break;
470 case SPRN_HID2: 474 case SPRN_HID2:
471 case SPRN_HID2_GEKKO: 475 case SPRN_HID2_GEKKO:
472 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]); 476 *spr_val = to_book3s(vcpu)->hid[2];
473 break; 477 break;
474 case SPRN_HID4: 478 case SPRN_HID4:
475 case SPRN_HID4_GEKKO: 479 case SPRN_HID4_GEKKO:
476 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]); 480 *spr_val = to_book3s(vcpu)->hid[4];
477 break; 481 break;
478 case SPRN_HID5: 482 case SPRN_HID5:
479 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); 483 *spr_val = to_book3s(vcpu)->hid[5];
480 break; 484 break;
481 case SPRN_CFAR: 485 case SPRN_CFAR:
482 case SPRN_PURR: 486 case SPRN_PURR:
483 kvmppc_set_gpr(vcpu, rt, 0); 487 *spr_val = 0;
484 break; 488 break;
485 case SPRN_GQR0: 489 case SPRN_GQR0:
486 case SPRN_GQR1: 490 case SPRN_GQR1:
@@ -490,8 +494,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
490 case SPRN_GQR5: 494 case SPRN_GQR5:
491 case SPRN_GQR6: 495 case SPRN_GQR6:
492 case SPRN_GQR7: 496 case SPRN_GQR7:
493 kvmppc_set_gpr(vcpu, rt, 497 *spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0];
494 to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]);
495 break; 498 break;
496 case SPRN_THRM1: 499 case SPRN_THRM1:
497 case SPRN_THRM2: 500 case SPRN_THRM2:
@@ -506,7 +509,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
506 case SPRN_PMC3_GEKKO: 509 case SPRN_PMC3_GEKKO:
507 case SPRN_PMC4_GEKKO: 510 case SPRN_PMC4_GEKKO:
508 case SPRN_WPAR_GEKKO: 511 case SPRN_WPAR_GEKKO:
509 kvmppc_set_gpr(vcpu, rt, 0); 512 *spr_val = 0;
510 break; 513 break;
511 default: 514 default:
512unprivileged: 515unprivileged:
@@ -565,23 +568,22 @@ u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst)
565ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst) 568ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
566{ 569{
567 ulong dar = 0; 570 ulong dar = 0;
568 ulong ra; 571 ulong ra = get_ra(inst);
572 ulong rb = get_rb(inst);
569 573
570 switch (get_op(inst)) { 574 switch (get_op(inst)) {
571 case OP_LFS: 575 case OP_LFS:
572 case OP_LFD: 576 case OP_LFD:
573 case OP_STFD: 577 case OP_STFD:
574 case OP_STFS: 578 case OP_STFS:
575 ra = get_ra(inst);
576 if (ra) 579 if (ra)
577 dar = kvmppc_get_gpr(vcpu, ra); 580 dar = kvmppc_get_gpr(vcpu, ra);
578 dar += (s32)((s16)inst); 581 dar += (s32)((s16)inst);
579 break; 582 break;
580 case 31: 583 case 31:
581 ra = get_ra(inst);
582 if (ra) 584 if (ra)
583 dar = kvmppc_get_gpr(vcpu, ra); 585 dar = kvmppc_get_gpr(vcpu, ra);
584 dar += kvmppc_get_gpr(vcpu, get_rb(inst)); 586 dar += kvmppc_get_gpr(vcpu, rb);
585 break; 587 break;
586 default: 588 default:
587 printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); 589 printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst);
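
With the new prototypes above, the core-specific mtspr/mfspr hooks never touch guest GPRs themselves: the generic emulation layer reads the source register once and passes a value down, or passes a pointer for the backend to fill in and writes the GPR back afterwards. The dispatch side presumably ends up along these lines (a sketch of the pattern, not the literal emulate.c code):

	#include <linux/types.h>
	#include <asm/kvm_ppc.h>

	/* Hypothetical wrapper showing the calling convention. */
	static int emulate_spr_access(struct kvm_vcpu *vcpu, int sprn,
				      int rs, int rt, bool is_mtspr)
	{
		ulong spr_val;
		int emulated;

		if (is_mtspr) {
			/* Read the source GPR once, hand the value down. */
			spr_val = kvmppc_get_gpr(vcpu, rs);
			emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, spr_val);
		} else {
			/* Backend fills in spr_val; commit it on success. */
			emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, &spr_val);
			if (emulated == EMULATE_DONE)
				kvmppc_set_gpr(vcpu, rt, spr_val);
		}
		return emulated;
	}

This keeps the per-register cases in each backend free of kvmppc_get_gpr()/kvmppc_set_gpr() bookkeeping.
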
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 108d1f580177..c6af1d623839 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -60,12 +60,20 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu);
60 60
61void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 61void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
62{ 62{
63 struct kvmppc_vcore *vc = vcpu->arch.vcore;
64
63 local_paca->kvm_hstate.kvm_vcpu = vcpu; 65 local_paca->kvm_hstate.kvm_vcpu = vcpu;
64 local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore; 66 local_paca->kvm_hstate.kvm_vcore = vc;
67 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
68 vc->stolen_tb += mftb() - vc->preempt_tb;
65} 69}
66 70
67void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 71void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
68{ 72{
73 struct kvmppc_vcore *vc = vcpu->arch.vcore;
74
75 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
76 vc->preempt_tb = mftb();
69} 77}
70 78
71void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 79void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
@@ -134,6 +142,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
134 vpa->yield_count = 1; 142 vpa->yield_count = 1;
135} 143}
136 144
145/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
146struct reg_vpa {
147 u32 dummy;
148 union {
149 u16 hword;
150 u32 word;
151 } length;
152};
153
154static int vpa_is_registered(struct kvmppc_vpa *vpap)
155{
156 if (vpap->update_pending)
157 return vpap->next_gpa != 0;
158 return vpap->pinned_addr != NULL;
159}
160
137static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, 161static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
138 unsigned long flags, 162 unsigned long flags,
139 unsigned long vcpuid, unsigned long vpa) 163 unsigned long vcpuid, unsigned long vpa)
@@ -142,88 +166,182 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
142 unsigned long len, nb; 166 unsigned long len, nb;
143 void *va; 167 void *va;
144 struct kvm_vcpu *tvcpu; 168 struct kvm_vcpu *tvcpu;
145 int err = H_PARAMETER; 169 int err;
170 int subfunc;
171 struct kvmppc_vpa *vpap;
146 172
147 tvcpu = kvmppc_find_vcpu(kvm, vcpuid); 173 tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
148 if (!tvcpu) 174 if (!tvcpu)
149 return H_PARAMETER; 175 return H_PARAMETER;
150 176
151 flags >>= 63 - 18; 177 subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
152 flags &= 7; 178 if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
153 if (flags == 0 || flags == 4) 179 subfunc == H_VPA_REG_SLB) {
154 return H_PARAMETER; 180 /* Registering new area - address must be cache-line aligned */
155 if (flags < 4) { 181 if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
156 if (vpa & 0x7f)
157 return H_PARAMETER; 182 return H_PARAMETER;
158 if (flags >= 2 && !tvcpu->arch.vpa) 183
159 return H_RESOURCE; 184 /* convert logical addr to kernel addr and read length */
160 /* registering new area; convert logical addr to real */
161 va = kvmppc_pin_guest_page(kvm, vpa, &nb); 185 va = kvmppc_pin_guest_page(kvm, vpa, &nb);
162 if (va == NULL) 186 if (va == NULL)
163 return H_PARAMETER; 187 return H_PARAMETER;
164 if (flags <= 1) 188 if (subfunc == H_VPA_REG_VPA)
165 len = *(unsigned short *)(va + 4); 189 len = ((struct reg_vpa *)va)->length.hword;
166 else 190 else
167 len = *(unsigned int *)(va + 4); 191 len = ((struct reg_vpa *)va)->length.word;
168 if (len > nb) 192 kvmppc_unpin_guest_page(kvm, va);
169 goto out_unpin; 193
170 switch (flags) { 194 /* Check length */
171 case 1: /* register VPA */ 195 if (len > nb || len < sizeof(struct reg_vpa))
172 if (len < 640) 196 return H_PARAMETER;
173 goto out_unpin; 197 } else {
174 if (tvcpu->arch.vpa) 198 vpa = 0;
175 kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa); 199 len = 0;
176 tvcpu->arch.vpa = va; 200 }
177 init_vpa(vcpu, va); 201
178 break; 202 err = H_PARAMETER;
179 case 2: /* register DTL */ 203 vpap = NULL;
180 if (len < 48) 204 spin_lock(&tvcpu->arch.vpa_update_lock);
181 goto out_unpin; 205
182 len -= len % 48; 206 switch (subfunc) {
183 if (tvcpu->arch.dtl) 207 case H_VPA_REG_VPA: /* register VPA */
184 kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl); 208 if (len < sizeof(struct lppaca))
185 tvcpu->arch.dtl = va;
186 tvcpu->arch.dtl_end = va + len;
187 break; 209 break;
188 case 3: /* register SLB shadow buffer */ 210 vpap = &tvcpu->arch.vpa;
189 if (len < 16) 211 err = 0;
190 goto out_unpin; 212 break;
191 if (tvcpu->arch.slb_shadow) 213
192 kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow); 214 case H_VPA_REG_DTL: /* register DTL */
193 tvcpu->arch.slb_shadow = va; 215 if (len < sizeof(struct dtl_entry))
194 break; 216 break;
195 } 217 len -= len % sizeof(struct dtl_entry);
196 } else { 218
197 switch (flags) { 219 /* Check that they have previously registered a VPA */
198 case 5: /* unregister VPA */ 220 err = H_RESOURCE;
199 if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl) 221 if (!vpa_is_registered(&tvcpu->arch.vpa))
200 return H_RESOURCE;
201 if (!tvcpu->arch.vpa)
202 break;
203 kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa);
204 tvcpu->arch.vpa = NULL;
205 break; 222 break;
206 case 6: /* unregister DTL */ 223
207 if (!tvcpu->arch.dtl) 224 vpap = &tvcpu->arch.dtl;
208 break; 225 err = 0;
209 kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl); 226 break;
210 tvcpu->arch.dtl = NULL; 227
228 case H_VPA_REG_SLB: /* register SLB shadow buffer */
229 /* Check that they have previously registered a VPA */
230 err = H_RESOURCE;
231 if (!vpa_is_registered(&tvcpu->arch.vpa))
211 break; 232 break;
212 case 7: /* unregister SLB shadow buffer */ 233
213 if (!tvcpu->arch.slb_shadow) 234 vpap = &tvcpu->arch.slb_shadow;
214 break; 235 err = 0;
215 kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow); 236 break;
216 tvcpu->arch.slb_shadow = NULL; 237
238 case H_VPA_DEREG_VPA: /* deregister VPA */
239 /* Check they don't still have a DTL or SLB buf registered */
240 err = H_RESOURCE;
241 if (vpa_is_registered(&tvcpu->arch.dtl) ||
242 vpa_is_registered(&tvcpu->arch.slb_shadow))
217 break; 243 break;
218 } 244
245 vpap = &tvcpu->arch.vpa;
246 err = 0;
247 break;
248
249 case H_VPA_DEREG_DTL: /* deregister DTL */
250 vpap = &tvcpu->arch.dtl;
251 err = 0;
252 break;
253
254 case H_VPA_DEREG_SLB: /* deregister SLB shadow buffer */
255 vpap = &tvcpu->arch.slb_shadow;
256 err = 0;
257 break;
258 }
259
260 if (vpap) {
261 vpap->next_gpa = vpa;
262 vpap->len = len;
263 vpap->update_pending = 1;
219 } 264 }
220 return H_SUCCESS;
221 265
222 out_unpin: 266 spin_unlock(&tvcpu->arch.vpa_update_lock);
223 kvmppc_unpin_guest_page(kvm, va); 267
224 return err; 268 return err;
225} 269}
226 270
271static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap)
272{
273 void *va;
274 unsigned long nb;
275
276 vpap->update_pending = 0;
277 va = NULL;
278 if (vpap->next_gpa) {
279 va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
280 if (nb < vpap->len) {
281 /*
282 * If it's now too short, it must be that userspace
283 * has changed the mappings underlying guest memory,
284 * so unregister the region.
285 */
286 kvmppc_unpin_guest_page(kvm, va);
287 va = NULL;
288 }
289 }
290 if (vpap->pinned_addr)
291 kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
292 vpap->pinned_addr = va;
293 if (va)
294 vpap->pinned_end = va + vpap->len;
295}
296
297static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
298{
299 struct kvm *kvm = vcpu->kvm;
300
301 spin_lock(&vcpu->arch.vpa_update_lock);
302 if (vcpu->arch.vpa.update_pending) {
303 kvmppc_update_vpa(kvm, &vcpu->arch.vpa);
304 init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
305 }
306 if (vcpu->arch.dtl.update_pending) {
307 kvmppc_update_vpa(kvm, &vcpu->arch.dtl);
308 vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
309 vcpu->arch.dtl_index = 0;
310 }
311 if (vcpu->arch.slb_shadow.update_pending)
312 kvmppc_update_vpa(kvm, &vcpu->arch.slb_shadow);
313 spin_unlock(&vcpu->arch.vpa_update_lock);
314}
315
316static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
317 struct kvmppc_vcore *vc)
318{
319 struct dtl_entry *dt;
320 struct lppaca *vpa;
321 unsigned long old_stolen;
322
323 dt = vcpu->arch.dtl_ptr;
324 vpa = vcpu->arch.vpa.pinned_addr;
325 old_stolen = vcpu->arch.stolen_logged;
326 vcpu->arch.stolen_logged = vc->stolen_tb;
327 if (!dt || !vpa)
328 return;
329 memset(dt, 0, sizeof(struct dtl_entry));
330 dt->dispatch_reason = 7;
331 dt->processor_id = vc->pcpu + vcpu->arch.ptid;
332 dt->timebase = mftb();
333 dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen;
334 dt->srr0 = kvmppc_get_pc(vcpu);
335 dt->srr1 = vcpu->arch.shregs.msr;
336 ++dt;
337 if (dt == vcpu->arch.dtl.pinned_end)
338 dt = vcpu->arch.dtl.pinned_addr;
339 vcpu->arch.dtl_ptr = dt;
340 /* order writing *dt vs. writing vpa->dtl_idx */
341 smp_wmb();
342 vpa->dtl_idx = ++vcpu->arch.dtl_index;
343}
344
227int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) 345int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
228{ 346{
229 unsigned long req = kvmppc_get_gpr(vcpu, 3); 347 unsigned long req = kvmppc_get_gpr(vcpu, 3);
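
do_h_register_vpa() now decodes the H_REGISTER_VPA subfunction with named constants instead of the bare "flags >>= 63 - 18; flags &= 7" arithmetic it replaces. Matching the removed numeric cases, the hvcall.h additions presumably look like this sketch (only the values implied by the old switch are listed):

	/* Subfunction field of the H_REGISTER_VPA flags argument. */
	#define H_VPA_FUNC_SHIFT	(63 - 18)
	#define H_VPA_FUNC_MASK		7UL
	#define H_VPA_REG_VPA		1UL	/* was case 1: register VPA */
	#define H_VPA_REG_DTL		2UL	/* was case 2: register DTL */
	#define H_VPA_REG_SLB		3UL	/* was case 3: register SLB shadow buffer */
	#define H_VPA_DEREG_VPA		5UL	/* was case 5: unregister VPA */
	#define H_VPA_DEREG_DTL		6UL	/* was case 6: unregister DTL */
	#define H_VPA_DEREG_SLB		7UL	/* was case 7: unregister SLB shadow buffer */

The new code also keeps the H_RESOURCE ordering checks: a VPA must be registered before a DTL or SLB shadow buffer, and cannot be deregistered while either of those is still registered.
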
@@ -468,6 +586,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
468 /* default to host PVR, since we can't spoof it */ 586 /* default to host PVR, since we can't spoof it */
469 vcpu->arch.pvr = mfspr(SPRN_PVR); 587 vcpu->arch.pvr = mfspr(SPRN_PVR);
470 kvmppc_set_pvr(vcpu, vcpu->arch.pvr); 588 kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
589 spin_lock_init(&vcpu->arch.vpa_update_lock);
471 590
472 kvmppc_mmu_book3s_hv_init(vcpu); 591 kvmppc_mmu_book3s_hv_init(vcpu);
473 592
@@ -486,6 +605,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
486 INIT_LIST_HEAD(&vcore->runnable_threads); 605 INIT_LIST_HEAD(&vcore->runnable_threads);
487 spin_lock_init(&vcore->lock); 606 spin_lock_init(&vcore->lock);
488 init_waitqueue_head(&vcore->wq); 607 init_waitqueue_head(&vcore->wq);
608 vcore->preempt_tb = mftb();
489 } 609 }
490 kvm->arch.vcores[core] = vcore; 610 kvm->arch.vcores[core] = vcore;
491 } 611 }
@@ -498,6 +618,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
498 ++vcore->num_threads; 618 ++vcore->num_threads;
499 spin_unlock(&vcore->lock); 619 spin_unlock(&vcore->lock);
500 vcpu->arch.vcore = vcore; 620 vcpu->arch.vcore = vcore;
621 vcpu->arch.stolen_logged = vcore->stolen_tb;
501 622
502 vcpu->arch.cpu_type = KVM_CPU_3S_64; 623 vcpu->arch.cpu_type = KVM_CPU_3S_64;
503 kvmppc_sanity_check(vcpu); 624 kvmppc_sanity_check(vcpu);
@@ -512,12 +633,14 @@ out:
512 633
513void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 634void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
514{ 635{
515 if (vcpu->arch.dtl) 636 spin_lock(&vcpu->arch.vpa_update_lock);
516 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl); 637 if (vcpu->arch.dtl.pinned_addr)
517 if (vcpu->arch.slb_shadow) 638 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
518 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow); 639 if (vcpu->arch.slb_shadow.pinned_addr)
519 if (vcpu->arch.vpa) 640 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
520 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa); 641 if (vcpu->arch.vpa.pinned_addr)
642 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
643 spin_unlock(&vcpu->arch.vpa_update_lock);
521 kvm_vcpu_uninit(vcpu); 644 kvm_vcpu_uninit(vcpu);
522 kmem_cache_free(kvm_vcpu_cache, vcpu); 645 kmem_cache_free(kvm_vcpu_cache, vcpu);
523} 646}
@@ -569,6 +692,45 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
569 list_del(&vcpu->arch.run_list); 692 list_del(&vcpu->arch.run_list);
570} 693}
571 694
695static int kvmppc_grab_hwthread(int cpu)
696{
697 struct paca_struct *tpaca;
698 long timeout = 1000;
699
700 tpaca = &paca[cpu];
701
702 /* Ensure the thread won't go into the kernel if it wakes */
703 tpaca->kvm_hstate.hwthread_req = 1;
704
705 /*
706 * If the thread is already executing in the kernel (e.g. handling
707 * a stray interrupt), wait for it to get back to nap mode.
708 * The smp_mb() is to ensure that our setting of hwthread_req
709 * is visible before we look at hwthread_state, so if this
710 * races with the code at system_reset_pSeries and the thread
711 * misses our setting of hwthread_req, we are sure to see its
712 * setting of hwthread_state, and vice versa.
713 */
714 smp_mb();
715 while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
716 if (--timeout <= 0) {
717 pr_err("KVM: couldn't grab cpu %d\n", cpu);
718 return -EBUSY;
719 }
720 udelay(1);
721 }
722 return 0;
723}
724
725static void kvmppc_release_hwthread(int cpu)
726{
727 struct paca_struct *tpaca;
728
729 tpaca = &paca[cpu];
730 tpaca->kvm_hstate.hwthread_req = 0;
731 tpaca->kvm_hstate.kvm_vcpu = NULL;
732}
733
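The handshake above is the classic store-buffering pattern: each side stores its own flag, executes a full barrier, then reads the other side's flag, so at least one of the two is guaranteed to observe the other's store. A minimal user-space sketch of that property, using C11 atomics in place of smp_mb() and the paca fields (the flag names and the IN_KERNEL value are illustrative stand-ins, not the kernel's definitions):

#include <stdatomic.h>
#include <stdbool.h>

#define IN_KERNEL 1                       /* stand-in for KVM_HWTHREAD_IN_KERNEL */

static atomic_int hwthread_req;           /* stand-in for tpaca->kvm_hstate.hwthread_req */
static atomic_int hwthread_state;         /* stand-in for tpaca->kvm_hstate.hwthread_state */

/* Grabber: ask the other thread to stay out of the kernel, then check it. */
static bool grab_sees_thread_out_of_kernel(void)
{
        atomic_store_explicit(&hwthread_req, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);   /* plays the role of smp_mb() */
        return atomic_load_explicit(&hwthread_state, memory_order_relaxed) != IN_KERNEL;
}

/* Other thread: advertise that it is entering the kernel, then check for a request. */
static bool thread_sees_no_request(void)
{
        atomic_store_explicit(&hwthread_state, IN_KERNEL, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);
        return atomic_load_explicit(&hwthread_req, memory_order_relaxed) == 0;
}

With the two fences in place it is impossible for both functions to return true in a concurrent run: one side always sees the other's store, which is exactly what lets kvmppc_grab_hwthread() poll hwthread_state safely after raising hwthread_req.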
572static void kvmppc_start_thread(struct kvm_vcpu *vcpu) 734static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
573{ 735{
574 int cpu; 736 int cpu;
@@ -588,8 +750,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
588 smp_wmb(); 750 smp_wmb();
589#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) 751#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
590 if (vcpu->arch.ptid) { 752 if (vcpu->arch.ptid) {
591 tpaca->cpu_start = 0x80; 753 kvmppc_grab_hwthread(cpu);
592 wmb();
593 xics_wake_cpu(cpu); 754 xics_wake_cpu(cpu);
594 ++vc->n_woken; 755 ++vc->n_woken;
595 } 756 }
@@ -639,7 +800,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
639 struct kvm_vcpu *vcpu, *vcpu0, *vnext; 800 struct kvm_vcpu *vcpu, *vcpu0, *vnext;
640 long ret; 801 long ret;
641 u64 now; 802 u64 now;
642 int ptid; 803 int ptid, i;
643 804
644 /* don't start if any threads have a signal pending */ 805 /* don't start if any threads have a signal pending */
645 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) 806 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
@@ -681,17 +842,29 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
681 vc->nap_count = 0; 842 vc->nap_count = 0;
682 vc->entry_exit_count = 0; 843 vc->entry_exit_count = 0;
683 vc->vcore_state = VCORE_RUNNING; 844 vc->vcore_state = VCORE_RUNNING;
845 vc->stolen_tb += mftb() - vc->preempt_tb;
684 vc->in_guest = 0; 846 vc->in_guest = 0;
685 vc->pcpu = smp_processor_id(); 847 vc->pcpu = smp_processor_id();
686 vc->napping_threads = 0; 848 vc->napping_threads = 0;
687 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) 849 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
688 kvmppc_start_thread(vcpu); 850 kvmppc_start_thread(vcpu);
851 if (vcpu->arch.vpa.update_pending ||
852 vcpu->arch.slb_shadow.update_pending ||
853 vcpu->arch.dtl.update_pending)
854 kvmppc_update_vpas(vcpu);
855 kvmppc_create_dtl_entry(vcpu, vc);
856 }
857 /* Grab any remaining hw threads so they can't go into the kernel */
858 for (i = ptid; i < threads_per_core; ++i)
859 kvmppc_grab_hwthread(vc->pcpu + i);
689 860
690 preempt_disable(); 861 preempt_disable();
691 spin_unlock(&vc->lock); 862 spin_unlock(&vc->lock);
692 863
693 kvm_guest_enter(); 864 kvm_guest_enter();
694 __kvmppc_vcore_entry(NULL, vcpu0); 865 __kvmppc_vcore_entry(NULL, vcpu0);
866 for (i = 0; i < threads_per_core; ++i)
867 kvmppc_release_hwthread(vc->pcpu + i);
695 868
696 spin_lock(&vc->lock); 869 spin_lock(&vc->lock);
697 /* disable sending of IPIs on virtual external irqs */ 870 /* disable sending of IPIs on virtual external irqs */
@@ -737,6 +910,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
737 spin_lock(&vc->lock); 910 spin_lock(&vc->lock);
738 out: 911 out:
739 vc->vcore_state = VCORE_INACTIVE; 912 vc->vcore_state = VCORE_INACTIVE;
913 vc->preempt_tb = mftb();
740 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, 914 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
741 arch.run_list) { 915 arch.run_list) {
742 if (vcpu->arch.ret != RESUME_GUEST) { 916 if (vcpu->arch.ret != RESUME_GUEST) {
@@ -835,6 +1009,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
835 spin_lock(&vc->lock); 1009 spin_lock(&vc->lock);
836 continue; 1010 continue;
837 } 1011 }
1012 vc->runner = vcpu;
838 n_ceded = 0; 1013 n_ceded = 0;
839 list_for_each_entry(v, &vc->runnable_threads, arch.run_list) 1014 list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
840 n_ceded += v->arch.ceded; 1015 n_ceded += v->arch.ceded;
@@ -854,6 +1029,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
854 wake_up(&v->arch.cpu_run); 1029 wake_up(&v->arch.cpu_run);
855 } 1030 }
856 } 1031 }
1032 vc->runner = NULL;
857 } 1033 }
858 1034
859 if (signal_pending(current)) { 1035 if (signal_pending(current)) {
@@ -917,115 +1093,6 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
917 return r; 1093 return r;
918} 1094}
919 1095
920static long kvmppc_stt_npages(unsigned long window_size)
921{
922 return ALIGN((window_size >> SPAPR_TCE_SHIFT)
923 * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
924}
925
926static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
927{
928 struct kvm *kvm = stt->kvm;
929 int i;
930
931 mutex_lock(&kvm->lock);
932 list_del(&stt->list);
933 for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
934 __free_page(stt->pages[i]);
935 kfree(stt);
936 mutex_unlock(&kvm->lock);
937
938 kvm_put_kvm(kvm);
939}
940
941static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
942{
943 struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
944 struct page *page;
945
946 if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
947 return VM_FAULT_SIGBUS;
948
949 page = stt->pages[vmf->pgoff];
950 get_page(page);
951 vmf->page = page;
952 return 0;
953}
954
955static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
956 .fault = kvm_spapr_tce_fault,
957};
958
959static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
960{
961 vma->vm_ops = &kvm_spapr_tce_vm_ops;
962 return 0;
963}
964
965static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
966{
967 struct kvmppc_spapr_tce_table *stt = filp->private_data;
968
969 release_spapr_tce_table(stt);
970 return 0;
971}
972
973static struct file_operations kvm_spapr_tce_fops = {
974 .mmap = kvm_spapr_tce_mmap,
975 .release = kvm_spapr_tce_release,
976};
977
978long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
979 struct kvm_create_spapr_tce *args)
980{
981 struct kvmppc_spapr_tce_table *stt = NULL;
982 long npages;
983 int ret = -ENOMEM;
984 int i;
985
986 /* Check this LIOBN hasn't been previously allocated */
987 list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
988 if (stt->liobn == args->liobn)
989 return -EBUSY;
990 }
991
992 npages = kvmppc_stt_npages(args->window_size);
993
994 stt = kzalloc(sizeof(*stt) + npages* sizeof(struct page *),
995 GFP_KERNEL);
996 if (!stt)
997 goto fail;
998
999 stt->liobn = args->liobn;
1000 stt->window_size = args->window_size;
1001 stt->kvm = kvm;
1002
1003 for (i = 0; i < npages; i++) {
1004 stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
1005 if (!stt->pages[i])
1006 goto fail;
1007 }
1008
1009 kvm_get_kvm(kvm);
1010
1011 mutex_lock(&kvm->lock);
1012 list_add(&stt->list, &kvm->arch.spapr_tce_tables);
1013
1014 mutex_unlock(&kvm->lock);
1015
1016 return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
1017 stt, O_RDWR);
1018
1019fail:
1020 if (stt) {
1021 for (i = 0; i < npages; i++)
1022 if (stt->pages[i])
1023 __free_page(stt->pages[i]);
1024
1025 kfree(stt);
1026 }
1027 return ret;
1028}
1029 1096
1030/* Work out RMLS (real mode limit selector) field value for a given RMA size. 1097/* Work out RMLS (real mode limit selector) field value for a given RMA size.
1031 Assumes POWER7 or PPC970. */ 1098 Assumes POWER7 or PPC970. */
@@ -1108,6 +1175,38 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
1108 return fd; 1175 return fd;
1109} 1176}
1110 1177
1178static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
1179 int linux_psize)
1180{
1181 struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
1182
1183 if (!def->shift)
1184 return;
1185 (*sps)->page_shift = def->shift;
1186 (*sps)->slb_enc = def->sllp;
1187 (*sps)->enc[0].page_shift = def->shift;
1188 (*sps)->enc[0].pte_enc = def->penc;
1189 (*sps)++;
1190}
1191
1192int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
1193{
1194 struct kvm_ppc_one_seg_page_size *sps;
1195
1196 info->flags = KVM_PPC_PAGE_SIZES_REAL;
1197 if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
1198 info->flags |= KVM_PPC_1T_SEGMENTS;
1199 info->slb_size = mmu_slb_size;
1200
 1201	 /* We only support these sizes for now, and no multi-size segments */
1202 sps = &info->sps[0];
1203 kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
1204 kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
1205 kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);
1206
1207 return 0;
1208}
1209
1111/* 1210/*
1112 * Get (and clear) the dirty memory log for a memory slot. 1211 * Get (and clear) the dirty memory log for a memory slot.
1113 */ 1212 */
@@ -1404,12 +1503,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
1404 return EMULATE_FAIL; 1503 return EMULATE_FAIL;
1405} 1504}
1406 1505
1407int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) 1506int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
1408{ 1507{
1409 return EMULATE_FAIL; 1508 return EMULATE_FAIL;
1410} 1509}
1411 1510
1412int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) 1511int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
1413{ 1512{
1414 return EMULATE_FAIL; 1513 return EMULATE_FAIL;
1415} 1514}
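kvm_vm_ioctl_get_smmu_info() above is what the new KVM_PPC_GET_SMMU_INFO vm ioctl ends up calling for HV guests: it reports the SLB size and the segment/page-size encodings the guest may use. A rough sketch of how a VMM could read it back (error handling trimmed; the ioctl name and the struct kvm_ppc_smmu_info layout come from this series' uapi additions, so treat those details as assumptions of the sketch rather than settled ABI):

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* vm_fd is a file descriptor returned by KVM_CREATE_VM. */
static void dump_smmu_info(int vm_fd)
{
        struct kvm_ppc_smmu_info info;
        unsigned int i;

        if (ioctl(vm_fd, KVM_PPC_GET_SMMU_INFO, &info) < 0)
                return;

        printf("slb_size=%u, 1T segments %ssupported\n", info.slb_size,
               (info.flags & KVM_PPC_1T_SEGMENTS) ? "" : "not ");

        /* Assumes unused sps[] slots are left zeroed by the kernel. */
        for (i = 0; i < sizeof(info.sps) / sizeof(info.sps[0]); i++) {
                if (!info.sps[i].page_shift)
                        break;
                printf("  segment base page shift %u, slb_enc 0x%x\n",
                       info.sps[i].page_shift, info.sps[i].slb_enc);
        }
}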
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index d3fb4df02c41..84035a528c80 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -68,19 +68,24 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
68 rotldi r10,r10,16 68 rotldi r10,r10,16
69 mtmsrd r10,1 69 mtmsrd r10,1
70 70
71 /* Save host PMU registers and load guest PMU registers */ 71 /* Save host PMU registers */
72 /* R4 is live here (vcpu pointer) but not r3 or r5 */ 72 /* R4 is live here (vcpu pointer) but not r3 or r5 */
73 li r3, 1 73 li r3, 1
74 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ 74 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
75 mfspr r7, SPRN_MMCR0 /* save MMCR0 */ 75 mfspr r7, SPRN_MMCR0 /* save MMCR0 */
76 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */ 76 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
77 mfspr r6, SPRN_MMCRA
78BEGIN_FTR_SECTION
79 /* On P7, clear MMCRA in order to disable SDAR updates */
80 li r5, 0
81 mtspr SPRN_MMCRA, r5
82END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
77 isync 83 isync
78 ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ 84 ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
79 lbz r5, LPPACA_PMCINUSE(r3) 85 lbz r5, LPPACA_PMCINUSE(r3)
80 cmpwi r5, 0 86 cmpwi r5, 0
81 beq 31f /* skip if not */ 87 beq 31f /* skip if not */
82 mfspr r5, SPRN_MMCR1 88 mfspr r5, SPRN_MMCR1
83 mfspr r6, SPRN_MMCRA
84 std r7, HSTATE_MMCR(r13) 89 std r7, HSTATE_MMCR(r13)
85 std r5, HSTATE_MMCR + 8(r13) 90 std r5, HSTATE_MMCR + 8(r13)
86 std r6, HSTATE_MMCR + 16(r13) 91 std r6, HSTATE_MMCR + 16(r13)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b70bf22a3ff3..a84aafce2a12 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -26,6 +26,7 @@
26#include <asm/hvcall.h> 26#include <asm/hvcall.h>
27#include <asm/asm-offsets.h> 27#include <asm/asm-offsets.h>
28#include <asm/exception-64s.h> 28#include <asm/exception-64s.h>
29#include <asm/kvm_book3s_asm.h>
29 30
30/***************************************************************************** 31/*****************************************************************************
31 * * 32 * *
@@ -82,6 +83,7 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
82 83
83#define XICS_XIRR 4 84#define XICS_XIRR 4
84#define XICS_QIRR 0xc 85#define XICS_QIRR 0xc
86#define XICS_IPI 2 /* interrupt source # for IPIs */
85 87
86/* 88/*
87 * We come in here when wakened from nap mode on a secondary hw thread. 89 * We come in here when wakened from nap mode on a secondary hw thread.
@@ -94,26 +96,54 @@ kvm_start_guest:
94 subi r1,r1,STACK_FRAME_OVERHEAD 96 subi r1,r1,STACK_FRAME_OVERHEAD
95 ld r2,PACATOC(r13) 97 ld r2,PACATOC(r13)
96 98
97 /* were we napping due to cede? */ 99 li r0,KVM_HWTHREAD_IN_KVM
98 lbz r0,HSTATE_NAPPING(r13) 100 stb r0,HSTATE_HWTHREAD_STATE(r13)
99 cmpwi r0,0
100 bne kvm_end_cede
101 101
102 /* get vcpu pointer */ 102 /* NV GPR values from power7_idle() will no longer be valid */
103 ld r4, HSTATE_KVM_VCPU(r13) 103 li r0,1
104 stb r0,PACA_NAPSTATELOST(r13)
104 105
105 /* We got here with an IPI; clear it */ 106 /* get vcpu pointer, NULL if we have no vcpu to run */
106 ld r5, HSTATE_XICS_PHYS(r13) 107 ld r4,HSTATE_KVM_VCPU(r13)
107 li r0, 0xff 108 cmpdi cr1,r4,0
108 li r6, XICS_QIRR 109
109 li r7, XICS_XIRR 110 /* Check the wake reason in SRR1 to see why we got here */
110 lwzcix r8, r5, r7 /* ack the interrupt */ 111 mfspr r3,SPRN_SRR1
112 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
113 cmpwi r3,4 /* was it an external interrupt? */
114 bne 27f
115
116 /*
117 * External interrupt - for now assume it is an IPI, since we
118 * should never get any other interrupts sent to offline threads.
119 * Only do this for secondary threads.
120 */
121 beq cr1,25f
122 lwz r3,VCPU_PTID(r4)
123 cmpwi r3,0
124 beq 27f
12525: ld r5,HSTATE_XICS_PHYS(r13)
126 li r0,0xff
127 li r6,XICS_QIRR
128 li r7,XICS_XIRR
129 lwzcix r8,r5,r7 /* get and ack the interrupt */
111 sync 130 sync
112 stbcix r0, r5, r6 /* clear it */ 131 clrldi. r9,r8,40 /* get interrupt source ID. */
113 stwcix r8, r5, r7 /* EOI it */ 132 beq 27f /* none there? */
133 cmpwi r9,XICS_IPI
134 bne 26f
135 stbcix r0,r5,r6 /* clear IPI */
13626: stwcix r8,r5,r7 /* EOI the interrupt */
114 137
115 /* NV GPR values from power7_idle() will no longer be valid */ 13827: /* XXX should handle hypervisor maintenance interrupts etc. here */
116 stb r0, PACA_NAPSTATELOST(r13) 139
140 /* if we have no vcpu to run, go back to sleep */
141 beq cr1,kvm_no_guest
142
143 /* were we napping due to cede? */
144 lbz r0,HSTATE_NAPPING(r13)
145 cmpwi r0,0
146 bne kvm_end_cede
117 147
118.global kvmppc_hv_entry 148.global kvmppc_hv_entry
119kvmppc_hv_entry: 149kvmppc_hv_entry:
@@ -129,24 +159,15 @@ kvmppc_hv_entry:
129 mflr r0 159 mflr r0
130 std r0, HSTATE_VMHANDLER(r13) 160 std r0, HSTATE_VMHANDLER(r13)
131 161
132 ld r14, VCPU_GPR(r14)(r4) 162 /* Set partition DABR */
133 ld r15, VCPU_GPR(r15)(r4) 163 /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
134 ld r16, VCPU_GPR(r16)(r4) 164 li r5,3
135 ld r17, VCPU_GPR(r17)(r4) 165 ld r6,VCPU_DABR(r4)
136 ld r18, VCPU_GPR(r18)(r4) 166 mtspr SPRN_DABRX,r5
137 ld r19, VCPU_GPR(r19)(r4) 167 mtspr SPRN_DABR,r6
138 ld r20, VCPU_GPR(r20)(r4) 168BEGIN_FTR_SECTION
139 ld r21, VCPU_GPR(r21)(r4) 169 isync
140 ld r22, VCPU_GPR(r22)(r4) 170END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
141 ld r23, VCPU_GPR(r23)(r4)
142 ld r24, VCPU_GPR(r24)(r4)
143 ld r25, VCPU_GPR(r25)(r4)
144 ld r26, VCPU_GPR(r26)(r4)
145 ld r27, VCPU_GPR(r27)(r4)
146 ld r28, VCPU_GPR(r28)(r4)
147 ld r29, VCPU_GPR(r29)(r4)
148 ld r30, VCPU_GPR(r30)(r4)
149 ld r31, VCPU_GPR(r31)(r4)
150 171
151 /* Load guest PMU registers */ 172 /* Load guest PMU registers */
152 /* R4 is live here (vcpu pointer) */ 173 /* R4 is live here (vcpu pointer) */
@@ -185,6 +206,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
185 /* Load up FP, VMX and VSX registers */ 206 /* Load up FP, VMX and VSX registers */
186 bl kvmppc_load_fp 207 bl kvmppc_load_fp
187 208
209 ld r14, VCPU_GPR(r14)(r4)
210 ld r15, VCPU_GPR(r15)(r4)
211 ld r16, VCPU_GPR(r16)(r4)
212 ld r17, VCPU_GPR(r17)(r4)
213 ld r18, VCPU_GPR(r18)(r4)
214 ld r19, VCPU_GPR(r19)(r4)
215 ld r20, VCPU_GPR(r20)(r4)
216 ld r21, VCPU_GPR(r21)(r4)
217 ld r22, VCPU_GPR(r22)(r4)
218 ld r23, VCPU_GPR(r23)(r4)
219 ld r24, VCPU_GPR(r24)(r4)
220 ld r25, VCPU_GPR(r25)(r4)
221 ld r26, VCPU_GPR(r26)(r4)
222 ld r27, VCPU_GPR(r27)(r4)
223 ld r28, VCPU_GPR(r28)(r4)
224 ld r29, VCPU_GPR(r29)(r4)
225 ld r30, VCPU_GPR(r30)(r4)
226 ld r31, VCPU_GPR(r31)(r4)
227
188BEGIN_FTR_SECTION 228BEGIN_FTR_SECTION
189 /* Switch DSCR to guest value */ 229 /* Switch DSCR to guest value */
190 ld r5, VCPU_DSCR(r4) 230 ld r5, VCPU_DSCR(r4)
@@ -226,12 +266,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
226 mtspr SPRN_DAR, r5 266 mtspr SPRN_DAR, r5
227 mtspr SPRN_DSISR, r6 267 mtspr SPRN_DSISR, r6
228 268
229 /* Set partition DABR */
230 li r5,3
231 ld r6,VCPU_DABR(r4)
232 mtspr SPRN_DABRX,r5
233 mtspr SPRN_DABR,r6
234
235BEGIN_FTR_SECTION 269BEGIN_FTR_SECTION
236 /* Restore AMR and UAMOR, set AMOR to all 1s */ 270 /* Restore AMR and UAMOR, set AMOR to all 1s */
237 ld r5,VCPU_AMR(r4) 271 ld r5,VCPU_AMR(r4)
@@ -925,12 +959,6 @@ BEGIN_FTR_SECTION
925 mtspr SPRN_AMR,r6 959 mtspr SPRN_AMR,r6
926END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 960END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
927 961
928 /* Restore host DABR and DABRX */
929 ld r5,HSTATE_DABR(r13)
930 li r6,7
931 mtspr SPRN_DABR,r5
932 mtspr SPRN_DABRX,r6
933
934 /* Switch DSCR back to host value */ 962 /* Switch DSCR back to host value */
935BEGIN_FTR_SECTION 963BEGIN_FTR_SECTION
936 mfspr r8, SPRN_DSCR 964 mfspr r8, SPRN_DSCR
@@ -969,6 +997,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
969 std r5, VCPU_SPRG2(r9) 997 std r5, VCPU_SPRG2(r9)
970 std r6, VCPU_SPRG3(r9) 998 std r6, VCPU_SPRG3(r9)
971 999
1000 /* save FP state */
1001 mr r3, r9
1002 bl .kvmppc_save_fp
1003
972 /* Increment yield count if they have a VPA */ 1004 /* Increment yield count if they have a VPA */
973 ld r8, VCPU_VPA(r9) /* do they have a VPA? */ 1005 ld r8, VCPU_VPA(r9) /* do they have a VPA? */
974 cmpdi r8, 0 1006 cmpdi r8, 0
@@ -983,6 +1015,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
983 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ 1015 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
984 mfspr r4, SPRN_MMCR0 /* save MMCR0 */ 1016 mfspr r4, SPRN_MMCR0 /* save MMCR0 */
985 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ 1017 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
1018 mfspr r6, SPRN_MMCRA
1019BEGIN_FTR_SECTION
1020 /* On P7, clear MMCRA in order to disable SDAR updates */
1021 li r7, 0
1022 mtspr SPRN_MMCRA, r7
1023END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
986 isync 1024 isync
987 beq 21f /* if no VPA, save PMU stuff anyway */ 1025 beq 21f /* if no VPA, save PMU stuff anyway */
988 lbz r7, LPPACA_PMCINUSE(r8) 1026 lbz r7, LPPACA_PMCINUSE(r8)
@@ -991,7 +1029,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
991 std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ 1029 std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
992 b 22f 1030 b 22f
99321: mfspr r5, SPRN_MMCR1 103121: mfspr r5, SPRN_MMCR1
994 mfspr r6, SPRN_MMCRA
995 std r4, VCPU_MMCR(r9) 1032 std r4, VCPU_MMCR(r9)
996 std r5, VCPU_MMCR + 8(r9) 1033 std r5, VCPU_MMCR + 8(r9)
997 std r6, VCPU_MMCR + 16(r9) 1034 std r6, VCPU_MMCR + 16(r9)
@@ -1016,17 +1053,20 @@ BEGIN_FTR_SECTION
1016 stw r11, VCPU_PMC + 28(r9) 1053 stw r11, VCPU_PMC + 28(r9)
1017END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 1054END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
101822: 105522:
1019 /* save FP state */
1020 mr r3, r9
1021 bl .kvmppc_save_fp
1022 1056
1023 /* Secondary threads go off to take a nap on POWER7 */ 1057 /* Secondary threads go off to take a nap on POWER7 */
1024BEGIN_FTR_SECTION 1058BEGIN_FTR_SECTION
1025 lwz r0,VCPU_PTID(r3) 1059 lwz r0,VCPU_PTID(r9)
1026 cmpwi r0,0 1060 cmpwi r0,0
1027 bne secondary_nap 1061 bne secondary_nap
1028END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 1062END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1029 1063
1064 /* Restore host DABR and DABRX */
1065 ld r5,HSTATE_DABR(r13)
1066 li r6,7
1067 mtspr SPRN_DABR,r5
1068 mtspr SPRN_DABRX,r6
1069
1030 /* 1070 /*
1031 * Reload DEC. HDEC interrupts were disabled when 1071 * Reload DEC. HDEC interrupts were disabled when
1032 * we reloaded the host's LPCR value. 1072 * we reloaded the host's LPCR value.
@@ -1363,7 +1403,12 @@ bounce_ext_interrupt:
1363 1403
1364_GLOBAL(kvmppc_h_set_dabr) 1404_GLOBAL(kvmppc_h_set_dabr)
1365 std r4,VCPU_DABR(r3) 1405 std r4,VCPU_DABR(r3)
1366 mtspr SPRN_DABR,r4 1406 /* Work around P7 bug where DABR can get corrupted on mtspr */
14071: mtspr SPRN_DABR,r4
1408 mfspr r5, SPRN_DABR
1409 cmpd r4, r5
1410 bne 1b
1411 isync
1367 li r3,0 1412 li r3,0
1368 blr 1413 blr
1369 1414
@@ -1445,8 +1490,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
1445 * Take a nap until a decrementer or external interrupt occurs, 1490 * Take a nap until a decrementer or external interrupt occurs,
1446 * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR 1491 * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
1447 */ 1492 */
1448 li r0,0x80 1493 li r0,1
1449 stb r0,PACAPROCSTART(r13) 1494 stb r0,HSTATE_HWTHREAD_REQ(r13)
1450 mfspr r5,SPRN_LPCR 1495 mfspr r5,SPRN_LPCR
1451 ori r5,r5,LPCR_PECE0 | LPCR_PECE1 1496 ori r5,r5,LPCR_PECE0 | LPCR_PECE1
1452 mtspr SPRN_LPCR,r5 1497 mtspr SPRN_LPCR,r5
@@ -1463,26 +1508,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
1463kvm_end_cede: 1508kvm_end_cede:
1464 /* Woken by external or decrementer interrupt */ 1509 /* Woken by external or decrementer interrupt */
1465 ld r1, HSTATE_HOST_R1(r13) 1510 ld r1, HSTATE_HOST_R1(r13)
1466 ld r2, PACATOC(r13)
1467 1511
1468 /* If we're a secondary thread and we got here by an IPI, ack it */
1469 ld r4,HSTATE_KVM_VCPU(r13)
1470 lwz r3,VCPU_PTID(r4)
1471 cmpwi r3,0
1472 beq 27f
1473 mfspr r3,SPRN_SRR1
1474 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
1475 cmpwi r3,4 /* was it an external interrupt? */
1476 bne 27f
1477 ld r5, HSTATE_XICS_PHYS(r13)
1478 li r0,0xff
1479 li r6,XICS_QIRR
1480 li r7,XICS_XIRR
1481 lwzcix r8,r5,r7 /* ack the interrupt */
1482 sync
1483 stbcix r0,r5,r6 /* clear it */
1484 stwcix r8,r5,r7 /* EOI it */
148527:
1486 /* load up FP state */ 1512 /* load up FP state */
1487 bl kvmppc_load_fp 1513 bl kvmppc_load_fp
1488 1514
@@ -1580,12 +1606,17 @@ secondary_nap:
1580 stwcx. r3, 0, r4 1606 stwcx. r3, 0, r4
1581 bne 51b 1607 bne 51b
1582 1608
1609kvm_no_guest:
1610 li r0, KVM_HWTHREAD_IN_NAP
1611 stb r0, HSTATE_HWTHREAD_STATE(r13)
1612 li r0, 0
1613 std r0, HSTATE_KVM_VCPU(r13)
1614
1583 li r3, LPCR_PECE0 1615 li r3, LPCR_PECE0
1584 mfspr r4, SPRN_LPCR 1616 mfspr r4, SPRN_LPCR
1585 rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 1617 rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
1586 mtspr SPRN_LPCR, r4 1618 mtspr SPRN_LPCR, r4
1587 isync 1619 isync
1588 li r0, 0
1589 std r0, HSTATE_SCRATCH0(r13) 1620 std r0, HSTATE_SCRATCH0(r13)
1590 ptesync 1621 ptesync
1591 ld r0, HSTATE_SCRATCH0(r13) 1622 ld r0, HSTATE_SCRATCH0(r13)
@@ -1599,8 +1630,8 @@ secondary_nap:
1599 * r3 = vcpu pointer 1630 * r3 = vcpu pointer
1600 */ 1631 */
1601_GLOBAL(kvmppc_save_fp) 1632_GLOBAL(kvmppc_save_fp)
1602 mfmsr r9 1633 mfmsr r5
1603 ori r8,r9,MSR_FP 1634 ori r8,r5,MSR_FP
1604#ifdef CONFIG_ALTIVEC 1635#ifdef CONFIG_ALTIVEC
1605BEGIN_FTR_SECTION 1636BEGIN_FTR_SECTION
1606 oris r8,r8,MSR_VEC@h 1637 oris r8,r8,MSR_VEC@h
@@ -1649,7 +1680,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
1649#endif 1680#endif
1650 mfspr r6,SPRN_VRSAVE 1681 mfspr r6,SPRN_VRSAVE
1651 stw r6,VCPU_VRSAVE(r3) 1682 stw r6,VCPU_VRSAVE(r3)
1652 mtmsrd r9 1683 mtmsrd r5
1653 isync 1684 isync
1654 blr 1685 blr
1655 1686
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 7759053d391b..a1baec340f7e 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -120,6 +120,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
120 if (msr & MSR_POW) { 120 if (msr & MSR_POW) {
121 if (!vcpu->arch.pending_exceptions) { 121 if (!vcpu->arch.pending_exceptions) {
122 kvm_vcpu_block(vcpu); 122 kvm_vcpu_block(vcpu);
123 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
123 vcpu->stat.halt_wakeup++; 124 vcpu->stat.halt_wakeup++;
124 125
125 /* Unset POW bit after we woke up */ 126 /* Unset POW bit after we woke up */
@@ -144,6 +145,21 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
144 } 145 }
145 } 146 }
146 147
148 /*
149 * When switching from 32 to 64-bit, we may have a stale 32-bit
 150	 * magic page around that we need to flush. Typically a 32-bit magic
 151	 * page will be instantiated when calling into RTAS. Note: we
 152	 * assume that such a transition only happens while in kernel mode,
 153	 * i.e. we never transition from user 32-bit to kernel 64-bit with
154 * a 32-bit magic page around.
155 */
156 if (vcpu->arch.magic_page_pa &&
157 !(old_msr & MSR_PR) && !(old_msr & MSR_SF) && (msr & MSR_SF)) {
158 /* going from RTAS to normal kernel code */
159 kvmppc_mmu_pte_flush(vcpu, (uint32_t)vcpu->arch.magic_page_pa,
160 ~0xFFFUL);
161 }
162
147 /* Preload FPU if it's enabled */ 163 /* Preload FPU if it's enabled */
148 if (vcpu->arch.shared->msr & MSR_FP) 164 if (vcpu->arch.shared->msr & MSR_FP)
149 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 165 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
@@ -251,6 +267,9 @@ static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
251{ 267{
252 ulong mp_pa = vcpu->arch.magic_page_pa; 268 ulong mp_pa = vcpu->arch.magic_page_pa;
253 269
270 if (!(vcpu->arch.shared->msr & MSR_SF))
271 mp_pa = (uint32_t)mp_pa;
272
254 if (unlikely(mp_pa) && 273 if (unlikely(mp_pa) &&
255 unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { 274 unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
256 return 1; 275 return 1;
@@ -351,6 +370,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
351 /* MMIO */ 370 /* MMIO */
352 vcpu->stat.mmio_exits++; 371 vcpu->stat.mmio_exits++;
353 vcpu->arch.paddr_accessed = pte.raddr; 372 vcpu->arch.paddr_accessed = pte.raddr;
373 vcpu->arch.vaddr_accessed = pte.eaddr;
354 r = kvmppc_emulate_mmio(run, vcpu); 374 r = kvmppc_emulate_mmio(run, vcpu);
355 if ( r == RESUME_HOST_NV ) 375 if ( r == RESUME_HOST_NV )
356 r = RESUME_HOST; 376 r = RESUME_HOST;
@@ -528,6 +548,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
528 run->exit_reason = KVM_EXIT_UNKNOWN; 548 run->exit_reason = KVM_EXIT_UNKNOWN;
529 run->ready_for_interrupt_injection = 1; 549 run->ready_for_interrupt_injection = 1;
530 550
551 /* We get here with MSR.EE=0, so enable it to be a nice citizen */
552 __hard_irq_enable();
553
531 trace_kvm_book3s_exit(exit_nr, vcpu); 554 trace_kvm_book3s_exit(exit_nr, vcpu);
532 preempt_enable(); 555 preempt_enable();
533 kvm_resched(vcpu); 556 kvm_resched(vcpu);
@@ -617,10 +640,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
617 break; 640 break;
618 /* We're good on these - the host merely wanted to get our attention */ 641 /* We're good on these - the host merely wanted to get our attention */
619 case BOOK3S_INTERRUPT_DECREMENTER: 642 case BOOK3S_INTERRUPT_DECREMENTER:
643 case BOOK3S_INTERRUPT_HV_DECREMENTER:
620 vcpu->stat.dec_exits++; 644 vcpu->stat.dec_exits++;
621 r = RESUME_GUEST; 645 r = RESUME_GUEST;
622 break; 646 break;
623 case BOOK3S_INTERRUPT_EXTERNAL: 647 case BOOK3S_INTERRUPT_EXTERNAL:
648 case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
649 case BOOK3S_INTERRUPT_EXTERNAL_HV:
624 vcpu->stat.ext_intr_exits++; 650 vcpu->stat.ext_intr_exits++;
625 r = RESUME_GUEST; 651 r = RESUME_GUEST;
626 break; 652 break;
@@ -628,6 +654,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
628 r = RESUME_GUEST; 654 r = RESUME_GUEST;
629 break; 655 break;
630 case BOOK3S_INTERRUPT_PROGRAM: 656 case BOOK3S_INTERRUPT_PROGRAM:
657 case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
631 { 658 {
632 enum emulation_result er; 659 enum emulation_result er;
633 struct kvmppc_book3s_shadow_vcpu *svcpu; 660 struct kvmppc_book3s_shadow_vcpu *svcpu;
@@ -1131,6 +1158,31 @@ out:
1131 return r; 1158 return r;
1132} 1159}
1133 1160
1161#ifdef CONFIG_PPC64
1162int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
1163{
1164 /* No flags */
1165 info->flags = 0;
1166
1167 /* SLB is always 64 entries */
1168 info->slb_size = 64;
1169
1170 /* Standard 4k base page size segment */
1171 info->sps[0].page_shift = 12;
1172 info->sps[0].slb_enc = 0;
1173 info->sps[0].enc[0].page_shift = 12;
1174 info->sps[0].enc[0].pte_enc = 0;
1175
1176 /* Standard 16M large page size segment */
1177 info->sps[1].page_shift = 24;
1178 info->sps[1].slb_enc = SLB_VSID_L;
1179 info->sps[1].enc[0].page_shift = 24;
1180 info->sps[1].enc[0].pte_enc = 0;
1181
1182 return 0;
1183}
1184#endif /* CONFIG_PPC64 */
1185
1134int kvmppc_core_prepare_memory_region(struct kvm *kvm, 1186int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1135 struct kvm_userspace_memory_region *mem) 1187 struct kvm_userspace_memory_region *mem)
1136{ 1188{
@@ -1144,11 +1196,18 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
1144 1196
1145int kvmppc_core_init_vm(struct kvm *kvm) 1197int kvmppc_core_init_vm(struct kvm *kvm)
1146{ 1198{
1199#ifdef CONFIG_PPC64
1200 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
1201#endif
1202
1147 return 0; 1203 return 0;
1148} 1204}
1149 1205
1150void kvmppc_core_destroy_vm(struct kvm *kvm) 1206void kvmppc_core_destroy_vm(struct kvm *kvm)
1151{ 1207{
1208#ifdef CONFIG_PPC64
1209 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
1210#endif
1152} 1211}
1153 1212
1154static int kvmppc_book3s_init(void) 1213static int kvmppc_book3s_init(void)
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index b9589324797b..3ff9013d6e79 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -15,6 +15,8 @@
15 * published by the Free Software Foundation. 15 * published by the Free Software Foundation.
16 */ 16 */
17 17
18#include <linux/anon_inodes.h>
19
18#include <asm/uaccess.h> 20#include <asm/uaccess.h>
19#include <asm/kvm_ppc.h> 21#include <asm/kvm_ppc.h>
20#include <asm/kvm_book3s.h> 22#include <asm/kvm_book3s.h>
@@ -98,6 +100,83 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
98 return EMULATE_DONE; 100 return EMULATE_DONE;
99} 101}
100 102
103/* Request defs for kvmppc_h_pr_bulk_remove() */
104#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL
105#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL
106#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL
107#define H_BULK_REMOVE_END 0xc000000000000000ULL
108#define H_BULK_REMOVE_CODE 0x3000000000000000ULL
109#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL
110#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL
111#define H_BULK_REMOVE_PARM 0x2000000000000000ULL
112#define H_BULK_REMOVE_HW 0x3000000000000000ULL
113#define H_BULK_REMOVE_RC 0x0c00000000000000ULL
114#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL
115#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL
116#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL
117#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL
118#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL
119#define H_BULK_REMOVE_MAX_BATCH 4
120
121static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
122{
123 int i;
124 int paramnr = 4;
125 int ret = H_SUCCESS;
126
127 for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
128 unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i));
129 unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1);
130 unsigned long pteg, rb, flags;
131 unsigned long pte[2];
132 unsigned long v = 0;
133
134 if ((tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
135 break; /* Exit success */
136 } else if ((tsh & H_BULK_REMOVE_TYPE) !=
137 H_BULK_REMOVE_REQUEST) {
138 ret = H_PARAMETER;
139 break; /* Exit fail */
140 }
141
142 tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
143 tsh |= H_BULK_REMOVE_RESPONSE;
144
145 if ((tsh & H_BULK_REMOVE_ANDCOND) &&
146 (tsh & H_BULK_REMOVE_AVPN)) {
147 tsh |= H_BULK_REMOVE_PARM;
148 kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
149 ret = H_PARAMETER;
150 break; /* Exit fail */
151 }
152
153 pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
154 copy_from_user(pte, (void __user *)pteg, sizeof(pte));
155
156 /* tsl = AVPN */
157 flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26;
158
159 if ((pte[0] & HPTE_V_VALID) == 0 ||
160 ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != tsl) ||
161 ((flags & H_ANDCOND) && (pte[0] & tsl) != 0)) {
162 tsh |= H_BULK_REMOVE_NOT_FOUND;
163 } else {
164 /* Splat the pteg in (userland) hpt */
165 copy_to_user((void __user *)pteg, &v, sizeof(v));
166
167 rb = compute_tlbie_rb(pte[0], pte[1],
168 tsh & H_BULK_REMOVE_PTEX);
169 vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
170 tsh |= H_BULK_REMOVE_SUCCESS;
171 tsh |= (pte[1] & (HPTE_R_C | HPTE_R_R)) << 43;
172 }
173 kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
174 }
175 kvmppc_set_gpr(vcpu, 3, ret);
176
177 return EMULATE_DONE;
178}
179
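Each translation specifier that kvmppc_h_pr_bulk_remove() consumes packs a request type, flag bits and a PTE index into a single doubleword, and the handler rewrites that same word with a response and completion code. A small illustrative helper pair built from the masks defined above (the guest-side encoder is only a sketch of the field layout, not code from this series):

#include <stdint.h>

#define H_BULK_REMOVE_REQUEST   0x4000000000000000ULL   /* same values as above */
#define H_BULK_REMOVE_CODE      0x3000000000000000ULL
#define H_BULK_REMOVE_FLAGS     0x0300000000000000ULL
#define H_BULK_REMOVE_PTEX      0x00ffffffffffffffULL

/* Guest side: build one "request" specifier for the PTE at index ptex,
 * with flags being 0 or H_BULK_REMOVE_ANDCOND / H_BULK_REMOVE_AVPN. */
static uint64_t bulk_remove_request(uint64_t ptex, uint64_t flags)
{
        return H_BULK_REMOVE_REQUEST |
               (flags & H_BULK_REMOVE_FLAGS) |
               (ptex & H_BULK_REMOVE_PTEX);
}

/* After the hcall the hypervisor has turned the word into a response;
 * extract the completion code (SUCCESS, NOT_FOUND, PARM or HW). */
static uint64_t bulk_remove_code(uint64_t tsh)
{
        return tsh & H_BULK_REMOVE_CODE;
}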
101static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) 180static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
102{ 181{
103 unsigned long flags = kvmppc_get_gpr(vcpu, 4); 182 unsigned long flags = kvmppc_get_gpr(vcpu, 4);
@@ -134,6 +213,20 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
134 return EMULATE_DONE; 213 return EMULATE_DONE;
135} 214}
136 215
216static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
217{
218 unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
219 unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
220 unsigned long tce = kvmppc_get_gpr(vcpu, 6);
221 long rc;
222
223 rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce);
224 if (rc == H_TOO_HARD)
225 return EMULATE_FAIL;
226 kvmppc_set_gpr(vcpu, 3, rc);
227 return EMULATE_DONE;
228}
229
137int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) 230int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
138{ 231{
139 switch (cmd) { 232 switch (cmd) {
@@ -144,12 +237,12 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
144 case H_PROTECT: 237 case H_PROTECT:
145 return kvmppc_h_pr_protect(vcpu); 238 return kvmppc_h_pr_protect(vcpu);
146 case H_BULK_REMOVE: 239 case H_BULK_REMOVE:
147 /* We just flush all PTEs, so user space can 240 return kvmppc_h_pr_bulk_remove(vcpu);
148 handle the HPT modifications */ 241 case H_PUT_TCE:
149 kvmppc_mmu_pte_flush(vcpu, 0, 0); 242 return kvmppc_h_pr_put_tce(vcpu);
150 break;
151 case H_CEDE: 243 case H_CEDE:
152 kvm_vcpu_block(vcpu); 244 kvm_vcpu_block(vcpu);
245 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
153 vcpu->stat.halt_wakeup++; 246 vcpu->stat.halt_wakeup++;
154 return EMULATE_DONE; 247 return EMULATE_DONE;
155 } 248 }
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 6e6e9cef34a8..798491a268b3 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -128,24 +128,25 @@ no_dcbz32_on:
128 /* First clear RI in our current MSR value */ 128 /* First clear RI in our current MSR value */
129 li r0, MSR_RI 129 li r0, MSR_RI
130 andc r6, r6, r0 130 andc r6, r6, r0
131 MTMSR_EERI(r6)
132 mtsrr0 r9
133 mtsrr1 r4
134 131
135 PPC_LL r0, SVCPU_R0(r3) 132 PPC_LL r0, SVCPU_R0(r3)
136 PPC_LL r1, SVCPU_R1(r3) 133 PPC_LL r1, SVCPU_R1(r3)
137 PPC_LL r2, SVCPU_R2(r3) 134 PPC_LL r2, SVCPU_R2(r3)
138 PPC_LL r4, SVCPU_R4(r3)
139 PPC_LL r5, SVCPU_R5(r3) 135 PPC_LL r5, SVCPU_R5(r3)
140 PPC_LL r6, SVCPU_R6(r3)
141 PPC_LL r7, SVCPU_R7(r3) 136 PPC_LL r7, SVCPU_R7(r3)
142 PPC_LL r8, SVCPU_R8(r3) 137 PPC_LL r8, SVCPU_R8(r3)
143 PPC_LL r9, SVCPU_R9(r3)
144 PPC_LL r10, SVCPU_R10(r3) 138 PPC_LL r10, SVCPU_R10(r3)
145 PPC_LL r11, SVCPU_R11(r3) 139 PPC_LL r11, SVCPU_R11(r3)
146 PPC_LL r12, SVCPU_R12(r3) 140 PPC_LL r12, SVCPU_R12(r3)
147 PPC_LL r13, SVCPU_R13(r3) 141 PPC_LL r13, SVCPU_R13(r3)
148 142
143 MTMSR_EERI(r6)
144 mtsrr0 r9
145 mtsrr1 r4
146
147 PPC_LL r4, SVCPU_R4(r3)
148 PPC_LL r6, SVCPU_R6(r3)
149 PPC_LL r9, SVCPU_R9(r3)
149 PPC_LL r3, (SVCPU_R3)(r3) 150 PPC_LL r3, (SVCPU_R3)(r3)
150 151
151 RFI 152 RFI
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ee9e1ee9c858..72f13f4a06e0 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -17,6 +17,8 @@
17 * 17 *
18 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 18 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
19 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 19 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
20 * Scott Wood <scottwood@freescale.com>
21 * Varun Sethi <varun.sethi@freescale.com>
20 */ 22 */
21 23
22#include <linux/errno.h> 24#include <linux/errno.h>
@@ -30,9 +32,12 @@
30#include <asm/cputable.h> 32#include <asm/cputable.h>
31#include <asm/uaccess.h> 33#include <asm/uaccess.h>
32#include <asm/kvm_ppc.h> 34#include <asm/kvm_ppc.h>
33#include "timing.h"
34#include <asm/cacheflush.h> 35#include <asm/cacheflush.h>
36#include <asm/dbell.h>
37#include <asm/hw_irq.h>
38#include <asm/irq.h>
35 39
40#include "timing.h"
36#include "booke.h" 41#include "booke.h"
37 42
38unsigned long kvmppc_booke_handlers; 43unsigned long kvmppc_booke_handlers;
@@ -55,6 +60,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
55 { "dec", VCPU_STAT(dec_exits) }, 60 { "dec", VCPU_STAT(dec_exits) },
56 { "ext_intr", VCPU_STAT(ext_intr_exits) }, 61 { "ext_intr", VCPU_STAT(ext_intr_exits) },
57 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 62 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
63 { "doorbell", VCPU_STAT(dbell_exits) },
64 { "guest doorbell", VCPU_STAT(gdbell_exits) },
58 { NULL } 65 { NULL }
59}; 66};
60 67
@@ -121,6 +128,10 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
121{ 128{
122 u32 old_msr = vcpu->arch.shared->msr; 129 u32 old_msr = vcpu->arch.shared->msr;
123 130
131#ifdef CONFIG_KVM_BOOKE_HV
132 new_msr |= MSR_GS;
133#endif
134
124 vcpu->arch.shared->msr = new_msr; 135 vcpu->arch.shared->msr = new_msr;
125 136
126 kvmppc_mmu_msr_notify(vcpu, old_msr); 137 kvmppc_mmu_msr_notify(vcpu, old_msr);
@@ -195,17 +206,87 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
195 clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); 206 clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
196} 207}
197 208
209static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
210{
211#ifdef CONFIG_KVM_BOOKE_HV
212 mtspr(SPRN_GSRR0, srr0);
213 mtspr(SPRN_GSRR1, srr1);
214#else
215 vcpu->arch.shared->srr0 = srr0;
216 vcpu->arch.shared->srr1 = srr1;
217#endif
218}
219
220static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
221{
222 vcpu->arch.csrr0 = srr0;
223 vcpu->arch.csrr1 = srr1;
224}
225
226static void set_guest_dsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
227{
228 if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) {
229 vcpu->arch.dsrr0 = srr0;
230 vcpu->arch.dsrr1 = srr1;
231 } else {
232 set_guest_csrr(vcpu, srr0, srr1);
233 }
234}
235
236static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
237{
238 vcpu->arch.mcsrr0 = srr0;
239 vcpu->arch.mcsrr1 = srr1;
240}
241
242static unsigned long get_guest_dear(struct kvm_vcpu *vcpu)
243{
244#ifdef CONFIG_KVM_BOOKE_HV
245 return mfspr(SPRN_GDEAR);
246#else
247 return vcpu->arch.shared->dar;
248#endif
249}
250
251static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear)
252{
253#ifdef CONFIG_KVM_BOOKE_HV
254 mtspr(SPRN_GDEAR, dear);
255#else
256 vcpu->arch.shared->dar = dear;
257#endif
258}
259
260static unsigned long get_guest_esr(struct kvm_vcpu *vcpu)
261{
262#ifdef CONFIG_KVM_BOOKE_HV
263 return mfspr(SPRN_GESR);
264#else
265 return vcpu->arch.shared->esr;
266#endif
267}
268
269static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr)
270{
271#ifdef CONFIG_KVM_BOOKE_HV
272 mtspr(SPRN_GESR, esr);
273#else
274 vcpu->arch.shared->esr = esr;
275#endif
276}
277
198/* Deliver the interrupt of the corresponding priority, if possible. */ 278/* Deliver the interrupt of the corresponding priority, if possible. */
199static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, 279static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
200 unsigned int priority) 280 unsigned int priority)
201{ 281{
202 int allowed = 0; 282 int allowed = 0;
203 ulong uninitialized_var(msr_mask); 283 ulong msr_mask = 0;
204 bool update_esr = false, update_dear = false; 284 bool update_esr = false, update_dear = false;
205 ulong crit_raw = vcpu->arch.shared->critical; 285 ulong crit_raw = vcpu->arch.shared->critical;
206 ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); 286 ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
207 bool crit; 287 bool crit;
208 bool keep_irq = false; 288 bool keep_irq = false;
289 enum int_class int_class;
209 290
210 /* Truncate crit indicators in 32 bit mode */ 291 /* Truncate crit indicators in 32 bit mode */
211 if (!(vcpu->arch.shared->msr & MSR_SF)) { 292 if (!(vcpu->arch.shared->msr & MSR_SF)) {
@@ -241,46 +322,85 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
241 case BOOKE_IRQPRIO_AP_UNAVAIL: 322 case BOOKE_IRQPRIO_AP_UNAVAIL:
242 case BOOKE_IRQPRIO_ALIGNMENT: 323 case BOOKE_IRQPRIO_ALIGNMENT:
243 allowed = 1; 324 allowed = 1;
244 msr_mask = MSR_CE|MSR_ME|MSR_DE; 325 msr_mask = MSR_CE | MSR_ME | MSR_DE;
326 int_class = INT_CLASS_NONCRIT;
245 break; 327 break;
246 case BOOKE_IRQPRIO_CRITICAL: 328 case BOOKE_IRQPRIO_CRITICAL:
247 case BOOKE_IRQPRIO_WATCHDOG: 329 case BOOKE_IRQPRIO_DBELL_CRIT:
248 allowed = vcpu->arch.shared->msr & MSR_CE; 330 allowed = vcpu->arch.shared->msr & MSR_CE;
331 allowed = allowed && !crit;
249 msr_mask = MSR_ME; 332 msr_mask = MSR_ME;
333 int_class = INT_CLASS_CRIT;
250 break; 334 break;
251 case BOOKE_IRQPRIO_MACHINE_CHECK: 335 case BOOKE_IRQPRIO_MACHINE_CHECK:
252 allowed = vcpu->arch.shared->msr & MSR_ME; 336 allowed = vcpu->arch.shared->msr & MSR_ME;
253 msr_mask = 0; 337 allowed = allowed && !crit;
338 int_class = INT_CLASS_MC;
254 break; 339 break;
255 case BOOKE_IRQPRIO_DECREMENTER: 340 case BOOKE_IRQPRIO_DECREMENTER:
256 case BOOKE_IRQPRIO_FIT: 341 case BOOKE_IRQPRIO_FIT:
257 keep_irq = true; 342 keep_irq = true;
258 /* fall through */ 343 /* fall through */
259 case BOOKE_IRQPRIO_EXTERNAL: 344 case BOOKE_IRQPRIO_EXTERNAL:
345 case BOOKE_IRQPRIO_DBELL:
260 allowed = vcpu->arch.shared->msr & MSR_EE; 346 allowed = vcpu->arch.shared->msr & MSR_EE;
261 allowed = allowed && !crit; 347 allowed = allowed && !crit;
262 msr_mask = MSR_CE|MSR_ME|MSR_DE; 348 msr_mask = MSR_CE | MSR_ME | MSR_DE;
349 int_class = INT_CLASS_NONCRIT;
263 break; 350 break;
264 case BOOKE_IRQPRIO_DEBUG: 351 case BOOKE_IRQPRIO_DEBUG:
265 allowed = vcpu->arch.shared->msr & MSR_DE; 352 allowed = vcpu->arch.shared->msr & MSR_DE;
353 allowed = allowed && !crit;
266 msr_mask = MSR_ME; 354 msr_mask = MSR_ME;
355 int_class = INT_CLASS_CRIT;
267 break; 356 break;
268 } 357 }
269 358
270 if (allowed) { 359 if (allowed) {
271 vcpu->arch.shared->srr0 = vcpu->arch.pc; 360 switch (int_class) {
272 vcpu->arch.shared->srr1 = vcpu->arch.shared->msr; 361 case INT_CLASS_NONCRIT:
362 set_guest_srr(vcpu, vcpu->arch.pc,
363 vcpu->arch.shared->msr);
364 break;
365 case INT_CLASS_CRIT:
366 set_guest_csrr(vcpu, vcpu->arch.pc,
367 vcpu->arch.shared->msr);
368 break;
369 case INT_CLASS_DBG:
370 set_guest_dsrr(vcpu, vcpu->arch.pc,
371 vcpu->arch.shared->msr);
372 break;
373 case INT_CLASS_MC:
374 set_guest_mcsrr(vcpu, vcpu->arch.pc,
375 vcpu->arch.shared->msr);
376 break;
377 }
378
273 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; 379 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
274 if (update_esr == true) 380 if (update_esr == true)
275 vcpu->arch.shared->esr = vcpu->arch.queued_esr; 381 set_guest_esr(vcpu, vcpu->arch.queued_esr);
276 if (update_dear == true) 382 if (update_dear == true)
277 vcpu->arch.shared->dar = vcpu->arch.queued_dear; 383 set_guest_dear(vcpu, vcpu->arch.queued_dear);
278 kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); 384 kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);
279 385
280 if (!keep_irq) 386 if (!keep_irq)
281 clear_bit(priority, &vcpu->arch.pending_exceptions); 387 clear_bit(priority, &vcpu->arch.pending_exceptions);
282 } 388 }
283 389
390#ifdef CONFIG_KVM_BOOKE_HV
391 /*
392 * If an interrupt is pending but masked, raise a guest doorbell
393 * so that we are notified when the guest enables the relevant
394 * MSR bit.
395 */
396 if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_EE)
397 kvmppc_set_pending_interrupt(vcpu, INT_CLASS_NONCRIT);
398 if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_CE)
399 kvmppc_set_pending_interrupt(vcpu, INT_CLASS_CRIT);
400 if (vcpu->arch.pending_exceptions & BOOKE_IRQPRIO_MACHINE_CHECK)
401 kvmppc_set_pending_interrupt(vcpu, INT_CLASS_MC);
402#endif
403
284 return allowed; 404 return allowed;
285} 405}
286 406
@@ -305,7 +425,7 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
305 } 425 }
306 426
307 priority = __ffs(*pending); 427 priority = __ffs(*pending);
308 while (priority <= BOOKE_IRQPRIO_MAX) { 428 while (priority < BOOKE_IRQPRIO_MAX) {
309 if (kvmppc_booke_irqprio_deliver(vcpu, priority)) 429 if (kvmppc_booke_irqprio_deliver(vcpu, priority))
310 break; 430 break;
311 431
@@ -319,8 +439,9 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
319} 439}
320 440
321/* Check pending exceptions and deliver one, if possible. */ 441/* Check pending exceptions and deliver one, if possible. */
322void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) 442int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
323{ 443{
444 int r = 0;
324 WARN_ON_ONCE(!irqs_disabled()); 445 WARN_ON_ONCE(!irqs_disabled());
325 446
326 kvmppc_core_check_exceptions(vcpu); 447 kvmppc_core_check_exceptions(vcpu);
@@ -328,16 +449,60 @@ void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
328 if (vcpu->arch.shared->msr & MSR_WE) { 449 if (vcpu->arch.shared->msr & MSR_WE) {
329 local_irq_enable(); 450 local_irq_enable();
330 kvm_vcpu_block(vcpu); 451 kvm_vcpu_block(vcpu);
452 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
331 local_irq_disable(); 453 local_irq_disable();
332 454
333 kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); 455 kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
334 kvmppc_core_check_exceptions(vcpu); 456 r = 1;
335 }; 457 };
458
459 return r;
460}
461
462/*
463 * Common checks before entering the guest world. Call with interrupts
464 * disabled.
465 *
 466 * returns !0 if a signal is pending
467 */
468static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
469{
470 int r = 0;
471
472 WARN_ON_ONCE(!irqs_disabled());
473 while (true) {
474 if (need_resched()) {
475 local_irq_enable();
476 cond_resched();
477 local_irq_disable();
478 continue;
479 }
480
481 if (signal_pending(current)) {
482 r = 1;
483 break;
484 }
485
486 if (kvmppc_core_prepare_to_enter(vcpu)) {
487 /* interrupts got enabled in between, so we
488 are back at square 1 */
489 continue;
490 }
491
492 break;
493 }
494
495 return r;
336} 496}
337 497
338int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 498int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
339{ 499{
340 int ret; 500 int ret;
501#ifdef CONFIG_PPC_FPU
502 unsigned int fpscr;
503 int fpexc_mode;
504 u64 fpr[32];
505#endif
341 506
342 if (!vcpu->arch.sane) { 507 if (!vcpu->arch.sane) {
343 kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 508 kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -345,17 +510,53 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
345 } 510 }
346 511
347 local_irq_disable(); 512 local_irq_disable();
348 513 if (kvmppc_prepare_to_enter(vcpu)) {
349 kvmppc_core_prepare_to_enter(vcpu);
350
351 if (signal_pending(current)) {
352 kvm_run->exit_reason = KVM_EXIT_INTR; 514 kvm_run->exit_reason = KVM_EXIT_INTR;
353 ret = -EINTR; 515 ret = -EINTR;
354 goto out; 516 goto out;
355 } 517 }
356 518
357 kvm_guest_enter(); 519 kvm_guest_enter();
520
521#ifdef CONFIG_PPC_FPU
522 /* Save userspace FPU state in stack */
523 enable_kernel_fp();
524 memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
525 fpscr = current->thread.fpscr.val;
526 fpexc_mode = current->thread.fpexc_mode;
527
528 /* Restore guest FPU state to thread */
529 memcpy(current->thread.fpr, vcpu->arch.fpr, sizeof(vcpu->arch.fpr));
530 current->thread.fpscr.val = vcpu->arch.fpscr;
531
532 /*
533 * Since we can't trap on MSR_FP in GS-mode, we consider the guest
534 * as always using the FPU. Kernel usage of FP (via
535 * enable_kernel_fp()) in this thread must not occur while
536 * vcpu->fpu_active is set.
537 */
538 vcpu->fpu_active = 1;
539
540 kvmppc_load_guest_fp(vcpu);
541#endif
542
358 ret = __kvmppc_vcpu_run(kvm_run, vcpu); 543 ret = __kvmppc_vcpu_run(kvm_run, vcpu);
544
545#ifdef CONFIG_PPC_FPU
546 kvmppc_save_guest_fp(vcpu);
547
548 vcpu->fpu_active = 0;
549
550 /* Save guest FPU state from thread */
551 memcpy(vcpu->arch.fpr, current->thread.fpr, sizeof(vcpu->arch.fpr));
552 vcpu->arch.fpscr = current->thread.fpscr.val;
553
554 /* Restore userspace FPU state from stack */
555 memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
556 current->thread.fpscr.val = fpscr;
557 current->thread.fpexc_mode = fpexc_mode;
558#endif
559
359 kvm_guest_exit(); 560 kvm_guest_exit();
360 561
361out: 562out:
@@ -363,6 +564,84 @@ out:
363 return ret; 564 return ret;
364} 565}
365 566
567static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
568{
569 enum emulation_result er;
570
571 er = kvmppc_emulate_instruction(run, vcpu);
572 switch (er) {
573 case EMULATE_DONE:
574 /* don't overwrite subtypes, just account kvm_stats */
575 kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
576 /* Future optimization: only reload non-volatiles if
577 * they were actually modified by emulation. */
578 return RESUME_GUEST_NV;
579
580 case EMULATE_DO_DCR:
581 run->exit_reason = KVM_EXIT_DCR;
582 return RESUME_HOST;
583
584 case EMULATE_FAIL:
585 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
586 __func__, vcpu->arch.pc, vcpu->arch.last_inst);
587 /* For debugging, encode the failing instruction and
588 * report it to userspace. */
589 run->hw.hardware_exit_reason = ~0ULL << 32;
590 run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
591 kvmppc_core_queue_program(vcpu, ESR_PIL);
592 return RESUME_HOST;
593
594 default:
595 BUG();
596 }
597}
598
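On EMULATE_FAIL, emulation_exit() leaves run->exit_reason as KVM_EXIT_UNKNOWN and hands the failing instruction to user space in the low 32 bits of run->hw.hardware_exit_reason, with the upper half set to all ones as a marker (the machine-check exit later in this file uses ~1ULL << 32, so the two encodings stay distinguishable). A sketch of how a VMM might surface that after KVM_RUN returns; the marker test simply mirrors the encoding above and is illustrative rather than a documented ABI:

#include <stdint.h>
#include <stdio.h>
#include <linux/kvm.h>

static void report_failed_emulation(const struct kvm_run *run)
{
        uint64_t reason = run->hw.hardware_exit_reason;

        if (run->exit_reason != KVM_EXIT_UNKNOWN)
                return;
        if ((reason >> 32) != 0xffffffffULL)    /* not the EMULATE_FAIL encoding */
                return;

        fprintf(stderr, "kvm: instruction emulation failed, inst=0x%08x\n",
                (uint32_t)reason);
}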
599static void kvmppc_fill_pt_regs(struct pt_regs *regs)
600{
601 ulong r1, ip, msr, lr;
602
603 asm("mr %0, 1" : "=r"(r1));
604 asm("mflr %0" : "=r"(lr));
605 asm("mfmsr %0" : "=r"(msr));
606 asm("bl 1f; 1: mflr %0" : "=r"(ip));
607
608 memset(regs, 0, sizeof(*regs));
609 regs->gpr[1] = r1;
610 regs->nip = ip;
611 regs->msr = msr;
612 regs->link = lr;
613}
614
615static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
616 unsigned int exit_nr)
617{
618 struct pt_regs regs;
619
620 switch (exit_nr) {
621 case BOOKE_INTERRUPT_EXTERNAL:
622 kvmppc_fill_pt_regs(&regs);
623 do_IRQ(&regs);
624 break;
625 case BOOKE_INTERRUPT_DECREMENTER:
626 kvmppc_fill_pt_regs(&regs);
627 timer_interrupt(&regs);
628 break;
629#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64)
630 case BOOKE_INTERRUPT_DOORBELL:
631 kvmppc_fill_pt_regs(&regs);
632 doorbell_exception(&regs);
633 break;
634#endif
635 case BOOKE_INTERRUPT_MACHINE_CHECK:
636 /* FIXME */
637 break;
638 case BOOKE_INTERRUPT_PERFORMANCE_MONITOR:
639 kvmppc_fill_pt_regs(&regs);
640 performance_monitor_exception(&regs);
641 break;
642 }
643}
644
366/** 645/**
367 * kvmppc_handle_exit 646 * kvmppc_handle_exit
368 * 647 *
@@ -371,12 +650,14 @@ out:
371int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, 650int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
372 unsigned int exit_nr) 651 unsigned int exit_nr)
373{ 652{
374 enum emulation_result er;
375 int r = RESUME_HOST; 653 int r = RESUME_HOST;
376 654
377 /* update before a new last_exit_type is rewritten */ 655 /* update before a new last_exit_type is rewritten */
378 kvmppc_update_timing_stats(vcpu); 656 kvmppc_update_timing_stats(vcpu);
379 657
658 /* restart interrupts if they were meant for the host */
659 kvmppc_restart_interrupt(vcpu, exit_nr);
660
380 local_irq_enable(); 661 local_irq_enable();
381 662
382 run->exit_reason = KVM_EXIT_UNKNOWN; 663 run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -386,62 +667,74 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
386 case BOOKE_INTERRUPT_MACHINE_CHECK: 667 case BOOKE_INTERRUPT_MACHINE_CHECK:
387 printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); 668 printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
388 kvmppc_dump_vcpu(vcpu); 669 kvmppc_dump_vcpu(vcpu);
670 /* For debugging, send invalid exit reason to user space */
671 run->hw.hardware_exit_reason = ~1ULL << 32;
672 run->hw.hardware_exit_reason |= mfspr(SPRN_MCSR);
389 r = RESUME_HOST; 673 r = RESUME_HOST;
390 break; 674 break;
391 675
392 case BOOKE_INTERRUPT_EXTERNAL: 676 case BOOKE_INTERRUPT_EXTERNAL:
393 kvmppc_account_exit(vcpu, EXT_INTR_EXITS); 677 kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
394 if (need_resched())
395 cond_resched();
396 r = RESUME_GUEST; 678 r = RESUME_GUEST;
397 break; 679 break;
398 680
399 case BOOKE_INTERRUPT_DECREMENTER: 681 case BOOKE_INTERRUPT_DECREMENTER:
400 /* Since we switched IVPR back to the host's value, the host
401 * handled this interrupt the moment we enabled interrupts.
402 * Now we just offer it a chance to reschedule the guest. */
403 kvmppc_account_exit(vcpu, DEC_EXITS); 682 kvmppc_account_exit(vcpu, DEC_EXITS);
404 if (need_resched())
405 cond_resched();
406 r = RESUME_GUEST; 683 r = RESUME_GUEST;
407 break; 684 break;
408 685
686 case BOOKE_INTERRUPT_DOORBELL:
687 kvmppc_account_exit(vcpu, DBELL_EXITS);
688 r = RESUME_GUEST;
689 break;
690
691 case BOOKE_INTERRUPT_GUEST_DBELL_CRIT:
692 kvmppc_account_exit(vcpu, GDBELL_EXITS);
693
694 /*
695 * We are here because there is a pending guest interrupt
696 * which could not be delivered as MSR_CE or MSR_ME was not
697 * set. Once we break from here we will retry delivery.
698 */
699 r = RESUME_GUEST;
700 break;
701
702 case BOOKE_INTERRUPT_GUEST_DBELL:
703 kvmppc_account_exit(vcpu, GDBELL_EXITS);
704
705 /*
706 * We are here because there is a pending guest interrupt
707 * which could not be delivered as MSR_EE was not set. Once
708 * we break from here we will retry delivery.
709 */
710 r = RESUME_GUEST;
711 break;
712
713 case BOOKE_INTERRUPT_PERFORMANCE_MONITOR:
714 r = RESUME_GUEST;
715 break;
716
717 case BOOKE_INTERRUPT_HV_PRIV:
718 r = emulation_exit(run, vcpu);
719 break;
720
409 case BOOKE_INTERRUPT_PROGRAM: 721 case BOOKE_INTERRUPT_PROGRAM:
410 if (vcpu->arch.shared->msr & MSR_PR) { 722 if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) {
411 /* Program traps generated by user-level software must be handled 723 /*
412 * by the guest kernel. */ 724 * Program traps generated by user-level software must
725 * be handled by the guest kernel.
726 *
727 * In GS mode, hypervisor privileged instructions trap
728 * on BOOKE_INTERRUPT_HV_PRIV, not here, so these are
729 * actual program interrupts, handled by the guest.
730 */
413 kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr); 731 kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
414 r = RESUME_GUEST; 732 r = RESUME_GUEST;
415 kvmppc_account_exit(vcpu, USR_PR_INST); 733 kvmppc_account_exit(vcpu, USR_PR_INST);
416 break; 734 break;
417 } 735 }
418 736
419 er = kvmppc_emulate_instruction(run, vcpu); 737 r = emulation_exit(run, vcpu);
420 switch (er) {
421 case EMULATE_DONE:
422 /* don't overwrite subtypes, just account kvm_stats */
423 kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
424 /* Future optimization: only reload non-volatiles if
425 * they were actually modified by emulation. */
426 r = RESUME_GUEST_NV;
427 break;
428 case EMULATE_DO_DCR:
429 run->exit_reason = KVM_EXIT_DCR;
430 r = RESUME_HOST;
431 break;
432 case EMULATE_FAIL:
433 /* XXX Deliver Program interrupt to guest. */
434 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
435 __func__, vcpu->arch.pc, vcpu->arch.last_inst);
436 /* For debugging, encode the failing instruction and
437 * report it to userspace. */
438 run->hw.hardware_exit_reason = ~0ULL << 32;
439 run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
440 r = RESUME_HOST;
441 break;
442 default:
443 BUG();
444 }
445 break; 738 break;
446 739
447 case BOOKE_INTERRUPT_FP_UNAVAIL: 740 case BOOKE_INTERRUPT_FP_UNAVAIL:
@@ -506,6 +799,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
506 r = RESUME_GUEST; 799 r = RESUME_GUEST;
507 break; 800 break;
508 801
802#ifdef CONFIG_KVM_BOOKE_HV
803 case BOOKE_INTERRUPT_HV_SYSCALL:
804 if (!(vcpu->arch.shared->msr & MSR_PR)) {
805 kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
806 } else {
807 /*
808 * hcall from guest userspace -- send privileged
809 * instruction program check.
810 */
811 kvmppc_core_queue_program(vcpu, ESR_PPR);
812 }
813
814 r = RESUME_GUEST;
815 break;
816#else
509 case BOOKE_INTERRUPT_SYSCALL: 817 case BOOKE_INTERRUPT_SYSCALL:
510 if (!(vcpu->arch.shared->msr & MSR_PR) && 818 if (!(vcpu->arch.shared->msr & MSR_PR) &&
511 (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { 819 (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
@@ -519,6 +827,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
519 kvmppc_account_exit(vcpu, SYSCALL_EXITS); 827 kvmppc_account_exit(vcpu, SYSCALL_EXITS);
520 r = RESUME_GUEST; 828 r = RESUME_GUEST;
521 break; 829 break;
830#endif
522 831
523 case BOOKE_INTERRUPT_DTLB_MISS: { 832 case BOOKE_INTERRUPT_DTLB_MISS: {
524 unsigned long eaddr = vcpu->arch.fault_dear; 833 unsigned long eaddr = vcpu->arch.fault_dear;
@@ -526,7 +835,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
526 gpa_t gpaddr; 835 gpa_t gpaddr;
527 gfn_t gfn; 836 gfn_t gfn;
528 837
529#ifdef CONFIG_KVM_E500 838#ifdef CONFIG_KVM_E500V2
530 if (!(vcpu->arch.shared->msr & MSR_PR) && 839 if (!(vcpu->arch.shared->msr & MSR_PR) &&
531 (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) { 840 (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) {
532 kvmppc_map_magic(vcpu); 841 kvmppc_map_magic(vcpu);
@@ -567,6 +876,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
567 /* Guest has mapped and accessed a page which is not 876 /* Guest has mapped and accessed a page which is not
568 * actually RAM. */ 877 * actually RAM. */
569 vcpu->arch.paddr_accessed = gpaddr; 878 vcpu->arch.paddr_accessed = gpaddr;
879 vcpu->arch.vaddr_accessed = eaddr;
570 r = kvmppc_emulate_mmio(run, vcpu); 880 r = kvmppc_emulate_mmio(run, vcpu);
571 kvmppc_account_exit(vcpu, MMIO_EXITS); 881 kvmppc_account_exit(vcpu, MMIO_EXITS);
572 } 882 }
@@ -634,15 +944,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
634 BUG(); 944 BUG();
635 } 945 }
636 946
637 local_irq_disable(); 947 /*
638 948 * To avoid clobbering exit_reason, only check for signals if we
639 kvmppc_core_prepare_to_enter(vcpu); 949 * aren't already exiting to userspace for some other reason.
640 950 */
641 if (!(r & RESUME_HOST)) { 951 if (!(r & RESUME_HOST)) {
642 /* To avoid clobbering exit_reason, only check for signals if 952 local_irq_disable();
643 * we aren't already exiting to userspace for some other 953 if (kvmppc_prepare_to_enter(vcpu)) {
644 * reason. */
645 if (signal_pending(current)) {
646 run->exit_reason = KVM_EXIT_INTR; 954 run->exit_reason = KVM_EXIT_INTR;
647 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); 955 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
648 kvmppc_account_exit(vcpu, SIGNAL_EXITS); 956 kvmppc_account_exit(vcpu, SIGNAL_EXITS);
@@ -659,12 +967,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
659 int r; 967 int r;
660 968
661 vcpu->arch.pc = 0; 969 vcpu->arch.pc = 0;
662 vcpu->arch.shared->msr = 0;
663 vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
664 vcpu->arch.shared->pir = vcpu->vcpu_id; 970 vcpu->arch.shared->pir = vcpu->vcpu_id;
665 kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ 971 kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
972 kvmppc_set_msr(vcpu, 0);
666 973
974#ifndef CONFIG_KVM_BOOKE_HV
975 vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
667 vcpu->arch.shadow_pid = 1; 976 vcpu->arch.shadow_pid = 1;
977 vcpu->arch.shared->msr = 0;
978#endif
668 979
669 /* Eye-catching numbers so we know if the guest takes an interrupt 980 /* Eye-catching numbers so we know if the guest takes an interrupt
670 * before it's programmed its own IVPR/IVORs. */ 981 * before it's programmed its own IVPR/IVORs. */
@@ -745,8 +1056,8 @@ static void get_sregs_base(struct kvm_vcpu *vcpu,
745 sregs->u.e.csrr0 = vcpu->arch.csrr0; 1056 sregs->u.e.csrr0 = vcpu->arch.csrr0;
746 sregs->u.e.csrr1 = vcpu->arch.csrr1; 1057 sregs->u.e.csrr1 = vcpu->arch.csrr1;
747 sregs->u.e.mcsr = vcpu->arch.mcsr; 1058 sregs->u.e.mcsr = vcpu->arch.mcsr;
748 sregs->u.e.esr = vcpu->arch.shared->esr; 1059 sregs->u.e.esr = get_guest_esr(vcpu);
749 sregs->u.e.dear = vcpu->arch.shared->dar; 1060 sregs->u.e.dear = get_guest_dear(vcpu);
750 sregs->u.e.tsr = vcpu->arch.tsr; 1061 sregs->u.e.tsr = vcpu->arch.tsr;
751 sregs->u.e.tcr = vcpu->arch.tcr; 1062 sregs->u.e.tcr = vcpu->arch.tcr;
752 sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); 1063 sregs->u.e.dec = kvmppc_get_dec(vcpu, tb);
@@ -763,8 +1074,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
763 vcpu->arch.csrr0 = sregs->u.e.csrr0; 1074 vcpu->arch.csrr0 = sregs->u.e.csrr0;
764 vcpu->arch.csrr1 = sregs->u.e.csrr1; 1075 vcpu->arch.csrr1 = sregs->u.e.csrr1;
765 vcpu->arch.mcsr = sregs->u.e.mcsr; 1076 vcpu->arch.mcsr = sregs->u.e.mcsr;
766 vcpu->arch.shared->esr = sregs->u.e.esr; 1077 set_guest_esr(vcpu, sregs->u.e.esr);
767 vcpu->arch.shared->dar = sregs->u.e.dear; 1078 set_guest_dear(vcpu, sregs->u.e.dear);
768 vcpu->arch.vrsave = sregs->u.e.vrsave; 1079 vcpu->arch.vrsave = sregs->u.e.vrsave;
769 kvmppc_set_tcr(vcpu, sregs->u.e.tcr); 1080 kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
770 1081
@@ -932,15 +1243,6 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
932{ 1243{
933} 1244}
934 1245
935int kvmppc_core_init_vm(struct kvm *kvm)
936{
937 return 0;
938}
939
940void kvmppc_core_destroy_vm(struct kvm *kvm)
941{
942}
943
944void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) 1246void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
945{ 1247{
946 vcpu->arch.tcr = new_tcr; 1248 vcpu->arch.tcr = new_tcr;
@@ -968,8 +1270,19 @@ void kvmppc_decrementer_func(unsigned long data)
968 kvmppc_set_tsr_bits(vcpu, TSR_DIS); 1270 kvmppc_set_tsr_bits(vcpu, TSR_DIS);
969} 1271}
970 1272
1273void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1274{
1275 current->thread.kvm_vcpu = vcpu;
1276}
1277
1278void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
1279{
1280 current->thread.kvm_vcpu = NULL;
1281}
1282
971int __init kvmppc_booke_init(void) 1283int __init kvmppc_booke_init(void)
972{ 1284{
1285#ifndef CONFIG_KVM_BOOKE_HV
973 unsigned long ivor[16]; 1286 unsigned long ivor[16];
974 unsigned long max_ivor = 0; 1287 unsigned long max_ivor = 0;
975 int i; 1288 int i;
@@ -1012,7 +1325,7 @@ int __init kvmppc_booke_init(void)
1012 } 1325 }
1013 flush_icache_range(kvmppc_booke_handlers, 1326 flush_icache_range(kvmppc_booke_handlers,
1014 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); 1327 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
1015 1328#endif /* !BOOKE_HV */
1016 return 0; 1329 return 0;
1017} 1330}
1018 1331
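The new BOOKE_INTERRUPT_HV_SYSCALL case above only treats an hcall as a hypercall when it comes from the guest kernel; if MSR_PR is set, the guest gets a privileged-instruction program check queued back instead. Below is a minimal, standalone C sketch of just that decision. The MSR_PR value and the helper names here are illustrative stand-ins, not the kernel's definitions.

#include <stdio.h>

#define MSR_PR 0x4000UL   /* problem-state (user mode) bit; value illustrative */

enum hcall_action { HCALL_HANDLE, HCALL_PROGRAM_CHECK };

/* Mirrors the check in the BOOKE_INTERRUPT_HV_SYSCALL case: only a guest
 * kernel (MSR_PR clear) may issue hypercalls; a guest-userspace hcall has a
 * privileged-instruction program check reflected back. */
static enum hcall_action classify_hcall(unsigned long guest_msr)
{
    return (guest_msr & MSR_PR) ? HCALL_PROGRAM_CHECK : HCALL_HANDLE;
}

int main(void)
{
    printf("guest kernel hcall -> %d\n", classify_hcall(0));      /* HCALL_HANDLE */
    printf("guest user hcall   -> %d\n", classify_hcall(MSR_PR)); /* HCALL_PROGRAM_CHECK */
    return 0;
}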
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 2fe202705a3f..ba61974c1e20 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -23,6 +23,7 @@
23#include <linux/types.h> 23#include <linux/types.h>
24#include <linux/kvm_host.h> 24#include <linux/kvm_host.h>
25#include <asm/kvm_ppc.h> 25#include <asm/kvm_ppc.h>
26#include <asm/switch_to.h>
26#include "timing.h" 27#include "timing.h"
27 28
 28/* interrupt priority ordering */ 29/* interrupt priority ordering */
@@ -48,7 +49,20 @@
48#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19 49#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19
49/* Internal pseudo-irqprio for level triggered externals */ 50/* Internal pseudo-irqprio for level triggered externals */
50#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20 51#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20
51#define BOOKE_IRQPRIO_MAX 20 52#define BOOKE_IRQPRIO_DBELL 21
53#define BOOKE_IRQPRIO_DBELL_CRIT 22
54#define BOOKE_IRQPRIO_MAX 23
55
56#define BOOKE_IRQMASK_EE ((1 << BOOKE_IRQPRIO_EXTERNAL_LEVEL) | \
57 (1 << BOOKE_IRQPRIO_PERFORMANCE_MONITOR) | \
58 (1 << BOOKE_IRQPRIO_DBELL) | \
59 (1 << BOOKE_IRQPRIO_DECREMENTER) | \
60 (1 << BOOKE_IRQPRIO_FIT) | \
61 (1 << BOOKE_IRQPRIO_EXTERNAL))
62
63#define BOOKE_IRQMASK_CE ((1 << BOOKE_IRQPRIO_DBELL_CRIT) | \
64 (1 << BOOKE_IRQPRIO_WATCHDOG) | \
65 (1 << BOOKE_IRQPRIO_CRITICAL))
52 66
53extern unsigned long kvmppc_booke_handlers; 67extern unsigned long kvmppc_booke_handlers;
54 68
@@ -61,8 +75,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
61 75
62int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 76int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
63 unsigned int inst, int *advance); 77 unsigned int inst, int *advance);
64int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); 78int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
65int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); 79int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
66 80
67/* low-level asm code to transfer guest state */ 81/* low-level asm code to transfer guest state */
68void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu); 82void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu);
@@ -71,4 +85,46 @@ void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu);
71/* high-level function, manages flags, host state */ 85/* high-level function, manages flags, host state */
72void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu); 86void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu);
73 87
88void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
89void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu);
90
91enum int_class {
92 INT_CLASS_NONCRIT,
93 INT_CLASS_CRIT,
94 INT_CLASS_MC,
95 INT_CLASS_DBG,
96};
97
98void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
99
100/*
101 * Load up guest vcpu FP state if it's needed.
 102 * It also sets MSR_FP in the thread so that the host knows
 103 * we're holding the FPU, and the host can then help to save the
 104 * guest vcpu FP state if other threads need the FPU.
105 * This simulates an FP unavailable fault.
106 *
 107 * It must be called with preemption disabled.
108 */
109static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
110{
111#ifdef CONFIG_PPC_FPU
112 if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) {
113 load_up_fpu();
114 current->thread.regs->msr |= MSR_FP;
115 }
116#endif
117}
118
119/*
120 * Save guest vcpu FP state into thread.
 121 * It must be called with preemption disabled.
122 */
123static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
124{
125#ifdef CONFIG_PPC_FPU
126 if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP))
127 giveup_fpu(current);
128#endif
129}
74#endif /* __KVM_BOOKE_H__ */ 130#endif /* __KVM_BOOKE_H__ */
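BOOKE_IRQMASK_EE and BOOKE_IRQMASK_CE above group the interrupt priorities into the classes gated by MSR_EE and MSR_CE. The self-contained sketch below shows how such bitmasks filter a set of pending priorities; the priority names and numbers are made up for the example, only the masking pattern follows the header.

#include <stdio.h>

/* Made-up priority numbers for the example; the real ones live in booke.h. */
enum {
    PRIO_CRITICAL,
    PRIO_EXTERNAL,
    PRIO_DECREMENTER,
    PRIO_DBELL,
    PRIO_WATCHDOG,
};

#define IRQMASK_EE ((1u << PRIO_EXTERNAL) | (1u << PRIO_DECREMENTER) | (1u << PRIO_DBELL))
#define IRQMASK_CE ((1u << PRIO_CRITICAL) | (1u << PRIO_WATCHDOG))

/* A pending priority can be delivered only if its class is currently enabled. */
static unsigned int deliverable(unsigned int pending, int msr_ee, int msr_ce)
{
    unsigned int mask = 0;

    if (msr_ee)
        mask |= IRQMASK_EE;
    if (msr_ce)
        mask |= IRQMASK_CE;
    return pending & mask;
}

int main(void)
{
    unsigned int pending = (1u << PRIO_DECREMENTER) | (1u << PRIO_WATCHDOG);

    printf("EE only: %#x\n", deliverable(pending, 1, 0)); /* decrementer only */
    printf("EE+CE:   %#x\n", deliverable(pending, 1, 1)); /* both */
    return 0;
}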
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 3e652da36534..6c76397f2af4 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -40,8 +40,8 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
40 unsigned int inst, int *advance) 40 unsigned int inst, int *advance)
41{ 41{
42 int emulated = EMULATE_DONE; 42 int emulated = EMULATE_DONE;
43 int rs; 43 int rs = get_rs(inst);
44 int rt; 44 int rt = get_rt(inst);
45 45
46 switch (get_op(inst)) { 46 switch (get_op(inst)) {
47 case 19: 47 case 19:
@@ -62,19 +62,16 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
62 switch (get_xop(inst)) { 62 switch (get_xop(inst)) {
63 63
64 case OP_31_XOP_MFMSR: 64 case OP_31_XOP_MFMSR:
65 rt = get_rt(inst);
66 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr); 65 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr);
67 kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); 66 kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
68 break; 67 break;
69 68
70 case OP_31_XOP_MTMSR: 69 case OP_31_XOP_MTMSR:
71 rs = get_rs(inst);
72 kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); 70 kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
73 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs)); 71 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
74 break; 72 break;
75 73
76 case OP_31_XOP_WRTEE: 74 case OP_31_XOP_WRTEE:
77 rs = get_rs(inst);
78 vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE) 75 vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE)
79 | (kvmppc_get_gpr(vcpu, rs) & MSR_EE); 76 | (kvmppc_get_gpr(vcpu, rs) & MSR_EE);
80 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); 77 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
@@ -99,22 +96,32 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
99 return emulated; 96 return emulated;
100} 97}
101 98
102int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) 99/*
100 * NOTE: some of these registers are not emulated on BOOKE_HV (GS-mode).
101 * Their backing store is in real registers, and these functions
102 * will return the wrong result if called for them in another context
103 * (such as debugging).
104 */
105int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
103{ 106{
104 int emulated = EMULATE_DONE; 107 int emulated = EMULATE_DONE;
105 ulong spr_val = kvmppc_get_gpr(vcpu, rs);
106 108
107 switch (sprn) { 109 switch (sprn) {
108 case SPRN_DEAR: 110 case SPRN_DEAR:
109 vcpu->arch.shared->dar = spr_val; break; 111 vcpu->arch.shared->dar = spr_val;
112 break;
110 case SPRN_ESR: 113 case SPRN_ESR:
111 vcpu->arch.shared->esr = spr_val; break; 114 vcpu->arch.shared->esr = spr_val;
115 break;
112 case SPRN_DBCR0: 116 case SPRN_DBCR0:
113 vcpu->arch.dbcr0 = spr_val; break; 117 vcpu->arch.dbcr0 = spr_val;
118 break;
114 case SPRN_DBCR1: 119 case SPRN_DBCR1:
115 vcpu->arch.dbcr1 = spr_val; break; 120 vcpu->arch.dbcr1 = spr_val;
121 break;
116 case SPRN_DBSR: 122 case SPRN_DBSR:
117 vcpu->arch.dbsr &= ~spr_val; break; 123 vcpu->arch.dbsr &= ~spr_val;
124 break;
118 case SPRN_TSR: 125 case SPRN_TSR:
119 kvmppc_clr_tsr_bits(vcpu, spr_val); 126 kvmppc_clr_tsr_bits(vcpu, spr_val);
120 break; 127 break;
@@ -122,20 +129,29 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
122 kvmppc_set_tcr(vcpu, spr_val); 129 kvmppc_set_tcr(vcpu, spr_val);
123 break; 130 break;
124 131
125 /* Note: SPRG4-7 are user-readable. These values are 132 /*
126 * loaded into the real SPRGs when resuming the 133 * Note: SPRG4-7 are user-readable.
127 * guest. */ 134 * These values are loaded into the real SPRGs when resuming the
135 * guest (PR-mode only).
136 */
128 case SPRN_SPRG4: 137 case SPRN_SPRG4:
129 vcpu->arch.shared->sprg4 = spr_val; break; 138 vcpu->arch.shared->sprg4 = spr_val;
139 break;
130 case SPRN_SPRG5: 140 case SPRN_SPRG5:
131 vcpu->arch.shared->sprg5 = spr_val; break; 141 vcpu->arch.shared->sprg5 = spr_val;
142 break;
132 case SPRN_SPRG6: 143 case SPRN_SPRG6:
133 vcpu->arch.shared->sprg6 = spr_val; break; 144 vcpu->arch.shared->sprg6 = spr_val;
145 break;
134 case SPRN_SPRG7: 146 case SPRN_SPRG7:
135 vcpu->arch.shared->sprg7 = spr_val; break; 147 vcpu->arch.shared->sprg7 = spr_val;
148 break;
136 149
137 case SPRN_IVPR: 150 case SPRN_IVPR:
138 vcpu->arch.ivpr = spr_val; 151 vcpu->arch.ivpr = spr_val;
152#ifdef CONFIG_KVM_BOOKE_HV
153 mtspr(SPRN_GIVPR, spr_val);
154#endif
139 break; 155 break;
140 case SPRN_IVOR0: 156 case SPRN_IVOR0:
141 vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val; 157 vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val;
@@ -145,6 +161,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
145 break; 161 break;
146 case SPRN_IVOR2: 162 case SPRN_IVOR2:
147 vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val; 163 vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val;
164#ifdef CONFIG_KVM_BOOKE_HV
165 mtspr(SPRN_GIVOR2, spr_val);
166#endif
148 break; 167 break;
149 case SPRN_IVOR3: 168 case SPRN_IVOR3:
150 vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val; 169 vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val;
@@ -163,6 +182,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
163 break; 182 break;
164 case SPRN_IVOR8: 183 case SPRN_IVOR8:
165 vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val; 184 vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val;
185#ifdef CONFIG_KVM_BOOKE_HV
186 mtspr(SPRN_GIVOR8, spr_val);
187#endif
166 break; 188 break;
167 case SPRN_IVOR9: 189 case SPRN_IVOR9:
168 vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val; 190 vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val;
@@ -193,75 +215,83 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
193 return emulated; 215 return emulated;
194} 216}
195 217
196int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) 218int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
197{ 219{
198 int emulated = EMULATE_DONE; 220 int emulated = EMULATE_DONE;
199 221
200 switch (sprn) { 222 switch (sprn) {
201 case SPRN_IVPR: 223 case SPRN_IVPR:
202 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break; 224 *spr_val = vcpu->arch.ivpr;
225 break;
203 case SPRN_DEAR: 226 case SPRN_DEAR:
204 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break; 227 *spr_val = vcpu->arch.shared->dar;
228 break;
205 case SPRN_ESR: 229 case SPRN_ESR:
206 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->esr); break; 230 *spr_val = vcpu->arch.shared->esr;
231 break;
207 case SPRN_DBCR0: 232 case SPRN_DBCR0:
208 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break; 233 *spr_val = vcpu->arch.dbcr0;
234 break;
209 case SPRN_DBCR1: 235 case SPRN_DBCR1:
210 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break; 236 *spr_val = vcpu->arch.dbcr1;
237 break;
211 case SPRN_DBSR: 238 case SPRN_DBSR:
212 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break; 239 *spr_val = vcpu->arch.dbsr;
240 break;
213 case SPRN_TSR: 241 case SPRN_TSR:
214 kvmppc_set_gpr(vcpu, rt, vcpu->arch.tsr); break; 242 *spr_val = vcpu->arch.tsr;
243 break;
215 case SPRN_TCR: 244 case SPRN_TCR:
216 kvmppc_set_gpr(vcpu, rt, vcpu->arch.tcr); break; 245 *spr_val = vcpu->arch.tcr;
246 break;
217 247
218 case SPRN_IVOR0: 248 case SPRN_IVOR0:
219 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]); 249 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
220 break; 250 break;
221 case SPRN_IVOR1: 251 case SPRN_IVOR1:
222 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]); 252 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
223 break; 253 break;
224 case SPRN_IVOR2: 254 case SPRN_IVOR2:
225 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]); 255 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
226 break; 256 break;
227 case SPRN_IVOR3: 257 case SPRN_IVOR3:
228 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]); 258 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
229 break; 259 break;
230 case SPRN_IVOR4: 260 case SPRN_IVOR4:
231 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]); 261 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
232 break; 262 break;
233 case SPRN_IVOR5: 263 case SPRN_IVOR5:
234 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]); 264 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
235 break; 265 break;
236 case SPRN_IVOR6: 266 case SPRN_IVOR6:
237 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]); 267 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
238 break; 268 break;
239 case SPRN_IVOR7: 269 case SPRN_IVOR7:
240 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]); 270 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
241 break; 271 break;
242 case SPRN_IVOR8: 272 case SPRN_IVOR8:
243 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]); 273 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
244 break; 274 break;
245 case SPRN_IVOR9: 275 case SPRN_IVOR9:
246 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]); 276 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
247 break; 277 break;
248 case SPRN_IVOR10: 278 case SPRN_IVOR10:
249 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]); 279 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
250 break; 280 break;
251 case SPRN_IVOR11: 281 case SPRN_IVOR11:
252 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]); 282 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
253 break; 283 break;
254 case SPRN_IVOR12: 284 case SPRN_IVOR12:
255 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]); 285 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
256 break; 286 break;
257 case SPRN_IVOR13: 287 case SPRN_IVOR13:
258 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]); 288 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
259 break; 289 break;
260 case SPRN_IVOR14: 290 case SPRN_IVOR14:
261 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]); 291 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
262 break; 292 break;
263 case SPRN_IVOR15: 293 case SPRN_IVOR15:
264 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]); 294 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
265 break; 295 break;
266 296
267 default: 297 default:
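The booke_emulate.c changes above switch the per-SPR handlers from taking a GPR index to taking the SPR value itself for mtspr, and a pointer to fill in for mfspr, so the generic emulator touches the GPR file exactly once at the call site. A toy, userspace-only version of that calling convention, using an invented SPR number, could look like this:

#include <stdio.h>

#define TOY_SPRN_DEAR 61    /* invented SPR number for the example */

struct toy_vcpu {
    unsigned long gpr[32];
    unsigned long dear;
};

static int toy_emulate_mtspr(struct toy_vcpu *vcpu, int sprn, unsigned long spr_val)
{
    switch (sprn) {
    case TOY_SPRN_DEAR:
        vcpu->dear = spr_val;
        return 0;
    default:
        return -1;
    }
}

static int toy_emulate_mfspr(struct toy_vcpu *vcpu, int sprn, unsigned long *spr_val)
{
    switch (sprn) {
    case TOY_SPRN_DEAR:
        *spr_val = vcpu->dear;
        return 0;
    default:
        return -1;
    }
}

int main(void)
{
    struct toy_vcpu vcpu = { .gpr = { [5] = 0x1000 } };
    unsigned long val = 0;

    /* The caller reads and writes the GPR file once, around the handlers. */
    toy_emulate_mtspr(&vcpu, TOY_SPRN_DEAR, vcpu.gpr[5]);
    toy_emulate_mfspr(&vcpu, TOY_SPRN_DEAR, &val);
    vcpu.gpr[6] = val;

    printf("gpr6 = %#lx\n", vcpu.gpr[6]);
    return 0;
}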
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index c8c4b878795a..8feec2ff3928 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -419,13 +419,13 @@ lightweight_exit:
419 * written directly to the shared area, so we 419 * written directly to the shared area, so we
420 * need to reload them here with the guest's values. 420 * need to reload them here with the guest's values.
421 */ 421 */
422 lwz r3, VCPU_SHARED_SPRG4(r5) 422 PPC_LD(r3, VCPU_SHARED_SPRG4, r5)
423 mtspr SPRN_SPRG4W, r3 423 mtspr SPRN_SPRG4W, r3
424 lwz r3, VCPU_SHARED_SPRG5(r5) 424 PPC_LD(r3, VCPU_SHARED_SPRG5, r5)
425 mtspr SPRN_SPRG5W, r3 425 mtspr SPRN_SPRG5W, r3
426 lwz r3, VCPU_SHARED_SPRG6(r5) 426 PPC_LD(r3, VCPU_SHARED_SPRG6, r5)
427 mtspr SPRN_SPRG6W, r3 427 mtspr SPRN_SPRG6W, r3
428 lwz r3, VCPU_SHARED_SPRG7(r5) 428 PPC_LD(r3, VCPU_SHARED_SPRG7, r5)
429 mtspr SPRN_SPRG7W, r3 429 mtspr SPRN_SPRG7W, r3
430 430
431#ifdef CONFIG_KVM_EXIT_TIMING 431#ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
new file mode 100644
index 000000000000..6048a00515d7
--- /dev/null
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -0,0 +1,597 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright (C) 2010-2011 Freescale Semiconductor, Inc.
16 *
17 * Author: Varun Sethi <varun.sethi@freescale.com>
18 * Author: Scott Wood <scotwood@freescale.com>
19 *
20 * This file is derived from arch/powerpc/kvm/booke_interrupts.S
21 */
22
23#include <asm/ppc_asm.h>
24#include <asm/kvm_asm.h>
25#include <asm/reg.h>
26#include <asm/mmu-44x.h>
27#include <asm/page.h>
28#include <asm/asm-compat.h>
29#include <asm/asm-offsets.h>
30#include <asm/bitsperlong.h>
31#include <asm/thread_info.h>
32
33#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
34
35#define GET_VCPU(vcpu, thread) \
36 PPC_LL vcpu, THREAD_KVM_VCPU(thread)
37
38#define LONGBYTES (BITS_PER_LONG / 8)
39
40#define VCPU_GPR(n) (VCPU_GPRS + (n * LONGBYTES))
41#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES))
42
43/* The host stack layout: */
44#define HOST_R1 (0 * LONGBYTES) /* Implied by stwu. */
45#define HOST_CALLEE_LR (1 * LONGBYTES)
46#define HOST_RUN (2 * LONGBYTES) /* struct kvm_run */
47/*
 48 * r2 is special: it holds 'current', and it is made nonvolatile in the
49 * kernel with the -ffixed-r2 gcc option.
50 */
51#define HOST_R2 (3 * LONGBYTES)
52#define HOST_CR (4 * LONGBYTES)
53#define HOST_NV_GPRS (5 * LONGBYTES)
54#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES))
55#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + LONGBYTES)
56#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */
57#define HOST_STACK_LR (HOST_STACK_SIZE + LONGBYTES) /* In caller stack frame. */
58
59#define NEED_EMU 0x00000001 /* emulation -- save nv regs */
60#define NEED_DEAR 0x00000002 /* save faulting DEAR */
61#define NEED_ESR 0x00000004 /* save faulting ESR */
62
63/*
64 * On entry:
65 * r4 = vcpu, r5 = srr0, r6 = srr1
66 * saved in vcpu: cr, ctr, r3-r13
67 */
68.macro kvm_handler_common intno, srr0, flags
69 /* Restore host stack pointer */
70 PPC_STL r1, VCPU_GPR(r1)(r4)
71 PPC_STL r2, VCPU_GPR(r2)(r4)
72 PPC_LL r1, VCPU_HOST_STACK(r4)
73 PPC_LL r2, HOST_R2(r1)
74
75 mfspr r10, SPRN_PID
76 lwz r8, VCPU_HOST_PID(r4)
77 PPC_LL r11, VCPU_SHARED(r4)
78 PPC_STL r14, VCPU_GPR(r14)(r4) /* We need a non-volatile GPR. */
79 li r14, \intno
80
81 stw r10, VCPU_GUEST_PID(r4)
82 mtspr SPRN_PID, r8
83
84#ifdef CONFIG_KVM_EXIT_TIMING
85 /* save exit time */
861: mfspr r7, SPRN_TBRU
87 mfspr r8, SPRN_TBRL
88 mfspr r9, SPRN_TBRU
89 cmpw r9, r7
90 stw r8, VCPU_TIMING_EXIT_TBL(r4)
91 bne- 1b
92 stw r9, VCPU_TIMING_EXIT_TBU(r4)
93#endif
94
95 oris r8, r6, MSR_CE@h
96 PPC_STD(r6, VCPU_SHARED_MSR, r11)
97 ori r8, r8, MSR_ME | MSR_RI
98 PPC_STL r5, VCPU_PC(r4)
99
100 /*
101 * Make sure CE/ME/RI are set (if appropriate for exception type)
102 * whether or not the guest had it set. Since mfmsr/mtmsr are
103 * somewhat expensive, skip in the common case where the guest
104 * had all these bits set (and thus they're still set if
105 * appropriate for the exception type).
106 */
107 cmpw r6, r8
108 beq 1f
109 mfmsr r7
110 .if \srr0 != SPRN_MCSRR0 && \srr0 != SPRN_CSRR0
111 oris r7, r7, MSR_CE@h
112 .endif
113 .if \srr0 != SPRN_MCSRR0
114 ori r7, r7, MSR_ME | MSR_RI
115 .endif
116 mtmsr r7
1171:
118
119 .if \flags & NEED_EMU
120 /*
121 * This assumes you have external PID support.
122 * To support a bookehv CPU without external PID, you'll
123 * need to look up the TLB entry and create a temporary mapping.
124 *
 125 * FIXME: we don't currently handle the case where the lwepx faults. PR-mode
126 * booke doesn't handle it either. Since Linux doesn't use
127 * broadcast tlbivax anymore, the only way this should happen is
128 * if the guest maps its memory execute-but-not-read, or if we
129 * somehow take a TLB miss in the middle of this entry code and
130 * evict the relevant entry. On e500mc, all kernel lowmem is
131 * bolted into TLB1 large page mappings, and we don't use
132 * broadcast invalidates, so we should not take a TLB miss here.
133 *
134 * Later we'll need to deal with faults here. Disallowing guest
135 * mappings that are execute-but-not-read could be an option on
136 * e500mc, but not on chips with an LRAT if it is used.
137 */
138
139 mfspr r3, SPRN_EPLC /* will already have correct ELPID and EGS */
140 PPC_STL r15, VCPU_GPR(r15)(r4)
141 PPC_STL r16, VCPU_GPR(r16)(r4)
142 PPC_STL r17, VCPU_GPR(r17)(r4)
143 PPC_STL r18, VCPU_GPR(r18)(r4)
144 PPC_STL r19, VCPU_GPR(r19)(r4)
145 mr r8, r3
146 PPC_STL r20, VCPU_GPR(r20)(r4)
147 rlwimi r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS
148 PPC_STL r21, VCPU_GPR(r21)(r4)
149 rlwimi r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR
150 PPC_STL r22, VCPU_GPR(r22)(r4)
151 rlwimi r8, r10, EPC_EPID_SHIFT, EPC_EPID
152 PPC_STL r23, VCPU_GPR(r23)(r4)
153 PPC_STL r24, VCPU_GPR(r24)(r4)
154 PPC_STL r25, VCPU_GPR(r25)(r4)
155 PPC_STL r26, VCPU_GPR(r26)(r4)
156 PPC_STL r27, VCPU_GPR(r27)(r4)
157 PPC_STL r28, VCPU_GPR(r28)(r4)
158 PPC_STL r29, VCPU_GPR(r29)(r4)
159 PPC_STL r30, VCPU_GPR(r30)(r4)
160 PPC_STL r31, VCPU_GPR(r31)(r4)
161 mtspr SPRN_EPLC, r8
162
163 /* disable preemption, so we are sure we hit the fixup handler */
164#ifdef CONFIG_PPC64
165 clrrdi r8,r1,THREAD_SHIFT
166#else
167 rlwinm r8,r1,0,0,31-THREAD_SHIFT /* current thread_info */
168#endif
169 li r7, 1
170 stw r7, TI_PREEMPT(r8)
171
172 isync
173
174 /*
175 * In case the read goes wrong, we catch it and write an invalid value
176 * in LAST_INST instead.
177 */
1781: lwepx r9, 0, r5
1792:
180.section .fixup, "ax"
1813: li r9, KVM_INST_FETCH_FAILED
182 b 2b
183.previous
184.section __ex_table,"a"
185 PPC_LONG_ALIGN
186 PPC_LONG 1b,3b
187.previous
188
189 mtspr SPRN_EPLC, r3
190 li r7, 0
191 stw r7, TI_PREEMPT(r8)
192 stw r9, VCPU_LAST_INST(r4)
193 .endif
194
195 .if \flags & NEED_ESR
196 mfspr r8, SPRN_ESR
197 PPC_STL r8, VCPU_FAULT_ESR(r4)
198 .endif
199
200 .if \flags & NEED_DEAR
201 mfspr r9, SPRN_DEAR
202 PPC_STL r9, VCPU_FAULT_DEAR(r4)
203 .endif
204
205 b kvmppc_resume_host
206.endm
207
208/*
209 * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
210 */
211.macro kvm_handler intno srr0, srr1, flags
212_GLOBAL(kvmppc_handler_\intno\()_\srr1)
213 GET_VCPU(r11, r10)
214 PPC_STL r3, VCPU_GPR(r3)(r11)
215 mfspr r3, SPRN_SPRG_RSCRATCH0
216 PPC_STL r4, VCPU_GPR(r4)(r11)
217 PPC_LL r4, THREAD_NORMSAVE(0)(r10)
218 PPC_STL r5, VCPU_GPR(r5)(r11)
219 stw r13, VCPU_CR(r11)
220 mfspr r5, \srr0
221 PPC_STL r3, VCPU_GPR(r10)(r11)
222 PPC_LL r3, THREAD_NORMSAVE(2)(r10)
223 PPC_STL r6, VCPU_GPR(r6)(r11)
224 PPC_STL r4, VCPU_GPR(r11)(r11)
225 mfspr r6, \srr1
226 PPC_STL r7, VCPU_GPR(r7)(r11)
227 PPC_STL r8, VCPU_GPR(r8)(r11)
228 PPC_STL r9, VCPU_GPR(r9)(r11)
229 PPC_STL r3, VCPU_GPR(r13)(r11)
230 mfctr r7
231 PPC_STL r12, VCPU_GPR(r12)(r11)
232 PPC_STL r7, VCPU_CTR(r11)
233 mr r4, r11
234 kvm_handler_common \intno, \srr0, \flags
235.endm
236
237.macro kvm_lvl_handler intno scratch srr0, srr1, flags
238_GLOBAL(kvmppc_handler_\intno\()_\srr1)
239 mfspr r10, SPRN_SPRG_THREAD
240 GET_VCPU(r11, r10)
241 PPC_STL r3, VCPU_GPR(r3)(r11)
242 mfspr r3, \scratch
243 PPC_STL r4, VCPU_GPR(r4)(r11)
244 PPC_LL r4, GPR9(r8)
245 PPC_STL r5, VCPU_GPR(r5)(r11)
246 stw r9, VCPU_CR(r11)
247 mfspr r5, \srr0
248 PPC_STL r3, VCPU_GPR(r8)(r11)
249 PPC_LL r3, GPR10(r8)
250 PPC_STL r6, VCPU_GPR(r6)(r11)
251 PPC_STL r4, VCPU_GPR(r9)(r11)
252 mfspr r6, \srr1
253 PPC_LL r4, GPR11(r8)
254 PPC_STL r7, VCPU_GPR(r7)(r11)
255 PPC_STL r3, VCPU_GPR(r10)(r11)
256 mfctr r7
257 PPC_STL r12, VCPU_GPR(r12)(r11)
258 PPC_STL r13, VCPU_GPR(r13)(r11)
259 PPC_STL r4, VCPU_GPR(r11)(r11)
260 PPC_STL r7, VCPU_CTR(r11)
261 mr r4, r11
262 kvm_handler_common \intno, \srr0, \flags
263.endm
264
265kvm_lvl_handler BOOKE_INTERRUPT_CRITICAL, \
266 SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
267kvm_lvl_handler BOOKE_INTERRUPT_MACHINE_CHECK, \
268 SPRN_SPRG_RSCRATCH_MC, SPRN_MCSRR0, SPRN_MCSRR1, 0
269kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, \
270 SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR)
271kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR
272kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0
273kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \
274 SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR)
275kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR
276kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
277kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
278kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
279kvm_handler BOOKE_INTERRUPT_DECREMENTER, SPRN_SRR0, SPRN_SRR1, 0
280kvm_handler BOOKE_INTERRUPT_FIT, SPRN_SRR0, SPRN_SRR1, 0
281kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \
282 SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
283kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \
284 SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
285kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0
286kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
287kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0
288kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0
289kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0
290kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0
291kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \
292 SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
293kvm_handler BOOKE_INTERRUPT_HV_PRIV, SPRN_SRR0, SPRN_SRR1, NEED_EMU
294kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
295kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, SPRN_GSRR0, SPRN_GSRR1, 0
296kvm_lvl_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, \
297 SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
298kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
299 SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
300kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
301 SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0
302
303
304/* Registers:
305 * SPRG_SCRATCH0: guest r10
306 * r4: vcpu pointer
307 * r11: vcpu->arch.shared
308 * r14: KVM exit number
309 */
310_GLOBAL(kvmppc_resume_host)
311 /* Save remaining volatile guest register state to vcpu. */
312 mfspr r3, SPRN_VRSAVE
313 PPC_STL r0, VCPU_GPR(r0)(r4)
314 mflr r5
315 mfspr r6, SPRN_SPRG4
316 PPC_STL r5, VCPU_LR(r4)
317 mfspr r7, SPRN_SPRG5
318 stw r3, VCPU_VRSAVE(r4)
319 PPC_STD(r6, VCPU_SHARED_SPRG4, r11)
320 mfspr r8, SPRN_SPRG6
321 PPC_STD(r7, VCPU_SHARED_SPRG5, r11)
322 mfspr r9, SPRN_SPRG7
323 PPC_STD(r8, VCPU_SHARED_SPRG6, r11)
324 mfxer r3
325 PPC_STD(r9, VCPU_SHARED_SPRG7, r11)
326
327 /* save guest MAS registers and restore host mas4 & mas6 */
328 mfspr r5, SPRN_MAS0
329 PPC_STL r3, VCPU_XER(r4)
330 mfspr r6, SPRN_MAS1
331 stw r5, VCPU_SHARED_MAS0(r11)
332 mfspr r7, SPRN_MAS2
333 stw r6, VCPU_SHARED_MAS1(r11)
334 PPC_STD(r7, VCPU_SHARED_MAS2, r11)
335 mfspr r5, SPRN_MAS3
336 mfspr r6, SPRN_MAS4
337 stw r5, VCPU_SHARED_MAS7_3+4(r11)
338 mfspr r7, SPRN_MAS6
339 stw r6, VCPU_SHARED_MAS4(r11)
340 mfspr r5, SPRN_MAS7
341 lwz r6, VCPU_HOST_MAS4(r4)
342 stw r7, VCPU_SHARED_MAS6(r11)
343 lwz r8, VCPU_HOST_MAS6(r4)
344 mtspr SPRN_MAS4, r6
345 stw r5, VCPU_SHARED_MAS7_3+0(r11)
346 mtspr SPRN_MAS6, r8
347 /* Enable MAS register updates via exception */
348 mfspr r3, SPRN_EPCR
349 rlwinm r3, r3, 0, ~SPRN_EPCR_DMIUH
350 mtspr SPRN_EPCR, r3
351 isync
352
353 /* Switch to kernel stack and jump to handler. */
354 PPC_LL r3, HOST_RUN(r1)
355 mr r5, r14 /* intno */
356 mr r14, r4 /* Save vcpu pointer. */
357 bl kvmppc_handle_exit
358
359 /* Restore vcpu pointer and the nonvolatiles we used. */
360 mr r4, r14
361 PPC_LL r14, VCPU_GPR(r14)(r4)
362
363 andi. r5, r3, RESUME_FLAG_NV
364 beq skip_nv_load
365 PPC_LL r15, VCPU_GPR(r15)(r4)
366 PPC_LL r16, VCPU_GPR(r16)(r4)
367 PPC_LL r17, VCPU_GPR(r17)(r4)
368 PPC_LL r18, VCPU_GPR(r18)(r4)
369 PPC_LL r19, VCPU_GPR(r19)(r4)
370 PPC_LL r20, VCPU_GPR(r20)(r4)
371 PPC_LL r21, VCPU_GPR(r21)(r4)
372 PPC_LL r22, VCPU_GPR(r22)(r4)
373 PPC_LL r23, VCPU_GPR(r23)(r4)
374 PPC_LL r24, VCPU_GPR(r24)(r4)
375 PPC_LL r25, VCPU_GPR(r25)(r4)
376 PPC_LL r26, VCPU_GPR(r26)(r4)
377 PPC_LL r27, VCPU_GPR(r27)(r4)
378 PPC_LL r28, VCPU_GPR(r28)(r4)
379 PPC_LL r29, VCPU_GPR(r29)(r4)
380 PPC_LL r30, VCPU_GPR(r30)(r4)
381 PPC_LL r31, VCPU_GPR(r31)(r4)
382skip_nv_load:
383 /* Should we return to the guest? */
384 andi. r5, r3, RESUME_FLAG_HOST
385 beq lightweight_exit
386
387 srawi r3, r3, 2 /* Shift -ERR back down. */
388
389heavyweight_exit:
390 /* Not returning to guest. */
391 PPC_LL r5, HOST_STACK_LR(r1)
392 lwz r6, HOST_CR(r1)
393
394 /*
395 * We already saved guest volatile register state; now save the
396 * non-volatiles.
397 */
398
399 PPC_STL r15, VCPU_GPR(r15)(r4)
400 PPC_STL r16, VCPU_GPR(r16)(r4)
401 PPC_STL r17, VCPU_GPR(r17)(r4)
402 PPC_STL r18, VCPU_GPR(r18)(r4)
403 PPC_STL r19, VCPU_GPR(r19)(r4)
404 PPC_STL r20, VCPU_GPR(r20)(r4)
405 PPC_STL r21, VCPU_GPR(r21)(r4)
406 PPC_STL r22, VCPU_GPR(r22)(r4)
407 PPC_STL r23, VCPU_GPR(r23)(r4)
408 PPC_STL r24, VCPU_GPR(r24)(r4)
409 PPC_STL r25, VCPU_GPR(r25)(r4)
410 PPC_STL r26, VCPU_GPR(r26)(r4)
411 PPC_STL r27, VCPU_GPR(r27)(r4)
412 PPC_STL r28, VCPU_GPR(r28)(r4)
413 PPC_STL r29, VCPU_GPR(r29)(r4)
414 PPC_STL r30, VCPU_GPR(r30)(r4)
415 PPC_STL r31, VCPU_GPR(r31)(r4)
416
417 /* Load host non-volatile register state from host stack. */
418 PPC_LL r14, HOST_NV_GPR(r14)(r1)
419 PPC_LL r15, HOST_NV_GPR(r15)(r1)
420 PPC_LL r16, HOST_NV_GPR(r16)(r1)
421 PPC_LL r17, HOST_NV_GPR(r17)(r1)
422 PPC_LL r18, HOST_NV_GPR(r18)(r1)
423 PPC_LL r19, HOST_NV_GPR(r19)(r1)
424 PPC_LL r20, HOST_NV_GPR(r20)(r1)
425 PPC_LL r21, HOST_NV_GPR(r21)(r1)
426 PPC_LL r22, HOST_NV_GPR(r22)(r1)
427 PPC_LL r23, HOST_NV_GPR(r23)(r1)
428 PPC_LL r24, HOST_NV_GPR(r24)(r1)
429 PPC_LL r25, HOST_NV_GPR(r25)(r1)
430 PPC_LL r26, HOST_NV_GPR(r26)(r1)
431 PPC_LL r27, HOST_NV_GPR(r27)(r1)
432 PPC_LL r28, HOST_NV_GPR(r28)(r1)
433 PPC_LL r29, HOST_NV_GPR(r29)(r1)
434 PPC_LL r30, HOST_NV_GPR(r30)(r1)
435 PPC_LL r31, HOST_NV_GPR(r31)(r1)
436
437 /* Return to kvm_vcpu_run(). */
438 mtlr r5
439 mtcr r6
440 addi r1, r1, HOST_STACK_SIZE
441 /* r3 still contains the return code from kvmppc_handle_exit(). */
442 blr
443
444/* Registers:
445 * r3: kvm_run pointer
446 * r4: vcpu pointer
447 */
448_GLOBAL(__kvmppc_vcpu_run)
449 stwu r1, -HOST_STACK_SIZE(r1)
450 PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
451
452 /* Save host state to stack. */
453 PPC_STL r3, HOST_RUN(r1)
454 mflr r3
455 mfcr r5
456 PPC_STL r3, HOST_STACK_LR(r1)
457
458 stw r5, HOST_CR(r1)
459
460 /* Save host non-volatile register state to stack. */
461 PPC_STL r14, HOST_NV_GPR(r14)(r1)
462 PPC_STL r15, HOST_NV_GPR(r15)(r1)
463 PPC_STL r16, HOST_NV_GPR(r16)(r1)
464 PPC_STL r17, HOST_NV_GPR(r17)(r1)
465 PPC_STL r18, HOST_NV_GPR(r18)(r1)
466 PPC_STL r19, HOST_NV_GPR(r19)(r1)
467 PPC_STL r20, HOST_NV_GPR(r20)(r1)
468 PPC_STL r21, HOST_NV_GPR(r21)(r1)
469 PPC_STL r22, HOST_NV_GPR(r22)(r1)
470 PPC_STL r23, HOST_NV_GPR(r23)(r1)
471 PPC_STL r24, HOST_NV_GPR(r24)(r1)
472 PPC_STL r25, HOST_NV_GPR(r25)(r1)
473 PPC_STL r26, HOST_NV_GPR(r26)(r1)
474 PPC_STL r27, HOST_NV_GPR(r27)(r1)
475 PPC_STL r28, HOST_NV_GPR(r28)(r1)
476 PPC_STL r29, HOST_NV_GPR(r29)(r1)
477 PPC_STL r30, HOST_NV_GPR(r30)(r1)
478 PPC_STL r31, HOST_NV_GPR(r31)(r1)
479
480 /* Load guest non-volatiles. */
481 PPC_LL r14, VCPU_GPR(r14)(r4)
482 PPC_LL r15, VCPU_GPR(r15)(r4)
483 PPC_LL r16, VCPU_GPR(r16)(r4)
484 PPC_LL r17, VCPU_GPR(r17)(r4)
485 PPC_LL r18, VCPU_GPR(r18)(r4)
486 PPC_LL r19, VCPU_GPR(r19)(r4)
487 PPC_LL r20, VCPU_GPR(r20)(r4)
488 PPC_LL r21, VCPU_GPR(r21)(r4)
489 PPC_LL r22, VCPU_GPR(r22)(r4)
490 PPC_LL r23, VCPU_GPR(r23)(r4)
491 PPC_LL r24, VCPU_GPR(r24)(r4)
492 PPC_LL r25, VCPU_GPR(r25)(r4)
493 PPC_LL r26, VCPU_GPR(r26)(r4)
494 PPC_LL r27, VCPU_GPR(r27)(r4)
495 PPC_LL r28, VCPU_GPR(r28)(r4)
496 PPC_LL r29, VCPU_GPR(r29)(r4)
497 PPC_LL r30, VCPU_GPR(r30)(r4)
498 PPC_LL r31, VCPU_GPR(r31)(r4)
499
500
501lightweight_exit:
502 PPC_STL r2, HOST_R2(r1)
503
504 mfspr r3, SPRN_PID
505 stw r3, VCPU_HOST_PID(r4)
506 lwz r3, VCPU_GUEST_PID(r4)
507 mtspr SPRN_PID, r3
508
509 PPC_LL r11, VCPU_SHARED(r4)
510 /* Disable MAS register updates via exception */
511 mfspr r3, SPRN_EPCR
512 oris r3, r3, SPRN_EPCR_DMIUH@h
513 mtspr SPRN_EPCR, r3
514 isync
515 /* Save host mas4 and mas6 and load guest MAS registers */
516 mfspr r3, SPRN_MAS4
517 stw r3, VCPU_HOST_MAS4(r4)
518 mfspr r3, SPRN_MAS6
519 stw r3, VCPU_HOST_MAS6(r4)
520 lwz r3, VCPU_SHARED_MAS0(r11)
521 lwz r5, VCPU_SHARED_MAS1(r11)
522 PPC_LD(r6, VCPU_SHARED_MAS2, r11)
523 lwz r7, VCPU_SHARED_MAS7_3+4(r11)
524 lwz r8, VCPU_SHARED_MAS4(r11)
525 mtspr SPRN_MAS0, r3
526 mtspr SPRN_MAS1, r5
527 mtspr SPRN_MAS2, r6
528 mtspr SPRN_MAS3, r7
529 mtspr SPRN_MAS4, r8
530 lwz r3, VCPU_SHARED_MAS6(r11)
531 lwz r5, VCPU_SHARED_MAS7_3+0(r11)
532 mtspr SPRN_MAS6, r3
533 mtspr SPRN_MAS7, r5
534
535 /*
536 * Host interrupt handlers may have clobbered these guest-readable
537 * SPRGs, so we need to reload them here with the guest's values.
538 */
539 lwz r3, VCPU_VRSAVE(r4)
540 PPC_LD(r5, VCPU_SHARED_SPRG4, r11)
541 mtspr SPRN_VRSAVE, r3
542 PPC_LD(r6, VCPU_SHARED_SPRG5, r11)
543 mtspr SPRN_SPRG4W, r5
544 PPC_LD(r7, VCPU_SHARED_SPRG6, r11)
545 mtspr SPRN_SPRG5W, r6
546 PPC_LD(r8, VCPU_SHARED_SPRG7, r11)
547 mtspr SPRN_SPRG6W, r7
548 mtspr SPRN_SPRG7W, r8
549
550 /* Load some guest volatiles. */
551 PPC_LL r3, VCPU_LR(r4)
552 PPC_LL r5, VCPU_XER(r4)
553 PPC_LL r6, VCPU_CTR(r4)
554 lwz r7, VCPU_CR(r4)
555 PPC_LL r8, VCPU_PC(r4)
556 PPC_LD(r9, VCPU_SHARED_MSR, r11)
557 PPC_LL r0, VCPU_GPR(r0)(r4)
558 PPC_LL r1, VCPU_GPR(r1)(r4)
559 PPC_LL r2, VCPU_GPR(r2)(r4)
560 PPC_LL r10, VCPU_GPR(r10)(r4)
561 PPC_LL r11, VCPU_GPR(r11)(r4)
562 PPC_LL r12, VCPU_GPR(r12)(r4)
563 PPC_LL r13, VCPU_GPR(r13)(r4)
564 mtlr r3
565 mtxer r5
566 mtctr r6
567 mtsrr0 r8
568 mtsrr1 r9
569
570#ifdef CONFIG_KVM_EXIT_TIMING
571 /* save enter time */
5721:
573 mfspr r6, SPRN_TBRU
574 mfspr r9, SPRN_TBRL
575 mfspr r8, SPRN_TBRU
576 cmpw r8, r6
577 stw r9, VCPU_TIMING_LAST_ENTER_TBL(r4)
578 bne 1b
579 stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
580#endif
581
582 /*
 583 * Don't execute any instruction that can change CR after
 584 * the instruction below.
585 */
586 mtcr r7
587
588 /* Finish loading guest volatiles and jump to guest. */
589 PPC_LL r5, VCPU_GPR(r5)(r4)
590 PPC_LL r6, VCPU_GPR(r6)(r4)
591 PPC_LL r7, VCPU_GPR(r7)(r4)
592 PPC_LL r8, VCPU_GPR(r8)(r4)
593 PPC_LL r9, VCPU_GPR(r9)(r4)
594
595 PPC_LL r3, VCPU_GPR(r3)(r4)
596 PPC_LL r4, VCPU_GPR(r4)(r4)
597 rfi
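The CONFIG_KVM_EXIT_TIMING blocks in the new handler above read TBU, then TBL, then TBU again, and retry if the upper half changed, so the two 32-bit reads form one consistent 64-bit timestamp. The sketch below reproduces that loop in plain C against a fake counter; read_tbu/read_tbl are stand-ins for the SPR reads, not real accessors.

#include <stdint.h>
#include <stdio.h>

/* Fake 64-bit timebase that ticks on every low-half read, so the example
 * runs anywhere and eventually exercises the retry path. */
static uint64_t fake_tb = 0x00000001fffffffeULL;
static uint32_t read_tbu(void) { return (uint32_t)(fake_tb >> 32); }
static uint32_t read_tbl(void) { return (uint32_t)++fake_tb; }

/* Same shape as the mfspr TBRU/TBRL/TBRU sequence: re-read the upper half
 * and retry if it changed between the two reads. */
static uint64_t read_timebase(void)
{
    uint32_t hi, lo;

    do {
        hi = read_tbu();
        lo = read_tbl();
    } while (read_tbu() != hi);

    return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
    printf("tb = %#llx\n", (unsigned long long)read_timebase());
    printf("tb = %#llx\n", (unsigned long long)read_timebase()); /* crosses the 32-bit boundary */
    return 0;
}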
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index ddcd896fa2ff..b479ed77c515 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -20,11 +20,282 @@
20#include <asm/reg.h> 20#include <asm/reg.h>
21#include <asm/cputable.h> 21#include <asm/cputable.h>
22#include <asm/tlbflush.h> 22#include <asm/tlbflush.h>
23#include <asm/kvm_e500.h>
24#include <asm/kvm_ppc.h> 23#include <asm/kvm_ppc.h>
25 24
25#include "../mm/mmu_decl.h"
26#include "booke.h" 26#include "booke.h"
27#include "e500_tlb.h" 27#include "e500.h"
28
29struct id {
30 unsigned long val;
31 struct id **pentry;
32};
33
34#define NUM_TIDS 256
35
36/*
 37 * This table provides mappings from:
38 * (guestAS,guestTID,guestPR) --> ID of physical cpu
39 * guestAS [0..1]
40 * guestTID [0..255]
41 * guestPR [0..1]
42 * ID [1..255]
43 * Each vcpu keeps one vcpu_id_table.
44 */
45struct vcpu_id_table {
46 struct id id[2][NUM_TIDS][2];
47};
48
49/*
 50 * This table provides the reverse mapping of vcpu_id_table:
51 * ID --> address of vcpu_id_table item.
52 * Each physical core has one pcpu_id_table.
53 */
54struct pcpu_id_table {
55 struct id *entry[NUM_TIDS];
56};
57
58static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
59
60/* This variable keeps last used shadow ID on local core.
61 * The valid range of shadow ID is [1..255] */
62static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
63
64/*
 65 * Allocate a free shadow id and set up a valid sid mapping in the given entry.
 66 * A mapping is only valid when the vcpu_id_table and pcpu_id_table entries match.
67 *
68 * The caller must have preemption disabled, and keep it that way until
69 * it has finished with the returned shadow id (either written into the
70 * TLB or arch.shadow_pid, or discarded).
71 */
72static inline int local_sid_setup_one(struct id *entry)
73{
74 unsigned long sid;
75 int ret = -1;
76
77 sid = ++(__get_cpu_var(pcpu_last_used_sid));
78 if (sid < NUM_TIDS) {
79 __get_cpu_var(pcpu_sids).entry[sid] = entry;
80 entry->val = sid;
81 entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid];
82 ret = sid;
83 }
84
85 /*
86 * If sid == NUM_TIDS, we've run out of sids. We return -1, and
87 * the caller will invalidate everything and start over.
88 *
89 * sid > NUM_TIDS indicates a race, which we disable preemption to
90 * avoid.
91 */
92 WARN_ON(sid > NUM_TIDS);
93
94 return ret;
95}
96
97/*
 98 * Check if the given entry contains a valid shadow id mapping.
99 * An ID mapping is considered valid only if
100 * both vcpu and pcpu know this mapping.
101 *
102 * The caller must have preemption disabled, and keep it that way until
103 * it has finished with the returned shadow id (either written into the
104 * TLB or arch.shadow_pid, or discarded).
105 */
106static inline int local_sid_lookup(struct id *entry)
107{
108 if (entry && entry->val != 0 &&
109 __get_cpu_var(pcpu_sids).entry[entry->val] == entry &&
110 entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val])
111 return entry->val;
112 return -1;
113}
114
115/* Invalidate all id mappings on local core -- call with preempt disabled */
116static inline void local_sid_destroy_all(void)
117{
118 __get_cpu_var(pcpu_last_used_sid) = 0;
119 memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids)));
120}
121
122static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
123{
124 vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL);
125 return vcpu_e500->idt;
126}
127
128static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500)
129{
130 kfree(vcpu_e500->idt);
131 vcpu_e500->idt = NULL;
132}
133
134/* Map guest pid to shadow.
135 * We use PID to keep shadow of current guest non-zero PID,
 136 * and use PID1 to keep shadow of guest zero PID,
 137 * so that guest tlbes with TID=0 can be accessed at any time. */
138static void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
139{
140 preempt_disable();
141 vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500,
142 get_cur_as(&vcpu_e500->vcpu),
143 get_cur_pid(&vcpu_e500->vcpu),
144 get_cur_pr(&vcpu_e500->vcpu), 1);
145 vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500,
146 get_cur_as(&vcpu_e500->vcpu), 0,
147 get_cur_pr(&vcpu_e500->vcpu), 1);
148 preempt_enable();
149}
150
151/* Invalidate all mappings on vcpu */
152static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500)
153{
154 memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table));
155
156 /* Update shadow pid when mappings are changed */
157 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
158}
159
160/* Invalidate one ID mapping on vcpu */
161static inline void kvmppc_e500_id_table_reset_one(
162 struct kvmppc_vcpu_e500 *vcpu_e500,
163 int as, int pid, int pr)
164{
165 struct vcpu_id_table *idt = vcpu_e500->idt;
166
167 BUG_ON(as >= 2);
168 BUG_ON(pid >= NUM_TIDS);
169 BUG_ON(pr >= 2);
170
171 idt->id[as][pid][pr].val = 0;
172 idt->id[as][pid][pr].pentry = NULL;
173
174 /* Update shadow pid when mappings are changed */
175 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
176}
177
178/*
179 * Map guest (vcpu,AS,ID,PR) to physical core shadow id.
 180 * This function first looks up whether a valid mapping exists;
 181 * if not, it creates a new one.
182 *
183 * The caller must have preemption disabled, and keep it that way until
184 * it has finished with the returned shadow id (either written into the
185 * TLB or arch.shadow_pid, or discarded).
186 */
187unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
188 unsigned int as, unsigned int gid,
189 unsigned int pr, int avoid_recursion)
190{
191 struct vcpu_id_table *idt = vcpu_e500->idt;
192 int sid;
193
194 BUG_ON(as >= 2);
195 BUG_ON(gid >= NUM_TIDS);
196 BUG_ON(pr >= 2);
197
198 sid = local_sid_lookup(&idt->id[as][gid][pr]);
199
200 while (sid <= 0) {
201 /* No mapping yet */
202 sid = local_sid_setup_one(&idt->id[as][gid][pr]);
203 if (sid <= 0) {
204 _tlbil_all();
205 local_sid_destroy_all();
206 }
207
208 /* Update shadow pid when mappings are changed */
209 if (!avoid_recursion)
210 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
211 }
212
213 return sid;
214}
215
216unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
217 struct kvm_book3e_206_tlb_entry *gtlbe)
218{
219 return kvmppc_e500_get_sid(to_e500(vcpu), get_tlb_ts(gtlbe),
220 get_tlb_tid(gtlbe), get_cur_pr(vcpu), 0);
221}
222
223void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
224{
225 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
226
227 if (vcpu->arch.pid != pid) {
228 vcpu_e500->pid[0] = vcpu->arch.pid = pid;
229 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
230 }
231}
232
233/* gtlbe must not be mapped by more than one host tlbe */
234void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
235 struct kvm_book3e_206_tlb_entry *gtlbe)
236{
237 struct vcpu_id_table *idt = vcpu_e500->idt;
238 unsigned int pr, tid, ts, pid;
239 u32 val, eaddr;
240 unsigned long flags;
241
242 ts = get_tlb_ts(gtlbe);
243 tid = get_tlb_tid(gtlbe);
244
245 preempt_disable();
246
247 /* One guest ID may be mapped to two shadow IDs */
248 for (pr = 0; pr < 2; pr++) {
249 /*
250 * The shadow PID can have a valid mapping on at most one
251 * host CPU. In the common case, it will be valid on this
252 * CPU, in which case we do a local invalidation of the
253 * specific address.
254 *
255 * If the shadow PID is not valid on the current host CPU,
256 * we invalidate the entire shadow PID.
257 */
258 pid = local_sid_lookup(&idt->id[ts][tid][pr]);
259 if (pid <= 0) {
260 kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr);
261 continue;
262 }
263
264 /*
265 * The guest is invalidating a 4K entry which is in a PID
266 * that has a valid shadow mapping on this host CPU. We
 267 * search the host TLB to invalidate its shadow TLB entry,
268 * similar to __tlbil_va except that we need to look in AS1.
269 */
270 val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS;
271 eaddr = get_tlb_eaddr(gtlbe);
272
273 local_irq_save(flags);
274
275 mtspr(SPRN_MAS6, val);
276 asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr));
277 val = mfspr(SPRN_MAS1);
278 if (val & MAS1_VALID) {
279 mtspr(SPRN_MAS1, val & ~MAS1_VALID);
280 asm volatile("tlbwe");
281 }
282
283 local_irq_restore(flags);
284 }
285
286 preempt_enable();
287}
288
289void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
290{
291 kvmppc_e500_id_table_reset_all(vcpu_e500);
292}
293
294void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
295{
296 /* Recalc shadow pid since MSR changes */
297 kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
298}
28 299
29void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) 300void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
30{ 301{
@@ -36,17 +307,20 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
36 307
37void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 308void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
38{ 309{
39 kvmppc_e500_tlb_load(vcpu, cpu); 310 kvmppc_booke_vcpu_load(vcpu, cpu);
311
312 /* Shadow PID may be expired on local core */
313 kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
40} 314}
41 315
42void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 316void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
43{ 317{
44 kvmppc_e500_tlb_put(vcpu);
45
46#ifdef CONFIG_SPE 318#ifdef CONFIG_SPE
47 if (vcpu->arch.shadow_msr & MSR_SPE) 319 if (vcpu->arch.shadow_msr & MSR_SPE)
48 kvmppc_vcpu_disable_spe(vcpu); 320 kvmppc_vcpu_disable_spe(vcpu);
49#endif 321#endif
322
323 kvmppc_booke_vcpu_put(vcpu);
50} 324}
51 325
52int kvmppc_core_check_processor_compat(void) 326int kvmppc_core_check_processor_compat(void)
@@ -61,6 +335,23 @@ int kvmppc_core_check_processor_compat(void)
61 return r; 335 return r;
62} 336}
63 337
338static void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
339{
340 struct kvm_book3e_206_tlb_entry *tlbe;
341
342 /* Insert large initial mapping for guest. */
343 tlbe = get_entry(vcpu_e500, 1, 0);
344 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
345 tlbe->mas2 = 0;
346 tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
347
348 /* 4K map for serial output. Used by kernel wrapper. */
349 tlbe = get_entry(vcpu_e500, 1, 1);
350 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
351 tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
352 tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
353}
354
64int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) 355int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
65{ 356{
66 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 357 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -76,32 +367,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
76 return 0; 367 return 0;
77} 368}
78 369
79/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
80int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
81 struct kvm_translation *tr)
82{
83 int index;
84 gva_t eaddr;
85 u8 pid;
86 u8 as;
87
88 eaddr = tr->linear_address;
89 pid = (tr->linear_address >> 32) & 0xff;
90 as = (tr->linear_address >> 40) & 0x1;
91
92 index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
93 if (index < 0) {
94 tr->valid = 0;
95 return 0;
96 }
97
98 tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
99 /* XXX what does "writeable" and "usermode" even mean? */
100 tr->valid = 1;
101
102 return 0;
103}
104
105void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 370void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
106{ 371{
107 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 372 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -115,19 +380,6 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
115 sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; 380 sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
116 sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; 381 sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
117 382
118 sregs->u.e.mas0 = vcpu->arch.shared->mas0;
119 sregs->u.e.mas1 = vcpu->arch.shared->mas1;
120 sregs->u.e.mas2 = vcpu->arch.shared->mas2;
121 sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3;
122 sregs->u.e.mas4 = vcpu->arch.shared->mas4;
123 sregs->u.e.mas6 = vcpu->arch.shared->mas6;
124
125 sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG);
126 sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg;
127 sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg;
128 sregs->u.e.tlbcfg[2] = 0;
129 sregs->u.e.tlbcfg[3] = 0;
130
131 sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; 383 sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
132 sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; 384 sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
133 sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; 385 sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
@@ -135,11 +387,13 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
135 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; 387 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
136 388
137 kvmppc_get_sregs_ivor(vcpu, sregs); 389 kvmppc_get_sregs_ivor(vcpu, sregs);
390 kvmppc_get_sregs_e500_tlb(vcpu, sregs);
138} 391}
139 392
140int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 393int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
141{ 394{
142 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 395 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
396 int ret;
143 397
144 if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 398 if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
145 vcpu_e500->svr = sregs->u.e.impl.fsl.svr; 399 vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
@@ -147,14 +401,9 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
147 vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; 401 vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
148 } 402 }
149 403
150 if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { 404 ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs);
151 vcpu->arch.shared->mas0 = sregs->u.e.mas0; 405 if (ret < 0)
152 vcpu->arch.shared->mas1 = sregs->u.e.mas1; 406 return ret;
153 vcpu->arch.shared->mas2 = sregs->u.e.mas2;
154 vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3;
155 vcpu->arch.shared->mas4 = sregs->u.e.mas4;
156 vcpu->arch.shared->mas6 = sregs->u.e.mas6;
157 }
158 407
159 if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) 408 if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
160 return 0; 409 return 0;
@@ -193,9 +442,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
193 if (err) 442 if (err)
194 goto free_vcpu; 443 goto free_vcpu;
195 444
445 if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
446 goto uninit_vcpu;
447
196 err = kvmppc_e500_tlb_init(vcpu_e500); 448 err = kvmppc_e500_tlb_init(vcpu_e500);
197 if (err) 449 if (err)
198 goto uninit_vcpu; 450 goto uninit_id;
199 451
200 vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); 452 vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
201 if (!vcpu->arch.shared) 453 if (!vcpu->arch.shared)
@@ -205,6 +457,8 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
205 457
206uninit_tlb: 458uninit_tlb:
207 kvmppc_e500_tlb_uninit(vcpu_e500); 459 kvmppc_e500_tlb_uninit(vcpu_e500);
460uninit_id:
461 kvmppc_e500_id_table_free(vcpu_e500);
208uninit_vcpu: 462uninit_vcpu:
209 kvm_vcpu_uninit(vcpu); 463 kvm_vcpu_uninit(vcpu);
210free_vcpu: 464free_vcpu:
@@ -218,11 +472,21 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
218 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 472 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
219 473
220 free_page((unsigned long)vcpu->arch.shared); 474 free_page((unsigned long)vcpu->arch.shared);
221 kvm_vcpu_uninit(vcpu);
222 kvmppc_e500_tlb_uninit(vcpu_e500); 475 kvmppc_e500_tlb_uninit(vcpu_e500);
476 kvmppc_e500_id_table_free(vcpu_e500);
477 kvm_vcpu_uninit(vcpu);
223 kmem_cache_free(kvm_vcpu_cache, vcpu_e500); 478 kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
224} 479}
225 480
481int kvmppc_core_init_vm(struct kvm *kvm)
482{
483 return 0;
484}
485
486void kvmppc_core_destroy_vm(struct kvm *kvm)
487{
488}
489
226static int __init kvmppc_e500_init(void) 490static int __init kvmppc_e500_init(void)
227{ 491{
228 int r, i; 492 int r, i;
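The id-table code moved into e500.c above gives each vcpu an (AS, TID, PR) to shadow-id map and each physical core a reverse map; an entry is valid only while both sides still point at each other, which is what lets local_sid_destroy_all() invalidate everything by clearing only the per-core side. The single-core, userspace sketch below models that handshake under simplified names; nothing here is per-CPU or preemption-aware.

#include <stdio.h>
#include <string.h>

#define NUM_TIDS 256

struct id { unsigned long val; struct id **pentry; };

static struct id *pcpu_entry[NUM_TIDS];   /* per-core reverse map */
static unsigned long pcpu_last_used_sid;  /* last shadow id handed out */

/* Valid only if the per-vcpu entry and the per-core slot point at each other. */
static int sid_lookup(struct id *e)
{
    if (e->val != 0 && pcpu_entry[e->val] == e && e->pentry == &pcpu_entry[e->val])
        return (int)e->val;
    return -1;
}

static int sid_setup_one(struct id *e)
{
    unsigned long sid = ++pcpu_last_used_sid;

    if (sid >= NUM_TIDS)
        return -1;              /* out of sids: caller flushes and starts over */
    pcpu_entry[sid] = e;
    e->val = sid;
    e->pentry = &pcpu_entry[sid];
    return (int)sid;
}

/* Models _tlbil_all() + local_sid_destroy_all(): clearing the per-core side
 * silently invalidates every per-vcpu entry. */
static void sid_destroy_all(void)
{
    pcpu_last_used_sid = 0;
    memset(pcpu_entry, 0, sizeof(pcpu_entry));
}

int main(void)
{
    struct id guest_pid_7 = { 0 };

    if (sid_lookup(&guest_pid_7) < 0)
        printf("no mapping yet, allocated sid %d\n", sid_setup_one(&guest_pid_7));
    printf("lookup now returns %d\n", sid_lookup(&guest_pid_7));

    sid_destroy_all();
    printf("after flush: %d\n", sid_lookup(&guest_pid_7));
    return 0;
}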
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
new file mode 100644
index 000000000000..aa8b81428bf4
--- /dev/null
+++ b/arch/powerpc/kvm/e500.h
@@ -0,0 +1,306 @@
1/*
2 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
3 *
4 * Author: Yu Liu <yu.liu@freescale.com>
5 * Scott Wood <scottwood@freescale.com>
6 * Ashish Kalra <ashish.kalra@freescale.com>
7 * Varun Sethi <varun.sethi@freescale.com>
8 *
9 * Description:
10 * This file is based on arch/powerpc/kvm/44x_tlb.h and
11 * arch/powerpc/include/asm/kvm_44x.h by Hollis Blanchard <hollisb@us.ibm.com>,
12 * Copyright IBM Corp. 2007-2008
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License, version 2, as
16 * published by the Free Software Foundation.
17 */
18
19#ifndef KVM_E500_H
20#define KVM_E500_H
21
22#include <linux/kvm_host.h>
23#include <asm/mmu-book3e.h>
24#include <asm/tlb.h>
25
26#define E500_PID_NUM 3
27#define E500_TLB_NUM 2
28
29#define E500_TLB_VALID 1
30#define E500_TLB_DIRTY 2
31#define E500_TLB_BITMAP 4
32
33struct tlbe_ref {
34 pfn_t pfn;
35 unsigned int flags; /* E500_TLB_* */
36};
37
38struct tlbe_priv {
39 struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */
40};
41
42#ifdef CONFIG_KVM_E500V2
43struct vcpu_id_table;
44#endif
45
46struct kvmppc_e500_tlb_params {
47 int entries, ways, sets;
48};
49
50struct kvmppc_vcpu_e500 {
51 struct kvm_vcpu vcpu;
52
53 /* Unmodified copy of the guest's TLB -- shared with host userspace. */
54 struct kvm_book3e_206_tlb_entry *gtlb_arch;
55
56 /* Starting entry number in gtlb_arch[] */
57 int gtlb_offset[E500_TLB_NUM];
58
59 /* KVM internal information associated with each guest TLB entry */
60 struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
61
62 struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM];
63
64 unsigned int gtlb_nv[E500_TLB_NUM];
65
66 /*
67 * information associated with each host TLB entry --
68 * TLB1 only for now. If/when guest TLB1 entries can be
69 * mapped with host TLB0, this will be used for that too.
70 *
71 * We don't want to use this for guest TLB0 because then we'd
72 * have the overhead of doing the translation again even if
73 * the entry is still in the guest TLB (e.g. we swapped out
74 * and back, and our host TLB entries got evicted).
75 */
76 struct tlbe_ref *tlb_refs[E500_TLB_NUM];
77 unsigned int host_tlb1_nv;
78
79 u32 svr;
80 u32 l1csr0;
81 u32 l1csr1;
82 u32 hid0;
83 u32 hid1;
84 u64 mcar;
85
86 struct page **shared_tlb_pages;
87 int num_shared_tlb_pages;
88
89 u64 *g2h_tlb1_map;
90 unsigned int *h2g_tlb1_rmap;
91
 92 /* Minimum and maximum addresses mapped by TLB1 */
93 unsigned long tlb1_min_eaddr;
94 unsigned long tlb1_max_eaddr;
95
96#ifdef CONFIG_KVM_E500V2
97 u32 pid[E500_PID_NUM];
98
99 /* vcpu id table */
100 struct vcpu_id_table *idt;
101#endif
102};
103
104static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
105{
106 return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu);
107}
108
109
110/* This geometry is the legacy default -- can be overridden by userspace */
111#define KVM_E500_TLB0_WAY_SIZE 128
112#define KVM_E500_TLB0_WAY_NUM 2
113
114#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
115#define KVM_E500_TLB1_SIZE 16
116
117#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF))
118#define tlbsel_of(index) ((index) >> 16)
119#define esel_of(index) ((index) & 0xFFFF)
120
121#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
122#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
123#define MAS2_ATTRIB_MASK \
124 (MAS2_X0 | MAS2_X1)
125#define MAS3_ATTRIB_MASK \
126 (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
127 | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
128
129int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500,
130 ulong value);
131int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu);
132int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu);
133int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb);
134int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb);
135int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb);
136int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500);
137void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
138
139void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
140int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
141
142
143#ifdef CONFIG_KVM_E500V2
144unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
145 unsigned int as, unsigned int gid,
146 unsigned int pr, int avoid_recursion);
147#endif
148
149/* TLB helper functions */
150static inline unsigned int
151get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
152{
153 return (tlbe->mas1 >> 7) & 0x1f;
154}
155
156static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
157{
158 return tlbe->mas2 & 0xfffff000;
159}
160
161static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
162{
163 unsigned int pgsize = get_tlb_size(tlbe);
164 return 1ULL << 10 << pgsize;
165}
166
167static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
168{
169 u64 bytes = get_tlb_bytes(tlbe);
170 return get_tlb_eaddr(tlbe) + bytes - 1;
171}
172
173static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
174{
175 return tlbe->mas7_3 & ~0xfffULL;
176}
177
178static inline unsigned int
179get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
180{
181 return (tlbe->mas1 >> 16) & 0xff;
182}
183
184static inline unsigned int
185get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
186{
187 return (tlbe->mas1 >> 12) & 0x1;
188}
189
190static inline unsigned int
191get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
192{
193 return (tlbe->mas1 >> 31) & 0x1;
194}
195
196static inline unsigned int
197get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
198{
199 return (tlbe->mas1 >> 30) & 0x1;
200}
201
202static inline unsigned int
203get_tlb_tsize(const struct kvm_book3e_206_tlb_entry *tlbe)
204{
205 return (tlbe->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
206}
207
208static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
209{
210 return vcpu->arch.pid & 0xff;
211}
212
213static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu)
214{
215 return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS));
216}
217
218static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
219{
220 return !!(vcpu->arch.shared->msr & MSR_PR);
221}
222
223static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu)
224{
225 return (vcpu->arch.shared->mas6 >> 16) & 0xff;
226}
227
228static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu)
229{
230 return vcpu->arch.shared->mas6 & 0x1;
231}
232
233static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu)
234{
235 /*
 236 * The manual says that tlbsel is 2 bits wide.
 237 * Since we only have two TLBs, only the lower bit is used.
238 */
239 return (vcpu->arch.shared->mas0 >> 28) & 0x1;
240}
241
242static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu)
243{
244 return vcpu->arch.shared->mas0 & 0xfff;
245}
246
247static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu)
248{
249 return (vcpu->arch.shared->mas0 >> 16) & 0xfff;
250}
251
252static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
253 const struct kvm_book3e_206_tlb_entry *tlbe)
254{
255 gpa_t gpa;
256
257 if (!get_tlb_v(tlbe))
258 return 0;
259
260#ifndef CONFIG_KVM_BOOKE_HV
261 /* Does it match current guest AS? */
262 /* XXX what about IS != DS? */
263 if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
264 return 0;
265#endif
266
267 gpa = get_tlb_raddr(tlbe);
268 if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
269 /* Mapping is not for RAM. */
270 return 0;
271
272 return 1;
273}
274
275static inline struct kvm_book3e_206_tlb_entry *get_entry(
276 struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
277{
278 int offset = vcpu_e500->gtlb_offset[tlbsel];
279 return &vcpu_e500->gtlb_arch[offset + entry];
280}
281
282void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
283 struct kvm_book3e_206_tlb_entry *gtlbe);
284void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500);
285
286#ifdef CONFIG_KVM_BOOKE_HV
287#define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe)
288#define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu)
289#define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS)
290#else
291unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
292 struct kvm_book3e_206_tlb_entry *gtlbe);
293
294static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
295{
296 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
297 unsigned int tidseld = (vcpu->arch.shared->mas4 >> 16) & 0xf;
298
299 return vcpu_e500->pid[tidseld];
300}
301
302/* Force TS=1 for all guest mappings. */
303#define get_tlb_sts(gtlbe) (MAS1_TS)
304#endif /* !BOOKE_HV */
305
306#endif /* KVM_E500_H */
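
To make the new e500.h helpers concrete: index_of() packs the TLB selector and entry number into one search-result value, and get_tlb_bytes() turns the MAS1 TSIZE field into a mapping size of 2^TSIZE KB. A minimal standalone sketch of those two calculations (illustrative only, not part of the patch):

#include <stdio.h>
#include <stdint.h>

/* Simplified copies of the e500.h helpers above, for illustration only. */
#define index_of(tlbsel, esel)	(((tlbsel) << 16) | ((esel) & 0xFFFF))
#define tlbsel_of(index)	((index) >> 16)
#define esel_of(index)		((index) & 0xFFFF)

int main(void)
{
	int idx = index_of(1, 7);		/* entry 7 of guest TLB1 */
	unsigned int tsize = 5;			/* example MAS1[TSIZE] field value */
	uint64_t bytes = 1ULL << 10 << tsize;	/* same formula as get_tlb_bytes(): 2^TSIZE KB */

	printf("tlbsel=%d esel=%d bytes=%llu\n",
	       tlbsel_of(idx), esel_of(idx), (unsigned long long)bytes);
	return 0;
}

With tsize = 5 this prints a 32768-byte (32 KB) mapping for entry 7 of TLB1.
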
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 6d0b2bd54fb0..8b99e076dc81 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -14,27 +14,96 @@
14 14
15#include <asm/kvm_ppc.h> 15#include <asm/kvm_ppc.h>
16#include <asm/disassemble.h> 16#include <asm/disassemble.h>
17#include <asm/kvm_e500.h> 17#include <asm/dbell.h>
18 18
19#include "booke.h" 19#include "booke.h"
20#include "e500_tlb.h" 20#include "e500.h"
21 21
22#define XOP_MSGSND 206
23#define XOP_MSGCLR 238
22#define XOP_TLBIVAX 786 24#define XOP_TLBIVAX 786
23#define XOP_TLBSX 914 25#define XOP_TLBSX 914
24#define XOP_TLBRE 946 26#define XOP_TLBRE 946
25#define XOP_TLBWE 978 27#define XOP_TLBWE 978
28#define XOP_TLBILX 18
29
30#ifdef CONFIG_KVM_E500MC
31static int dbell2prio(ulong param)
32{
33 int msg = param & PPC_DBELL_TYPE_MASK;
34 int prio = -1;
35
36 switch (msg) {
37 case PPC_DBELL_TYPE(PPC_DBELL):
38 prio = BOOKE_IRQPRIO_DBELL;
39 break;
40 case PPC_DBELL_TYPE(PPC_DBELL_CRIT):
41 prio = BOOKE_IRQPRIO_DBELL_CRIT;
42 break;
43 default:
44 break;
45 }
46
47 return prio;
48}
49
50static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb)
51{
52 ulong param = vcpu->arch.gpr[rb];
53 int prio = dbell2prio(param);
54
55 if (prio < 0)
56 return EMULATE_FAIL;
57
58 clear_bit(prio, &vcpu->arch.pending_exceptions);
59 return EMULATE_DONE;
60}
61
62static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
63{
64 ulong param = vcpu->arch.gpr[rb];
 65 int prio = dbell2prio(param);
66 int pir = param & PPC_DBELL_PIR_MASK;
67 int i;
68 struct kvm_vcpu *cvcpu;
69
70 if (prio < 0)
71 return EMULATE_FAIL;
72
73 kvm_for_each_vcpu(i, cvcpu, vcpu->kvm) {
74 int cpir = cvcpu->arch.shared->pir;
75 if ((param & PPC_DBELL_MSG_BRDCAST) || (cpir == pir)) {
76 set_bit(prio, &cvcpu->arch.pending_exceptions);
77 kvm_vcpu_kick(cvcpu);
78 }
79 }
80
81 return EMULATE_DONE;
82}
83#endif
26 84
27int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 85int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
28 unsigned int inst, int *advance) 86 unsigned int inst, int *advance)
29{ 87{
30 int emulated = EMULATE_DONE; 88 int emulated = EMULATE_DONE;
31 int ra; 89 int ra = get_ra(inst);
32 int rb; 90 int rb = get_rb(inst);
91 int rt = get_rt(inst);
33 92
34 switch (get_op(inst)) { 93 switch (get_op(inst)) {
35 case 31: 94 case 31:
36 switch (get_xop(inst)) { 95 switch (get_xop(inst)) {
37 96
97#ifdef CONFIG_KVM_E500MC
98 case XOP_MSGSND:
99 emulated = kvmppc_e500_emul_msgsnd(vcpu, rb);
100 break;
101
102 case XOP_MSGCLR:
103 emulated = kvmppc_e500_emul_msgclr(vcpu, rb);
104 break;
105#endif
106
38 case XOP_TLBRE: 107 case XOP_TLBRE:
39 emulated = kvmppc_e500_emul_tlbre(vcpu); 108 emulated = kvmppc_e500_emul_tlbre(vcpu);
40 break; 109 break;
@@ -44,13 +113,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
44 break; 113 break;
45 114
46 case XOP_TLBSX: 115 case XOP_TLBSX:
47 rb = get_rb(inst);
48 emulated = kvmppc_e500_emul_tlbsx(vcpu,rb); 116 emulated = kvmppc_e500_emul_tlbsx(vcpu,rb);
49 break; 117 break;
50 118
119 case XOP_TLBILX:
120 emulated = kvmppc_e500_emul_tlbilx(vcpu, rt, ra, rb);
121 break;
122
51 case XOP_TLBIVAX: 123 case XOP_TLBIVAX:
52 ra = get_ra(inst);
53 rb = get_rb(inst);
54 emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb); 124 emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb);
55 break; 125 break;
56 126
@@ -70,52 +140,63 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
70 return emulated; 140 return emulated;
71} 141}
72 142
73int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) 143int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
74{ 144{
75 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 145 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
76 int emulated = EMULATE_DONE; 146 int emulated = EMULATE_DONE;
77 ulong spr_val = kvmppc_get_gpr(vcpu, rs);
78 147
79 switch (sprn) { 148 switch (sprn) {
149#ifndef CONFIG_KVM_BOOKE_HV
80 case SPRN_PID: 150 case SPRN_PID:
81 kvmppc_set_pid(vcpu, spr_val); 151 kvmppc_set_pid(vcpu, spr_val);
82 break; 152 break;
83 case SPRN_PID1: 153 case SPRN_PID1:
84 if (spr_val != 0) 154 if (spr_val != 0)
85 return EMULATE_FAIL; 155 return EMULATE_FAIL;
86 vcpu_e500->pid[1] = spr_val; break; 156 vcpu_e500->pid[1] = spr_val;
157 break;
87 case SPRN_PID2: 158 case SPRN_PID2:
88 if (spr_val != 0) 159 if (spr_val != 0)
89 return EMULATE_FAIL; 160 return EMULATE_FAIL;
90 vcpu_e500->pid[2] = spr_val; break; 161 vcpu_e500->pid[2] = spr_val;
162 break;
91 case SPRN_MAS0: 163 case SPRN_MAS0:
92 vcpu->arch.shared->mas0 = spr_val; break; 164 vcpu->arch.shared->mas0 = spr_val;
165 break;
93 case SPRN_MAS1: 166 case SPRN_MAS1:
94 vcpu->arch.shared->mas1 = spr_val; break; 167 vcpu->arch.shared->mas1 = spr_val;
168 break;
95 case SPRN_MAS2: 169 case SPRN_MAS2:
96 vcpu->arch.shared->mas2 = spr_val; break; 170 vcpu->arch.shared->mas2 = spr_val;
171 break;
97 case SPRN_MAS3: 172 case SPRN_MAS3:
98 vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff; 173 vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff;
99 vcpu->arch.shared->mas7_3 |= spr_val; 174 vcpu->arch.shared->mas7_3 |= spr_val;
100 break; 175 break;
101 case SPRN_MAS4: 176 case SPRN_MAS4:
102 vcpu->arch.shared->mas4 = spr_val; break; 177 vcpu->arch.shared->mas4 = spr_val;
178 break;
103 case SPRN_MAS6: 179 case SPRN_MAS6:
104 vcpu->arch.shared->mas6 = spr_val; break; 180 vcpu->arch.shared->mas6 = spr_val;
181 break;
105 case SPRN_MAS7: 182 case SPRN_MAS7:
106 vcpu->arch.shared->mas7_3 &= (u64)0xffffffff; 183 vcpu->arch.shared->mas7_3 &= (u64)0xffffffff;
107 vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32; 184 vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32;
108 break; 185 break;
186#endif
109 case SPRN_L1CSR0: 187 case SPRN_L1CSR0:
110 vcpu_e500->l1csr0 = spr_val; 188 vcpu_e500->l1csr0 = spr_val;
111 vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC); 189 vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
112 break; 190 break;
113 case SPRN_L1CSR1: 191 case SPRN_L1CSR1:
114 vcpu_e500->l1csr1 = spr_val; break; 192 vcpu_e500->l1csr1 = spr_val;
193 break;
115 case SPRN_HID0: 194 case SPRN_HID0:
116 vcpu_e500->hid0 = spr_val; break; 195 vcpu_e500->hid0 = spr_val;
196 break;
117 case SPRN_HID1: 197 case SPRN_HID1:
118 vcpu_e500->hid1 = spr_val; break; 198 vcpu_e500->hid1 = spr_val;
199 break;
119 200
120 case SPRN_MMUCSR0: 201 case SPRN_MMUCSR0:
121 emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500, 202 emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
@@ -135,81 +216,112 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
135 case SPRN_IVOR35: 216 case SPRN_IVOR35:
136 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; 217 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
137 break; 218 break;
138 219#ifdef CONFIG_KVM_BOOKE_HV
220 case SPRN_IVOR36:
221 vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] = spr_val;
222 break;
223 case SPRN_IVOR37:
224 vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] = spr_val;
225 break;
226#endif
139 default: 227 default:
140 emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); 228 emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val);
141 } 229 }
142 230
143 return emulated; 231 return emulated;
144} 232}
145 233
146int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) 234int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
147{ 235{
148 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 236 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
149 int emulated = EMULATE_DONE; 237 int emulated = EMULATE_DONE;
150 unsigned long val;
151 238
152 switch (sprn) { 239 switch (sprn) {
240#ifndef CONFIG_KVM_BOOKE_HV
153 case SPRN_PID: 241 case SPRN_PID:
154 kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break; 242 *spr_val = vcpu_e500->pid[0];
243 break;
155 case SPRN_PID1: 244 case SPRN_PID1:
156 kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break; 245 *spr_val = vcpu_e500->pid[1];
246 break;
157 case SPRN_PID2: 247 case SPRN_PID2:
158 kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break; 248 *spr_val = vcpu_e500->pid[2];
249 break;
159 case SPRN_MAS0: 250 case SPRN_MAS0:
160 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas0); break; 251 *spr_val = vcpu->arch.shared->mas0;
252 break;
161 case SPRN_MAS1: 253 case SPRN_MAS1:
162 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas1); break; 254 *spr_val = vcpu->arch.shared->mas1;
255 break;
163 case SPRN_MAS2: 256 case SPRN_MAS2:
164 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas2); break; 257 *spr_val = vcpu->arch.shared->mas2;
258 break;
165 case SPRN_MAS3: 259 case SPRN_MAS3:
166 val = (u32)vcpu->arch.shared->mas7_3; 260 *spr_val = (u32)vcpu->arch.shared->mas7_3;
167 kvmppc_set_gpr(vcpu, rt, val);
168 break; 261 break;
169 case SPRN_MAS4: 262 case SPRN_MAS4:
170 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas4); break; 263 *spr_val = vcpu->arch.shared->mas4;
264 break;
171 case SPRN_MAS6: 265 case SPRN_MAS6:
172 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas6); break; 266 *spr_val = vcpu->arch.shared->mas6;
267 break;
173 case SPRN_MAS7: 268 case SPRN_MAS7:
174 val = vcpu->arch.shared->mas7_3 >> 32; 269 *spr_val = vcpu->arch.shared->mas7_3 >> 32;
175 kvmppc_set_gpr(vcpu, rt, val);
176 break; 270 break;
271#endif
177 case SPRN_TLB0CFG: 272 case SPRN_TLB0CFG:
178 kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break; 273 *spr_val = vcpu->arch.tlbcfg[0];
274 break;
179 case SPRN_TLB1CFG: 275 case SPRN_TLB1CFG:
180 kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break; 276 *spr_val = vcpu->arch.tlbcfg[1];
277 break;
181 case SPRN_L1CSR0: 278 case SPRN_L1CSR0:
182 kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break; 279 *spr_val = vcpu_e500->l1csr0;
280 break;
183 case SPRN_L1CSR1: 281 case SPRN_L1CSR1:
184 kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break; 282 *spr_val = vcpu_e500->l1csr1;
283 break;
185 case SPRN_HID0: 284 case SPRN_HID0:
186 kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break; 285 *spr_val = vcpu_e500->hid0;
286 break;
187 case SPRN_HID1: 287 case SPRN_HID1:
188 kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break; 288 *spr_val = vcpu_e500->hid1;
289 break;
189 case SPRN_SVR: 290 case SPRN_SVR:
190 kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break; 291 *spr_val = vcpu_e500->svr;
292 break;
191 293
192 case SPRN_MMUCSR0: 294 case SPRN_MMUCSR0:
193 kvmppc_set_gpr(vcpu, rt, 0); break; 295 *spr_val = 0;
296 break;
194 297
195 case SPRN_MMUCFG: 298 case SPRN_MMUCFG:
196 kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break; 299 *spr_val = vcpu->arch.mmucfg;
300 break;
197 301
198 /* extra exceptions */ 302 /* extra exceptions */
199 case SPRN_IVOR32: 303 case SPRN_IVOR32:
200 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]); 304 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
201 break; 305 break;
202 case SPRN_IVOR33: 306 case SPRN_IVOR33:
203 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]); 307 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
204 break; 308 break;
205 case SPRN_IVOR34: 309 case SPRN_IVOR34:
206 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]); 310 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
207 break; 311 break;
208 case SPRN_IVOR35: 312 case SPRN_IVOR35:
209 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]); 313 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
314 break;
315#ifdef CONFIG_KVM_BOOKE_HV
316 case SPRN_IVOR36:
317 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
318 break;
319 case SPRN_IVOR37:
320 *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
210 break; 321 break;
322#endif
211 default: 323 default:
212 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); 324 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val);
213 } 325 }
214 326
215 return emulated; 327 return emulated;
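
The mtspr/mfspr emulation above now takes the SPR value directly (ulong spr_val / ulong *spr_val) instead of a GPR index, leaving the GPR read/write to the generic emulator. A toy model of the new mfspr calling convention, with hypothetical names and values (illustrative only, not the kernel's code):

#include <stdio.h>

enum { EMULATE_DONE, EMULATE_FAIL };

/* Stand-in for a core backend: it fills in the value through a pointer. */
static int core_emulate_mfspr_model(int sprn, unsigned long *spr_val)
{
	if (sprn == 1015) {		/* pretend SPR 1015 is handled by the backend */
		*spr_val = 0x12345678;
		return EMULATE_DONE;
	}
	return EMULATE_FAIL;
}

int main(void)
{
	unsigned long gpr[32] = { 0 };
	unsigned long val;
	int rt = 3;

	/* The generic layer, not the backend, performs the GPR update. */
	if (core_emulate_mfspr_model(1015, &val) == EMULATE_DONE)
		gpr[rt] = val;

	printf("r%d = 0x%lx\n", rt, gpr[rt]);
	return 0;
}
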
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 6e53e4164de1..c510fc961302 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -2,6 +2,9 @@
2 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. 2 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
3 * 3 *
4 * Author: Yu Liu, yu.liu@freescale.com 4 * Author: Yu Liu, yu.liu@freescale.com
5 * Scott Wood, scottwood@freescale.com
6 * Ashish Kalra, ashish.kalra@freescale.com
7 * Varun Sethi, varun.sethi@freescale.com
5 * 8 *
6 * Description: 9 * Description:
7 * This file is based on arch/powerpc/kvm/44x_tlb.c, 10 * This file is based on arch/powerpc/kvm/44x_tlb.c,
@@ -26,210 +29,15 @@
26#include <linux/vmalloc.h> 29#include <linux/vmalloc.h>
27#include <linux/hugetlb.h> 30#include <linux/hugetlb.h>
28#include <asm/kvm_ppc.h> 31#include <asm/kvm_ppc.h>
29#include <asm/kvm_e500.h>
30 32
31#include "../mm/mmu_decl.h" 33#include "e500.h"
32#include "e500_tlb.h"
33#include "trace.h" 34#include "trace.h"
34#include "timing.h" 35#include "timing.h"
35 36
36#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) 37#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)
37 38
38struct id {
39 unsigned long val;
40 struct id **pentry;
41};
42
43#define NUM_TIDS 256
44
45/*
46 * This table provide mappings from:
47 * (guestAS,guestTID,guestPR) --> ID of physical cpu
48 * guestAS [0..1]
49 * guestTID [0..255]
50 * guestPR [0..1]
51 * ID [1..255]
52 * Each vcpu keeps one vcpu_id_table.
53 */
54struct vcpu_id_table {
55 struct id id[2][NUM_TIDS][2];
56};
57
58/*
59 * This table provide reversed mappings of vcpu_id_table:
60 * ID --> address of vcpu_id_table item.
61 * Each physical core has one pcpu_id_table.
62 */
63struct pcpu_id_table {
64 struct id *entry[NUM_TIDS];
65};
66
67static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
68
69/* This variable keeps last used shadow ID on local core.
70 * The valid range of shadow ID is [1..255] */
71static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
72
73static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; 39static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
74 40
75static struct kvm_book3e_206_tlb_entry *get_entry(
76 struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
77{
78 int offset = vcpu_e500->gtlb_offset[tlbsel];
79 return &vcpu_e500->gtlb_arch[offset + entry];
80}
81
82/*
83 * Allocate a free shadow id and setup a valid sid mapping in given entry.
84 * A mapping is only valid when vcpu_id_table and pcpu_id_table are match.
85 *
86 * The caller must have preemption disabled, and keep it that way until
87 * it has finished with the returned shadow id (either written into the
88 * TLB or arch.shadow_pid, or discarded).
89 */
90static inline int local_sid_setup_one(struct id *entry)
91{
92 unsigned long sid;
93 int ret = -1;
94
95 sid = ++(__get_cpu_var(pcpu_last_used_sid));
96 if (sid < NUM_TIDS) {
97 __get_cpu_var(pcpu_sids).entry[sid] = entry;
98 entry->val = sid;
99 entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid];
100 ret = sid;
101 }
102
103 /*
104 * If sid == NUM_TIDS, we've run out of sids. We return -1, and
105 * the caller will invalidate everything and start over.
106 *
107 * sid > NUM_TIDS indicates a race, which we disable preemption to
108 * avoid.
109 */
110 WARN_ON(sid > NUM_TIDS);
111
112 return ret;
113}
114
115/*
116 * Check if given entry contain a valid shadow id mapping.
117 * An ID mapping is considered valid only if
118 * both vcpu and pcpu know this mapping.
119 *
120 * The caller must have preemption disabled, and keep it that way until
121 * it has finished with the returned shadow id (either written into the
122 * TLB or arch.shadow_pid, or discarded).
123 */
124static inline int local_sid_lookup(struct id *entry)
125{
126 if (entry && entry->val != 0 &&
127 __get_cpu_var(pcpu_sids).entry[entry->val] == entry &&
128 entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val])
129 return entry->val;
130 return -1;
131}
132
133/* Invalidate all id mappings on local core -- call with preempt disabled */
134static inline void local_sid_destroy_all(void)
135{
136 __get_cpu_var(pcpu_last_used_sid) = 0;
137 memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids)));
138}
139
140static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
141{
142 vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL);
143 return vcpu_e500->idt;
144}
145
146static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500)
147{
148 kfree(vcpu_e500->idt);
149}
150
151/* Invalidate all mappings on vcpu */
152static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500)
153{
154 memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table));
155
156 /* Update shadow pid when mappings are changed */
157 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
158}
159
160/* Invalidate one ID mapping on vcpu */
161static inline void kvmppc_e500_id_table_reset_one(
162 struct kvmppc_vcpu_e500 *vcpu_e500,
163 int as, int pid, int pr)
164{
165 struct vcpu_id_table *idt = vcpu_e500->idt;
166
167 BUG_ON(as >= 2);
168 BUG_ON(pid >= NUM_TIDS);
169 BUG_ON(pr >= 2);
170
171 idt->id[as][pid][pr].val = 0;
172 idt->id[as][pid][pr].pentry = NULL;
173
174 /* Update shadow pid when mappings are changed */
175 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
176}
177
178/*
179 * Map guest (vcpu,AS,ID,PR) to physical core shadow id.
180 * This function first lookup if a valid mapping exists,
181 * if not, then creates a new one.
182 *
183 * The caller must have preemption disabled, and keep it that way until
184 * it has finished with the returned shadow id (either written into the
185 * TLB or arch.shadow_pid, or discarded).
186 */
187static unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
188 unsigned int as, unsigned int gid,
189 unsigned int pr, int avoid_recursion)
190{
191 struct vcpu_id_table *idt = vcpu_e500->idt;
192 int sid;
193
194 BUG_ON(as >= 2);
195 BUG_ON(gid >= NUM_TIDS);
196 BUG_ON(pr >= 2);
197
198 sid = local_sid_lookup(&idt->id[as][gid][pr]);
199
200 while (sid <= 0) {
201 /* No mapping yet */
202 sid = local_sid_setup_one(&idt->id[as][gid][pr]);
203 if (sid <= 0) {
204 _tlbil_all();
205 local_sid_destroy_all();
206 }
207
208 /* Update shadow pid when mappings are changed */
209 if (!avoid_recursion)
210 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
211 }
212
213 return sid;
214}
215
216/* Map guest pid to shadow.
217 * We use PID to keep shadow of current guest non-zero PID,
218 * and use PID1 to keep shadow of guest zero PID.
219 * So that guest tlbe with TID=0 can be accessed at any time */
220void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
221{
222 preempt_disable();
223 vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500,
224 get_cur_as(&vcpu_e500->vcpu),
225 get_cur_pid(&vcpu_e500->vcpu),
226 get_cur_pr(&vcpu_e500->vcpu), 1);
227 vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500,
228 get_cur_as(&vcpu_e500->vcpu), 0,
229 get_cur_pr(&vcpu_e500->vcpu), 1);
230 preempt_enable();
231}
232
233static inline unsigned int gtlb0_get_next_victim( 41static inline unsigned int gtlb0_get_next_victim(
234 struct kvmppc_vcpu_e500 *vcpu_e500) 42 struct kvmppc_vcpu_e500 *vcpu_e500)
235{ 43{
@@ -258,6 +66,7 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
258 /* Mask off reserved bits. */ 66 /* Mask off reserved bits. */
259 mas3 &= MAS3_ATTRIB_MASK; 67 mas3 &= MAS3_ATTRIB_MASK;
260 68
69#ifndef CONFIG_KVM_BOOKE_HV
261 if (!usermode) { 70 if (!usermode) {
262 /* Guest is in supervisor mode, 71 /* Guest is in supervisor mode,
263 * so we need to translate guest 72 * so we need to translate guest
@@ -265,8 +74,9 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
265 mas3 &= ~E500_TLB_USER_PERM_MASK; 74 mas3 &= ~E500_TLB_USER_PERM_MASK;
266 mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1; 75 mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1;
267 } 76 }
268 77 mas3 |= E500_TLB_SUPER_PERM_MASK;
269 return mas3 | E500_TLB_SUPER_PERM_MASK; 78#endif
79 return mas3;
270} 80}
271 81
272static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) 82static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
@@ -292,7 +102,16 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
292 mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); 102 mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
293 mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); 103 mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
294 mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); 104 mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
105#ifdef CONFIG_KVM_BOOKE_HV
106 mtspr(SPRN_MAS8, stlbe->mas8);
107#endif
295 asm volatile("isync; tlbwe" : : : "memory"); 108 asm volatile("isync; tlbwe" : : : "memory");
109
110#ifdef CONFIG_KVM_BOOKE_HV
111 /* Must clear mas8 for other host tlbwe's */
112 mtspr(SPRN_MAS8, 0);
113 isync();
114#endif
296 local_irq_restore(flags); 115 local_irq_restore(flags);
297 116
298 trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1, 117 trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1,
@@ -337,6 +156,7 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
337 } 156 }
338} 157}
339 158
159#ifdef CONFIG_KVM_E500V2
340void kvmppc_map_magic(struct kvm_vcpu *vcpu) 160void kvmppc_map_magic(struct kvm_vcpu *vcpu)
341{ 161{
342 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 162 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -361,75 +181,41 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
361 __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); 181 __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
362 preempt_enable(); 182 preempt_enable();
363} 183}
364 184#endif
365void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
366{
367 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
368
369 /* Shadow PID may be expired on local core */
370 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
371}
372
373void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
374{
375}
376 185
377static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, 186static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500,
378 int tlbsel, int esel) 187 int tlbsel, int esel)
379{ 188{
380 struct kvm_book3e_206_tlb_entry *gtlbe = 189 struct kvm_book3e_206_tlb_entry *gtlbe =
381 get_entry(vcpu_e500, tlbsel, esel); 190 get_entry(vcpu_e500, tlbsel, esel);
382 struct vcpu_id_table *idt = vcpu_e500->idt;
383 unsigned int pr, tid, ts, pid;
384 u32 val, eaddr;
385 unsigned long flags;
386
387 ts = get_tlb_ts(gtlbe);
388 tid = get_tlb_tid(gtlbe);
389
390 preempt_disable();
391
392 /* One guest ID may be mapped to two shadow IDs */
393 for (pr = 0; pr < 2; pr++) {
394 /*
395 * The shadow PID can have a valid mapping on at most one
396 * host CPU. In the common case, it will be valid on this
397 * CPU, in which case (for TLB0) we do a local invalidation
398 * of the specific address.
399 *
400 * If the shadow PID is not valid on the current host CPU, or
401 * if we're invalidating a TLB1 entry, we invalidate the
402 * entire shadow PID.
403 */
404 if (tlbsel == 1 ||
405 (pid = local_sid_lookup(&idt->id[ts][tid][pr])) <= 0) {
406 kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr);
407 continue;
408 }
409 191
410 /* 192 if (tlbsel == 1 &&
411 * The guest is invalidating a TLB0 entry which is in a PID 193 vcpu_e500->gtlb_priv[1][esel].ref.flags & E500_TLB_BITMAP) {
412 * that has a valid shadow mapping on this host CPU. We 194 u64 tmp = vcpu_e500->g2h_tlb1_map[esel];
413 * search host TLB0 to invalidate it's shadow TLB entry, 195 int hw_tlb_indx;
414 * similar to __tlbil_va except that we need to look in AS1. 196 unsigned long flags;
415 */
416 val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS;
417 eaddr = get_tlb_eaddr(gtlbe);
418 197
419 local_irq_save(flags); 198 local_irq_save(flags);
420 199 while (tmp) {
421 mtspr(SPRN_MAS6, val); 200 hw_tlb_indx = __ilog2_u64(tmp & -tmp);
422 asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr)); 201 mtspr(SPRN_MAS0,
423 val = mfspr(SPRN_MAS1); 202 MAS0_TLBSEL(1) |
424 if (val & MAS1_VALID) { 203 MAS0_ESEL(to_htlb1_esel(hw_tlb_indx)));
425 mtspr(SPRN_MAS1, val & ~MAS1_VALID); 204 mtspr(SPRN_MAS1, 0);
426 asm volatile("tlbwe"); 205 asm volatile("tlbwe");
206 vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0;
207 tmp &= tmp - 1;
427 } 208 }
428 209 mb();
210 vcpu_e500->g2h_tlb1_map[esel] = 0;
211 vcpu_e500->gtlb_priv[1][esel].ref.flags &= ~E500_TLB_BITMAP;
429 local_irq_restore(flags); 212 local_irq_restore(flags);
213
214 return;
430 } 215 }
431 216
432 preempt_enable(); 217 /* Guest tlbe is backed by at most one host tlbe per shadow pid. */
218 kvmppc_e500_tlbil_one(vcpu_e500, gtlbe);
433} 219}
434 220
435static int tlb0_set_base(gva_t addr, int sets, int ways) 221static int tlb0_set_base(gva_t addr, int sets, int ways)
@@ -475,6 +261,9 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
475 set_base = gtlb0_set_base(vcpu_e500, eaddr); 261 set_base = gtlb0_set_base(vcpu_e500, eaddr);
476 size = vcpu_e500->gtlb_params[0].ways; 262 size = vcpu_e500->gtlb_params[0].ways;
477 } else { 263 } else {
264 if (eaddr < vcpu_e500->tlb1_min_eaddr ||
265 eaddr > vcpu_e500->tlb1_max_eaddr)
266 return -1;
478 set_base = 0; 267 set_base = 0;
479 } 268 }
480 269
@@ -530,6 +319,16 @@ static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
530 } 319 }
531} 320}
532 321
322static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500)
323{
324 if (vcpu_e500->g2h_tlb1_map)
 325 memset(vcpu_e500->g2h_tlb1_map, 0,
 326 sizeof(u64) * vcpu_e500->gtlb_params[1].entries);
327 if (vcpu_e500->h2g_tlb1_rmap)
 328 memset(vcpu_e500->h2g_tlb1_rmap, 0,
 329 sizeof(unsigned int) * host_tlb_params[1].entries);
330}
331
533static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) 332static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
534{ 333{
535 int tlbsel = 0; 334 int tlbsel = 0;
@@ -547,7 +346,7 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
547 int stlbsel = 1; 346 int stlbsel = 1;
548 int i; 347 int i;
549 348
550 kvmppc_e500_id_table_reset_all(vcpu_e500); 349 kvmppc_e500_tlbil_all(vcpu_e500);
551 350
552 for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { 351 for (i = 0; i < host_tlb_params[stlbsel].entries; i++) {
553 struct tlbe_ref *ref = 352 struct tlbe_ref *ref =
@@ -562,19 +361,18 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
562 unsigned int eaddr, int as) 361 unsigned int eaddr, int as)
563{ 362{
564 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 363 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
565 unsigned int victim, pidsel, tsized; 364 unsigned int victim, tsized;
566 int tlbsel; 365 int tlbsel;
567 366
568 /* since we only have two TLBs, only lower bit is used. */ 367 /* since we only have two TLBs, only lower bit is used. */
569 tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1; 368 tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1;
570 victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; 369 victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0;
571 pidsel = (vcpu->arch.shared->mas4 >> 16) & 0xf;
572 tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f; 370 tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f;
573 371
574 vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) 372 vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
575 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); 373 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
576 vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) 374 vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
577 | MAS1_TID(vcpu_e500->pid[pidsel]) 375 | MAS1_TID(get_tlbmiss_tid(vcpu))
578 | MAS1_TSIZE(tsized); 376 | MAS1_TSIZE(tsized);
579 vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN) 377 vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN)
580 | (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK); 378 | (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK);
@@ -586,23 +384,26 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
586 384
587/* TID must be supplied by the caller */ 385/* TID must be supplied by the caller */
588static inline void kvmppc_e500_setup_stlbe( 386static inline void kvmppc_e500_setup_stlbe(
589 struct kvmppc_vcpu_e500 *vcpu_e500, 387 struct kvm_vcpu *vcpu,
590 struct kvm_book3e_206_tlb_entry *gtlbe, 388 struct kvm_book3e_206_tlb_entry *gtlbe,
591 int tsize, struct tlbe_ref *ref, u64 gvaddr, 389 int tsize, struct tlbe_ref *ref, u64 gvaddr,
592 struct kvm_book3e_206_tlb_entry *stlbe) 390 struct kvm_book3e_206_tlb_entry *stlbe)
593{ 391{
594 pfn_t pfn = ref->pfn; 392 pfn_t pfn = ref->pfn;
393 u32 pr = vcpu->arch.shared->msr & MSR_PR;
595 394
596 BUG_ON(!(ref->flags & E500_TLB_VALID)); 395 BUG_ON(!(ref->flags & E500_TLB_VALID));
597 396
598 /* Force TS=1 IPROT=0 for all guest mappings. */ 397 /* Force IPROT=0 for all guest mappings. */
599 stlbe->mas1 = MAS1_TSIZE(tsize) | MAS1_TS | MAS1_VALID; 398 stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
600 stlbe->mas2 = (gvaddr & MAS2_EPN) 399 stlbe->mas2 = (gvaddr & MAS2_EPN) |
601 | e500_shadow_mas2_attrib(gtlbe->mas2, 400 e500_shadow_mas2_attrib(gtlbe->mas2, pr);
602 vcpu_e500->vcpu.arch.shared->msr & MSR_PR); 401 stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
603 stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) 402 e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
604 | e500_shadow_mas3_attrib(gtlbe->mas7_3, 403
605 vcpu_e500->vcpu.arch.shared->msr & MSR_PR); 404#ifdef CONFIG_KVM_BOOKE_HV
405 stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid;
406#endif
606} 407}
607 408
608static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, 409static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -736,7 +537,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
736 kvmppc_e500_ref_release(ref); 537 kvmppc_e500_ref_release(ref);
737 kvmppc_e500_ref_setup(ref, gtlbe, pfn); 538 kvmppc_e500_ref_setup(ref, gtlbe, pfn);
738 539
739 kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, ref, gvaddr, stlbe); 540 kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
541 ref, gvaddr, stlbe);
740} 542}
741 543
742/* XXX only map the one-one case, for now use TLB0 */ 544/* XXX only map the one-one case, for now use TLB0 */
@@ -760,7 +562,7 @@ static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
760/* XXX for both one-one and one-to-many , for now use TLB1 */ 562/* XXX for both one-one and one-to-many , for now use TLB1 */
761static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, 563static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
762 u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, 564 u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
763 struct kvm_book3e_206_tlb_entry *stlbe) 565 struct kvm_book3e_206_tlb_entry *stlbe, int esel)
764{ 566{
765 struct tlbe_ref *ref; 567 struct tlbe_ref *ref;
766 unsigned int victim; 568 unsigned int victim;
@@ -773,15 +575,74 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
773 ref = &vcpu_e500->tlb_refs[1][victim]; 575 ref = &vcpu_e500->tlb_refs[1][victim];
774 kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref); 576 kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref);
775 577
578 vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << victim;
579 vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP;
580 if (vcpu_e500->h2g_tlb1_rmap[victim]) {
581 unsigned int idx = vcpu_e500->h2g_tlb1_rmap[victim];
582 vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << victim);
583 }
584 vcpu_e500->h2g_tlb1_rmap[victim] = esel;
585
776 return victim; 586 return victim;
777} 587}
778 588
779void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) 589static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500)
590{
591 int size = vcpu_e500->gtlb_params[1].entries;
592 unsigned int offset;
593 gva_t eaddr;
594 int i;
595
596 vcpu_e500->tlb1_min_eaddr = ~0UL;
597 vcpu_e500->tlb1_max_eaddr = 0;
598 offset = vcpu_e500->gtlb_offset[1];
599
600 for (i = 0; i < size; i++) {
601 struct kvm_book3e_206_tlb_entry *tlbe =
602 &vcpu_e500->gtlb_arch[offset + i];
603
604 if (!get_tlb_v(tlbe))
605 continue;
606
607 eaddr = get_tlb_eaddr(tlbe);
608 vcpu_e500->tlb1_min_eaddr =
609 min(vcpu_e500->tlb1_min_eaddr, eaddr);
610
611 eaddr = get_tlb_end(tlbe);
612 vcpu_e500->tlb1_max_eaddr =
613 max(vcpu_e500->tlb1_max_eaddr, eaddr);
614 }
615}
616
617static int kvmppc_need_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500,
618 struct kvm_book3e_206_tlb_entry *gtlbe)
780{ 619{
620 unsigned long start, end, size;
621
622 size = get_tlb_bytes(gtlbe);
623 start = get_tlb_eaddr(gtlbe) & ~(size - 1);
624 end = start + size - 1;
625
626 return vcpu_e500->tlb1_min_eaddr == start ||
627 vcpu_e500->tlb1_max_eaddr == end;
628}
629
 630/* This function is supposed to be called for adding a new valid tlb entry */
631static void kvmppc_set_tlb1map_range(struct kvm_vcpu *vcpu,
632 struct kvm_book3e_206_tlb_entry *gtlbe)
633{
634 unsigned long start, end, size;
781 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 635 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
782 636
783 /* Recalc shadow pid since MSR changes */ 637 if (!get_tlb_v(gtlbe))
784 kvmppc_e500_recalc_shadow_pid(vcpu_e500); 638 return;
639
640 size = get_tlb_bytes(gtlbe);
641 start = get_tlb_eaddr(gtlbe) & ~(size - 1);
642 end = start + size - 1;
643
644 vcpu_e500->tlb1_min_eaddr = min(vcpu_e500->tlb1_min_eaddr, start);
645 vcpu_e500->tlb1_max_eaddr = max(vcpu_e500->tlb1_max_eaddr, end);
785} 646}
786 647
787static inline int kvmppc_e500_gtlbe_invalidate( 648static inline int kvmppc_e500_gtlbe_invalidate(
@@ -794,6 +655,9 @@ static inline int kvmppc_e500_gtlbe_invalidate(
794 if (unlikely(get_tlb_iprot(gtlbe))) 655 if (unlikely(get_tlb_iprot(gtlbe)))
795 return -1; 656 return -1;
796 657
658 if (tlbsel == 1 && kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe))
659 kvmppc_recalc_tlb1map_range(vcpu_e500);
660
797 gtlbe->mas1 = 0; 661 gtlbe->mas1 = 0;
798 662
799 return 0; 663 return 0;
@@ -811,7 +675,7 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
811 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); 675 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
812 676
813 /* Invalidate all vcpu id mappings */ 677 /* Invalidate all vcpu id mappings */
814 kvmppc_e500_id_table_reset_all(vcpu_e500); 678 kvmppc_e500_tlbil_all(vcpu_e500);
815 679
816 return EMULATE_DONE; 680 return EMULATE_DONE;
817} 681}
@@ -844,7 +708,59 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
844 } 708 }
845 709
846 /* Invalidate all vcpu id mappings */ 710 /* Invalidate all vcpu id mappings */
847 kvmppc_e500_id_table_reset_all(vcpu_e500); 711 kvmppc_e500_tlbil_all(vcpu_e500);
712
713 return EMULATE_DONE;
714}
715
716static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
717 int pid, int rt)
718{
719 struct kvm_book3e_206_tlb_entry *tlbe;
720 int tid, esel;
721
722 /* invalidate all entries */
723 for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) {
724 tlbe = get_entry(vcpu_e500, tlbsel, esel);
725 tid = get_tlb_tid(tlbe);
726 if (rt == 0 || tid == pid) {
727 inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
728 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
729 }
730 }
731}
732
733static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
734 int ra, int rb)
735{
736 int tlbsel, esel;
737 gva_t ea;
738
739 ea = kvmppc_get_gpr(&vcpu_e500->vcpu, rb);
740 if (ra)
741 ea += kvmppc_get_gpr(&vcpu_e500->vcpu, ra);
742
743 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
744 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1);
745 if (esel >= 0) {
746 inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
747 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
748 break;
749 }
750 }
751}
752
753int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb)
754{
755 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
756 int pid = get_cur_spid(vcpu);
757
758 if (rt == 0 || rt == 1) {
759 tlbilx_all(vcpu_e500, 0, pid, rt);
760 tlbilx_all(vcpu_e500, 1, pid, rt);
761 } else if (rt == 3) {
762 tlbilx_one(vcpu_e500, pid, ra, rb);
763 }
848 764
849 return EMULATE_DONE; 765 return EMULATE_DONE;
850} 766}
@@ -929,9 +845,7 @@ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
929 int stid; 845 int stid;
930 846
931 preempt_disable(); 847 preempt_disable();
932 stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe), 848 stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe);
933 get_tlb_tid(gtlbe),
934 get_cur_pr(&vcpu_e500->vcpu), 0);
935 849
936 stlbe->mas1 |= MAS1_TID(stid); 850 stlbe->mas1 |= MAS1_TID(stid);
937 write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); 851 write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe);
@@ -941,16 +855,21 @@ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
941int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) 855int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
942{ 856{
943 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 857 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
944 struct kvm_book3e_206_tlb_entry *gtlbe; 858 struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
945 int tlbsel, esel; 859 int tlbsel, esel, stlbsel, sesel;
860 int recal = 0;
946 861
947 tlbsel = get_tlb_tlbsel(vcpu); 862 tlbsel = get_tlb_tlbsel(vcpu);
948 esel = get_tlb_esel(vcpu, tlbsel); 863 esel = get_tlb_esel(vcpu, tlbsel);
949 864
950 gtlbe = get_entry(vcpu_e500, tlbsel, esel); 865 gtlbe = get_entry(vcpu_e500, tlbsel, esel);
951 866
952 if (get_tlb_v(gtlbe)) 867 if (get_tlb_v(gtlbe)) {
953 inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); 868 inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
869 if ((tlbsel == 1) &&
870 kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe))
871 recal = 1;
872 }
954 873
955 gtlbe->mas1 = vcpu->arch.shared->mas1; 874 gtlbe->mas1 = vcpu->arch.shared->mas1;
956 gtlbe->mas2 = vcpu->arch.shared->mas2; 875 gtlbe->mas2 = vcpu->arch.shared->mas2;
@@ -959,10 +878,20 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
959 trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1, 878 trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
960 gtlbe->mas2, gtlbe->mas7_3); 879 gtlbe->mas2, gtlbe->mas7_3);
961 880
881 if (tlbsel == 1) {
882 /*
 883 * If a valid tlb1 entry is overwritten then recalculate the
 884 * min/max TLB1 map address range; otherwise there is no need to
 885 * look in the tlb1 array.
886 */
887 if (recal)
888 kvmppc_recalc_tlb1map_range(vcpu_e500);
889 else
890 kvmppc_set_tlb1map_range(vcpu, gtlbe);
891 }
892
962 /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ 893 /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
963 if (tlbe_is_host_safe(vcpu, gtlbe)) { 894 if (tlbe_is_host_safe(vcpu, gtlbe)) {
964 struct kvm_book3e_206_tlb_entry stlbe;
965 int stlbsel, sesel;
966 u64 eaddr; 895 u64 eaddr;
967 u64 raddr; 896 u64 raddr;
968 897
@@ -989,7 +918,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
989 * are mapped on the fly. */ 918 * are mapped on the fly. */
990 stlbsel = 1; 919 stlbsel = 1;
991 sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, 920 sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
992 raddr >> PAGE_SHIFT, gtlbe, &stlbe); 921 raddr >> PAGE_SHIFT, gtlbe, &stlbe, esel);
993 break; 922 break;
994 923
995 default: 924 default:
@@ -1003,6 +932,48 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
1003 return EMULATE_DONE; 932 return EMULATE_DONE;
1004} 933}
1005 934
935static int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
936 gva_t eaddr, unsigned int pid, int as)
937{
938 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
939 int esel, tlbsel;
940
941 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
942 esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as);
943 if (esel >= 0)
944 return index_of(tlbsel, esel);
945 }
946
947 return -1;
948}
949
 950/* 'linear_address' is actually an encoding of AS|PID|EADDR. */
951int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
952 struct kvm_translation *tr)
953{
954 int index;
955 gva_t eaddr;
956 u8 pid;
957 u8 as;
958
959 eaddr = tr->linear_address;
960 pid = (tr->linear_address >> 32) & 0xff;
961 as = (tr->linear_address >> 40) & 0x1;
962
963 index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
964 if (index < 0) {
965 tr->valid = 0;
966 return 0;
967 }
968
969 tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
 970 /* XXX what do "writeable" and "usermode" even mean? */
971 tr->valid = 1;
972
973 return 0;
974}
975
976
1006int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) 977int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
1007{ 978{
1008 unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); 979 unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
@@ -1066,7 +1037,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
1066 sesel = 0; /* unused */ 1037 sesel = 0; /* unused */
1067 priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; 1038 priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
1068 1039
1069 kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K, 1040 kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
1070 &priv->ref, eaddr, &stlbe); 1041 &priv->ref, eaddr, &stlbe);
1071 break; 1042 break;
1072 1043
@@ -1075,7 +1046,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
1075 1046
1076 stlbsel = 1; 1047 stlbsel = 1;
1077 sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, 1048 sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn,
1078 gtlbe, &stlbe); 1049 gtlbe, &stlbe, esel);
1079 break; 1050 break;
1080 } 1051 }
1081 1052
@@ -1087,52 +1058,13 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
1087 write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); 1058 write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
1088} 1059}
1089 1060
1090int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
1091 gva_t eaddr, unsigned int pid, int as)
1092{
1093 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
1094 int esel, tlbsel;
1095
1096 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
1097 esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as);
1098 if (esel >= 0)
1099 return index_of(tlbsel, esel);
1100 }
1101
1102 return -1;
1103}
1104
1105void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
1106{
1107 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
1108
1109 if (vcpu->arch.pid != pid) {
1110 vcpu_e500->pid[0] = vcpu->arch.pid = pid;
1111 kvmppc_e500_recalc_shadow_pid(vcpu_e500);
1112 }
1113}
1114
1115void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
1116{
1117 struct kvm_book3e_206_tlb_entry *tlbe;
1118
1119 /* Insert large initial mapping for guest. */
1120 tlbe = get_entry(vcpu_e500, 1, 0);
1121 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
1122 tlbe->mas2 = 0;
1123 tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
1124
1125 /* 4K map for serial output. Used by kernel wrapper. */
1126 tlbe = get_entry(vcpu_e500, 1, 1);
1127 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
1128 tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
1129 tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
1130}
1131
1132static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) 1061static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
1133{ 1062{
1134 int i; 1063 int i;
1135 1064
1065 clear_tlb1_bitmap(vcpu_e500);
1066 kfree(vcpu_e500->g2h_tlb1_map);
1067
1136 clear_tlb_refs(vcpu_e500); 1068 clear_tlb_refs(vcpu_e500);
1137 kfree(vcpu_e500->gtlb_priv[0]); 1069 kfree(vcpu_e500->gtlb_priv[0]);
1138 kfree(vcpu_e500->gtlb_priv[1]); 1070 kfree(vcpu_e500->gtlb_priv[1]);
@@ -1155,6 +1087,36 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
1155 vcpu_e500->gtlb_arch = NULL; 1087 vcpu_e500->gtlb_arch = NULL;
1156} 1088}
1157 1089
1090void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
1091{
1092 sregs->u.e.mas0 = vcpu->arch.shared->mas0;
1093 sregs->u.e.mas1 = vcpu->arch.shared->mas1;
1094 sregs->u.e.mas2 = vcpu->arch.shared->mas2;
1095 sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3;
1096 sregs->u.e.mas4 = vcpu->arch.shared->mas4;
1097 sregs->u.e.mas6 = vcpu->arch.shared->mas6;
1098
1099 sregs->u.e.mmucfg = vcpu->arch.mmucfg;
1100 sregs->u.e.tlbcfg[0] = vcpu->arch.tlbcfg[0];
1101 sregs->u.e.tlbcfg[1] = vcpu->arch.tlbcfg[1];
1102 sregs->u.e.tlbcfg[2] = 0;
1103 sregs->u.e.tlbcfg[3] = 0;
1104}
1105
1106int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
1107{
1108 if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1109 vcpu->arch.shared->mas0 = sregs->u.e.mas0;
1110 vcpu->arch.shared->mas1 = sregs->u.e.mas1;
1111 vcpu->arch.shared->mas2 = sregs->u.e.mas2;
1112 vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3;
1113 vcpu->arch.shared->mas4 = sregs->u.e.mas4;
1114 vcpu->arch.shared->mas6 = sregs->u.e.mas6;
1115 }
1116
1117 return 0;
1118}
1119
1158int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, 1120int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
1159 struct kvm_config_tlb *cfg) 1121 struct kvm_config_tlb *cfg)
1160{ 1122{
@@ -1163,6 +1125,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
1163 char *virt; 1125 char *virt;
1164 struct page **pages; 1126 struct page **pages;
1165 struct tlbe_priv *privs[2] = {}; 1127 struct tlbe_priv *privs[2] = {};
1128 u64 *g2h_bitmap = NULL;
1166 size_t array_len; 1129 size_t array_len;
1167 u32 sets; 1130 u32 sets;
1168 int num_pages, ret, i; 1131 int num_pages, ret, i;
@@ -1224,10 +1187,16 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
1224 if (!privs[0] || !privs[1]) 1187 if (!privs[0] || !privs[1])
1225 goto err_put_page; 1188 goto err_put_page;
1226 1189
1190 g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
1191 GFP_KERNEL);
1192 if (!g2h_bitmap)
1193 goto err_put_page;
1194
1227 free_gtlb(vcpu_e500); 1195 free_gtlb(vcpu_e500);
1228 1196
1229 vcpu_e500->gtlb_priv[0] = privs[0]; 1197 vcpu_e500->gtlb_priv[0] = privs[0];
1230 vcpu_e500->gtlb_priv[1] = privs[1]; 1198 vcpu_e500->gtlb_priv[1] = privs[1];
1199 vcpu_e500->g2h_tlb1_map = g2h_bitmap;
1231 1200
1232 vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *) 1201 vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *)
1233 (virt + (cfg->array & (PAGE_SIZE - 1))); 1202 (virt + (cfg->array & (PAGE_SIZE - 1)));
@@ -1238,14 +1207,16 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
1238 vcpu_e500->gtlb_offset[0] = 0; 1207 vcpu_e500->gtlb_offset[0] = 0;
1239 vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; 1208 vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
1240 1209
1241 vcpu_e500->tlb0cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); 1210 vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
1211
1212 vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
1242 if (params.tlb_sizes[0] <= 2048) 1213 if (params.tlb_sizes[0] <= 2048)
1243 vcpu_e500->tlb0cfg |= params.tlb_sizes[0]; 1214 vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0];
1244 vcpu_e500->tlb0cfg |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; 1215 vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
1245 1216
1246 vcpu_e500->tlb1cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); 1217 vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
1247 vcpu_e500->tlb1cfg |= params.tlb_sizes[1]; 1218 vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1];
1248 vcpu_e500->tlb1cfg |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; 1219 vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
1249 1220
1250 vcpu_e500->shared_tlb_pages = pages; 1221 vcpu_e500->shared_tlb_pages = pages;
1251 vcpu_e500->num_shared_tlb_pages = num_pages; 1222 vcpu_e500->num_shared_tlb_pages = num_pages;
@@ -1256,6 +1227,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
1256 vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1]; 1227 vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1];
1257 vcpu_e500->gtlb_params[1].sets = 1; 1228 vcpu_e500->gtlb_params[1].sets = 1;
1258 1229
1230 kvmppc_recalc_tlb1map_range(vcpu_e500);
1259 return 0; 1231 return 0;
1260 1232
1261err_put_page: 1233err_put_page:
@@ -1274,13 +1246,14 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
1274 struct kvm_dirty_tlb *dirty) 1246 struct kvm_dirty_tlb *dirty)
1275{ 1247{
1276 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 1248 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
1277 1249 kvmppc_recalc_tlb1map_range(vcpu_e500);
1278 clear_tlb_refs(vcpu_e500); 1250 clear_tlb_refs(vcpu_e500);
1279 return 0; 1251 return 0;
1280} 1252}
1281 1253
1282int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) 1254int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
1283{ 1255{
1256 struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
1284 int entry_size = sizeof(struct kvm_book3e_206_tlb_entry); 1257 int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
1285 int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE; 1258 int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
1286 1259
@@ -1357,22 +1330,32 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
1357 if (!vcpu_e500->gtlb_priv[1]) 1330 if (!vcpu_e500->gtlb_priv[1])
1358 goto err; 1331 goto err;
1359 1332
1360 if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) 1333 vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(unsigned int) *
1334 vcpu_e500->gtlb_params[1].entries,
1335 GFP_KERNEL);
1336 if (!vcpu_e500->g2h_tlb1_map)
1337 goto err;
1338
1339 vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) *
1340 host_tlb_params[1].entries,
1341 GFP_KERNEL);
1342 if (!vcpu_e500->h2g_tlb1_rmap)
1361 goto err; 1343 goto err;
1362 1344
1363 /* Init TLB configuration register */ 1345 /* Init TLB configuration register */
1364 vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & 1346 vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
1365 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); 1347 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
1366 vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[0].entries; 1348 vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries;
1367 vcpu_e500->tlb0cfg |= 1349 vcpu->arch.tlbcfg[0] |=
1368 vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT; 1350 vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT;
1369 1351
1370 vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & 1352 vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
1371 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); 1353 ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
1372 vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[1].entries; 1354 vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries;
1373 vcpu_e500->tlb0cfg |= 1355 vcpu->arch.tlbcfg[1] |=
1374 vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT; 1356 vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
1375 1357
1358 kvmppc_recalc_tlb1map_range(vcpu_e500);
1376 return 0; 1359 return 0;
1377 1360
1378err: 1361err:
@@ -1385,8 +1368,7 @@ err:
1385void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) 1368void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
1386{ 1369{
1387 free_gtlb(vcpu_e500); 1370 free_gtlb(vcpu_e500);
1388 kvmppc_e500_id_table_free(vcpu_e500); 1371 kfree(vcpu_e500->h2g_tlb1_rmap);
1389
1390 kfree(vcpu_e500->tlb_refs[0]); 1372 kfree(vcpu_e500->tlb_refs[0]);
1391 kfree(vcpu_e500->tlb_refs[1]); 1373 kfree(vcpu_e500->tlb_refs[1]);
1392} 1374}
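
The kvm_vcpu_ioctl_config_tlb() and kvmppc_e500_tlb_init() hunks above add two lookup structures: g2h_tlb1_map, allocated as one u64 per guest TLB1 entry, and h2g_tlb1_rmap, one unsigned int per host TLB1 entry. The diff only shows the allocations; the sketch below is a userspace model of the bookkeeping they presumably enable — each guest entry keeps a bitmap of the host TLB1 slots shadowing it (so at most 64 host slots fit the u64), with a reverse map from host slot back to guest entry. Names and exact semantics here are illustrative, not lifted from the kernel sources.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct tlb1_map {
	uint64_t *g2h;            /* one bitmap per guest TLB1 entry */
	unsigned int *h2g;        /* host slot -> guest entry + 1 (0 = free) */
	unsigned int guest_entries, host_entries;   /* host_entries <= 64 assumed */
};

static int tlb1_map_init(struct tlb1_map *m, unsigned int guest, unsigned int host)
{
	m->g2h = calloc(guest, sizeof(*m->g2h));
	m->h2g = calloc(host, sizeof(*m->h2g));
	if (!m->g2h || !m->h2g)
		return -1;
	m->guest_entries = guest;
	m->host_entries = host;
	return 0;
}

/* Record that host slot hsel now shadows guest entry gsel. */
static void tlb1_map_add(struct tlb1_map *m, unsigned int gsel, unsigned int hsel)
{
	m->g2h[gsel] |= 1ULL << hsel;
	m->h2g[hsel] = gsel + 1;
}

/* Drop every host shadow of guest entry gsel (what an invalidate would walk). */
static void tlb1_map_clear_guest(struct tlb1_map *m, unsigned int gsel)
{
	for (unsigned int h = 0; h < m->host_entries; h++)
		if (m->g2h[gsel] & (1ULL << h))
			m->h2g[h] = 0;
	m->g2h[gsel] = 0;
}

int main(void)
{
	struct tlb1_map m;

	if (tlb1_map_init(&m, 16, 64))
		return 1;
	tlb1_map_add(&m, 3, 10);
	tlb1_map_add(&m, 3, 11);
	printf("guest 3 bitmap: %#llx\n", (unsigned long long)m.g2h[3]);
	tlb1_map_clear_guest(&m, 3);
	printf("after clear:    %#llx\n", (unsigned long long)m.g2h[3]);
	free(m.g2h);
	free(m.h2g);
	return 0;
}
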
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
deleted file mode 100644
index 5c6d2d7bf058..000000000000
--- a/arch/powerpc/kvm/e500_tlb.h
+++ /dev/null
@@ -1,174 +0,0 @@
1/*
2 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
3 *
4 * Author: Yu Liu, yu.liu@freescale.com
5 *
6 * Description:
7 * This file is based on arch/powerpc/kvm/44x_tlb.h,
8 * by Hollis Blanchard <hollisb@us.ibm.com>.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License, version 2, as
12 * published by the Free Software Foundation.
13 */
14
15#ifndef __KVM_E500_TLB_H__
16#define __KVM_E500_TLB_H__
17
18#include <linux/kvm_host.h>
19#include <asm/mmu-book3e.h>
20#include <asm/tlb.h>
21#include <asm/kvm_e500.h>
22
23/* This geometry is the legacy default -- can be overridden by userspace */
24#define KVM_E500_TLB0_WAY_SIZE 128
25#define KVM_E500_TLB0_WAY_NUM 2
26
27#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
28#define KVM_E500_TLB1_SIZE 16
29
30#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF))
31#define tlbsel_of(index) ((index) >> 16)
32#define esel_of(index) ((index) & 0xFFFF)
33
34#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
35#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
36#define MAS2_ATTRIB_MASK \
37 (MAS2_X0 | MAS2_X1)
38#define MAS3_ATTRIB_MASK \
39 (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
40 | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
41
42extern void kvmppc_dump_tlbs(struct kvm_vcpu *);
43extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong);
44extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *);
45extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *);
46extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int);
47extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int);
48extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int);
49extern void kvmppc_e500_tlb_put(struct kvm_vcpu *);
50extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int);
51extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *);
52extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *);
53extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
54extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *);
55
56/* TLB helper functions */
57static inline unsigned int
58get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
59{
60 return (tlbe->mas1 >> 7) & 0x1f;
61}
62
63static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
64{
65 return tlbe->mas2 & 0xfffff000;
66}
67
68static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
69{
70 unsigned int pgsize = get_tlb_size(tlbe);
71 return 1ULL << 10 << pgsize;
72}
73
74static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
75{
76 u64 bytes = get_tlb_bytes(tlbe);
77 return get_tlb_eaddr(tlbe) + bytes - 1;
78}
79
80static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
81{
82 return tlbe->mas7_3 & ~0xfffULL;
83}
84
85static inline unsigned int
86get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
87{
88 return (tlbe->mas1 >> 16) & 0xff;
89}
90
91static inline unsigned int
92get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
93{
94 return (tlbe->mas1 >> 12) & 0x1;
95}
96
97static inline unsigned int
98get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
99{
100 return (tlbe->mas1 >> 31) & 0x1;
101}
102
103static inline unsigned int
104get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
105{
106 return (tlbe->mas1 >> 30) & 0x1;
107}
108
109static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
110{
111 return vcpu->arch.pid & 0xff;
112}
113
114static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu)
115{
116 return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS));
117}
118
119static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
120{
121 return !!(vcpu->arch.shared->msr & MSR_PR);
122}
123
124static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu)
125{
126 return (vcpu->arch.shared->mas6 >> 16) & 0xff;
127}
128
129static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu)
130{
131 return vcpu->arch.shared->mas6 & 0x1;
132}
133
134static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu)
135{
136 /*
137 * Manual says that tlbsel has 2 bits wide.
138 * Since we only have two TLBs, only lower bit is used.
139 */
140 return (vcpu->arch.shared->mas0 >> 28) & 0x1;
141}
142
143static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu)
144{
145 return vcpu->arch.shared->mas0 & 0xfff;
146}
147
148static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu)
149{
150 return (vcpu->arch.shared->mas0 >> 16) & 0xfff;
151}
152
153static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
154 const struct kvm_book3e_206_tlb_entry *tlbe)
155{
156 gpa_t gpa;
157
158 if (!get_tlb_v(tlbe))
159 return 0;
160
161 /* Does it match current guest AS? */
162 /* XXX what about IS != DS? */
163 if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
164 return 0;
165
166 gpa = get_tlb_raddr(tlbe);
167 if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
168 /* Mapping is not for RAM. */
169 return 0;
170
171 return 1;
172}
173
174#endif /* __KVM_E500_TLB_H__ */
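
The deleted header carried the Book3E TLB-entry accessors (presumably consolidated elsewhere in this series, e.g. the e500.h that e500mc.c includes below). Its page-size math is compact enough to misread: get_tlb_bytes() returns 1 << 10 << TSIZE, i.e. 2^TSIZE KiB, and get_tlb_end() is the inclusive last byte of the mapping. A standalone check of that arithmetic:

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as the removed get_tlb_bytes()/get_tlb_end() helpers:
 * a TSIZE value of n maps 2^n KiB. */
static uint64_t tlb_bytes(unsigned int tsize)
{
	return 1ULL << 10 << tsize;
}

int main(void)
{
	/* e.g. TSIZE 1 -> 2 KiB, TSIZE 10 -> 1 MiB, TSIZE 20 -> 1 GiB */
	for (unsigned int tsize = 1; tsize <= 20; tsize += 9) {
		uint64_t bytes = tlb_bytes(tsize);
		uint64_t eaddr = 0x10000000ULL;

		printf("tsize %2u: %llu bytes, end of mapping at 0x%llx\n",
		       tsize, (unsigned long long)bytes,
		       (unsigned long long)(eaddr + bytes - 1));
	}
	return 0;
}
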
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
new file mode 100644
index 000000000000..fe6c1de6b701
--- /dev/null
+++ b/arch/powerpc/kvm/e500mc.c
@@ -0,0 +1,342 @@
1/*
2 * Copyright (C) 2010 Freescale Semiconductor, Inc. All rights reserved.
3 *
4 * Author: Varun Sethi, <varun.sethi@freescale.com>
5 *
6 * Description:
7 * This file is derived from arch/powerpc/kvm/e500.c,
8 * by Yu Liu <yu.liu@freescale.com>.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License, version 2, as
12 * published by the Free Software Foundation.
13 */
14
15#include <linux/kvm_host.h>
16#include <linux/slab.h>
17#include <linux/err.h>
18#include <linux/export.h>
19
20#include <asm/reg.h>
21#include <asm/cputable.h>
22#include <asm/tlbflush.h>
23#include <asm/kvm_ppc.h>
24#include <asm/dbell.h>
25
26#include "booke.h"
27#include "e500.h"
28
29void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type)
30{
31 enum ppc_dbell dbell_type;
32 unsigned long tag;
33
34 switch (type) {
35 case INT_CLASS_NONCRIT:
36 dbell_type = PPC_G_DBELL;
37 break;
38 case INT_CLASS_CRIT:
39 dbell_type = PPC_G_DBELL_CRIT;
40 break;
41 case INT_CLASS_MC:
42 dbell_type = PPC_G_DBELL_MC;
43 break;
44 default:
45 WARN_ONCE(1, "%s: unknown int type %d\n", __func__, type);
46 return;
47 }
48
49
50 tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id;
51 mb();
52 ppc_msgsnd(dbell_type, 0, tag);
53}
54
55/* gtlbe must not be mapped by more than one host tlb entry */
56void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
57 struct kvm_book3e_206_tlb_entry *gtlbe)
58{
59 unsigned int tid, ts;
60 u32 val, eaddr, lpid;
61 unsigned long flags;
62
63 ts = get_tlb_ts(gtlbe);
64 tid = get_tlb_tid(gtlbe);
65 lpid = vcpu_e500->vcpu.kvm->arch.lpid;
66
67 /* We search the host TLB to invalidate its shadow TLB entry */
68 val = (tid << 16) | ts;
69 eaddr = get_tlb_eaddr(gtlbe);
70
71 local_irq_save(flags);
72
73 mtspr(SPRN_MAS6, val);
74 mtspr(SPRN_MAS5, MAS5_SGS | lpid);
75
76 asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr));
77 val = mfspr(SPRN_MAS1);
78 if (val & MAS1_VALID) {
79 mtspr(SPRN_MAS1, val & ~MAS1_VALID);
80 asm volatile("tlbwe");
81 }
82 mtspr(SPRN_MAS5, 0);
83 /* NOTE: tlbsx also updates mas8, so clear it for host tlbwe */
84 mtspr(SPRN_MAS8, 0);
85 isync();
86
87 local_irq_restore(flags);
88}
89
90void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
91{
92 unsigned long flags;
93
94 local_irq_save(flags);
95 mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid);
96 asm volatile("tlbilxlpid");
97 mtspr(SPRN_MAS5, 0);
98 local_irq_restore(flags);
99}
100
101void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
102{
103 vcpu->arch.pid = pid;
104}
105
106void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
107{
108}
109
110void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
111{
112 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
113
114 kvmppc_booke_vcpu_load(vcpu, cpu);
115
116 mtspr(SPRN_LPID, vcpu->kvm->arch.lpid);
117 mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
118 mtspr(SPRN_GPIR, vcpu->vcpu_id);
119 mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp);
120 mtspr(SPRN_EPLC, vcpu->arch.eplc);
121 mtspr(SPRN_EPSC, vcpu->arch.epsc);
122
123 mtspr(SPRN_GIVPR, vcpu->arch.ivpr);
124 mtspr(SPRN_GIVOR2, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
125 mtspr(SPRN_GIVOR8, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
126 mtspr(SPRN_GSPRG0, (unsigned long)vcpu->arch.shared->sprg0);
127 mtspr(SPRN_GSPRG1, (unsigned long)vcpu->arch.shared->sprg1);
128 mtspr(SPRN_GSPRG2, (unsigned long)vcpu->arch.shared->sprg2);
129 mtspr(SPRN_GSPRG3, (unsigned long)vcpu->arch.shared->sprg3);
130
131 mtspr(SPRN_GSRR0, vcpu->arch.shared->srr0);
132 mtspr(SPRN_GSRR1, vcpu->arch.shared->srr1);
133
134 mtspr(SPRN_GEPR, vcpu->arch.epr);
135 mtspr(SPRN_GDEAR, vcpu->arch.shared->dar);
136 mtspr(SPRN_GESR, vcpu->arch.shared->esr);
137
138 if (vcpu->arch.oldpir != mfspr(SPRN_PIR))
139 kvmppc_e500_tlbil_all(vcpu_e500);
140
141 kvmppc_load_guest_fp(vcpu);
142}
143
144void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
145{
146 vcpu->arch.eplc = mfspr(SPRN_EPLC);
147 vcpu->arch.epsc = mfspr(SPRN_EPSC);
148
149 vcpu->arch.shared->sprg0 = mfspr(SPRN_GSPRG0);
150 vcpu->arch.shared->sprg1 = mfspr(SPRN_GSPRG1);
151 vcpu->arch.shared->sprg2 = mfspr(SPRN_GSPRG2);
152 vcpu->arch.shared->sprg3 = mfspr(SPRN_GSPRG3);
153
154 vcpu->arch.shared->srr0 = mfspr(SPRN_GSRR0);
155 vcpu->arch.shared->srr1 = mfspr(SPRN_GSRR1);
156
157 vcpu->arch.epr = mfspr(SPRN_GEPR);
158 vcpu->arch.shared->dar = mfspr(SPRN_GDEAR);
159 vcpu->arch.shared->esr = mfspr(SPRN_GESR);
160
161 vcpu->arch.oldpir = mfspr(SPRN_PIR);
162
163 kvmppc_booke_vcpu_put(vcpu);
164}
165
166int kvmppc_core_check_processor_compat(void)
167{
168 int r;
169
170 if (strcmp(cur_cpu_spec->cpu_name, "e500mc") == 0)
171 r = 0;
172 else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
173 r = 0;
174 else
175 r = -ENOTSUPP;
176
177 return r;
178}
179
180int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
181{
182 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
183
184 vcpu->arch.shadow_epcr = SPRN_EPCR_DSIGS | SPRN_EPCR_DGTMI | \
185 SPRN_EPCR_DUVD;
186 vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP;
187 vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT);
188 vcpu->arch.epsc = vcpu->arch.eplc;
189
190 vcpu->arch.pvr = mfspr(SPRN_PVR);
191 vcpu_e500->svr = mfspr(SPRN_SVR);
192
193 vcpu->arch.cpu_type = KVM_CPU_E500MC;
194
195 return 0;
196}
197
198void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
199{
200 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
201
202 sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_PM |
203 KVM_SREGS_E_PC;
204 sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL;
205
206 sregs->u.e.impl.fsl.features = 0;
207 sregs->u.e.impl.fsl.svr = vcpu_e500->svr;
208 sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
209 sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
210
211 kvmppc_get_sregs_e500_tlb(vcpu, sregs);
212
213 sregs->u.e.ivor_high[3] =
214 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
215 sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
216 sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
217
218 kvmppc_get_sregs_ivor(vcpu, sregs);
219}
220
221int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
222{
223 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
224 int ret;
225
226 if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
227 vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
228 vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0;
229 vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
230 }
231
232 ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs);
233 if (ret < 0)
234 return ret;
235
236 if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
237 return 0;
238
239 if (sregs->u.e.features & KVM_SREGS_E_PM) {
240 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] =
241 sregs->u.e.ivor_high[3];
242 }
243
244 if (sregs->u.e.features & KVM_SREGS_E_PC) {
245 vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] =
246 sregs->u.e.ivor_high[4];
247 vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] =
248 sregs->u.e.ivor_high[5];
249 }
250
251 return kvmppc_set_sregs_ivor(vcpu, sregs);
252}
253
254struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
255{
256 struct kvmppc_vcpu_e500 *vcpu_e500;
257 struct kvm_vcpu *vcpu;
258 int err;
259
260 vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
261 if (!vcpu_e500) {
262 err = -ENOMEM;
263 goto out;
264 }
265 vcpu = &vcpu_e500->vcpu;
266
267 /* Invalid PIR value -- this LPID doesn't have valid state on any cpu */
267 /* Invalid PIR value -- this LPID doesn't have valid state on any cpu */
268 vcpu->arch.oldpir = 0xffffffff;
269
270 err = kvm_vcpu_init(vcpu, kvm, id);
271 if (err)
272 goto free_vcpu;
273
274 err = kvmppc_e500_tlb_init(vcpu_e500);
275 if (err)
276 goto uninit_vcpu;
277
278 vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
279 if (!vcpu->arch.shared)
280 goto uninit_tlb;
281
282 return vcpu;
283
284uninit_tlb:
285 kvmppc_e500_tlb_uninit(vcpu_e500);
286uninit_vcpu:
287 kvm_vcpu_uninit(vcpu);
288
289free_vcpu:
290 kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
291out:
292 return ERR_PTR(err);
293}
294
295void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
296{
297 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
298
299 free_page((unsigned long)vcpu->arch.shared);
300 kvmppc_e500_tlb_uninit(vcpu_e500);
301 kvm_vcpu_uninit(vcpu);
302 kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
303}
304
305int kvmppc_core_init_vm(struct kvm *kvm)
306{
307 int lpid;
308
309 lpid = kvmppc_alloc_lpid();
310 if (lpid < 0)
311 return lpid;
312
313 kvm->arch.lpid = lpid;
314 return 0;
315}
316
317void kvmppc_core_destroy_vm(struct kvm *kvm)
318{
319 kvmppc_free_lpid(kvm->arch.lpid);
320}
321
322static int __init kvmppc_e500mc_init(void)
323{
324 int r;
325
326 r = kvmppc_booke_init();
327 if (r)
328 return r;
329
330 kvmppc_init_lpid(64);
331 kvmppc_claim_lpid(0); /* host */
332
333 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
334}
335
336static void __exit kvmppc_e500mc_exit(void)
337{
338 kvmppc_booke_exit();
339}
340
341module_init(kvmppc_e500mc_init);
342module_exit(kvmppc_e500mc_exit);
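
One detail of the new e500mc backend worth calling out is the oldpir field: kvmppc_core_vcpu_create() seeds it with 0xffffffff ("never ran"), kvmppc_core_vcpu_put() records the physical CPU (SPRN_PIR), and kvmppc_core_vcpu_load() flushes the LPID's shadow TLB only when the vcpu comes back on a different core. A toy model of that migration-triggered flush, with stand-in types:

#include <stdint.h>
#include <stdio.h>

/* Remember which physical CPU the vcpu last ran on; on load, if it differs,
 * the shadow TLB on this CPU may hold stale entries for our LPID, so flush.
 * 0xffffffff means "never ran anywhere". */
struct toy_vcpu {
	uint32_t oldpir;
	unsigned int flushes;
};

static void toy_tlbil_all(struct toy_vcpu *v)
{
	v->flushes++;
}

static void toy_vcpu_load(struct toy_vcpu *v, uint32_t cur_pir)
{
	if (v->oldpir != cur_pir)
		toy_tlbil_all(v);
}

static void toy_vcpu_put(struct toy_vcpu *v, uint32_t cur_pir)
{
	v->oldpir = cur_pir;
}

int main(void)
{
	struct toy_vcpu v = { .oldpir = 0xffffffff };

	toy_vcpu_load(&v, 0); toy_vcpu_put(&v, 0);   /* first run: flush */
	toy_vcpu_load(&v, 0); toy_vcpu_put(&v, 0);   /* same core: no flush */
	toy_vcpu_load(&v, 2); toy_vcpu_put(&v, 2);   /* migrated: flush */
	printf("flushes: %u\n", v.flushes);          /* prints 2 */
	return 0;
}
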
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 968f40101883..f90e86dea7a2 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -23,6 +23,7 @@
23#include <linux/types.h> 23#include <linux/types.h>
24#include <linux/string.h> 24#include <linux/string.h>
25#include <linux/kvm_host.h> 25#include <linux/kvm_host.h>
26#include <linux/clockchips.h>
26 27
27#include <asm/reg.h> 28#include <asm/reg.h>
28#include <asm/time.h> 29#include <asm/time.h>
@@ -35,7 +36,9 @@
35#define OP_TRAP 3 36#define OP_TRAP 3
36#define OP_TRAP_64 2 37#define OP_TRAP_64 2
37 38
39#define OP_31_XOP_TRAP 4
38#define OP_31_XOP_LWZX 23 40#define OP_31_XOP_LWZX 23
41#define OP_31_XOP_TRAP_64 68
39#define OP_31_XOP_LBZX 87 42#define OP_31_XOP_LBZX 87
40#define OP_31_XOP_STWX 151 43#define OP_31_XOP_STWX 151
41#define OP_31_XOP_STBX 215 44#define OP_31_XOP_STBX 215
@@ -102,8 +105,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
102 */ 105 */
103 106
104 dec_time = vcpu->arch.dec; 107 dec_time = vcpu->arch.dec;
105 dec_time *= 1000; 108 /*
106 do_div(dec_time, tb_ticks_per_usec); 109 * Guest timebase ticks at the same frequency as host decrementer.
110 * So use the host decrementer calculations for decrementer emulation.
111 */
112 dec_time = dec_time << decrementer_clockevent.shift;
113 do_div(dec_time, decrementer_clockevent.mult);
107 dec_nsec = do_div(dec_time, NSEC_PER_SEC); 114 dec_nsec = do_div(dec_time, NSEC_PER_SEC);
108 hrtimer_start(&vcpu->arch.dec_timer, 115 hrtimer_start(&vcpu->arch.dec_timer,
109 ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL); 116 ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL);
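
The rewritten conversion above leans on the host decrementer clockevent: the clockevent layer programs the device with ticks = (ns * mult) >> shift, so the emulation recovers ns ≈ (dec << shift) / mult before splitting the result into the seconds/nanoseconds pair that ktime_set() takes. A rough standalone check with made-up numbers (not the real decrementer_clockevent values):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t timebase_hz = 512000000ULL;     /* assumed 512 MHz timebase */
	uint32_t shift = 32;
	/* mult chosen so that (ns * mult) >> shift == ns * hz / 1e9 */
	uint32_t mult = (uint32_t)((timebase_hz << shift) / 1000000000ULL);

	uint64_t dec_ticks = timebase_hz / 4;    /* guest asked for ~250 ms */
	uint64_t ns = (dec_ticks << shift) / mult;

	printf("mult=%u shift=%u -> %llu ticks ~ %llu ns (%llu.%09llu s)\n",
	       mult, shift, (unsigned long long)dec_ticks,
	       (unsigned long long)ns,
	       (unsigned long long)(ns / 1000000000ULL),
	       (unsigned long long)(ns % 1000000000ULL));
	return 0;
}
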
@@ -141,14 +148,13 @@ u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
141int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 148int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
142{ 149{
143 u32 inst = kvmppc_get_last_inst(vcpu); 150 u32 inst = kvmppc_get_last_inst(vcpu);
144 u32 ea; 151 int ra = get_ra(inst);
145 int ra; 152 int rs = get_rs(inst);
146 int rb; 153 int rt = get_rt(inst);
147 int rs; 154 int sprn = get_sprn(inst);
148 int rt;
149 int sprn;
150 enum emulation_result emulated = EMULATE_DONE; 155 enum emulation_result emulated = EMULATE_DONE;
151 int advance = 1; 156 int advance = 1;
157 ulong spr_val = 0;
152 158
153 /* this default type might be overwritten by subcategories */ 159 /* this default type might be overwritten by subcategories */
154 kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); 160 kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
@@ -170,173 +176,143 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
170 case 31: 176 case 31:
171 switch (get_xop(inst)) { 177 switch (get_xop(inst)) {
172 178
179 case OP_31_XOP_TRAP:
180#ifdef CONFIG_64BIT
181 case OP_31_XOP_TRAP_64:
182#endif
183#ifdef CONFIG_PPC_BOOK3S
184 kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
185#else
186 kvmppc_core_queue_program(vcpu,
187 vcpu->arch.shared->esr | ESR_PTR);
188#endif
189 advance = 0;
190 break;
173 case OP_31_XOP_LWZX: 191 case OP_31_XOP_LWZX:
174 rt = get_rt(inst);
175 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 192 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
176 break; 193 break;
177 194
178 case OP_31_XOP_LBZX: 195 case OP_31_XOP_LBZX:
179 rt = get_rt(inst);
180 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 196 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
181 break; 197 break;
182 198
183 case OP_31_XOP_LBZUX: 199 case OP_31_XOP_LBZUX:
184 rt = get_rt(inst);
185 ra = get_ra(inst);
186 rb = get_rb(inst);
187
188 ea = kvmppc_get_gpr(vcpu, rb);
189 if (ra)
190 ea += kvmppc_get_gpr(vcpu, ra);
191
192 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 200 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
193 kvmppc_set_gpr(vcpu, ra, ea); 201 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
194 break; 202 break;
195 203
196 case OP_31_XOP_STWX: 204 case OP_31_XOP_STWX:
197 rs = get_rs(inst);
198 emulated = kvmppc_handle_store(run, vcpu, 205 emulated = kvmppc_handle_store(run, vcpu,
199 kvmppc_get_gpr(vcpu, rs), 206 kvmppc_get_gpr(vcpu, rs),
200 4, 1); 207 4, 1);
201 break; 208 break;
202 209
203 case OP_31_XOP_STBX: 210 case OP_31_XOP_STBX:
204 rs = get_rs(inst);
205 emulated = kvmppc_handle_store(run, vcpu, 211 emulated = kvmppc_handle_store(run, vcpu,
206 kvmppc_get_gpr(vcpu, rs), 212 kvmppc_get_gpr(vcpu, rs),
207 1, 1); 213 1, 1);
208 break; 214 break;
209 215
210 case OP_31_XOP_STBUX: 216 case OP_31_XOP_STBUX:
211 rs = get_rs(inst);
212 ra = get_ra(inst);
213 rb = get_rb(inst);
214
215 ea = kvmppc_get_gpr(vcpu, rb);
216 if (ra)
217 ea += kvmppc_get_gpr(vcpu, ra);
218
219 emulated = kvmppc_handle_store(run, vcpu, 217 emulated = kvmppc_handle_store(run, vcpu,
220 kvmppc_get_gpr(vcpu, rs), 218 kvmppc_get_gpr(vcpu, rs),
221 1, 1); 219 1, 1);
222 kvmppc_set_gpr(vcpu, rs, ea); 220 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
223 break; 221 break;
224 222
225 case OP_31_XOP_LHAX: 223 case OP_31_XOP_LHAX:
226 rt = get_rt(inst);
227 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); 224 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
228 break; 225 break;
229 226
230 case OP_31_XOP_LHZX: 227 case OP_31_XOP_LHZX:
231 rt = get_rt(inst);
232 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 228 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
233 break; 229 break;
234 230
235 case OP_31_XOP_LHZUX: 231 case OP_31_XOP_LHZUX:
236 rt = get_rt(inst);
237 ra = get_ra(inst);
238 rb = get_rb(inst);
239
240 ea = kvmppc_get_gpr(vcpu, rb);
241 if (ra)
242 ea += kvmppc_get_gpr(vcpu, ra);
243
244 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 232 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
245 kvmppc_set_gpr(vcpu, ra, ea); 233 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
246 break; 234 break;
247 235
248 case OP_31_XOP_MFSPR: 236 case OP_31_XOP_MFSPR:
249 sprn = get_sprn(inst);
250 rt = get_rt(inst);
251
252 switch (sprn) { 237 switch (sprn) {
253 case SPRN_SRR0: 238 case SPRN_SRR0:
254 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0); 239 spr_val = vcpu->arch.shared->srr0;
255 break; 240 break;
256 case SPRN_SRR1: 241 case SPRN_SRR1:
257 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr1); 242 spr_val = vcpu->arch.shared->srr1;
258 break; 243 break;
259 case SPRN_PVR: 244 case SPRN_PVR:
260 kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break; 245 spr_val = vcpu->arch.pvr;
246 break;
261 case SPRN_PIR: 247 case SPRN_PIR:
262 kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break; 248 spr_val = vcpu->vcpu_id;
249 break;
263 case SPRN_MSSSR0: 250 case SPRN_MSSSR0:
264 kvmppc_set_gpr(vcpu, rt, 0); break; 251 spr_val = 0;
252 break;
265 253
266 /* Note: mftb and TBRL/TBWL are user-accessible, so 254 /* Note: mftb and TBRL/TBWL are user-accessible, so
267 * the guest can always access the real TB anyways. 255 * the guest can always access the real TB anyways.
268 * In fact, we probably will never see these traps. */ 256 * In fact, we probably will never see these traps. */
269 case SPRN_TBWL: 257 case SPRN_TBWL:
270 kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break; 258 spr_val = get_tb() >> 32;
259 break;
271 case SPRN_TBWU: 260 case SPRN_TBWU:
272 kvmppc_set_gpr(vcpu, rt, get_tb()); break; 261 spr_val = get_tb();
262 break;
273 263
274 case SPRN_SPRG0: 264 case SPRN_SPRG0:
275 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg0); 265 spr_val = vcpu->arch.shared->sprg0;
276 break; 266 break;
277 case SPRN_SPRG1: 267 case SPRN_SPRG1:
278 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg1); 268 spr_val = vcpu->arch.shared->sprg1;
279 break; 269 break;
280 case SPRN_SPRG2: 270 case SPRN_SPRG2:
281 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg2); 271 spr_val = vcpu->arch.shared->sprg2;
282 break; 272 break;
283 case SPRN_SPRG3: 273 case SPRN_SPRG3:
284 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg3); 274 spr_val = vcpu->arch.shared->sprg3;
285 break; 275 break;
286 /* Note: SPRG4-7 are user-readable, so we don't get 276 /* Note: SPRG4-7 are user-readable, so we don't get
287 * a trap. */ 277 * a trap. */
288 278
289 case SPRN_DEC: 279 case SPRN_DEC:
290 { 280 spr_val = kvmppc_get_dec(vcpu, get_tb());
291 kvmppc_set_gpr(vcpu, rt,
292 kvmppc_get_dec(vcpu, get_tb()));
293 break; 281 break;
294 }
295 default: 282 default:
296 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); 283 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
297 if (emulated == EMULATE_FAIL) { 284 &spr_val);
298 printk("mfspr: unknown spr %x\n", sprn); 285 if (unlikely(emulated == EMULATE_FAIL)) {
299 kvmppc_set_gpr(vcpu, rt, 0); 286 printk(KERN_INFO "mfspr: unknown spr "
287 "0x%x\n", sprn);
300 } 288 }
301 break; 289 break;
302 } 290 }
291 kvmppc_set_gpr(vcpu, rt, spr_val);
303 kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); 292 kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
304 break; 293 break;
305 294
306 case OP_31_XOP_STHX: 295 case OP_31_XOP_STHX:
307 rs = get_rs(inst);
308 ra = get_ra(inst);
309 rb = get_rb(inst);
310
311 emulated = kvmppc_handle_store(run, vcpu, 296 emulated = kvmppc_handle_store(run, vcpu,
312 kvmppc_get_gpr(vcpu, rs), 297 kvmppc_get_gpr(vcpu, rs),
313 2, 1); 298 2, 1);
314 break; 299 break;
315 300
316 case OP_31_XOP_STHUX: 301 case OP_31_XOP_STHUX:
317 rs = get_rs(inst);
318 ra = get_ra(inst);
319 rb = get_rb(inst);
320
321 ea = kvmppc_get_gpr(vcpu, rb);
322 if (ra)
323 ea += kvmppc_get_gpr(vcpu, ra);
324
325 emulated = kvmppc_handle_store(run, vcpu, 302 emulated = kvmppc_handle_store(run, vcpu,
326 kvmppc_get_gpr(vcpu, rs), 303 kvmppc_get_gpr(vcpu, rs),
327 2, 1); 304 2, 1);
328 kvmppc_set_gpr(vcpu, ra, ea); 305 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
329 break; 306 break;
330 307
331 case OP_31_XOP_MTSPR: 308 case OP_31_XOP_MTSPR:
332 sprn = get_sprn(inst); 309 spr_val = kvmppc_get_gpr(vcpu, rs);
333 rs = get_rs(inst);
334 switch (sprn) { 310 switch (sprn) {
335 case SPRN_SRR0: 311 case SPRN_SRR0:
336 vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs); 312 vcpu->arch.shared->srr0 = spr_val;
337 break; 313 break;
338 case SPRN_SRR1: 314 case SPRN_SRR1:
339 vcpu->arch.shared->srr1 = kvmppc_get_gpr(vcpu, rs); 315 vcpu->arch.shared->srr1 = spr_val;
340 break; 316 break;
341 317
342 /* XXX We need to context-switch the timebase for 318 /* XXX We need to context-switch the timebase for
@@ -347,27 +323,29 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
347 case SPRN_MSSSR0: break; 323 case SPRN_MSSSR0: break;
348 324
349 case SPRN_DEC: 325 case SPRN_DEC:
350 vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs); 326 vcpu->arch.dec = spr_val;
351 kvmppc_emulate_dec(vcpu); 327 kvmppc_emulate_dec(vcpu);
352 break; 328 break;
353 329
354 case SPRN_SPRG0: 330 case SPRN_SPRG0:
355 vcpu->arch.shared->sprg0 = kvmppc_get_gpr(vcpu, rs); 331 vcpu->arch.shared->sprg0 = spr_val;
356 break; 332 break;
357 case SPRN_SPRG1: 333 case SPRN_SPRG1:
358 vcpu->arch.shared->sprg1 = kvmppc_get_gpr(vcpu, rs); 334 vcpu->arch.shared->sprg1 = spr_val;
359 break; 335 break;
360 case SPRN_SPRG2: 336 case SPRN_SPRG2:
361 vcpu->arch.shared->sprg2 = kvmppc_get_gpr(vcpu, rs); 337 vcpu->arch.shared->sprg2 = spr_val;
362 break; 338 break;
363 case SPRN_SPRG3: 339 case SPRN_SPRG3:
364 vcpu->arch.shared->sprg3 = kvmppc_get_gpr(vcpu, rs); 340 vcpu->arch.shared->sprg3 = spr_val;
365 break; 341 break;
366 342
367 default: 343 default:
368 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); 344 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
345 spr_val);
369 if (emulated == EMULATE_FAIL) 346 if (emulated == EMULATE_FAIL)
370 printk("mtspr: unknown spr %x\n", sprn); 347 printk(KERN_INFO "mtspr: unknown spr "
348 "0x%x\n", sprn);
371 break; 349 break;
372 } 350 }
373 kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); 351 kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
@@ -382,7 +360,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
382 break; 360 break;
383 361
384 case OP_31_XOP_LWBRX: 362 case OP_31_XOP_LWBRX:
385 rt = get_rt(inst);
386 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); 363 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
387 break; 364 break;
388 365
@@ -390,25 +367,16 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
390 break; 367 break;
391 368
392 case OP_31_XOP_STWBRX: 369 case OP_31_XOP_STWBRX:
393 rs = get_rs(inst);
394 ra = get_ra(inst);
395 rb = get_rb(inst);
396
397 emulated = kvmppc_handle_store(run, vcpu, 370 emulated = kvmppc_handle_store(run, vcpu,
398 kvmppc_get_gpr(vcpu, rs), 371 kvmppc_get_gpr(vcpu, rs),
399 4, 0); 372 4, 0);
400 break; 373 break;
401 374
402 case OP_31_XOP_LHBRX: 375 case OP_31_XOP_LHBRX:
403 rt = get_rt(inst);
404 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); 376 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
405 break; 377 break;
406 378
407 case OP_31_XOP_STHBRX: 379 case OP_31_XOP_STHBRX:
408 rs = get_rs(inst);
409 ra = get_ra(inst);
410 rb = get_rb(inst);
411
412 emulated = kvmppc_handle_store(run, vcpu, 380 emulated = kvmppc_handle_store(run, vcpu,
413 kvmppc_get_gpr(vcpu, rs), 381 kvmppc_get_gpr(vcpu, rs),
414 2, 0); 382 2, 0);
@@ -421,99 +389,78 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
421 break; 389 break;
422 390
423 case OP_LWZ: 391 case OP_LWZ:
424 rt = get_rt(inst);
425 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 392 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
426 break; 393 break;
427 394
428 case OP_LWZU: 395 case OP_LWZU:
429 ra = get_ra(inst);
430 rt = get_rt(inst);
431 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 396 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
432 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 397 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
433 break; 398 break;
434 399
435 case OP_LBZ: 400 case OP_LBZ:
436 rt = get_rt(inst);
437 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 401 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
438 break; 402 break;
439 403
440 case OP_LBZU: 404 case OP_LBZU:
441 ra = get_ra(inst);
442 rt = get_rt(inst);
443 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 405 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
444 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 406 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
445 break; 407 break;
446 408
447 case OP_STW: 409 case OP_STW:
448 rs = get_rs(inst);
449 emulated = kvmppc_handle_store(run, vcpu, 410 emulated = kvmppc_handle_store(run, vcpu,
450 kvmppc_get_gpr(vcpu, rs), 411 kvmppc_get_gpr(vcpu, rs),
451 4, 1); 412 4, 1);
452 break; 413 break;
453 414
454 case OP_STWU: 415 case OP_STWU:
455 ra = get_ra(inst);
456 rs = get_rs(inst);
457 emulated = kvmppc_handle_store(run, vcpu, 416 emulated = kvmppc_handle_store(run, vcpu,
458 kvmppc_get_gpr(vcpu, rs), 417 kvmppc_get_gpr(vcpu, rs),
459 4, 1); 418 4, 1);
460 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 419 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
461 break; 420 break;
462 421
463 case OP_STB: 422 case OP_STB:
464 rs = get_rs(inst);
465 emulated = kvmppc_handle_store(run, vcpu, 423 emulated = kvmppc_handle_store(run, vcpu,
466 kvmppc_get_gpr(vcpu, rs), 424 kvmppc_get_gpr(vcpu, rs),
467 1, 1); 425 1, 1);
468 break; 426 break;
469 427
470 case OP_STBU: 428 case OP_STBU:
471 ra = get_ra(inst);
472 rs = get_rs(inst);
473 emulated = kvmppc_handle_store(run, vcpu, 429 emulated = kvmppc_handle_store(run, vcpu,
474 kvmppc_get_gpr(vcpu, rs), 430 kvmppc_get_gpr(vcpu, rs),
475 1, 1); 431 1, 1);
476 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 432 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
477 break; 433 break;
478 434
479 case OP_LHZ: 435 case OP_LHZ:
480 rt = get_rt(inst);
481 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 436 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
482 break; 437 break;
483 438
484 case OP_LHZU: 439 case OP_LHZU:
485 ra = get_ra(inst);
486 rt = get_rt(inst);
487 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 440 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
488 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 441 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
489 break; 442 break;
490 443
491 case OP_LHA: 444 case OP_LHA:
492 rt = get_rt(inst);
493 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); 445 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
494 break; 446 break;
495 447
496 case OP_LHAU: 448 case OP_LHAU:
497 ra = get_ra(inst);
498 rt = get_rt(inst);
499 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); 449 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
500 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 450 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
501 break; 451 break;
502 452
503 case OP_STH: 453 case OP_STH:
504 rs = get_rs(inst);
505 emulated = kvmppc_handle_store(run, vcpu, 454 emulated = kvmppc_handle_store(run, vcpu,
506 kvmppc_get_gpr(vcpu, rs), 455 kvmppc_get_gpr(vcpu, rs),
507 2, 1); 456 2, 1);
508 break; 457 break;
509 458
510 case OP_STHU: 459 case OP_STHU:
511 ra = get_ra(inst);
512 rs = get_rs(inst);
513 emulated = kvmppc_handle_store(run, vcpu, 460 emulated = kvmppc_handle_store(run, vcpu,
514 kvmppc_get_gpr(vcpu, rs), 461 kvmppc_get_gpr(vcpu, rs),
515 2, 1); 462 2, 1);
516 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 463 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
517 break; 464 break;
518 465
519 default: 466 default:
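
The emulate.c rework follows one pattern throughout: decode ra/rs/rt/sprn once at the top, funnel every mfspr/mtspr case through a single spr_val local, and write the destination GPR back at exactly one point after the switch; update-form loads and stores likewise write vcpu->arch.vaddr_accessed back to rA instead of recomputing the effective address (which, visibly in the stbux case, also moves the update from rS to rA). A toy mfspr dispatcher in that shape, with a stand-in register file:

#include <stdint.h>
#include <stdio.h>

enum { TOY_SPRN_SRR0 = 26, TOY_SPRN_SRR1 = 27, TOY_SPRN_PVR = 287 };

struct toy_vcpu {
	uint64_t gpr[32];
	uint64_t srr0, srr1, pvr;
};

static int toy_emulate_mfspr(struct toy_vcpu *v, int sprn, int rt)
{
	uint64_t spr_val = 0;

	switch (sprn) {
	case TOY_SPRN_SRR0: spr_val = v->srr0; break;
	case TOY_SPRN_SRR1: spr_val = v->srr1; break;
	case TOY_SPRN_PVR:  spr_val = v->pvr;  break;
	default:
		fprintf(stderr, "mfspr: unknown spr 0x%x\n", sprn);
		return -1;
	}
	v->gpr[rt] = spr_val;	/* single write-back point */
	return 0;
}

int main(void)
{
	struct toy_vcpu v = { .srr0 = 0xc000000000001234ULL, .pvr = 0x80240020 };

	toy_emulate_mfspr(&v, TOY_SPRN_SRR0, 3);
	toy_emulate_mfspr(&v, TOY_SPRN_PVR, 4);
	printf("r3=0x%llx r4=0x%llx\n",
	       (unsigned long long)v.gpr[3], (unsigned long long)v.gpr[4]);
	return 0;
}
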
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 00d7e345b3fe..1493c8de947b 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -43,6 +43,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
43 v->requests; 43 v->requests;
44} 44}
45 45
46int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
47{
48 return 1;
49}
50
46int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) 51int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
47{ 52{
48 int nr = kvmppc_get_gpr(vcpu, 11); 53 int nr = kvmppc_get_gpr(vcpu, 11);
@@ -74,7 +79,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
74 } 79 }
75 case HC_VENDOR_KVM | KVM_HC_FEATURES: 80 case HC_VENDOR_KVM | KVM_HC_FEATURES:
76 r = HC_EV_SUCCESS; 81 r = HC_EV_SUCCESS;
77#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500) 82#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2)
78 /* XXX Missing magic page on 44x */ 83 /* XXX Missing magic page on 44x */
79 r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); 84 r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
80#endif 85#endif
@@ -109,6 +114,11 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
109 goto out; 114 goto out;
110#endif 115#endif
111 116
117#ifdef CONFIG_KVM_BOOKE_HV
118 if (!cpu_has_feature(CPU_FTR_EMB_HV))
119 goto out;
120#endif
121
112 r = true; 122 r = true;
113 123
114out: 124out:
@@ -225,7 +235,7 @@ int kvm_dev_ioctl_check_extension(long ext)
225 case KVM_CAP_PPC_PAIRED_SINGLES: 235 case KVM_CAP_PPC_PAIRED_SINGLES:
226 case KVM_CAP_PPC_OSI: 236 case KVM_CAP_PPC_OSI:
227 case KVM_CAP_PPC_GET_PVINFO: 237 case KVM_CAP_PPC_GET_PVINFO:
228#ifdef CONFIG_KVM_E500 238#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
229 case KVM_CAP_SW_TLB: 239 case KVM_CAP_SW_TLB:
230#endif 240#endif
231 r = 1; 241 r = 1;
@@ -234,10 +244,12 @@ int kvm_dev_ioctl_check_extension(long ext)
234 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 244 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
235 break; 245 break;
236#endif 246#endif
237#ifdef CONFIG_KVM_BOOK3S_64_HV 247#ifdef CONFIG_PPC_BOOK3S_64
238 case KVM_CAP_SPAPR_TCE: 248 case KVM_CAP_SPAPR_TCE:
239 r = 1; 249 r = 1;
240 break; 250 break;
251#endif /* CONFIG_PPC_BOOK3S_64 */
252#ifdef CONFIG_KVM_BOOK3S_64_HV
241 case KVM_CAP_PPC_SMT: 253 case KVM_CAP_PPC_SMT:
242 r = threads_per_core; 254 r = threads_per_core;
243 break; 255 break;
@@ -267,6 +279,11 @@ int kvm_dev_ioctl_check_extension(long ext)
267 case KVM_CAP_MAX_VCPUS: 279 case KVM_CAP_MAX_VCPUS:
268 r = KVM_MAX_VCPUS; 280 r = KVM_MAX_VCPUS;
269 break; 281 break;
282#ifdef CONFIG_PPC_BOOK3S_64
283 case KVM_CAP_PPC_GET_SMMU_INFO:
284 r = 1;
285 break;
286#endif
270 default: 287 default:
271 r = 0; 288 r = 0;
272 break; 289 break;
@@ -588,21 +605,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
588 return r; 605 return r;
589} 606}
590 607
591void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
592{
593 int me;
594 int cpu = vcpu->cpu;
595
596 me = get_cpu();
597 if (waitqueue_active(vcpu->arch.wqp)) {
598 wake_up_interruptible(vcpu->arch.wqp);
599 vcpu->stat.halt_wakeup++;
600 } else if (cpu != me && cpu != -1) {
601 smp_send_reschedule(vcpu->cpu);
602 }
603 put_cpu();
604}
605
606int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) 608int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
607{ 609{
608 if (irq->irq == KVM_INTERRUPT_UNSET) { 610 if (irq->irq == KVM_INTERRUPT_UNSET) {
@@ -611,6 +613,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
611 } 613 }
612 614
613 kvmppc_core_queue_external(vcpu, irq); 615 kvmppc_core_queue_external(vcpu, irq);
616
614 kvm_vcpu_kick(vcpu); 617 kvm_vcpu_kick(vcpu);
615 618
616 return 0; 619 return 0;
@@ -633,7 +636,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
633 r = 0; 636 r = 0;
634 vcpu->arch.papr_enabled = true; 637 vcpu->arch.papr_enabled = true;
635 break; 638 break;
636#ifdef CONFIG_KVM_E500 639#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
637 case KVM_CAP_SW_TLB: { 640 case KVM_CAP_SW_TLB: {
638 struct kvm_config_tlb cfg; 641 struct kvm_config_tlb cfg;
639 void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0]; 642 void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0];
@@ -710,7 +713,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
710 break; 713 break;
711 } 714 }
712 715
713#ifdef CONFIG_KVM_E500 716#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
714 case KVM_DIRTY_TLB: { 717 case KVM_DIRTY_TLB: {
715 struct kvm_dirty_tlb dirty; 718 struct kvm_dirty_tlb dirty;
716 r = -EFAULT; 719 r = -EFAULT;
@@ -720,7 +723,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
720 break; 723 break;
721 } 724 }
722#endif 725#endif
723
724 default: 726 default:
725 r = -EINVAL; 727 r = -EINVAL;
726 } 728 }
@@ -777,7 +779,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
777 779
778 break; 780 break;
779 } 781 }
780#ifdef CONFIG_KVM_BOOK3S_64_HV 782#ifdef CONFIG_PPC_BOOK3S_64
781 case KVM_CREATE_SPAPR_TCE: { 783 case KVM_CREATE_SPAPR_TCE: {
782 struct kvm_create_spapr_tce create_tce; 784 struct kvm_create_spapr_tce create_tce;
783 struct kvm *kvm = filp->private_data; 785 struct kvm *kvm = filp->private_data;
@@ -788,7 +790,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
788 r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); 790 r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
789 goto out; 791 goto out;
790 } 792 }
793#endif /* CONFIG_PPC_BOOK3S_64 */
791 794
795#ifdef CONFIG_KVM_BOOK3S_64_HV
792 case KVM_ALLOCATE_RMA: { 796 case KVM_ALLOCATE_RMA: {
793 struct kvm *kvm = filp->private_data; 797 struct kvm *kvm = filp->private_data;
794 struct kvm_allocate_rma rma; 798 struct kvm_allocate_rma rma;
@@ -800,6 +804,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
800 } 804 }
801#endif /* CONFIG_KVM_BOOK3S_64_HV */ 805#endif /* CONFIG_KVM_BOOK3S_64_HV */
802 806
807#ifdef CONFIG_PPC_BOOK3S_64
808 case KVM_PPC_GET_SMMU_INFO: {
809 struct kvm *kvm = filp->private_data;
810 struct kvm_ppc_smmu_info info;
811
812 memset(&info, 0, sizeof(info));
813 r = kvm_vm_ioctl_get_smmu_info(kvm, &info);
814 if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
815 r = -EFAULT;
816 break;
817 }
818#endif /* CONFIG_PPC_BOOK3S_64 */
803 default: 819 default:
804 r = -ENOTTY; 820 r = -ENOTTY;
805 } 821 }
@@ -808,6 +824,40 @@ out:
808 return r; 824 return r;
809} 825}
810 826
827static unsigned long lpid_inuse[BITS_TO_LONGS(KVMPPC_NR_LPIDS)];
828static unsigned long nr_lpids;
829
830long kvmppc_alloc_lpid(void)
831{
832 long lpid;
833
834 do {
835 lpid = find_first_zero_bit(lpid_inuse, KVMPPC_NR_LPIDS);
836 if (lpid >= nr_lpids) {
837 pr_err("%s: No LPIDs free\n", __func__);
838 return -ENOMEM;
839 }
840 } while (test_and_set_bit(lpid, lpid_inuse));
841
842 return lpid;
843}
844
845void kvmppc_claim_lpid(long lpid)
846{
847 set_bit(lpid, lpid_inuse);
848}
849
850void kvmppc_free_lpid(long lpid)
851{
852 clear_bit(lpid, lpid_inuse);
853}
854
855void kvmppc_init_lpid(unsigned long nr_lpids_param)
856{
857 nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param);
858 memset(lpid_inuse, 0, sizeof(lpid_inuse));
859}
860
811int kvm_arch_init(void *opaque) 861int kvm_arch_init(void *opaque)
812{ 862{
813 return 0; 863 return 0;
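
The new LPID allocator is a plain bitmap: kvmppc_init_lpid() caps the pool, kvmppc_claim_lpid(0) in the e500mc module init reserves the host's LPID, and kvmppc_alloc_lpid()/kvmppc_free_lpid() hand out the rest, relying on test_and_set_bit() for safety against concurrent VM creation. A userspace re-creation of the same policy (without the atomicity):

#include <stdint.h>
#include <stdio.h>

#define TOY_NR_LPIDS 64

static uint64_t lpid_inuse;           /* bit n set => LPID n taken */
static unsigned long nr_lpids;

static long toy_alloc_lpid(void)
{
	for (long lpid = 0; lpid < (long)nr_lpids; lpid++) {
		if (!(lpid_inuse & (1ULL << lpid))) {
			lpid_inuse |= 1ULL << lpid;
			return lpid;
		}
	}
	fprintf(stderr, "no LPIDs free\n");
	return -1;
}

static void toy_claim_lpid(long lpid) { lpid_inuse |= 1ULL << lpid; }
static void toy_free_lpid(long lpid)  { lpid_inuse &= ~(1ULL << lpid); }

static void toy_init_lpid(unsigned long n)
{
	nr_lpids = n < TOY_NR_LPIDS ? n : TOY_NR_LPIDS;
	lpid_inuse = 0;
}

int main(void)
{
	toy_init_lpid(64);
	toy_claim_lpid(0);                                      /* host keeps LPID 0 */
	printf("first guest LPID:  %ld\n", toy_alloc_lpid());   /* 1 */
	printf("second guest LPID: %ld\n", toy_alloc_lpid());   /* 2 */
	toy_free_lpid(1);
	printf("after free, next:  %ld\n", toy_alloc_lpid());   /* 1 again */
	return 0;
}
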
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index 8167d42a776f..bf191e72b2d8 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -93,6 +93,12 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
93 case SIGNAL_EXITS: 93 case SIGNAL_EXITS:
94 vcpu->stat.signal_exits++; 94 vcpu->stat.signal_exits++;
95 break; 95 break;
96 case DBELL_EXITS:
97 vcpu->stat.dbell_exits++;
98 break;
99 case GDBELL_EXITS:
100 vcpu->stat.gdbell_exits++;
101 break;
96 } 102 }
97} 103}
98 104
diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h
index 96076676e224..bdcbe0f8dd7b 100644
--- a/arch/s390/include/asm/kvm.h
+++ b/arch/s390/include/asm/kvm.h
@@ -52,4 +52,9 @@ struct kvm_sync_regs {
52 __u32 acrs[16]; /* access registers */ 52 __u32 acrs[16]; /* access registers */
53 __u64 crs[16]; /* control registers */ 53 __u64 crs[16]; /* control registers */
54}; 54};
55
56#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
57#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
58#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
59#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
55#endif 60#endif
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 7343872890a2..dd17537b9a9d 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -148,6 +148,7 @@ struct kvm_vcpu_stat {
148 u32 instruction_sigp_restart; 148 u32 instruction_sigp_restart;
149 u32 diagnose_10; 149 u32 diagnose_10;
150 u32 diagnose_44; 150 u32 diagnose_44;
151 u32 diagnose_9c;
151}; 152};
152 153
153struct kvm_s390_io_info { 154struct kvm_s390_io_info {
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index 6964db226f83..a98832961035 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -149,6 +149,11 @@ static inline unsigned int kvm_arch_para_features(void)
149 return 0; 149 return 0;
150} 150}
151 151
152static inline bool kvm_check_and_clear_guest_paused(void)
153{
154 return false;
155}
156
152#endif 157#endif
153 158
154#endif /* __S390_KVM_PARA_H */ 159#endif /* __S390_KVM_PARA_H */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index fed7bee650a0..bf238c55740b 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -48,6 +48,7 @@ int sclp_cpu_deconfigure(u8 cpu);
48void sclp_facilities_detect(void); 48void sclp_facilities_detect(void);
49unsigned long long sclp_get_rnmax(void); 49unsigned long long sclp_get_rnmax(void);
50unsigned long long sclp_get_rzm(void); 50unsigned long long sclp_get_rzm(void);
51u8 sclp_get_fac85(void);
51int sclp_sdias_blk_count(void); 52int sclp_sdias_blk_count(void);
52int sclp_sdias_copy(void *dest, int blk_num, int nr_blks); 53int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
53int sclp_chp_configure(struct chp_id chpid); 54int sclp_chp_configure(struct chp_id chpid);
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index a353f0ea45c2..b23d9ac77dfc 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -47,9 +47,30 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
47{ 47{
48 VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); 48 VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
49 vcpu->stat.diagnose_44++; 49 vcpu->stat.diagnose_44++;
50 vcpu_put(vcpu); 50 kvm_vcpu_on_spin(vcpu);
51 yield(); 51 return 0;
52 vcpu_load(vcpu); 52}
53
54static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
55{
56 struct kvm *kvm = vcpu->kvm;
57 struct kvm_vcpu *tcpu;
58 int tid;
59 int i;
60
61 tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
62 vcpu->stat.diagnose_9c++;
63 VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid);
64
65 if (tid == vcpu->vcpu_id)
66 return 0;
67
68 kvm_for_each_vcpu(i, tcpu, kvm)
69 if (tcpu->vcpu_id == tid) {
70 kvm_vcpu_yield_to(tcpu);
71 break;
72 }
73
53 return 0; 74 return 0;
54} 75}
55 76
@@ -89,6 +110,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
89 return diag_release_pages(vcpu); 110 return diag_release_pages(vcpu);
90 case 0x44: 111 case 0x44:
91 return __diag_time_slice_end(vcpu); 112 return __diag_time_slice_end(vcpu);
113 case 0x9c:
114 return __diag_time_slice_end_directed(vcpu);
92 case 0x308: 115 case 0x308:
93 return __diag_ipl_functions(vcpu); 116 return __diag_ipl_functions(vcpu);
94 default: 117 default:
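
For diag 0x9c the target CPU id is read from the guest register named by the R1 field of the instruction, which the handler extracts as (ipa & 0xf0) >> 4 before walking the vcpu list and yielding to the matching vcpu. A quick check of that field extraction with a hypothetical instruction halfword:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t ipa = 0x8350;                 /* made-up DIAG encoding with R1 = 5 */
	unsigned int r1 = (ipa & 0xf0) >> 4;

	printf("target CPU id is read from gpr[%u]\n", r1);   /* gpr[5] */
	return 0;
}
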
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 361456577c6f..979cbe55bf5e 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -101,6 +101,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
101} 101}
102 102
103static intercept_handler_t instruction_handlers[256] = { 103static intercept_handler_t instruction_handlers[256] = {
104 [0x01] = kvm_s390_handle_01,
104 [0x83] = kvm_s390_handle_diag, 105 [0x83] = kvm_s390_handle_diag,
105 [0xae] = kvm_s390_handle_sigp, 106 [0xae] = kvm_s390_handle_sigp,
106 [0xb2] = kvm_s390_handle_b2, 107 [0xb2] = kvm_s390_handle_b2,
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 217ce44395a4..664766d0c83c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -28,6 +28,7 @@
28#include <asm/pgtable.h> 28#include <asm/pgtable.h>
29#include <asm/nmi.h> 29#include <asm/nmi.h>
30#include <asm/switch_to.h> 30#include <asm/switch_to.h>
31#include <asm/sclp.h>
31#include "kvm-s390.h" 32#include "kvm-s390.h"
32#include "gaccess.h" 33#include "gaccess.h"
33 34
@@ -74,6 +75,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
74 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, 75 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
75 { "diagnose_10", VCPU_STAT(diagnose_10) }, 76 { "diagnose_10", VCPU_STAT(diagnose_10) },
76 { "diagnose_44", VCPU_STAT(diagnose_44) }, 77 { "diagnose_44", VCPU_STAT(diagnose_44) },
78 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
77 { NULL } 79 { NULL }
78}; 80};
79 81
@@ -133,8 +135,16 @@ int kvm_dev_ioctl_check_extension(long ext)
133 case KVM_CAP_S390_UCONTROL: 135 case KVM_CAP_S390_UCONTROL:
134#endif 136#endif
135 case KVM_CAP_SYNC_REGS: 137 case KVM_CAP_SYNC_REGS:
138 case KVM_CAP_ONE_REG:
136 r = 1; 139 r = 1;
137 break; 140 break;
141 case KVM_CAP_NR_VCPUS:
142 case KVM_CAP_MAX_VCPUS:
143 r = KVM_MAX_VCPUS;
144 break;
145 case KVM_CAP_S390_COW:
146 r = sclp_get_fac85() & 0x2;
147 break;
138 default: 148 default:
139 r = 0; 149 r = 0;
140 } 150 }
@@ -423,6 +433,71 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
423 return 0; 433 return 0;
424} 434}
425 435
436int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
437{
438 /* kvm common code refers to this, but never calls it */
439 BUG();
440 return 0;
441}
442
443static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
444 struct kvm_one_reg *reg)
445{
446 int r = -EINVAL;
447
448 switch (reg->id) {
449 case KVM_REG_S390_TODPR:
450 r = put_user(vcpu->arch.sie_block->todpr,
451 (u32 __user *)reg->addr);
452 break;
453 case KVM_REG_S390_EPOCHDIFF:
454 r = put_user(vcpu->arch.sie_block->epoch,
455 (u64 __user *)reg->addr);
456 break;
457 case KVM_REG_S390_CPU_TIMER:
458 r = put_user(vcpu->arch.sie_block->cputm,
459 (u64 __user *)reg->addr);
460 break;
461 case KVM_REG_S390_CLOCK_COMP:
462 r = put_user(vcpu->arch.sie_block->ckc,
463 (u64 __user *)reg->addr);
464 break;
465 default:
466 break;
467 }
468
469 return r;
470}
471
472static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
473 struct kvm_one_reg *reg)
474{
475 int r = -EINVAL;
476
477 switch (reg->id) {
478 case KVM_REG_S390_TODPR:
479 r = get_user(vcpu->arch.sie_block->todpr,
480 (u32 __user *)reg->addr);
481 break;
482 case KVM_REG_S390_EPOCHDIFF:
483 r = get_user(vcpu->arch.sie_block->epoch,
484 (u64 __user *)reg->addr);
485 break;
486 case KVM_REG_S390_CPU_TIMER:
487 r = get_user(vcpu->arch.sie_block->cputm,
488 (u64 __user *)reg->addr);
489 break;
490 case KVM_REG_S390_CLOCK_COMP:
491 r = get_user(vcpu->arch.sie_block->ckc,
492 (u64 __user *)reg->addr);
493 break;
494 default:
495 break;
496 }
497
498 return r;
499}
500
426static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 501static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
427{ 502{
428 kvm_s390_vcpu_initial_reset(vcpu); 503 kvm_s390_vcpu_initial_reset(vcpu);
@@ -753,6 +828,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
753 case KVM_S390_INITIAL_RESET: 828 case KVM_S390_INITIAL_RESET:
754 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); 829 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
755 break; 830 break;
831 case KVM_SET_ONE_REG:
832 case KVM_GET_ONE_REG: {
833 struct kvm_one_reg reg;
834 r = -EFAULT;
835 if (copy_from_user(&reg, argp, sizeof(reg)))
836 break;
837 if (ioctl == KVM_SET_ONE_REG)
838 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
839 else
840 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
841 break;
842 }
756#ifdef CONFIG_KVM_S390_UCONTROL 843#ifdef CONFIG_KVM_S390_UCONTROL
757 case KVM_S390_UCAS_MAP: { 844 case KVM_S390_UCAS_MAP: {
758 struct kvm_s390_ucas_mapping ucasmap; 845 struct kvm_s390_ucas_mapping ucasmap;
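
With KVM_CAP_ONE_REG now advertised, userspace can reach the new s390 registers through the generic KVM_GET/SET_ONE_REG ioctls. A minimal sketch of the read side, s390-only and with error handling omitted; KVM_REG_S390_CPU_TIMER is one of the ids added in the asm/kvm.h hunk earlier in this diff:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
	uint64_t timer = 0;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_S390_CPU_TIMER,
		.addr = (uint64_t)(unsigned long)&timer,
	};

	if (ioctl(vcpu, KVM_GET_ONE_REG, &reg) == 0)
		printf("guest CPU timer: 0x%llx\n", (unsigned long long)timer);
	return 0;
}
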
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index ff28f9d1c9eb..2294377975e8 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -79,6 +79,7 @@ int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
79/* implemented in priv.c */ 79/* implemented in priv.c */
80int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); 80int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
81int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); 81int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
82int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
82 83
83/* implemented in sigp.c */ 84/* implemented in sigp.c */
84int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); 85int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index e5a45dbd26ac..68a6b2ed16bf 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -380,3 +380,34 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
380 return -EOPNOTSUPP; 380 return -EOPNOTSUPP;
381} 381}
382 382
383static int handle_sckpf(struct kvm_vcpu *vcpu)
384{
385 u32 value;
386
387 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
388 return kvm_s390_inject_program_int(vcpu,
389 PGM_PRIVILEGED_OPERATION);
390
391 if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000)
392 return kvm_s390_inject_program_int(vcpu,
393 PGM_SPECIFICATION);
394
395 value = vcpu->run->s.regs.gprs[0] & 0x000000000000ffff;
396 vcpu->arch.sie_block->todpr = value;
397
398 return 0;
399}
400
401static intercept_handler_t x01_handlers[256] = {
402 [0x07] = handle_sckpf,
403};
404
405int kvm_s390_handle_01(struct kvm_vcpu *vcpu)
406{
407 intercept_handler_t handler;
408
409 handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
410 if (handler)
411 return handler(vcpu);
412 return -EOPNOTSUPP;
413}
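
The SCKPF handler accepts general register 0 only when bits 32-47 are zero and then installs the low 16 bits as the TOD programmable field (after a privileged-operation check, not repeated below). A small mask check mirroring that validation:

#include <stdint.h>
#include <stdio.h>

/* Returns -1 where the guest would get a specification exception. */
static int sckpf_value(uint64_t gr0, uint16_t *todpr)
{
	if (gr0 & 0x00000000ffff0000ULL)
		return -1;
	*todpr = (uint16_t)(gr0 & 0xffffULL);
	return 0;
}

int main(void)
{
	uint16_t todpr;

	printf("0x1234     -> %s\n", sckpf_value(0x1234, &todpr) ? "reject" : "accept");
	printf("0x7fff0000 -> %s\n", sckpf_value(0x7fff0000ULL, &todpr) ? "reject" : "accept");
	return 0;
}
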
diff --git a/arch/score/include/asm/kvm_para.h b/arch/score/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/score/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/sh/include/asm/kvm_para.h b/arch/sh/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/sh/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/sparc/include/asm/kvm_para.h b/arch/sparc/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/sparc/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/tile/include/asm/kvm_para.h b/arch/tile/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/tile/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/um/include/asm/kvm_para.h b/arch/um/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/um/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/unicore32/include/asm/kvm_para.h b/arch/unicore32/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/unicore32/include/asm/kvm_para.h
@@ -0,0 +1 @@
#include <asm-generic/kvm_para.h>
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index c222e1a1b12a..1ac46c22dd50 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -200,7 +200,7 @@ typedef u32 __attribute__((vector_size(16))) sse128_t;
200 200
201/* Type, address-of, and value of an instruction's operand. */ 201/* Type, address-of, and value of an instruction's operand. */
202struct operand { 202struct operand {
203 enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type; 203 enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
204 unsigned int bytes; 204 unsigned int bytes;
205 union { 205 union {
206 unsigned long orig_val; 206 unsigned long orig_val;
@@ -213,12 +213,14 @@ struct operand {
213 unsigned seg; 213 unsigned seg;
214 } mem; 214 } mem;
215 unsigned xmm; 215 unsigned xmm;
216 unsigned mm;
216 } addr; 217 } addr;
217 union { 218 union {
218 unsigned long val; 219 unsigned long val;
219 u64 val64; 220 u64 val64;
220 char valptr[sizeof(unsigned long) + 2]; 221 char valptr[sizeof(unsigned long) + 2];
221 sse128_t vec_val; 222 sse128_t vec_val;
223 u64 mm_val;
222 }; 224 };
223}; 225};
224 226
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e5b97be12d2a..db7c1f2709a2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -173,6 +173,9 @@ enum {
173#define DR7_FIXED_1 0x00000400 173#define DR7_FIXED_1 0x00000400
174#define DR7_VOLATILE 0xffff23ff 174#define DR7_VOLATILE 0xffff23ff
175 175
176/* apic attention bits */
177#define KVM_APIC_CHECK_VAPIC 0
178
176/* 179/*
177 * We don't want allocation failures within the mmu code, so we preallocate 180 * We don't want allocation failures within the mmu code, so we preallocate
178 * enough memory for a single page fault in a cache. 181 * enough memory for a single page fault in a cache.
@@ -238,8 +241,6 @@ struct kvm_mmu_page {
238#endif 241#endif
239 242
240 int write_flooding_count; 243 int write_flooding_count;
241
242 struct rcu_head rcu;
243}; 244};
244 245
245struct kvm_pio_request { 246struct kvm_pio_request {
@@ -338,6 +339,7 @@ struct kvm_vcpu_arch {
338 u64 efer; 339 u64 efer;
339 u64 apic_base; 340 u64 apic_base;
340 struct kvm_lapic *apic; /* kernel irqchip context */ 341 struct kvm_lapic *apic; /* kernel irqchip context */
342 unsigned long apic_attention;
341 int32_t apic_arb_prio; 343 int32_t apic_arb_prio;
342 int mp_state; 344 int mp_state;
343 int sipi_vector; 345 int sipi_vector;
@@ -537,8 +539,6 @@ struct kvm_arch {
537 u64 hv_guest_os_id; 539 u64 hv_guest_os_id;
538 u64 hv_hypercall; 540 u64 hv_hypercall;
539 541
540 atomic_t reader_counter;
541
542 #ifdef CONFIG_KVM_MMU_AUDIT 542 #ifdef CONFIG_KVM_MMU_AUDIT
543 int audit_point; 543 int audit_point;
544 #endif 544 #endif
@@ -713,8 +713,9 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
713 713
714int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 714int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
715void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); 715void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
716int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, 716void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
717 struct kvm_memory_slot *slot); 717 struct kvm_memory_slot *slot,
718 gfn_t gfn_offset, unsigned long mask);
718void kvm_mmu_zap_all(struct kvm *kvm); 719void kvm_mmu_zap_all(struct kvm *kvm);
719unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); 720unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
720void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); 721void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 183922e13de1..63ab1661d00e 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -95,6 +95,14 @@ struct kvm_vcpu_pv_apf_data {
95extern void kvmclock_init(void); 95extern void kvmclock_init(void);
96extern int kvm_register_clock(char *txt); 96extern int kvm_register_clock(char *txt);
97 97
98#ifdef CONFIG_KVM_CLOCK
99bool kvm_check_and_clear_guest_paused(void);
100#else
101static inline bool kvm_check_and_clear_guest_paused(void)
102{
103 return false;
104}
 105#endif /* CONFIG_KVM_CLOCK */
98 106
99/* This instruction is vmcall. On non-VT architectures, it will generate a 107/* This instruction is vmcall. On non-VT architectures, it will generate a
100 * trap that we will then rewrite to the appropriate instruction. 108 * trap that we will then rewrite to the appropriate instruction.
@@ -173,14 +181,16 @@ static inline int kvm_para_available(void)
173 if (boot_cpu_data.cpuid_level < 0) 181 if (boot_cpu_data.cpuid_level < 0)
174 return 0; /* So we don't blow up on old processors */ 182 return 0; /* So we don't blow up on old processors */
175 183
176 cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); 184 if (cpu_has_hypervisor) {
177 memcpy(signature + 0, &ebx, 4); 185 cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
178 memcpy(signature + 4, &ecx, 4); 186 memcpy(signature + 0, &ebx, 4);
179 memcpy(signature + 8, &edx, 4); 187 memcpy(signature + 4, &ecx, 4);
180 signature[12] = 0; 188 memcpy(signature + 8, &edx, 4);
189 signature[12] = 0;
181 190
182 if (strcmp(signature, "KVMKVMKVM") == 0) 191 if (strcmp(signature, "KVMKVMKVM") == 0)
183 return 1; 192 return 1;
193 }
184 194
185 return 0; 195 return 0;
186} 196}
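
kvm_para_available() now probes CPUID only when the hypervisor-present bit (CPUID.1:ECX[31]) is set, then compares the "KVMKVMKVM" signature returned in EBX/ECX/EDX at the KVM signature leaf 0x40000000. A hedged user-space equivalent using GCC's cpuid.h helpers; it assumes an x86 build, and the raw __cpuid macro is used for the 0x40000000 leaf because __get_cpuid() range-checks against the basic-leaf maximum:

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

static int kvm_signature_present(void)
{
	unsigned int eax, ebx, ecx, edx;
	char sig[13];

	/* hypervisor-present bit: CPUID.1:ECX[31] */
	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx) || !(ecx & (1u << 31)))
		return 0;

	__cpuid(0x40000000, eax, ebx, ecx, edx);
	memcpy(sig + 0, &ebx, 4);
	memcpy(sig + 4, &ecx, 4);
	memcpy(sig + 8, &edx, 4);
	sig[12] = '\0';

	return strcmp(sig, "KVMKVMKVM") == 0;
}

int main(void)
{
	printf("running on KVM: %s\n", kvm_signature_present() ? "yes" : "no");
	return 0;
}
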
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 35f2d1948ada..6167fd798188 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -40,5 +40,6 @@ struct pvclock_wall_clock {
40} __attribute__((__packed__)); 40} __attribute__((__packed__));
41 41
42#define PVCLOCK_TSC_STABLE_BIT (1 << 0) 42#define PVCLOCK_TSC_STABLE_BIT (1 << 0)
43#define PVCLOCK_GUEST_STOPPED (1 << 1)
43#endif /* __ASSEMBLY__ */ 44#endif /* __ASSEMBLY__ */
44#endif /* _ASM_X86_PVCLOCK_ABI_H */ 45#endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index f8492da65bfc..086eb58c6e80 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -22,6 +22,7 @@
22#include <asm/msr.h> 22#include <asm/msr.h>
23#include <asm/apic.h> 23#include <asm/apic.h>
24#include <linux/percpu.h> 24#include <linux/percpu.h>
25#include <linux/hardirq.h>
25 26
26#include <asm/x86_init.h> 27#include <asm/x86_init.h>
27#include <asm/reboot.h> 28#include <asm/reboot.h>
@@ -114,6 +115,25 @@ static void kvm_get_preset_lpj(void)
114 preset_lpj = lpj; 115 preset_lpj = lpj;
115} 116}
116 117
118bool kvm_check_and_clear_guest_paused(void)
119{
120 bool ret = false;
121 struct pvclock_vcpu_time_info *src;
122
123 /*
124 * per_cpu() is safe here because this function is only called from
125 * timer functions where preemption is already disabled.
126 */
127 WARN_ON(!in_atomic());
128 src = &__get_cpu_var(hv_clock);
129 if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
130 __this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED);
131 ret = true;
132 }
133
134 return ret;
135}
136
117static struct clocksource kvm_clock = { 137static struct clocksource kvm_clock = {
118 .name = "kvm-clock", 138 .name = "kvm-clock",
119 .read = kvm_clock_get_cycles, 139 .read = kvm_clock_get_cycles,
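
The new helper lets the guest's watchdog paths ask whether the host had stopped the VM: the host side (plumbed elsewhere in this series) sets PVCLOCK_GUEST_STOPPED in the shared pvclock flags, and kvm_check_and_clear_guest_paused() consumes that bit so a long scheduling gap is forgiven once instead of being reported as a soft lockup. A simplified single-CPU model of the check-and-clear pattern; the per-cpu handling and the host side are omitted, and the flag values mirror the ABI bits above:

#include <stdbool.h>
#include <stdio.h>

#define PVCLOCK_TSC_STABLE_BIT	(1 << 0)
#define PVCLOCK_GUEST_STOPPED	(1 << 1)

/* stand-in for the shared pvclock_vcpu_time_info flags byte */
static unsigned char hv_clock_flags;

static bool check_and_clear_guest_paused(void)
{
	if (hv_clock_flags & PVCLOCK_GUEST_STOPPED) {
		hv_clock_flags &= ~PVCLOCK_GUEST_STOPPED;
		return true;
	}
	return false;
}

int main(void)
{
	hv_clock_flags |= PVCLOCK_GUEST_STOPPED;	/* host paused us */

	/* watchdog path: a long gap is forgiven exactly once */
	if (check_and_clear_guest_paused())
		puts("guest was stopped by the host; skip soft-lockup warning");
	else
		puts("real stall: report it");

	return check_and_clear_guest_paused() ? 1 : 0;	/* flag was consumed */
}
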
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 1a7fe868f375..a28f338843ea 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -36,6 +36,7 @@ config KVM
36 select TASKSTATS 36 select TASKSTATS
37 select TASK_DELAY_ACCT 37 select TASK_DELAY_ACCT
38 select PERF_EVENTS 38 select PERF_EVENTS
39 select HAVE_KVM_MSI
39 ---help--- 40 ---help---
40 Support hosting fully virtualized guest machines using hardware 41 Support hosting fully virtualized guest machines using hardware
41 virtualization extensions. You will need a fairly recent 42 virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9fed5bedaad6..7df1c6d839fb 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -247,7 +247,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
247 247
248 /* cpuid 7.0.ebx */ 248 /* cpuid 7.0.ebx */
249 const u32 kvm_supported_word9_x86_features = 249 const u32 kvm_supported_word9_x86_features =
250 F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS); 250 F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
251 F(BMI2) | F(ERMS) | F(RTM);
251 252
252 /* all calls to cpuid_count() should be made on the same cpu */ 253 /* all calls to cpuid_count() should be made on the same cpu */
253 get_cpu(); 254 get_cpu();
@@ -397,7 +398,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
397 case KVM_CPUID_SIGNATURE: { 398 case KVM_CPUID_SIGNATURE: {
398 char signature[12] = "KVMKVMKVM\0\0"; 399 char signature[12] = "KVMKVMKVM\0\0";
399 u32 *sigptr = (u32 *)signature; 400 u32 *sigptr = (u32 *)signature;
400 entry->eax = 0; 401 entry->eax = KVM_CPUID_FEATURES;
401 entry->ebx = sigptr[0]; 402 entry->ebx = sigptr[0];
402 entry->ecx = sigptr[1]; 403 entry->ecx = sigptr[1];
403 entry->edx = sigptr[2]; 404 entry->edx = sigptr[2];
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 83756223f8aa..f95d242ee9f7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -142,6 +142,10 @@
142#define Src2FS (OpFS << Src2Shift) 142#define Src2FS (OpFS << Src2Shift)
143#define Src2GS (OpGS << Src2Shift) 143#define Src2GS (OpGS << Src2Shift)
144#define Src2Mask (OpMask << Src2Shift) 144#define Src2Mask (OpMask << Src2Shift)
145#define Mmx ((u64)1 << 40) /* MMX Vector instruction */
146#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
147#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */
148#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */
145 149
146#define X2(x...) x, x 150#define X2(x...) x, x
147#define X3(x...) X2(x), x 151#define X3(x...) X2(x), x
@@ -557,6 +561,29 @@ static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
557 ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg); 561 ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
558} 562}
559 563
564/*
565 * x86 defines three classes of vector instructions: explicitly
566 * aligned, explicitly unaligned, and the rest, which change behaviour
567 * depending on whether they're AVX encoded or not.
568 *
569 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
570 * subject to the same check.
571 */
572static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
573{
574 if (likely(size < 16))
575 return false;
576
577 if (ctxt->d & Aligned)
578 return true;
579 else if (ctxt->d & Unaligned)
580 return false;
581 else if (ctxt->d & Avx)
582 return false;
583 else
584 return true;
585}
586
560static int __linearize(struct x86_emulate_ctxt *ctxt, 587static int __linearize(struct x86_emulate_ctxt *ctxt,
561 struct segmented_address addr, 588 struct segmented_address addr,
562 unsigned size, bool write, bool fetch, 589 unsigned size, bool write, bool fetch,
@@ -621,6 +648,8 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
621 } 648 }
622 if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8) 649 if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8)
623 la &= (u32)-1; 650 la &= (u32)-1;
651 if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
652 return emulate_gp(ctxt, 0);
624 *linear = la; 653 *linear = la;
625 return X86EMUL_CONTINUE; 654 return X86EMUL_CONTINUE;
626bad: 655bad:
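
insn_aligned() encodes the three x86 cases the comment describes: explicitly aligned forms take #GP on a misaligned operand, explicitly unaligned and AVX-encoded forms never do, and any other access of 16 bytes or more must be aligned. A small stand-alone model of that decision plus the (addr & (size - 1)) test added to __linearize(); the flag values are local to the sketch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define F_ALIGNED	(1u << 0)	/* e.g. MOVDQA */
#define F_UNALIGNED	(1u << 1)	/* e.g. MOVDQU */
#define F_AVX		(1u << 2)	/* VEX-encoded */

static bool insn_aligned(unsigned int flags, unsigned int size)
{
	if (size < 16)
		return false;
	if (flags & F_ALIGNED)
		return true;
	if (flags & (F_UNALIGNED | F_AVX))
		return false;
	return true;			/* legacy SSE default: must be aligned */
}

/* returns true when the access would raise #GP(0) */
static bool faults(unsigned int flags, uint64_t la, unsigned int size)
{
	return insn_aligned(flags, size) && (la & (size - 1)) != 0;
}

int main(void)
{
	printf("movdqa @0x1008: %s\n", faults(F_ALIGNED, 0x1008, 16) ? "#GP" : "ok");
	printf("movdqu @0x1008: %s\n", faults(F_UNALIGNED, 0x1008, 16) ? "#GP" : "ok");
	printf("movq   @0x1004: %s\n", faults(0, 0x1004, 8) ? "#GP" : "ok");
	return 0;
}
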
@@ -859,6 +888,40 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
859 ctxt->ops->put_fpu(ctxt); 888 ctxt->ops->put_fpu(ctxt);
860} 889}
861 890
891static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
892{
893 ctxt->ops->get_fpu(ctxt);
894 switch (reg) {
895 case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
896 case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
897 case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
898 case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
899 case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
900 case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
901 case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
902 case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
903 default: BUG();
904 }
905 ctxt->ops->put_fpu(ctxt);
906}
907
908static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
909{
910 ctxt->ops->get_fpu(ctxt);
911 switch (reg) {
912 case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
913 case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
914 case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
915 case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
916 case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
917 case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
918 case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
919 case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
920 default: BUG();
921 }
922 ctxt->ops->put_fpu(ctxt);
923}
924
862static void decode_register_operand(struct x86_emulate_ctxt *ctxt, 925static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
863 struct operand *op) 926 struct operand *op)
864{ 927{
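
The register number cannot be a runtime parameter of an inline-asm template, which is why read_mmx_reg()/write_mmx_reg() expand to one movq per register and switch on the index. A user-space illustration for %mm0 only (a sketch assuming an x86 build; emms leaves MMX state so the x87 stack is usable again afterwards):

#include <stdint.h>
#include <stdio.h>

static void write_mm0(const uint64_t *data)
{
	asm volatile("movq %0, %%mm0" : : "m"(*data));
}

static void read_mm0(uint64_t *data)
{
	asm volatile("movq %%mm0, %0" : "=m"(*data));
}

int main(void)
{
	uint64_t in = 0x1122334455667788ull, out = 0;

	write_mm0(&in);
	read_mm0(&out);
	asm volatile("emms");

	printf("%016llx\n", (unsigned long long)out);
	return out == in ? 0 : 1;
}
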
@@ -875,6 +938,13 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
875 read_sse_reg(ctxt, &op->vec_val, reg); 938 read_sse_reg(ctxt, &op->vec_val, reg);
876 return; 939 return;
877 } 940 }
941 if (ctxt->d & Mmx) {
942 reg &= 7;
943 op->type = OP_MM;
944 op->bytes = 8;
945 op->addr.mm = reg;
946 return;
947 }
878 948
879 op->type = OP_REG; 949 op->type = OP_REG;
880 if (ctxt->d & ByteOp) { 950 if (ctxt->d & ByteOp) {
@@ -902,7 +972,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
902 ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */ 972 ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */
903 } 973 }
904 974
905 ctxt->modrm = insn_fetch(u8, ctxt);
906 ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6; 975 ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6;
907 ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3; 976 ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
908 ctxt->modrm_rm |= (ctxt->modrm & 0x07); 977 ctxt->modrm_rm |= (ctxt->modrm & 0x07);
@@ -920,6 +989,12 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
920 read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm); 989 read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm);
921 return rc; 990 return rc;
922 } 991 }
992 if (ctxt->d & Mmx) {
993 op->type = OP_MM;
994 op->bytes = 8;
995 op->addr.xmm = ctxt->modrm_rm & 7;
996 return rc;
997 }
923 fetch_register_operand(op); 998 fetch_register_operand(op);
924 return rc; 999 return rc;
925 } 1000 }
@@ -1387,6 +1462,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
1387 case OP_XMM: 1462 case OP_XMM:
1388 write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm); 1463 write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm);
1389 break; 1464 break;
1465 case OP_MM:
1466 write_mmx_reg(ctxt, &ctxt->dst.mm_val, ctxt->dst.addr.mm);
1467 break;
1390 case OP_NONE: 1468 case OP_NONE:
1391 /* no writeback */ 1469 /* no writeback */
1392 break; 1470 break;
@@ -2790,7 +2868,7 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
2790 2868
2791static int em_mov(struct x86_emulate_ctxt *ctxt) 2869static int em_mov(struct x86_emulate_ctxt *ctxt)
2792{ 2870{
2793 ctxt->dst.val = ctxt->src.val; 2871 memcpy(ctxt->dst.valptr, ctxt->src.valptr, ctxt->op_bytes);
2794 return X86EMUL_CONTINUE; 2872 return X86EMUL_CONTINUE;
2795} 2873}
2796 2874
@@ -2870,12 +2948,6 @@ static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
2870 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg); 2948 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
2871} 2949}
2872 2950
2873static int em_movdqu(struct x86_emulate_ctxt *ctxt)
2874{
2875 memcpy(&ctxt->dst.vec_val, &ctxt->src.vec_val, ctxt->op_bytes);
2876 return X86EMUL_CONTINUE;
2877}
2878
2879static int em_invlpg(struct x86_emulate_ctxt *ctxt) 2951static int em_invlpg(struct x86_emulate_ctxt *ctxt)
2880{ 2952{
2881 int rc; 2953 int rc;
@@ -3061,35 +3133,13 @@ static int em_btc(struct x86_emulate_ctxt *ctxt)
3061 3133
3062static int em_bsf(struct x86_emulate_ctxt *ctxt) 3134static int em_bsf(struct x86_emulate_ctxt *ctxt)
3063{ 3135{
3064 u8 zf; 3136 emulate_2op_SrcV_nobyte(ctxt, "bsf");
3065
3066 __asm__ ("bsf %2, %0; setz %1"
3067 : "=r"(ctxt->dst.val), "=q"(zf)
3068 : "r"(ctxt->src.val));
3069
3070 ctxt->eflags &= ~X86_EFLAGS_ZF;
3071 if (zf) {
3072 ctxt->eflags |= X86_EFLAGS_ZF;
3073 /* Disable writeback. */
3074 ctxt->dst.type = OP_NONE;
3075 }
3076 return X86EMUL_CONTINUE; 3137 return X86EMUL_CONTINUE;
3077} 3138}
3078 3139
3079static int em_bsr(struct x86_emulate_ctxt *ctxt) 3140static int em_bsr(struct x86_emulate_ctxt *ctxt)
3080{ 3141{
3081 u8 zf; 3142 emulate_2op_SrcV_nobyte(ctxt, "bsr");
3082
3083 __asm__ ("bsr %2, %0; setz %1"
3084 : "=r"(ctxt->dst.val), "=q"(zf)
3085 : "r"(ctxt->src.val));
3086
3087 ctxt->eflags &= ~X86_EFLAGS_ZF;
3088 if (zf) {
3089 ctxt->eflags |= X86_EFLAGS_ZF;
3090 /* Disable writeback. */
3091 ctxt->dst.type = OP_NONE;
3092 }
3093 return X86EMUL_CONTINUE; 3143 return X86EMUL_CONTINUE;
3094} 3144}
3095 3145
@@ -3286,8 +3336,8 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3286 .check_perm = (_p) } 3336 .check_perm = (_p) }
3287#define N D(0) 3337#define N D(0)
3288#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } 3338#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
3289#define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } 3339#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
3290#define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) } 3340#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
3291#define I(_f, _e) { .flags = (_f), .u.execute = (_e) } 3341#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
3292#define II(_f, _e, _i) \ 3342#define II(_f, _e, _i) \
3293 { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } 3343 { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
@@ -3307,25 +3357,25 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3307 I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) 3357 I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
3308 3358
3309static struct opcode group7_rm1[] = { 3359static struct opcode group7_rm1[] = {
3310 DI(SrcNone | ModRM | Priv, monitor), 3360 DI(SrcNone | Priv, monitor),
3311 DI(SrcNone | ModRM | Priv, mwait), 3361 DI(SrcNone | Priv, mwait),
3312 N, N, N, N, N, N, 3362 N, N, N, N, N, N,
3313}; 3363};
3314 3364
3315static struct opcode group7_rm3[] = { 3365static struct opcode group7_rm3[] = {
3316 DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), 3366 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
3317 II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall), 3367 II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall),
3318 DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), 3368 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
3319 DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), 3369 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
3320 DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), 3370 DIP(SrcNone | Prot | Priv, stgi, check_svme),
3321 DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme), 3371 DIP(SrcNone | Prot | Priv, clgi, check_svme),
3322 DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme), 3372 DIP(SrcNone | Prot | Priv, skinit, check_svme),
3323 DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme), 3373 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
3324}; 3374};
3325 3375
3326static struct opcode group7_rm7[] = { 3376static struct opcode group7_rm7[] = {
3327 N, 3377 N,
3328 DIP(SrcNone | ModRM, rdtscp, check_rdtsc), 3378 DIP(SrcNone, rdtscp, check_rdtsc),
3329 N, N, N, N, N, N, 3379 N, N, N, N, N, N,
3330}; 3380};
3331 3381
@@ -3341,81 +3391,86 @@ static struct opcode group1[] = {
3341}; 3391};
3342 3392
3343static struct opcode group1A[] = { 3393static struct opcode group1A[] = {
3344 I(DstMem | SrcNone | ModRM | Mov | Stack, em_pop), N, N, N, N, N, N, N, 3394 I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
3345}; 3395};
3346 3396
3347static struct opcode group3[] = { 3397static struct opcode group3[] = {
3348 I(DstMem | SrcImm | ModRM, em_test), 3398 I(DstMem | SrcImm, em_test),
3349 I(DstMem | SrcImm | ModRM, em_test), 3399 I(DstMem | SrcImm, em_test),
3350 I(DstMem | SrcNone | ModRM | Lock, em_not), 3400 I(DstMem | SrcNone | Lock, em_not),
3351 I(DstMem | SrcNone | ModRM | Lock, em_neg), 3401 I(DstMem | SrcNone | Lock, em_neg),
3352 I(SrcMem | ModRM, em_mul_ex), 3402 I(SrcMem, em_mul_ex),
3353 I(SrcMem | ModRM, em_imul_ex), 3403 I(SrcMem, em_imul_ex),
3354 I(SrcMem | ModRM, em_div_ex), 3404 I(SrcMem, em_div_ex),
3355 I(SrcMem | ModRM, em_idiv_ex), 3405 I(SrcMem, em_idiv_ex),
3356}; 3406};
3357 3407
3358static struct opcode group4[] = { 3408static struct opcode group4[] = {
3359 I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45), 3409 I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
3360 I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45), 3410 I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
3361 N, N, N, N, N, N, 3411 N, N, N, N, N, N,
3362}; 3412};
3363 3413
3364static struct opcode group5[] = { 3414static struct opcode group5[] = {
3365 I(DstMem | SrcNone | ModRM | Lock, em_grp45), 3415 I(DstMem | SrcNone | Lock, em_grp45),
3366 I(DstMem | SrcNone | ModRM | Lock, em_grp45), 3416 I(DstMem | SrcNone | Lock, em_grp45),
3367 I(SrcMem | ModRM | Stack, em_grp45), 3417 I(SrcMem | Stack, em_grp45),
3368 I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far), 3418 I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
3369 I(SrcMem | ModRM | Stack, em_grp45), 3419 I(SrcMem | Stack, em_grp45),
3370 I(SrcMemFAddr | ModRM | ImplicitOps, em_grp45), 3420 I(SrcMemFAddr | ImplicitOps, em_grp45),
3371 I(SrcMem | ModRM | Stack, em_grp45), N, 3421 I(SrcMem | Stack, em_grp45), N,
3372}; 3422};
3373 3423
3374static struct opcode group6[] = { 3424static struct opcode group6[] = {
3375 DI(ModRM | Prot, sldt), 3425 DI(Prot, sldt),
3376 DI(ModRM | Prot, str), 3426 DI(Prot, str),
3377 DI(ModRM | Prot | Priv, lldt), 3427 DI(Prot | Priv, lldt),
3378 DI(ModRM | Prot | Priv, ltr), 3428 DI(Prot | Priv, ltr),
3379 N, N, N, N, 3429 N, N, N, N,
3380}; 3430};
3381 3431
3382static struct group_dual group7 = { { 3432static struct group_dual group7 = { {
3383 DI(ModRM | Mov | DstMem | Priv, sgdt), 3433 DI(Mov | DstMem | Priv, sgdt),
3384 DI(ModRM | Mov | DstMem | Priv, sidt), 3434 DI(Mov | DstMem | Priv, sidt),
3385 II(ModRM | SrcMem | Priv, em_lgdt, lgdt), 3435 II(SrcMem | Priv, em_lgdt, lgdt),
3386 II(ModRM | SrcMem | Priv, em_lidt, lidt), 3436 II(SrcMem | Priv, em_lidt, lidt),
3387 II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, 3437 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
3388 II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), 3438 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
3389 II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg), 3439 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
3390}, { 3440}, {
3391 I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall), 3441 I(SrcNone | Priv | VendorSpecific, em_vmcall),
3392 EXT(0, group7_rm1), 3442 EXT(0, group7_rm1),
3393 N, EXT(0, group7_rm3), 3443 N, EXT(0, group7_rm3),
3394 II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, 3444 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
3395 II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7), 3445 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
3446 EXT(0, group7_rm7),
3396} }; 3447} };
3397 3448
3398static struct opcode group8[] = { 3449static struct opcode group8[] = {
3399 N, N, N, N, 3450 N, N, N, N,
3400 I(DstMem | SrcImmByte | ModRM, em_bt), 3451 I(DstMem | SrcImmByte, em_bt),
3401 I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_bts), 3452 I(DstMem | SrcImmByte | Lock | PageTable, em_bts),
3402 I(DstMem | SrcImmByte | ModRM | Lock, em_btr), 3453 I(DstMem | SrcImmByte | Lock, em_btr),
3403 I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_btc), 3454 I(DstMem | SrcImmByte | Lock | PageTable, em_btc),
3404}; 3455};
3405 3456
3406static struct group_dual group9 = { { 3457static struct group_dual group9 = { {
3407 N, I(DstMem64 | ModRM | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, 3458 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
3408}, { 3459}, {
3409 N, N, N, N, N, N, N, N, 3460 N, N, N, N, N, N, N, N,
3410} }; 3461} };
3411 3462
3412static struct opcode group11[] = { 3463static struct opcode group11[] = {
3413 I(DstMem | SrcImm | ModRM | Mov | PageTable, em_mov), 3464 I(DstMem | SrcImm | Mov | PageTable, em_mov),
3414 X7(D(Undefined)), 3465 X7(D(Undefined)),
3415}; 3466};
3416 3467
3417static struct gprefix pfx_0f_6f_0f_7f = { 3468static struct gprefix pfx_0f_6f_0f_7f = {
3418 N, N, N, I(Sse, em_movdqu), 3469 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
3470};
3471
3472static struct gprefix pfx_vmovntpx = {
3473 I(0, em_mov), N, N, N,
3419}; 3474};
3420 3475
3421static struct opcode opcode_table[256] = { 3476static struct opcode opcode_table[256] = {
@@ -3464,10 +3519,10 @@ static struct opcode opcode_table[256] = {
3464 /* 0x70 - 0x7F */ 3519 /* 0x70 - 0x7F */
3465 X16(D(SrcImmByte)), 3520 X16(D(SrcImmByte)),
3466 /* 0x80 - 0x87 */ 3521 /* 0x80 - 0x87 */
3467 G(ByteOp | DstMem | SrcImm | ModRM | Group, group1), 3522 G(ByteOp | DstMem | SrcImm, group1),
3468 G(DstMem | SrcImm | ModRM | Group, group1), 3523 G(DstMem | SrcImm, group1),
3469 G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1), 3524 G(ByteOp | DstMem | SrcImm | No64, group1),
3470 G(DstMem | SrcImmByte | ModRM | Group, group1), 3525 G(DstMem | SrcImmByte, group1),
3471 I2bv(DstMem | SrcReg | ModRM, em_test), 3526 I2bv(DstMem | SrcReg | ModRM, em_test),
3472 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), 3527 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
3473 /* 0x88 - 0x8F */ 3528 /* 0x88 - 0x8F */
@@ -3549,7 +3604,8 @@ static struct opcode twobyte_table[256] = {
3549 IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write), 3604 IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write),
3550 IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write), 3605 IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write),
3551 N, N, N, N, 3606 N, N, N, N,
3552 N, N, N, N, N, N, N, N, 3607 N, N, N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx),
3608 N, N, N, N,
3553 /* 0x30 - 0x3F */ 3609 /* 0x30 - 0x3F */
3554 II(ImplicitOps | Priv, em_wrmsr, wrmsr), 3610 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
3555 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), 3611 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
@@ -3897,17 +3953,16 @@ done_prefixes:
3897 } 3953 }
3898 ctxt->d = opcode.flags; 3954 ctxt->d = opcode.flags;
3899 3955
3956 if (ctxt->d & ModRM)
3957 ctxt->modrm = insn_fetch(u8, ctxt);
3958
3900 while (ctxt->d & GroupMask) { 3959 while (ctxt->d & GroupMask) {
3901 switch (ctxt->d & GroupMask) { 3960 switch (ctxt->d & GroupMask) {
3902 case Group: 3961 case Group:
3903 ctxt->modrm = insn_fetch(u8, ctxt);
3904 --ctxt->_eip;
3905 goffset = (ctxt->modrm >> 3) & 7; 3962 goffset = (ctxt->modrm >> 3) & 7;
3906 opcode = opcode.u.group[goffset]; 3963 opcode = opcode.u.group[goffset];
3907 break; 3964 break;
3908 case GroupDual: 3965 case GroupDual:
3909 ctxt->modrm = insn_fetch(u8, ctxt);
3910 --ctxt->_eip;
3911 goffset = (ctxt->modrm >> 3) & 7; 3966 goffset = (ctxt->modrm >> 3) & 7;
3912 if ((ctxt->modrm >> 6) == 3) 3967 if ((ctxt->modrm >> 6) == 3)
3913 opcode = opcode.u.gdual->mod3[goffset]; 3968 opcode = opcode.u.gdual->mod3[goffset];
@@ -3960,6 +4015,8 @@ done_prefixes:
3960 4015
3961 if (ctxt->d & Sse) 4016 if (ctxt->d & Sse)
3962 ctxt->op_bytes = 16; 4017 ctxt->op_bytes = 16;
4018 else if (ctxt->d & Mmx)
4019 ctxt->op_bytes = 8;
3963 4020
3964 /* ModRM and SIB bytes. */ 4021 /* ModRM and SIB bytes. */
3965 if (ctxt->d & ModRM) { 4022 if (ctxt->d & ModRM) {
@@ -4030,6 +4087,35 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
4030 return false; 4087 return false;
4031} 4088}
4032 4089
4090static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
4091{
4092 bool fault = false;
4093
4094 ctxt->ops->get_fpu(ctxt);
4095 asm volatile("1: fwait \n\t"
4096 "2: \n\t"
4097 ".pushsection .fixup,\"ax\" \n\t"
4098 "3: \n\t"
4099 "movb $1, %[fault] \n\t"
4100 "jmp 2b \n\t"
4101 ".popsection \n\t"
4102 _ASM_EXTABLE(1b, 3b)
4103 : [fault]"+qm"(fault));
4104 ctxt->ops->put_fpu(ctxt);
4105
4106 if (unlikely(fault))
4107 return emulate_exception(ctxt, MF_VECTOR, 0, false);
4108
4109 return X86EMUL_CONTINUE;
4110}
4111
4112static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
4113 struct operand *op)
4114{
4115 if (op->type == OP_MM)
4116 read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
4117}
4118
4033int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) 4119int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
4034{ 4120{
4035 struct x86_emulate_ops *ops = ctxt->ops; 4121 struct x86_emulate_ops *ops = ctxt->ops;
@@ -4054,18 +4140,31 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
4054 goto done; 4140 goto done;
4055 } 4141 }
4056 4142
4057 if ((ctxt->d & Sse) 4143 if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
4058 && ((ops->get_cr(ctxt, 0) & X86_CR0_EM) 4144 || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
4059 || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
4060 rc = emulate_ud(ctxt); 4145 rc = emulate_ud(ctxt);
4061 goto done; 4146 goto done;
4062 } 4147 }
4063 4148
4064 if ((ctxt->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { 4149 if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
4065 rc = emulate_nm(ctxt); 4150 rc = emulate_nm(ctxt);
4066 goto done; 4151 goto done;
4067 } 4152 }
4068 4153
4154 if (ctxt->d & Mmx) {
4155 rc = flush_pending_x87_faults(ctxt);
4156 if (rc != X86EMUL_CONTINUE)
4157 goto done;
4158 /*
4159 * Now that we know the fpu is exception safe, we can fetch
4160 * operands from it.
4161 */
4162 fetch_possible_mmx_operand(ctxt, &ctxt->src);
4163 fetch_possible_mmx_operand(ctxt, &ctxt->src2);
4164 if (!(ctxt->d & Mov))
4165 fetch_possible_mmx_operand(ctxt, &ctxt->dst);
4166 }
4167
4069 if (unlikely(ctxt->guest_mode) && ctxt->intercept) { 4168 if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
4070 rc = emulator_check_intercept(ctxt, ctxt->intercept, 4169 rc = emulator_check_intercept(ctxt, ctxt->intercept,
4071 X86_ICPT_PRE_EXCEPT); 4170 X86_ICPT_PRE_EXCEPT);
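
The guard added a few hunks above mirrors the hardware checks before an SSE or MMX instruction is emulated: CR0.EM set yields #UD for both, a clear CR4.OSFXSR additionally yields #UD for SSE only, and CR0.TS set yields #NM so lazy FPU switching keeps working. A compact truth-table model of that logic; the enum and helper are local to the sketch:

#include <stdbool.h>
#include <stdio.h>

enum fault { FAULT_NONE, FAULT_UD, FAULT_NM };

static enum fault vector_insn_fault(bool is_sse, bool is_mmx,
				    bool cr0_em, bool cr0_ts, bool cr4_osfxsr)
{
	if ((is_sse || is_mmx) && cr0_em)
		return FAULT_UD;
	if (is_sse && !cr4_osfxsr)
		return FAULT_UD;
	if ((is_sse || is_mmx) && cr0_ts)
		return FAULT_NM;
	return FAULT_NONE;
}

int main(void)
{
	/* MMX does not care about OSFXSR ... */
	printf("%d\n", vector_insn_fault(false, true, false, false, false)); /* 0 */
	/* ... but SSE without OSFXSR is #UD */
	printf("%d\n", vector_insn_fault(true, false, false, false, false)); /* 1 */
	/* TS set: #NM, so the host can restore FPU state lazily */
	printf("%d\n", vector_insn_fault(false, true, false, true, true));   /* 2 */
	return 0;
}
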
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index d68f99df690c..adba28f88d1a 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -34,7 +34,6 @@
34 34
35#include <linux/kvm_host.h> 35#include <linux/kvm_host.h>
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/workqueue.h>
38 37
39#include "irq.h" 38#include "irq.h"
40#include "i8254.h" 39#include "i8254.h"
@@ -249,7 +248,7 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
249 /* in this case, we had multiple outstanding pit interrupts 248 /* in this case, we had multiple outstanding pit interrupts
250 * that we needed to inject. Reinject 249 * that we needed to inject. Reinject
251 */ 250 */
252 queue_work(ps->pit->wq, &ps->pit->expired); 251 queue_kthread_work(&ps->pit->worker, &ps->pit->expired);
253 ps->irq_ack = 1; 252 ps->irq_ack = 1;
254 spin_unlock(&ps->inject_lock); 253 spin_unlock(&ps->inject_lock);
255} 254}
@@ -270,7 +269,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
270static void destroy_pit_timer(struct kvm_pit *pit) 269static void destroy_pit_timer(struct kvm_pit *pit)
271{ 270{
272 hrtimer_cancel(&pit->pit_state.pit_timer.timer); 271 hrtimer_cancel(&pit->pit_state.pit_timer.timer);
273 cancel_work_sync(&pit->expired); 272 flush_kthread_work(&pit->expired);
274} 273}
275 274
276static bool kpit_is_periodic(struct kvm_timer *ktimer) 275static bool kpit_is_periodic(struct kvm_timer *ktimer)
@@ -284,7 +283,7 @@ static struct kvm_timer_ops kpit_ops = {
284 .is_periodic = kpit_is_periodic, 283 .is_periodic = kpit_is_periodic,
285}; 284};
286 285
287static void pit_do_work(struct work_struct *work) 286static void pit_do_work(struct kthread_work *work)
288{ 287{
289 struct kvm_pit *pit = container_of(work, struct kvm_pit, expired); 288 struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
290 struct kvm *kvm = pit->kvm; 289 struct kvm *kvm = pit->kvm;
@@ -328,7 +327,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
328 327
329 if (ktimer->reinject || !atomic_read(&ktimer->pending)) { 328 if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
330 atomic_inc(&ktimer->pending); 329 atomic_inc(&ktimer->pending);
331 queue_work(pt->wq, &pt->expired); 330 queue_kthread_work(&pt->worker, &pt->expired);
332 } 331 }
333 332
334 if (ktimer->t_ops->is_periodic(ktimer)) { 333 if (ktimer->t_ops->is_periodic(ktimer)) {
@@ -353,7 +352,7 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
353 352
354 /* TODO The new value only affected after the retriggered */ 353 /* TODO The new value only affected after the retriggered */
355 hrtimer_cancel(&pt->timer); 354 hrtimer_cancel(&pt->timer);
356 cancel_work_sync(&ps->pit->expired); 355 flush_kthread_work(&ps->pit->expired);
357 pt->period = interval; 356 pt->period = interval;
358 ps->is_periodic = is_period; 357 ps->is_periodic = is_period;
359 358
@@ -669,6 +668,8 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
669{ 668{
670 struct kvm_pit *pit; 669 struct kvm_pit *pit;
671 struct kvm_kpit_state *pit_state; 670 struct kvm_kpit_state *pit_state;
671 struct pid *pid;
672 pid_t pid_nr;
672 int ret; 673 int ret;
673 674
674 pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL); 675 pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
@@ -685,14 +686,20 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
685 mutex_lock(&pit->pit_state.lock); 686 mutex_lock(&pit->pit_state.lock);
686 spin_lock_init(&pit->pit_state.inject_lock); 687 spin_lock_init(&pit->pit_state.inject_lock);
687 688
688 pit->wq = create_singlethread_workqueue("kvm-pit-wq"); 689 pid = get_pid(task_tgid(current));
689 if (!pit->wq) { 690 pid_nr = pid_vnr(pid);
691 put_pid(pid);
692
693 init_kthread_worker(&pit->worker);
694 pit->worker_task = kthread_run(kthread_worker_fn, &pit->worker,
695 "kvm-pit/%d", pid_nr);
696 if (IS_ERR(pit->worker_task)) {
690 mutex_unlock(&pit->pit_state.lock); 697 mutex_unlock(&pit->pit_state.lock);
691 kvm_free_irq_source_id(kvm, pit->irq_source_id); 698 kvm_free_irq_source_id(kvm, pit->irq_source_id);
692 kfree(pit); 699 kfree(pit);
693 return NULL; 700 return NULL;
694 } 701 }
695 INIT_WORK(&pit->expired, pit_do_work); 702 init_kthread_work(&pit->expired, pit_do_work);
696 703
697 kvm->arch.vpit = pit; 704 kvm->arch.vpit = pit;
698 pit->kvm = kvm; 705 pit->kvm = kvm;
@@ -736,7 +743,7 @@ fail:
736 kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier); 743 kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
737 kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); 744 kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
738 kvm_free_irq_source_id(kvm, pit->irq_source_id); 745 kvm_free_irq_source_id(kvm, pit->irq_source_id);
739 destroy_workqueue(pit->wq); 746 kthread_stop(pit->worker_task);
740 kfree(pit); 747 kfree(pit);
741 return NULL; 748 return NULL;
742} 749}
@@ -756,10 +763,10 @@ void kvm_free_pit(struct kvm *kvm)
756 mutex_lock(&kvm->arch.vpit->pit_state.lock); 763 mutex_lock(&kvm->arch.vpit->pit_state.lock);
757 timer = &kvm->arch.vpit->pit_state.pit_timer.timer; 764 timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
758 hrtimer_cancel(timer); 765 hrtimer_cancel(timer);
759 cancel_work_sync(&kvm->arch.vpit->expired); 766 flush_kthread_work(&kvm->arch.vpit->expired);
767 kthread_stop(kvm->arch.vpit->worker_task);
760 kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); 768 kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id);
761 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 769 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
762 destroy_workqueue(kvm->arch.vpit->wq);
763 kfree(kvm->arch.vpit); 770 kfree(kvm->arch.vpit);
764 } 771 }
765} 772}
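
The PIT's private workqueue is replaced by a dedicated kthread_worker named "kvm-pit/<pid>", so interrupt re-injection runs in a task that clearly belongs to the owning VM rather than in a shared kworker. A rough user-space analogue of the worker pattern using pthreads; this is a sketch of the idea, not the kthread_worker API:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct pit_worker {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int pending;		/* queued "timer expired" work items */
	bool stop;
};

static void *worker_fn(void *arg)			/* kthread_worker_fn analogue */
{
	struct pit_worker *w = arg;

	pthread_mutex_lock(&w->lock);
	while (!w->stop || w->pending) {
		if (w->pending) {
			w->pending--;
			pthread_mutex_unlock(&w->lock);
			puts("inject pending PIT interrupt");	/* pit_do_work analogue */
			pthread_mutex_lock(&w->lock);
		} else {
			pthread_cond_wait(&w->cond, &w->lock);
		}
	}
	pthread_mutex_unlock(&w->lock);
	return NULL;
}

int main(void)
{
	struct pit_worker w = { PTHREAD_MUTEX_INITIALIZER,
				PTHREAD_COND_INITIALIZER, 0, false };
	pthread_t task;

	pthread_create(&task, NULL, worker_fn, &w);	/* kthread_run analogue */

	pthread_mutex_lock(&w.lock);			/* queue_kthread_work analogue */
	w.pending++;
	pthread_cond_signal(&w.cond);
	pthread_mutex_unlock(&w.lock);

	pthread_mutex_lock(&w.lock);			/* kthread_stop analogue */
	w.stop = true;
	pthread_cond_signal(&w.cond);
	pthread_mutex_unlock(&w.lock);

	pthread_join(task, NULL);
	return 0;
}
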
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 51a97426e791..fdf40425ea1d 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -1,6 +1,8 @@
1#ifndef __I8254_H 1#ifndef __I8254_H
2#define __I8254_H 2#define __I8254_H
3 3
4#include <linux/kthread.h>
5
4#include "iodev.h" 6#include "iodev.h"
5 7
6struct kvm_kpit_channel_state { 8struct kvm_kpit_channel_state {
@@ -39,8 +41,9 @@ struct kvm_pit {
39 struct kvm_kpit_state pit_state; 41 struct kvm_kpit_state pit_state;
40 int irq_source_id; 42 int irq_source_id;
41 struct kvm_irq_mask_notifier mask_notifier; 43 struct kvm_irq_mask_notifier mask_notifier;
42 struct workqueue_struct *wq; 44 struct kthread_worker worker;
43 struct work_struct expired; 45 struct task_struct *worker_task;
46 struct kthread_work expired;
44}; 47};
45 48
46#define KVM_PIT_BASE_ADDRESS 0x40 49#define KVM_PIT_BASE_ADDRESS 0x40
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 858432287ab6..93c15743f1ee 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -92,6 +92,11 @@ static inline int apic_test_and_clear_vector(int vec, void *bitmap)
92 return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 92 return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
93} 93}
94 94
95static inline int apic_test_vector(int vec, void *bitmap)
96{
97 return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
98}
99
95static inline void apic_set_vector(int vec, void *bitmap) 100static inline void apic_set_vector(int vec, void *bitmap)
96{ 101{
97 set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 102 set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -480,7 +485,6 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
480static void apic_set_eoi(struct kvm_lapic *apic) 485static void apic_set_eoi(struct kvm_lapic *apic)
481{ 486{
482 int vector = apic_find_highest_isr(apic); 487 int vector = apic_find_highest_isr(apic);
483 int trigger_mode;
484 /* 488 /*
485 * Not every write EOI will has corresponding ISR, 489 * Not every write EOI will has corresponding ISR,
486 * one example is when Kernel check timer on setup_IO_APIC 490 * one example is when Kernel check timer on setup_IO_APIC
@@ -491,12 +495,15 @@ static void apic_set_eoi(struct kvm_lapic *apic)
491 apic_clear_vector(vector, apic->regs + APIC_ISR); 495 apic_clear_vector(vector, apic->regs + APIC_ISR);
492 apic_update_ppr(apic); 496 apic_update_ppr(apic);
493 497
494 if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR)) 498 if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
495 trigger_mode = IOAPIC_LEVEL_TRIG; 499 kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
496 else 500 int trigger_mode;
497 trigger_mode = IOAPIC_EDGE_TRIG; 501 if (apic_test_vector(vector, apic->regs + APIC_TMR))
498 if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) 502 trigger_mode = IOAPIC_LEVEL_TRIG;
503 else
504 trigger_mode = IOAPIC_EDGE_TRIG;
499 kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); 505 kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
506 }
500 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 507 kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
501} 508}
502 509
@@ -1081,6 +1088,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
1081 apic_update_ppr(apic); 1088 apic_update_ppr(apic);
1082 1089
1083 vcpu->arch.apic_arb_prio = 0; 1090 vcpu->arch.apic_arb_prio = 0;
1091 vcpu->arch.apic_attention = 0;
1084 1092
1085 apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" 1093 apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
1086 "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, 1094 "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
@@ -1280,7 +1288,7 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
1280 u32 data; 1288 u32 data;
1281 void *vapic; 1289 void *vapic;
1282 1290
1283 if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) 1291 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
1284 return; 1292 return;
1285 1293
1286 vapic = kmap_atomic(vcpu->arch.apic->vapic_page); 1294 vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
@@ -1297,7 +1305,7 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
1297 struct kvm_lapic *apic; 1305 struct kvm_lapic *apic;
1298 void *vapic; 1306 void *vapic;
1299 1307
1300 if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) 1308 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
1301 return; 1309 return;
1302 1310
1303 apic = vcpu->arch.apic; 1311 apic = vcpu->arch.apic;
@@ -1317,10 +1325,11 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
1317 1325
1318void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) 1326void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
1319{ 1327{
1320 if (!irqchip_in_kernel(vcpu->kvm))
1321 return;
1322
1323 vcpu->arch.apic->vapic_addr = vapic_addr; 1328 vcpu->arch.apic->vapic_addr = vapic_addr;
1329 if (vapic_addr)
1330 __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
1331 else
1332 __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
1324} 1333}
1325 1334
1326int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1335int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
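
kvm_lapic_set_vapic_addr() now records whether a vapic page is registered in a per-vcpu "attention" bitmask instead of re-testing irqchip_in_kernel() and the address on every sync, so the hot paths test a single bit. A tiny sketch of the attention-bit idiom; the bit number and helpers are local to the example:

#include <stdio.h>

#define APIC_CHECK_VAPIC	0	/* bit number, as KVM_APIC_CHECK_VAPIC */

static unsigned long apic_attention;

static void set_vapic_addr(unsigned long vapic_addr)
{
	if (vapic_addr)
		apic_attention |= 1ul << APIC_CHECK_VAPIC;
	else
		apic_attention &= ~(1ul << APIC_CHECK_VAPIC);
}

static void sync_from_vapic(void)
{
	if (!(apic_attention & (1ul << APIC_CHECK_VAPIC)))
		return;			/* fast path: nothing registered */
	puts("copy TPR from the guest's vapic page");
}

int main(void)
{
	sync_from_vapic();		/* no-op */
	set_vapic_addr(0x1000);
	sync_from_vapic();		/* does the work */
	set_vapic_addr(0);
	sync_from_vapic();		/* no-op again */
	return 0;
}
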
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4cb164268846..72102e0ab7cb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -135,8 +135,6 @@ module_param(dbg, bool, 0644);
135#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ 135#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
136 | PT64_NX_MASK) 136 | PT64_NX_MASK)
137 137
138#define PTE_LIST_EXT 4
139
140#define ACC_EXEC_MASK 1 138#define ACC_EXEC_MASK 1
141#define ACC_WRITE_MASK PT_WRITABLE_MASK 139#define ACC_WRITE_MASK PT_WRITABLE_MASK
142#define ACC_USER_MASK PT_USER_MASK 140#define ACC_USER_MASK PT_USER_MASK
@@ -151,6 +149,9 @@ module_param(dbg, bool, 0644);
151 149
152#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) 150#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
153 151
152/* make pte_list_desc fit well in cache line */
153#define PTE_LIST_EXT 3
154
154struct pte_list_desc { 155struct pte_list_desc {
155 u64 *sptes[PTE_LIST_EXT]; 156 u64 *sptes[PTE_LIST_EXT];
156 struct pte_list_desc *more; 157 struct pte_list_desc *more;
@@ -550,19 +551,29 @@ static u64 mmu_spte_get_lockless(u64 *sptep)
550 551
551static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) 552static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
552{ 553{
553 rcu_read_lock(); 554 /*
554 atomic_inc(&vcpu->kvm->arch.reader_counter); 555 * Prevent page table teardown by making any free-er wait during
555 556 * kvm_flush_remote_tlbs() IPI to all active vcpus.
556 /* Increase the counter before walking shadow page table */ 557 */
557 smp_mb__after_atomic_inc(); 558 local_irq_disable();
559 vcpu->mode = READING_SHADOW_PAGE_TABLES;
560 /*
561 * Make sure a following spte read is not reordered ahead of the write
562 * to vcpu->mode.
563 */
564 smp_mb();
558} 565}
559 566
560static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) 567static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
561{ 568{
562 /* Decrease the counter after walking shadow page table finished */ 569 /*
563 smp_mb__before_atomic_dec(); 570 * Make sure the write to vcpu->mode is not reordered in front of
564 atomic_dec(&vcpu->kvm->arch.reader_counter); 571 * reads to sptes. If it does, kvm_commit_zap_page() can see us
565 rcu_read_unlock(); 572 * OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
573 */
574 smp_mb();
575 vcpu->mode = OUTSIDE_GUEST_MODE;
576 local_irq_enable();
566} 577}
567 578
568static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, 579static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
@@ -841,32 +852,6 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
841 return count; 852 return count;
842} 853}
843 854
844static u64 *pte_list_next(unsigned long *pte_list, u64 *spte)
845{
846 struct pte_list_desc *desc;
847 u64 *prev_spte;
848 int i;
849
850 if (!*pte_list)
851 return NULL;
852 else if (!(*pte_list & 1)) {
853 if (!spte)
854 return (u64 *)*pte_list;
855 return NULL;
856 }
857 desc = (struct pte_list_desc *)(*pte_list & ~1ul);
858 prev_spte = NULL;
859 while (desc) {
860 for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) {
861 if (prev_spte == spte)
862 return desc->sptes[i];
863 prev_spte = desc->sptes[i];
864 }
865 desc = desc->more;
866 }
867 return NULL;
868}
869
870static void 855static void
871pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, 856pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc,
872 int i, struct pte_list_desc *prev_desc) 857 int i, struct pte_list_desc *prev_desc)
@@ -987,11 +972,6 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
987 return pte_list_add(vcpu, spte, rmapp); 972 return pte_list_add(vcpu, spte, rmapp);
988} 973}
989 974
990static u64 *rmap_next(unsigned long *rmapp, u64 *spte)
991{
992 return pte_list_next(rmapp, spte);
993}
994
995static void rmap_remove(struct kvm *kvm, u64 *spte) 975static void rmap_remove(struct kvm *kvm, u64 *spte)
996{ 976{
997 struct kvm_mmu_page *sp; 977 struct kvm_mmu_page *sp;
@@ -1004,106 +984,201 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
1004 pte_list_remove(spte, rmapp); 984 pte_list_remove(spte, rmapp);
1005} 985}
1006 986
987/*
988 * Used by the following functions to iterate through the sptes linked by a
989 * rmap. All fields are private and not assumed to be used outside.
990 */
991struct rmap_iterator {
992 /* private fields */
993 struct pte_list_desc *desc; /* holds the sptep if not NULL */
994 int pos; /* index of the sptep */
995};
996
997/*
998 * Iteration must be started by this function. This should also be used after
999 * removing/dropping sptes from the rmap link because in such cases the
1000 * information in the iterator may not be valid.
1001 *
1002 * Returns sptep if found, NULL otherwise.
1003 */
1004static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
1005{
1006 if (!rmap)
1007 return NULL;
1008
1009 if (!(rmap & 1)) {
1010 iter->desc = NULL;
1011 return (u64 *)rmap;
1012 }
1013
1014 iter->desc = (struct pte_list_desc *)(rmap & ~1ul);
1015 iter->pos = 0;
1016 return iter->desc->sptes[iter->pos];
1017}
1018
1019/*
1020 * Must be used with a valid iterator: e.g. after rmap_get_first().
1021 *
1022 * Returns sptep if found, NULL otherwise.
1023 */
1024static u64 *rmap_get_next(struct rmap_iterator *iter)
1025{
1026 if (iter->desc) {
1027 if (iter->pos < PTE_LIST_EXT - 1) {
1028 u64 *sptep;
1029
1030 ++iter->pos;
1031 sptep = iter->desc->sptes[iter->pos];
1032 if (sptep)
1033 return sptep;
1034 }
1035
1036 iter->desc = iter->desc->more;
1037
1038 if (iter->desc) {
1039 iter->pos = 0;
1040 /* desc->sptes[0] cannot be NULL */
1041 return iter->desc->sptes[iter->pos];
1042 }
1043 }
1044
1045 return NULL;
1046}
1047
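
The pte_list_next() cursor is replaced by an explicit rmap_iterator: an rmap is either a single spte pointer (low bit clear) or a tagged pointer (low bit set) to a chain of pte_list_desc blocks, and rmap_get_first()/rmap_get_next() walk that encoding. A self-contained user-space model of the tagged-pointer walk; structure names mirror the code above but are local to the sketch:

#include <stdint.h>
#include <stdio.h>

#define PTE_LIST_EXT 3

struct pte_list_desc {
	uint64_t *sptes[PTE_LIST_EXT];
	struct pte_list_desc *more;
};

struct rmap_iterator {
	struct pte_list_desc *desc;
	int pos;
};

static uint64_t *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
{
	if (!rmap)
		return NULL;
	if (!(rmap & 1)) {			/* single spte, stored directly */
		iter->desc = NULL;
		return (uint64_t *)rmap;
	}
	iter->desc = (struct pte_list_desc *)(rmap & ~1ul);
	iter->pos = 0;
	return iter->desc->sptes[0];
}

static uint64_t *rmap_get_next(struct rmap_iterator *iter)
{
	if (!iter->desc)
		return NULL;
	if (iter->pos < PTE_LIST_EXT - 1 && iter->desc->sptes[iter->pos + 1])
		return iter->desc->sptes[++iter->pos];
	iter->desc = iter->desc->more;
	if (!iter->desc)
		return NULL;
	iter->pos = 0;
	return iter->desc->sptes[0];		/* sptes[0] of a desc is never NULL */
}

int main(void)
{
	uint64_t a = 1, b = 2, c = 3, d = 4;
	struct pte_list_desc tail = { { &d, NULL, NULL }, NULL };
	struct pte_list_desc head = { { &a, &b, &c }, &tail };
	unsigned long rmap = (unsigned long)&head | 1;	/* tagged: points at a desc chain */
	struct rmap_iterator iter;
	uint64_t *sptep;

	for (sptep = rmap_get_first(rmap, &iter); sptep;
	     sptep = rmap_get_next(&iter))
		printf("spte -> %llu\n", (unsigned long long)*sptep);
	return 0;
}
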
1007static void drop_spte(struct kvm *kvm, u64 *sptep) 1048static void drop_spte(struct kvm *kvm, u64 *sptep)
1008{ 1049{
1009 if (mmu_spte_clear_track_bits(sptep)) 1050 if (mmu_spte_clear_track_bits(sptep))
1010 rmap_remove(kvm, sptep); 1051 rmap_remove(kvm, sptep);
1011} 1052}
1012 1053
1013int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, 1054static int __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level)
1014 struct kvm_memory_slot *slot)
1015{ 1055{
1016 unsigned long *rmapp; 1056 u64 *sptep;
1017 u64 *spte; 1057 struct rmap_iterator iter;
1018 int i, write_protected = 0; 1058 int write_protected = 0;
1019 1059
1020 rmapp = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot); 1060 for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
1021 spte = rmap_next(rmapp, NULL); 1061 BUG_ON(!(*sptep & PT_PRESENT_MASK));
1022 while (spte) { 1062 rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
1023 BUG_ON(!(*spte & PT_PRESENT_MASK)); 1063
1024 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); 1064 if (!is_writable_pte(*sptep)) {
1025 if (is_writable_pte(*spte)) { 1065 sptep = rmap_get_next(&iter);
1026 mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK); 1066 continue;
1027 write_protected = 1;
1028 } 1067 }
1029 spte = rmap_next(rmapp, spte);
1030 }
1031 1068
1032 /* check for huge page mappings */ 1069 if (level == PT_PAGE_TABLE_LEVEL) {
1033 for (i = PT_DIRECTORY_LEVEL; 1070 mmu_spte_update(sptep, *sptep & ~PT_WRITABLE_MASK);
1034 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { 1071 sptep = rmap_get_next(&iter);
1035 rmapp = __gfn_to_rmap(gfn, i, slot); 1072 } else {
1036 spte = rmap_next(rmapp, NULL); 1073 BUG_ON(!is_large_pte(*sptep));
1037 while (spte) { 1074 drop_spte(kvm, sptep);
1038 BUG_ON(!(*spte & PT_PRESENT_MASK)); 1075 --kvm->stat.lpages;
1039 BUG_ON(!is_large_pte(*spte)); 1076 sptep = rmap_get_first(*rmapp, &iter);
1040 pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
1041 if (is_writable_pte(*spte)) {
1042 drop_spte(kvm, spte);
1043 --kvm->stat.lpages;
1044 spte = NULL;
1045 write_protected = 1;
1046 }
1047 spte = rmap_next(rmapp, spte);
1048 } 1077 }
1078
1079 write_protected = 1;
1049 } 1080 }
1050 1081
1051 return write_protected; 1082 return write_protected;
1052} 1083}
1053 1084
1085/**
1086 * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
1087 * @kvm: kvm instance
1088 * @slot: slot to protect
1089 * @gfn_offset: start of the BITS_PER_LONG pages we care about
1090 * @mask: indicates which pages we should protect
1091 *
1092 * Used when we do not need to care about huge page mappings: e.g. during dirty
1093 * logging we do not have any such mappings.
1094 */
1095void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
1096 struct kvm_memory_slot *slot,
1097 gfn_t gfn_offset, unsigned long mask)
1098{
1099 unsigned long *rmapp;
1100
1101 while (mask) {
1102 rmapp = &slot->rmap[gfn_offset + __ffs(mask)];
1103 __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL);
1104
1105 /* clear the first set bit */
1106 mask &= mask - 1;
1107 }
1108}
1109
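
kvm_mmu_write_protect_pt_masked() walks one word of the dirty bitmap at a time: __ffs() finds the lowest dirty gfn and mask &= mask - 1 clears that bit, so the loop visits exactly the set bits. A tiny stand-alone version of the bit walk, using the GCC builtin in place of __ffs:

#include <stdio.h>

/* visit each set bit of one dirty-bitmap word, lowest first */
static void write_protect_masked(unsigned long gfn_offset, unsigned long mask)
{
	while (mask) {
		unsigned long gfn = gfn_offset + __builtin_ctzl(mask);

		printf("write-protect gfn %lu\n", gfn);
		mask &= mask - 1;	/* clear the first set bit */
	}
}

int main(void)
{
	write_protect_masked(64, 0xb);	/* gfns 64, 65, 67 */
	return 0;
}
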
1054static int rmap_write_protect(struct kvm *kvm, u64 gfn) 1110static int rmap_write_protect(struct kvm *kvm, u64 gfn)
1055{ 1111{
1056 struct kvm_memory_slot *slot; 1112 struct kvm_memory_slot *slot;
1113 unsigned long *rmapp;
1114 int i;
1115 int write_protected = 0;
1057 1116
1058 slot = gfn_to_memslot(kvm, gfn); 1117 slot = gfn_to_memslot(kvm, gfn);
1059 return kvm_mmu_rmap_write_protect(kvm, gfn, slot); 1118
1119 for (i = PT_PAGE_TABLE_LEVEL;
1120 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
1121 rmapp = __gfn_to_rmap(gfn, i, slot);
1122 write_protected |= __rmap_write_protect(kvm, rmapp, i);
1123 }
1124
1125 return write_protected;
1060} 1126}
1061 1127
1062static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, 1128static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
1063 unsigned long data) 1129 unsigned long data)
1064{ 1130{
1065 u64 *spte; 1131 u64 *sptep;
1132 struct rmap_iterator iter;
1066 int need_tlb_flush = 0; 1133 int need_tlb_flush = 0;
1067 1134
1068 while ((spte = rmap_next(rmapp, NULL))) { 1135 while ((sptep = rmap_get_first(*rmapp, &iter))) {
1069 BUG_ON(!(*spte & PT_PRESENT_MASK)); 1136 BUG_ON(!(*sptep & PT_PRESENT_MASK));
1070 rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); 1137 rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
1071 drop_spte(kvm, spte); 1138
1139 drop_spte(kvm, sptep);
1072 need_tlb_flush = 1; 1140 need_tlb_flush = 1;
1073 } 1141 }
1142
1074 return need_tlb_flush; 1143 return need_tlb_flush;
1075} 1144}
1076 1145
1077static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, 1146static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
1078 unsigned long data) 1147 unsigned long data)
1079{ 1148{
1149 u64 *sptep;
1150 struct rmap_iterator iter;
1080 int need_flush = 0; 1151 int need_flush = 0;
1081 u64 *spte, new_spte; 1152 u64 new_spte;
1082 pte_t *ptep = (pte_t *)data; 1153 pte_t *ptep = (pte_t *)data;
1083 pfn_t new_pfn; 1154 pfn_t new_pfn;
1084 1155
1085 WARN_ON(pte_huge(*ptep)); 1156 WARN_ON(pte_huge(*ptep));
1086 new_pfn = pte_pfn(*ptep); 1157 new_pfn = pte_pfn(*ptep);
1087 spte = rmap_next(rmapp, NULL); 1158
1088 while (spte) { 1159 for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
1089 BUG_ON(!is_shadow_present_pte(*spte)); 1160 BUG_ON(!is_shadow_present_pte(*sptep));
1090 rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); 1161 rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep);
1162
1091 need_flush = 1; 1163 need_flush = 1;
1164
1092 if (pte_write(*ptep)) { 1165 if (pte_write(*ptep)) {
1093 drop_spte(kvm, spte); 1166 drop_spte(kvm, sptep);
1094 spte = rmap_next(rmapp, NULL); 1167 sptep = rmap_get_first(*rmapp, &iter);
1095 } else { 1168 } else {
1096 new_spte = *spte &~ (PT64_BASE_ADDR_MASK); 1169 new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
1097 new_spte |= (u64)new_pfn << PAGE_SHIFT; 1170 new_spte |= (u64)new_pfn << PAGE_SHIFT;
1098 1171
1099 new_spte &= ~PT_WRITABLE_MASK; 1172 new_spte &= ~PT_WRITABLE_MASK;
1100 new_spte &= ~SPTE_HOST_WRITEABLE; 1173 new_spte &= ~SPTE_HOST_WRITEABLE;
1101 new_spte &= ~shadow_accessed_mask; 1174 new_spte &= ~shadow_accessed_mask;
1102 mmu_spte_clear_track_bits(spte); 1175
1103 mmu_spte_set(spte, new_spte); 1176 mmu_spte_clear_track_bits(sptep);
1104 spte = rmap_next(rmapp, spte); 1177 mmu_spte_set(sptep, new_spte);
1178 sptep = rmap_get_next(&iter);
1105 } 1179 }
1106 } 1180 }
1181
1107 if (need_flush) 1182 if (need_flush)
1108 kvm_flush_remote_tlbs(kvm); 1183 kvm_flush_remote_tlbs(kvm);
1109 1184
@@ -1162,7 +1237,8 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
1162static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 1237static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1163 unsigned long data) 1238 unsigned long data)
1164{ 1239{
1165 u64 *spte; 1240 u64 *sptep;
1241 struct rmap_iterator iter;
1166 int young = 0; 1242 int young = 0;
1167 1243
1168 /* 1244 /*
@@ -1175,25 +1251,24 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1175 if (!shadow_accessed_mask) 1251 if (!shadow_accessed_mask)
1176 return kvm_unmap_rmapp(kvm, rmapp, data); 1252 return kvm_unmap_rmapp(kvm, rmapp, data);
1177 1253
1178 spte = rmap_next(rmapp, NULL); 1254 for (sptep = rmap_get_first(*rmapp, &iter); sptep;
1179 while (spte) { 1255 sptep = rmap_get_next(&iter)) {
1180 int _young; 1256 BUG_ON(!(*sptep & PT_PRESENT_MASK));
1181 u64 _spte = *spte; 1257
1182 BUG_ON(!(_spte & PT_PRESENT_MASK)); 1258 if (*sptep & PT_ACCESSED_MASK) {
1183 _young = _spte & PT_ACCESSED_MASK;
1184 if (_young) {
1185 young = 1; 1259 young = 1;
1186 clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); 1260 clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)sptep);
1187 } 1261 }
1188 spte = rmap_next(rmapp, spte);
1189 } 1262 }
1263
1190 return young; 1264 return young;
1191} 1265}
1192 1266
1193static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 1267static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1194 unsigned long data) 1268 unsigned long data)
1195{ 1269{
1196 u64 *spte; 1270 u64 *sptep;
1271 struct rmap_iterator iter;
1197 int young = 0; 1272 int young = 0;
1198 1273
1199 /* 1274 /*
@@ -1204,16 +1279,14 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1204 if (!shadow_accessed_mask) 1279 if (!shadow_accessed_mask)
1205 goto out; 1280 goto out;
1206 1281
1207 spte = rmap_next(rmapp, NULL); 1282 for (sptep = rmap_get_first(*rmapp, &iter); sptep;
1208 while (spte) { 1283 sptep = rmap_get_next(&iter)) {
1209 u64 _spte = *spte; 1284 BUG_ON(!(*sptep & PT_PRESENT_MASK));
1210 BUG_ON(!(_spte & PT_PRESENT_MASK)); 1285
1211 young = _spte & PT_ACCESSED_MASK; 1286 if (*sptep & PT_ACCESSED_MASK) {
1212 if (young) {
1213 young = 1; 1287 young = 1;
1214 break; 1288 break;
1215 } 1289 }
1216 spte = rmap_next(rmapp, spte);
1217 } 1290 }
1218out: 1291out:
1219 return young; 1292 return young;
@@ -1865,10 +1938,11 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
1865 1938
1866static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) 1939static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
1867{ 1940{
1868 u64 *parent_pte; 1941 u64 *sptep;
1942 struct rmap_iterator iter;
1869 1943
1870 while ((parent_pte = pte_list_next(&sp->parent_ptes, NULL))) 1944 while ((sptep = rmap_get_first(sp->parent_ptes, &iter)))
1871 drop_parent_pte(sp, parent_pte); 1945 drop_parent_pte(sp, sptep);
1872} 1946}
1873 1947
1874static int mmu_zap_unsync_children(struct kvm *kvm, 1948static int mmu_zap_unsync_children(struct kvm *kvm,
@@ -1925,30 +1999,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
1925 return ret; 1999 return ret;
1926} 2000}
1927 2001
1928static void kvm_mmu_isolate_pages(struct list_head *invalid_list)
1929{
1930 struct kvm_mmu_page *sp;
1931
1932 list_for_each_entry(sp, invalid_list, link)
1933 kvm_mmu_isolate_page(sp);
1934}
1935
1936static void free_pages_rcu(struct rcu_head *head)
1937{
1938 struct kvm_mmu_page *next, *sp;
1939
1940 sp = container_of(head, struct kvm_mmu_page, rcu);
1941 while (sp) {
1942 if (!list_empty(&sp->link))
1943 next = list_first_entry(&sp->link,
1944 struct kvm_mmu_page, link);
1945 else
1946 next = NULL;
1947 kvm_mmu_free_page(sp);
1948 sp = next;
1949 }
1950}
1951
1952static void kvm_mmu_commit_zap_page(struct kvm *kvm, 2002static void kvm_mmu_commit_zap_page(struct kvm *kvm,
1953 struct list_head *invalid_list) 2003 struct list_head *invalid_list)
1954{ 2004{
@@ -1957,17 +2007,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
1957 if (list_empty(invalid_list)) 2007 if (list_empty(invalid_list))
1958 return; 2008 return;
1959 2009
1960 kvm_flush_remote_tlbs(kvm); 2010 /*
1961 2011 * wmb: make sure everyone sees our modifications to the page tables
1962 if (atomic_read(&kvm->arch.reader_counter)) { 2012 * rmb: make sure we see changes to vcpu->mode
1963 kvm_mmu_isolate_pages(invalid_list); 2013 */
1964 sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); 2014 smp_mb();
1965 list_del_init(invalid_list);
1966 2015
1967 trace_kvm_mmu_delay_free_pages(sp); 2016 /*
1968 call_rcu(&sp->rcu, free_pages_rcu); 2017 * Wait for all vcpus to exit guest mode and/or lockless shadow
1969 return; 2018 * page table walks.
1970 } 2019 */
2020 kvm_flush_remote_tlbs(kvm);
1971 2021
1972 do { 2022 do {
1973 sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); 2023 sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
@@ -1975,7 +2025,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
1975 kvm_mmu_isolate_page(sp); 2025 kvm_mmu_isolate_page(sp);
1976 kvm_mmu_free_page(sp); 2026 kvm_mmu_free_page(sp);
1977 } while (!list_empty(invalid_list)); 2027 } while (!list_empty(invalid_list));
1978
1979} 2028}
1980 2029
1981/* 2030/*
@@ -3554,7 +3603,7 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp)
3554 * Skip write-flooding detected for the sp whose level is 1, because 3603 * Skip write-flooding detected for the sp whose level is 1, because
3555 * it can become unsync, then the guest page is not write-protected. 3604 * it can become unsync, then the guest page is not write-protected.
3556 */ 3605 */
3557 if (sp->role.level == 1) 3606 if (sp->role.level == PT_PAGE_TABLE_LEVEL)
3558 return false; 3607 return false;
3559 3608
3560 return ++sp->write_flooding_count >= 3; 3609 return ++sp->write_flooding_count >= 3;
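The mmu.c hunks above do two things: every reverse-map walker is converted from the old rmap_next() cursor to the new rmap_get_first()/rmap_get_next() iterator, and the RCU-based delayed freeing of shadow pages (kvm_mmu_isolate_pages()/free_pages_rcu()) is replaced by a single smp_mb() paired with the remote TLB flush, which forces lockless walkers out of the shadow page tables before kvm_mmu_commit_zap_page() frees them. A minimal sketch of the new iteration pattern, using a hypothetical helper that merely counts present sptes (not part of this series):

    /* Hypothetical helper: count present sptes on one rmap chain. */
    static int count_present_sptes(unsigned long *rmapp)
    {
            struct rmap_iterator iter;
            u64 *sptep;
            int n = 0;

            for (sptep = rmap_get_first(*rmapp, &iter); sptep;
                 sptep = rmap_get_next(&iter))
                    if (*sptep & PT_PRESENT_MASK)
                            ++n;

            return n;
    }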
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 715da5a19a5b..7d7d0b9e23eb 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -192,7 +192,8 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
192{ 192{
193 struct kvm_memory_slot *slot; 193 struct kvm_memory_slot *slot;
194 unsigned long *rmapp; 194 unsigned long *rmapp;
195 u64 *spte; 195 u64 *sptep;
196 struct rmap_iterator iter;
196 197
197 if (sp->role.direct || sp->unsync || sp->role.invalid) 198 if (sp->role.direct || sp->unsync || sp->role.invalid)
198 return; 199 return;
@@ -200,13 +201,12 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
200 slot = gfn_to_memslot(kvm, sp->gfn); 201 slot = gfn_to_memslot(kvm, sp->gfn);
201 rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; 202 rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
202 203
203 spte = rmap_next(rmapp, NULL); 204 for (sptep = rmap_get_first(*rmapp, &iter); sptep;
204 while (spte) { 205 sptep = rmap_get_next(&iter)) {
205 if (is_writable_pte(*spte)) 206 if (is_writable_pte(*sptep))
206 audit_printk(kvm, "shadow page has writable " 207 audit_printk(kvm, "shadow page has writable "
207 "mappings: gfn %llx role %x\n", 208 "mappings: gfn %llx role %x\n",
208 sp->gfn, sp->role.word); 209 sp->gfn, sp->role.word);
209 spte = rmap_next(rmapp, spte);
210 } 210 }
211} 211}
212 212
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index df5a70311be8..34f970937ef1 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -658,7 +658,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
658{ 658{
659 int offset = 0; 659 int offset = 0;
660 660
661 WARN_ON(sp->role.level != 1); 661 WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL);
662 662
663 if (PTTYPE == 32) 663 if (PTTYPE == 32)
664 offset = sp->role.quadrant << PT64_LEVEL_BITS; 664 offset = sp->role.quadrant << PT64_LEVEL_BITS;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e334389e1c75..f75af406b268 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -22,6 +22,7 @@
22#include "x86.h" 22#include "x86.h"
23 23
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/mod_devicetable.h>
25#include <linux/kernel.h> 26#include <linux/kernel.h>
26#include <linux/vmalloc.h> 27#include <linux/vmalloc.h>
27#include <linux/highmem.h> 28#include <linux/highmem.h>
@@ -42,6 +43,12 @@
42MODULE_AUTHOR("Qumranet"); 43MODULE_AUTHOR("Qumranet");
43MODULE_LICENSE("GPL"); 44MODULE_LICENSE("GPL");
44 45
46static const struct x86_cpu_id svm_cpu_id[] = {
47 X86_FEATURE_MATCH(X86_FEATURE_SVM),
48 {}
49};
50MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
51
45#define IOPM_ALLOC_ORDER 2 52#define IOPM_ALLOC_ORDER 2
46#define MSRPM_ALLOC_ORDER 1 53#define MSRPM_ALLOC_ORDER 1
47 54
@@ -3240,6 +3247,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
3240 svm_clear_vintr(svm); 3247 svm_clear_vintr(svm);
3241 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; 3248 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
3242 mark_dirty(svm->vmcb, VMCB_INTR); 3249 mark_dirty(svm->vmcb, VMCB_INTR);
3250 ++svm->vcpu.stat.irq_window_exits;
3243 /* 3251 /*
3244 * If the user space waits to inject interrupts, exit as soon as 3252 * If the user space waits to inject interrupts, exit as soon as
3245 * possible 3253 * possible
@@ -3247,7 +3255,6 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
3247 if (!irqchip_in_kernel(svm->vcpu.kvm) && 3255 if (!irqchip_in_kernel(svm->vcpu.kvm) &&
3248 kvm_run->request_interrupt_window && 3256 kvm_run->request_interrupt_window &&
3249 !kvm_cpu_has_interrupt(&svm->vcpu)) { 3257 !kvm_cpu_has_interrupt(&svm->vcpu)) {
3250 ++svm->vcpu.stat.irq_window_exits;
3251 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 3258 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
3252 return 0; 3259 return 0;
3253 } 3260 }
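The svm_cpu_id table above implements the "module autoload" item from this pull: MODULE_DEVICE_TABLE(x86cpu, ...) emits an x86cpu modalias, so udev can load kvm-amd automatically on any CPU that advertises the SVM feature flag (vmx.c below gains the equivalent table for VMX). The same pattern works for any feature-gated module; a hedged sketch for a hypothetical module on a kernel of this vintage:

    /* Sketch: autoload a hypothetical module on CPUs with XSAVE. */
    #include <linux/module.h>
    #include <linux/mod_devicetable.h>
    #include <asm/cpu_device_id.h>

    static const struct x86_cpu_id example_cpu_id[] = {
            X86_FEATURE_MATCH(X86_FEATURE_XSAVE),
            {}
    };
    MODULE_DEVICE_TABLE(x86cpu, example_cpu_id);

    MODULE_LICENSE("GPL");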
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4ff0ab9bc3c8..32eb58866292 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -27,6 +27,7 @@
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/sched.h> 28#include <linux/sched.h>
29#include <linux/moduleparam.h> 29#include <linux/moduleparam.h>
30#include <linux/mod_devicetable.h>
30#include <linux/ftrace_event.h> 31#include <linux/ftrace_event.h>
31#include <linux/slab.h> 32#include <linux/slab.h>
32#include <linux/tboot.h> 33#include <linux/tboot.h>
@@ -51,6 +52,12 @@
51MODULE_AUTHOR("Qumranet"); 52MODULE_AUTHOR("Qumranet");
52MODULE_LICENSE("GPL"); 53MODULE_LICENSE("GPL");
53 54
55static const struct x86_cpu_id vmx_cpu_id[] = {
56 X86_FEATURE_MATCH(X86_FEATURE_VMX),
57 {}
58};
59MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
60
54static bool __read_mostly enable_vpid = 1; 61static bool __read_mostly enable_vpid = 1;
55module_param_named(vpid, enable_vpid, bool, 0444); 62module_param_named(vpid, enable_vpid, bool, 0444);
56 63
@@ -386,6 +393,9 @@ struct vcpu_vmx {
386 struct { 393 struct {
387 int loaded; 394 int loaded;
388 u16 fs_sel, gs_sel, ldt_sel; 395 u16 fs_sel, gs_sel, ldt_sel;
396#ifdef CONFIG_X86_64
397 u16 ds_sel, es_sel;
398#endif
389 int gs_ldt_reload_needed; 399 int gs_ldt_reload_needed;
390 int fs_reload_needed; 400 int fs_reload_needed;
391 } host_state; 401 } host_state;
@@ -1411,6 +1421,11 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
1411 } 1421 }
1412 1422
1413#ifdef CONFIG_X86_64 1423#ifdef CONFIG_X86_64
1424 savesegment(ds, vmx->host_state.ds_sel);
1425 savesegment(es, vmx->host_state.es_sel);
1426#endif
1427
1428#ifdef CONFIG_X86_64
1414 vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); 1429 vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
1415 vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); 1430 vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
1416#else 1431#else
@@ -1450,6 +1465,19 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
1450 } 1465 }
1451 if (vmx->host_state.fs_reload_needed) 1466 if (vmx->host_state.fs_reload_needed)
1452 loadsegment(fs, vmx->host_state.fs_sel); 1467 loadsegment(fs, vmx->host_state.fs_sel);
1468#ifdef CONFIG_X86_64
1469 if (unlikely(vmx->host_state.ds_sel | vmx->host_state.es_sel)) {
1470 loadsegment(ds, vmx->host_state.ds_sel);
1471 loadsegment(es, vmx->host_state.es_sel);
1472 }
1473#else
1474 /*
1475 * The sysexit path does not restore ds/es, so we must set them to
1476 * a reasonable value ourselves.
1477 */
1478 loadsegment(ds, __USER_DS);
1479 loadsegment(es, __USER_DS);
1480#endif
1453 reload_tss(); 1481 reload_tss();
1454#ifdef CONFIG_X86_64 1482#ifdef CONFIG_X86_64
1455 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); 1483 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
@@ -3633,8 +3661,18 @@ static void vmx_set_constant_host_state(void)
3633 vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ 3661 vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
3634 3662
3635 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ 3663 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
3664#ifdef CONFIG_X86_64
3665 /*
3666 * Load null selectors, so we can avoid reloading them in
3667 * __vmx_load_host_state(), in case userspace uses the null selectors
3668 * too (the expected case).
3669 */
3670 vmcs_write16(HOST_DS_SELECTOR, 0);
3671 vmcs_write16(HOST_ES_SELECTOR, 0);
3672#else
3636 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 3673 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
3637 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 3674 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
3675#endif
3638 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 3676 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
3639 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ 3677 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
3640 3678
@@ -6256,7 +6294,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6256 } 6294 }
6257 } 6295 }
6258 6296
6259 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
6260 vmx->loaded_vmcs->launched = 1; 6297 vmx->loaded_vmcs->launched = 1;
6261 6298
6262 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); 6299 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
@@ -6343,7 +6380,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
6343 return &vmx->vcpu; 6380 return &vmx->vcpu;
6344 6381
6345free_vmcs: 6382free_vmcs:
6346 free_vmcs(vmx->loaded_vmcs->vmcs); 6383 free_loaded_vmcs(vmx->loaded_vmcs);
6347free_msrs: 6384free_msrs:
6348 kfree(vmx->guest_msrs); 6385 kfree(vmx->guest_msrs);
6349uninit_vcpu: 6386uninit_vcpu:
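The vmx.c changes above replace the unconditional per-exit "mov __USER_DS into %ds/%es" with null host selectors in the VMCS (64-bit hosts only) plus a lazy reload in __vmx_load_host_state(). Since 64-bit userspace normally runs with null %ds/%es, both the VM-exit path and the host-state restore usually touch no segment registers at all. The decision reduces to this sketch (ds_sel/es_sel are the values captured with savesegment() when the vcpu was loaded):

    /* Sketch of the lazy reload: skipped whenever userspace had null
     * selectors, which is the common case on a 64-bit host. */
    if (unlikely(ds_sel | es_sel)) {
            loadsegment(ds, ds_sel);
            loadsegment(es, es_sel);
    }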
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 185a2b823a2d..be6d54929fa7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2147,6 +2147,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2147 case KVM_CAP_ASYNC_PF: 2147 case KVM_CAP_ASYNC_PF:
2148 case KVM_CAP_GET_TSC_KHZ: 2148 case KVM_CAP_GET_TSC_KHZ:
2149 case KVM_CAP_PCI_2_3: 2149 case KVM_CAP_PCI_2_3:
2150 case KVM_CAP_KVMCLOCK_CTRL:
2150 r = 1; 2151 r = 1;
2151 break; 2152 break;
2152 case KVM_CAP_COALESCED_MMIO: 2153 case KVM_CAP_COALESCED_MMIO:
@@ -2597,6 +2598,23 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
2597 return r; 2598 return r;
2598} 2599}
2599 2600
2601/*
2602 * kvm_set_guest_paused() indicates to the guest kernel that it has been
2603 * stopped by the hypervisor. This function will be called from the host only.
2604 * EINVAL is returned when the host attempts to set the flag for a guest that
2605 * does not support pv clocks.
2606 */
2607static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
2608{
2609 struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock;
2610 if (!vcpu->arch.time_page)
2611 return -EINVAL;
2612 src->flags |= PVCLOCK_GUEST_STOPPED;
2613 mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT);
2614 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2615 return 0;
2616}
2617
2600long kvm_arch_vcpu_ioctl(struct file *filp, 2618long kvm_arch_vcpu_ioctl(struct file *filp,
2601 unsigned int ioctl, unsigned long arg) 2619 unsigned int ioctl, unsigned long arg)
2602{ 2620{
@@ -2873,6 +2891,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2873 r = vcpu->arch.virtual_tsc_khz; 2891 r = vcpu->arch.virtual_tsc_khz;
2874 goto out; 2892 goto out;
2875 } 2893 }
2894 case KVM_KVMCLOCK_CTRL: {
2895 r = kvm_set_guest_paused(vcpu);
2896 goto out;
2897 }
2876 default: 2898 default:
2877 r = -EINVAL; 2899 r = -EINVAL;
2878 } 2900 }
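kvm_set_guest_paused() backs the new KVM_KVMCLOCK_CTRL vcpu ioctl (added to include/linux/kvm.h further down). A VMM that is about to stop a guest, e.g. while it is being debugged or checkpointed, uses it to tell kvmclock that the pause is intentional, so the guest's soft-lockup watchdog (see the kernel/watchdog.c hunk below) can discard the lost time instead of warning. A hedged userspace sketch, assuming vcpu_fds[] holds open vcpu file descriptors:

    /* Sketch: mark every vcpu as host-stopped before pausing the VM. */
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int mark_guest_paused(int *vcpu_fds, int nr_vcpus)
    {
            int i;

            for (i = 0; i < nr_vcpus; i++)
                    if (ioctl(vcpu_fds[i], KVM_KVMCLOCK_CTRL, 0) < 0)
                            return -1;  /* EINVAL if the guest has no pv clock */
            return 0;
    }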
@@ -3045,57 +3067,32 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3045} 3067}
3046 3068
3047/** 3069/**
3048 * write_protect_slot - write protect a slot for dirty logging 3070 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
3049 * @kvm: the kvm instance 3071 * @kvm: kvm instance
3050 * @memslot: the slot we protect 3072 * @log: slot id and address to which we copy the log
3051 * @dirty_bitmap: the bitmap indicating which pages are dirty
3052 * @nr_dirty_pages: the number of dirty pages
3053 * 3073 *
3054 * We have two ways to find all sptes to protect: 3074 * We need to keep it in mind that VCPU threads can write to the bitmap
3055 * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and 3075 * concurrently. So, to avoid losing data, we keep the following order for
3056 * checks ones that have a spte mapping a page in the slot. 3076 * each bit:
3057 * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap.
3058 * 3077 *
3059 * Generally speaking, if there are not so many dirty pages compared to the 3078 * 1. Take a snapshot of the bit and clear it if needed.
3060 * number of shadow pages, we should use the latter. 3079 * 2. Write protect the corresponding page.
3080 * 3. Flush TLB's if needed.
3081 * 4. Copy the snapshot to the userspace.
3061 * 3082 *
3062 * Note that letting others write into a page marked dirty in the old bitmap 3083 * Between 2 and 3, the guest may write to the page using the remaining TLB
3063 * by using the remaining tlb entry is not a problem. That page will become 3084 * entry. This is not a problem because the page will be reported dirty at
3064 * write protected again when we flush the tlb and then be reported dirty to 3085 * step 4 using the snapshot taken before and step 3 ensures that successive
3065 * the user space by copying the old bitmap. 3086 * writes will be logged for the next call.
3066 */
3067static void write_protect_slot(struct kvm *kvm,
3068 struct kvm_memory_slot *memslot,
3069 unsigned long *dirty_bitmap,
3070 unsigned long nr_dirty_pages)
3071{
3072 spin_lock(&kvm->mmu_lock);
3073
3074 /* Not many dirty pages compared to # of shadow pages. */
3075 if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
3076 unsigned long gfn_offset;
3077
3078 for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
3079 unsigned long gfn = memslot->base_gfn + gfn_offset;
3080
3081 kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
3082 }
3083 kvm_flush_remote_tlbs(kvm);
3084 } else
3085 kvm_mmu_slot_remove_write_access(kvm, memslot->id);
3086
3087 spin_unlock(&kvm->mmu_lock);
3088}
3089
3090/*
3091 * Get (and clear) the dirty memory log for a memory slot.
3092 */ 3087 */
3093int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 3088int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
3094 struct kvm_dirty_log *log)
3095{ 3089{
3096 int r; 3090 int r;
3097 struct kvm_memory_slot *memslot; 3091 struct kvm_memory_slot *memslot;
3098 unsigned long n, nr_dirty_pages; 3092 unsigned long n, i;
3093 unsigned long *dirty_bitmap;
3094 unsigned long *dirty_bitmap_buffer;
3095 bool is_dirty = false;
3099 3096
3100 mutex_lock(&kvm->slots_lock); 3097 mutex_lock(&kvm->slots_lock);
3101 3098
@@ -3104,49 +3101,42 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
3104 goto out; 3101 goto out;
3105 3102
3106 memslot = id_to_memslot(kvm->memslots, log->slot); 3103 memslot = id_to_memslot(kvm->memslots, log->slot);
3104
3105 dirty_bitmap = memslot->dirty_bitmap;
3107 r = -ENOENT; 3106 r = -ENOENT;
3108 if (!memslot->dirty_bitmap) 3107 if (!dirty_bitmap)
3109 goto out; 3108 goto out;
3110 3109
3111 n = kvm_dirty_bitmap_bytes(memslot); 3110 n = kvm_dirty_bitmap_bytes(memslot);
3112 nr_dirty_pages = memslot->nr_dirty_pages;
3113 3111
3114 /* If nothing is dirty, don't bother messing with page tables. */ 3112 dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
3115 if (nr_dirty_pages) { 3113 memset(dirty_bitmap_buffer, 0, n);
3116 struct kvm_memslots *slots, *old_slots;
3117 unsigned long *dirty_bitmap, *dirty_bitmap_head;
3118 3114
3119 dirty_bitmap = memslot->dirty_bitmap; 3115 spin_lock(&kvm->mmu_lock);
3120 dirty_bitmap_head = memslot->dirty_bitmap_head;
3121 if (dirty_bitmap == dirty_bitmap_head)
3122 dirty_bitmap_head += n / sizeof(long);
3123 memset(dirty_bitmap_head, 0, n);
3124 3116
3125 r = -ENOMEM; 3117 for (i = 0; i < n / sizeof(long); i++) {
3126 slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL); 3118 unsigned long mask;
3127 if (!slots) 3119 gfn_t offset;
3128 goto out;
3129 3120
3130 memslot = id_to_memslot(slots, log->slot); 3121 if (!dirty_bitmap[i])
3131 memslot->nr_dirty_pages = 0; 3122 continue;
3132 memslot->dirty_bitmap = dirty_bitmap_head;
3133 update_memslots(slots, NULL);
3134 3123
3135 old_slots = kvm->memslots; 3124 is_dirty = true;
3136 rcu_assign_pointer(kvm->memslots, slots);
3137 synchronize_srcu_expedited(&kvm->srcu);
3138 kfree(old_slots);
3139 3125
3140 write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages); 3126 mask = xchg(&dirty_bitmap[i], 0);
3127 dirty_bitmap_buffer[i] = mask;
3141 3128
3142 r = -EFAULT; 3129 offset = i * BITS_PER_LONG;
3143 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) 3130 kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
3144 goto out;
3145 } else {
3146 r = -EFAULT;
3147 if (clear_user(log->dirty_bitmap, n))
3148 goto out;
3149 } 3131 }
3132 if (is_dirty)
3133 kvm_flush_remote_tlbs(kvm);
3134
3135 spin_unlock(&kvm->mmu_lock);
3136
3137 r = -EFAULT;
3138 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
3139 goto out;
3150 3140
3151 r = 0; 3141 r = 0;
3152out: 3142out:
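The rewritten kvm_vm_ioctl_get_dirty_log() keeps the whole operation under mmu_lock and works one bitmap word at a time: xchg() atomically snapshots and clears the word, the returned mask drives kvm_mmu_write_protect_pt_masked(), and only after the TLB flush is the snapshot buffer copied to userspace, which is exactly the 1-4 ordering spelled out in the new comment. The gfn arithmetic is the only subtle part; a hypothetical helper (not part of this patch) makes it explicit:

    /* Hypothetical helper: word i, bit b of the slot's dirty bitmap
     * corresponds to gfn  base_gfn + i * BITS_PER_LONG + b. */
    static void for_each_dirty_gfn_in_word(struct kvm_memory_slot *memslot,
                                           unsigned long i, unsigned long mask,
                                           void (*fn)(gfn_t gfn))
    {
            unsigned long b;

            for_each_set_bit(b, &mask, BITS_PER_LONG)
                    fn(memslot->base_gfn + i * BITS_PER_LONG + b);
    }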
@@ -3728,9 +3718,8 @@ struct read_write_emulator_ops {
3728static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) 3718static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
3729{ 3719{
3730 if (vcpu->mmio_read_completed) { 3720 if (vcpu->mmio_read_completed) {
3731 memcpy(val, vcpu->mmio_data, bytes);
3732 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, 3721 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
3733 vcpu->mmio_phys_addr, *(u64 *)val); 3722 vcpu->mmio_fragments[0].gpa, *(u64 *)val);
3734 vcpu->mmio_read_completed = 0; 3723 vcpu->mmio_read_completed = 0;
3735 return 1; 3724 return 1;
3736 } 3725 }
@@ -3766,8 +3755,9 @@ static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
3766static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, 3755static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
3767 void *val, int bytes) 3756 void *val, int bytes)
3768{ 3757{
3769 memcpy(vcpu->mmio_data, val, bytes); 3758 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
3770 memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); 3759
3760 memcpy(vcpu->run->mmio.data, frag->data, frag->len);
3771 return X86EMUL_CONTINUE; 3761 return X86EMUL_CONTINUE;
3772} 3762}
3773 3763
@@ -3794,10 +3784,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
3794 gpa_t gpa; 3784 gpa_t gpa;
3795 int handled, ret; 3785 int handled, ret;
3796 bool write = ops->write; 3786 bool write = ops->write;
3797 3787 struct kvm_mmio_fragment *frag;
3798 if (ops->read_write_prepare &&
3799 ops->read_write_prepare(vcpu, val, bytes))
3800 return X86EMUL_CONTINUE;
3801 3788
3802 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); 3789 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
3803 3790
@@ -3823,15 +3810,19 @@ mmio:
3823 bytes -= handled; 3810 bytes -= handled;
3824 val += handled; 3811 val += handled;
3825 3812
3826 vcpu->mmio_needed = 1; 3813 while (bytes) {
3827 vcpu->run->exit_reason = KVM_EXIT_MMIO; 3814 unsigned now = min(bytes, 8U);
3828 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
3829 vcpu->mmio_size = bytes;
3830 vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
3831 vcpu->run->mmio.is_write = vcpu->mmio_is_write = write;
3832 vcpu->mmio_index = 0;
3833 3815
3834 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes); 3816 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
3817 frag->gpa = gpa;
3818 frag->data = val;
3819 frag->len = now;
3820
3821 gpa += now;
3822 val += now;
3823 bytes -= now;
3824 }
3825 return X86EMUL_CONTINUE;
3835} 3826}
3836 3827
3837int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, 3828int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
@@ -3840,10 +3831,18 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
3840 struct read_write_emulator_ops *ops) 3831 struct read_write_emulator_ops *ops)
3841{ 3832{
3842 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 3833 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3834 gpa_t gpa;
3835 int rc;
3836
3837 if (ops->read_write_prepare &&
3838 ops->read_write_prepare(vcpu, val, bytes))
3839 return X86EMUL_CONTINUE;
3840
3841 vcpu->mmio_nr_fragments = 0;
3843 3842
3844 /* Crossing a page boundary? */ 3843 /* Crossing a page boundary? */
3845 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { 3844 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
3846 int rc, now; 3845 int now;
3847 3846
3848 now = -addr & ~PAGE_MASK; 3847 now = -addr & ~PAGE_MASK;
3849 rc = emulator_read_write_onepage(addr, val, now, exception, 3848 rc = emulator_read_write_onepage(addr, val, now, exception,
@@ -3856,8 +3855,25 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
3856 bytes -= now; 3855 bytes -= now;
3857 } 3856 }
3858 3857
3859 return emulator_read_write_onepage(addr, val, bytes, exception, 3858 rc = emulator_read_write_onepage(addr, val, bytes, exception,
3860 vcpu, ops); 3859 vcpu, ops);
3860 if (rc != X86EMUL_CONTINUE)
3861 return rc;
3862
3863 if (!vcpu->mmio_nr_fragments)
3864 return rc;
3865
3866 gpa = vcpu->mmio_fragments[0].gpa;
3867
3868 vcpu->mmio_needed = 1;
3869 vcpu->mmio_cur_fragment = 0;
3870
3871 vcpu->run->mmio.len = vcpu->mmio_fragments[0].len;
3872 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
3873 vcpu->run->exit_reason = KVM_EXIT_MMIO;
3874 vcpu->run->mmio.phys_addr = gpa;
3875
3876 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
3861} 3877}
3862 3878
3863static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, 3879static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
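Together, the two hunks above implement the page-crossing MMIO support from the merge description: emulator_read_write() splits the access at the page boundary, emulator_read_write_onepage() chops each piece into up-to-8-byte kvm_mmio_fragment entries, and only the first fragment is placed in vcpu->run before exiting to userspace. Worked example (assuming a flat guest mapping and that both pages are emulated MMIO): a 4-byte write whose last two bytes cross the page boundary at gpa 0x11000 is recorded as

    mmio_fragments[0] = { .gpa = 0x10ffe, .data = val,     .len = 2 };
    mmio_fragments[1] = { .gpa = 0x11000, .data = val + 2, .len = 2 };

run->mmio then describes fragment 0; complete_mmio(), in the hunk below, feeds fragment 1 to userspace as a second KVM_EXIT_MMIO.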
@@ -5263,10 +5279,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5263 kvm_deliver_pmi(vcpu); 5279 kvm_deliver_pmi(vcpu);
5264 } 5280 }
5265 5281
5266 r = kvm_mmu_reload(vcpu);
5267 if (unlikely(r))
5268 goto out;
5269
5270 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { 5282 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
5271 inject_pending_event(vcpu); 5283 inject_pending_event(vcpu);
5272 5284
@@ -5282,6 +5294,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5282 } 5294 }
5283 } 5295 }
5284 5296
5297 r = kvm_mmu_reload(vcpu);
5298 if (unlikely(r)) {
5299 kvm_x86_ops->cancel_injection(vcpu);
5300 goto out;
5301 }
5302
5285 preempt_disable(); 5303 preempt_disable();
5286 5304
5287 kvm_x86_ops->prepare_guest_switch(vcpu); 5305 kvm_x86_ops->prepare_guest_switch(vcpu);
@@ -5456,33 +5474,55 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
5456 return r; 5474 return r;
5457} 5475}
5458 5476
5477/*
5478 * Implements the following, as a state machine:
5479 *
5480 * read:
5481 * for each fragment
5482 * write gpa, len
5483 * exit
5484 * copy data
5485 * execute insn
5486 *
5487 * write:
5488 * for each fragment
5489 * write gpa, len
5490 * copy data
5491 * exit
5492 */
5459static int complete_mmio(struct kvm_vcpu *vcpu) 5493static int complete_mmio(struct kvm_vcpu *vcpu)
5460{ 5494{
5461 struct kvm_run *run = vcpu->run; 5495 struct kvm_run *run = vcpu->run;
5496 struct kvm_mmio_fragment *frag;
5462 int r; 5497 int r;
5463 5498
5464 if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) 5499 if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
5465 return 1; 5500 return 1;
5466 5501
5467 if (vcpu->mmio_needed) { 5502 if (vcpu->mmio_needed) {
5468 vcpu->mmio_needed = 0; 5503 /* Complete previous fragment */
5504 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
5469 if (!vcpu->mmio_is_write) 5505 if (!vcpu->mmio_is_write)
5470 memcpy(vcpu->mmio_data + vcpu->mmio_index, 5506 memcpy(frag->data, run->mmio.data, frag->len);
5471 run->mmio.data, 8); 5507 if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
5472 vcpu->mmio_index += 8; 5508 vcpu->mmio_needed = 0;
5473 if (vcpu->mmio_index < vcpu->mmio_size) { 5509 if (vcpu->mmio_is_write)
5474 run->exit_reason = KVM_EXIT_MMIO; 5510 return 1;
5475 run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; 5511 vcpu->mmio_read_completed = 1;
5476 memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); 5512 goto done;
5477 run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
5478 run->mmio.is_write = vcpu->mmio_is_write;
5479 vcpu->mmio_needed = 1;
5480 return 0;
5481 } 5513 }
5514 /* Initiate next fragment */
5515 ++frag;
5516 run->exit_reason = KVM_EXIT_MMIO;
5517 run->mmio.phys_addr = frag->gpa;
5482 if (vcpu->mmio_is_write) 5518 if (vcpu->mmio_is_write)
5483 return 1; 5519 memcpy(run->mmio.data, frag->data, frag->len);
5484 vcpu->mmio_read_completed = 1; 5520 run->mmio.len = frag->len;
5521 run->mmio.is_write = vcpu->mmio_is_write;
5522 return 0;
5523
5485 } 5524 }
5525done:
5486 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 5526 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5487 r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); 5527 r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
5488 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 5528 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
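For userspace the only visible difference is that a single emulated instruction may now produce several consecutive KVM_EXIT_MMIO exits, one per fragment; a VMM that already services every MMIO exit in its run loop needs no changes. A hedged sketch of such a loop, where run is the mmap()ed kvm_run structure and mmio_read()/mmio_write() are the VMM's own device-model callbacks (assumptions, not part of the KVM API):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Device-model callbacks supplied by the VMM (assumed). */
    void mmio_read(__u64 gpa, void *data, __u32 len);
    void mmio_write(__u64 gpa, const void *data, __u32 len);

    /* Sketch of a vcpu loop: each MMIO fragment arrives as its own exit. */
    static void vcpu_loop(int vcpu_fd, struct kvm_run *run)
    {
            while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
                    if (run->exit_reason != KVM_EXIT_MMIO)
                            continue;       /* other exit reasons elided */

                    if (run->mmio.is_write)
                            mmio_write(run->mmio.phys_addr, run->mmio.data,
                                       run->mmio.len);
                    else
                            mmio_read(run->mmio.phys_addr, run->mmio.data,
                                      run->mmio.len);
            }
    }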
@@ -6399,21 +6439,9 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
6399 kvm_cpu_has_interrupt(vcpu)); 6439 kvm_cpu_has_interrupt(vcpu));
6400} 6440}
6401 6441
6402void kvm_vcpu_kick(struct kvm_vcpu *vcpu) 6442int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
6403{ 6443{
6404 int me; 6444 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
6405 int cpu = vcpu->cpu;
6406
6407 if (waitqueue_active(&vcpu->wq)) {
6408 wake_up_interruptible(&vcpu->wq);
6409 ++vcpu->stat.halt_wakeup;
6410 }
6411
6412 me = get_cpu();
6413 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
6414 if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE)
6415 smp_send_reschedule(cpu);
6416 put_cpu();
6417} 6445}
6418 6446
6419int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) 6447int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index cb80c293cdd8..3d1134ddb885 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -64,7 +64,7 @@ static inline int is_pse(struct kvm_vcpu *vcpu)
64 64
65static inline int is_paging(struct kvm_vcpu *vcpu) 65static inline int is_paging(struct kvm_vcpu *vcpu)
66{ 66{
67 return kvm_read_cr0_bits(vcpu, X86_CR0_PG); 67 return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG));
68} 68}
69 69
70static inline u32 bit(int bitno) 70static inline u32 bit(int bitno)
diff --git a/arch/xtensa/include/asm/kvm_para.h b/arch/xtensa/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/xtensa/include/asm/kvm_para.h
@@ -0,0 +1 @@
  1#include <asm-generic/kvm_para.h>
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index 36506366158d..766cb7b19b40 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -17,6 +17,7 @@
17#include <linux/mm.h> 17#include <linux/mm.h>
18#include <linux/mmzone.h> 18#include <linux/mmzone.h>
19#include <linux/memory.h> 19#include <linux/memory.h>
20#include <linux/module.h>
20#include <linux/platform_device.h> 21#include <linux/platform_device.h>
21#include <asm/chpid.h> 22#include <asm/chpid.h>
22#include <asm/sclp.h> 23#include <asm/sclp.h>
@@ -38,7 +39,8 @@ struct read_info_sccb {
38 u64 facilities; /* 48-55 */ 39 u64 facilities; /* 48-55 */
39 u8 _reserved2[84 - 56]; /* 56-83 */ 40 u8 _reserved2[84 - 56]; /* 56-83 */
40 u8 fac84; /* 84 */ 41 u8 fac84; /* 84 */
41 u8 _reserved3[91 - 85]; /* 85-90 */ 42 u8 fac85; /* 85 */
43 u8 _reserved3[91 - 86]; /* 86-90 */
42 u8 flags; /* 91 */ 44 u8 flags; /* 91 */
43 u8 _reserved4[100 - 92]; /* 92-99 */ 45 u8 _reserved4[100 - 92]; /* 92-99 */
44 u32 rnsize2; /* 100-103 */ 46 u32 rnsize2; /* 100-103 */
@@ -51,6 +53,7 @@ static int __initdata early_read_info_sccb_valid;
51 53
52u64 sclp_facilities; 54u64 sclp_facilities;
53static u8 sclp_fac84; 55static u8 sclp_fac84;
56static u8 sclp_fac85;
54static unsigned long long rzm; 57static unsigned long long rzm;
55static unsigned long long rnmax; 58static unsigned long long rnmax;
56 59
@@ -112,6 +115,7 @@ void __init sclp_facilities_detect(void)
112 sccb = &early_read_info_sccb; 115 sccb = &early_read_info_sccb;
113 sclp_facilities = sccb->facilities; 116 sclp_facilities = sccb->facilities;
114 sclp_fac84 = sccb->fac84; 117 sclp_fac84 = sccb->fac84;
118 sclp_fac85 = sccb->fac85;
115 rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; 119 rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
116 rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; 120 rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2;
117 rzm <<= 20; 121 rzm <<= 20;
@@ -127,6 +131,12 @@ unsigned long long sclp_get_rzm(void)
127 return rzm; 131 return rzm;
128} 132}
129 133
134u8 sclp_get_fac85(void)
135{
136 return sclp_fac85;
137}
138EXPORT_SYMBOL_GPL(sclp_get_fac85);
139
130/* 140/*
131 * This function will be called after sclp_facilities_detect(), which gets 141 * This function will be called after sclp_facilities_detect(), which gets
132 * called from early.c code. Therefore the sccb should have valid contents. 142 * called from early.c code. Therefore the sccb should have valid contents.
diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h
new file mode 100644
index 000000000000..5cba37f9eae1
--- /dev/null
+++ b/include/asm-generic/kvm_para.h
@@ -0,0 +1,22 @@
1#ifndef _ASM_GENERIC_KVM_PARA_H
2#define _ASM_GENERIC_KVM_PARA_H
3
4#ifdef __KERNEL__
5
6/*
7 * This function is used by architectures that support kvm to avoid issuing
8 * false soft lockup messages.
9 */
10static inline bool kvm_check_and_clear_guest_paused(void)
11{
12 return false;
13}
14
15static inline unsigned int kvm_arch_para_features(void)
16{
17 return 0;
18}
19
  20#endif /* __KERNEL__ */ 
21
22#endif
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 6c322a90b92f..09f2b3aa2da7 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -449,6 +449,30 @@ struct kvm_ppc_pvinfo {
449 __u8 pad[108]; 449 __u8 pad[108];
450}; 450};
451 451
452/* for KVM_PPC_GET_SMMU_INFO */
453#define KVM_PPC_PAGE_SIZES_MAX_SZ 8
454
455struct kvm_ppc_one_page_size {
456 __u32 page_shift; /* Page shift (or 0) */
457 __u32 pte_enc; /* Encoding in the HPTE (>>12) */
458};
459
460struct kvm_ppc_one_seg_page_size {
461 __u32 page_shift; /* Base page shift of segment (or 0) */
462 __u32 slb_enc; /* SLB encoding for BookS */
463 struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
464};
465
466#define KVM_PPC_PAGE_SIZES_REAL 0x00000001
467#define KVM_PPC_1T_SEGMENTS 0x00000002
468
469struct kvm_ppc_smmu_info {
470 __u64 flags;
471 __u32 slb_size;
472 __u32 pad;
473 struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
474};
475
452#define KVMIO 0xAE 476#define KVMIO 0xAE
453 477
454/* machine type bits, to be used as argument to KVM_CREATE_VM */ 478/* machine type bits, to be used as argument to KVM_CREATE_VM */
@@ -589,6 +613,10 @@ struct kvm_ppc_pvinfo {
589#define KVM_CAP_S390_UCONTROL 73 613#define KVM_CAP_S390_UCONTROL 73
590#define KVM_CAP_SYNC_REGS 74 614#define KVM_CAP_SYNC_REGS 74
591#define KVM_CAP_PCI_2_3 75 615#define KVM_CAP_PCI_2_3 75
616#define KVM_CAP_KVMCLOCK_CTRL 76
617#define KVM_CAP_SIGNAL_MSI 77
618#define KVM_CAP_PPC_GET_SMMU_INFO 78
619#define KVM_CAP_S390_COW 79
592 620
593#ifdef KVM_CAP_IRQ_ROUTING 621#ifdef KVM_CAP_IRQ_ROUTING
594 622
@@ -714,6 +742,14 @@ struct kvm_one_reg {
714 __u64 addr; 742 __u64 addr;
715}; 743};
716 744
745struct kvm_msi {
746 __u32 address_lo;
747 __u32 address_hi;
748 __u32 data;
749 __u32 flags;
750 __u8 pad[16];
751};
752
717/* 753/*
718 * ioctls for VM fds 754 * ioctls for VM fds
719 */ 755 */
@@ -788,6 +824,10 @@ struct kvm_s390_ucas_mapping {
788/* Available with KVM_CAP_PCI_2_3 */ 824/* Available with KVM_CAP_PCI_2_3 */
789#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ 825#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \
790 struct kvm_assigned_pci_dev) 826 struct kvm_assigned_pci_dev)
827/* Available with KVM_CAP_SIGNAL_MSI */
828#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi)
829/* Available with KVM_CAP_PPC_GET_SMMU_INFO */
830#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info)
791 831
792/* 832/*
793 * ioctls for vcpu fds 833 * ioctls for vcpu fds
@@ -859,6 +899,8 @@ struct kvm_s390_ucas_mapping {
859/* Available with KVM_CAP_ONE_REG */ 899/* Available with KVM_CAP_ONE_REG */
860#define KVM_GET_ONE_REG _IOW(KVMIO, 0xab, struct kvm_one_reg) 900#define KVM_GET_ONE_REG _IOW(KVMIO, 0xab, struct kvm_one_reg)
861#define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg) 901#define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg)
902/* VM is being stopped by host */
903#define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad)
862 904
863#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) 905#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
864#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) 906#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
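KVM_SIGNAL_MSI is the "new MSI ABI" from the merge description: instead of first configuring a GSI route, userspace can inject a one-off MSI directly on the VM file descriptor, with flags required to be zero for now. A hedged userspace sketch:

    /* Sketch: inject one MSI into the in-kernel irqchip via the VM fd. */
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int inject_msi(int vm_fd, __u32 addr_lo, __u32 addr_hi, __u32 data)
    {
            struct kvm_msi msi;

            memset(&msi, 0, sizeof(msi));   /* flags and pad must be zero */
            msi.address_lo = addr_lo;
            msi.address_hi = addr_hi;
            msi.data = data;

            /* Negative on error (e.g. no in-kernel irqchip); otherwise the
             * kernel's delivery result is returned. */
            return ioctl(vm_fd, KVM_SIGNAL_MSI, &msi);
    }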
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 72cbf08d45fb..c4464356b35b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -35,6 +35,20 @@
35#endif 35#endif
36 36
37/* 37/*
38 * If we support unaligned MMIO, at most one fragment will be split into two:
39 */
40#ifdef KVM_UNALIGNED_MMIO
41# define KVM_EXTRA_MMIO_FRAGMENTS 1
42#else
43# define KVM_EXTRA_MMIO_FRAGMENTS 0
44#endif
45
46#define KVM_USER_MMIO_SIZE 8
47
48#define KVM_MAX_MMIO_FRAGMENTS \
49 (KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS)
50
51/*
38 * vcpu->requests bit members 52 * vcpu->requests bit members
39 */ 53 */
40#define KVM_REQ_TLB_FLUSH 0 54#define KVM_REQ_TLB_FLUSH 0
@@ -68,10 +82,11 @@ struct kvm_io_range {
68 struct kvm_io_device *dev; 82 struct kvm_io_device *dev;
69}; 83};
70 84
85#define NR_IOBUS_DEVS 1000
86
71struct kvm_io_bus { 87struct kvm_io_bus {
72 int dev_count; 88 int dev_count;
73#define NR_IOBUS_DEVS 300 89 struct kvm_io_range range[];
74 struct kvm_io_range range[NR_IOBUS_DEVS];
75}; 90};
76 91
77enum kvm_bus { 92enum kvm_bus {
@@ -113,7 +128,18 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
113enum { 128enum {
114 OUTSIDE_GUEST_MODE, 129 OUTSIDE_GUEST_MODE,
115 IN_GUEST_MODE, 130 IN_GUEST_MODE,
116 EXITING_GUEST_MODE 131 EXITING_GUEST_MODE,
132 READING_SHADOW_PAGE_TABLES,
133};
134
135/*
136 * Sometimes a large or cross-page mmio needs to be broken up into separate
137 * exits for userspace servicing.
138 */
139struct kvm_mmio_fragment {
140 gpa_t gpa;
141 void *data;
142 unsigned len;
117}; 143};
118 144
119struct kvm_vcpu { 145struct kvm_vcpu {
@@ -143,10 +169,9 @@ struct kvm_vcpu {
143 int mmio_needed; 169 int mmio_needed;
144 int mmio_read_completed; 170 int mmio_read_completed;
145 int mmio_is_write; 171 int mmio_is_write;
146 int mmio_size; 172 int mmio_cur_fragment;
147 int mmio_index; 173 int mmio_nr_fragments;
148 unsigned char mmio_data[KVM_MMIO_SIZE]; 174 struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS];
149 gpa_t mmio_phys_addr;
150#endif 175#endif
151 176
152#ifdef CONFIG_KVM_ASYNC_PF 177#ifdef CONFIG_KVM_ASYNC_PF
@@ -178,8 +203,6 @@ struct kvm_memory_slot {
178 unsigned long flags; 203 unsigned long flags;
179 unsigned long *rmap; 204 unsigned long *rmap;
180 unsigned long *dirty_bitmap; 205 unsigned long *dirty_bitmap;
181 unsigned long *dirty_bitmap_head;
182 unsigned long nr_dirty_pages;
183 struct kvm_arch_memory_slot arch; 206 struct kvm_arch_memory_slot arch;
184 unsigned long userspace_addr; 207 unsigned long userspace_addr;
185 int user_alloc; 208 int user_alloc;
@@ -438,6 +461,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
438 gfn_t gfn); 461 gfn_t gfn);
439 462
440void kvm_vcpu_block(struct kvm_vcpu *vcpu); 463void kvm_vcpu_block(struct kvm_vcpu *vcpu);
464void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
465bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
441void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); 466void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
442void kvm_resched(struct kvm_vcpu *vcpu); 467void kvm_resched(struct kvm_vcpu *vcpu);
443void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); 468void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
@@ -506,6 +531,7 @@ int kvm_arch_hardware_setup(void);
506void kvm_arch_hardware_unsetup(void); 531void kvm_arch_hardware_unsetup(void);
507void kvm_arch_check_processor_compat(void *rtn); 532void kvm_arch_check_processor_compat(void *rtn);
508int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); 533int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
534int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
509 535
510void kvm_free_physmem(struct kvm *kvm); 536void kvm_free_physmem(struct kvm *kvm);
511 537
@@ -521,6 +547,15 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
521} 547}
522#endif 548#endif
523 549
550static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
551{
552#ifdef __KVM_HAVE_ARCH_WQP
553 return vcpu->arch.wqp;
554#else
555 return &vcpu->wq;
556#endif
557}
558
524int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); 559int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
525void kvm_arch_destroy_vm(struct kvm *kvm); 560void kvm_arch_destroy_vm(struct kvm *kvm);
526void kvm_free_all_assigned_devices(struct kvm *kvm); 561void kvm_free_all_assigned_devices(struct kvm *kvm);
@@ -769,6 +804,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
769 unsigned flags); 804 unsigned flags);
770void kvm_free_irq_routing(struct kvm *kvm); 805void kvm_free_irq_routing(struct kvm *kvm);
771 806
807int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
808
772#else 809#else
773 810
774static inline void kvm_free_irq_routing(struct kvm *kvm) {} 811static inline void kvm_free_irq_routing(struct kvm *kvm) {}
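The vcpu's single mmio_data[] buffer and its index/size pair are replaced by an array of kvm_mmio_fragment descriptors, sized so the largest access an architecture can emulate always fits. As a worked example with hypothetical values: an architecture defining KVM_MMIO_SIZE as 16 without KVM_UNALIGNED_MMIO gets KVM_MAX_MMIO_FRAGMENTS = 16 / 8 + 0 = 2; defining KVM_UNALIGNED_MMIO adds one more, covering the case where a page boundary splits a single fragment in two.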
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index df30ee08bdd4..e5e1d85b8c7c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -24,6 +24,7 @@
24#include <linux/sysctl.h> 24#include <linux/sysctl.h>
25 25
26#include <asm/irq_regs.h> 26#include <asm/irq_regs.h>
27#include <linux/kvm_para.h>
27#include <linux/perf_event.h> 28#include <linux/perf_event.h>
28 29
29int watchdog_enabled = 1; 30int watchdog_enabled = 1;
@@ -280,6 +281,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
280 __this_cpu_write(softlockup_touch_sync, false); 281 __this_cpu_write(softlockup_touch_sync, false);
281 sched_clock_tick(); 282 sched_clock_tick();
282 } 283 }
284
285 /* Clear the guest paused flag on watchdog reset */
286 kvm_check_and_clear_guest_paused();
283 __touch_watchdog(); 287 __touch_watchdog();
284 return HRTIMER_RESTART; 288 return HRTIMER_RESTART;
285 } 289 }
@@ -292,6 +296,14 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
292 */ 296 */
293 duration = is_softlockup(touch_ts); 297 duration = is_softlockup(touch_ts);
294 if (unlikely(duration)) { 298 if (unlikely(duration)) {
299 /*
300 * If a virtual machine is stopped by the host it can look to
301 * the watchdog like a soft lockup, check to see if the host
302 * stopped the vm before we issue the warning
303 */
304 if (kvm_check_and_clear_guest_paused())
305 return HRTIMER_RESTART;
306
295 /* only warn once */ 307 /* only warn once */
296 if (__this_cpu_read(soft_watchdog_warn) == true) 308 if (__this_cpu_read(soft_watchdog_warn) == true)
297 return HRTIMER_RESTART; 309 return HRTIMER_RESTART;
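kvm_check_and_clear_guest_paused() is deliberately called from watchdog_timer_fn() in both places: once when the watchdog is touched and once just before a soft-lockup warning would be printed, so a guest that was stopped by the host (see KVM_KVMCLOCK_CTRL above) resets its timestamps instead of warning. The asm-generic stub earlier in this series returns false, keeping non-KVM architectures unchanged. A rough sketch of what an architecture-specific override might look like, modelled on the x86 kvmclock flag set by kvm_set_guest_paused(); the per-cpu accessor name is an assumption, not the in-tree implementation:

    /* Sketch (accessor name assumed): test and clear the flag the host
     * set in this vcpu's pvclock page. */
    static inline bool kvm_check_and_clear_guest_paused(void)
    {
            struct pvclock_vcpu_time_info *src = this_cpu_pvclock(); /* assumed */

            if (!(src->flags & PVCLOCK_GUEST_STOPPED))
                    return false;

            src->flags &= ~PVCLOCK_GUEST_STOPPED;
            return true;
    }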
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index f63ccb0a5982..28694f4a9139 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -18,3 +18,6 @@ config KVM_MMIO
18 18
19config KVM_ASYNC_PF 19config KVM_ASYNC_PF
20 bool 20 bool
21
22config HAVE_KVM_MSI
23 bool
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index dcaf272c26c0..26fd54dc459e 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -254,13 +254,17 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
254 } 254 }
255} 255}
256 256
257bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
258{
259 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
260 smp_rmb();
261 return test_bit(vector, ioapic->handled_vectors);
262}
263
257void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) 264void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
258{ 265{
259 struct kvm_ioapic *ioapic = kvm->arch.vioapic; 266 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
260 267
261 smp_rmb();
262 if (!test_bit(vector, ioapic->handled_vectors))
263 return;
264 spin_lock(&ioapic->lock); 268 spin_lock(&ioapic->lock);
265 __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); 269 __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
266 spin_unlock(&ioapic->lock); 270 spin_unlock(&ioapic->lock);
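Splitting the handled_vectors test out into kvm_ioapic_handles_vector() lets the caller skip the ioapic spinlock entirely for the common case of an EOI on a vector the ioapic never routed; only the rarely taken path still serializes on ioapic->lock. The intended caller-side pattern is roughly the following (the actual call site lives in the local APIC EOI path, which is not part of this diff):

    /* Rough caller-side pattern: cheap lockless test before taking the lock. */
    if (kvm_ioapic_handles_vector(vcpu->kvm, vector))
            kvm_ioapic_update_eoi(vcpu->kvm, vector, trigger_mode);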
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 0b190c34ccc3..32872a09b63f 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -71,6 +71,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
71 int short_hand, int dest, int dest_mode); 71 int short_hand, int dest, int dest_mode);
72int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); 72int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
73void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); 73void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
74bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
74int kvm_ioapic_init(struct kvm *kvm); 75int kvm_ioapic_init(struct kvm *kvm);
75void kvm_ioapic_destroy(struct kvm *kvm); 76void kvm_ioapic_destroy(struct kvm *kvm);
76int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); 77int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 9f614b4e365f..a6a0365475ed 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -138,6 +138,20 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
138 return kvm_irq_delivery_to_apic(kvm, NULL, &irq); 138 return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
139} 139}
140 140
141int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
142{
143 struct kvm_kernel_irq_routing_entry route;
144
145 if (!irqchip_in_kernel(kvm) || msi->flags != 0)
146 return -EINVAL;
147
148 route.msi.address_lo = msi->address_lo;
149 route.msi.address_hi = msi->address_hi;
150 route.msi.data = msi->data;
151
152 return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
153}
154
141/* 155/*
142 * Return value: 156 * Return value:
143 * < 0 Interrupt was ignored (masked or not delivered for other reasons) 157 * < 0 Interrupt was ignored (masked or not delivered for other reasons)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9739b533ca2e..7e140683ff14 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -522,12 +522,11 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
522 return; 522 return;
523 523
524 if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE) 524 if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE)
525 vfree(memslot->dirty_bitmap_head); 525 vfree(memslot->dirty_bitmap);
526 else 526 else
527 kfree(memslot->dirty_bitmap_head); 527 kfree(memslot->dirty_bitmap);
528 528
529 memslot->dirty_bitmap = NULL; 529 memslot->dirty_bitmap = NULL;
530 memslot->dirty_bitmap_head = NULL;
531} 530}
532 531
533/* 532/*
@@ -611,8 +610,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
611 610
612/* 611/*
613 * Allocation size is twice as large as the actual dirty bitmap size. 612 * Allocation size is twice as large as the actual dirty bitmap size.
614 * This makes it possible to do double buffering: see x86's 613 * See x86's kvm_vm_ioctl_get_dirty_log() why this is needed.
615 * kvm_vm_ioctl_get_dirty_log().
616 */ 614 */
617static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) 615static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
618{ 616{
@@ -627,8 +625,6 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
627 if (!memslot->dirty_bitmap) 625 if (!memslot->dirty_bitmap)
628 return -ENOMEM; 626 return -ENOMEM;
629 627
630 memslot->dirty_bitmap_head = memslot->dirty_bitmap;
631 memslot->nr_dirty_pages = 0;
632#endif /* !CONFIG_S390 */ 628#endif /* !CONFIG_S390 */
633 return 0; 629 return 0;
634} 630}
@@ -1477,8 +1473,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
1477 if (memslot && memslot->dirty_bitmap) { 1473 if (memslot && memslot->dirty_bitmap) {
1478 unsigned long rel_gfn = gfn - memslot->base_gfn; 1474 unsigned long rel_gfn = gfn - memslot->base_gfn;
1479 1475
1480 if (!test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap)) 1476 /* TODO: introduce set_bit_le() and use it */
1481 memslot->nr_dirty_pages++; 1477 test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap);
1482 } 1478 }
1483} 1479}
1484 1480
@@ -1515,6 +1511,30 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
1515 finish_wait(&vcpu->wq, &wait); 1511 finish_wait(&vcpu->wq, &wait);
1516} 1512}
1517 1513
1514#ifndef CONFIG_S390
1515/*
1516 * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode.
1517 */
1518void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
1519{
1520 int me;
1521 int cpu = vcpu->cpu;
1522 wait_queue_head_t *wqp;
1523
1524 wqp = kvm_arch_vcpu_wq(vcpu);
1525 if (waitqueue_active(wqp)) {
1526 wake_up_interruptible(wqp);
1527 ++vcpu->stat.halt_wakeup;
1528 }
1529
1530 me = get_cpu();
1531 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
1532 if (kvm_arch_vcpu_should_kick(vcpu))
1533 smp_send_reschedule(cpu);
1534 put_cpu();
1535}
1536#endif /* !CONFIG_S390 */
1537
1518void kvm_resched(struct kvm_vcpu *vcpu) 1538void kvm_resched(struct kvm_vcpu *vcpu)
1519{ 1539{
1520 if (!need_resched()) 1540 if (!need_resched())
@@ -1523,6 +1543,31 @@ void kvm_resched(struct kvm_vcpu *vcpu)
1523} 1543}
1524EXPORT_SYMBOL_GPL(kvm_resched); 1544EXPORT_SYMBOL_GPL(kvm_resched);
1525 1545
1546bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
1547{
1548 struct pid *pid;
1549 struct task_struct *task = NULL;
1550
1551 rcu_read_lock();
1552 pid = rcu_dereference(target->pid);
1553 if (pid)
1554 task = get_pid_task(target->pid, PIDTYPE_PID);
1555 rcu_read_unlock();
1556 if (!task)
1557 return false;
1558 if (task->flags & PF_VCPU) {
1559 put_task_struct(task);
1560 return false;
1561 }
1562 if (yield_to(task, 1)) {
1563 put_task_struct(task);
1564 return true;
1565 }
1566 put_task_struct(task);
1567 return false;
1568}
1569EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
1570
1526void kvm_vcpu_on_spin(struct kvm_vcpu *me) 1571void kvm_vcpu_on_spin(struct kvm_vcpu *me)
1527{ 1572{
1528 struct kvm *kvm = me->kvm; 1573 struct kvm *kvm = me->kvm;
@@ -1541,8 +1586,6 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
1541 */ 1586 */
1542 for (pass = 0; pass < 2 && !yielded; pass++) { 1587 for (pass = 0; pass < 2 && !yielded; pass++) {
1543 kvm_for_each_vcpu(i, vcpu, kvm) { 1588 kvm_for_each_vcpu(i, vcpu, kvm) {
1544 struct task_struct *task = NULL;
1545 struct pid *pid;
1546 if (!pass && i < last_boosted_vcpu) { 1589 if (!pass && i < last_boosted_vcpu) {
1547 i = last_boosted_vcpu; 1590 i = last_boosted_vcpu;
1548 continue; 1591 continue;
@@ -1552,24 +1595,11 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
1552 continue; 1595 continue;
1553 if (waitqueue_active(&vcpu->wq)) 1596 if (waitqueue_active(&vcpu->wq))
1554 continue; 1597 continue;
1555 rcu_read_lock(); 1598 if (kvm_vcpu_yield_to(vcpu)) {
1556 pid = rcu_dereference(vcpu->pid);
1557 if (pid)
1558 task = get_pid_task(vcpu->pid, PIDTYPE_PID);
1559 rcu_read_unlock();
1560 if (!task)
1561 continue;
1562 if (task->flags & PF_VCPU) {
1563 put_task_struct(task);
1564 continue;
1565 }
1566 if (yield_to(task, 1)) {
1567 put_task_struct(task);
1568 kvm->last_boosted_vcpu = i; 1599 kvm->last_boosted_vcpu = i;
1569 yielded = 1; 1600 yielded = 1;
1570 break; 1601 break;
1571 } 1602 }
1572 put_task_struct(task);
1573 } 1603 }
1574 } 1604 }
1575} 1605}
@@ -2040,6 +2070,17 @@ static long kvm_vm_ioctl(struct file *filp,
2040 mutex_unlock(&kvm->lock); 2070 mutex_unlock(&kvm->lock);
2041 break; 2071 break;
2042#endif 2072#endif
2073#ifdef CONFIG_HAVE_KVM_MSI
2074 case KVM_SIGNAL_MSI: {
2075 struct kvm_msi msi;
2076
2077 r = -EFAULT;
2078 if (copy_from_user(&msi, argp, sizeof msi))
2079 goto out;
2080 r = kvm_send_userspace_msi(kvm, &msi);
2081 break;
2082 }
2083#endif
2043 default: 2084 default:
2044 r = kvm_arch_vm_ioctl(filp, ioctl, arg); 2085 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
2045 if (r == -ENOTTY) 2086 if (r == -ENOTTY)
@@ -2168,6 +2209,9 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
2168 case KVM_CAP_SET_BOOT_CPU_ID: 2209 case KVM_CAP_SET_BOOT_CPU_ID:
2169#endif 2210#endif
2170 case KVM_CAP_INTERNAL_ERROR_DATA: 2211 case KVM_CAP_INTERNAL_ERROR_DATA:
2212#ifdef CONFIG_HAVE_KVM_MSI
2213 case KVM_CAP_SIGNAL_MSI:
2214#endif
2171 return 1; 2215 return 1;
2172#ifdef CONFIG_HAVE_KVM_IRQCHIP 2216#ifdef CONFIG_HAVE_KVM_IRQCHIP
2173 case KVM_CAP_IRQ_ROUTING: 2217 case KVM_CAP_IRQ_ROUTING:
@@ -2394,9 +2438,6 @@ int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
2394int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, 2438int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
2395 gpa_t addr, int len) 2439 gpa_t addr, int len)
2396{ 2440{
2397 if (bus->dev_count == NR_IOBUS_DEVS)
2398 return -ENOSPC;
2399
2400 bus->range[bus->dev_count++] = (struct kvm_io_range) { 2441 bus->range[bus->dev_count++] = (struct kvm_io_range) {
2401 .addr = addr, 2442 .addr = addr,
2402 .len = len, 2443 .len = len,
@@ -2496,12 +2537,15 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
2496 struct kvm_io_bus *new_bus, *bus; 2537 struct kvm_io_bus *new_bus, *bus;
2497 2538
2498 bus = kvm->buses[bus_idx]; 2539 bus = kvm->buses[bus_idx];
2499 if (bus->dev_count > NR_IOBUS_DEVS-1) 2540 if (bus->dev_count > NR_IOBUS_DEVS - 1)
2500 return -ENOSPC; 2541 return -ENOSPC;
2501 2542
2502 new_bus = kmemdup(bus, sizeof(struct kvm_io_bus), GFP_KERNEL); 2543 new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) *
2544 sizeof(struct kvm_io_range)), GFP_KERNEL);
2503 if (!new_bus) 2545 if (!new_bus)
2504 return -ENOMEM; 2546 return -ENOMEM;
2547 memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count *
2548 sizeof(struct kvm_io_range)));
2505 kvm_io_bus_insert_dev(new_bus, dev, addr, len); 2549 kvm_io_bus_insert_dev(new_bus, dev, addr, len);
2506 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); 2550 rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
2507 synchronize_srcu_expedited(&kvm->srcu); 2551 synchronize_srcu_expedited(&kvm->srcu);
@@ -2518,27 +2562,25 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
2518 struct kvm_io_bus *new_bus, *bus; 2562 struct kvm_io_bus *new_bus, *bus;
2519 2563
2520 bus = kvm->buses[bus_idx]; 2564 bus = kvm->buses[bus_idx];
2521
2522 new_bus = kmemdup(bus, sizeof(*bus), GFP_KERNEL);
2523 if (!new_bus)
2524 return -ENOMEM;
2525
2526 r = -ENOENT; 2565 r = -ENOENT;
2527 for (i = 0; i < new_bus->dev_count; i++) 2566 for (i = 0; i < bus->dev_count; i++)
2528 if (new_bus->range[i].dev == dev) { 2567 if (bus->range[i].dev == dev) {
2529 r = 0; 2568 r = 0;
2530 new_bus->dev_count--;
2531 new_bus->range[i] = new_bus->range[new_bus->dev_count];
2532 sort(new_bus->range, new_bus->dev_count,
2533 sizeof(struct kvm_io_range),
2534 kvm_io_bus_sort_cmp, NULL);
2535 break; 2569 break;
2536 } 2570 }
2537 2571
2538 if (r) { 2572 if (r)
2539 kfree(new_bus);
2540 return r; 2573 return r;
2541 } 2574
2575 new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) *
2576 sizeof(struct kvm_io_range)), GFP_KERNEL);
2577 if (!new_bus)
2578 return -ENOMEM;
2579
2580 memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
2581 new_bus->dev_count--;
2582 memcpy(new_bus->range + i, bus->range + i + 1,
2583 (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
2542 2584
2543 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); 2585 rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
2544 synchronize_srcu_expedited(&kvm->srcu); 2586 synchronize_srcu_expedited(&kvm->srcu);
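With kvm_io_bus now ending in a flexible array (see the kvm_host.h hunk above, which also raises NR_IOBUS_DEVS from 300 to 1000), registering and unregistering a device each allocate a bus sized for exactly dev_count plus or minus one entry and copy the old ranges across; the unregister path copies around the removed index so the array stays sorted and no re-sort is needed. The copy is the only fiddly part; a hypothetical helper shows the pattern in isolation:

    /* Hypothetical helper: drop entry i from a sorted range array while
     * preserving order (mirrors kvm_io_bus_unregister_dev() above). */
    static void copy_ranges_without(struct kvm_io_range *dst,
                                    const struct kvm_io_range *src,
                                    int count, int i)
    {
            memcpy(dst, src, i * sizeof(*src));
            memcpy(dst + i, src + i + 1, (count - 1 - i) * sizeof(*src));
    }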