-rw-r--r--  Documentation/devicetree/bindings/arm/psci.txt | 37
-rw-r--r--  Documentation/virtual/kvm/api.txt | 33
-rw-r--r--  Documentation/virtual/kvm/devices/vm.txt | 26
-rw-r--r--  Documentation/virtual/kvm/ppc-pv.txt | 14
-rw-r--r--  Documentation/virtual/kvm/s390-diag.txt | 2
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 2
-rw-r--r--  arch/arm/include/asm/kvm_psci.h | 6
-rw-r--r--  arch/arm/include/asm/psci.h | 7
-rw-r--r--  arch/arm/include/uapi/asm/kvm.h | 10
-rw-r--r--  arch/arm/kernel/psci.c | 196
-rw-r--r--  arch/arm/kernel/psci_smp.c | 33
-rw-r--r--  arch/arm/kvm/arm.c | 1
-rw-r--r--  arch/arm/kvm/handle_exit.c | 10
-rw-r--r--  arch/arm/kvm/psci.c | 235
-rw-r--r--  arch/arm64/include/asm/cpu_ops.h | 2
-rw-r--r--  arch/arm64/include/asm/cputype.h | 1
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 2
-rw-r--r--  arch/arm64/include/asm/kvm_psci.h | 6
-rw-r--r--  arch/arm64/include/asm/psci.h | 2
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h | 13
-rw-r--r--  arch/arm64/kernel/psci.c | 231
-rw-r--r--  arch/arm64/kernel/smp.c | 22
-rw-r--r--  arch/arm64/kvm/guest.c | 2
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 10
-rw-r--r--  arch/arm64/kvm/sys_regs_generic_v8.c | 2
-rw-r--r--  arch/mips/Kconfig | 12
-rw-r--r--  arch/mips/include/asm/kvm_host.h | 183
-rw-r--r--  arch/mips/include/uapi/asm/kvm.h | 35
-rw-r--r--  arch/mips/kvm/kvm_locore.S | 32
-rw-r--r--  arch/mips/kvm/kvm_mips.c | 145
-rw-r--r--  arch/mips/kvm/kvm_mips_dyntrans.c | 15
-rw-r--r--  arch/mips/kvm/kvm_mips_emul.c | 557
-rw-r--r--  arch/mips/kvm/kvm_tlb.c | 77
-rw-r--r--  arch/mips/kvm/kvm_trap_emul.c | 86
-rw-r--r--  arch/mips/mm/cache.c | 1
-rw-r--r--  arch/mips/mti-malta/malta-time.c | 14
-rw-r--r--  arch/powerpc/include/asm/disassemble.h | 34
-rw-r--r--  arch/powerpc/include/asm/kvm_asm.h | 18
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h | 3
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h | 146
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_asm.h | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_booke.h | 5
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 9
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 80
-rw-r--r--  arch/powerpc/include/asm/reg.h | 12
-rw-r--r--  arch/powerpc/include/asm/reg_booke.h | 1
-rw-r--r--  arch/powerpc/include/uapi/asm/kvm.h | 2
-rw-r--r--  arch/powerpc/include/uapi/asm/kvm_para.h | 6
-rw-r--r--  arch/powerpc/kernel/align.c | 34
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 11
-rw-r--r--  arch/powerpc/kernel/epapr_paravirt.c | 5
-rw-r--r--  arch/powerpc/kernel/kvm.c | 2
-rw-r--r--  arch/powerpc/kernel/paca.c | 3
-rw-r--r--  arch/powerpc/kvm/Kconfig | 2
-rw-r--r--  arch/powerpc/kvm/book3s.c | 106
-rw-r--r--  arch/powerpc/kvm/book3s_32_mmu.c | 41
-rw-r--r--  arch/powerpc/kvm/book3s_32_mmu_host.c | 4
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu.c | 39
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_host.c | 15
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 116
-rw-r--r--  arch/powerpc/kvm/book3s_64_slb.S | 87
-rw-r--r--  arch/powerpc/kvm/book3s_emulate.c | 156
-rw-r--r--  arch/powerpc/kvm/book3s_exports.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 48
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c | 3
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 60
-rw-r--r--  arch/powerpc/kvm/book3s_interrupts.S | 23
-rw-r--r--  arch/powerpc/kvm/book3s_paired_singles.c | 16
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c | 238
-rw-r--r--  arch/powerpc/kvm/book3s_pr_papr.c | 16
-rw-r--r--  arch/powerpc/kvm/book3s_rtas.c | 29
-rw-r--r--  arch/powerpc/kvm/book3s_segment.S | 25
-rw-r--r--  arch/powerpc/kvm/e500_emulate.c | 15
-rw-r--r--  arch/powerpc/kvm/emulate.c | 24
-rw-r--r--  arch/powerpc/kvm/mpic.c | 5
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 64
-rw-r--r--  arch/powerpc/kvm/trace_pr.h | 2
-rw-r--r--  arch/powerpc/mm/slb.c | 2
-rw-r--r--  arch/s390/include/asm/ctl_reg.h | 14
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 163
-rw-r--r--  arch/s390/include/asm/lowcore.h | 10
-rw-r--r--  arch/s390/include/asm/mmu.h | 2
-rw-r--r--  arch/s390/include/asm/mmu_context.h | 1
-rw-r--r--  arch/s390/include/asm/pgalloc.h | 3
-rw-r--r--  arch/s390/include/asm/pgtable.h | 169
-rw-r--r--  arch/s390/include/asm/ptrace.h | 44
-rw-r--r--  arch/s390/include/asm/sclp.h | 8
-rw-r--r--  arch/s390/include/uapi/asm/kvm.h | 28
-rw-r--r--  arch/s390/include/uapi/asm/sie.h | 245
-rw-r--r--  arch/s390/kernel/asm-offsets.c | 14
-rw-r--r--  arch/s390/kernel/entry.S | 4
-rw-r--r--  arch/s390/kernel/entry64.S | 4
-rw-r--r--  arch/s390/kvm/Makefile | 4
-rw-r--r--  arch/s390/kvm/diag.c | 19
-rw-r--r--  arch/s390/kvm/gaccess.c | 726
-rw-r--r--  arch/s390/kvm/gaccess.h | 379
-rw-r--r--  arch/s390/kvm/guestdbg.c | 482
-rw-r--r--  arch/s390/kvm/intercept.c | 222
-rw-r--r--  arch/s390/kvm/interrupt.c | 400
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 554
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 73
-rw-r--r--  arch/s390/kvm/priv.c | 357
-rw-r--r--  arch/s390/kvm/sigp.c | 103
-rw-r--r--  arch/s390/kvm/trace-s390.h | 43
-rw-r--r--  arch/s390/kvm/trace.h | 99
-rw-r--r--  arch/s390/mm/pgtable.c | 89
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 7
-rw-r--r--  arch/x86/include/asm/traps.h | 5
-rw-r--r--  arch/x86/kernel/kvm.c | 2
-rw-r--r--  arch/x86/kvm/cpuid.c | 11
-rw-r--r--  arch/x86/kvm/cpuid.h | 7
-rw-r--r--  arch/x86/kvm/emulate.c | 93
-rw-r--r--  arch/x86/kvm/irq.c | 1
-rw-r--r--  arch/x86/kvm/lapic.c | 62
-rw-r--r--  arch/x86/kvm/mmu.c | 84
-rw-r--r--  arch/x86/kvm/mmu.h | 33
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 7
-rw-r--r--  arch/x86/kvm/pmu.c | 7
-rw-r--r--  arch/x86/kvm/svm.c | 63
-rw-r--r--  arch/x86/kvm/trace.h | 20
-rw-r--r--  arch/x86/kvm/vmx.c | 333
-rw-r--r--  arch/x86/kvm/x86.c | 61
-rw-r--r--  drivers/s390/char/sclp_early.c | 34
-rw-r--r--  include/linux/kvm_host.h | 15
-rw-r--r--  include/uapi/linux/Kbuild | 1
-rw-r--r--  include/uapi/linux/kvm.h | 17
-rw-r--r--  include/uapi/linux/psci.h | 90
-rw-r--r--  virt/kvm/async_pf.c | 4
-rw-r--r--  virt/kvm/eventfd.c | 68
-rw-r--r--  virt/kvm/irq_comm.c | 17
-rw-r--r--  virt/kvm/irqchip.c | 31
-rw-r--r--  virt/kvm/kvm_main.c | 25
133 files changed, 7181 insertions, 1749 deletions
diff --git a/Documentation/devicetree/bindings/arm/psci.txt b/Documentation/devicetree/bindings/arm/psci.txt
index 433afe9cb590..b4a58f39223c 100644
--- a/Documentation/devicetree/bindings/arm/psci.txt
+++ b/Documentation/devicetree/bindings/arm/psci.txt
@@ -21,7 +21,15 @@ to #0.
21 21
22Main node required properties: 22Main node required properties:
23 23
24 - compatible : Must be "arm,psci" 24 - compatible : should contain at least one of:
25
26 * "arm,psci" : for implementations complying to PSCI versions prior to
27 0.2. For these cases function IDs must be provided.
28
29 * "arm,psci-0.2" : for implementations complying to PSCI 0.2. Function
30 IDs are not required and should be ignored by an OS with PSCI 0.2
31 support, but are permitted to be present for compatibility with
32 existing software when "arm,psci" is later in the compatible list.
25 33
26 - method : The method of calling the PSCI firmware. Permitted 34 - method : The method of calling the PSCI firmware. Permitted
27 values are: 35 values are:
@@ -45,6 +53,8 @@ Main node optional properties:
45 53
46Example: 54Example:
47 55
56Case 1: PSCI v0.1 only.
57
48 psci { 58 psci {
49 compatible = "arm,psci"; 59 compatible = "arm,psci";
50 method = "smc"; 60 method = "smc";
@@ -53,3 +63,28 @@ Example:
53 cpu_on = <0x95c10002>; 63 cpu_on = <0x95c10002>;
54 migrate = <0x95c10003>; 64 migrate = <0x95c10003>;
55 }; 65 };
66
67
68Case 2: PSCI v0.2 only
69
70 psci {
71 compatible = "arm,psci-0.2";
72 method = "smc";
73 };
74
75Case 3: PSCI v0.2 and PSCI v0.1.
76
77 A DTB may provide IDs for use by kernels without PSCI 0.2 support,
78 enabling firmware and hypervisors to support existing and new kernels.
79 These IDs will be ignored by kernels with PSCI 0.2 support, which will
80 use the standard PSCI 0.2 IDs exclusively.
81
82 psci {
83 compatible = "arm,psci-0.2", "arm,psci";
84 method = "hvc";
85
86 cpu_on = < arbitrary value >;
87 cpu_off = < arbitrary value >;
88
89 ...
90 };
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index b4f53653c106..75f20c6038a9 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1794,6 +1794,11 @@ registers, find a list below:
1794 PPC | KVM_REG_PPC_MMCR0 | 64 1794 PPC | KVM_REG_PPC_MMCR0 | 64
1795 PPC | KVM_REG_PPC_MMCR1 | 64 1795 PPC | KVM_REG_PPC_MMCR1 | 64
1796 PPC | KVM_REG_PPC_MMCRA | 64 1796 PPC | KVM_REG_PPC_MMCRA | 64
1797 PPC | KVM_REG_PPC_MMCR2 | 64
1798 PPC | KVM_REG_PPC_MMCRS | 64
1799 PPC | KVM_REG_PPC_SIAR | 64
1800 PPC | KVM_REG_PPC_SDAR | 64
1801 PPC | KVM_REG_PPC_SIER | 64
1797 PPC | KVM_REG_PPC_PMC1 | 32 1802 PPC | KVM_REG_PPC_PMC1 | 32
1798 PPC | KVM_REG_PPC_PMC2 | 32 1803 PPC | KVM_REG_PPC_PMC2 | 32
1799 PPC | KVM_REG_PPC_PMC3 | 32 1804 PPC | KVM_REG_PPC_PMC3 | 32
@@ -1868,6 +1873,7 @@ registers, find a list below:
1868 PPC | KVM_REG_PPC_PPR | 64 1873 PPC | KVM_REG_PPC_PPR | 64
1869 PPC | KVM_REG_PPC_ARCH_COMPAT 32 1874 PPC | KVM_REG_PPC_ARCH_COMPAT 32
1870 PPC | KVM_REG_PPC_DABRX | 32 1875 PPC | KVM_REG_PPC_DABRX | 32
1876 PPC | KVM_REG_PPC_WORT | 64
1871 PPC | KVM_REG_PPC_TM_GPR0 | 64 1877 PPC | KVM_REG_PPC_TM_GPR0 | 64
1872 ... 1878 ...
1873 PPC | KVM_REG_PPC_TM_GPR31 | 64 1879 PPC | KVM_REG_PPC_TM_GPR31 | 64
@@ -2211,6 +2217,8 @@ KVM_S390_SIGP_STOP (vcpu) - sigp restart
2211KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm 2217KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm
2212KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm 2218KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm
2213KVM_S390_RESTART (vcpu) - restart 2219KVM_S390_RESTART (vcpu) - restart
2220KVM_S390_INT_CLOCK_COMP (vcpu) - clock comparator interrupt
2221KVM_S390_INT_CPU_TIMER (vcpu) - CPU timer interrupt
2214KVM_S390_INT_VIRTIO (vm) - virtio external interrupt; external interrupt 2222KVM_S390_INT_VIRTIO (vm) - virtio external interrupt; external interrupt
2215 parameters in parm and parm64 2223 parameters in parm and parm64
2216KVM_S390_INT_SERVICE (vm) - sclp external interrupt; sclp parameter in parm 2224KVM_S390_INT_SERVICE (vm) - sclp external interrupt; sclp parameter in parm
@@ -2314,8 +2322,8 @@ struct kvm_create_device {
2314 2322
23154.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR 23234.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
2316 2324
2317Capability: KVM_CAP_DEVICE_CTRL 2325Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
2318Type: device ioctl 2326Type: device ioctl, vm ioctl
2319Parameters: struct kvm_device_attr 2327Parameters: struct kvm_device_attr
2320Returns: 0 on success, -1 on error 2328Returns: 0 on success, -1 on error
2321Errors: 2329Errors:
@@ -2340,8 +2348,8 @@ struct kvm_device_attr {
2340 2348
23414.81 KVM_HAS_DEVICE_ATTR 23494.81 KVM_HAS_DEVICE_ATTR
2342 2350
2343Capability: KVM_CAP_DEVICE_CTRL 2351Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
2344Type: device ioctl 2352Type: device ioctl, vm ioctl
2345Parameters: struct kvm_device_attr 2353Parameters: struct kvm_device_attr
2346Returns: 0 on success, -1 on error 2354Returns: 0 on success, -1 on error
2347Errors: 2355Errors:
@@ -2376,6 +2384,8 @@ Possible features:
2376 Depends on KVM_CAP_ARM_PSCI. 2384 Depends on KVM_CAP_ARM_PSCI.
2377 - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode. 2385 - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
2378 Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). 2386 Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
2387 - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU.
2388 Depends on KVM_CAP_ARM_PSCI_0_2.
2379 2389
2380 2390
23814.83 KVM_ARM_PREFERRED_TARGET 23914.83 KVM_ARM_PREFERRED_TARGET
@@ -2738,6 +2748,21 @@ It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an
2738external interrupt has just been delivered into the guest. User space 2748external interrupt has just been delivered into the guest. User space
2739should put the acknowledged interrupt vector into the 'epr' field. 2749should put the acknowledged interrupt vector into the 'epr' field.
2740 2750
2751 /* KVM_EXIT_SYSTEM_EVENT */
2752 struct {
2753#define KVM_SYSTEM_EVENT_SHUTDOWN 1
2754#define KVM_SYSTEM_EVENT_RESET 2
2755 __u32 type;
2756 __u64 flags;
2757 } system_event;
2758
2759If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
2760a system-level event using some architecture specific mechanism (hypercall
 2761or some special instruction). On ARM/ARM64, this is triggered by a PSCI call
 2762issued by the vcpu via the HVC instruction. The 'type' field describes
2763the system-level event type. The 'flags' field describes architecture
2764specific flags for the system-level event.
2765
2741 /* Fix the size of the union. */ 2766 /* Fix the size of the union. */
2742 char padding[256]; 2767 char padding[256];
2743 }; 2768 };
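
As a rough sketch (not part of this series), a VMM's run loop could consume the
new exit reason as follows, assuming run is the mmap()ed struct kvm_run of the
vcpu and <linux/kvm.h> provides the KVM_SYSTEM_EVENT_* constants shown above;
all other setup and error handling is omitted:

        #include <stdio.h>
        #include <linux/kvm.h>

        /* Sketch: dispatch on the new KVM_EXIT_SYSTEM_EVENT exit reason.
         * Returns 0 when the guest asked to stop (shutdown/reset), 1 when
         * the caller should simply re-enter the guest with KVM_RUN.
         */
        static int handle_system_event(struct kvm_run *run)
        {
                if (run->exit_reason != KVM_EXIT_SYSTEM_EVENT)
                        return 1;

                switch (run->system_event.type) {
                case KVM_SYSTEM_EVENT_SHUTDOWN:
                        printf("guest requested shutdown\n");
                        return 0;
                case KVM_SYSTEM_EVENT_RESET:
                        printf("guest requested reset\n");
                        return 0;       /* caller resets vcpu/VM state */
                default:
                        printf("unknown system event %u\n",
                               run->system_event.type);
                        return 0;
                }
        }
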
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
new file mode 100644
index 000000000000..0d16f96c0eac
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -0,0 +1,26 @@
1Generic vm interface
2====================================
3
4The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
5KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
6struct kvm_device_attr as other devices, but targets VM-wide settings
7and controls.
8
9The groups and attributes per virtual machine, if any, are architecture
10specific.
11
121. GROUP: KVM_S390_VM_MEM_CTRL
13Architectures: s390
14
151.1. ATTRIBUTE: KVM_S390_VM_MEM_CTRL
16Parameters: none
 17Returns: -EBUSY if a vcpu is already defined, otherwise 0
18
19Enables CMMA for the virtual machine
20
211.2. ATTRIBUTE: KVM_S390_VM_CLR_CMMA
 22Parameters: none
23Returns: 0
24
25Clear the CMMA status for all guest pages, so any pages the guest marked
 26as unused are again used and may not be reclaimed by the host.
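
As a rough sketch (again, not part of the series), userspace drives these VM
attributes through the generic kvm_device_attr interface on the VM file
descriptor. The group constant comes from the s390 uapi asm/kvm.h added in
this series, and attr_id stands for whichever of the attributes listed above
is wanted:

        #include <string.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        /* Sketch: set one of the KVM_S390_VM_MEM_CTRL attributes documented
         * above on a VM fd.  attr_id is the attribute number (e.g. the
         * CMMA-enable attribute from 1.1); enabling CMMA must happen before
         * any vcpu is created, otherwise the kernel returns -EBUSY.
         */
        static int kvm_vm_mem_ctrl(int vm_fd, __u64 attr_id)
        {
                struct kvm_device_attr attr;

                memset(&attr, 0, sizeof(attr));
                attr.group = KVM_S390_VM_MEM_CTRL;
                attr.attr = attr_id;

                if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr))
                        return -1;      /* attribute not supported here */

                return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
        }
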
diff --git a/Documentation/virtual/kvm/ppc-pv.txt b/Documentation/virtual/kvm/ppc-pv.txt
index 4643cde517c4..319560646f32 100644
--- a/Documentation/virtual/kvm/ppc-pv.txt
+++ b/Documentation/virtual/kvm/ppc-pv.txt
@@ -94,10 +94,24 @@ a bitmap of available features inside the magic page.
94The following enhancements to the magic page are currently available: 94The following enhancements to the magic page are currently available:
95 95
96 KVM_MAGIC_FEAT_SR Maps SR registers r/w in the magic page 96 KVM_MAGIC_FEAT_SR Maps SR registers r/w in the magic page
97 KVM_MAGIC_FEAT_MAS0_TO_SPRG7 Maps MASn, ESR, PIR and high SPRGs
97 98
98For enhanced features in the magic page, please check for the existence of the 99For enhanced features in the magic page, please check for the existence of the
99feature before using them! 100feature before using them!
100 101
102Magic page flags
103================
104
105In addition to features that indicate whether a host is capable of a particular
 106feature, we also have a channel for a guest to tell the host whether it's capable
107of something. This is what we call "flags".
108
109Flags are passed to the host in the low 12 bits of the Effective Address.
110
111The following flags are currently available for a guest to expose:
112
 113 MAGIC_PAGE_FLAG_NOT_MAPPED_NX Guest handles NX bits correctly wrt magic page
114
101MSR bits 115MSR bits
102======== 116========
103 117
diff --git a/Documentation/virtual/kvm/s390-diag.txt b/Documentation/virtual/kvm/s390-diag.txt
index f1de4fbade15..48c4921794ed 100644
--- a/Documentation/virtual/kvm/s390-diag.txt
+++ b/Documentation/virtual/kvm/s390-diag.txt
@@ -78,3 +78,5 @@ DIAGNOSE function code 'X'501 - KVM breakpoint
78 78
79If the function code specifies 0x501, breakpoint functions may be performed. 79If the function code specifies 0x501, breakpoint functions may be performed.
80This function code is handled by userspace. 80This function code is handled by userspace.
81
82This diagnose function code has no subfunctions and uses no parameters.
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 09af14999c9b..193ceaf01bfd 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -36,7 +36,7 @@
36#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 36#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
37#define KVM_HAVE_ONE_REG 37#define KVM_HAVE_ONE_REG
38 38
39#define KVM_VCPU_MAX_FEATURES 1 39#define KVM_VCPU_MAX_FEATURES 2
40 40
41#include <kvm/arm_vgic.h> 41#include <kvm/arm_vgic.h>
42 42
diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
index 9a83d98bf170..6bda945d31fa 100644
--- a/arch/arm/include/asm/kvm_psci.h
+++ b/arch/arm/include/asm/kvm_psci.h
@@ -18,6 +18,10 @@
18#ifndef __ARM_KVM_PSCI_H__ 18#ifndef __ARM_KVM_PSCI_H__
19#define __ARM_KVM_PSCI_H__ 19#define __ARM_KVM_PSCI_H__
20 20
21bool kvm_psci_call(struct kvm_vcpu *vcpu); 21#define KVM_ARM_PSCI_0_1 1
22#define KVM_ARM_PSCI_0_2 2
23
24int kvm_psci_version(struct kvm_vcpu *vcpu);
25int kvm_psci_call(struct kvm_vcpu *vcpu);
22 26
23#endif /* __ARM_KVM_PSCI_H__ */ 27#endif /* __ARM_KVM_PSCI_H__ */
diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h
index c4ae171850f8..c25ef3ec6d1f 100644
--- a/arch/arm/include/asm/psci.h
+++ b/arch/arm/include/asm/psci.h
@@ -29,16 +29,19 @@ struct psci_operations {
29 int (*cpu_off)(struct psci_power_state state); 29 int (*cpu_off)(struct psci_power_state state);
30 int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); 30 int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
31 int (*migrate)(unsigned long cpuid); 31 int (*migrate)(unsigned long cpuid);
32 int (*affinity_info)(unsigned long target_affinity,
33 unsigned long lowest_affinity_level);
34 int (*migrate_info_type)(void);
32}; 35};
33 36
34extern struct psci_operations psci_ops; 37extern struct psci_operations psci_ops;
35extern struct smp_operations psci_smp_ops; 38extern struct smp_operations psci_smp_ops;
36 39
37#ifdef CONFIG_ARM_PSCI 40#ifdef CONFIG_ARM_PSCI
38void psci_init(void); 41int psci_init(void);
39bool psci_smp_available(void); 42bool psci_smp_available(void);
40#else 43#else
41static inline void psci_init(void) { } 44static inline int psci_init(void) { return 0; }
42static inline bool psci_smp_available(void) { return false; } 45static inline bool psci_smp_available(void) { return false; }
43#endif 46#endif
44 47
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index ef0c8785ba16..e6ebdd3471e5 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -20,6 +20,7 @@
20#define __ARM_KVM_H__ 20#define __ARM_KVM_H__
21 21
22#include <linux/types.h> 22#include <linux/types.h>
23#include <linux/psci.h>
23#include <asm/ptrace.h> 24#include <asm/ptrace.h>
24 25
25#define __KVM_HAVE_GUEST_DEBUG 26#define __KVM_HAVE_GUEST_DEBUG
@@ -83,6 +84,7 @@ struct kvm_regs {
83#define KVM_VGIC_V2_CPU_SIZE 0x2000 84#define KVM_VGIC_V2_CPU_SIZE 0x2000
84 85
85#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ 86#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
87#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */
86 88
87struct kvm_vcpu_init { 89struct kvm_vcpu_init {
88 __u32 target; 90 __u32 target;
@@ -201,9 +203,9 @@ struct kvm_arch_memory_slot {
201#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) 203#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
202#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) 204#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3)
203 205
204#define KVM_PSCI_RET_SUCCESS 0 206#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS
205#define KVM_PSCI_RET_NI ((unsigned long)-1) 207#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED
206#define KVM_PSCI_RET_INVAL ((unsigned long)-2) 208#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
207#define KVM_PSCI_RET_DENIED ((unsigned long)-3) 209#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
208 210
209#endif /* __ARM_KVM_H__ */ 211#endif /* __ARM_KVM_H__ */
diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c
index 46931880093d..f73891b6b730 100644
--- a/arch/arm/kernel/psci.c
+++ b/arch/arm/kernel/psci.c
@@ -17,63 +17,58 @@
17 17
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/of.h> 19#include <linux/of.h>
20#include <linux/reboot.h>
21#include <linux/pm.h>
22#include <uapi/linux/psci.h>
20 23
21#include <asm/compiler.h> 24#include <asm/compiler.h>
22#include <asm/errno.h> 25#include <asm/errno.h>
23#include <asm/opcodes-sec.h> 26#include <asm/opcodes-sec.h>
24#include <asm/opcodes-virt.h> 27#include <asm/opcodes-virt.h>
25#include <asm/psci.h> 28#include <asm/psci.h>
29#include <asm/system_misc.h>
26 30
27struct psci_operations psci_ops; 31struct psci_operations psci_ops;
28 32
29static int (*invoke_psci_fn)(u32, u32, u32, u32); 33static int (*invoke_psci_fn)(u32, u32, u32, u32);
34typedef int (*psci_initcall_t)(const struct device_node *);
30 35
31enum psci_function { 36enum psci_function {
32 PSCI_FN_CPU_SUSPEND, 37 PSCI_FN_CPU_SUSPEND,
33 PSCI_FN_CPU_ON, 38 PSCI_FN_CPU_ON,
34 PSCI_FN_CPU_OFF, 39 PSCI_FN_CPU_OFF,
35 PSCI_FN_MIGRATE, 40 PSCI_FN_MIGRATE,
41 PSCI_FN_AFFINITY_INFO,
42 PSCI_FN_MIGRATE_INFO_TYPE,
36 PSCI_FN_MAX, 43 PSCI_FN_MAX,
37}; 44};
38 45
39static u32 psci_function_id[PSCI_FN_MAX]; 46static u32 psci_function_id[PSCI_FN_MAX];
40 47
41#define PSCI_RET_SUCCESS 0
42#define PSCI_RET_EOPNOTSUPP -1
43#define PSCI_RET_EINVAL -2
44#define PSCI_RET_EPERM -3
45
46static int psci_to_linux_errno(int errno) 48static int psci_to_linux_errno(int errno)
47{ 49{
48 switch (errno) { 50 switch (errno) {
49 case PSCI_RET_SUCCESS: 51 case PSCI_RET_SUCCESS:
50 return 0; 52 return 0;
51 case PSCI_RET_EOPNOTSUPP: 53 case PSCI_RET_NOT_SUPPORTED:
52 return -EOPNOTSUPP; 54 return -EOPNOTSUPP;
53 case PSCI_RET_EINVAL: 55 case PSCI_RET_INVALID_PARAMS:
54 return -EINVAL; 56 return -EINVAL;
55 case PSCI_RET_EPERM: 57 case PSCI_RET_DENIED:
56 return -EPERM; 58 return -EPERM;
57 }; 59 };
58 60
59 return -EINVAL; 61 return -EINVAL;
60} 62}
61 63
62#define PSCI_POWER_STATE_ID_MASK 0xffff
63#define PSCI_POWER_STATE_ID_SHIFT 0
64#define PSCI_POWER_STATE_TYPE_MASK 0x1
65#define PSCI_POWER_STATE_TYPE_SHIFT 16
66#define PSCI_POWER_STATE_AFFL_MASK 0x3
67#define PSCI_POWER_STATE_AFFL_SHIFT 24
68
69static u32 psci_power_state_pack(struct psci_power_state state) 64static u32 psci_power_state_pack(struct psci_power_state state)
70{ 65{
71 return ((state.id & PSCI_POWER_STATE_ID_MASK) 66 return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT)
72 << PSCI_POWER_STATE_ID_SHIFT) | 67 & PSCI_0_2_POWER_STATE_ID_MASK) |
73 ((state.type & PSCI_POWER_STATE_TYPE_MASK) 68 ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
74 << PSCI_POWER_STATE_TYPE_SHIFT) | 69 & PSCI_0_2_POWER_STATE_TYPE_MASK) |
75 ((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK) 70 ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
76 << PSCI_POWER_STATE_AFFL_SHIFT); 71 & PSCI_0_2_POWER_STATE_AFFL_MASK);
77} 72}
78 73
79/* 74/*
@@ -110,6 +105,14 @@ static noinline int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1,
110 return function_id; 105 return function_id;
111} 106}
112 107
108static int psci_get_version(void)
109{
110 int err;
111
112 err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
113 return err;
114}
115
113static int psci_cpu_suspend(struct psci_power_state state, 116static int psci_cpu_suspend(struct psci_power_state state,
114 unsigned long entry_point) 117 unsigned long entry_point)
115{ 118{
@@ -153,26 +156,36 @@ static int psci_migrate(unsigned long cpuid)
153 return psci_to_linux_errno(err); 156 return psci_to_linux_errno(err);
154} 157}
155 158
156static const struct of_device_id psci_of_match[] __initconst = { 159static int psci_affinity_info(unsigned long target_affinity,
157 { .compatible = "arm,psci", }, 160 unsigned long lowest_affinity_level)
158 {}, 161{
159}; 162 int err;
163 u32 fn;
164
165 fn = psci_function_id[PSCI_FN_AFFINITY_INFO];
166 err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0);
167 return err;
168}
160 169
161void __init psci_init(void) 170static int psci_migrate_info_type(void)
162{ 171{
163 struct device_node *np; 172 int err;
164 const char *method; 173 u32 fn;
165 u32 id;
166 174
167 np = of_find_matching_node(NULL, psci_of_match); 175 fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE];
168 if (!np) 176 err = invoke_psci_fn(fn, 0, 0, 0);
169 return; 177 return err;
178}
179
180static int get_set_conduit_method(struct device_node *np)
181{
182 const char *method;
170 183
171 pr_info("probing function IDs from device-tree\n"); 184 pr_info("probing for conduit method from DT.\n");
172 185
173 if (of_property_read_string(np, "method", &method)) { 186 if (of_property_read_string(np, "method", &method)) {
174 pr_warning("missing \"method\" property\n"); 187 pr_warn("missing \"method\" property\n");
175 goto out_put_node; 188 return -ENXIO;
176 } 189 }
177 190
178 if (!strcmp("hvc", method)) { 191 if (!strcmp("hvc", method)) {
@@ -180,10 +193,99 @@ void __init psci_init(void)
180 } else if (!strcmp("smc", method)) { 193 } else if (!strcmp("smc", method)) {
181 invoke_psci_fn = __invoke_psci_fn_smc; 194 invoke_psci_fn = __invoke_psci_fn_smc;
182 } else { 195 } else {
183 pr_warning("invalid \"method\" property: %s\n", method); 196 pr_warn("invalid \"method\" property: %s\n", method);
197 return -EINVAL;
198 }
199 return 0;
200}
201
202static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd)
203{
204 invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0);
205}
206
207static void psci_sys_poweroff(void)
208{
209 invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
210}
211
212/*
213 * PSCI Function IDs for v0.2+ are well defined so use
214 * standard values.
215 */
216static int psci_0_2_init(struct device_node *np)
217{
218 int err, ver;
219
220 err = get_set_conduit_method(np);
221
222 if (err)
223 goto out_put_node;
224
225 ver = psci_get_version();
226
227 if (ver == PSCI_RET_NOT_SUPPORTED) {
228 /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */
229 pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
230 err = -EOPNOTSUPP;
184 goto out_put_node; 231 goto out_put_node;
232 } else {
233 pr_info("PSCIv%d.%d detected in firmware.\n",
234 PSCI_VERSION_MAJOR(ver),
235 PSCI_VERSION_MINOR(ver));
236
237 if (PSCI_VERSION_MAJOR(ver) == 0 &&
238 PSCI_VERSION_MINOR(ver) < 2) {
239 err = -EINVAL;
240 pr_err("Conflicting PSCI version detected.\n");
241 goto out_put_node;
242 }
185 } 243 }
186 244
245 pr_info("Using standard PSCI v0.2 function IDs\n");
246 psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN_CPU_SUSPEND;
247 psci_ops.cpu_suspend = psci_cpu_suspend;
248
249 psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;
250 psci_ops.cpu_off = psci_cpu_off;
251
252 psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN_CPU_ON;
253 psci_ops.cpu_on = psci_cpu_on;
254
255 psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN_MIGRATE;
256 psci_ops.migrate = psci_migrate;
257
258 psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN_AFFINITY_INFO;
259 psci_ops.affinity_info = psci_affinity_info;
260
261 psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] =
262 PSCI_0_2_FN_MIGRATE_INFO_TYPE;
263 psci_ops.migrate_info_type = psci_migrate_info_type;
264
265 arm_pm_restart = psci_sys_reset;
266
267 pm_power_off = psci_sys_poweroff;
268
269out_put_node:
270 of_node_put(np);
271 return err;
272}
273
274/*
275 * PSCI < v0.2 get PSCI Function IDs via DT.
276 */
277static int psci_0_1_init(struct device_node *np)
278{
279 u32 id;
280 int err;
281
282 err = get_set_conduit_method(np);
283
284 if (err)
285 goto out_put_node;
286
287 pr_info("Using PSCI v0.1 Function IDs from DT\n");
288
187 if (!of_property_read_u32(np, "cpu_suspend", &id)) { 289 if (!of_property_read_u32(np, "cpu_suspend", &id)) {
188 psci_function_id[PSCI_FN_CPU_SUSPEND] = id; 290 psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
189 psci_ops.cpu_suspend = psci_cpu_suspend; 291 psci_ops.cpu_suspend = psci_cpu_suspend;
@@ -206,5 +308,25 @@ void __init psci_init(void)
206 308
207out_put_node: 309out_put_node:
208 of_node_put(np); 310 of_node_put(np);
209 return; 311 return err;
312}
313
314static const struct of_device_id psci_of_match[] __initconst = {
315 { .compatible = "arm,psci", .data = psci_0_1_init},
316 { .compatible = "arm,psci-0.2", .data = psci_0_2_init},
317 {},
318};
319
320int __init psci_init(void)
321{
322 struct device_node *np;
323 const struct of_device_id *matched_np;
324 psci_initcall_t init_fn;
325
326 np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);
327 if (!np)
328 return -ENODEV;
329
330 init_fn = (psci_initcall_t)matched_np->data;
331 return init_fn(np);
210} 332}
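
For reference only, the effect of the reworked psci_power_state_pack() (shift
first, then apply the pre-shifted PSCI_0_2_* masks from the new
<uapi/linux/psci.h>) can be reproduced with a small standalone program;
struct psci_power_state is re-declared locally purely for the illustration:

        #include <stdio.h>
        #include <linux/psci.h>  /* PSCI_0_2_POWER_STATE_* masks/shifts */

        /* Local stand-in for the kernel's struct psci_power_state,
         * only for this example. */
        struct psci_power_state {
                unsigned short  id;
                unsigned char   type;
                unsigned char   affinity_level;
        };

        /* Same packing as the reworked psci_power_state_pack():
         * shift first, then mask with the pre-shifted PSCI_0_2_* masks. */
        static unsigned int pack(struct psci_power_state s)
        {
                return ((s.id << PSCI_0_2_POWER_STATE_ID_SHIFT)
                                & PSCI_0_2_POWER_STATE_ID_MASK) |
                       ((s.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
                                & PSCI_0_2_POWER_STATE_TYPE_MASK) |
                       ((s.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
                                & PSCI_0_2_POWER_STATE_AFFL_MASK);
        }

        int main(void)
        {
                struct psci_power_state s = {
                        .id = 0x1, .type = 1, .affinity_level = 2,
                };

                /* Prints 0x02010001: affinity level 2, power-down, id 1 */
                printf("0x%08x\n", pack(s));
                return 0;
        }
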
diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c
index 570a48cc3d64..28a1db4da704 100644
--- a/arch/arm/kernel/psci_smp.c
+++ b/arch/arm/kernel/psci_smp.c
@@ -16,6 +16,8 @@
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/smp.h> 17#include <linux/smp.h>
18#include <linux/of.h> 18#include <linux/of.h>
19#include <linux/delay.h>
20#include <uapi/linux/psci.h>
19 21
20#include <asm/psci.h> 22#include <asm/psci.h>
21#include <asm/smp_plat.h> 23#include <asm/smp_plat.h>
@@ -66,6 +68,36 @@ void __ref psci_cpu_die(unsigned int cpu)
66 /* We should never return */ 68 /* We should never return */
67 panic("psci: cpu %d failed to shutdown\n", cpu); 69 panic("psci: cpu %d failed to shutdown\n", cpu);
68} 70}
71
72int __ref psci_cpu_kill(unsigned int cpu)
73{
74 int err, i;
75
76 if (!psci_ops.affinity_info)
77 return 1;
78 /*
79 * cpu_kill could race with cpu_die and we can
80 * potentially end up declaring this cpu undead
81 * while it is dying. So, try again a few times.
82 */
83
84 for (i = 0; i < 10; i++) {
85 err = psci_ops.affinity_info(cpu_logical_map(cpu), 0);
86 if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) {
87 pr_info("CPU%d killed.\n", cpu);
88 return 1;
89 }
90
91 msleep(10);
92 pr_info("Retrying again to check for CPU kill\n");
93 }
94
95 pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n",
96 cpu, err);
97 /* Make platform_cpu_kill() fail. */
98 return 0;
99}
100
69#endif 101#endif
70 102
71bool __init psci_smp_available(void) 103bool __init psci_smp_available(void)
@@ -78,5 +110,6 @@ struct smp_operations __initdata psci_smp_ops = {
78 .smp_boot_secondary = psci_boot_secondary, 110 .smp_boot_secondary = psci_boot_secondary,
79#ifdef CONFIG_HOTPLUG_CPU 111#ifdef CONFIG_HOTPLUG_CPU
80 .cpu_die = psci_cpu_die, 112 .cpu_die = psci_cpu_die,
113 .cpu_kill = psci_cpu_kill,
81#endif 114#endif
82}; 115};
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index f0e50a0f3a65..3c82b37c0f9e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -197,6 +197,7 @@ int kvm_dev_ioctl_check_extension(long ext)
197 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: 197 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
198 case KVM_CAP_ONE_REG: 198 case KVM_CAP_ONE_REG:
199 case KVM_CAP_ARM_PSCI: 199 case KVM_CAP_ARM_PSCI:
200 case KVM_CAP_ARM_PSCI_0_2:
200 r = 1; 201 r = 1;
201 break; 202 break;
202 case KVM_CAP_COALESCED_MMIO: 203 case KVM_CAP_COALESCED_MMIO:
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 0de91fc6de0f..4c979d466cc1 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -38,14 +38,18 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
38 38
39static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) 39static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
40{ 40{
41 int ret;
42
41 trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), 43 trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
42 kvm_vcpu_hvc_get_imm(vcpu)); 44 kvm_vcpu_hvc_get_imm(vcpu));
43 45
44 if (kvm_psci_call(vcpu)) 46 ret = kvm_psci_call(vcpu);
47 if (ret < 0) {
48 kvm_inject_undefined(vcpu);
45 return 1; 49 return 1;
50 }
46 51
47 kvm_inject_undefined(vcpu); 52 return ret;
48 return 1;
49} 53}
50 54
51static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) 55static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 448f60e8d23c..09cf37737ee2 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -27,6 +27,36 @@
27 * as described in ARM document number ARM DEN 0022A. 27 * as described in ARM document number ARM DEN 0022A.
28 */ 28 */
29 29
30#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
31
32static unsigned long psci_affinity_mask(unsigned long affinity_level)
33{
34 if (affinity_level <= 3)
35 return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level);
36
37 return 0;
38}
39
40static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
41{
42 /*
 43 * NOTE: For simplicity, we treat VCPU suspend emulation the
 44 * same as WFI (Wait-for-interrupt) emulation.
45 *
46 * This means for KVM the wakeup events are interrupts and
47 * this is consistent with intended use of StateID as described
48 * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A).
49 *
 50 * Further, we also treat a power-down request the same as a
 51 * stand-by request, as per section 5.4.2 clause 3 of PSCI v0.2
52 * specification (ARM DEN 0022A). This means all suspend states
53 * for KVM will preserve the register state.
54 */
55 kvm_vcpu_block(vcpu);
56
57 return PSCI_RET_SUCCESS;
58}
59
30static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) 60static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
31{ 61{
32 vcpu->arch.pause = true; 62 vcpu->arch.pause = true;
@@ -38,6 +68,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
38 struct kvm_vcpu *vcpu = NULL, *tmp; 68 struct kvm_vcpu *vcpu = NULL, *tmp;
39 wait_queue_head_t *wq; 69 wait_queue_head_t *wq;
40 unsigned long cpu_id; 70 unsigned long cpu_id;
71 unsigned long context_id;
41 unsigned long mpidr; 72 unsigned long mpidr;
42 phys_addr_t target_pc; 73 phys_addr_t target_pc;
43 int i; 74 int i;
@@ -58,10 +89,17 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
58 * Make sure the caller requested a valid CPU and that the CPU is 89 * Make sure the caller requested a valid CPU and that the CPU is
59 * turned off. 90 * turned off.
60 */ 91 */
61 if (!vcpu || !vcpu->arch.pause) 92 if (!vcpu)
62 return KVM_PSCI_RET_INVAL; 93 return PSCI_RET_INVALID_PARAMS;
94 if (!vcpu->arch.pause) {
95 if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
96 return PSCI_RET_ALREADY_ON;
97 else
98 return PSCI_RET_INVALID_PARAMS;
99 }
63 100
64 target_pc = *vcpu_reg(source_vcpu, 2); 101 target_pc = *vcpu_reg(source_vcpu, 2);
102 context_id = *vcpu_reg(source_vcpu, 3);
65 103
66 kvm_reset_vcpu(vcpu); 104 kvm_reset_vcpu(vcpu);
67 105
@@ -76,26 +114,160 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
76 kvm_vcpu_set_be(vcpu); 114 kvm_vcpu_set_be(vcpu);
77 115
78 *vcpu_pc(vcpu) = target_pc; 116 *vcpu_pc(vcpu) = target_pc;
117 /*
118 * NOTE: We always update r0 (or x0) because for PSCI v0.1
 119 * the general purpose registers are undefined upon CPU_ON.
120 */
121 *vcpu_reg(vcpu, 0) = context_id;
79 vcpu->arch.pause = false; 122 vcpu->arch.pause = false;
80 smp_mb(); /* Make sure the above is visible */ 123 smp_mb(); /* Make sure the above is visible */
81 124
82 wq = kvm_arch_vcpu_wq(vcpu); 125 wq = kvm_arch_vcpu_wq(vcpu);
83 wake_up_interruptible(wq); 126 wake_up_interruptible(wq);
84 127
85 return KVM_PSCI_RET_SUCCESS; 128 return PSCI_RET_SUCCESS;
86} 129}
87 130
88/** 131static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
89 * kvm_psci_call - handle PSCI call if r0 value is in range 132{
90 * @vcpu: Pointer to the VCPU struct 133 int i;
91 * 134 unsigned long mpidr;
92 * Handle PSCI calls from guests through traps from HVC instructions. 135 unsigned long target_affinity;
93 * The calling convention is similar to SMC calls to the secure world where 136 unsigned long target_affinity_mask;
94 * the function number is placed in r0 and this function returns true if the 137 unsigned long lowest_affinity_level;
95 * function number specified in r0 is withing the PSCI range, and false 138 struct kvm *kvm = vcpu->kvm;
96 * otherwise. 139 struct kvm_vcpu *tmp;
97 */ 140
98bool kvm_psci_call(struct kvm_vcpu *vcpu) 141 target_affinity = *vcpu_reg(vcpu, 1);
142 lowest_affinity_level = *vcpu_reg(vcpu, 2);
143
144 /* Determine target affinity mask */
145 target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
146 if (!target_affinity_mask)
147 return PSCI_RET_INVALID_PARAMS;
148
149 /* Ignore other bits of target affinity */
150 target_affinity &= target_affinity_mask;
151
152 /*
153 * If one or more VCPU matching target affinity are running
154 * then ON else OFF
155 */
156 kvm_for_each_vcpu(i, tmp, kvm) {
157 mpidr = kvm_vcpu_get_mpidr(tmp);
158 if (((mpidr & target_affinity_mask) == target_affinity) &&
159 !tmp->arch.pause) {
160 return PSCI_0_2_AFFINITY_LEVEL_ON;
161 }
162 }
163
164 return PSCI_0_2_AFFINITY_LEVEL_OFF;
165}
166
167static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
168{
169 memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
170 vcpu->run->system_event.type = type;
171 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
172}
173
174static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
175{
176 kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN);
177}
178
179static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
180{
181 kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
182}
183
184int kvm_psci_version(struct kvm_vcpu *vcpu)
185{
186 if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
187 return KVM_ARM_PSCI_0_2;
188
189 return KVM_ARM_PSCI_0_1;
190}
191
192static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
193{
194 int ret = 1;
195 unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
196 unsigned long val;
197
198 switch (psci_fn) {
199 case PSCI_0_2_FN_PSCI_VERSION:
200 /*
201 * Bits[31:16] = Major Version = 0
202 * Bits[15:0] = Minor Version = 2
203 */
204 val = 2;
205 break;
206 case PSCI_0_2_FN_CPU_SUSPEND:
207 case PSCI_0_2_FN64_CPU_SUSPEND:
208 val = kvm_psci_vcpu_suspend(vcpu);
209 break;
210 case PSCI_0_2_FN_CPU_OFF:
211 kvm_psci_vcpu_off(vcpu);
212 val = PSCI_RET_SUCCESS;
213 break;
214 case PSCI_0_2_FN_CPU_ON:
215 case PSCI_0_2_FN64_CPU_ON:
216 val = kvm_psci_vcpu_on(vcpu);
217 break;
218 case PSCI_0_2_FN_AFFINITY_INFO:
219 case PSCI_0_2_FN64_AFFINITY_INFO:
220 val = kvm_psci_vcpu_affinity_info(vcpu);
221 break;
222 case PSCI_0_2_FN_MIGRATE:
223 case PSCI_0_2_FN64_MIGRATE:
224 val = PSCI_RET_NOT_SUPPORTED;
225 break;
226 case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
227 /*
228 * Trusted OS is MP hence does not require migration
229 * or
230 * Trusted OS is not present
231 */
232 val = PSCI_0_2_TOS_MP;
233 break;
234 case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
235 case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
236 val = PSCI_RET_NOT_SUPPORTED;
237 break;
238 case PSCI_0_2_FN_SYSTEM_OFF:
239 kvm_psci_system_off(vcpu);
240 /*
 241 * We shouldn't be going back to the guest VCPU after
242 * receiving SYSTEM_OFF request.
243 *
 244 * If user space accidentally/deliberately resumes
245 * guest VCPU after SYSTEM_OFF request then guest
246 * VCPU should see internal failure from PSCI return
247 * value. To achieve this, we preload r0 (or x0) with
248 * PSCI return value INTERNAL_FAILURE.
249 */
250 val = PSCI_RET_INTERNAL_FAILURE;
251 ret = 0;
252 break;
253 case PSCI_0_2_FN_SYSTEM_RESET:
254 kvm_psci_system_reset(vcpu);
255 /*
256 * Same reason as SYSTEM_OFF for preloading r0 (or x0)
257 * with PSCI return value INTERNAL_FAILURE.
258 */
259 val = PSCI_RET_INTERNAL_FAILURE;
260 ret = 0;
261 break;
262 default:
263 return -EINVAL;
264 }
265
266 *vcpu_reg(vcpu, 0) = val;
267 return ret;
268}
269
270static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
99{ 271{
100 unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); 272 unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
101 unsigned long val; 273 unsigned long val;
@@ -103,20 +275,45 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu)
103 switch (psci_fn) { 275 switch (psci_fn) {
104 case KVM_PSCI_FN_CPU_OFF: 276 case KVM_PSCI_FN_CPU_OFF:
105 kvm_psci_vcpu_off(vcpu); 277 kvm_psci_vcpu_off(vcpu);
106 val = KVM_PSCI_RET_SUCCESS; 278 val = PSCI_RET_SUCCESS;
107 break; 279 break;
108 case KVM_PSCI_FN_CPU_ON: 280 case KVM_PSCI_FN_CPU_ON:
109 val = kvm_psci_vcpu_on(vcpu); 281 val = kvm_psci_vcpu_on(vcpu);
110 break; 282 break;
111 case KVM_PSCI_FN_CPU_SUSPEND: 283 case KVM_PSCI_FN_CPU_SUSPEND:
112 case KVM_PSCI_FN_MIGRATE: 284 case KVM_PSCI_FN_MIGRATE:
113 val = KVM_PSCI_RET_NI; 285 val = PSCI_RET_NOT_SUPPORTED;
114 break; 286 break;
115
116 default: 287 default:
117 return false; 288 return -EINVAL;
118 } 289 }
119 290
120 *vcpu_reg(vcpu, 0) = val; 291 *vcpu_reg(vcpu, 0) = val;
121 return true; 292 return 1;
293}
294
295/**
296 * kvm_psci_call - handle PSCI call if r0 value is in range
297 * @vcpu: Pointer to the VCPU struct
298 *
299 * Handle PSCI calls from guests through traps from HVC instructions.
300 * The calling convention is similar to SMC calls to the secure world
301 * where the function number is placed in r0.
302 *
303 * This function returns: > 0 (success), 0 (success but exit to user
304 * space), and < 0 (errors)
305 *
306 * Errors:
307 * -EINVAL: Unrecognized PSCI function
308 */
309int kvm_psci_call(struct kvm_vcpu *vcpu)
310{
311 switch (kvm_psci_version(vcpu)) {
312 case KVM_ARM_PSCI_0_2:
313 return kvm_psci_0_2_call(vcpu);
314 case KVM_ARM_PSCI_0_1:
315 return kvm_psci_0_1_call(vcpu);
316 default:
317 return -EINVAL;
318 };
122} 319}
diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
index 152413076503..d7b4b38a8e86 100644
--- a/arch/arm64/include/asm/cpu_ops.h
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -39,6 +39,7 @@ struct device_node;
39 * from the cpu to be killed. 39 * from the cpu to be killed.
40 * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the 40 * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the
41 * cpu being killed. 41 * cpu being killed.
42 * @cpu_kill: Ensures a cpu has left the kernel. Called from another cpu.
42 * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing 43 * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing
43 * to wrong parameters or error conditions. Called from the 44 * to wrong parameters or error conditions. Called from the
44 * CPU being suspended. Must be called with IRQs disabled. 45 * CPU being suspended. Must be called with IRQs disabled.
@@ -52,6 +53,7 @@ struct cpu_operations {
52#ifdef CONFIG_HOTPLUG_CPU 53#ifdef CONFIG_HOTPLUG_CPU
53 int (*cpu_disable)(unsigned int cpu); 54 int (*cpu_disable)(unsigned int cpu);
54 void (*cpu_die)(unsigned int cpu); 55 void (*cpu_die)(unsigned int cpu);
56 int (*cpu_kill)(unsigned int cpu);
55#endif 57#endif
56#ifdef CONFIG_ARM64_CPU_SUSPEND 58#ifdef CONFIG_ARM64_CPU_SUSPEND
57 int (*cpu_suspend)(unsigned long); 59 int (*cpu_suspend)(unsigned long);
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index c404fb0df3a6..27f54a7cc81b 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -41,6 +41,7 @@
41 41
42#define ARM_CPU_PART_AEM_V8 0xD0F0 42#define ARM_CPU_PART_AEM_V8 0xD0F0
43#define ARM_CPU_PART_FOUNDATION 0xD000 43#define ARM_CPU_PART_FOUNDATION 0xD000
44#define ARM_CPU_PART_CORTEX_A53 0xD030
44#define ARM_CPU_PART_CORTEX_A57 0xD070 45#define ARM_CPU_PART_CORTEX_A57 0xD070
45 46
46#define APM_CPU_PART_POTENZA 0x0000 47#define APM_CPU_PART_POTENZA 0x0000
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0a1d69751562..92242ce06309 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -39,7 +39,7 @@
39#include <kvm/arm_vgic.h> 39#include <kvm/arm_vgic.h>
40#include <kvm/arm_arch_timer.h> 40#include <kvm/arm_arch_timer.h>
41 41
42#define KVM_VCPU_MAX_FEATURES 2 42#define KVM_VCPU_MAX_FEATURES 3
43 43
44struct kvm_vcpu; 44struct kvm_vcpu;
45int kvm_target_cpu(void); 45int kvm_target_cpu(void);
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
index e301a4816355..bc39e557c56c 100644
--- a/arch/arm64/include/asm/kvm_psci.h
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -18,6 +18,10 @@
18#ifndef __ARM64_KVM_PSCI_H__ 18#ifndef __ARM64_KVM_PSCI_H__
19#define __ARM64_KVM_PSCI_H__ 19#define __ARM64_KVM_PSCI_H__
20 20
21bool kvm_psci_call(struct kvm_vcpu *vcpu); 21#define KVM_ARM_PSCI_0_1 1
22#define KVM_ARM_PSCI_0_2 2
23
24int kvm_psci_version(struct kvm_vcpu *vcpu);
25int kvm_psci_call(struct kvm_vcpu *vcpu);
22 26
23#endif /* __ARM64_KVM_PSCI_H__ */ 27#endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h
index d15ab8b46336..e5312ea0ec1a 100644
--- a/arch/arm64/include/asm/psci.h
+++ b/arch/arm64/include/asm/psci.h
@@ -14,6 +14,6 @@
14#ifndef __ASM_PSCI_H 14#ifndef __ASM_PSCI_H
15#define __ASM_PSCI_H 15#define __ASM_PSCI_H
16 16
17void psci_init(void); 17int psci_init(void);
18 18
19#endif /* __ASM_PSCI_H */ 19#endif /* __ASM_PSCI_H */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index eaf54a30bedc..e633ff8cdec8 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -31,6 +31,7 @@
31#define KVM_NR_SPSR 5 31#define KVM_NR_SPSR 5
32 32
33#ifndef __ASSEMBLY__ 33#ifndef __ASSEMBLY__
34#include <linux/psci.h>
34#include <asm/types.h> 35#include <asm/types.h>
35#include <asm/ptrace.h> 36#include <asm/ptrace.h>
36 37
@@ -56,8 +57,9 @@ struct kvm_regs {
56#define KVM_ARM_TARGET_FOUNDATION_V8 1 57#define KVM_ARM_TARGET_FOUNDATION_V8 1
57#define KVM_ARM_TARGET_CORTEX_A57 2 58#define KVM_ARM_TARGET_CORTEX_A57 2
58#define KVM_ARM_TARGET_XGENE_POTENZA 3 59#define KVM_ARM_TARGET_XGENE_POTENZA 3
60#define KVM_ARM_TARGET_CORTEX_A53 4
59 61
60#define KVM_ARM_NUM_TARGETS 4 62#define KVM_ARM_NUM_TARGETS 5
61 63
62/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ 64/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
63#define KVM_ARM_DEVICE_TYPE_SHIFT 0 65#define KVM_ARM_DEVICE_TYPE_SHIFT 0
@@ -77,6 +79,7 @@ struct kvm_regs {
77 79
78#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ 80#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
79#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ 81#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
82#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */
80 83
81struct kvm_vcpu_init { 84struct kvm_vcpu_init {
82 __u32 target; 85 __u32 target;
@@ -186,10 +189,10 @@ struct kvm_arch_memory_slot {
186#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) 189#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
187#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) 190#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3)
188 191
189#define KVM_PSCI_RET_SUCCESS 0 192#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS
190#define KVM_PSCI_RET_NI ((unsigned long)-1) 193#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED
191#define KVM_PSCI_RET_INVAL ((unsigned long)-2) 194#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
192#define KVM_PSCI_RET_DENIED ((unsigned long)-3) 195#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
193 196
194#endif 197#endif
195 198
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index ea4828a4aa96..9e9798f91172 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -18,12 +18,17 @@
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/of.h> 19#include <linux/of.h>
20#include <linux/smp.h> 20#include <linux/smp.h>
21#include <linux/reboot.h>
22#include <linux/pm.h>
23#include <linux/delay.h>
24#include <uapi/linux/psci.h>
21 25
22#include <asm/compiler.h> 26#include <asm/compiler.h>
23#include <asm/cpu_ops.h> 27#include <asm/cpu_ops.h>
24#include <asm/errno.h> 28#include <asm/errno.h>
25#include <asm/psci.h> 29#include <asm/psci.h>
26#include <asm/smp_plat.h> 30#include <asm/smp_plat.h>
31#include <asm/system_misc.h>
27 32
28#define PSCI_POWER_STATE_TYPE_STANDBY 0 33#define PSCI_POWER_STATE_TYPE_STANDBY 0
29#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 34#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1
@@ -40,58 +45,52 @@ struct psci_operations {
40 int (*cpu_off)(struct psci_power_state state); 45 int (*cpu_off)(struct psci_power_state state);
41 int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); 46 int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
42 int (*migrate)(unsigned long cpuid); 47 int (*migrate)(unsigned long cpuid);
48 int (*affinity_info)(unsigned long target_affinity,
49 unsigned long lowest_affinity_level);
50 int (*migrate_info_type)(void);
43}; 51};
44 52
45static struct psci_operations psci_ops; 53static struct psci_operations psci_ops;
46 54
47static int (*invoke_psci_fn)(u64, u64, u64, u64); 55static int (*invoke_psci_fn)(u64, u64, u64, u64);
56typedef int (*psci_initcall_t)(const struct device_node *);
48 57
49enum psci_function { 58enum psci_function {
50 PSCI_FN_CPU_SUSPEND, 59 PSCI_FN_CPU_SUSPEND,
51 PSCI_FN_CPU_ON, 60 PSCI_FN_CPU_ON,
52 PSCI_FN_CPU_OFF, 61 PSCI_FN_CPU_OFF,
53 PSCI_FN_MIGRATE, 62 PSCI_FN_MIGRATE,
63 PSCI_FN_AFFINITY_INFO,
64 PSCI_FN_MIGRATE_INFO_TYPE,
54 PSCI_FN_MAX, 65 PSCI_FN_MAX,
55}; 66};
56 67
57static u32 psci_function_id[PSCI_FN_MAX]; 68static u32 psci_function_id[PSCI_FN_MAX];
58 69
59#define PSCI_RET_SUCCESS 0
60#define PSCI_RET_EOPNOTSUPP -1
61#define PSCI_RET_EINVAL -2
62#define PSCI_RET_EPERM -3
63
64static int psci_to_linux_errno(int errno) 70static int psci_to_linux_errno(int errno)
65{ 71{
66 switch (errno) { 72 switch (errno) {
67 case PSCI_RET_SUCCESS: 73 case PSCI_RET_SUCCESS:
68 return 0; 74 return 0;
69 case PSCI_RET_EOPNOTSUPP: 75 case PSCI_RET_NOT_SUPPORTED:
70 return -EOPNOTSUPP; 76 return -EOPNOTSUPP;
71 case PSCI_RET_EINVAL: 77 case PSCI_RET_INVALID_PARAMS:
72 return -EINVAL; 78 return -EINVAL;
73 case PSCI_RET_EPERM: 79 case PSCI_RET_DENIED:
74 return -EPERM; 80 return -EPERM;
75 }; 81 };
76 82
77 return -EINVAL; 83 return -EINVAL;
78} 84}
79 85
80#define PSCI_POWER_STATE_ID_MASK 0xffff
81#define PSCI_POWER_STATE_ID_SHIFT 0
82#define PSCI_POWER_STATE_TYPE_MASK 0x1
83#define PSCI_POWER_STATE_TYPE_SHIFT 16
84#define PSCI_POWER_STATE_AFFL_MASK 0x3
85#define PSCI_POWER_STATE_AFFL_SHIFT 24
86
87static u32 psci_power_state_pack(struct psci_power_state state) 86static u32 psci_power_state_pack(struct psci_power_state state)
88{ 87{
89 return ((state.id & PSCI_POWER_STATE_ID_MASK) 88 return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT)
90 << PSCI_POWER_STATE_ID_SHIFT) | 89 & PSCI_0_2_POWER_STATE_ID_MASK) |
91 ((state.type & PSCI_POWER_STATE_TYPE_MASK) 90 ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
92 << PSCI_POWER_STATE_TYPE_SHIFT) | 91 & PSCI_0_2_POWER_STATE_TYPE_MASK) |
93 ((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK) 92 ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
94 << PSCI_POWER_STATE_AFFL_SHIFT); 93 & PSCI_0_2_POWER_STATE_AFFL_MASK);
95} 94}
96 95
97/* 96/*
@@ -128,6 +127,14 @@ static noinline int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1,
128 return function_id; 127 return function_id;
129} 128}
130 129
130static int psci_get_version(void)
131{
132 int err;
133
134 err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
135 return err;
136}
137
131static int psci_cpu_suspend(struct psci_power_state state, 138static int psci_cpu_suspend(struct psci_power_state state,
132 unsigned long entry_point) 139 unsigned long entry_point)
133{ 140{
@@ -171,26 +178,36 @@ static int psci_migrate(unsigned long cpuid)
171 return psci_to_linux_errno(err); 178 return psci_to_linux_errno(err);
172} 179}
173 180
174static const struct of_device_id psci_of_match[] __initconst = { 181static int psci_affinity_info(unsigned long target_affinity,
175 { .compatible = "arm,psci", }, 182 unsigned long lowest_affinity_level)
176 {}, 183{
177}; 184 int err;
185 u32 fn;
186
187 fn = psci_function_id[PSCI_FN_AFFINITY_INFO];
188 err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0);
189 return err;
190}
178 191
179void __init psci_init(void) 192static int psci_migrate_info_type(void)
180{ 193{
181 struct device_node *np; 194 int err;
182 const char *method; 195 u32 fn;
183 u32 id;
184 196
185 np = of_find_matching_node(NULL, psci_of_match); 197 fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE];
186 if (!np) 198 err = invoke_psci_fn(fn, 0, 0, 0);
187 return; 199 return err;
200}
188 201
189 pr_info("probing function IDs from device-tree\n"); 202static int get_set_conduit_method(struct device_node *np)
203{
204 const char *method;
205
206 pr_info("probing for conduit method from DT.\n");
190 207
191 if (of_property_read_string(np, "method", &method)) { 208 if (of_property_read_string(np, "method", &method)) {
192 pr_warning("missing \"method\" property\n"); 209 pr_warn("missing \"method\" property\n");
193 goto out_put_node; 210 return -ENXIO;
194 } 211 }
195 212
196 if (!strcmp("hvc", method)) { 213 if (!strcmp("hvc", method)) {
@@ -198,10 +215,99 @@ void __init psci_init(void)
198 } else if (!strcmp("smc", method)) { 215 } else if (!strcmp("smc", method)) {
199 invoke_psci_fn = __invoke_psci_fn_smc; 216 invoke_psci_fn = __invoke_psci_fn_smc;
200 } else { 217 } else {
201 pr_warning("invalid \"method\" property: %s\n", method); 218 pr_warn("invalid \"method\" property: %s\n", method);
219 return -EINVAL;
220 }
221 return 0;
222}
223
224static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd)
225{
226 invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0);
227}
228
229static void psci_sys_poweroff(void)
230{
231 invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
232}
233
234/*
235 * PSCI Function IDs for v0.2+ are well defined so use
236 * standard values.
237 */
238static int psci_0_2_init(struct device_node *np)
239{
240 int err, ver;
241
242 err = get_set_conduit_method(np);
243
244 if (err)
245 goto out_put_node;
246
247 ver = psci_get_version();
248
249 if (ver == PSCI_RET_NOT_SUPPORTED) {
250 /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */
251 pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
252 err = -EOPNOTSUPP;
202 goto out_put_node; 253 goto out_put_node;
254 } else {
255 pr_info("PSCIv%d.%d detected in firmware.\n",
256 PSCI_VERSION_MAJOR(ver),
257 PSCI_VERSION_MINOR(ver));
258
259 if (PSCI_VERSION_MAJOR(ver) == 0 &&
260 PSCI_VERSION_MINOR(ver) < 2) {
261 err = -EINVAL;
262 pr_err("Conflicting PSCI version detected.\n");
263 goto out_put_node;
264 }
203 } 265 }
204 266
267 pr_info("Using standard PSCI v0.2 function IDs\n");
268 psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND;
269 psci_ops.cpu_suspend = psci_cpu_suspend;
270
271 psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;
272 psci_ops.cpu_off = psci_cpu_off;
273
274 psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON;
275 psci_ops.cpu_on = psci_cpu_on;
276
277 psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE;
278 psci_ops.migrate = psci_migrate;
279
280 psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN64_AFFINITY_INFO;
281 psci_ops.affinity_info = psci_affinity_info;
282
283 psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] =
284 PSCI_0_2_FN_MIGRATE_INFO_TYPE;
285 psci_ops.migrate_info_type = psci_migrate_info_type;
286
287 arm_pm_restart = psci_sys_reset;
288
289 pm_power_off = psci_sys_poweroff;
290
291out_put_node:
292 of_node_put(np);
293 return err;
294}
295
296/*
297 * PSCI < v0.2 get PSCI Function IDs via DT.
298 */
299static int psci_0_1_init(struct device_node *np)
300{
301 u32 id;
302 int err;
303
304 err = get_set_conduit_method(np);
305
306 if (err)
307 goto out_put_node;
308
309 pr_info("Using PSCI v0.1 Function IDs from DT\n");
310
205 if (!of_property_read_u32(np, "cpu_suspend", &id)) { 311 if (!of_property_read_u32(np, "cpu_suspend", &id)) {
206 psci_function_id[PSCI_FN_CPU_SUSPEND] = id; 312 psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
207 psci_ops.cpu_suspend = psci_cpu_suspend; 313 psci_ops.cpu_suspend = psci_cpu_suspend;
@@ -224,7 +330,28 @@ void __init psci_init(void)
224 330
225out_put_node: 331out_put_node:
226 of_node_put(np); 332 of_node_put(np);
227 return; 333 return err;
334}
335
336static const struct of_device_id psci_of_match[] __initconst = {
337 { .compatible = "arm,psci", .data = psci_0_1_init},
338 { .compatible = "arm,psci-0.2", .data = psci_0_2_init},
339 {},
340};
341
342int __init psci_init(void)
343{
344 struct device_node *np;
345 const struct of_device_id *matched_np;
346 psci_initcall_t init_fn;
347
348 np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);
349
350 if (!np)
351 return -ENODEV;
352
353 init_fn = (psci_initcall_t)matched_np->data;
354 return init_fn(np);
228} 355}
229 356
230#ifdef CONFIG_SMP 357#ifdef CONFIG_SMP
@@ -277,6 +404,35 @@ static void cpu_psci_cpu_die(unsigned int cpu)
277 404
278 pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); 405 pr_crit("unable to power off CPU%u (%d)\n", cpu, ret);
279} 406}
407
408static int cpu_psci_cpu_kill(unsigned int cpu)
409{
410 int err, i;
411
412 if (!psci_ops.affinity_info)
413 return 1;
414 /*
415 * cpu_kill could race with cpu_die and we can
416 * potentially end up declaring this cpu undead
417 * while it is dying. So, try again a few times.
418 */
419
420 for (i = 0; i < 10; i++) {
421 err = psci_ops.affinity_info(cpu_logical_map(cpu), 0);
422 if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) {
423 pr_info("CPU%d killed.\n", cpu);
424 return 1;
425 }
426
427 msleep(10);
428 pr_info("Retrying again to check for CPU kill\n");
429 }
430
431 pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n",
432 cpu, err);
433 /* Make op_cpu_kill() fail. */
434 return 0;
435}
280#endif 436#endif
281 437
282const struct cpu_operations cpu_psci_ops = { 438const struct cpu_operations cpu_psci_ops = {
@@ -287,6 +443,7 @@ const struct cpu_operations cpu_psci_ops = {
287#ifdef CONFIG_HOTPLUG_CPU 443#ifdef CONFIG_HOTPLUG_CPU
288 .cpu_disable = cpu_psci_cpu_disable, 444 .cpu_disable = cpu_psci_cpu_disable,
289 .cpu_die = cpu_psci_cpu_die, 445 .cpu_die = cpu_psci_cpu_die,
446 .cpu_kill = cpu_psci_cpu_kill,
290#endif 447#endif
291}; 448};
292 449
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index f0a141dd5655..c3cb160edc69 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -228,6 +228,19 @@ int __cpu_disable(void)
228 return 0; 228 return 0;
229} 229}
230 230
231static int op_cpu_kill(unsigned int cpu)
232{
233 /*
234 * If we have no means of synchronising with the dying CPU, then assume
235 * that it is really dead. We can only wait for an arbitrary length of
236 * time and hope that it's dead, so let's skip the wait and just hope.
237 */
238 if (!cpu_ops[cpu]->cpu_kill)
239 return 1;
240
241 return cpu_ops[cpu]->cpu_kill(cpu);
242}
243
231static DECLARE_COMPLETION(cpu_died); 244static DECLARE_COMPLETION(cpu_died);
232 245
233/* 246/*
@@ -241,6 +254,15 @@ void __cpu_die(unsigned int cpu)
241 return; 254 return;
242 } 255 }
243 pr_notice("CPU%u: shutdown\n", cpu); 256 pr_notice("CPU%u: shutdown\n", cpu);
257
258 /*
259 * Now that the dying CPU is beyond the point of no return w.r.t.
259 * in-kernel synchronisation, try to get the firmware to help us to
261 * verify that it has really left the kernel before we consider
262 * clobbering anything it might still be using.
263 */
264 if (!op_cpu_kill(cpu))
265 pr_warn("CPU%d may not have shut down cleanly\n", cpu);
244} 266}
245 267
246/* 268/*
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 08745578d54d..60b5c31f3c10 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -214,6 +214,8 @@ int __attribute_const__ kvm_target_cpu(void)
214 return KVM_ARM_TARGET_AEM_V8; 214 return KVM_ARM_TARGET_AEM_V8;
215 case ARM_CPU_PART_FOUNDATION: 215 case ARM_CPU_PART_FOUNDATION:
216 return KVM_ARM_TARGET_FOUNDATION_V8; 216 return KVM_ARM_TARGET_FOUNDATION_V8;
217 case ARM_CPU_PART_CORTEX_A53:
218 return KVM_ARM_TARGET_CORTEX_A53;
217 case ARM_CPU_PART_CORTEX_A57: 219 case ARM_CPU_PART_CORTEX_A57:
218 return KVM_ARM_TARGET_CORTEX_A57; 220 return KVM_ARM_TARGET_CORTEX_A57;
219 }; 221 };
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 7bc41eab4c64..182415e1a952 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -30,11 +30,15 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
30 30
31static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) 31static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
32{ 32{
33 if (kvm_psci_call(vcpu)) 33 int ret;
34
35 ret = kvm_psci_call(vcpu);
36 if (ret < 0) {
37 kvm_inject_undefined(vcpu);
34 return 1; 38 return 1;
39 }
35 40
36 kvm_inject_undefined(vcpu); 41 return ret;
37 return 1;
38} 42}
39 43
40static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) 44static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 8fe6f76b0edc..475fd2929310 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -88,6 +88,8 @@ static int __init sys_reg_genericv8_init(void)
88 &genericv8_target_table); 88 &genericv8_target_table);
89 kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8, 89 kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
90 &genericv8_target_table); 90 &genericv8_target_table);
91 kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53,
92 &genericv8_target_table);
91 kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, 93 kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
92 &genericv8_target_table); 94 &genericv8_target_table);
93 kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, 95 kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 5cd695f905a1..5e0014e864f3 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1756,14 +1756,14 @@ config KVM_GUEST
1756 help 1756 help
1757 Select this option if building a guest kernel for KVM (Trap & Emulate) mode 1757 Select this option if building a guest kernel for KVM (Trap & Emulate) mode
1758 1758
1759config KVM_HOST_FREQ 1759config KVM_GUEST_TIMER_FREQ
1760 int "KVM Host Processor Frequency (MHz)" 1760 int "Count/Compare Timer Frequency (MHz)"
1761 depends on KVM_GUEST 1761 depends on KVM_GUEST
1762 default 500 1762 default 100
1763 help 1763 help
1764 Select this option if building a guest kernel for KVM to skip 1764 Set this to non-zero if building a guest kernel for KVM to skip RTC
1765 RTC emulation when determining guest CPU Frequency. Instead, the guest 1765 emulation when determining guest CPU Frequency. Instead, the guest's
1766 processor frequency is automatically derived from the host frequency. 1766 timer frequency is specified directly.
1767 1767
1768choice 1768choice
1769 prompt "Kernel page size" 1769 prompt "Kernel page size"
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 060aaa6348d7..b0aa95565752 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -19,6 +19,38 @@
19#include <linux/threads.h> 19#include <linux/threads.h>
20#include <linux/spinlock.h> 20#include <linux/spinlock.h>
21 21
22/* MIPS KVM register ids */
23#define MIPS_CP0_32(_R, _S) \
24 (KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S)))
25
26#define MIPS_CP0_64(_R, _S) \
27 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S)))
28
29#define KVM_REG_MIPS_CP0_INDEX MIPS_CP0_32(0, 0)
30#define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0)
31#define KVM_REG_MIPS_CP0_ENTRYLO1 MIPS_CP0_64(3, 0)
32#define KVM_REG_MIPS_CP0_CONTEXT MIPS_CP0_64(4, 0)
33#define KVM_REG_MIPS_CP0_USERLOCAL MIPS_CP0_64(4, 2)
34#define KVM_REG_MIPS_CP0_PAGEMASK MIPS_CP0_32(5, 0)
35#define KVM_REG_MIPS_CP0_PAGEGRAIN MIPS_CP0_32(5, 1)
36#define KVM_REG_MIPS_CP0_WIRED MIPS_CP0_32(6, 0)
37#define KVM_REG_MIPS_CP0_HWRENA MIPS_CP0_32(7, 0)
38#define KVM_REG_MIPS_CP0_BADVADDR MIPS_CP0_64(8, 0)
39#define KVM_REG_MIPS_CP0_COUNT MIPS_CP0_32(9, 0)
40#define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0)
41#define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0)
42#define KVM_REG_MIPS_CP0_STATUS MIPS_CP0_32(12, 0)
43#define KVM_REG_MIPS_CP0_CAUSE MIPS_CP0_32(13, 0)
44#define KVM_REG_MIPS_CP0_EPC MIPS_CP0_64(14, 0)
45#define KVM_REG_MIPS_CP0_EBASE MIPS_CP0_64(15, 1)
46#define KVM_REG_MIPS_CP0_CONFIG MIPS_CP0_32(16, 0)
47#define KVM_REG_MIPS_CP0_CONFIG1 MIPS_CP0_32(16, 1)
48#define KVM_REG_MIPS_CP0_CONFIG2 MIPS_CP0_32(16, 2)
49#define KVM_REG_MIPS_CP0_CONFIG3 MIPS_CP0_32(16, 3)
50#define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7)
51#define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0)
52#define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0)
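
The MIPS_CP0_32()/MIPS_CP0_64() macros above pack the coprocessor-0 (register, select) pair into the low bits of a ONE_REG id: a 0x10000 namespace tag plus 8 * reg + sel, so CP0_Cause (register 13, select 0) lands at 0x10068. A minimal standalone sketch of that arithmetic (not part of the patch), assuming only the generic KVM_REG_MIPS and KVM_REG_SIZE_* constants from the Linux uapi <linux/kvm.h>:

/* Standalone userspace check of the id packing; not kernel code. */
#include <stdio.h>
#include <linux/kvm.h>

#define MIPS_CP0_32(_R, _S) \
	(KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S)))

int main(void)
{
	/* CP0_Cause is register 13, select 0: 8 * 13 + 0 = 104 = 0x68. */
	unsigned long long cause = MIPS_CP0_32(13, 0);

	/* Low bits should read 0x10068; the high bits tag arch and size. */
	printf("CP0_Cause id low bits: 0x%llx\n", cause & 0x1ffffULL);
	return 0;
}
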
53
22 54
23#define KVM_MAX_VCPUS 1 55#define KVM_MAX_VCPUS 1
24#define KVM_USER_MEM_SLOTS 8 56#define KVM_USER_MEM_SLOTS 8
@@ -372,8 +404,19 @@ struct kvm_vcpu_arch {
372 404
373 u32 io_gpr; /* GPR used as IO source/target */ 405 u32 io_gpr; /* GPR used as IO source/target */
374 406
375 /* Used to calibrate the virutal count register for the guest */ 407 struct hrtimer comparecount_timer;
376 int32_t host_cp0_count; 408 /* Count timer control KVM register */
409 uint32_t count_ctl;
410 /* Count bias from the raw time */
411 uint32_t count_bias;
412 /* Frequency of timer in Hz */
413 uint32_t count_hz;
414 /* Dynamic nanosecond bias (multiple of count_period) to avoid overflow */
415 s64 count_dyn_bias;
416 /* Resume time */
417 ktime_t count_resume;
418 /* Period of timer tick in ns */
419 u64 count_period;
377 420
378 /* Bitmask of exceptions that are pending */ 421 /* Bitmask of exceptions that are pending */
379 unsigned long pending_exceptions; 422 unsigned long pending_exceptions;
@@ -394,8 +437,6 @@ struct kvm_vcpu_arch {
394 uint32_t guest_kernel_asid[NR_CPUS]; 437 uint32_t guest_kernel_asid[NR_CPUS];
395 struct mm_struct guest_kernel_mm, guest_user_mm; 438 struct mm_struct guest_kernel_mm, guest_user_mm;
396 439
397 struct hrtimer comparecount_timer;
398
399 int last_sched_cpu; 440 int last_sched_cpu;
400 441
401 /* WAIT executed */ 442 /* WAIT executed */
@@ -410,6 +451,7 @@ struct kvm_vcpu_arch {
410#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0]) 451#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0])
411#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val)) 452#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val))
412#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2]) 453#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2])
454#define kvm_write_c0_guest_userlocal(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2] = (val))
413#define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0]) 455#define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0])
414#define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val)) 456#define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val))
415#define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0]) 457#define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0])
@@ -449,15 +491,74 @@ struct kvm_vcpu_arch {
449#define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0]) 491#define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0])
450#define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val)) 492#define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
451 493
494/*
495 * Some of the guest registers may be modified asynchronously (e.g. from a
496 * hrtimer callback in hard irq context) and therefore need stronger atomicity
497 * guarantees than other registers.
498 */
499
500static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg,
501 unsigned long val)
502{
503 unsigned long temp;
504 do {
505 __asm__ __volatile__(
506 " .set mips3 \n"
507 " " __LL "%0, %1 \n"
508 " or %0, %2 \n"
509 " " __SC "%0, %1 \n"
510 " .set mips0 \n"
511 : "=&r" (temp), "+m" (*reg)
512 : "r" (val));
513 } while (unlikely(!temp));
514}
515
516static inline void _kvm_atomic_clear_c0_guest_reg(unsigned long *reg,
517 unsigned long val)
518{
519 unsigned long temp;
520 do {
521 __asm__ __volatile__(
522 " .set mips3 \n"
523 " " __LL "%0, %1 \n"
524 " and %0, %2 \n"
525 " " __SC "%0, %1 \n"
526 " .set mips0 \n"
527 : "=&r" (temp), "+m" (*reg)
528 : "r" (~val));
529 } while (unlikely(!temp));
530}
531
532static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
533 unsigned long change,
534 unsigned long val)
535{
536 unsigned long temp;
537 do {
538 __asm__ __volatile__(
539 " .set mips3 \n"
540 " " __LL "%0, %1 \n"
541 " and %0, %2 \n"
542 " or %0, %3 \n"
543 " " __SC "%0, %1 \n"
544 " .set mips0 \n"
545 : "=&r" (temp), "+m" (*reg)
546 : "r" (~change), "r" (val & change));
547 } while (unlikely(!temp));
548}
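
The three LL/SC helpers above implement an atomic clear-then-set so that Cause bits updated from the hrtimer callback (hard irq context) cannot be lost to a racing read-modify-write. The same semantics, shown as a rough portable sketch using compiler __atomic builtins instead of the patch's MIPS LL/SC asm (illustration only):

static inline void change_bits_atomically(unsigned long *reg,
					  unsigned long change,
					  unsigned long val)
{
	unsigned long old, newval;

	old = __atomic_load_n(reg, __ATOMIC_RELAXED);
	do {
		/* Clear the bits selected by 'change', then set the requested ones. */
		newval = (old & ~change) | (val & change);
		/* On failure, 'old' is refreshed with the current value. */
	} while (!__atomic_compare_exchange_n(reg, &old, newval, 0,
					      __ATOMIC_RELAXED,
					      __ATOMIC_RELAXED));
}
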
549
452#define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val)) 550#define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val))
453#define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val)) 551#define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val))
454#define kvm_set_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] |= (val)) 552
455#define kvm_clear_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val)) 553/* Cause can be modified asynchronously from hardirq hrtimer callback */
554#define kvm_set_c0_guest_cause(cop0, val) \
555 _kvm_atomic_set_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
556#define kvm_clear_c0_guest_cause(cop0, val) \
557 _kvm_atomic_clear_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
456#define kvm_change_c0_guest_cause(cop0, change, val) \ 558#define kvm_change_c0_guest_cause(cop0, change, val) \
457{ \ 559 _kvm_atomic_change_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], \
458 kvm_clear_c0_guest_cause(cop0, change); \ 560 change, val)
459 kvm_set_c0_guest_cause(cop0, ((val) & (change))); \ 561
460}
461#define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val)) 562#define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val))
462#define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val)) 563#define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val))
463#define kvm_change_c0_guest_ebase(cop0, change, val) \ 564#define kvm_change_c0_guest_ebase(cop0, change, val) \
@@ -468,29 +569,33 @@ struct kvm_vcpu_arch {
468 569
469 570
470struct kvm_mips_callbacks { 571struct kvm_mips_callbacks {
471 int (*handle_cop_unusable) (struct kvm_vcpu *vcpu); 572 int (*handle_cop_unusable)(struct kvm_vcpu *vcpu);
472 int (*handle_tlb_mod) (struct kvm_vcpu *vcpu); 573 int (*handle_tlb_mod)(struct kvm_vcpu *vcpu);
473 int (*handle_tlb_ld_miss) (struct kvm_vcpu *vcpu); 574 int (*handle_tlb_ld_miss)(struct kvm_vcpu *vcpu);
474 int (*handle_tlb_st_miss) (struct kvm_vcpu *vcpu); 575 int (*handle_tlb_st_miss)(struct kvm_vcpu *vcpu);
475 int (*handle_addr_err_st) (struct kvm_vcpu *vcpu); 576 int (*handle_addr_err_st)(struct kvm_vcpu *vcpu);
476 int (*handle_addr_err_ld) (struct kvm_vcpu *vcpu); 577 int (*handle_addr_err_ld)(struct kvm_vcpu *vcpu);
477 int (*handle_syscall) (struct kvm_vcpu *vcpu); 578 int (*handle_syscall)(struct kvm_vcpu *vcpu);
478 int (*handle_res_inst) (struct kvm_vcpu *vcpu); 579 int (*handle_res_inst)(struct kvm_vcpu *vcpu);
479 int (*handle_break) (struct kvm_vcpu *vcpu); 580 int (*handle_break)(struct kvm_vcpu *vcpu);
480 int (*vm_init) (struct kvm *kvm); 581 int (*vm_init)(struct kvm *kvm);
481 int (*vcpu_init) (struct kvm_vcpu *vcpu); 582 int (*vcpu_init)(struct kvm_vcpu *vcpu);
482 int (*vcpu_setup) (struct kvm_vcpu *vcpu); 583 int (*vcpu_setup)(struct kvm_vcpu *vcpu);
483 gpa_t(*gva_to_gpa) (gva_t gva); 584 gpa_t (*gva_to_gpa)(gva_t gva);
484 void (*queue_timer_int) (struct kvm_vcpu *vcpu); 585 void (*queue_timer_int)(struct kvm_vcpu *vcpu);
485 void (*dequeue_timer_int) (struct kvm_vcpu *vcpu); 586 void (*dequeue_timer_int)(struct kvm_vcpu *vcpu);
486 void (*queue_io_int) (struct kvm_vcpu *vcpu, 587 void (*queue_io_int)(struct kvm_vcpu *vcpu,
487 struct kvm_mips_interrupt *irq); 588 struct kvm_mips_interrupt *irq);
488 void (*dequeue_io_int) (struct kvm_vcpu *vcpu, 589 void (*dequeue_io_int)(struct kvm_vcpu *vcpu,
489 struct kvm_mips_interrupt *irq); 590 struct kvm_mips_interrupt *irq);
490 int (*irq_deliver) (struct kvm_vcpu *vcpu, unsigned int priority, 591 int (*irq_deliver)(struct kvm_vcpu *vcpu, unsigned int priority,
491 uint32_t cause); 592 uint32_t cause);
492 int (*irq_clear) (struct kvm_vcpu *vcpu, unsigned int priority, 593 int (*irq_clear)(struct kvm_vcpu *vcpu, unsigned int priority,
493 uint32_t cause); 594 uint32_t cause);
595 int (*get_one_reg)(struct kvm_vcpu *vcpu,
596 const struct kvm_one_reg *reg, s64 *v);
597 int (*set_one_reg)(struct kvm_vcpu *vcpu,
598 const struct kvm_one_reg *reg, s64 v);
494}; 599};
495extern struct kvm_mips_callbacks *kvm_mips_callbacks; 600extern struct kvm_mips_callbacks *kvm_mips_callbacks;
496int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks); 601int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
@@ -609,7 +714,16 @@ extern enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
609extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, 714extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
610 struct kvm_run *run); 715 struct kvm_run *run);
611 716
612enum emulation_result kvm_mips_emulate_count(struct kvm_vcpu *vcpu); 717uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu);
718void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count);
719void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare);
720void kvm_mips_init_count(struct kvm_vcpu *vcpu);
721int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl);
722int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume);
723int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz);
724void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu);
725void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu);
726enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu);
613 727
614enum emulation_result kvm_mips_check_privilege(unsigned long cause, 728enum emulation_result kvm_mips_check_privilege(unsigned long cause,
615 uint32_t *opc, 729 uint32_t *opc,
@@ -646,7 +760,6 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc,
646 struct kvm_vcpu *vcpu); 760 struct kvm_vcpu *vcpu);
647 761
648/* Misc */ 762/* Misc */
649extern void mips32_SyncICache(unsigned long addr, unsigned long size);
650extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu); 763extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
651extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); 764extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
652 765
diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h
index f09ff5ae2059..2c04b6d9ff85 100644
--- a/arch/mips/include/uapi/asm/kvm.h
+++ b/arch/mips/include/uapi/asm/kvm.h
@@ -106,6 +106,41 @@ struct kvm_fpu {
106#define KVM_REG_MIPS_LO (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 33) 106#define KVM_REG_MIPS_LO (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 33)
107#define KVM_REG_MIPS_PC (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 34) 107#define KVM_REG_MIPS_PC (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 34)
108 108
109/* KVM specific control registers */
110
111/*
112 * CP0_Count control
113 * DC: Set 1: Master disable CP0_Count and set COUNT_RESUME to now
114 * Set 0: Master re-enable CP0_Count with unchanged bias, handling timer
115 * interrupts since COUNT_RESUME
116 * This can be used to freeze the timer to get a consistent snapshot of
117 * the CP0_Count and timer interrupt pending state, while also resuming
118 * safely without losing time or guest timer interrupts.
119 * Other: Reserved, do not change.
120 */
121#define KVM_REG_MIPS_COUNT_CTL (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
122 0x20000 | 0)
123#define KVM_REG_MIPS_COUNT_CTL_DC 0x00000001
124
125/*
126 * CP0_Count resume monotonic nanoseconds
127 * The monotonic nanosecond time of the last set of COUNT_CTL.DC (master
128 * disable). Any reads and writes of Count related registers while
129 * COUNT_CTL.DC=1 will appear to occur at this time. When COUNT_CTL.DC is
130 * cleared again (master enable) any timer interrupts since this time will be
131 * emulated.
132 * Modifications to times in the future are rejected.
133 */
134#define KVM_REG_MIPS_COUNT_RESUME (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
135 0x20000 | 1)
136/*
137 * CP0_Count rate in Hz
138 * Specifies the rate of the CP0_Count timer in Hz. Modifications occur without
139 * discontinuities in CP0_Count.
140 */
141#define KVM_REG_MIPS_COUNT_HZ (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
142 0x20000 | 2)
143
109/* 144/*
110 * KVM MIPS specific structures and definitions 145 * KVM MIPS specific structures and definitions
111 * 146 *
diff --git a/arch/mips/kvm/kvm_locore.S b/arch/mips/kvm/kvm_locore.S
index bbace092ad0a..033ac343e72c 100644
--- a/arch/mips/kvm/kvm_locore.S
+++ b/arch/mips/kvm/kvm_locore.S
@@ -611,35 +611,3 @@ MIPSX(exceptions):
611 .word _C_LABEL(MIPSX(GuestException)) # 29 611 .word _C_LABEL(MIPSX(GuestException)) # 29
612 .word _C_LABEL(MIPSX(GuestException)) # 30 612 .word _C_LABEL(MIPSX(GuestException)) # 30
613 .word _C_LABEL(MIPSX(GuestException)) # 31 613 .word _C_LABEL(MIPSX(GuestException)) # 31
614
615
616/* This routine makes changes to the instruction stream effective to the hardware.
617 * It should be called after the instruction stream is written.
618 * On return, the new instructions are effective.
619 * Inputs:
620 * a0 = Start address of new instruction stream
621 * a1 = Size, in bytes, of new instruction stream
622 */
623
624#define HW_SYNCI_Step $1
625LEAF(MIPSX(SyncICache))
626 .set push
627 .set mips32r2
628 beq a1, zero, 20f
629 nop
630 REG_ADDU a1, a0, a1
631 rdhwr v0, HW_SYNCI_Step
632 beq v0, zero, 20f
633 nop
63410:
635 synci 0(a0)
636 REG_ADDU a0, a0, v0
637 sltu v1, a0, a1
638 bne v1, zero, 10b
639 nop
640 sync
64120:
642 jr.hb ra
643 nop
644 .set pop
645END(MIPSX(SyncICache))
diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index da5186fbd77a..cd5e4f568439 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -61,11 +61,6 @@ static int kvm_mips_reset_vcpu(struct kvm_vcpu *vcpu)
61 return 0; 61 return 0;
62} 62}
63 63
64gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
65{
66 return gfn;
67}
68
69/* XXXKYMA: We are simulating a processor that has the WII bit set in Config7, so we 64/* XXXKYMA: We are simulating a processor that has the WII bit set in Config7, so we
70 * are "runnable" if interrupts are pending 65 * are "runnable" if interrupts are pending
71 */ 66 */
@@ -130,8 +125,8 @@ static void kvm_mips_init_vm_percpu(void *arg)
130int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 125int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
131{ 126{
132 if (atomic_inc_return(&kvm_mips_instance) == 1) { 127 if (atomic_inc_return(&kvm_mips_instance) == 1) {
133 kvm_info("%s: 1st KVM instance, setup host TLB parameters\n", 128 kvm_debug("%s: 1st KVM instance, setup host TLB parameters\n",
134 __func__); 129 __func__);
135 on_each_cpu(kvm_mips_init_vm_percpu, kvm, 1); 130 on_each_cpu(kvm_mips_init_vm_percpu, kvm, 1);
136 } 131 }
137 132
@@ -149,9 +144,7 @@ void kvm_mips_free_vcpus(struct kvm *kvm)
149 if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE) 144 if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE)
150 kvm_mips_release_pfn_clean(kvm->arch.guest_pmap[i]); 145 kvm_mips_release_pfn_clean(kvm->arch.guest_pmap[i]);
151 } 146 }
152 147 kfree(kvm->arch.guest_pmap);
153 if (kvm->arch.guest_pmap)
154 kfree(kvm->arch.guest_pmap);
155 148
156 kvm_for_each_vcpu(i, vcpu, kvm) { 149 kvm_for_each_vcpu(i, vcpu, kvm) {
157 kvm_arch_vcpu_free(vcpu); 150 kvm_arch_vcpu_free(vcpu);
@@ -186,8 +179,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
186 179
187 /* If this is the last instance, restore wired count */ 180 /* If this is the last instance, restore wired count */
188 if (atomic_dec_return(&kvm_mips_instance) == 0) { 181 if (atomic_dec_return(&kvm_mips_instance) == 0) {
189 kvm_info("%s: last KVM instance, restoring TLB parameters\n", 182 kvm_debug("%s: last KVM instance, restoring TLB parameters\n",
190 __func__); 183 __func__);
191 on_each_cpu(kvm_mips_uninit_tlbs, NULL, 1); 184 on_each_cpu(kvm_mips_uninit_tlbs, NULL, 1);
192 } 185 }
193} 186}
@@ -249,9 +242,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
249 goto out; 242 goto out;
250 } 243 }
251 244
252 kvm_info 245 kvm_debug("Allocated space for Guest PMAP Table (%ld pages) @ %p\n",
253 ("Allocated space for Guest PMAP Table (%ld pages) @ %p\n", 246 npages, kvm->arch.guest_pmap);
254 npages, kvm->arch.guest_pmap);
255 247
256 /* Now setup the page table */ 248 /* Now setup the page table */
257 for (i = 0; i < npages; i++) { 249 for (i = 0; i < npages; i++) {
@@ -296,7 +288,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
296 if (err) 288 if (err)
297 goto out_free_cpu; 289 goto out_free_cpu;
298 290
299 kvm_info("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu); 291 kvm_debug("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu);
300 292
301 /* Allocate space for host mode exception handlers that handle 293 /* Allocate space for host mode exception handlers that handle
302 * guest mode exits 294 * guest mode exits
@@ -304,7 +296,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
304 if (cpu_has_veic || cpu_has_vint) { 296 if (cpu_has_veic || cpu_has_vint) {
305 size = 0x200 + VECTORSPACING * 64; 297 size = 0x200 + VECTORSPACING * 64;
306 } else { 298 } else {
307 size = 0x200; 299 size = 0x4000;
308 } 300 }
309 301
310 /* Save Linux EBASE */ 302 /* Save Linux EBASE */
@@ -316,8 +308,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
316 err = -ENOMEM; 308 err = -ENOMEM;
317 goto out_free_cpu; 309 goto out_free_cpu;
318 } 310 }
319 kvm_info("Allocated %d bytes for KVM Exception Handlers @ %p\n", 311 kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n",
320 ALIGN(size, PAGE_SIZE), gebase); 312 ALIGN(size, PAGE_SIZE), gebase);
321 313
322 /* Save new ebase */ 314 /* Save new ebase */
323 vcpu->arch.guest_ebase = gebase; 315 vcpu->arch.guest_ebase = gebase;
@@ -342,15 +334,16 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
342 334
343 /* General handler, relocate to unmapped space for sanity's sake */ 335 /* General handler, relocate to unmapped space for sanity's sake */
344 offset = 0x2000; 336 offset = 0x2000;
345 kvm_info("Installing KVM Exception handlers @ %p, %#x bytes\n", 337 kvm_debug("Installing KVM Exception handlers @ %p, %#x bytes\n",
346 gebase + offset, 338 gebase + offset,
347 mips32_GuestExceptionEnd - mips32_GuestException); 339 mips32_GuestExceptionEnd - mips32_GuestException);
348 340
349 memcpy(gebase + offset, mips32_GuestException, 341 memcpy(gebase + offset, mips32_GuestException,
350 mips32_GuestExceptionEnd - mips32_GuestException); 342 mips32_GuestExceptionEnd - mips32_GuestException);
351 343
352 /* Invalidate the icache for these ranges */ 344 /* Invalidate the icache for these ranges */
353 mips32_SyncICache((unsigned long) gebase, ALIGN(size, PAGE_SIZE)); 345 local_flush_icache_range((unsigned long)gebase,
346 (unsigned long)gebase + ALIGN(size, PAGE_SIZE));
354 347
355 /* Allocate comm page for guest kernel, a TLB will be reserved for mapping GVA @ 0xFFFF8000 to this page */ 348 /* Allocate comm page for guest kernel, a TLB will be reserved for mapping GVA @ 0xFFFF8000 to this page */
356 vcpu->arch.kseg0_commpage = kzalloc(PAGE_SIZE << 1, GFP_KERNEL); 349 vcpu->arch.kseg0_commpage = kzalloc(PAGE_SIZE << 1, GFP_KERNEL);
@@ -360,14 +353,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
360 goto out_free_gebase; 353 goto out_free_gebase;
361 } 354 }
362 355
363 kvm_info("Allocated COMM page @ %p\n", vcpu->arch.kseg0_commpage); 356 kvm_debug("Allocated COMM page @ %p\n", vcpu->arch.kseg0_commpage);
364 kvm_mips_commpage_init(vcpu); 357 kvm_mips_commpage_init(vcpu);
365 358
366 /* Init */ 359 /* Init */
367 vcpu->arch.last_sched_cpu = -1; 360 vcpu->arch.last_sched_cpu = -1;
368 361
369 /* Start off the timer */ 362 /* Start off the timer */
370 kvm_mips_emulate_count(vcpu); 363 kvm_mips_init_count(vcpu);
371 364
372 return vcpu; 365 return vcpu;
373 366
@@ -389,12 +382,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
389 382
390 kvm_mips_dump_stats(vcpu); 383 kvm_mips_dump_stats(vcpu);
391 384
392 if (vcpu->arch.guest_ebase) 385 kfree(vcpu->arch.guest_ebase);
393 kfree(vcpu->arch.guest_ebase); 386 kfree(vcpu->arch.kseg0_commpage);
394
395 if (vcpu->arch.kseg0_commpage)
396 kfree(vcpu->arch.kseg0_commpage);
397
398} 387}
399 388
400void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 389void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -423,11 +412,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
423 vcpu->mmio_needed = 0; 412 vcpu->mmio_needed = 0;
424 } 413 }
425 414
415 local_irq_disable();
426 /* Check if we have any exceptions/interrupts pending */ 416 /* Check if we have any exceptions/interrupts pending */
427 kvm_mips_deliver_interrupts(vcpu, 417 kvm_mips_deliver_interrupts(vcpu,
428 kvm_read_c0_guest_cause(vcpu->arch.cop0)); 418 kvm_read_c0_guest_cause(vcpu->arch.cop0));
429 419
430 local_irq_disable();
431 kvm_guest_enter(); 420 kvm_guest_enter();
432 421
433 r = __kvm_mips_vcpu_run(run, vcpu); 422 r = __kvm_mips_vcpu_run(run, vcpu);
@@ -490,36 +479,6 @@ kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
490 return -ENOIOCTLCMD; 479 return -ENOIOCTLCMD;
491} 480}
492 481
493#define MIPS_CP0_32(_R, _S) \
494 (KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S)))
495
496#define MIPS_CP0_64(_R, _S) \
497 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S)))
498
499#define KVM_REG_MIPS_CP0_INDEX MIPS_CP0_32(0, 0)
500#define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0)
501#define KVM_REG_MIPS_CP0_ENTRYLO1 MIPS_CP0_64(3, 0)
502#define KVM_REG_MIPS_CP0_CONTEXT MIPS_CP0_64(4, 0)
503#define KVM_REG_MIPS_CP0_USERLOCAL MIPS_CP0_64(4, 2)
504#define KVM_REG_MIPS_CP0_PAGEMASK MIPS_CP0_32(5, 0)
505#define KVM_REG_MIPS_CP0_PAGEGRAIN MIPS_CP0_32(5, 1)
506#define KVM_REG_MIPS_CP0_WIRED MIPS_CP0_32(6, 0)
507#define KVM_REG_MIPS_CP0_HWRENA MIPS_CP0_32(7, 0)
508#define KVM_REG_MIPS_CP0_BADVADDR MIPS_CP0_64(8, 0)
509#define KVM_REG_MIPS_CP0_COUNT MIPS_CP0_32(9, 0)
510#define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0)
511#define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0)
512#define KVM_REG_MIPS_CP0_STATUS MIPS_CP0_32(12, 0)
513#define KVM_REG_MIPS_CP0_CAUSE MIPS_CP0_32(13, 0)
514#define KVM_REG_MIPS_CP0_EBASE MIPS_CP0_64(15, 1)
515#define KVM_REG_MIPS_CP0_CONFIG MIPS_CP0_32(16, 0)
516#define KVM_REG_MIPS_CP0_CONFIG1 MIPS_CP0_32(16, 1)
517#define KVM_REG_MIPS_CP0_CONFIG2 MIPS_CP0_32(16, 2)
518#define KVM_REG_MIPS_CP0_CONFIG3 MIPS_CP0_32(16, 3)
519#define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7)
520#define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0)
521#define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0)
522
523static u64 kvm_mips_get_one_regs[] = { 482static u64 kvm_mips_get_one_regs[] = {
524 KVM_REG_MIPS_R0, 483 KVM_REG_MIPS_R0,
525 KVM_REG_MIPS_R1, 484 KVM_REG_MIPS_R1,
@@ -560,25 +519,34 @@ static u64 kvm_mips_get_one_regs[] = {
560 519
561 KVM_REG_MIPS_CP0_INDEX, 520 KVM_REG_MIPS_CP0_INDEX,
562 KVM_REG_MIPS_CP0_CONTEXT, 521 KVM_REG_MIPS_CP0_CONTEXT,
522 KVM_REG_MIPS_CP0_USERLOCAL,
563 KVM_REG_MIPS_CP0_PAGEMASK, 523 KVM_REG_MIPS_CP0_PAGEMASK,
564 KVM_REG_MIPS_CP0_WIRED, 524 KVM_REG_MIPS_CP0_WIRED,
525 KVM_REG_MIPS_CP0_HWRENA,
565 KVM_REG_MIPS_CP0_BADVADDR, 526 KVM_REG_MIPS_CP0_BADVADDR,
527 KVM_REG_MIPS_CP0_COUNT,
566 KVM_REG_MIPS_CP0_ENTRYHI, 528 KVM_REG_MIPS_CP0_ENTRYHI,
529 KVM_REG_MIPS_CP0_COMPARE,
567 KVM_REG_MIPS_CP0_STATUS, 530 KVM_REG_MIPS_CP0_STATUS,
568 KVM_REG_MIPS_CP0_CAUSE, 531 KVM_REG_MIPS_CP0_CAUSE,
569 /* EPC set via kvm_regs, et al. */ 532 KVM_REG_MIPS_CP0_EPC,
570 KVM_REG_MIPS_CP0_CONFIG, 533 KVM_REG_MIPS_CP0_CONFIG,
571 KVM_REG_MIPS_CP0_CONFIG1, 534 KVM_REG_MIPS_CP0_CONFIG1,
572 KVM_REG_MIPS_CP0_CONFIG2, 535 KVM_REG_MIPS_CP0_CONFIG2,
573 KVM_REG_MIPS_CP0_CONFIG3, 536 KVM_REG_MIPS_CP0_CONFIG3,
574 KVM_REG_MIPS_CP0_CONFIG7, 537 KVM_REG_MIPS_CP0_CONFIG7,
575 KVM_REG_MIPS_CP0_ERROREPC 538 KVM_REG_MIPS_CP0_ERROREPC,
539
540 KVM_REG_MIPS_COUNT_CTL,
541 KVM_REG_MIPS_COUNT_RESUME,
542 KVM_REG_MIPS_COUNT_HZ,
576}; 543};
577 544
578static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, 545static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
579 const struct kvm_one_reg *reg) 546 const struct kvm_one_reg *reg)
580{ 547{
581 struct mips_coproc *cop0 = vcpu->arch.cop0; 548 struct mips_coproc *cop0 = vcpu->arch.cop0;
549 int ret;
582 s64 v; 550 s64 v;
583 551
584 switch (reg->id) { 552 switch (reg->id) {
@@ -601,24 +569,36 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
601 case KVM_REG_MIPS_CP0_CONTEXT: 569 case KVM_REG_MIPS_CP0_CONTEXT:
602 v = (long)kvm_read_c0_guest_context(cop0); 570 v = (long)kvm_read_c0_guest_context(cop0);
603 break; 571 break;
572 case KVM_REG_MIPS_CP0_USERLOCAL:
573 v = (long)kvm_read_c0_guest_userlocal(cop0);
574 break;
604 case KVM_REG_MIPS_CP0_PAGEMASK: 575 case KVM_REG_MIPS_CP0_PAGEMASK:
605 v = (long)kvm_read_c0_guest_pagemask(cop0); 576 v = (long)kvm_read_c0_guest_pagemask(cop0);
606 break; 577 break;
607 case KVM_REG_MIPS_CP0_WIRED: 578 case KVM_REG_MIPS_CP0_WIRED:
608 v = (long)kvm_read_c0_guest_wired(cop0); 579 v = (long)kvm_read_c0_guest_wired(cop0);
609 break; 580 break;
581 case KVM_REG_MIPS_CP0_HWRENA:
582 v = (long)kvm_read_c0_guest_hwrena(cop0);
583 break;
610 case KVM_REG_MIPS_CP0_BADVADDR: 584 case KVM_REG_MIPS_CP0_BADVADDR:
611 v = (long)kvm_read_c0_guest_badvaddr(cop0); 585 v = (long)kvm_read_c0_guest_badvaddr(cop0);
612 break; 586 break;
613 case KVM_REG_MIPS_CP0_ENTRYHI: 587 case KVM_REG_MIPS_CP0_ENTRYHI:
614 v = (long)kvm_read_c0_guest_entryhi(cop0); 588 v = (long)kvm_read_c0_guest_entryhi(cop0);
615 break; 589 break;
590 case KVM_REG_MIPS_CP0_COMPARE:
591 v = (long)kvm_read_c0_guest_compare(cop0);
592 break;
616 case KVM_REG_MIPS_CP0_STATUS: 593 case KVM_REG_MIPS_CP0_STATUS:
617 v = (long)kvm_read_c0_guest_status(cop0); 594 v = (long)kvm_read_c0_guest_status(cop0);
618 break; 595 break;
619 case KVM_REG_MIPS_CP0_CAUSE: 596 case KVM_REG_MIPS_CP0_CAUSE:
620 v = (long)kvm_read_c0_guest_cause(cop0); 597 v = (long)kvm_read_c0_guest_cause(cop0);
621 break; 598 break;
599 case KVM_REG_MIPS_CP0_EPC:
600 v = (long)kvm_read_c0_guest_epc(cop0);
601 break;
622 case KVM_REG_MIPS_CP0_ERROREPC: 602 case KVM_REG_MIPS_CP0_ERROREPC:
623 v = (long)kvm_read_c0_guest_errorepc(cop0); 603 v = (long)kvm_read_c0_guest_errorepc(cop0);
624 break; 604 break;
@@ -637,6 +617,15 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
637 case KVM_REG_MIPS_CP0_CONFIG7: 617 case KVM_REG_MIPS_CP0_CONFIG7:
638 v = (long)kvm_read_c0_guest_config7(cop0); 618 v = (long)kvm_read_c0_guest_config7(cop0);
639 break; 619 break;
620 /* registers to be handled specially */
621 case KVM_REG_MIPS_CP0_COUNT:
622 case KVM_REG_MIPS_COUNT_CTL:
623 case KVM_REG_MIPS_COUNT_RESUME:
624 case KVM_REG_MIPS_COUNT_HZ:
625 ret = kvm_mips_callbacks->get_one_reg(vcpu, reg, &v);
626 if (ret)
627 return ret;
628 break;
640 default: 629 default:
641 return -EINVAL; 630 return -EINVAL;
642 } 631 }
@@ -697,12 +686,18 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
697 case KVM_REG_MIPS_CP0_CONTEXT: 686 case KVM_REG_MIPS_CP0_CONTEXT:
698 kvm_write_c0_guest_context(cop0, v); 687 kvm_write_c0_guest_context(cop0, v);
699 break; 688 break;
689 case KVM_REG_MIPS_CP0_USERLOCAL:
690 kvm_write_c0_guest_userlocal(cop0, v);
691 break;
700 case KVM_REG_MIPS_CP0_PAGEMASK: 692 case KVM_REG_MIPS_CP0_PAGEMASK:
701 kvm_write_c0_guest_pagemask(cop0, v); 693 kvm_write_c0_guest_pagemask(cop0, v);
702 break; 694 break;
703 case KVM_REG_MIPS_CP0_WIRED: 695 case KVM_REG_MIPS_CP0_WIRED:
704 kvm_write_c0_guest_wired(cop0, v); 696 kvm_write_c0_guest_wired(cop0, v);
705 break; 697 break;
698 case KVM_REG_MIPS_CP0_HWRENA:
699 kvm_write_c0_guest_hwrena(cop0, v);
700 break;
706 case KVM_REG_MIPS_CP0_BADVADDR: 701 case KVM_REG_MIPS_CP0_BADVADDR:
707 kvm_write_c0_guest_badvaddr(cop0, v); 702 kvm_write_c0_guest_badvaddr(cop0, v);
708 break; 703 break;
@@ -712,12 +707,20 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
712 case KVM_REG_MIPS_CP0_STATUS: 707 case KVM_REG_MIPS_CP0_STATUS:
713 kvm_write_c0_guest_status(cop0, v); 708 kvm_write_c0_guest_status(cop0, v);
714 break; 709 break;
715 case KVM_REG_MIPS_CP0_CAUSE: 710 case KVM_REG_MIPS_CP0_EPC:
716 kvm_write_c0_guest_cause(cop0, v); 711 kvm_write_c0_guest_epc(cop0, v);
717 break; 712 break;
718 case KVM_REG_MIPS_CP0_ERROREPC: 713 case KVM_REG_MIPS_CP0_ERROREPC:
719 kvm_write_c0_guest_errorepc(cop0, v); 714 kvm_write_c0_guest_errorepc(cop0, v);
720 break; 715 break;
716 /* registers to be handled specially */
717 case KVM_REG_MIPS_CP0_COUNT:
718 case KVM_REG_MIPS_CP0_COMPARE:
719 case KVM_REG_MIPS_CP0_CAUSE:
720 case KVM_REG_MIPS_COUNT_CTL:
721 case KVM_REG_MIPS_COUNT_RESUME:
722 case KVM_REG_MIPS_COUNT_HZ:
723 return kvm_mips_callbacks->set_one_reg(vcpu, reg, v);
721 default: 724 default:
722 return -EINVAL; 725 return -EINVAL;
723 } 726 }
@@ -920,7 +923,7 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
920 return -1; 923 return -1;
921 924
922 printk("VCPU Register Dump:\n"); 925 printk("VCPU Register Dump:\n");
923 printk("\tpc = 0x%08lx\n", vcpu->arch.pc);; 926 printk("\tpc = 0x%08lx\n", vcpu->arch.pc);
924 printk("\texceptions: %08lx\n", vcpu->arch.pending_exceptions); 927 printk("\texceptions: %08lx\n", vcpu->arch.pending_exceptions);
925 928
926 for (i = 0; i < 32; i += 4) { 929 for (i = 0; i < 32; i += 4) {
@@ -969,7 +972,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
969 return 0; 972 return 0;
970} 973}
971 974
972void kvm_mips_comparecount_func(unsigned long data) 975static void kvm_mips_comparecount_func(unsigned long data)
973{ 976{
974 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; 977 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
975 978
@@ -984,15 +987,13 @@ void kvm_mips_comparecount_func(unsigned long data)
984/* 987/*
985 * low level hrtimer wake routine. 988 * low level hrtimer wake routine.
986 */ 989 */
987enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) 990static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
988{ 991{
989 struct kvm_vcpu *vcpu; 992 struct kvm_vcpu *vcpu;
990 993
991 vcpu = container_of(timer, struct kvm_vcpu, arch.comparecount_timer); 994 vcpu = container_of(timer, struct kvm_vcpu, arch.comparecount_timer);
992 kvm_mips_comparecount_func((unsigned long) vcpu); 995 kvm_mips_comparecount_func((unsigned long) vcpu);
993 hrtimer_forward_now(&vcpu->arch.comparecount_timer, 996 return kvm_mips_count_timeout(vcpu);
994 ktime_set(0, MS_TO_NS(10)));
995 return HRTIMER_RESTART;
996} 997}
997 998
998int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 999int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
diff --git a/arch/mips/kvm/kvm_mips_dyntrans.c b/arch/mips/kvm/kvm_mips_dyntrans.c
index 96528e2d1ea6..b80e41d858fd 100644
--- a/arch/mips/kvm/kvm_mips_dyntrans.c
+++ b/arch/mips/kvm/kvm_mips_dyntrans.c
@@ -16,6 +16,7 @@
16#include <linux/vmalloc.h> 16#include <linux/vmalloc.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/bootmem.h> 18#include <linux/bootmem.h>
19#include <asm/cacheflush.h>
19 20
20#include "kvm_mips_comm.h" 21#include "kvm_mips_comm.h"
21 22
@@ -40,7 +41,7 @@ kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc,
40 CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa 41 CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
41 (vcpu, (unsigned long) opc)); 42 (vcpu, (unsigned long) opc));
42 memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t)); 43 memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t));
43 mips32_SyncICache(kseg0_opc, 32); 44 local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
44 45
45 return result; 46 return result;
46} 47}
@@ -66,7 +67,7 @@ kvm_mips_trans_cache_va(uint32_t inst, uint32_t *opc,
66 CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa 67 CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
67 (vcpu, (unsigned long) opc)); 68 (vcpu, (unsigned long) opc));
68 memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t)); 69 memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t));
69 mips32_SyncICache(kseg0_opc, 32); 70 local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
70 71
71 return result; 72 return result;
72} 73}
@@ -99,11 +100,12 @@ kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
99 CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa 100 CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
100 (vcpu, (unsigned long) opc)); 101 (vcpu, (unsigned long) opc));
101 memcpy((void *)kseg0_opc, (void *)&mfc0_inst, sizeof(uint32_t)); 102 memcpy((void *)kseg0_opc, (void *)&mfc0_inst, sizeof(uint32_t));
102 mips32_SyncICache(kseg0_opc, 32); 103 local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
103 } else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) { 104 } else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
104 local_irq_save(flags); 105 local_irq_save(flags);
105 memcpy((void *)opc, (void *)&mfc0_inst, sizeof(uint32_t)); 106 memcpy((void *)opc, (void *)&mfc0_inst, sizeof(uint32_t));
106 mips32_SyncICache((unsigned long) opc, 32); 107 local_flush_icache_range((unsigned long)opc,
108 (unsigned long)opc + 32);
107 local_irq_restore(flags); 109 local_irq_restore(flags);
108 } else { 110 } else {
109 kvm_err("%s: Invalid address: %p\n", __func__, opc); 111 kvm_err("%s: Invalid address: %p\n", __func__, opc);
@@ -134,11 +136,12 @@ kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
134 CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa 136 CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
135 (vcpu, (unsigned long) opc)); 137 (vcpu, (unsigned long) opc));
136 memcpy((void *)kseg0_opc, (void *)&mtc0_inst, sizeof(uint32_t)); 138 memcpy((void *)kseg0_opc, (void *)&mtc0_inst, sizeof(uint32_t));
137 mips32_SyncICache(kseg0_opc, 32); 139 local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
138 } else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) { 140 } else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
139 local_irq_save(flags); 141 local_irq_save(flags);
140 memcpy((void *)opc, (void *)&mtc0_inst, sizeof(uint32_t)); 142 memcpy((void *)opc, (void *)&mtc0_inst, sizeof(uint32_t));
141 mips32_SyncICache((unsigned long) opc, 32); 143 local_flush_icache_range((unsigned long)opc,
144 (unsigned long)opc + 32);
142 local_irq_restore(flags); 145 local_irq_restore(flags);
143 } else { 146 } else {
144 kvm_err("%s: Invalid address: %p\n", __func__, opc); 147 kvm_err("%s: Invalid address: %p\n", __func__, opc);
diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/kvm_mips_emul.c
index e3fec99941a7..8d4840090082 100644
--- a/arch/mips/kvm/kvm_mips_emul.c
+++ b/arch/mips/kvm/kvm_mips_emul.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/errno.h> 12#include <linux/errno.h>
13#include <linux/err.h> 13#include <linux/err.h>
14#include <linux/ktime.h>
14#include <linux/kvm_host.h> 15#include <linux/kvm_host.h>
15#include <linux/module.h> 16#include <linux/module.h>
16#include <linux/vmalloc.h> 17#include <linux/vmalloc.h>
@@ -228,25 +229,520 @@ enum emulation_result update_pc(struct kvm_vcpu *vcpu, uint32_t cause)
228 return er; 229 return er;
229} 230}
230 231
231/* Everytime the compare register is written to, we need to decide when to fire 232/**
232 * the timer that represents timer ticks to the GUEST. 233 * kvm_mips_count_disabled() - Find whether the CP0_Count timer is disabled.
234 * @vcpu: Virtual CPU.
233 * 235 *
236 * Returns: 1 if the CP0_Count timer is disabled by either the guest
237 * CP0_Cause.DC bit or the count_ctl.DC bit.
238 * 0 otherwise (in which case CP0_Count timer is running).
234 */ 239 */
235enum emulation_result kvm_mips_emulate_count(struct kvm_vcpu *vcpu) 240static inline int kvm_mips_count_disabled(struct kvm_vcpu *vcpu)
236{ 241{
237 struct mips_coproc *cop0 = vcpu->arch.cop0; 242 struct mips_coproc *cop0 = vcpu->arch.cop0;
238 enum emulation_result er = EMULATE_DONE; 243 return (vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) ||
244 (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC);
245}
246
247/**
248 * kvm_mips_ktime_to_count() - Scale ktime_t to a 32-bit count.
249 *
250 * Caches the dynamic nanosecond bias in vcpu->arch.count_dyn_bias.
251 *
252 * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
253 */
254static uint32_t kvm_mips_ktime_to_count(struct kvm_vcpu *vcpu, ktime_t now)
255{
256 s64 now_ns, periods;
257 u64 delta;
258
259 now_ns = ktime_to_ns(now);
260 delta = now_ns + vcpu->arch.count_dyn_bias;
261
262 if (delta >= vcpu->arch.count_period) {
263 /* If delta is out of safe range the bias needs adjusting */
264 periods = div64_s64(now_ns, vcpu->arch.count_period);
265 vcpu->arch.count_dyn_bias = -periods * vcpu->arch.count_period;
266 /* Recalculate delta with new bias */
267 delta = now_ns + vcpu->arch.count_dyn_bias;
268 }
269
270 /*
271 * We've ensured that:
272 * delta < count_period
273 *
274 * Therefore the intermediate delta*count_hz will never overflow since
275 * at the boundary condition:
276 * delta = count_period
277 * delta = NSEC_PER_SEC * 2^32 / count_hz
278 * delta * count_hz = NSEC_PER_SEC * 2^32
279 */
280 return div_u64(delta * vcpu->arch.count_hz, NSEC_PER_SEC);
281}
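
Putting numbers on the bound in the comment above, with the 100 MHz default that kvm_mips_init_count() below uses (a worked check only, no additional code):

\[
\mathrm{count\_period} = \frac{\mathrm{NSEC\_PER\_SEC} \cdot 2^{32}}{\mathrm{count\_hz}}
 = \frac{10^{9} \cdot 2^{32}}{10^{8}}\,\mathrm{ns} \approx 4.29 \times 10^{10}\,\mathrm{ns} \approx 42.9\,\mathrm{s}
\]
\[
\delta \cdot \mathrm{count\_hz} < \mathrm{count\_period} \cdot \mathrm{count\_hz}
 = 10^{9} \cdot 2^{32} \approx 4.29 \times 10^{18} < 2^{64} \approx 1.84 \times 10^{19}
\]

so the 64-bit product handed to div_u64() cannot overflow.
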
282
283/**
284 * kvm_mips_count_time() - Get effective current time.
285 * @vcpu: Virtual CPU.
286 *
287 * Get effective monotonic ktime. This is usually a straightforward ktime_get(),
288 * except when the master disable bit is set in count_ctl, in which case it is
289 * count_resume, i.e. the time that the count was disabled.
290 *
291 * Returns: Effective monotonic ktime for CP0_Count.
292 */
293static inline ktime_t kvm_mips_count_time(struct kvm_vcpu *vcpu)
294{
295 if (unlikely(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC))
296 return vcpu->arch.count_resume;
297
298 return ktime_get();
299}
300
301/**
302 * kvm_mips_read_count_running() - Read the current count value as if running.
303 * @vcpu: Virtual CPU.
304 * @now: Kernel time to read CP0_Count at.
305 *
306 * Returns the current guest CP0_Count register at time @now and handles if the
307 * timer interrupt is pending and hasn't been handled yet.
308 *
309 * Returns: The current value of the guest CP0_Count register.
310 */
311static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
312{
313 ktime_t expires;
314 int running;
315
316 /* Is the hrtimer pending? */
317 expires = hrtimer_get_expires(&vcpu->arch.comparecount_timer);
318 if (ktime_compare(now, expires) >= 0) {
319 /*
320 * Cancel it while we handle it so there's no chance of
321 * interference with the timeout handler.
322 */
323 running = hrtimer_cancel(&vcpu->arch.comparecount_timer);
324
325 /* Nothing should be waiting on the timeout */
326 kvm_mips_callbacks->queue_timer_int(vcpu);
327
328 /*
329 * Restart the timer if it was running based on the expiry time
330 * we read, so that we don't push it back 2 periods.
331 */
332 if (running) {
333 expires = ktime_add_ns(expires,
334 vcpu->arch.count_period);
335 hrtimer_start(&vcpu->arch.comparecount_timer, expires,
336 HRTIMER_MODE_ABS);
337 }
338 }
339
340 /* Return the biased and scaled guest CP0_Count */
341 return vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
342}
343
344/**
345 * kvm_mips_read_count() - Read the current count value.
346 * @vcpu: Virtual CPU.
347 *
348 * Read the current guest CP0_Count value, taking into account whether the timer
349 * is stopped.
350 *
351 * Returns: The current guest CP0_Count value.
352 */
353uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu)
354{
355 struct mips_coproc *cop0 = vcpu->arch.cop0;
356
357 /* If count disabled just read static copy of count */
358 if (kvm_mips_count_disabled(vcpu))
359 return kvm_read_c0_guest_count(cop0);
360
361 return kvm_mips_read_count_running(vcpu, ktime_get());
362}
363
364/**
365 * kvm_mips_freeze_hrtimer() - Safely stop the hrtimer.
366 * @vcpu: Virtual CPU.
367 * @count: Output pointer for CP0_Count value at point of freeze.
368 *
369 * Freeze the hrtimer safely and return both the ktime and the CP0_Count value
370 * at the point it was frozen. It is guaranteed that any pending interrupts at
371 * the point it was frozen are handled, and none after that point.
372 *
373 * This is useful where the time/CP0_Count is needed in the calculation of the
374 * new parameters.
375 *
376 * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
377 *
378 * Returns: The ktime at the point of freeze.
379 */
380static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu,
381 uint32_t *count)
382{
383 ktime_t now;
384
385 /* stop hrtimer before finding time */
386 hrtimer_cancel(&vcpu->arch.comparecount_timer);
387 now = ktime_get();
388
389 /* find count at this point and handle pending hrtimer */
390 *count = kvm_mips_read_count_running(vcpu, now);
391
392 return now;
393}
394
239 395
240 /* If COUNT is enabled */ 396/**
241 if (!(kvm_read_c0_guest_cause(cop0) & CAUSEF_DC)) { 397 * kvm_mips_resume_hrtimer() - Resume hrtimer, updating expiry.
242 hrtimer_try_to_cancel(&vcpu->arch.comparecount_timer); 398 * @vcpu: Virtual CPU.
243 hrtimer_start(&vcpu->arch.comparecount_timer, 399 * @now: ktime at point of resume.
244 ktime_set(0, MS_TO_NS(10)), HRTIMER_MODE_REL); 400 * @count: CP0_Count at point of resume.
401 *
402 * Resumes the timer and updates the timer expiry based on @now and @count.
403 * This can be used in conjunction with kvm_mips_freeze_hrtimer() when timer
404 * parameters need to be changed.
405 *
406 * It is guaranteed that a timer interrupt immediately after resume will be
407 * handled, but not if CP0_Compare is exactly at @count. That case is already
408 * handled by kvm_mips_freeze_hrtimer().
409 *
410 * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
411 */
412static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu,
413 ktime_t now, uint32_t count)
414{
415 struct mips_coproc *cop0 = vcpu->arch.cop0;
416 uint32_t compare;
417 u64 delta;
418 ktime_t expire;
419
420 /* Calculate timeout (wrap 0 to 2^32) */
421 compare = kvm_read_c0_guest_compare(cop0);
422 delta = (u64)(uint32_t)(compare - count - 1) + 1;
423 delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz);
424 expire = ktime_add_ns(now, delta);
425
426 /* Update hrtimer to use new timeout */
427 hrtimer_cancel(&vcpu->arch.comparecount_timer);
428 hrtimer_start(&vcpu->arch.comparecount_timer, expire, HRTIMER_MODE_ABS);
429}
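
The (compare - count - 1) + 1 expression above maps a matching compare value to a full 2^32-tick period rather than zero, which is what the architecture's 32-bit Count/Compare wrap requires. A standalone sketch of just that arithmetic (illustration only, plain userspace C):

#include <stdint.h>
#include <stdio.h>

static uint64_t ticks_until_match(uint32_t compare, uint32_t count)
{
	/* Same expression as kvm_mips_resume_hrtimer() uses. */
	return (uint64_t)(uint32_t)(compare - count - 1) + 1;
}

int main(void)
{
	/* Compare strictly ahead of count: plain difference. */
	printf("%llu\n", (unsigned long long)ticks_until_match(100, 40)); /* 60 */
	/* Compare equal to count: a full 2^32-tick period, not zero. */
	printf("%llu\n", (unsigned long long)ticks_until_match(40, 40));  /* 4294967296 */
	/* Count just past compare: wraps to almost a full period. */
	printf("%llu\n", (unsigned long long)ticks_until_match(40, 41));  /* 4294967295 */
	return 0;
}
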
430
431/**
432 * kvm_mips_update_hrtimer() - Update next expiry time of hrtimer.
433 * @vcpu: Virtual CPU.
434 *
435 * Recalculates and updates the expiry time of the hrtimer. This can be used
436 * after timer parameters have been altered which do not depend on the time that
437 * the change occurs (in those cases kvm_mips_freeze_hrtimer() and
438 * kvm_mips_resume_hrtimer() are used directly).
439 *
440 * It is guaranteed that no timer interrupts will be lost in the process.
441 *
442 * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
443 */
444static void kvm_mips_update_hrtimer(struct kvm_vcpu *vcpu)
445{
446 ktime_t now;
447 uint32_t count;
448
449 /*
450 * freeze_hrtimer takes care of timer interrupts <= count, and
451 * resume_hrtimer takes care of timer interrupts > count.
452 */
453 now = kvm_mips_freeze_hrtimer(vcpu, &count);
454 kvm_mips_resume_hrtimer(vcpu, now, count);
455}
456
457/**
458 * kvm_mips_write_count() - Modify the count and update timer.
459 * @vcpu: Virtual CPU.
460 * @count: Guest CP0_Count value to set.
461 *
462 * Sets the CP0_Count value and updates the timer accordingly.
463 */
464void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count)
465{
466 struct mips_coproc *cop0 = vcpu->arch.cop0;
467 ktime_t now;
468
469 /* Calculate bias */
470 now = kvm_mips_count_time(vcpu);
471 vcpu->arch.count_bias = count - kvm_mips_ktime_to_count(vcpu, now);
472
473 if (kvm_mips_count_disabled(vcpu))
474 /* The timer's disabled, adjust the static count */
475 kvm_write_c0_guest_count(cop0, count);
476 else
477 /* Update timeout */
478 kvm_mips_resume_hrtimer(vcpu, now, count);
479}
480
481/**
482 * kvm_mips_init_count() - Initialise timer.
483 * @vcpu: Virtual CPU.
484 *
485 * Initialise the timer to a sensible frequency, namely 100MHz, zero it, and set
486 * it going if it's enabled.
487 */
488void kvm_mips_init_count(struct kvm_vcpu *vcpu)
489{
490 /* 100 MHz */
491 vcpu->arch.count_hz = 100*1000*1000;
492 vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32,
493 vcpu->arch.count_hz);
494 vcpu->arch.count_dyn_bias = 0;
495
496 /* Starting at 0 */
497 kvm_mips_write_count(vcpu, 0);
498}
499
500/**
501 * kvm_mips_set_count_hz() - Update the frequency of the timer.
502 * @vcpu: Virtual CPU.
503 * @count_hz: Frequency of CP0_Count timer in Hz.
504 *
505 * Change the frequency of the CP0_Count timer. This is done atomically so that
506 * CP0_Count is continuous and no timer interrupt is lost.
507 *
508 * Returns: -EINVAL if @count_hz is out of range.
509 * 0 on success.
510 */
511int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz)
512{
513 struct mips_coproc *cop0 = vcpu->arch.cop0;
514 int dc;
515 ktime_t now;
516 u32 count;
517
518 /* ensure the frequency is in a sensible range... */
519 if (count_hz <= 0 || count_hz > NSEC_PER_SEC)
520 return -EINVAL;
521 /* ... and has actually changed */
522 if (vcpu->arch.count_hz == count_hz)
523 return 0;
524
525 /* Safely freeze timer so we can keep it continuous */
526 dc = kvm_mips_count_disabled(vcpu);
527 if (dc) {
528 now = kvm_mips_count_time(vcpu);
529 count = kvm_read_c0_guest_count(cop0);
245 } else { 530 } else {
246 hrtimer_try_to_cancel(&vcpu->arch.comparecount_timer); 531 now = kvm_mips_freeze_hrtimer(vcpu, &count);
247 } 532 }
248 533
249 return er; 534 /* Update the frequency */
535 vcpu->arch.count_hz = count_hz;
536 vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, count_hz);
537 vcpu->arch.count_dyn_bias = 0;
538
539 /* Calculate adjusted bias so dynamic count is unchanged */
540 vcpu->arch.count_bias = count - kvm_mips_ktime_to_count(vcpu, now);
541
542 /* Update and resume hrtimer */
543 if (!dc)
544 kvm_mips_resume_hrtimer(vcpu, now, count);
545 return 0;
546}
547
548/**
549 * kvm_mips_write_compare() - Modify compare and update timer.
550 * @vcpu: Virtual CPU.
551 * @compare: New CP0_Compare value.
552 *
553 * Update CP0_Compare to a new value and update the timeout.
554 */
555void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare)
556{
557 struct mips_coproc *cop0 = vcpu->arch.cop0;
558
559 /* if unchanged, must just be an ack */
560 if (kvm_read_c0_guest_compare(cop0) == compare)
561 return;
562
563 /* Update compare */
564 kvm_write_c0_guest_compare(cop0, compare);
565
566 /* Update timeout if count enabled */
567 if (!kvm_mips_count_disabled(vcpu))
568 kvm_mips_update_hrtimer(vcpu);
569}
570
571/**
572 * kvm_mips_count_disable() - Disable count.
573 * @vcpu: Virtual CPU.
574 *
575 * Disable the CP0_Count timer. A timer interrupt on or before the final stop
576 * time will be handled but not after.
577 *
578 * Assumes CP0_Count was previously enabled but now Guest.CP0_Cause.DC or
579 * count_ctl.DC has been set (count disabled).
580 *
581 * Returns: The time that the timer was stopped.
582 */
583static ktime_t kvm_mips_count_disable(struct kvm_vcpu *vcpu)
584{
585 struct mips_coproc *cop0 = vcpu->arch.cop0;
586 uint32_t count;
587 ktime_t now;
588
589 /* Stop hrtimer */
590 hrtimer_cancel(&vcpu->arch.comparecount_timer);
591
592 /* Set the static count from the dynamic count, handling pending TI */
593 now = ktime_get();
594 count = kvm_mips_read_count_running(vcpu, now);
595 kvm_write_c0_guest_count(cop0, count);
596
597 return now;
598}
599
600/**
601 * kvm_mips_count_disable_cause() - Disable count using CP0_Cause.DC.
602 * @vcpu: Virtual CPU.
603 *
604 * Disable the CP0_Count timer and set CP0_Cause.DC. A timer interrupt on or
605 * before the final stop time will be handled if the timer isn't disabled by
606 * count_ctl.DC, but not after.
607 *
608 * Assumes CP0_Cause.DC is clear (count enabled).
609 */
610void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu)
611{
612 struct mips_coproc *cop0 = vcpu->arch.cop0;
613
614 kvm_set_c0_guest_cause(cop0, CAUSEF_DC);
615 if (!(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC))
616 kvm_mips_count_disable(vcpu);
617}
618
619/**
620 * kvm_mips_count_enable_cause() - Enable count using CP0_Cause.DC.
621 * @vcpu: Virtual CPU.
622 *
623 * Enable the CP0_Count timer and clear CP0_Cause.DC. A timer interrupt after
624 * the start time will be handled if the timer isn't disabled by count_ctl.DC,
625 * potentially before even returning, so the caller should be careful with
626 * ordering of CP0_Cause modifications so as not to lose it.
627 *
628 * Assumes CP0_Cause.DC is set (count disabled).
629 */
630void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu)
631{
632 struct mips_coproc *cop0 = vcpu->arch.cop0;
633 uint32_t count;
634
635 kvm_clear_c0_guest_cause(cop0, CAUSEF_DC);
636
637 /*
638 * Set the dynamic count to match the static count.
639 * This starts the hrtimer if count_ctl.DC allows it.
640 * Otherwise it conveniently updates the biases.
641 */
642 count = kvm_read_c0_guest_count(cop0);
643 kvm_mips_write_count(vcpu, count);
644}
645
646/**
647 * kvm_mips_set_count_ctl() - Update the count control KVM register.
648 * @vcpu: Virtual CPU.
649 * @count_ctl: Count control register new value.
650 *
651 * Set the count control KVM register. The timer is updated accordingly.
652 *
653 * Returns: -EINVAL if reserved bits are set.
654 * 0 on success.
655 */
656int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl)
657{
658 struct mips_coproc *cop0 = vcpu->arch.cop0;
659 s64 changed = count_ctl ^ vcpu->arch.count_ctl;
660 s64 delta;
661 ktime_t expire, now;
662 uint32_t count, compare;
663
664 /* Only allow defined bits to be changed */
665 if (changed & ~(s64)(KVM_REG_MIPS_COUNT_CTL_DC))
666 return -EINVAL;
667
668 /* Apply new value */
669 vcpu->arch.count_ctl = count_ctl;
670
671 /* Master CP0_Count disable */
672 if (changed & KVM_REG_MIPS_COUNT_CTL_DC) {
673 /* Is CP0_Cause.DC already disabling CP0_Count? */
674 if (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC) {
675 if (count_ctl & KVM_REG_MIPS_COUNT_CTL_DC)
676 /* Just record the current time */
677 vcpu->arch.count_resume = ktime_get();
678 } else if (count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) {
679 /* disable timer and record current time */
680 vcpu->arch.count_resume = kvm_mips_count_disable(vcpu);
681 } else {
682 /*
683 * Calculate timeout relative to static count at resume
684 * time (wrap 0 to 2^32).
685 */
686 count = kvm_read_c0_guest_count(cop0);
687 compare = kvm_read_c0_guest_compare(cop0);
688 delta = (u64)(uint32_t)(compare - count - 1) + 1;
689 delta = div_u64(delta * NSEC_PER_SEC,
690 vcpu->arch.count_hz);
691 expire = ktime_add_ns(vcpu->arch.count_resume, delta);
692
693 /* Handle pending interrupt */
694 now = ktime_get();
695 if (ktime_compare(now, expire) >= 0)
696 /* Nothing should be waiting on the timeout */
697 kvm_mips_callbacks->queue_timer_int(vcpu);
698
699 /* Resume hrtimer without changing bias */
700 count = kvm_mips_read_count_running(vcpu, now);
701 kvm_mips_resume_hrtimer(vcpu, now, count);
702 }
703 }
704
705 return 0;
706}
707
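Editor's note: the delta computation in the re-enable branch above relies on 32-bit wrap-around, where (u32)(compare - count - 1) + 1 gives the number of ticks until CP0_Compare is next reached and compare == count maps to a full 2^32-tick period rather than an immediate expiry. A small standalone sketch of that arithmetic follows; the 100 MHz frequency is an assumption chosen for the example.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* Ticks until CP0_Compare is reached; compare == count wraps to 2^32 */
static uint64_t ticks_to_expiry(uint32_t count, uint32_t compare)
{
	return (uint64_t)(uint32_t)(compare - count - 1) + 1;
}

int main(void)
{
	uint32_t count_hz = 100000000;	/* assumed 100 MHz guest timer */
	uint64_t ticks = ticks_to_expiry(0xfffffff0, 0x00000010);

	printf("ticks=%llu ns=%llu\n",
	       (unsigned long long)ticks,
	       (unsigned long long)(ticks * NSEC_PER_SEC / count_hz));

	/* compare == count: a full period away, not an immediate expiry */
	printf("wrap ticks=%llu\n",
	       (unsigned long long)ticks_to_expiry(0x100, 0x100));
	return 0;
}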
708/**
709 * kvm_mips_set_count_resume() - Update the count resume KVM register.
710 * @vcpu: Virtual CPU.
711 * @count_resume: Count resume register new value.
712 *
713 * Set the count resume KVM register.
714 *
715 * Returns: -EINVAL if out of valid range (0..now).
716 * 0 on success.
717 */
718int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume)
719{
720 /*
721 * It doesn't make sense for the resume time to be in the future, as it
722 * would be possible for the next interrupt to be more than a full
723 * period in the future.
724 */
725 if (count_resume < 0 || count_resume > ktime_to_ns(ktime_get()))
726 return -EINVAL;
727
728 vcpu->arch.count_resume = ns_to_ktime(count_resume);
729 return 0;
730}
731
732/**
733 * kvm_mips_count_timeout() - Push timer forward on timeout.
734 * @vcpu: Virtual CPU.
735 *
 736 * Handle an hrtimer event by pushing the hrtimer forward one period.
737 *
738 * Returns: The hrtimer_restart value to return to the hrtimer subsystem.
739 */
740enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu)
741{
742 /* Add the Count period to the current expiry time */
743 hrtimer_add_expires_ns(&vcpu->arch.comparecount_timer,
744 vcpu->arch.count_period);
745 return HRTIMER_RESTART;
250} 746}
251 747
252enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu) 748enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu)
@@ -471,8 +967,7 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause,
471#endif 967#endif
472 /* Get reg */ 968 /* Get reg */
473 if ((rd == MIPS_CP0_COUNT) && (sel == 0)) { 969 if ((rd == MIPS_CP0_COUNT) && (sel == 0)) {
474 /* XXXKYMA: Run the Guest count register @ 1/4 the rate of the host */ 970 vcpu->arch.gprs[rt] = kvm_mips_read_count(vcpu);
475 vcpu->arch.gprs[rt] = (read_c0_count() >> 2);
476 } else if ((rd == MIPS_CP0_ERRCTL) && (sel == 0)) { 971 } else if ((rd == MIPS_CP0_ERRCTL) && (sel == 0)) {
477 vcpu->arch.gprs[rt] = 0x0; 972 vcpu->arch.gprs[rt] = 0x0;
478#ifdef CONFIG_KVM_MIPS_DYN_TRANS 973#ifdef CONFIG_KVM_MIPS_DYN_TRANS
@@ -539,10 +1034,7 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause,
539 } 1034 }
540 /* Are we writing to COUNT */ 1035 /* Are we writing to COUNT */
541 else if ((rd == MIPS_CP0_COUNT) && (sel == 0)) { 1036 else if ((rd == MIPS_CP0_COUNT) && (sel == 0)) {
542 /* Linux doesn't seem to write into COUNT, we throw an error 1037 kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]);
543 * if we notice a write to COUNT
544 */
545 /*er = EMULATE_FAIL; */
546 goto done; 1038 goto done;
547 } else if ((rd == MIPS_CP0_COMPARE) && (sel == 0)) { 1039 } else if ((rd == MIPS_CP0_COMPARE) && (sel == 0)) {
548 kvm_debug("[%#x] MTCz, COMPARE %#lx <- %#lx\n", 1040 kvm_debug("[%#x] MTCz, COMPARE %#lx <- %#lx\n",
@@ -552,8 +1044,8 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause,
552 /* If we are writing to COMPARE */ 1044 /* If we are writing to COMPARE */
553 /* Clear pending timer interrupt, if any */ 1045 /* Clear pending timer interrupt, if any */
554 kvm_mips_callbacks->dequeue_timer_int(vcpu); 1046 kvm_mips_callbacks->dequeue_timer_int(vcpu);
555 kvm_write_c0_guest_compare(cop0, 1047 kvm_mips_write_compare(vcpu,
556 vcpu->arch.gprs[rt]); 1048 vcpu->arch.gprs[rt]);
557 } else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) { 1049 } else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
558 kvm_write_c0_guest_status(cop0, 1050 kvm_write_c0_guest_status(cop0,
559 vcpu->arch.gprs[rt]); 1051 vcpu->arch.gprs[rt]);
@@ -564,6 +1056,20 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause,
564#ifdef CONFIG_KVM_MIPS_DYN_TRANS 1056#ifdef CONFIG_KVM_MIPS_DYN_TRANS
565 kvm_mips_trans_mtc0(inst, opc, vcpu); 1057 kvm_mips_trans_mtc0(inst, opc, vcpu);
566#endif 1058#endif
1059 } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) {
1060 uint32_t old_cause, new_cause;
1061 old_cause = kvm_read_c0_guest_cause(cop0);
1062 new_cause = vcpu->arch.gprs[rt];
1063 /* Update R/W bits */
1064 kvm_change_c0_guest_cause(cop0, 0x08800300,
1065 new_cause);
1066 /* DC bit enabling/disabling timer? */
1067 if ((old_cause ^ new_cause) & CAUSEF_DC) {
1068 if (new_cause & CAUSEF_DC)
1069 kvm_mips_count_disable_cause(vcpu);
1070 else
1071 kvm_mips_count_enable_cause(vcpu);
1072 }
567 } else { 1073 } else {
568 cop0->reg[rd][sel] = vcpu->arch.gprs[rt]; 1074 cop0->reg[rd][sel] = vcpu->arch.gprs[rt];
569#ifdef CONFIG_KVM_MIPS_DYN_TRANS 1075#ifdef CONFIG_KVM_MIPS_DYN_TRANS
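Editor's note: the 0x08800300 read/write mask used in the MTC0-to-Cause path above selects the guest-writable Cause bits. The breakdown below uses the standard MIPS CP0_Cause bit positions; it is a cross-check aid and should be verified against asm/mipsregs.h rather than taken as authoritative.

#include <stdio.h>

#define CAUSEF_DC	(1u << 27)	/* 0x08000000 */
#define CAUSEF_IV	(1u << 23)	/* 0x00800000 */
#define CAUSEF_IP1	(1u << 9)	/* 0x00000200 */
#define CAUSEF_IP0	(1u << 8)	/* 0x00000100 */

int main(void)
{
	printf("%#x\n", CAUSEF_DC | CAUSEF_IV | CAUSEF_IP1 | CAUSEF_IP0);
	return 0;	/* prints 0x8800300 */
}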
@@ -887,7 +1393,7 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu)
887 1393
888 printk("%s: va: %#lx, unmapped: %#x\n", __func__, va, CKSEG0ADDR(pa)); 1394 printk("%s: va: %#lx, unmapped: %#x\n", __func__, va, CKSEG0ADDR(pa));
889 1395
890 mips32_SyncICache(CKSEG0ADDR(pa), 32); 1396 local_flush_icache_range(CKSEG0ADDR(pa), 32);
891 return 0; 1397 return 0;
892} 1398}
893 1399
@@ -1325,8 +1831,12 @@ kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc,
1325 struct kvm_run *run, struct kvm_vcpu *vcpu) 1831 struct kvm_run *run, struct kvm_vcpu *vcpu)
1326{ 1832{
1327 enum emulation_result er = EMULATE_DONE; 1833 enum emulation_result er = EMULATE_DONE;
1328
1329#ifdef DEBUG 1834#ifdef DEBUG
1835 struct mips_coproc *cop0 = vcpu->arch.cop0;
1836 unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) |
1837 (kvm_read_c0_guest_entryhi(cop0) & ASID_MASK);
1838 int index;
1839
1330 /* 1840 /*
1331 * If address not in the guest TLB, then we are in trouble 1841 * If address not in the guest TLB, then we are in trouble
1332 */ 1842 */
@@ -1553,8 +2063,7 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc,
1553 current_cpu_data.icache.linesz); 2063 current_cpu_data.icache.linesz);
1554 break; 2064 break;
1555 case 2: /* Read count register */ 2065 case 2: /* Read count register */
1556 printk("RDHWR: Cont register\n"); 2066 arch->gprs[rt] = kvm_mips_read_count(vcpu);
1557 arch->gprs[rt] = kvm_read_c0_guest_count(cop0);
1558 break; 2067 break;
1559 case 3: /* Count register resolution */ 2068 case 3: /* Count register resolution */
1560 switch (current_cpu_data.cputype) { 2069 switch (current_cpu_data.cputype) {
@@ -1810,11 +2319,9 @@ kvm_mips_handle_tlbmiss(unsigned long cause, uint32_t *opc,
1810 er = EMULATE_FAIL; 2319 er = EMULATE_FAIL;
1811 } 2320 }
1812 } else { 2321 } else {
1813#ifdef DEBUG
1814 kvm_debug 2322 kvm_debug
1815 ("Injecting hi: %#lx, lo0: %#lx, lo1: %#lx into shadow host TLB\n", 2323 ("Injecting hi: %#lx, lo0: %#lx, lo1: %#lx into shadow host TLB\n",
1816 tlb->tlb_hi, tlb->tlb_lo0, tlb->tlb_lo1); 2324 tlb->tlb_hi, tlb->tlb_lo0, tlb->tlb_lo1);
1817#endif
1818 /* OK we have a Guest TLB entry, now inject it into the shadow host TLB */ 2325 /* OK we have a Guest TLB entry, now inject it into the shadow host TLB */
1819 kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, NULL, 2326 kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, NULL,
1820 NULL); 2327 NULL);
diff --git a/arch/mips/kvm/kvm_tlb.c b/arch/mips/kvm/kvm_tlb.c
index 50ab9c4d4a5d..8a5a700ad8de 100644
--- a/arch/mips/kvm/kvm_tlb.c
+++ b/arch/mips/kvm/kvm_tlb.c
@@ -222,26 +222,19 @@ kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi,
222 return -1; 222 return -1;
223 } 223 }
224 224
225 if (idx < 0) {
226 idx = read_c0_random() % current_cpu_data.tlbsize;
227 write_c0_index(idx);
228 mtc0_tlbw_hazard();
229 }
230 write_c0_entrylo0(entrylo0); 225 write_c0_entrylo0(entrylo0);
231 write_c0_entrylo1(entrylo1); 226 write_c0_entrylo1(entrylo1);
232 mtc0_tlbw_hazard(); 227 mtc0_tlbw_hazard();
233 228
234 tlb_write_indexed(); 229 if (idx < 0)
230 tlb_write_random();
231 else
232 tlb_write_indexed();
235 tlbw_use_hazard(); 233 tlbw_use_hazard();
236 234
237#ifdef DEBUG 235 kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0(R): 0x%08lx, entrylo1(R): 0x%08lx\n",
238 if (debug) { 236 vcpu->arch.pc, idx, read_c0_entryhi(),
239 kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] " 237 read_c0_entrylo0(), read_c0_entrylo1());
240 "entrylo0(R): 0x%08lx, entrylo1(R): 0x%08lx\n",
241 vcpu->arch.pc, idx, read_c0_entryhi(),
242 read_c0_entrylo0(), read_c0_entrylo1());
243 }
244#endif
245 238
246 /* Flush D-cache */ 239 /* Flush D-cache */
247 if (flush_dcache_mask) { 240 if (flush_dcache_mask) {
@@ -348,11 +341,9 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
348 mtc0_tlbw_hazard(); 341 mtc0_tlbw_hazard();
349 tlbw_use_hazard(); 342 tlbw_use_hazard();
350 343
351#ifdef DEBUG
352 kvm_debug ("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n", 344 kvm_debug ("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n",
353 vcpu->arch.pc, read_c0_index(), read_c0_entryhi(), 345 vcpu->arch.pc, read_c0_index(), read_c0_entryhi(),
354 read_c0_entrylo0(), read_c0_entrylo1()); 346 read_c0_entrylo0(), read_c0_entrylo1());
355#endif
356 347
357 /* Restore old ASID */ 348 /* Restore old ASID */
358 write_c0_entryhi(old_entryhi); 349 write_c0_entryhi(old_entryhi);
@@ -400,10 +391,8 @@ kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
400 entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | 391 entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) |
401 (tlb->tlb_lo1 & MIPS3_PG_D) | (tlb->tlb_lo1 & MIPS3_PG_V); 392 (tlb->tlb_lo1 & MIPS3_PG_D) | (tlb->tlb_lo1 & MIPS3_PG_V);
402 393
403#ifdef DEBUG
404 kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, 394 kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
405 tlb->tlb_lo0, tlb->tlb_lo1); 395 tlb->tlb_lo0, tlb->tlb_lo1);
406#endif
407 396
408 return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, 397 return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
409 tlb->tlb_mask); 398 tlb->tlb_mask);
@@ -424,10 +413,8 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
424 } 413 }
425 } 414 }
426 415
427#ifdef DEBUG
428 kvm_debug("%s: entryhi: %#lx, index: %d lo0: %#lx, lo1: %#lx\n", 416 kvm_debug("%s: entryhi: %#lx, index: %d lo0: %#lx, lo1: %#lx\n",
429 __func__, entryhi, index, tlb[i].tlb_lo0, tlb[i].tlb_lo1); 417 __func__, entryhi, index, tlb[i].tlb_lo0, tlb[i].tlb_lo1);
430#endif
431 418
432 return index; 419 return index;
433} 420}
@@ -461,9 +448,7 @@ int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr)
461 448
462 local_irq_restore(flags); 449 local_irq_restore(flags);
463 450
464#ifdef DEBUG
465 kvm_debug("Host TLB lookup, %#lx, idx: %2d\n", vaddr, idx); 451 kvm_debug("Host TLB lookup, %#lx, idx: %2d\n", vaddr, idx);
466#endif
467 452
468 return idx; 453 return idx;
469} 454}
@@ -508,12 +493,9 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
508 493
509 local_irq_restore(flags); 494 local_irq_restore(flags);
510 495
511#ifdef DEBUG 496 if (idx > 0)
512 if (idx > 0) {
513 kvm_debug("%s: Invalidated entryhi %#lx @ idx %d\n", __func__, 497 kvm_debug("%s: Invalidated entryhi %#lx @ idx %d\n", __func__,
514 (va & VPN2_MASK) | (vcpu->arch.asid_map[va & ASID_MASK] & ASID_MASK), idx); 498 (va & VPN2_MASK) | kvm_mips_get_user_asid(vcpu), idx);
515 }
516#endif
517 499
518 return 0; 500 return 0;
519} 501}
@@ -658,15 +640,30 @@ void kvm_local_flush_tlb_all(void)
658 local_irq_restore(flags); 640 local_irq_restore(flags);
659} 641}
660 642
643/**
644 * kvm_mips_migrate_count() - Migrate timer.
645 * @vcpu: Virtual CPU.
646 *
647 * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it
648 * if it was running prior to being cancelled.
649 *
650 * Must be called when the VCPU is migrated to a different CPU to ensure that
651 * timer expiry during guest execution interrupts the guest and causes the
652 * interrupt to be delivered in a timely manner.
653 */
654static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
655{
656 if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
657 hrtimer_restart(&vcpu->arch.comparecount_timer);
658}
659
661/* Restore ASID once we are scheduled back after preemption */ 660/* Restore ASID once we are scheduled back after preemption */
662void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 661void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
663{ 662{
664 unsigned long flags; 663 unsigned long flags;
665 int newasid = 0; 664 int newasid = 0;
666 665
667#ifdef DEBUG
668 kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu); 666 kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);
669#endif
670 667
 671 /* Allocate new kernel and user ASIDs if needed */ 668
672 669
@@ -682,17 +679,23 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
682 vcpu->arch.guest_user_mm.context.asid[cpu]; 679 vcpu->arch.guest_user_mm.context.asid[cpu];
683 newasid++; 680 newasid++;
684 681
685 kvm_info("[%d]: cpu_context: %#lx\n", cpu, 682 kvm_debug("[%d]: cpu_context: %#lx\n", cpu,
686 cpu_context(cpu, current->mm)); 683 cpu_context(cpu, current->mm));
687 kvm_info("[%d]: Allocated new ASID for Guest Kernel: %#x\n", 684 kvm_debug("[%d]: Allocated new ASID for Guest Kernel: %#x\n",
688 cpu, vcpu->arch.guest_kernel_asid[cpu]); 685 cpu, vcpu->arch.guest_kernel_asid[cpu]);
689 kvm_info("[%d]: Allocated new ASID for Guest User: %#x\n", cpu, 686 kvm_debug("[%d]: Allocated new ASID for Guest User: %#x\n", cpu,
690 vcpu->arch.guest_user_asid[cpu]); 687 vcpu->arch.guest_user_asid[cpu]);
691 } 688 }
692 689
693 if (vcpu->arch.last_sched_cpu != cpu) { 690 if (vcpu->arch.last_sched_cpu != cpu) {
694 kvm_info("[%d->%d]KVM VCPU[%d] switch\n", 691 kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
695 vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); 692 vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
693 /*
694 * Migrate the timer interrupt to the current CPU so that it
695 * always interrupts the guest and synchronously triggers a
696 * guest timer interrupt.
697 */
698 kvm_mips_migrate_count(vcpu);
696 } 699 }
697 700
698 if (!newasid) { 701 if (!newasid) {
diff --git a/arch/mips/kvm/kvm_trap_emul.c b/arch/mips/kvm/kvm_trap_emul.c
index 30d725321db1..693f952b2fbb 100644
--- a/arch/mips/kvm/kvm_trap_emul.c
+++ b/arch/mips/kvm/kvm_trap_emul.c
@@ -32,9 +32,7 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
32 gpa = KVM_INVALID_ADDR; 32 gpa = KVM_INVALID_ADDR;
33 } 33 }
34 34
35#ifdef DEBUG
36 kvm_debug("%s: gva %#lx, gpa: %#llx\n", __func__, gva, gpa); 35 kvm_debug("%s: gva %#lx, gpa: %#llx\n", __func__, gva, gpa);
37#endif
38 36
39 return gpa; 37 return gpa;
40} 38}
@@ -85,11 +83,9 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu)
85 83
86 if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 84 if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0
87 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { 85 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) {
88#ifdef DEBUG
89 kvm_debug 86 kvm_debug
90 ("USER/KSEG23 ADDR TLB MOD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n", 87 ("USER/KSEG23 ADDR TLB MOD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n",
91 cause, opc, badvaddr); 88 cause, opc, badvaddr);
92#endif
93 er = kvm_mips_handle_tlbmod(cause, opc, run, vcpu); 89 er = kvm_mips_handle_tlbmod(cause, opc, run, vcpu);
94 90
95 if (er == EMULATE_DONE) 91 if (er == EMULATE_DONE)
@@ -138,11 +134,9 @@ static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu)
138 } 134 }
139 } else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 135 } else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0
140 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { 136 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) {
141#ifdef DEBUG
142 kvm_debug 137 kvm_debug
143 ("USER ADDR TLB LD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n", 138 ("USER ADDR TLB LD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n",
144 cause, opc, badvaddr); 139 cause, opc, badvaddr);
145#endif
146 er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu); 140 er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu);
147 if (er == EMULATE_DONE) 141 if (er == EMULATE_DONE)
148 ret = RESUME_GUEST; 142 ret = RESUME_GUEST;
@@ -188,10 +182,8 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu)
188 } 182 }
189 } else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 183 } else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0
190 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { 184 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) {
191#ifdef DEBUG
192 kvm_debug("USER ADDR TLB ST fault: PC: %#lx, BadVaddr: %#lx\n", 185 kvm_debug("USER ADDR TLB ST fault: PC: %#lx, BadVaddr: %#lx\n",
193 vcpu->arch.pc, badvaddr); 186 vcpu->arch.pc, badvaddr);
194#endif
195 187
196 /* User Address (UA) fault, this could happen if 188 /* User Address (UA) fault, this could happen if
197 * (1) TLB entry not present/valid in both Guest and shadow host TLBs, in this 189 * (1) TLB entry not present/valid in both Guest and shadow host TLBs, in this
@@ -236,9 +228,7 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu)
236 228
237 if (KVM_GUEST_KERNEL_MODE(vcpu) 229 if (KVM_GUEST_KERNEL_MODE(vcpu)
238 && (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) { 230 && (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) {
239#ifdef DEBUG
240 kvm_debug("Emulate Store to MMIO space\n"); 231 kvm_debug("Emulate Store to MMIO space\n");
241#endif
242 er = kvm_mips_emulate_inst(cause, opc, run, vcpu); 232 er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
243 if (er == EMULATE_FAIL) { 233 if (er == EMULATE_FAIL) {
244 printk("Emulate Store to MMIO space failed\n"); 234 printk("Emulate Store to MMIO space failed\n");
@@ -268,9 +258,7 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu)
268 int ret = RESUME_GUEST; 258 int ret = RESUME_GUEST;
269 259
270 if (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1) { 260 if (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1) {
271#ifdef DEBUG
272 kvm_debug("Emulate Load from MMIO space @ %#lx\n", badvaddr); 261 kvm_debug("Emulate Load from MMIO space @ %#lx\n", badvaddr);
273#endif
274 er = kvm_mips_emulate_inst(cause, opc, run, vcpu); 262 er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
275 if (er == EMULATE_FAIL) { 263 if (er == EMULATE_FAIL) {
276 printk("Emulate Load from MMIO space failed\n"); 264 printk("Emulate Load from MMIO space failed\n");
@@ -401,6 +389,78 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
401 return 0; 389 return 0;
402} 390}
403 391
392static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu,
393 const struct kvm_one_reg *reg,
394 s64 *v)
395{
396 switch (reg->id) {
397 case KVM_REG_MIPS_CP0_COUNT:
398 *v = kvm_mips_read_count(vcpu);
399 break;
400 case KVM_REG_MIPS_COUNT_CTL:
401 *v = vcpu->arch.count_ctl;
402 break;
403 case KVM_REG_MIPS_COUNT_RESUME:
404 *v = ktime_to_ns(vcpu->arch.count_resume);
405 break;
406 case KVM_REG_MIPS_COUNT_HZ:
407 *v = vcpu->arch.count_hz;
408 break;
409 default:
410 return -EINVAL;
411 }
412 return 0;
413}
414
415static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
416 const struct kvm_one_reg *reg,
417 s64 v)
418{
419 struct mips_coproc *cop0 = vcpu->arch.cop0;
420 int ret = 0;
421
422 switch (reg->id) {
423 case KVM_REG_MIPS_CP0_COUNT:
424 kvm_mips_write_count(vcpu, v);
425 break;
426 case KVM_REG_MIPS_CP0_COMPARE:
427 kvm_mips_write_compare(vcpu, v);
428 break;
429 case KVM_REG_MIPS_CP0_CAUSE:
430 /*
431 * If the timer is stopped or started (DC bit) it must look
432 * atomic with changes to the interrupt pending bits (TI, IRQ5).
433 * A timer interrupt should not happen in between.
434 */
435 if ((kvm_read_c0_guest_cause(cop0) ^ v) & CAUSEF_DC) {
436 if (v & CAUSEF_DC) {
437 /* disable timer first */
438 kvm_mips_count_disable_cause(vcpu);
439 kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v);
440 } else {
441 /* enable timer last */
442 kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v);
443 kvm_mips_count_enable_cause(vcpu);
444 }
445 } else {
446 kvm_write_c0_guest_cause(cop0, v);
447 }
448 break;
449 case KVM_REG_MIPS_COUNT_CTL:
450 ret = kvm_mips_set_count_ctl(vcpu, v);
451 break;
452 case KVM_REG_MIPS_COUNT_RESUME:
453 ret = kvm_mips_set_count_resume(vcpu, v);
454 break;
455 case KVM_REG_MIPS_COUNT_HZ:
456 ret = kvm_mips_set_count_hz(vcpu, v);
457 break;
458 default:
459 return -EINVAL;
460 }
461 return ret;
462}
463
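Editor's note: with the get/set handlers above wired into the trap-and-emulate callbacks, userspace reaches the timer state through the ONE_REG interface. The hedged sketch below shows how a VMM might freeze the guest timer around a save/restore cycle; it assumes KVM_REG_MIPS_COUNT_CTL and the KVM_REG_MIPS_COUNT_CTL_DC bit are exported by the MIPS uapi headers as this series intends, and it elides error handling.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Freeze the guest CP0_Count via the master disable bit (sketch only). */
static int freeze_guest_count(int vcpu_fd)
{
	uint64_t val = KVM_REG_MIPS_COUNT_CTL_DC;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_MIPS_COUNT_CTL,
		.addr = (uintptr_t)&val,
	};

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}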
404static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { 464static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
405 /* exit handlers */ 465 /* exit handlers */
406 .handle_cop_unusable = kvm_trap_emul_handle_cop_unusable, 466 .handle_cop_unusable = kvm_trap_emul_handle_cop_unusable,
@@ -423,6 +483,8 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
423 .dequeue_io_int = kvm_mips_dequeue_io_int_cb, 483 .dequeue_io_int = kvm_mips_dequeue_io_int_cb,
424 .irq_deliver = kvm_mips_irq_deliver_cb, 484 .irq_deliver = kvm_mips_irq_deliver_cb,
425 .irq_clear = kvm_mips_irq_clear_cb, 485 .irq_clear = kvm_mips_irq_clear_cb,
486 .get_one_reg = kvm_trap_emul_get_one_reg,
487 .set_one_reg = kvm_trap_emul_set_one_reg,
426}; 488};
427 489
428int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks) 490int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks)
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 9e67cdea3c74..f7b91d3a371d 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -31,6 +31,7 @@ void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page,
31void (*flush_icache_range)(unsigned long start, unsigned long end); 31void (*flush_icache_range)(unsigned long start, unsigned long end);
32EXPORT_SYMBOL_GPL(flush_icache_range); 32EXPORT_SYMBOL_GPL(flush_icache_range);
33void (*local_flush_icache_range)(unsigned long start, unsigned long end); 33void (*local_flush_icache_range)(unsigned long start, unsigned long end);
34EXPORT_SYMBOL_GPL(local_flush_icache_range);
34 35
35void (*__flush_cache_vmap)(void); 36void (*__flush_cache_vmap)(void);
36void (*__flush_cache_vunmap)(void); 37void (*__flush_cache_vunmap)(void);
diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
index 319009912142..3778a359f3ad 100644
--- a/arch/mips/mti-malta/malta-time.c
+++ b/arch/mips/mti-malta/malta-time.c
@@ -74,18 +74,8 @@ static void __init estimate_frequencies(void)
74 unsigned int giccount = 0, gicstart = 0; 74 unsigned int giccount = 0, gicstart = 0;
75#endif 75#endif
76 76
77#if defined (CONFIG_KVM_GUEST) && defined (CONFIG_KVM_HOST_FREQ) 77#if defined(CONFIG_KVM_GUEST) && CONFIG_KVM_GUEST_TIMER_FREQ
78 unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK); 78 mips_hpt_frequency = CONFIG_KVM_GUEST_TIMER_FREQ * 1000000;
79
80 /*
81 * XXXKYMA: hardwire the CPU frequency to Host Freq/4
82 */
83 count = (CONFIG_KVM_HOST_FREQ * 1000000) >> 3;
84 if ((prid != (PRID_COMP_MIPS | PRID_IMP_20KC)) &&
85 (prid != (PRID_COMP_MIPS | PRID_IMP_25KF)))
86 count *= 2;
87
88 mips_hpt_frequency = count;
89 return; 79 return;
90#endif 80#endif
91 81
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
index 856f8deb557a..6330a61b875a 100644
--- a/arch/powerpc/include/asm/disassemble.h
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -81,4 +81,38 @@ static inline unsigned int get_oc(u32 inst)
81{ 81{
82 return (inst >> 11) & 0x7fff; 82 return (inst >> 11) & 0x7fff;
83} 83}
84
85#define IS_XFORM(inst) (get_op(inst) == 31)
86#define IS_DSFORM(inst) (get_op(inst) >= 56)
87
88/*
89 * Create a DSISR value from the instruction
90 */
91static inline unsigned make_dsisr(unsigned instr)
92{
93 unsigned dsisr;
94
95
96 /* bits 6:15 --> 22:31 */
97 dsisr = (instr & 0x03ff0000) >> 16;
98
99 if (IS_XFORM(instr)) {
100 /* bits 29:30 --> 15:16 */
101 dsisr |= (instr & 0x00000006) << 14;
102 /* bit 25 --> 17 */
103 dsisr |= (instr & 0x00000040) << 8;
104 /* bits 21:24 --> 18:21 */
105 dsisr |= (instr & 0x00000780) << 3;
106 } else {
107 /* bit 5 --> 17 */
108 dsisr |= (instr & 0x04000000) >> 12;
109 /* bits 1: 4 --> 18:21 */
110 dsisr |= (instr & 0x78000000) >> 17;
111 /* bits 30:31 --> 12:13 */
112 if (IS_DSFORM(instr))
113 dsisr |= (instr & 0x00000003) << 18;
114 }
115
116 return dsisr;
117}
84#endif /* __ASM_PPC_DISASSEMBLE_H__ */ 118#endif /* __ASM_PPC_DISASSEMBLE_H__ */
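Editor's note: since make_dsisr() now lives in this shared header, the bit remapping can be exercised in isolation. The standalone program below copies the mapping so it builds outside the kernel tree; the instruction word fed to it is an arbitrary value with primary opcode 31, chosen purely for illustration and not a specific opcode.

#include <stdio.h>

#define IS_XFORM(inst)	(((inst) >> 26) == 31)
#define IS_DSFORM(inst)	(((inst) >> 26) >= 56)

static unsigned make_dsisr(unsigned instr)
{
	unsigned dsisr = (instr & 0x03ff0000) >> 16;	/* bits 6:15 -> 22:31 */

	if (IS_XFORM(instr)) {
		dsisr |= (instr & 0x00000006) << 14;	/* bits 29:30 -> 15:16 */
		dsisr |= (instr & 0x00000040) << 8;	/* bit 25 -> 17 */
		dsisr |= (instr & 0x00000780) << 3;	/* bits 21:24 -> 18:21 */
	} else {
		dsisr |= (instr & 0x04000000) >> 12;	/* bit 5 -> 17 */
		dsisr |= (instr & 0x78000000) >> 17;	/* bits 1:4 -> 18:21 */
		if (IS_DSFORM(instr))
			dsisr |= (instr & 0x00000003) << 18;	/* bits 30:31 -> 12:13 */
	}
	return dsisr;
}

int main(void)
{
	unsigned inst = 0x7c0002aa;	/* arbitrary word, primary opcode 31 */

	printf("dsisr=%#x\n", make_dsisr(inst));
	return 0;
}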
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 19eb74a95b59..9601741080e5 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -102,6 +102,7 @@
102#define BOOK3S_INTERRUPT_PERFMON 0xf00 102#define BOOK3S_INTERRUPT_PERFMON 0xf00
103#define BOOK3S_INTERRUPT_ALTIVEC 0xf20 103#define BOOK3S_INTERRUPT_ALTIVEC 0xf20
104#define BOOK3S_INTERRUPT_VSX 0xf40 104#define BOOK3S_INTERRUPT_VSX 0xf40
105#define BOOK3S_INTERRUPT_FAC_UNAVAIL 0xf60
105#define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80 106#define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80
106 107
107#define BOOK3S_IRQPRIO_SYSTEM_RESET 0 108#define BOOK3S_IRQPRIO_SYSTEM_RESET 0
@@ -114,14 +115,15 @@
114#define BOOK3S_IRQPRIO_FP_UNAVAIL 7 115#define BOOK3S_IRQPRIO_FP_UNAVAIL 7
115#define BOOK3S_IRQPRIO_ALTIVEC 8 116#define BOOK3S_IRQPRIO_ALTIVEC 8
116#define BOOK3S_IRQPRIO_VSX 9 117#define BOOK3S_IRQPRIO_VSX 9
117#define BOOK3S_IRQPRIO_SYSCALL 10 118#define BOOK3S_IRQPRIO_FAC_UNAVAIL 10
118#define BOOK3S_IRQPRIO_MACHINE_CHECK 11 119#define BOOK3S_IRQPRIO_SYSCALL 11
119#define BOOK3S_IRQPRIO_DEBUG 12 120#define BOOK3S_IRQPRIO_MACHINE_CHECK 12
120#define BOOK3S_IRQPRIO_EXTERNAL 13 121#define BOOK3S_IRQPRIO_DEBUG 13
121#define BOOK3S_IRQPRIO_DECREMENTER 14 122#define BOOK3S_IRQPRIO_EXTERNAL 14
122#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 15 123#define BOOK3S_IRQPRIO_DECREMENTER 15
123#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 16 124#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 16
124#define BOOK3S_IRQPRIO_MAX 17 125#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 17
126#define BOOK3S_IRQPRIO_MAX 18
125 127
126#define BOOK3S_HFLAG_DCBZ32 0x1 128#define BOOK3S_HFLAG_DCBZ32 0x1
127#define BOOK3S_HFLAG_SLB 0x2 129#define BOOK3S_HFLAG_SLB 0x2
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index bb1e38a23ac7..f52f65694527 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -268,9 +268,10 @@ static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
268 return vcpu->arch.pc; 268 return vcpu->arch.pc;
269} 269}
270 270
271static inline u64 kvmppc_get_msr(struct kvm_vcpu *vcpu);
271static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) 272static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
272{ 273{
273 return (vcpu->arch.shared->msr & MSR_LE) != (MSR_KERNEL & MSR_LE); 274 return (kvmppc_get_msr(vcpu) & MSR_LE) != (MSR_KERNEL & MSR_LE);
274} 275}
275 276
276static inline u32 kvmppc_get_last_inst_internal(struct kvm_vcpu *vcpu, ulong pc) 277static inline u32 kvmppc_get_last_inst_internal(struct kvm_vcpu *vcpu, ulong pc)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 51388befeddb..fddb72b48ce9 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -77,34 +77,122 @@ static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
77 return old == 0; 77 return old == 0;
78} 78}
79 79
80static inline int __hpte_actual_psize(unsigned int lp, int psize)
81{
82 int i, shift;
83 unsigned int mask;
84
85 /* start from 1 ignoring MMU_PAGE_4K */
86 for (i = 1; i < MMU_PAGE_COUNT; i++) {
87
88 /* invalid penc */
89 if (mmu_psize_defs[psize].penc[i] == -1)
90 continue;
91 /*
92 * encoding bits per actual page size
93 * PTE LP actual page size
94 * rrrr rrrz >=8KB
95 * rrrr rrzz >=16KB
96 * rrrr rzzz >=32KB
97 * rrrr zzzz >=64KB
98 * .......
99 */
100 shift = mmu_psize_defs[i].shift - LP_SHIFT;
101 if (shift > LP_BITS)
102 shift = LP_BITS;
103 mask = (1 << shift) - 1;
104 if ((lp & mask) == mmu_psize_defs[psize].penc[i])
105 return i;
106 }
107 return -1;
108}
109
80static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, 110static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
81 unsigned long pte_index) 111 unsigned long pte_index)
82{ 112{
83 unsigned long rb, va_low; 113 int b_psize, a_psize;
114 unsigned int penc;
115 unsigned long rb = 0, va_low, sllp;
116 unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
117
118 if (!(v & HPTE_V_LARGE)) {
119 /* both base and actual psize is 4k */
120 b_psize = MMU_PAGE_4K;
121 a_psize = MMU_PAGE_4K;
122 } else {
123 for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) {
124
125 /* valid entries have a shift value */
126 if (!mmu_psize_defs[b_psize].shift)
127 continue;
84 128
129 a_psize = __hpte_actual_psize(lp, b_psize);
130 if (a_psize != -1)
131 break;
132 }
133 }
134 /*
135 * Ignore the top 14 bits of va
 136 * v has its top two bits covering the segment size, hence shift
 137 * by 16 bits; also clear the lower HPTE_V_AVPN_SHIFT (7) bits.
 138 * The AVA field in v also has its lower 23 bits ignored.
 139 * For a 4K base page size we need bits 14..65 (so we need to
 140 * collect an extra 11 bits);
 141 * for other sizes we need bits 14..14+i.
142 */
143 /* This covers 14..54 bits of va*/
85 rb = (v & ~0x7fUL) << 16; /* AVA field */ 144 rb = (v & ~0x7fUL) << 16; /* AVA field */
145 /*
 146 * The AVA in v has its lower 23 bits cleared; we need to derive
 147 * those from the pteg index.
148 */
86 va_low = pte_index >> 3; 149 va_low = pte_index >> 3;
87 if (v & HPTE_V_SECONDARY) 150 if (v & HPTE_V_SECONDARY)
88 va_low = ~va_low; 151 va_low = ~va_low;
89 /* xor vsid from AVA */ 152 /*
 153 * Get the vpn bits from va_low using the reverse of the hashing.
 154 * In v we have the va with 23 bits dropped and then left shifted
 155 * by HPTE_V_AVPN_SHIFT (7) bits. Now to find the vsid we need to
 156 * right shift it by (SID_SHIFT - (23 - 7)).
157 */
90 if (!(v & HPTE_V_1TB_SEG)) 158 if (!(v & HPTE_V_1TB_SEG))
91 va_low ^= v >> 12; 159 va_low ^= v >> (SID_SHIFT - 16);
92 else 160 else
93 va_low ^= v >> 24; 161 va_low ^= v >> (SID_SHIFT_1T - 16);
94 va_low &= 0x7ff; 162 va_low &= 0x7ff;
95 if (v & HPTE_V_LARGE) { 163
96 rb |= 1; /* L field */ 164 switch (b_psize) {
97 if (cpu_has_feature(CPU_FTR_ARCH_206) && 165 case MMU_PAGE_4K:
98 (r & 0xff000)) { 166 sllp = ((mmu_psize_defs[a_psize].sllp & SLB_VSID_L) >> 6) |
99 /* non-16MB large page, must be 64k */ 167 ((mmu_psize_defs[a_psize].sllp & SLB_VSID_LP) >> 4);
100 /* (masks depend on page size) */ 168 rb |= sllp << 5; /* AP field */
101 rb |= 0x1000; /* page encoding in LP field */ 169 rb |= (va_low & 0x7ff) << 12; /* remaining 11 bits of AVA */
102 rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */ 170 break;
103 rb |= ((va_low << 4) & 0xf0); /* AVAL field (P7 doesn't seem to care) */ 171 default:
104 } 172 {
105 } else { 173 int aval_shift;
106 /* 4kB page */ 174 /*
107 rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */ 175 * remaining 7bits of AVA/LP fields
176 * Also contain the rr bits of LP
177 */
178 rb |= (va_low & 0x7f) << 16;
179 /*
180 * Now clear not needed LP bits based on actual psize
181 */
182 rb &= ~((1ul << mmu_psize_defs[a_psize].shift) - 1);
183 /*
184 * AVAL field 58..77 - base_page_shift bits of va
185 * we have space for 58..64 bits, Missing bits should
186 * be zero filled. +1 is to take care of L bit shift
187 */
188 aval_shift = 64 - (77 - mmu_psize_defs[b_psize].shift) + 1;
189 rb |= ((va_low << aval_shift) & 0xfe);
190
191 rb |= 1; /* L field */
192 penc = mmu_psize_defs[b_psize].penc[a_psize];
193 rb |= penc << 12; /* LP field */
194 break;
195 }
108 } 196 }
109 rb |= (v >> 54) & 0x300; /* B field */ 197 rb |= (v >> 54) & 0x300; /* B field */
110 return rb; 198 return rb;
@@ -112,14 +200,26 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
112 200
113static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) 201static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
114{ 202{
203 int size, a_psize;
204 /* Look at the 8 bit LP value */
205 unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
206
115 /* only handle 4k, 64k and 16M pages for now */ 207 /* only handle 4k, 64k and 16M pages for now */
116 if (!(h & HPTE_V_LARGE)) 208 if (!(h & HPTE_V_LARGE))
117 return 1ul << 12; /* 4k page */ 209 return 1ul << 12;
118 if ((l & 0xf000) == 0x1000 && cpu_has_feature(CPU_FTR_ARCH_206)) 210 else {
119 return 1ul << 16; /* 64k page */ 211 for (size = 0; size < MMU_PAGE_COUNT; size++) {
120 if ((l & 0xff000) == 0) 212 /* valid entries have a shift value */
121 return 1ul << 24; /* 16M page */ 213 if (!mmu_psize_defs[size].shift)
122 return 0; /* error */ 214 continue;
215
216 a_psize = __hpte_actual_psize(lp, size);
217 if (a_psize != -1)
218 return 1ul << mmu_psize_defs[a_psize].shift;
219 }
220
221 }
222 return 0;
123} 223}
124 224
125static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) 225static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
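Editor's note: the LP decode above walks the candidate actual page sizes and, for each one, masks off the low-order "z" bits of the 8-bit LP field before comparing against the per-size penc value. The toy program below mirrors only that mask-and-compare mechanism with a single base page size and invented penc values; the real mmu_psize_defs table is two-dimensional (base size by actual size) and its encodings differ.

#include <stdio.h>

#define LP_SHIFT	12
#define LP_BITS		8

struct psize_def { int shift; int penc; };	/* penc < 0 => no encoding */

/* Placeholder table: 4K, 64K, 16M with invented penc values */
static const struct psize_def defs[] = {
	{ 12, -1 },
	{ 16,  1 },
	{ 24,  0 },
};

static int actual_psize(unsigned int lp)
{
	unsigned int i, mask;
	int shift;

	/* start from 1, skipping the 4K entry, as in __hpte_actual_psize() */
	for (i = 1; i < sizeof(defs) / sizeof(defs[0]); i++) {
		if (defs[i].penc == -1)
			continue;
		shift = defs[i].shift - LP_SHIFT;
		if (shift > LP_BITS)
			shift = LP_BITS;
		mask = (1u << shift) - 1;
		if ((lp & mask) == (unsigned int)defs[i].penc)
			return i;
	}
	return -1;
}

int main(void)
{
	printf("lp=0x01 -> index %d\n", actual_psize(0x01));	/* 64K entry */
	printf("lp=0x00 -> index %d\n", actual_psize(0x00));	/* 16M entry */
	return 0;
}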
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 821725c1bf46..5bdfb5dd3400 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -104,6 +104,7 @@ struct kvmppc_host_state {
104#ifdef CONFIG_PPC_BOOK3S_64 104#ifdef CONFIG_PPC_BOOK3S_64
105 u64 cfar; 105 u64 cfar;
106 u64 ppr; 106 u64 ppr;
107 u64 host_fscr;
107#endif 108#endif
108}; 109};
109 110
@@ -133,6 +134,7 @@ struct kvmppc_book3s_shadow_vcpu {
133 u64 esid; 134 u64 esid;
134 u64 vsid; 135 u64 vsid;
135 } slb[64]; /* guest SLB */ 136 } slb[64]; /* guest SLB */
137 u64 shadow_fscr;
136#endif 138#endif
137}; 139};
138 140
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index 80d46b5a7efb..c7aed6105ff9 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -108,9 +108,4 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
108{ 108{
109 return vcpu->arch.fault_dear; 109 return vcpu->arch.fault_dear;
110} 110}
111
112static inline ulong kvmppc_get_msr(struct kvm_vcpu *vcpu)
113{
114 return vcpu->arch.shared->msr;
115}
116#endif /* __ASM_KVM_BOOKE_H__ */ 111#endif /* __ASM_KVM_BOOKE_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1eaea2dea174..bb66d8b8efdf 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -449,7 +449,9 @@ struct kvm_vcpu_arch {
449 ulong pc; 449 ulong pc;
450 ulong ctr; 450 ulong ctr;
451 ulong lr; 451 ulong lr;
452#ifdef CONFIG_PPC_BOOK3S
452 ulong tar; 453 ulong tar;
454#endif
453 455
454 ulong xer; 456 ulong xer;
455 u32 cr; 457 u32 cr;
@@ -475,6 +477,7 @@ struct kvm_vcpu_arch {
475 ulong ppr; 477 ulong ppr;
476 ulong pspb; 478 ulong pspb;
477 ulong fscr; 479 ulong fscr;
480 ulong shadow_fscr;
478 ulong ebbhr; 481 ulong ebbhr;
479 ulong ebbrr; 482 ulong ebbrr;
480 ulong bescr; 483 ulong bescr;
@@ -562,6 +565,7 @@ struct kvm_vcpu_arch {
562#ifdef CONFIG_PPC_BOOK3S 565#ifdef CONFIG_PPC_BOOK3S
563 ulong fault_dar; 566 ulong fault_dar;
564 u32 fault_dsisr; 567 u32 fault_dsisr;
568 unsigned long intr_msr;
565#endif 569#endif
566 570
567#ifdef CONFIG_BOOKE 571#ifdef CONFIG_BOOKE
@@ -622,8 +626,12 @@ struct kvm_vcpu_arch {
622 wait_queue_head_t cpu_run; 626 wait_queue_head_t cpu_run;
623 627
624 struct kvm_vcpu_arch_shared *shared; 628 struct kvm_vcpu_arch_shared *shared;
629#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
630 bool shared_big_endian;
631#endif
625 unsigned long magic_page_pa; /* phys addr to map the magic page to */ 632 unsigned long magic_page_pa; /* phys addr to map the magic page to */
626 unsigned long magic_page_ea; /* effect. addr to map the magic page to */ 633 unsigned long magic_page_ea; /* effect. addr to map the magic page to */
634 bool disable_kernel_nx;
627 635
628 int irq_type; /* one of KVM_IRQ_* */ 636 int irq_type; /* one of KVM_IRQ_* */
629 int irq_cpu_id; 637 int irq_cpu_id;
@@ -654,7 +662,6 @@ struct kvm_vcpu_arch {
654 spinlock_t tbacct_lock; 662 spinlock_t tbacct_lock;
655 u64 busy_stolen; 663 u64 busy_stolen;
656 u64 busy_preempt; 664 u64 busy_preempt;
657 unsigned long intr_msr;
658#endif 665#endif
659}; 666};
660 667
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 4096f16502a9..4a7cc453be0b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -449,6 +449,84 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
449} 449}
450 450
451/* 451/*
452 * Shared struct helpers. The shared struct can be little or big endian,
453 * depending on the guest endianness. So expose helpers to all of them.
454 */
455static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu)
456{
457#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
458 /* Only Book3S_64 PR supports bi-endian for now */
459 return vcpu->arch.shared_big_endian;
460#elif defined(CONFIG_PPC_BOOK3S_64) && defined(__LITTLE_ENDIAN__)
461 /* Book3s_64 HV on little endian is always little endian */
462 return false;
463#else
464 return true;
465#endif
466}
467
468#define SHARED_WRAPPER_GET(reg, size) \
469static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \
470{ \
471 if (kvmppc_shared_big_endian(vcpu)) \
472 return be##size##_to_cpu(vcpu->arch.shared->reg); \
473 else \
474 return le##size##_to_cpu(vcpu->arch.shared->reg); \
475} \
476
477#define SHARED_WRAPPER_SET(reg, size) \
478static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \
479{ \
480 if (kvmppc_shared_big_endian(vcpu)) \
481 vcpu->arch.shared->reg = cpu_to_be##size(val); \
482 else \
483 vcpu->arch.shared->reg = cpu_to_le##size(val); \
484} \
485
486#define SHARED_WRAPPER(reg, size) \
487 SHARED_WRAPPER_GET(reg, size) \
488 SHARED_WRAPPER_SET(reg, size) \
489
490SHARED_WRAPPER(critical, 64)
491SHARED_WRAPPER(sprg0, 64)
492SHARED_WRAPPER(sprg1, 64)
493SHARED_WRAPPER(sprg2, 64)
494SHARED_WRAPPER(sprg3, 64)
495SHARED_WRAPPER(srr0, 64)
496SHARED_WRAPPER(srr1, 64)
497SHARED_WRAPPER(dar, 64)
498SHARED_WRAPPER_GET(msr, 64)
499static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val)
500{
501 if (kvmppc_shared_big_endian(vcpu))
502 vcpu->arch.shared->msr = cpu_to_be64(val);
503 else
504 vcpu->arch.shared->msr = cpu_to_le64(val);
505}
506SHARED_WRAPPER(dsisr, 32)
507SHARED_WRAPPER(int_pending, 32)
508SHARED_WRAPPER(sprg4, 64)
509SHARED_WRAPPER(sprg5, 64)
510SHARED_WRAPPER(sprg6, 64)
511SHARED_WRAPPER(sprg7, 64)
512
513static inline u32 kvmppc_get_sr(struct kvm_vcpu *vcpu, int nr)
514{
515 if (kvmppc_shared_big_endian(vcpu))
516 return be32_to_cpu(vcpu->arch.shared->sr[nr]);
517 else
518 return le32_to_cpu(vcpu->arch.shared->sr[nr]);
519}
520
521static inline void kvmppc_set_sr(struct kvm_vcpu *vcpu, int nr, u32 val)
522{
523 if (kvmppc_shared_big_endian(vcpu))
524 vcpu->arch.shared->sr[nr] = cpu_to_be32(val);
525 else
526 vcpu->arch.shared->sr[nr] = cpu_to_le32(val);
527}
528
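Editor's note: the SHARED_WRAPPER accessors above exist because the shared (magic) page follows the guest's endianness, not the host's. Below is a userspace illustration of the same pattern, using glibc's <endian.h> conversions in place of cpu_to_be64()/cpu_to_le64(), with a single sprg0 field standing in for the full shared struct.

#include <endian.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct shared { uint64_t sprg0; };	/* stand-in for kvm_vcpu_arch_shared */

struct vcpu {
	struct shared shared;
	bool shared_big_endian;
};

static void set_sprg0(struct vcpu *v, uint64_t val)
{
	v->shared.sprg0 = v->shared_big_endian ? htobe64(val) : htole64(val);
}

static uint64_t get_sprg0(const struct vcpu *v)
{
	return v->shared_big_endian ? be64toh(v->shared.sprg0)
				    : le64toh(v->shared.sprg0);
}

int main(void)
{
	struct vcpu v = { .shared_big_endian = true };

	set_sprg0(&v, 0x1122334455667788ULL);
	printf("sprg0 as the guest sees it: %#llx\n",
	       (unsigned long long)get_sprg0(&v));
	return 0;
}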
529/*
452 * Please call after prepare_to_enter. This function puts the lazy ee and irq 530 * Please call after prepare_to_enter. This function puts the lazy ee and irq
453 * disabled tracking state back to normal mode, without actually enabling 531 * disabled tracking state back to normal mode, without actually enabling
454 * interrupts. 532 * interrupts.
@@ -485,7 +563,7 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
485 msr_64bit = MSR_SF; 563 msr_64bit = MSR_SF;
486#endif 564#endif
487 565
488 if (!(vcpu->arch.shared->msr & msr_64bit)) 566 if (!(kvmppc_get_msr(vcpu) & msr_64bit))
489 ea = (uint32_t)ea; 567 ea = (uint32_t)ea;
490 568
491 return ea; 569 return ea;
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e5d2e0bc7e03..4852bcf270f3 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -670,18 +670,20 @@
670#define MMCR0_PROBLEM_DISABLE MMCR0_FCP 670#define MMCR0_PROBLEM_DISABLE MMCR0_FCP
671#define MMCR0_FCM1 0x10000000UL /* freeze counters while MSR mark = 1 */ 671#define MMCR0_FCM1 0x10000000UL /* freeze counters while MSR mark = 1 */
672#define MMCR0_FCM0 0x08000000UL /* freeze counters while MSR mark = 0 */ 672#define MMCR0_FCM0 0x08000000UL /* freeze counters while MSR mark = 0 */
673#define MMCR0_PMXE 0x04000000UL /* performance monitor exception enable */ 673#define MMCR0_PMXE ASM_CONST(0x04000000) /* perf mon exception enable */
674#define MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */ 674#define MMCR0_FCECE ASM_CONST(0x02000000) /* freeze ctrs on enabled cond or event */
675#define MMCR0_TBEE 0x00400000UL /* time base exception enable */ 675#define MMCR0_TBEE 0x00400000UL /* time base exception enable */
676#define MMCR0_BHRBA 0x00200000UL /* BHRB Access allowed in userspace */ 676#define MMCR0_BHRBA 0x00200000UL /* BHRB Access allowed in userspace */
677#define MMCR0_EBE 0x00100000UL /* Event based branch enable */ 677#define MMCR0_EBE 0x00100000UL /* Event based branch enable */
678#define MMCR0_PMCC 0x000c0000UL /* PMC control */ 678#define MMCR0_PMCC 0x000c0000UL /* PMC control */
679#define MMCR0_PMCC_U6 0x00080000UL /* PMC1-6 are R/W by user (PR) */ 679#define MMCR0_PMCC_U6 0x00080000UL /* PMC1-6 are R/W by user (PR) */
680#define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ 680#define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/
681#define MMCR0_PMCjCE 0x00004000UL /* PMCj count enable*/ 681#define MMCR0_PMCjCE ASM_CONST(0x00004000) /* PMCj count enable*/
682#define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ 682#define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */
683#define MMCR0_PMAO_SYNC 0x00000800UL /* PMU interrupt is synchronous */ 683#define MMCR0_PMAO_SYNC ASM_CONST(0x00000800) /* PMU intr is synchronous */
684#define MMCR0_PMAO 0x00000080UL /* performance monitor alert has occurred, set to 0 after handling exception */ 684#define MMCR0_C56RUN ASM_CONST(0x00000100) /* PMC5/6 count when RUN=0 */
685/* performance monitor alert has occurred, set to 0 after handling exception */
686#define MMCR0_PMAO ASM_CONST(0x00000080)
685#define MMCR0_SHRFC 0x00000040UL /* SHRre freeze conditions between threads */ 687#define MMCR0_SHRFC 0x00000040UL /* SHRre freeze conditions between threads */
686#define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */ 688#define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */
687#define MMCR0_FCTI 0x00000008UL /* freeze counters in tags inactive mode */ 689#define MMCR0_FCTI 0x00000008UL /* freeze counters in tags inactive mode */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 163c3b05a76e..464f1089b532 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -583,6 +583,7 @@
583 583
584/* Bit definitions for L1CSR0. */ 584/* Bit definitions for L1CSR0. */
585#define L1CSR0_CPE 0x00010000 /* Data Cache Parity Enable */ 585#define L1CSR0_CPE 0x00010000 /* Data Cache Parity Enable */
586#define L1CSR0_CUL 0x00000400 /* Data Cache Unable to Lock */
586#define L1CSR0_CLFC 0x00000100 /* Cache Lock Bits Flash Clear */ 587#define L1CSR0_CLFC 0x00000100 /* Cache Lock Bits Flash Clear */
587#define L1CSR0_DCFI 0x00000002 /* Data Cache Flash Invalidate */ 588#define L1CSR0_DCFI 0x00000002 /* Data Cache Flash Invalidate */
588#define L1CSR0_CFI 0x00000002 /* Cache Flash Invalidate */ 589#define L1CSR0_CFI 0x00000002 /* Cache Flash Invalidate */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index a6665be4f3ab..2bc4a9409a93 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -545,7 +545,6 @@ struct kvm_get_htab_header {
545#define KVM_REG_PPC_TCSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1) 545#define KVM_REG_PPC_TCSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1)
546#define KVM_REG_PPC_PID (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2) 546#define KVM_REG_PPC_PID (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2)
547#define KVM_REG_PPC_ACOP (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3) 547#define KVM_REG_PPC_ACOP (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3)
548#define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb4)
549 548
550#define KVM_REG_PPC_VRSAVE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4) 549#define KVM_REG_PPC_VRSAVE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4)
551#define KVM_REG_PPC_LPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5) 550#define KVM_REG_PPC_LPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5)
@@ -555,6 +554,7 @@ struct kvm_get_htab_header {
555#define KVM_REG_PPC_ARCH_COMPAT (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7) 554#define KVM_REG_PPC_ARCH_COMPAT (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7)
556 555
557#define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8) 556#define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8)
557#define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9)
558 558
559/* Transactional Memory checkpointed state: 559/* Transactional Memory checkpointed state:
560 * This is all GPRs, all VSX regs and a subset of SPRs 560 * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h
index e3af3286a068..91e42f09b323 100644
--- a/arch/powerpc/include/uapi/asm/kvm_para.h
+++ b/arch/powerpc/include/uapi/asm/kvm_para.h
@@ -82,10 +82,16 @@ struct kvm_vcpu_arch_shared {
82 82
83#define KVM_FEATURE_MAGIC_PAGE 1 83#define KVM_FEATURE_MAGIC_PAGE 1
84 84
85/* Magic page flags from host to guest */
86
85#define KVM_MAGIC_FEAT_SR (1 << 0) 87#define KVM_MAGIC_FEAT_SR (1 << 0)
86 88
87/* MASn, ESR, PIR, and high SPRGs */ 89/* MASn, ESR, PIR, and high SPRGs */
88#define KVM_MAGIC_FEAT_MAS0_TO_SPRG7 (1 << 1) 90#define KVM_MAGIC_FEAT_MAS0_TO_SPRG7 (1 << 1)
89 91
92/* Magic page flags from guest to host */
93
94#define MAGIC_PAGE_FLAG_NOT_MAPPED_NX (1 << 0)
95
90 96
91#endif /* _UAPI__POWERPC_KVM_PARA_H__ */ 97#endif /* _UAPI__POWERPC_KVM_PARA_H__ */
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 94908af308d8..34f55524d456 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -25,14 +25,13 @@
25#include <asm/cputable.h> 25#include <asm/cputable.h>
26#include <asm/emulated_ops.h> 26#include <asm/emulated_ops.h>
27#include <asm/switch_to.h> 27#include <asm/switch_to.h>
28#include <asm/disassemble.h>
28 29
29struct aligninfo { 30struct aligninfo {
30 unsigned char len; 31 unsigned char len;
31 unsigned char flags; 32 unsigned char flags;
32}; 33};
33 34
34#define IS_XFORM(inst) (((inst) >> 26) == 31)
35#define IS_DSFORM(inst) (((inst) >> 26) >= 56)
36 35
37#define INVALID { 0, 0 } 36#define INVALID { 0, 0 }
38 37
@@ -192,37 +191,6 @@ static struct aligninfo aligninfo[128] = {
192}; 191};
193 192
194/* 193/*
195 * Create a DSISR value from the instruction
196 */
197static inline unsigned make_dsisr(unsigned instr)
198{
199 unsigned dsisr;
200
201
202 /* bits 6:15 --> 22:31 */
203 dsisr = (instr & 0x03ff0000) >> 16;
204
205 if (IS_XFORM(instr)) {
206 /* bits 29:30 --> 15:16 */
207 dsisr |= (instr & 0x00000006) << 14;
208 /* bit 25 --> 17 */
209 dsisr |= (instr & 0x00000040) << 8;
210 /* bits 21:24 --> 18:21 */
211 dsisr |= (instr & 0x00000780) << 3;
212 } else {
213 /* bit 5 --> 17 */
214 dsisr |= (instr & 0x04000000) >> 12;
215 /* bits 1: 4 --> 18:21 */
216 dsisr |= (instr & 0x78000000) >> 17;
217 /* bits 30:31 --> 12:13 */
218 if (IS_DSFORM(instr))
219 dsisr |= (instr & 0x00000003) << 18;
220 }
221
222 return dsisr;
223}
224
225/*
226 * The dcbz (data cache block zero) instruction 194 * The dcbz (data cache block zero) instruction
227 * gives an alignment fault if used on non-cacheable 195 * gives an alignment fault if used on non-cacheable
228 * memory. We handle the fault mainly for the 196 * memory. We handle the fault mainly for the
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index dba8140ebc20..93e1465c8496 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -54,6 +54,7 @@
54#endif 54#endif
55#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S) 55#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S)
56#include <asm/kvm_book3s.h> 56#include <asm/kvm_book3s.h>
57#include <asm/kvm_ppc.h>
57#endif 58#endif
58 59
59#ifdef CONFIG_PPC32 60#ifdef CONFIG_PPC32
@@ -445,7 +446,9 @@ int main(void)
445 DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); 446 DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
446 DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); 447 DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
447 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); 448 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
449#ifdef CONFIG_PPC_BOOK3S
448 DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar)); 450 DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar));
451#endif
449 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); 452 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
450 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); 453 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
451#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 454#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -467,6 +470,9 @@ int main(void)
467 DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); 470 DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
468 DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); 471 DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
469 DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); 472 DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
473#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
474 DEFINE(VCPU_SHAREDBE, offsetof(struct kvm_vcpu, arch.shared_big_endian));
475#endif
470 476
471 DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0)); 477 DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0));
472 DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1)); 478 DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1));
@@ -493,7 +499,6 @@ int main(void)
493 DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); 499 DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
494 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); 500 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
495 DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); 501 DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
496 DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
497#endif 502#endif
498#ifdef CONFIG_PPC_BOOK3S 503#ifdef CONFIG_PPC_BOOK3S
499 DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); 504 DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -528,11 +533,13 @@ int main(void)
528 DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); 533 DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
529 DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); 534 DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
530 DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); 535 DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
536 DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
531 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); 537 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
532 DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); 538 DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
533 DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); 539 DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
534 DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); 540 DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
535 DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr)); 541 DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr));
542 DEFINE(VCPU_SHADOW_FSCR, offsetof(struct kvm_vcpu, arch.shadow_fscr));
536 DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb)); 543 DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb));
537 DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr)); 544 DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr));
538 DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr)); 545 DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr));
@@ -614,6 +621,7 @@ int main(void)
614#ifdef CONFIG_PPC64 621#ifdef CONFIG_PPC64
615 SVCPU_FIELD(SVCPU_SLB, slb); 622 SVCPU_FIELD(SVCPU_SLB, slb);
616 SVCPU_FIELD(SVCPU_SLB_MAX, slb_max); 623 SVCPU_FIELD(SVCPU_SLB_MAX, slb_max);
624 SVCPU_FIELD(SVCPU_SHADOW_FSCR, shadow_fscr);
617#endif 625#endif
618 626
619 HSTATE_FIELD(HSTATE_HOST_R1, host_r1); 627 HSTATE_FIELD(HSTATE_HOST_R1, host_r1);
@@ -649,6 +657,7 @@ int main(void)
649#ifdef CONFIG_PPC_BOOK3S_64 657#ifdef CONFIG_PPC_BOOK3S_64
650 HSTATE_FIELD(HSTATE_CFAR, cfar); 658 HSTATE_FIELD(HSTATE_CFAR, cfar);
651 HSTATE_FIELD(HSTATE_PPR, ppr); 659 HSTATE_FIELD(HSTATE_PPR, ppr);
660 HSTATE_FIELD(HSTATE_HOST_FSCR, host_fscr);
652#endif /* CONFIG_PPC_BOOK3S_64 */ 661#endif /* CONFIG_PPC_BOOK3S_64 */
653 662
654#else /* CONFIG_PPC_BOOK3S */ 663#else /* CONFIG_PPC_BOOK3S */
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 7898be90f2dc..d9b79358b833 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -47,9 +47,10 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
47 return -1; 47 return -1;
48 48
49 for (i = 0; i < (len / 4); i++) { 49 for (i = 0; i < (len / 4); i++) {
50 patch_instruction(epapr_hypercall_start + i, insts[i]); 50 u32 inst = be32_to_cpu(insts[i]);
51 patch_instruction(epapr_hypercall_start + i, inst);
51#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) 52#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
52 patch_instruction(epapr_ev_idle_start + i, insts[i]); 53 patch_instruction(epapr_ev_idle_start + i, inst);
53#endif 54#endif
54 } 55 }
55 56
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index dd8695f6cb6d..33aa4ddf597d 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -417,7 +417,7 @@ static void kvm_map_magic_page(void *data)
417 ulong out[8]; 417 ulong out[8];
418 418
419 in[0] = KVM_MAGIC_PAGE; 419 in[0] = KVM_MAGIC_PAGE;
420 in[1] = KVM_MAGIC_PAGE; 420 in[1] = KVM_MAGIC_PAGE | MAGIC_PAGE_FLAG_NOT_MAPPED_NX;
421 421
422 epapr_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE)); 422 epapr_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE));
423 423
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index ad302f845e5d..d6e195e8cd4c 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -98,6 +98,9 @@ static inline void free_lppacas(void) { }
98/* 98/*
99 * 3 persistent SLBs are registered here. The buffer will be zero 99 * 3 persistent SLBs are registered here. The buffer will be zero
100 * initially, hence will all be invalid until we actually write them. 100 * initially, hence will all be invalid until we actually write them.
101 *
102 * If you make the number of persistent SLB entries dynamic, please also
103 * update PR KVM to flush and restore them accordingly.
101 */ 104 */
102static struct slb_shadow *slb_shadow; 105static struct slb_shadow *slb_shadow;
103 106
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 141b2027189a..d6a53b95de94 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -6,7 +6,6 @@ source "virt/kvm/Kconfig"
6 6
7menuconfig VIRTUALIZATION 7menuconfig VIRTUALIZATION
8 bool "Virtualization" 8 bool "Virtualization"
9 depends on !CPU_LITTLE_ENDIAN
10 ---help--- 9 ---help---
11 Say Y here to get to see options for using your Linux host to run 10 Say Y here to get to see options for using your Linux host to run
12 other operating systems inside virtual machines (guests). 11 other operating systems inside virtual machines (guests).
@@ -76,6 +75,7 @@ config KVM_BOOK3S_64
76config KVM_BOOK3S_64_HV 75config KVM_BOOK3S_64_HV
77 tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" 76 tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
78 depends on KVM_BOOK3S_64 77 depends on KVM_BOOK3S_64
78 depends on !CPU_LITTLE_ENDIAN
79 select KVM_BOOK3S_HV_POSSIBLE 79 select KVM_BOOK3S_HV_POSSIBLE
80 select MMU_NOTIFIER 80 select MMU_NOTIFIER
81 select CMA 81 select CMA
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 7af190a266b3..c254c27f240e 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -85,9 +85,9 @@ static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
85 if (is_kvmppc_hv_enabled(vcpu->kvm)) 85 if (is_kvmppc_hv_enabled(vcpu->kvm))
86 return; 86 return;
87 if (pending_now) 87 if (pending_now)
88 vcpu->arch.shared->int_pending = 1; 88 kvmppc_set_int_pending(vcpu, 1);
89 else if (old_pending) 89 else if (old_pending)
90 vcpu->arch.shared->int_pending = 0; 90 kvmppc_set_int_pending(vcpu, 0);
91} 91}
92 92
93static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) 93static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
@@ -99,11 +99,11 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
99 if (is_kvmppc_hv_enabled(vcpu->kvm)) 99 if (is_kvmppc_hv_enabled(vcpu->kvm))
100 return false; 100 return false;
101 101
102 crit_raw = vcpu->arch.shared->critical; 102 crit_raw = kvmppc_get_critical(vcpu);
103 crit_r1 = kvmppc_get_gpr(vcpu, 1); 103 crit_r1 = kvmppc_get_gpr(vcpu, 1);
104 104
105 /* Truncate crit indicators in 32 bit mode */ 105 /* Truncate crit indicators in 32 bit mode */
106 if (!(vcpu->arch.shared->msr & MSR_SF)) { 106 if (!(kvmppc_get_msr(vcpu) & MSR_SF)) {
107 crit_raw &= 0xffffffff; 107 crit_raw &= 0xffffffff;
108 crit_r1 &= 0xffffffff; 108 crit_r1 &= 0xffffffff;
109 } 109 }
@@ -111,15 +111,15 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
111 /* Critical section when crit == r1 */ 111 /* Critical section when crit == r1 */
112 crit = (crit_raw == crit_r1); 112 crit = (crit_raw == crit_r1);
113 /* ... and we're in supervisor mode */ 113 /* ... and we're in supervisor mode */
114 crit = crit && !(vcpu->arch.shared->msr & MSR_PR); 114 crit = crit && !(kvmppc_get_msr(vcpu) & MSR_PR);
115 115
116 return crit; 116 return crit;
117} 117}
118 118
119void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) 119void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
120{ 120{
121 vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu); 121 kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
122 vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags; 122 kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags);
123 kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); 123 kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
124 vcpu->arch.mmu.reset_msr(vcpu); 124 vcpu->arch.mmu.reset_msr(vcpu);
125} 125}
@@ -145,6 +145,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
145 case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG; break; 145 case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG; break;
146 case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC; break; 146 case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC; break;
147 case 0xf40: prio = BOOK3S_IRQPRIO_VSX; break; 147 case 0xf40: prio = BOOK3S_IRQPRIO_VSX; break;
148 case 0xf60: prio = BOOK3S_IRQPRIO_FAC_UNAVAIL; break;
148 default: prio = BOOK3S_IRQPRIO_MAX; break; 149 default: prio = BOOK3S_IRQPRIO_MAX; break;
149 } 150 }
150 151
@@ -225,12 +226,12 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
225 226
226 switch (priority) { 227 switch (priority) {
227 case BOOK3S_IRQPRIO_DECREMENTER: 228 case BOOK3S_IRQPRIO_DECREMENTER:
228 deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit; 229 deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
229 vec = BOOK3S_INTERRUPT_DECREMENTER; 230 vec = BOOK3S_INTERRUPT_DECREMENTER;
230 break; 231 break;
231 case BOOK3S_IRQPRIO_EXTERNAL: 232 case BOOK3S_IRQPRIO_EXTERNAL:
232 case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: 233 case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
233 deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit; 234 deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
234 vec = BOOK3S_INTERRUPT_EXTERNAL; 235 vec = BOOK3S_INTERRUPT_EXTERNAL;
235 break; 236 break;
236 case BOOK3S_IRQPRIO_SYSTEM_RESET: 237 case BOOK3S_IRQPRIO_SYSTEM_RESET:
@@ -275,6 +276,9 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
275 case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR: 276 case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR:
276 vec = BOOK3S_INTERRUPT_PERFMON; 277 vec = BOOK3S_INTERRUPT_PERFMON;
277 break; 278 break;
279 case BOOK3S_IRQPRIO_FAC_UNAVAIL:
280 vec = BOOK3S_INTERRUPT_FAC_UNAVAIL;
281 break;
278 default: 282 default:
279 deliver = 0; 283 deliver = 0;
280 printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority); 284 printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority);
@@ -343,7 +347,7 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
343{ 347{
344 ulong mp_pa = vcpu->arch.magic_page_pa; 348 ulong mp_pa = vcpu->arch.magic_page_pa;
345 349
346 if (!(vcpu->arch.shared->msr & MSR_SF)) 350 if (!(kvmppc_get_msr(vcpu) & MSR_SF))
347 mp_pa = (uint32_t)mp_pa; 351 mp_pa = (uint32_t)mp_pa;
348 352
349 /* Magic page override */ 353 /* Magic page override */
@@ -367,7 +371,7 @@ EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn);
367static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, 371static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
368 bool iswrite, struct kvmppc_pte *pte) 372 bool iswrite, struct kvmppc_pte *pte)
369{ 373{
370 int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR)); 374 int relocated = (kvmppc_get_msr(vcpu) & (data ? MSR_DR : MSR_IR));
371 int r; 375 int r;
372 376
373 if (relocated) { 377 if (relocated) {
@@ -498,18 +502,18 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
498 regs->ctr = kvmppc_get_ctr(vcpu); 502 regs->ctr = kvmppc_get_ctr(vcpu);
499 regs->lr = kvmppc_get_lr(vcpu); 503 regs->lr = kvmppc_get_lr(vcpu);
500 regs->xer = kvmppc_get_xer(vcpu); 504 regs->xer = kvmppc_get_xer(vcpu);
501 regs->msr = vcpu->arch.shared->msr; 505 regs->msr = kvmppc_get_msr(vcpu);
502 regs->srr0 = vcpu->arch.shared->srr0; 506 regs->srr0 = kvmppc_get_srr0(vcpu);
503 regs->srr1 = vcpu->arch.shared->srr1; 507 regs->srr1 = kvmppc_get_srr1(vcpu);
504 regs->pid = vcpu->arch.pid; 508 regs->pid = vcpu->arch.pid;
505 regs->sprg0 = vcpu->arch.shared->sprg0; 509 regs->sprg0 = kvmppc_get_sprg0(vcpu);
506 regs->sprg1 = vcpu->arch.shared->sprg1; 510 regs->sprg1 = kvmppc_get_sprg1(vcpu);
507 regs->sprg2 = vcpu->arch.shared->sprg2; 511 regs->sprg2 = kvmppc_get_sprg2(vcpu);
508 regs->sprg3 = vcpu->arch.shared->sprg3; 512 regs->sprg3 = kvmppc_get_sprg3(vcpu);
509 regs->sprg4 = vcpu->arch.shared->sprg4; 513 regs->sprg4 = kvmppc_get_sprg4(vcpu);
510 regs->sprg5 = vcpu->arch.shared->sprg5; 514 regs->sprg5 = kvmppc_get_sprg5(vcpu);
511 regs->sprg6 = vcpu->arch.shared->sprg6; 515 regs->sprg6 = kvmppc_get_sprg6(vcpu);
512 regs->sprg7 = vcpu->arch.shared->sprg7; 516 regs->sprg7 = kvmppc_get_sprg7(vcpu);
513 517
514 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 518 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
515 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 519 regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
@@ -527,16 +531,16 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
527 kvmppc_set_lr(vcpu, regs->lr); 531 kvmppc_set_lr(vcpu, regs->lr);
528 kvmppc_set_xer(vcpu, regs->xer); 532 kvmppc_set_xer(vcpu, regs->xer);
529 kvmppc_set_msr(vcpu, regs->msr); 533 kvmppc_set_msr(vcpu, regs->msr);
530 vcpu->arch.shared->srr0 = regs->srr0; 534 kvmppc_set_srr0(vcpu, regs->srr0);
531 vcpu->arch.shared->srr1 = regs->srr1; 535 kvmppc_set_srr1(vcpu, regs->srr1);
532 vcpu->arch.shared->sprg0 = regs->sprg0; 536 kvmppc_set_sprg0(vcpu, regs->sprg0);
533 vcpu->arch.shared->sprg1 = regs->sprg1; 537 kvmppc_set_sprg1(vcpu, regs->sprg1);
534 vcpu->arch.shared->sprg2 = regs->sprg2; 538 kvmppc_set_sprg2(vcpu, regs->sprg2);
535 vcpu->arch.shared->sprg3 = regs->sprg3; 539 kvmppc_set_sprg3(vcpu, regs->sprg3);
536 vcpu->arch.shared->sprg4 = regs->sprg4; 540 kvmppc_set_sprg4(vcpu, regs->sprg4);
537 vcpu->arch.shared->sprg5 = regs->sprg5; 541 kvmppc_set_sprg5(vcpu, regs->sprg5);
538 vcpu->arch.shared->sprg6 = regs->sprg6; 542 kvmppc_set_sprg6(vcpu, regs->sprg6);
539 vcpu->arch.shared->sprg7 = regs->sprg7; 543 kvmppc_set_sprg7(vcpu, regs->sprg7);
540 544
541 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 545 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
542 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 546 kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
@@ -570,10 +574,10 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
570 r = 0; 574 r = 0;
571 switch (reg->id) { 575 switch (reg->id) {
572 case KVM_REG_PPC_DAR: 576 case KVM_REG_PPC_DAR:
573 val = get_reg_val(reg->id, vcpu->arch.shared->dar); 577 val = get_reg_val(reg->id, kvmppc_get_dar(vcpu));
574 break; 578 break;
575 case KVM_REG_PPC_DSISR: 579 case KVM_REG_PPC_DSISR:
576 val = get_reg_val(reg->id, vcpu->arch.shared->dsisr); 580 val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu));
577 break; 581 break;
578 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: 582 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
579 i = reg->id - KVM_REG_PPC_FPR0; 583 i = reg->id - KVM_REG_PPC_FPR0;
@@ -627,6 +631,21 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
627 val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu)); 631 val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
628 break; 632 break;
629#endif /* CONFIG_KVM_XICS */ 633#endif /* CONFIG_KVM_XICS */
634 case KVM_REG_PPC_FSCR:
635 val = get_reg_val(reg->id, vcpu->arch.fscr);
636 break;
637 case KVM_REG_PPC_TAR:
638 val = get_reg_val(reg->id, vcpu->arch.tar);
639 break;
640 case KVM_REG_PPC_EBBHR:
641 val = get_reg_val(reg->id, vcpu->arch.ebbhr);
642 break;
643 case KVM_REG_PPC_EBBRR:
644 val = get_reg_val(reg->id, vcpu->arch.ebbrr);
645 break;
646 case KVM_REG_PPC_BESCR:
647 val = get_reg_val(reg->id, vcpu->arch.bescr);
648 break;
630 default: 649 default:
631 r = -EINVAL; 650 r = -EINVAL;
632 break; 651 break;
@@ -660,10 +679,10 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
660 r = 0; 679 r = 0;
661 switch (reg->id) { 680 switch (reg->id) {
662 case KVM_REG_PPC_DAR: 681 case KVM_REG_PPC_DAR:
663 vcpu->arch.shared->dar = set_reg_val(reg->id, val); 682 kvmppc_set_dar(vcpu, set_reg_val(reg->id, val));
664 break; 683 break;
665 case KVM_REG_PPC_DSISR: 684 case KVM_REG_PPC_DSISR:
666 vcpu->arch.shared->dsisr = set_reg_val(reg->id, val); 685 kvmppc_set_dsisr(vcpu, set_reg_val(reg->id, val));
667 break; 686 break;
668 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: 687 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
669 i = reg->id - KVM_REG_PPC_FPR0; 688 i = reg->id - KVM_REG_PPC_FPR0;
@@ -716,6 +735,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
716 set_reg_val(reg->id, val)); 735 set_reg_val(reg->id, val));
717 break; 736 break;
718#endif /* CONFIG_KVM_XICS */ 737#endif /* CONFIG_KVM_XICS */
738 case KVM_REG_PPC_FSCR:
739 vcpu->arch.fscr = set_reg_val(reg->id, val);
740 break;
741 case KVM_REG_PPC_TAR:
742 vcpu->arch.tar = set_reg_val(reg->id, val);
743 break;
744 case KVM_REG_PPC_EBBHR:
745 vcpu->arch.ebbhr = set_reg_val(reg->id, val);
746 break;
747 case KVM_REG_PPC_EBBRR:
748 vcpu->arch.ebbrr = set_reg_val(reg->id, val);
749 break;
750 case KVM_REG_PPC_BESCR:
751 vcpu->arch.bescr = set_reg_val(reg->id, val);
752 break;
719 default: 753 default:
720 r = -EINVAL; 754 r = -EINVAL;
721 break; 755 break;
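
Most of the book3s.c churn above replaces direct vcpu->arch.shared->X accesses with kvmppc_get_*/kvmppc_set_* helpers, so the shared register area can be kept in either guest endianness and byte-swapped on access when that differs from the host. A rough, self-contained sketch of the access pattern; the function name, buffer, and values here are illustrative, not the kernel's.

    #include <stdint.h>
    #include <stdio.h>

    /* Load a 64-bit register image stored in a given byte order. */
    static uint64_t load_u64(const uint8_t *p, int stored_big_endian)
    {
        uint64_t v = 0;
        for (int i = 0; i < 8; i++) {
            int shift = stored_big_endian ? 56 - 8 * i : 8 * i;
            v |= (uint64_t)p[i] << shift;
        }
        return v;
    }

    int main(void)
    {
        /* MSR image 0x8000000000001032 as a big-endian guest would store it. */
        uint8_t shared_msr[8] = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x32 };

        /* The accessor picks the right byte order; prints 0x8000000000001032. */
        printf("0x%016llx\n",
               (unsigned long long)load_u64(shared_msr, /* big endian */ 1));
        return 0;
    }
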
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 76a64ce6a5b6..93503bbdae43 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -91,7 +91,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
91 91
92static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr) 92static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr)
93{ 93{
94 return vcpu->arch.shared->sr[(eaddr >> 28) & 0xf]; 94 return kvmppc_get_sr(vcpu, (eaddr >> 28) & 0xf);
95} 95}
96 96
97static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, 97static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
@@ -131,7 +131,7 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,
131 pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash; 131 pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash;
132 132
133 dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n", 133 dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n",
134 kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg, 134 kvmppc_get_pc(vcpu), eaddr, vcpu_book3s->sdr1, pteg,
135 sr_vsid(sre)); 135 sr_vsid(sre));
136 136
137 r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT); 137 r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
@@ -160,7 +160,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
160 else 160 else
161 bat = &vcpu_book3s->ibat[i]; 161 bat = &vcpu_book3s->ibat[i];
162 162
163 if (vcpu->arch.shared->msr & MSR_PR) { 163 if (kvmppc_get_msr(vcpu) & MSR_PR) {
164 if (!bat->vp) 164 if (!bat->vp)
165 continue; 165 continue;
166 } else { 166 } else {
@@ -208,6 +208,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
208 u32 sre; 208 u32 sre;
209 hva_t ptegp; 209 hva_t ptegp;
210 u32 pteg[16]; 210 u32 pteg[16];
211 u32 pte0, pte1;
211 u32 ptem = 0; 212 u32 ptem = 0;
212 int i; 213 int i;
213 int found = 0; 214 int found = 0;
@@ -233,14 +234,16 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
233 } 234 }
234 235
235 for (i=0; i<16; i+=2) { 236 for (i=0; i<16; i+=2) {
236 if (ptem == pteg[i]) { 237 pte0 = be32_to_cpu(pteg[i]);
238 pte1 = be32_to_cpu(pteg[i + 1]);
239 if (ptem == pte0) {
237 u8 pp; 240 u8 pp;
238 241
239 pte->raddr = (pteg[i+1] & ~(0xFFFULL)) | (eaddr & 0xFFF); 242 pte->raddr = (pte1 & ~(0xFFFULL)) | (eaddr & 0xFFF);
240 pp = pteg[i+1] & 3; 243 pp = pte1 & 3;
241 244
242 if ((sr_kp(sre) && (vcpu->arch.shared->msr & MSR_PR)) || 245 if ((sr_kp(sre) && (kvmppc_get_msr(vcpu) & MSR_PR)) ||
243 (sr_ks(sre) && !(vcpu->arch.shared->msr & MSR_PR))) 246 (sr_ks(sre) && !(kvmppc_get_msr(vcpu) & MSR_PR)))
244 pp |= 4; 247 pp |= 4;
245 248
246 pte->may_write = false; 249 pte->may_write = false;
@@ -260,7 +263,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
260 } 263 }
261 264
262 dprintk_pte("MMU: Found PTE -> %x %x - %x\n", 265 dprintk_pte("MMU: Found PTE -> %x %x - %x\n",
263 pteg[i], pteg[i+1], pp); 266 pte0, pte1, pp);
264 found = 1; 267 found = 1;
265 break; 268 break;
266 } 269 }
@@ -269,8 +272,8 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
269 /* Update PTE C and A bits, so the guest's swapper knows we used the 272 /* Update PTE C and A bits, so the guest's swapper knows we used the
270 page */ 273 page */
271 if (found) { 274 if (found) {
272 u32 pte_r = pteg[i+1]; 275 u32 pte_r = pte1;
273 char __user *addr = (char __user *) &pteg[i+1]; 276 char __user *addr = (char __user *) (ptegp + (i+1) * sizeof(u32));
274 277
275 /* 278 /*
276 * Use single-byte writes to update the HPTE, to 279 * Use single-byte writes to update the HPTE, to
@@ -296,7 +299,8 @@ no_page_found:
296 to_book3s(vcpu)->sdr1, ptegp); 299 to_book3s(vcpu)->sdr1, ptegp);
297 for (i=0; i<16; i+=2) { 300 for (i=0; i<16; i+=2) {
298 dprintk_pte(" %02d: 0x%x - 0x%x (0x%x)\n", 301 dprintk_pte(" %02d: 0x%x - 0x%x (0x%x)\n",
299 i, pteg[i], pteg[i+1], ptem); 302 i, be32_to_cpu(pteg[i]),
303 be32_to_cpu(pteg[i+1]), ptem);
300 } 304 }
301 } 305 }
302 306
@@ -316,7 +320,7 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
316 /* Magic page override */ 320 /* Magic page override */
317 if (unlikely(mp_ea) && 321 if (unlikely(mp_ea) &&
318 unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) && 322 unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
319 !(vcpu->arch.shared->msr & MSR_PR)) { 323 !(kvmppc_get_msr(vcpu) & MSR_PR)) {
320 pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data); 324 pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
321 pte->raddr = vcpu->arch.magic_page_pa | (pte->raddr & 0xfff); 325 pte->raddr = vcpu->arch.magic_page_pa | (pte->raddr & 0xfff);
322 pte->raddr &= KVM_PAM; 326 pte->raddr &= KVM_PAM;
@@ -341,13 +345,13 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
341 345
342static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum) 346static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum)
343{ 347{
344 return vcpu->arch.shared->sr[srnum]; 348 return kvmppc_get_sr(vcpu, srnum);
345} 349}
346 350
347static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, 351static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
348 ulong value) 352 ulong value)
349{ 353{
350 vcpu->arch.shared->sr[srnum] = value; 354 kvmppc_set_sr(vcpu, srnum, value);
351 kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT); 355 kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT);
352} 356}
353 357
@@ -367,8 +371,9 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
367 ulong ea = esid << SID_SHIFT; 371 ulong ea = esid << SID_SHIFT;
368 u32 sr; 372 u32 sr;
369 u64 gvsid = esid; 373 u64 gvsid = esid;
374 u64 msr = kvmppc_get_msr(vcpu);
370 375
371 if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 376 if (msr & (MSR_DR|MSR_IR)) {
372 sr = find_sr(vcpu, ea); 377 sr = find_sr(vcpu, ea);
373 if (sr_valid(sr)) 378 if (sr_valid(sr))
374 gvsid = sr_vsid(sr); 379 gvsid = sr_vsid(sr);
@@ -377,7 +382,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
377 /* In case we only have one of MSR_IR or MSR_DR set, let's put 382 /* In case we only have one of MSR_IR or MSR_DR set, let's put
378 that in the real-mode context (and hope RM doesn't access 383 that in the real-mode context (and hope RM doesn't access
379 high memory) */ 384 high memory) */
380 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 385 switch (msr & (MSR_DR|MSR_IR)) {
381 case 0: 386 case 0:
382 *vsid = VSID_REAL | esid; 387 *vsid = VSID_REAL | esid;
383 break; 388 break;
@@ -397,7 +402,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
397 BUG(); 402 BUG();
398 } 403 }
399 404
400 if (vcpu->arch.shared->msr & MSR_PR) 405 if (msr & MSR_PR)
401 *vsid |= VSID_PR; 406 *vsid |= VSID_PR;
402 407
403 return 0; 408 return 0;
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 5fac89dfe4cd..678e75370495 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -92,7 +92,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
92 struct kvmppc_sid_map *map; 92 struct kvmppc_sid_map *map;
93 u16 sid_map_mask; 93 u16 sid_map_mask;
94 94
95 if (vcpu->arch.shared->msr & MSR_PR) 95 if (kvmppc_get_msr(vcpu) & MSR_PR)
96 gvsid |= VSID_PR; 96 gvsid |= VSID_PR;
97 97
98 sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); 98 sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
@@ -279,7 +279,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
279 u16 sid_map_mask; 279 u16 sid_map_mask;
280 static int backwards_map = 0; 280 static int backwards_map = 0;
281 281
282 if (vcpu->arch.shared->msr & MSR_PR) 282 if (kvmppc_get_msr(vcpu) & MSR_PR)
283 gvsid |= VSID_PR; 283 gvsid |= VSID_PR;
284 284
285 /* We might get collisions that trap in preceding order, so let's 285 /* We might get collisions that trap in preceding order, so let's
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 83da1f868fd5..774a253ca4e1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -38,7 +38,7 @@
38 38
39static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu) 39static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
40{ 40{
41 kvmppc_set_msr(vcpu, MSR_SF); 41 kvmppc_set_msr(vcpu, vcpu->arch.intr_msr);
42} 42}
43 43
44static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( 44static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
@@ -226,7 +226,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
226 /* Magic page override */ 226 /* Magic page override */
227 if (unlikely(mp_ea) && 227 if (unlikely(mp_ea) &&
228 unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) && 228 unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
229 !(vcpu->arch.shared->msr & MSR_PR)) { 229 !(kvmppc_get_msr(vcpu) & MSR_PR)) {
230 gpte->eaddr = eaddr; 230 gpte->eaddr = eaddr;
231 gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); 231 gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
232 gpte->raddr = vcpu->arch.magic_page_pa | (gpte->raddr & 0xfff); 232 gpte->raddr = vcpu->arch.magic_page_pa | (gpte->raddr & 0xfff);
@@ -269,18 +269,21 @@ do_second:
269 goto no_page_found; 269 goto no_page_found;
270 } 270 }
271 271
272 if ((vcpu->arch.shared->msr & MSR_PR) && slbe->Kp) 272 if ((kvmppc_get_msr(vcpu) & MSR_PR) && slbe->Kp)
273 key = 4; 273 key = 4;
274 else if (!(vcpu->arch.shared->msr & MSR_PR) && slbe->Ks) 274 else if (!(kvmppc_get_msr(vcpu) & MSR_PR) && slbe->Ks)
275 key = 4; 275 key = 4;
276 276
277 for (i=0; i<16; i+=2) { 277 for (i=0; i<16; i+=2) {
278 u64 pte0 = be64_to_cpu(pteg[i]);
279 u64 pte1 = be64_to_cpu(pteg[i + 1]);
280
278 /* Check all relevant fields of 1st dword */ 281 /* Check all relevant fields of 1st dword */
279 if ((pteg[i] & v_mask) == v_val) { 282 if ((pte0 & v_mask) == v_val) {
280 /* If large page bit is set, check pgsize encoding */ 283 /* If large page bit is set, check pgsize encoding */
281 if (slbe->large && 284 if (slbe->large &&
282 (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) { 285 (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
283 pgsize = decode_pagesize(slbe, pteg[i+1]); 286 pgsize = decode_pagesize(slbe, pte1);
284 if (pgsize < 0) 287 if (pgsize < 0)
285 continue; 288 continue;
286 } 289 }
@@ -297,8 +300,8 @@ do_second:
297 goto do_second; 300 goto do_second;
298 } 301 }
299 302
300 v = pteg[i]; 303 v = be64_to_cpu(pteg[i]);
301 r = pteg[i+1]; 304 r = be64_to_cpu(pteg[i+1]);
302 pp = (r & HPTE_R_PP) | key; 305 pp = (r & HPTE_R_PP) | key;
303 if (r & HPTE_R_PP0) 306 if (r & HPTE_R_PP0)
304 pp |= 8; 307 pp |= 8;
@@ -310,6 +313,9 @@ do_second:
310 gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask); 313 gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
311 gpte->page_size = pgsize; 314 gpte->page_size = pgsize;
312 gpte->may_execute = ((r & HPTE_R_N) ? false : true); 315 gpte->may_execute = ((r & HPTE_R_N) ? false : true);
316 if (unlikely(vcpu->arch.disable_kernel_nx) &&
317 !(kvmppc_get_msr(vcpu) & MSR_PR))
318 gpte->may_execute = true;
313 gpte->may_read = false; 319 gpte->may_read = false;
314 gpte->may_write = false; 320 gpte->may_write = false;
315 321
@@ -342,14 +348,14 @@ do_second:
342 * non-PAPR platforms such as mac99, and this is 348 * non-PAPR platforms such as mac99, and this is
343 * what real hardware does. 349 * what real hardware does.
344 */ 350 */
345 char __user *addr = (char __user *) &pteg[i+1]; 351 char __user *addr = (char __user *) (ptegp + (i + 1) * sizeof(u64));
346 r |= HPTE_R_R; 352 r |= HPTE_R_R;
347 put_user(r >> 8, addr + 6); 353 put_user(r >> 8, addr + 6);
348 } 354 }
349 if (iswrite && gpte->may_write && !(r & HPTE_R_C)) { 355 if (iswrite && gpte->may_write && !(r & HPTE_R_C)) {
350 /* Set the dirty flag */ 356 /* Set the dirty flag */
351 /* Use a single byte write */ 357 /* Use a single byte write */
352 char __user *addr = (char __user *) &pteg[i+1]; 358 char __user *addr = (char __user *) (ptegp + (i + 1) * sizeof(u64));
353 r |= HPTE_R_C; 359 r |= HPTE_R_C;
354 put_user(r, addr + 7); 360 put_user(r, addr + 7);
355 } 361 }
@@ -479,7 +485,7 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
479 vcpu->arch.slb[i].origv = 0; 485 vcpu->arch.slb[i].origv = 0;
480 } 486 }
481 487
482 if (vcpu->arch.shared->msr & MSR_IR) { 488 if (kvmppc_get_msr(vcpu) & MSR_IR) {
483 kvmppc_mmu_flush_segments(vcpu); 489 kvmppc_mmu_flush_segments(vcpu);
484 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 490 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
485 } 491 }
@@ -563,7 +569,7 @@ static int segment_contains_magic_page(struct kvm_vcpu *vcpu, ulong esid)
563{ 569{
564 ulong mp_ea = vcpu->arch.magic_page_ea; 570 ulong mp_ea = vcpu->arch.magic_page_ea;
565 571
566 return mp_ea && !(vcpu->arch.shared->msr & MSR_PR) && 572 return mp_ea && !(kvmppc_get_msr(vcpu) & MSR_PR) &&
567 (mp_ea >> SID_SHIFT) == esid; 573 (mp_ea >> SID_SHIFT) == esid;
568} 574}
569#endif 575#endif
@@ -576,8 +582,9 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
576 u64 gvsid = esid; 582 u64 gvsid = esid;
577 ulong mp_ea = vcpu->arch.magic_page_ea; 583 ulong mp_ea = vcpu->arch.magic_page_ea;
578 int pagesize = MMU_PAGE_64K; 584 int pagesize = MMU_PAGE_64K;
585 u64 msr = kvmppc_get_msr(vcpu);
579 586
580 if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 587 if (msr & (MSR_DR|MSR_IR)) {
581 slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea); 588 slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
582 if (slb) { 589 if (slb) {
583 gvsid = slb->vsid; 590 gvsid = slb->vsid;
@@ -590,7 +597,7 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
590 } 597 }
591 } 598 }
592 599
593 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 600 switch (msr & (MSR_DR|MSR_IR)) {
594 case 0: 601 case 0:
595 gvsid = VSID_REAL | esid; 602 gvsid = VSID_REAL | esid;
596 break; 603 break;
@@ -623,7 +630,7 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
623 gvsid |= VSID_64K; 630 gvsid |= VSID_64K;
624#endif 631#endif
625 632
626 if (vcpu->arch.shared->msr & MSR_PR) 633 if (kvmppc_get_msr(vcpu) & MSR_PR)
627 gvsid |= VSID_PR; 634 gvsid |= VSID_PR;
628 635
629 *vsid = gvsid; 636 *vsid = gvsid;
@@ -633,7 +640,7 @@ no_slb:
633 /* Catch magic page case */ 640 /* Catch magic page case */
634 if (unlikely(mp_ea) && 641 if (unlikely(mp_ea) &&
635 unlikely(esid == (mp_ea >> SID_SHIFT)) && 642 unlikely(esid == (mp_ea >> SID_SHIFT)) &&
636 !(vcpu->arch.shared->msr & MSR_PR)) { 643 !(kvmppc_get_msr(vcpu) & MSR_PR)) {
637 *vsid = VSID_REAL | esid; 644 *vsid = VSID_REAL | esid;
638 return 0; 645 return 0;
639 } 646 }
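
One subtle point in the hunk above: the single-byte R/C updates are now aimed at the guest hashed page table through ptegp instead of the local pteg[] copy, which the guest could never have observed. A tiny sketch of the address arithmetic involved; the addresses and index are made up for illustration.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t ptegp = 0x100000;   /* host-side address of the PTE group */
        int i = 6;                   /* index of the matching first dword  */

        /* Second dword of the pair, where the R and C bits live. */
        uint64_t pte1 = ptegp + (i + 1) * sizeof(uint64_t);

        printf("pte1 at 0x%llx, R byte at +6, C byte at +7\n",
               (unsigned long long)pte1);   /* pte1 = 0x100038 */
        return 0;
    }
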
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 0d513af62bba..0ac98392f363 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -58,7 +58,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
58 struct kvmppc_sid_map *map; 58 struct kvmppc_sid_map *map;
59 u16 sid_map_mask; 59 u16 sid_map_mask;
60 60
61 if (vcpu->arch.shared->msr & MSR_PR) 61 if (kvmppc_get_msr(vcpu) & MSR_PR)
62 gvsid |= VSID_PR; 62 gvsid |= VSID_PR;
63 63
64 sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); 64 sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
@@ -230,7 +230,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
230 u16 sid_map_mask; 230 u16 sid_map_mask;
231 static int backwards_map = 0; 231 static int backwards_map = 0;
232 232
233 if (vcpu->arch.shared->msr & MSR_PR) 233 if (kvmppc_get_msr(vcpu) & MSR_PR)
234 gvsid |= VSID_PR; 234 gvsid |= VSID_PR;
235 235
236 /* We might get collisions that trap in preceding order, so let's 236 /* We might get collisions that trap in preceding order, so let's
@@ -271,11 +271,8 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
271 int found_inval = -1; 271 int found_inval = -1;
272 int r; 272 int r;
273 273
274 if (!svcpu->slb_max)
275 svcpu->slb_max = 1;
276
277 /* Are we overwriting? */ 274 /* Are we overwriting? */
278 for (i = 1; i < svcpu->slb_max; i++) { 275 for (i = 0; i < svcpu->slb_max; i++) {
279 if (!(svcpu->slb[i].esid & SLB_ESID_V)) 276 if (!(svcpu->slb[i].esid & SLB_ESID_V))
280 found_inval = i; 277 found_inval = i;
281 else if ((svcpu->slb[i].esid & ESID_MASK) == esid) { 278 else if ((svcpu->slb[i].esid & ESID_MASK) == esid) {
@@ -285,7 +282,7 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
285 } 282 }
286 283
287 /* Found a spare entry that was invalidated before */ 284 /* Found a spare entry that was invalidated before */
288 if (found_inval > 0) { 285 if (found_inval >= 0) {
289 r = found_inval; 286 r = found_inval;
290 goto out; 287 goto out;
291 } 288 }
@@ -359,7 +356,7 @@ void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size)
359 ulong seg_mask = -seg_size; 356 ulong seg_mask = -seg_size;
360 int i; 357 int i;
361 358
362 for (i = 1; i < svcpu->slb_max; i++) { 359 for (i = 0; i < svcpu->slb_max; i++) {
363 if ((svcpu->slb[i].esid & SLB_ESID_V) && 360 if ((svcpu->slb[i].esid & SLB_ESID_V) &&
364 (svcpu->slb[i].esid & seg_mask) == ea) { 361 (svcpu->slb[i].esid & seg_mask) == ea) {
365 /* Invalidate this entry */ 362 /* Invalidate this entry */
@@ -373,7 +370,7 @@ void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size)
373void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) 370void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
374{ 371{
375 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 372 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
376 svcpu->slb_max = 1; 373 svcpu->slb_max = 0;
377 svcpu->slb[0].esid = 0; 374 svcpu->slb[0].esid = 0;
378 svcpu_put(svcpu); 375 svcpu_put(svcpu);
379} 376}
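
With shadow-SLB entry 0 no longer reserved for the host, the slot search above starts at index 0 and a previously invalidated slot 0 can be reused (found_inval >= 0 instead of > 0). A small stand-alone sketch of that search, with simplified types and names that are not the kernel's:

    #include <stdio.h>

    struct slbe { unsigned long esid; int valid; };

    static int next_slot(struct slbe *slb, int slb_max, unsigned long esid)
    {
        int found_inval = -1;

        for (int i = 0; i < slb_max; i++) {
            if (!slb[i].valid)
                found_inval = i;
            else if (slb[i].esid == esid)
                return i;            /* overwrite the matching entry */
        }
        if (found_inval >= 0)
            return found_inval;      /* reuse an invalidated slot */
        return slb_max;              /* append; caller grows slb_max */
    }

    int main(void)
    {
        struct slbe slb[4] = { { 0, 0 }, { 0x100, 1 }, { 0x200, 1 } };

        /* Prints 0: slot 0 is free and may be reused now. */
        printf("%d\n", next_slot(slb, 3, 0x300));
        return 0;
    }
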
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index fb25ebc0af0c..80561074078d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -52,7 +52,7 @@ static void kvmppc_rmap_reset(struct kvm *kvm);
52 52
53long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) 53long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
54{ 54{
55 unsigned long hpt; 55 unsigned long hpt = 0;
56 struct revmap_entry *rev; 56 struct revmap_entry *rev;
57 struct page *page = NULL; 57 struct page *page = NULL;
58 long order = KVM_DEFAULT_HPT_ORDER; 58 long order = KVM_DEFAULT_HPT_ORDER;
@@ -64,22 +64,11 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
64 } 64 }
65 65
66 kvm->arch.hpt_cma_alloc = 0; 66 kvm->arch.hpt_cma_alloc = 0;
67 /* 67 VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
68 * try first to allocate it from the kernel page allocator. 68 page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
69 * We keep the CMA reserved for failed allocation. 69 if (page) {
70 */ 70 hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
71 hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT | 71 kvm->arch.hpt_cma_alloc = 1;
72 __GFP_NOWARN, order - PAGE_SHIFT);
73
74 /* Next try to allocate from the preallocated pool */
75 if (!hpt) {
76 VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
77 page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
78 if (page) {
79 hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
80 kvm->arch.hpt_cma_alloc = 1;
81 } else
82 --order;
83 } 72 }
84 73
85 /* Lastly try successively smaller sizes from the page allocator */ 74 /* Lastly try successively smaller sizes from the page allocator */
@@ -596,6 +585,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
596 struct kvm *kvm = vcpu->kvm; 585 struct kvm *kvm = vcpu->kvm;
597 unsigned long *hptep, hpte[3], r; 586 unsigned long *hptep, hpte[3], r;
598 unsigned long mmu_seq, psize, pte_size; 587 unsigned long mmu_seq, psize, pte_size;
588 unsigned long gpa_base, gfn_base;
599 unsigned long gpa, gfn, hva, pfn; 589 unsigned long gpa, gfn, hva, pfn;
600 struct kvm_memory_slot *memslot; 590 struct kvm_memory_slot *memslot;
601 unsigned long *rmap; 591 unsigned long *rmap;
@@ -634,7 +624,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
634 624
635 /* Translate the logical address and get the page */ 625 /* Translate the logical address and get the page */
636 psize = hpte_page_size(hpte[0], r); 626 psize = hpte_page_size(hpte[0], r);
637 gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1)); 627 gpa_base = r & HPTE_R_RPN & ~(psize - 1);
628 gfn_base = gpa_base >> PAGE_SHIFT;
629 gpa = gpa_base | (ea & (psize - 1));
638 gfn = gpa >> PAGE_SHIFT; 630 gfn = gpa >> PAGE_SHIFT;
639 memslot = gfn_to_memslot(kvm, gfn); 631 memslot = gfn_to_memslot(kvm, gfn);
640 632
@@ -646,6 +638,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
646 if (!kvm->arch.using_mmu_notifiers) 638 if (!kvm->arch.using_mmu_notifiers)
647 return -EFAULT; /* should never get here */ 639 return -EFAULT; /* should never get here */
648 640
641 /*
642 * This should never happen, because of the slot_is_aligned()
643 * check in kvmppc_do_h_enter().
644 */
645 if (gfn_base < memslot->base_gfn)
646 return -EFAULT;
647
649 /* used to check for invalidations in progress */ 648 /* used to check for invalidations in progress */
650 mmu_seq = kvm->mmu_notifier_seq; 649 mmu_seq = kvm->mmu_notifier_seq;
651 smp_rmb(); 650 smp_rmb();
@@ -738,7 +737,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
738 goto out_unlock; 737 goto out_unlock;
739 hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; 738 hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
740 739
741 rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; 740 /* Always put the HPTE in the rmap chain for the page base address */
741 rmap = &memslot->arch.rmap[gfn_base - memslot->base_gfn];
742 lock_rmap(rmap); 742 lock_rmap(rmap);
743 743
744 /* Check if we might have been invalidated; let the guest retry if so */ 744 /* Check if we might have been invalidated; let the guest retry if so */
@@ -1060,22 +1060,33 @@ void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
1060 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 1060 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
1061} 1061}
1062 1062
1063static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) 1063static int vcpus_running(struct kvm *kvm)
1064{
1065 return atomic_read(&kvm->arch.vcpus_running) != 0;
1066}
1067
1068/*
1069 * Returns the number of system pages that are dirty.
1070 * This can be more than 1 if we find a huge-page HPTE.
1071 */
1072static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
1064{ 1073{
1065 struct revmap_entry *rev = kvm->arch.revmap; 1074 struct revmap_entry *rev = kvm->arch.revmap;
1066 unsigned long head, i, j; 1075 unsigned long head, i, j;
1076 unsigned long n;
1077 unsigned long v, r;
1067 unsigned long *hptep; 1078 unsigned long *hptep;
1068 int ret = 0; 1079 int npages_dirty = 0;
1069 1080
1070 retry: 1081 retry:
1071 lock_rmap(rmapp); 1082 lock_rmap(rmapp);
1072 if (*rmapp & KVMPPC_RMAP_CHANGED) { 1083 if (*rmapp & KVMPPC_RMAP_CHANGED) {
1073 *rmapp &= ~KVMPPC_RMAP_CHANGED; 1084 *rmapp &= ~KVMPPC_RMAP_CHANGED;
1074 ret = 1; 1085 npages_dirty = 1;
1075 } 1086 }
1076 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { 1087 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
1077 unlock_rmap(rmapp); 1088 unlock_rmap(rmapp);
1078 return ret; 1089 return npages_dirty;
1079 } 1090 }
1080 1091
1081 i = head = *rmapp & KVMPPC_RMAP_INDEX; 1092 i = head = *rmapp & KVMPPC_RMAP_INDEX;
@@ -1083,7 +1094,22 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
1083 hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); 1094 hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
1084 j = rev[i].forw; 1095 j = rev[i].forw;
1085 1096
1086 if (!(hptep[1] & HPTE_R_C)) 1097 /*
1098 * Checking the C (changed) bit here is racy since there
1099 * is no guarantee about when the hardware writes it back.
1100 * If the HPTE is not writable then it is stable since the
1101 * page can't be written to, and we would have done a tlbie
1102 * (which forces the hardware to complete any writeback)
1103 * when making the HPTE read-only.
1104 * If vcpus are running then this call is racy anyway
1105 * since the page could get dirtied subsequently, so we
1106 * expect there to be a further call which would pick up
1107 * any delayed C bit writeback.
1108 * Otherwise we need to do the tlbie even if C==0 in
1109 * order to pick up any delayed writeback of C.
1110 */
1111 if (!(hptep[1] & HPTE_R_C) &&
1112 (!hpte_is_writable(hptep[1]) || vcpus_running(kvm)))
1087 continue; 1113 continue;
1088 1114
1089 if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { 1115 if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
@@ -1095,24 +1121,33 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
1095 } 1121 }
1096 1122
1097 /* Now check and modify the HPTE */ 1123 /* Now check and modify the HPTE */
1098 if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) { 1124 if (!(hptep[0] & HPTE_V_VALID))
1099 /* need to make it temporarily absent to clear C */ 1125 continue;
1100 hptep[0] |= HPTE_V_ABSENT; 1126
1101 kvmppc_invalidate_hpte(kvm, hptep, i); 1127 /* need to make it temporarily absent so C is stable */
1102 hptep[1] &= ~HPTE_R_C; 1128 hptep[0] |= HPTE_V_ABSENT;
1103 eieio(); 1129 kvmppc_invalidate_hpte(kvm, hptep, i);
1104 hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; 1130 v = hptep[0];
1131 r = hptep[1];
1132 if (r & HPTE_R_C) {
1133 hptep[1] = r & ~HPTE_R_C;
1105 if (!(rev[i].guest_rpte & HPTE_R_C)) { 1134 if (!(rev[i].guest_rpte & HPTE_R_C)) {
1106 rev[i].guest_rpte |= HPTE_R_C; 1135 rev[i].guest_rpte |= HPTE_R_C;
1107 note_hpte_modification(kvm, &rev[i]); 1136 note_hpte_modification(kvm, &rev[i]);
1108 } 1137 }
1109 ret = 1; 1138 n = hpte_page_size(v, r);
1139 n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT;
1140 if (n > npages_dirty)
1141 npages_dirty = n;
1142 eieio();
1110 } 1143 }
1111 hptep[0] &= ~HPTE_V_HVLOCK; 1144 v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK);
1145 v |= HPTE_V_VALID;
1146 hptep[0] = v;
1112 } while ((i = j) != head); 1147 } while ((i = j) != head);
1113 1148
1114 unlock_rmap(rmapp); 1149 unlock_rmap(rmapp);
1115 return ret; 1150 return npages_dirty;
1116} 1151}
1117 1152
1118static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, 1153static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
@@ -1136,15 +1171,22 @@ static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
1136long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, 1171long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
1137 unsigned long *map) 1172 unsigned long *map)
1138{ 1173{
1139 unsigned long i; 1174 unsigned long i, j;
1140 unsigned long *rmapp; 1175 unsigned long *rmapp;
1141 struct kvm_vcpu *vcpu; 1176 struct kvm_vcpu *vcpu;
1142 1177
1143 preempt_disable(); 1178 preempt_disable();
1144 rmapp = memslot->arch.rmap; 1179 rmapp = memslot->arch.rmap;
1145 for (i = 0; i < memslot->npages; ++i) { 1180 for (i = 0; i < memslot->npages; ++i) {
1146 if (kvm_test_clear_dirty(kvm, rmapp) && map) 1181 int npages = kvm_test_clear_dirty_npages(kvm, rmapp);
1147 __set_bit_le(i, map); 1182 /*
1183 * Note that if npages > 0 then i must be a multiple of npages,
1184 * since we always put huge-page HPTEs in the rmap chain
1185 * corresponding to their page base address.
1186 */
1187 if (npages && map)
1188 for (j = i; npages; ++j, --npages)
1189 __set_bit_le(j, map);
1148 ++rmapp; 1190 ++rmapp;
1149 } 1191 }
1150 1192
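
kvm_test_clear_dirty_npages() now reports how many 4k system pages a dirty HPTE spans, and the dirty-log walk above sets that many consecutive bits starting at the huge page's base index. A minimal sketch of that bitmap update, using a simplified stand-in for __set_bit_le() and made-up sizes:

    #include <stdint.h>
    #include <stdio.h>

    static void set_bit_simple(unsigned long i, uint8_t *map)
    {
        map[i / 8] |= 1u << (i % 8);
    }

    int main(void)
    {
        uint8_t map[8] = { 0 };
        unsigned long i = 16;   /* index of the huge page's first 4k page    */
        int npages = 16;        /* e.g. a 64k HPTE covers sixteen 4k pages   */

        /* Same shape as the loop in kvmppc_hv_get_dirty_log(). */
        for (unsigned long j = i; npages; ++j, --npages)
            set_bit_simple(j, map);

        /* Prints: 00 00 ff ff 00 00 00 00 */
        for (int k = 0; k < 8; k++)
            printf("%02x ", map[k]);
        printf("\n");
        return 0;
    }
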
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 4f12e8f0c718..3589c4e3d49b 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -17,30 +17,9 @@
17 * Authors: Alexander Graf <agraf@suse.de> 17 * Authors: Alexander Graf <agraf@suse.de>
18 */ 18 */
19 19
20#ifdef __LITTLE_ENDIAN__ 20#define SHADOW_SLB_ENTRY_LEN 0x10
21#error Need to fix SLB shadow accesses in little endian mode 21#define OFFSET_ESID(x) (SHADOW_SLB_ENTRY_LEN * x)
22#endif 22#define OFFSET_VSID(x) ((SHADOW_SLB_ENTRY_LEN * x) + 8)
23
24#define SHADOW_SLB_ESID(num) (SLBSHADOW_SAVEAREA + (num * 0x10))
25#define SHADOW_SLB_VSID(num) (SLBSHADOW_SAVEAREA + (num * 0x10) + 0x8)
26#define UNBOLT_SLB_ENTRY(num) \
27 ld r9, SHADOW_SLB_ESID(num)(r12); \
28 /* Invalid? Skip. */; \
29 rldicl. r0, r9, 37, 63; \
30 beq slb_entry_skip_ ## num; \
31 xoris r9, r9, SLB_ESID_V@h; \
32 std r9, SHADOW_SLB_ESID(num)(r12); \
33 slb_entry_skip_ ## num:
34
35#define REBOLT_SLB_ENTRY(num) \
36 ld r10, SHADOW_SLB_ESID(num)(r11); \
37 cmpdi r10, 0; \
38 beq slb_exit_skip_ ## num; \
39 oris r10, r10, SLB_ESID_V@h; \
40 ld r9, SHADOW_SLB_VSID(num)(r11); \
41 slbmte r9, r10; \
42 std r10, SHADOW_SLB_ESID(num)(r11); \
43slb_exit_skip_ ## num:
44 23
45/****************************************************************************** 24/******************************************************************************
46 * * 25 * *
@@ -64,20 +43,15 @@ slb_exit_skip_ ## num:
64 * SVCPU[LR] = guest LR 43 * SVCPU[LR] = guest LR
65 */ 44 */
66 45
67 /* Remove LPAR shadow entries */ 46BEGIN_FW_FTR_SECTION
68 47
69#if SLB_NUM_BOLTED == 3 48 /* Declare SLB shadow as 0 entries big */
70 49
71 ld r12, PACA_SLBSHADOWPTR(r13) 50 ld r11, PACA_SLBSHADOWPTR(r13)
51 li r8, 0
52 stb r8, 3(r11)
72 53
73 /* Remove bolted entries */ 54END_FW_FTR_SECTION_IFSET(FW_FEATURE_LPAR)
74 UNBOLT_SLB_ENTRY(0)
75 UNBOLT_SLB_ENTRY(1)
76 UNBOLT_SLB_ENTRY(2)
77
78#else
79#error unknown number of bolted entries
80#endif
81 55
82 /* Flush SLB */ 56 /* Flush SLB */
83 57
@@ -100,7 +74,7 @@ slb_loop_enter:
100 74
101 ld r10, 0(r11) 75 ld r10, 0(r11)
102 76
103 rldicl. r0, r10, 37, 63 77 andis. r9, r10, SLB_ESID_V@h
104 beq slb_loop_enter_skip 78 beq slb_loop_enter_skip
105 79
106 ld r9, 8(r11) 80 ld r9, 8(r11)
@@ -137,23 +111,42 @@ slb_do_enter:
137 * 111 *
138 */ 112 */
139 113
140 /* Restore bolted entries from the shadow and fix it along the way */ 114 /* Remove all SLB entries that are in use. */
141 115
142 /* We don't store anything in entry 0, so we don't need to take care of it */ 116 li r0, r0
117 slbmte r0, r0
143 slbia 118 slbia
144 isync
145 119
146#if SLB_NUM_BOLTED == 3 120 /* Restore bolted entries from the shadow */
147 121
148 ld r11, PACA_SLBSHADOWPTR(r13) 122 ld r11, PACA_SLBSHADOWPTR(r13)
149 123
150 REBOLT_SLB_ENTRY(0) 124BEGIN_FW_FTR_SECTION
151 REBOLT_SLB_ENTRY(1) 125
152 REBOLT_SLB_ENTRY(2) 126 /* Declare SLB shadow as SLB_NUM_BOLTED entries big */
153 127
154#else 128 li r8, SLB_NUM_BOLTED
155#error unknown number of bolted entries 129 stb r8, 3(r11)
156#endif 130
131END_FW_FTR_SECTION_IFSET(FW_FEATURE_LPAR)
132
133 /* Manually load all entries from shadow SLB */
134
135 li r8, SLBSHADOW_SAVEAREA
136 li r7, SLBSHADOW_SAVEAREA + 8
137
138 .rept SLB_NUM_BOLTED
139 LDX_BE r10, r11, r8
140 cmpdi r10, 0
141 beq 1f
142 LDX_BE r9, r11, r7
143 slbmte r9, r10
1441: addi r7, r7, SHADOW_SLB_ENTRY_LEN
145 addi r8, r8, SHADOW_SLB_ENTRY_LEN
146 .endr
147
148 isync
149 sync
157 150
158slb_do_exit: 151slb_do_exit:
159 152
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 99d40f8977e8..3f295269af37 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -80,7 +80,7 @@ static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
80 return false; 80 return false;
81 81
82 /* Limit user space to its own small SPR set */ 82 /* Limit user space to its own small SPR set */
83 if ((vcpu->arch.shared->msr & MSR_PR) && level > PRIV_PROBLEM) 83 if ((kvmppc_get_msr(vcpu) & MSR_PR) && level > PRIV_PROBLEM)
84 return false; 84 return false;
85 85
86 return true; 86 return true;
@@ -94,14 +94,31 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
94 int rs = get_rs(inst); 94 int rs = get_rs(inst);
95 int ra = get_ra(inst); 95 int ra = get_ra(inst);
96 int rb = get_rb(inst); 96 int rb = get_rb(inst);
97 u32 inst_sc = 0x44000002;
97 98
98 switch (get_op(inst)) { 99 switch (get_op(inst)) {
100 case 0:
101 emulated = EMULATE_FAIL;
102 if ((kvmppc_get_msr(vcpu) & MSR_LE) &&
103 (inst == swab32(inst_sc))) {
104 /*
105 * This is the byte reversed syscall instruction of our
106 * hypercall handler. Early versions of LE Linux didn't
107 * swap the instructions correctly and ended up in
108 * illegal instructions.
109 * Just always fail hypercalls on these broken systems.
110 */
111 kvmppc_set_gpr(vcpu, 3, EV_UNIMPLEMENTED);
112 kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
113 emulated = EMULATE_DONE;
114 }
115 break;
99 case 19: 116 case 19:
100 switch (get_xop(inst)) { 117 switch (get_xop(inst)) {
101 case OP_19_XOP_RFID: 118 case OP_19_XOP_RFID:
102 case OP_19_XOP_RFI: 119 case OP_19_XOP_RFI:
103 kvmppc_set_pc(vcpu, vcpu->arch.shared->srr0); 120 kvmppc_set_pc(vcpu, kvmppc_get_srr0(vcpu));
104 kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); 121 kvmppc_set_msr(vcpu, kvmppc_get_srr1(vcpu));
105 *advance = 0; 122 *advance = 0;
106 break; 123 break;
107 124
@@ -113,16 +130,16 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
113 case 31: 130 case 31:
114 switch (get_xop(inst)) { 131 switch (get_xop(inst)) {
115 case OP_31_XOP_MFMSR: 132 case OP_31_XOP_MFMSR:
116 kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr); 133 kvmppc_set_gpr(vcpu, rt, kvmppc_get_msr(vcpu));
117 break; 134 break;
118 case OP_31_XOP_MTMSRD: 135 case OP_31_XOP_MTMSRD:
119 { 136 {
120 ulong rs_val = kvmppc_get_gpr(vcpu, rs); 137 ulong rs_val = kvmppc_get_gpr(vcpu, rs);
121 if (inst & 0x10000) { 138 if (inst & 0x10000) {
122 ulong new_msr = vcpu->arch.shared->msr; 139 ulong new_msr = kvmppc_get_msr(vcpu);
123 new_msr &= ~(MSR_RI | MSR_EE); 140 new_msr &= ~(MSR_RI | MSR_EE);
124 new_msr |= rs_val & (MSR_RI | MSR_EE); 141 new_msr |= rs_val & (MSR_RI | MSR_EE);
125 vcpu->arch.shared->msr = new_msr; 142 kvmppc_set_msr_fast(vcpu, new_msr);
126 } else 143 } else
127 kvmppc_set_msr(vcpu, rs_val); 144 kvmppc_set_msr(vcpu, rs_val);
128 break; 145 break;
@@ -179,7 +196,7 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
179 ulong cmd = kvmppc_get_gpr(vcpu, 3); 196 ulong cmd = kvmppc_get_gpr(vcpu, 3);
180 int i; 197 int i;
181 198
182 if ((vcpu->arch.shared->msr & MSR_PR) || 199 if ((kvmppc_get_msr(vcpu) & MSR_PR) ||
183 !vcpu->arch.papr_enabled) { 200 !vcpu->arch.papr_enabled) {
184 emulated = EMULATE_FAIL; 201 emulated = EMULATE_FAIL;
185 break; 202 break;
@@ -261,14 +278,14 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
261 ra_val = kvmppc_get_gpr(vcpu, ra); 278 ra_val = kvmppc_get_gpr(vcpu, ra);
262 279
263 addr = (ra_val + rb_val) & ~31ULL; 280 addr = (ra_val + rb_val) & ~31ULL;
264 if (!(vcpu->arch.shared->msr & MSR_SF)) 281 if (!(kvmppc_get_msr(vcpu) & MSR_SF))
265 addr &= 0xffffffff; 282 addr &= 0xffffffff;
266 vaddr = addr; 283 vaddr = addr;
267 284
268 r = kvmppc_st(vcpu, &addr, 32, zeros, true); 285 r = kvmppc_st(vcpu, &addr, 32, zeros, true);
269 if ((r == -ENOENT) || (r == -EPERM)) { 286 if ((r == -ENOENT) || (r == -EPERM)) {
270 *advance = 0; 287 *advance = 0;
271 vcpu->arch.shared->dar = vaddr; 288 kvmppc_set_dar(vcpu, vaddr);
272 vcpu->arch.fault_dar = vaddr; 289 vcpu->arch.fault_dar = vaddr;
273 290
274 dsisr = DSISR_ISSTORE; 291 dsisr = DSISR_ISSTORE;
@@ -277,7 +294,7 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
277 else if (r == -EPERM) 294 else if (r == -EPERM)
278 dsisr |= DSISR_PROTFAULT; 295 dsisr |= DSISR_PROTFAULT;
279 296
280 vcpu->arch.shared->dsisr = dsisr; 297 kvmppc_set_dsisr(vcpu, dsisr);
281 vcpu->arch.fault_dsisr = dsisr; 298 vcpu->arch.fault_dsisr = dsisr;
282 299
283 kvmppc_book3s_queue_irqprio(vcpu, 300 kvmppc_book3s_queue_irqprio(vcpu,
@@ -356,10 +373,10 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
356 to_book3s(vcpu)->sdr1 = spr_val; 373 to_book3s(vcpu)->sdr1 = spr_val;
357 break; 374 break;
358 case SPRN_DSISR: 375 case SPRN_DSISR:
359 vcpu->arch.shared->dsisr = spr_val; 376 kvmppc_set_dsisr(vcpu, spr_val);
360 break; 377 break;
361 case SPRN_DAR: 378 case SPRN_DAR:
362 vcpu->arch.shared->dar = spr_val; 379 kvmppc_set_dar(vcpu, spr_val);
363 break; 380 break;
364 case SPRN_HIOR: 381 case SPRN_HIOR:
365 to_book3s(vcpu)->hior = spr_val; 382 to_book3s(vcpu)->hior = spr_val;
@@ -438,6 +455,31 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
438 case SPRN_GQR7: 455 case SPRN_GQR7:
439 to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val; 456 to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val;
440 break; 457 break;
458 case SPRN_FSCR:
459 vcpu->arch.fscr = spr_val;
460 break;
461#ifdef CONFIG_PPC_BOOK3S_64
462 case SPRN_BESCR:
463 vcpu->arch.bescr = spr_val;
464 break;
465 case SPRN_EBBHR:
466 vcpu->arch.ebbhr = spr_val;
467 break;
468 case SPRN_EBBRR:
469 vcpu->arch.ebbrr = spr_val;
470 break;
471#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
472 case SPRN_TFHAR:
473 vcpu->arch.tfhar = spr_val;
474 break;
475 case SPRN_TEXASR:
476 vcpu->arch.texasr = spr_val;
477 break;
478 case SPRN_TFIAR:
479 vcpu->arch.tfiar = spr_val;
480 break;
481#endif
482#endif
441 case SPRN_ICTC: 483 case SPRN_ICTC:
442 case SPRN_THRM1: 484 case SPRN_THRM1:
443 case SPRN_THRM2: 485 case SPRN_THRM2:
@@ -455,6 +497,13 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
455 case SPRN_WPAR_GEKKO: 497 case SPRN_WPAR_GEKKO:
456 case SPRN_MSSSR0: 498 case SPRN_MSSSR0:
457 case SPRN_DABR: 499 case SPRN_DABR:
500#ifdef CONFIG_PPC_BOOK3S_64
501 case SPRN_MMCRS:
502 case SPRN_MMCRA:
503 case SPRN_MMCR0:
504 case SPRN_MMCR1:
505 case SPRN_MMCR2:
506#endif
458 break; 507 break;
459unprivileged: 508unprivileged:
460 default: 509 default:
@@ -493,10 +542,10 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
493 *spr_val = to_book3s(vcpu)->sdr1; 542 *spr_val = to_book3s(vcpu)->sdr1;
494 break; 543 break;
495 case SPRN_DSISR: 544 case SPRN_DSISR:
496 *spr_val = vcpu->arch.shared->dsisr; 545 *spr_val = kvmppc_get_dsisr(vcpu);
497 break; 546 break;
498 case SPRN_DAR: 547 case SPRN_DAR:
499 *spr_val = vcpu->arch.shared->dar; 548 *spr_val = kvmppc_get_dar(vcpu);
500 break; 549 break;
501 case SPRN_HIOR: 550 case SPRN_HIOR:
502 *spr_val = to_book3s(vcpu)->hior; 551 *spr_val = to_book3s(vcpu)->hior;
@@ -538,6 +587,31 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
538 case SPRN_GQR7: 587 case SPRN_GQR7:
539 *spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]; 588 *spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0];
540 break; 589 break;
590 case SPRN_FSCR:
591 *spr_val = vcpu->arch.fscr;
592 break;
593#ifdef CONFIG_PPC_BOOK3S_64
594 case SPRN_BESCR:
595 *spr_val = vcpu->arch.bescr;
596 break;
597 case SPRN_EBBHR:
598 *spr_val = vcpu->arch.ebbhr;
599 break;
600 case SPRN_EBBRR:
601 *spr_val = vcpu->arch.ebbrr;
602 break;
603#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
604 case SPRN_TFHAR:
605 *spr_val = vcpu->arch.tfhar;
606 break;
607 case SPRN_TEXASR:
608 *spr_val = vcpu->arch.texasr;
609 break;
610 case SPRN_TFIAR:
611 *spr_val = vcpu->arch.tfiar;
612 break;
613#endif
614#endif
541 case SPRN_THRM1: 615 case SPRN_THRM1:
542 case SPRN_THRM2: 616 case SPRN_THRM2:
543 case SPRN_THRM3: 617 case SPRN_THRM3:
@@ -553,6 +627,14 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
553 case SPRN_WPAR_GEKKO: 627 case SPRN_WPAR_GEKKO:
554 case SPRN_MSSSR0: 628 case SPRN_MSSSR0:
555 case SPRN_DABR: 629 case SPRN_DABR:
630#ifdef CONFIG_PPC_BOOK3S_64
631 case SPRN_MMCRS:
632 case SPRN_MMCRA:
633 case SPRN_MMCR0:
634 case SPRN_MMCR1:
635 case SPRN_MMCR2:
636 case SPRN_TIR:
637#endif
556 *spr_val = 0; 638 *spr_val = 0;
557 break; 639 break;
558 default: 640 default:
@@ -569,48 +651,17 @@ unprivileged:
569 651
570u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst) 652u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst)
571{ 653{
572 u32 dsisr = 0; 654 return make_dsisr(inst);
573
574 /*
575 * This is what the spec says about DSISR bits (not mentioned = 0):
576 *
577 * 12:13 [DS] Set to bits 30:31
578 * 15:16 [X] Set to bits 29:30
579 * 17 [X] Set to bit 25
580 * [D/DS] Set to bit 5
581 * 18:21 [X] Set to bits 21:24
582 * [D/DS] Set to bits 1:4
583 * 22:26 Set to bits 6:10 (RT/RS/FRT/FRS)
584 * 27:31 Set to bits 11:15 (RA)
585 */
586
587 switch (get_op(inst)) {
588 /* D-form */
589 case OP_LFS:
590 case OP_LFD:
591 case OP_STFD:
592 case OP_STFS:
593 dsisr |= (inst >> 12) & 0x4000; /* bit 17 */
594 dsisr |= (inst >> 17) & 0x3c00; /* bits 18:21 */
595 break;
596 /* X-form */
597 case 31:
598 dsisr |= (inst << 14) & 0x18000; /* bits 15:16 */
599 dsisr |= (inst << 8) & 0x04000; /* bit 17 */
600 dsisr |= (inst << 3) & 0x03c00; /* bits 18:21 */
601 break;
602 default:
603 printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst);
604 break;
605 }
606
607 dsisr |= (inst >> 16) & 0x03ff; /* bits 22:31 */
608
609 return dsisr;
610} 655}
611 656
612ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst) 657ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
613{ 658{
659#ifdef CONFIG_PPC_BOOK3S_64
660 /*
661 * Linux's fix_alignment() assumes that DAR is valid, so can we
662 */
663 return vcpu->arch.fault_dar;
664#else
614 ulong dar = 0; 665 ulong dar = 0;
615 ulong ra = get_ra(inst); 666 ulong ra = get_ra(inst);
616 ulong rb = get_rb(inst); 667 ulong rb = get_rb(inst);
@@ -635,4 +686,5 @@ ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
635 } 686 }
636 687
637 return dar; 688 return dar;
689#endif
638} 690}
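
The new case 0 in kvmppc_core_emulate_op_pr() above treats a byte-swapped "sc" as a hypercall attempt from an early little-endian guest that did not swap its instructions, and fails it cleanly with EV_UNIMPLEMENTED. A quick stand-alone check of the constant it compares against (the swab32 helper here is a sketch built on a compiler builtin, not the kernel's macro):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t swab32(uint32_t v)
    {
        return __builtin_bswap32(v);
    }

    int main(void)
    {
        uint32_t inst_sc = 0x44000002;   /* "sc", as in the patch */

        /* Prints: swab32(sc) = 0x02000044 */
        printf("swab32(sc) = 0x%08x\n", swab32(inst_sc));
        return 0;
    }
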
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index 20d4ea8e656d..0d013fbc2e13 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -18,6 +18,7 @@
18 */ 18 */
19 19
20#include <linux/export.h> 20#include <linux/export.h>
21#include <asm/kvm_ppc.h>
21#include <asm/kvm_book3s.h> 22#include <asm/kvm_book3s.h>
22 23
23#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 24#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8227dba5af0f..aba05bbb3e74 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -879,24 +879,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
879 case KVM_REG_PPC_IAMR: 879 case KVM_REG_PPC_IAMR:
880 *val = get_reg_val(id, vcpu->arch.iamr); 880 *val = get_reg_val(id, vcpu->arch.iamr);
881 break; 881 break;
882 case KVM_REG_PPC_FSCR:
883 *val = get_reg_val(id, vcpu->arch.fscr);
884 break;
885 case KVM_REG_PPC_PSPB: 882 case KVM_REG_PPC_PSPB:
886 *val = get_reg_val(id, vcpu->arch.pspb); 883 *val = get_reg_val(id, vcpu->arch.pspb);
887 break; 884 break;
888 case KVM_REG_PPC_EBBHR:
889 *val = get_reg_val(id, vcpu->arch.ebbhr);
890 break;
891 case KVM_REG_PPC_EBBRR:
892 *val = get_reg_val(id, vcpu->arch.ebbrr);
893 break;
894 case KVM_REG_PPC_BESCR:
895 *val = get_reg_val(id, vcpu->arch.bescr);
896 break;
897 case KVM_REG_PPC_TAR:
898 *val = get_reg_val(id, vcpu->arch.tar);
899 break;
900 case KVM_REG_PPC_DPDES: 885 case KVM_REG_PPC_DPDES:
901 *val = get_reg_val(id, vcpu->arch.vcore->dpdes); 886 *val = get_reg_val(id, vcpu->arch.vcore->dpdes);
902 break; 887 break;
@@ -1091,24 +1076,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1091 case KVM_REG_PPC_IAMR: 1076 case KVM_REG_PPC_IAMR:
1092 vcpu->arch.iamr = set_reg_val(id, *val); 1077 vcpu->arch.iamr = set_reg_val(id, *val);
1093 break; 1078 break;
1094 case KVM_REG_PPC_FSCR:
1095 vcpu->arch.fscr = set_reg_val(id, *val);
1096 break;
1097 case KVM_REG_PPC_PSPB: 1079 case KVM_REG_PPC_PSPB:
1098 vcpu->arch.pspb = set_reg_val(id, *val); 1080 vcpu->arch.pspb = set_reg_val(id, *val);
1099 break; 1081 break;
1100 case KVM_REG_PPC_EBBHR:
1101 vcpu->arch.ebbhr = set_reg_val(id, *val);
1102 break;
1103 case KVM_REG_PPC_EBBRR:
1104 vcpu->arch.ebbrr = set_reg_val(id, *val);
1105 break;
1106 case KVM_REG_PPC_BESCR:
1107 vcpu->arch.bescr = set_reg_val(id, *val);
1108 break;
1109 case KVM_REG_PPC_TAR:
1110 vcpu->arch.tar = set_reg_val(id, *val);
1111 break;
1112 case KVM_REG_PPC_DPDES: 1082 case KVM_REG_PPC_DPDES:
1113 vcpu->arch.vcore->dpdes = set_reg_val(id, *val); 1083 vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
1114 break; 1084 break;
@@ -1280,6 +1250,17 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
1280 goto free_vcpu; 1250 goto free_vcpu;
1281 1251
1282 vcpu->arch.shared = &vcpu->arch.shregs; 1252 vcpu->arch.shared = &vcpu->arch.shregs;
1253#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
1254 /*
1255 * The shared struct is never shared on HV,
1256 * so we can always use host endianness
1257 */
1258#ifdef __BIG_ENDIAN__
1259 vcpu->arch.shared_big_endian = true;
1260#else
1261 vcpu->arch.shared_big_endian = false;
1262#endif
1263#endif
1283 vcpu->arch.mmcr[0] = MMCR0_FC; 1264 vcpu->arch.mmcr[0] = MMCR0_FC;
1284 vcpu->arch.ctrl = CTRL_RUNLATCH; 1265 vcpu->arch.ctrl = CTRL_RUNLATCH;
1285 /* default to host PVR, since we can't spoof it */ 1266 /* default to host PVR, since we can't spoof it */
@@ -1949,6 +1930,13 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
1949 * support pte_enc here 1930 * support pte_enc here
1950 */ 1931 */
1951 (*sps)->enc[0].pte_enc = def->penc[linux_psize]; 1932 (*sps)->enc[0].pte_enc = def->penc[linux_psize];
1933 /*
1934 * Add 16MB MPSS support if host supports it
1935 */
1936 if (linux_psize != MMU_PAGE_16M && def->penc[MMU_PAGE_16M] != -1) {
1937 (*sps)->enc[1].page_shift = 24;
1938 (*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M];
1939 }
1952 (*sps)++; 1940 (*sps)++;
1953} 1941}
1954 1942
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 8fcc36306a02..6e6224318c36 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -42,13 +42,14 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)
42 42
43 /* 43 /*
44 * If there is only one vcore, and it's currently running, 44 * If there is only one vcore, and it's currently running,
45 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
45 * we can use tlbiel as long as we mark all other physical 46 * we can use tlbiel as long as we mark all other physical
46 * cores as potentially having stale TLB entries for this lpid. 47 * cores as potentially having stale TLB entries for this lpid.
47 * If we're not using MMU notifiers, we never take pages away 48 * If we're not using MMU notifiers, we never take pages away
48 * from the guest, so we can use tlbiel if requested. 49 * from the guest, so we can use tlbiel if requested.
49 * Otherwise, don't use tlbiel. 50 * Otherwise, don't use tlbiel.
50 */ 51 */
51 if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore) 52 if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
52 global = 0; 53 global = 0;
53 else if (kvm->arch.using_mmu_notifiers) 54 else if (kvm->arch.using_mmu_notifiers)
54 global = 1; 55 global = 1;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 07c8b5b0f9d2..974793435a2e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -86,6 +86,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
86 lbz r4, LPPACA_PMCINUSE(r3) 86 lbz r4, LPPACA_PMCINUSE(r3)
87 cmpwi r4, 0 87 cmpwi r4, 0
88 beq 23f /* skip if not */ 88 beq 23f /* skip if not */
89BEGIN_FTR_SECTION
90 ld r3, HSTATE_MMCR(r13)
91 andi. r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
92 cmpwi r4, MMCR0_PMAO
93 beql kvmppc_fix_pmao
94END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
89 lwz r3, HSTATE_PMC(r13) 95 lwz r3, HSTATE_PMC(r13)
90 lwz r4, HSTATE_PMC + 4(r13) 96 lwz r4, HSTATE_PMC + 4(r13)
91 lwz r5, HSTATE_PMC + 8(r13) 97 lwz r5, HSTATE_PMC + 8(r13)
@@ -737,6 +743,12 @@ skip_tm:
737 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ 743 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
738 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ 744 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
739 isync 745 isync
746BEGIN_FTR_SECTION
747 ld r3, VCPU_MMCR(r4)
748 andi. r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
749 cmpwi r5, MMCR0_PMAO
750 beql kvmppc_fix_pmao
751END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
740 lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */ 752 lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
741 lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */ 753 lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
742 lwz r6, VCPU_PMC + 8(r4) 754 lwz r6, VCPU_PMC + 8(r4)
@@ -1439,6 +1451,30 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
143925: 145125:
1440 /* Save PMU registers if requested */ 1452 /* Save PMU registers if requested */
1441 /* r8 and cr0.eq are live here */ 1453 /* r8 and cr0.eq are live here */
1454BEGIN_FTR_SECTION
1455 /*
1456 * POWER8 seems to have a hardware bug where setting
1457 * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
 1458	 * when some counters are already negative does not cause
 1459	 * a performance monitor alert (and hence interrupt).
1460 * The effect of this is that when saving the PMU state,
1461 * if there is no PMU alert pending when we read MMCR0
1462 * before freezing the counters, but one becomes pending
1463 * before we read the counters, we lose it.
1464 * To work around this, we need a way to freeze the counters
1465 * before reading MMCR0. Normally, freezing the counters
1466 * is done by writing MMCR0 (to set MMCR0[FC]) which
 1467	 * unavoidably writes MMCR0[PMAO] as well. On POWER8,
1468 * we can also freeze the counters using MMCR2, by writing
1469 * 1s to all the counter freeze condition bits (there are
1470 * 9 bits each for 6 counters).
1471 */
1472 li r3, -1 /* set all freeze bits */
1473 clrrdi r3, r3, 10
1474 mfspr r10, SPRN_MMCR2
1475 mtspr SPRN_MMCR2, r3
1476 isync
1477END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1442 li r3, 1 1478 li r3, 1
1443 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ 1479 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
1444 mfspr r4, SPRN_MMCR0 /* save MMCR0 */ 1480 mfspr r4, SPRN_MMCR0 /* save MMCR0 */
@@ -1462,6 +1498,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1462 std r4, VCPU_MMCR(r9) 1498 std r4, VCPU_MMCR(r9)
1463 std r5, VCPU_MMCR + 8(r9) 1499 std r5, VCPU_MMCR + 8(r9)
1464 std r6, VCPU_MMCR + 16(r9) 1500 std r6, VCPU_MMCR + 16(r9)
1501BEGIN_FTR_SECTION
1502 std r10, VCPU_MMCR + 24(r9)
1503END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1465 std r7, VCPU_SIAR(r9) 1504 std r7, VCPU_SIAR(r9)
1466 std r8, VCPU_SDAR(r9) 1505 std r8, VCPU_SDAR(r9)
1467 mfspr r3, SPRN_PMC1 1506 mfspr r3, SPRN_PMC1
@@ -1485,12 +1524,10 @@ BEGIN_FTR_SECTION
1485 stw r11, VCPU_PMC + 28(r9) 1524 stw r11, VCPU_PMC + 28(r9)
1486END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 1525END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1487BEGIN_FTR_SECTION 1526BEGIN_FTR_SECTION
1488 mfspr r4, SPRN_MMCR2
1489 mfspr r5, SPRN_SIER 1527 mfspr r5, SPRN_SIER
1490 mfspr r6, SPRN_SPMC1 1528 mfspr r6, SPRN_SPMC1
1491 mfspr r7, SPRN_SPMC2 1529 mfspr r7, SPRN_SPMC2
1492 mfspr r8, SPRN_MMCRS 1530 mfspr r8, SPRN_MMCRS
1493 std r4, VCPU_MMCR + 24(r9)
1494 std r5, VCPU_SIER(r9) 1531 std r5, VCPU_SIER(r9)
1495 stw r6, VCPU_PMC + 24(r9) 1532 stw r6, VCPU_PMC + 24(r9)
1496 stw r7, VCPU_PMC + 28(r9) 1533 stw r7, VCPU_PMC + 28(r9)
@@ -2227,6 +2264,7 @@ machine_check_realmode:
2227 beq mc_cont 2264 beq mc_cont
2228 /* If not, deliver a machine check. SRR0/1 are already set */ 2265 /* If not, deliver a machine check. SRR0/1 are already set */
2229 li r10, BOOK3S_INTERRUPT_MACHINE_CHECK 2266 li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
2267 ld r11, VCPU_MSR(r9)
2230 bl kvmppc_msr_interrupt 2268 bl kvmppc_msr_interrupt
2231 b fast_interrupt_c_return 2269 b fast_interrupt_c_return
2232 2270
@@ -2431,3 +2469,21 @@ kvmppc_msr_interrupt:
2431 li r0, 1 2469 li r0, 1
24321: rldimi r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG 24701: rldimi r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
2433 blr 2471 blr
2472
2473/*
2474 * This works around a hardware bug on POWER8E processors, where
2475 * writing a 1 to the MMCR0[PMAO] bit doesn't generate a
2476 * performance monitor interrupt. Instead, when we need to have
2477 * an interrupt pending, we have to arrange for a counter to overflow.
2478 */
2479kvmppc_fix_pmao:
2480 li r3, 0
2481 mtspr SPRN_MMCR2, r3
2482 lis r3, (MMCR0_PMXE | MMCR0_FCECE)@h
2483 ori r3, r3, MMCR0_PMCjCE | MMCR0_C56RUN
2484 mtspr SPRN_MMCR0, r3
2485 lis r3, 0x7fff
2486 ori r3, r3, 0xffff
2487 mtspr SPRN_PMC6, r3
2488 isync
2489 blr
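
The workaround added above freezes the counters through MMCR2 before MMCR0 is read; the li/clrrdi pair builds an MMCR2 value with every counter freeze-condition bit set. A quick user-space check of that constant, as a sketch relying only on standard C and a GCC builtin:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* li r3, -1 ; clrrdi r3, r3, 10 -> clear the low 10 bits of ~0 */
        uint64_t mmcr2_freeze = ~0ULL & ~((1ULL << 10) - 1);

        printf("MMCR2 freeze value: 0x%016llx (%d bits set)\n",
               (unsigned long long)mmcr2_freeze,
               __builtin_popcountll(mmcr2_freeze));
        return 0;
    }

It prints 0xfffffffffffffc00 with 54 bits set, matching the "9 bits each for 6 counters" noted in the comment.
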
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 3533c999194a..e2c29e381dc7 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -104,8 +104,27 @@ kvm_start_lightweight:
104 stb r3, HSTATE_RESTORE_HID5(r13) 104 stb r3, HSTATE_RESTORE_HID5(r13)
105 105
106 /* Load up guest SPRG3 value, since it's user readable */ 106 /* Load up guest SPRG3 value, since it's user readable */
107 ld r3, VCPU_SHARED(r4) 107 lwz r3, VCPU_SHAREDBE(r4)
108 ld r3, VCPU_SHARED_SPRG3(r3) 108 cmpwi r3, 0
109 ld r5, VCPU_SHARED(r4)
110 beq sprg3_little_endian
111sprg3_big_endian:
112#ifdef __BIG_ENDIAN__
113 ld r3, VCPU_SHARED_SPRG3(r5)
114#else
115 addi r5, r5, VCPU_SHARED_SPRG3
116 ldbrx r3, 0, r5
117#endif
118 b after_sprg3_load
119sprg3_little_endian:
120#ifdef __LITTLE_ENDIAN__
121 ld r3, VCPU_SHARED_SPRG3(r5)
122#else
123 addi r5, r5, VCPU_SHARED_SPRG3
124 ldbrx r3, 0, r5
125#endif
126
127after_sprg3_load:
109 mtspr SPRN_SPRG3, r3 128 mtspr SPRN_SPRG3, r3
110#endif /* CONFIG_PPC_BOOK3S_64 */ 129#endif /* CONFIG_PPC_BOOK3S_64 */
111 130
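
The new SPRG3 path above chooses between ld and ldbrx depending on whether the shared page was written in the host's byte order. A hedged C analogue of that choice, using GCC/Clang's predefined byte-order macros and bswap builtin; the function and flag names are illustrative, not the kernel's:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    /* Byte-swap the value only when its endianness differs from the host's. */
    static uint64_t load_shared_u64(uint64_t raw, bool shared_is_big_endian)
    {
        bool host_is_big_endian = (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__);

        if (shared_is_big_endian != host_is_big_endian)
            raw = __builtin_bswap64(raw);
        return raw;
    }

    int main(void)
    {
        bool host_be = (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__);
        uint64_t sprg3_raw = 0x0123456789abcdefULL;

        printf("matching endianness: 0x%016llx\n",
               (unsigned long long)load_shared_u64(sprg3_raw, host_be));
        printf("opposite endianness: 0x%016llx\n",
               (unsigned long long)load_shared_u64(sprg3_raw, !host_be));
        return 0;
    }
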
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index c1abd95063f4..6c8011fd57e6 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -165,16 +165,18 @@ static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt)
165 165
166static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) 166static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
167{ 167{
168 u64 dsisr; 168 u32 dsisr;
169 struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared; 169 u64 msr = kvmppc_get_msr(vcpu);
170 170
171 shared->msr = kvmppc_set_field(shared->msr, 33, 36, 0); 171 msr = kvmppc_set_field(msr, 33, 36, 0);
172 shared->msr = kvmppc_set_field(shared->msr, 42, 47, 0); 172 msr = kvmppc_set_field(msr, 42, 47, 0);
173 shared->dar = eaddr; 173 kvmppc_set_msr(vcpu, msr);
174 kvmppc_set_dar(vcpu, eaddr);
174 /* Page Fault */ 175 /* Page Fault */
175 dsisr = kvmppc_set_field(0, 33, 33, 1); 176 dsisr = kvmppc_set_field(0, 33, 33, 1);
176 if (is_store) 177 if (is_store)
177 shared->dsisr = kvmppc_set_field(dsisr, 38, 38, 1); 178 dsisr = kvmppc_set_field(dsisr, 38, 38, 1);
179 kvmppc_set_dsisr(vcpu, dsisr);
178 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); 180 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
179} 181}
180 182
@@ -660,7 +662,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
660 if (!kvmppc_inst_is_paired_single(vcpu, inst)) 662 if (!kvmppc_inst_is_paired_single(vcpu, inst))
661 return EMULATE_FAIL; 663 return EMULATE_FAIL;
662 664
663 if (!(vcpu->arch.shared->msr & MSR_FP)) { 665 if (!(kvmppc_get_msr(vcpu) & MSR_FP)) {
664 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL); 666 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL);
665 return EMULATE_AGAIN; 667 return EMULATE_AGAIN;
666 } 668 }
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 02f1defd8bb9..8eef1e519077 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -53,6 +53,7 @@
53 53
54static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, 54static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
55 ulong msr); 55 ulong msr);
56static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
56 57
57/* Some compatibility defines */ 58/* Some compatibility defines */
58#ifdef CONFIG_PPC_BOOK3S_32 59#ifdef CONFIG_PPC_BOOK3S_32
@@ -89,6 +90,7 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
89#endif 90#endif
90 91
91 kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); 92 kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
93 kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
92 vcpu->cpu = -1; 94 vcpu->cpu = -1;
93} 95}
94 96
@@ -115,6 +117,9 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
115 svcpu->ctr = vcpu->arch.ctr; 117 svcpu->ctr = vcpu->arch.ctr;
116 svcpu->lr = vcpu->arch.lr; 118 svcpu->lr = vcpu->arch.lr;
117 svcpu->pc = vcpu->arch.pc; 119 svcpu->pc = vcpu->arch.pc;
120#ifdef CONFIG_PPC_BOOK3S_64
121 svcpu->shadow_fscr = vcpu->arch.shadow_fscr;
122#endif
118 svcpu->in_use = true; 123 svcpu->in_use = true;
119} 124}
120 125
@@ -158,6 +163,9 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
158 vcpu->arch.fault_dar = svcpu->fault_dar; 163 vcpu->arch.fault_dar = svcpu->fault_dar;
159 vcpu->arch.fault_dsisr = svcpu->fault_dsisr; 164 vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
160 vcpu->arch.last_inst = svcpu->last_inst; 165 vcpu->arch.last_inst = svcpu->last_inst;
166#ifdef CONFIG_PPC_BOOK3S_64
167 vcpu->arch.shadow_fscr = svcpu->shadow_fscr;
168#endif
161 svcpu->in_use = false; 169 svcpu->in_use = false;
162 170
163out: 171out:
@@ -246,14 +254,15 @@ static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
246 254
247static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) 255static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
248{ 256{
249 ulong smsr = vcpu->arch.shared->msr; 257 ulong guest_msr = kvmppc_get_msr(vcpu);
258 ulong smsr = guest_msr;
250 259
251 /* Guest MSR values */ 260 /* Guest MSR values */
252 smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE; 261 smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;
253 /* Process MSR values */ 262 /* Process MSR values */
254 smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; 263 smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
255 /* External providers the guest reserved */ 264 /* External providers the guest reserved */
256 smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext); 265 smsr |= (guest_msr & vcpu->arch.guest_owned_ext);
257 /* 64-bit Process MSR values */ 266 /* 64-bit Process MSR values */
258#ifdef CONFIG_PPC_BOOK3S_64 267#ifdef CONFIG_PPC_BOOK3S_64
259 smsr |= MSR_ISF | MSR_HV; 268 smsr |= MSR_ISF | MSR_HV;
@@ -263,14 +272,14 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
263 272
264static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) 273static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
265{ 274{
266 ulong old_msr = vcpu->arch.shared->msr; 275 ulong old_msr = kvmppc_get_msr(vcpu);
267 276
268#ifdef EXIT_DEBUG 277#ifdef EXIT_DEBUG
269 printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); 278 printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
270#endif 279#endif
271 280
272 msr &= to_book3s(vcpu)->msr_mask; 281 msr &= to_book3s(vcpu)->msr_mask;
273 vcpu->arch.shared->msr = msr; 282 kvmppc_set_msr_fast(vcpu, msr);
274 kvmppc_recalc_shadow_msr(vcpu); 283 kvmppc_recalc_shadow_msr(vcpu);
275 284
276 if (msr & MSR_POW) { 285 if (msr & MSR_POW) {
@@ -281,11 +290,11 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
281 290
282 /* Unset POW bit after we woke up */ 291 /* Unset POW bit after we woke up */
283 msr &= ~MSR_POW; 292 msr &= ~MSR_POW;
284 vcpu->arch.shared->msr = msr; 293 kvmppc_set_msr_fast(vcpu, msr);
285 } 294 }
286 } 295 }
287 296
288 if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) != 297 if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) !=
289 (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { 298 (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
290 kvmppc_mmu_flush_segments(vcpu); 299 kvmppc_mmu_flush_segments(vcpu);
291 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 300 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
@@ -317,7 +326,7 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
317 } 326 }
318 327
319 /* Preload FPU if it's enabled */ 328 /* Preload FPU if it's enabled */
320 if (vcpu->arch.shared->msr & MSR_FP) 329 if (kvmppc_get_msr(vcpu) & MSR_FP)
321 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 330 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
322} 331}
323 332
@@ -427,8 +436,8 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
427 436
428 /* patch dcbz into reserved instruction, so we trap */ 437 /* patch dcbz into reserved instruction, so we trap */
429 for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) 438 for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
430 if ((page[i] & 0xff0007ff) == INS_DCBZ) 439 if ((be32_to_cpu(page[i]) & 0xff0007ff) == INS_DCBZ)
431 page[i] &= 0xfffffff7; 440 page[i] &= cpu_to_be32(0xfffffff7);
432 441
433 kunmap_atomic(page); 442 kunmap_atomic(page);
434 put_page(hpage); 443 put_page(hpage);
@@ -438,7 +447,7 @@ static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
438{ 447{
439 ulong mp_pa = vcpu->arch.magic_page_pa; 448 ulong mp_pa = vcpu->arch.magic_page_pa;
440 449
441 if (!(vcpu->arch.shared->msr & MSR_SF)) 450 if (!(kvmppc_get_msr(vcpu) & MSR_SF))
442 mp_pa = (uint32_t)mp_pa; 451 mp_pa = (uint32_t)mp_pa;
443 452
444 if (unlikely(mp_pa) && 453 if (unlikely(mp_pa) &&
@@ -459,8 +468,8 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
459 int page_found = 0; 468 int page_found = 0;
460 struct kvmppc_pte pte; 469 struct kvmppc_pte pte;
461 bool is_mmio = false; 470 bool is_mmio = false;
462 bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false; 471 bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
463 bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false; 472 bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
464 u64 vsid; 473 u64 vsid;
465 474
466 relocated = data ? dr : ir; 475 relocated = data ? dr : ir;
@@ -480,7 +489,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
480 pte.page_size = MMU_PAGE_64K; 489 pte.page_size = MMU_PAGE_64K;
481 } 490 }
482 491
483 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 492 switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
484 case 0: 493 case 0:
485 pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); 494 pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
486 break; 495 break;
@@ -488,7 +497,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
488 case MSR_IR: 497 case MSR_IR:
489 vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); 498 vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
490 499
491 if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR) 500 if ((kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) == MSR_DR)
492 pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12)); 501 pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
493 else 502 else
494 pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12)); 503 pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
@@ -511,22 +520,25 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
511 520
512 if (page_found == -ENOENT) { 521 if (page_found == -ENOENT) {
513 /* Page not found in guest PTE entries */ 522 /* Page not found in guest PTE entries */
514 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); 523 u64 ssrr1 = vcpu->arch.shadow_srr1;
515 vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr; 524 u64 msr = kvmppc_get_msr(vcpu);
516 vcpu->arch.shared->msr |= 525 kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
517 vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL; 526 kvmppc_set_dsisr(vcpu, vcpu->arch.fault_dsisr);
527 kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));
518 kvmppc_book3s_queue_irqprio(vcpu, vec); 528 kvmppc_book3s_queue_irqprio(vcpu, vec);
519 } else if (page_found == -EPERM) { 529 } else if (page_found == -EPERM) {
520 /* Storage protection */ 530 /* Storage protection */
521 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); 531 u32 dsisr = vcpu->arch.fault_dsisr;
522 vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; 532 u64 ssrr1 = vcpu->arch.shadow_srr1;
523 vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; 533 u64 msr = kvmppc_get_msr(vcpu);
524 vcpu->arch.shared->msr |= 534 kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
525 vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL; 535 dsisr = (dsisr & ~DSISR_NOHPTE) | DSISR_PROTFAULT;
536 kvmppc_set_dsisr(vcpu, dsisr);
537 kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));
526 kvmppc_book3s_queue_irqprio(vcpu, vec); 538 kvmppc_book3s_queue_irqprio(vcpu, vec);
527 } else if (page_found == -EINVAL) { 539 } else if (page_found == -EINVAL) {
528 /* Page not found in guest SLB */ 540 /* Page not found in guest SLB */
529 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); 541 kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
530 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); 542 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
531 } else if (!is_mmio && 543 } else if (!is_mmio &&
532 kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { 544 kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
@@ -606,6 +618,25 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
606 kvmppc_recalc_shadow_msr(vcpu); 618 kvmppc_recalc_shadow_msr(vcpu);
607} 619}
608 620
621/* Give up facility (TAR / EBB / DSCR) */
622static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac)
623{
624#ifdef CONFIG_PPC_BOOK3S_64
625 if (!(vcpu->arch.shadow_fscr & (1ULL << fac))) {
 626		/* Facility not available to the guest, ignore giveup request */
627 return;
628 }
629
630 switch (fac) {
631 case FSCR_TAR_LG:
632 vcpu->arch.tar = mfspr(SPRN_TAR);
633 mtspr(SPRN_TAR, current->thread.tar);
634 vcpu->arch.shadow_fscr &= ~FSCR_TAR;
635 break;
636 }
637#endif
638}
639
609static int kvmppc_read_inst(struct kvm_vcpu *vcpu) 640static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
610{ 641{
611 ulong srr0 = kvmppc_get_pc(vcpu); 642 ulong srr0 = kvmppc_get_pc(vcpu);
@@ -614,11 +645,12 @@ static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
614 645
615 ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); 646 ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
616 if (ret == -ENOENT) { 647 if (ret == -ENOENT) {
617 ulong msr = vcpu->arch.shared->msr; 648 ulong msr = kvmppc_get_msr(vcpu);
618 649
619 msr = kvmppc_set_field(msr, 33, 33, 1); 650 msr = kvmppc_set_field(msr, 33, 33, 1);
620 msr = kvmppc_set_field(msr, 34, 36, 0); 651 msr = kvmppc_set_field(msr, 34, 36, 0);
621 vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0); 652 msr = kvmppc_set_field(msr, 42, 47, 0);
653 kvmppc_set_msr_fast(vcpu, msr);
622 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); 654 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
623 return EMULATE_AGAIN; 655 return EMULATE_AGAIN;
624 } 656 }
@@ -651,7 +683,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
651 if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) 683 if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
652 return RESUME_GUEST; 684 return RESUME_GUEST;
653 685
654 if (!(vcpu->arch.shared->msr & msr)) { 686 if (!(kvmppc_get_msr(vcpu) & msr)) {
655 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 687 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
656 return RESUME_GUEST; 688 return RESUME_GUEST;
657 } 689 }
@@ -683,16 +715,20 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
683#endif 715#endif
684 716
685 if (msr & MSR_FP) { 717 if (msr & MSR_FP) {
718 preempt_disable();
686 enable_kernel_fp(); 719 enable_kernel_fp();
687 load_fp_state(&vcpu->arch.fp); 720 load_fp_state(&vcpu->arch.fp);
688 t->fp_save_area = &vcpu->arch.fp; 721 t->fp_save_area = &vcpu->arch.fp;
722 preempt_enable();
689 } 723 }
690 724
691 if (msr & MSR_VEC) { 725 if (msr & MSR_VEC) {
692#ifdef CONFIG_ALTIVEC 726#ifdef CONFIG_ALTIVEC
727 preempt_disable();
693 enable_kernel_altivec(); 728 enable_kernel_altivec();
694 load_vr_state(&vcpu->arch.vr); 729 load_vr_state(&vcpu->arch.vr);
695 t->vr_save_area = &vcpu->arch.vr; 730 t->vr_save_area = &vcpu->arch.vr;
731 preempt_enable();
696#endif 732#endif
697 } 733 }
698 734
@@ -716,18 +752,90 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
716 return; 752 return;
717 753
718 if (lost_ext & MSR_FP) { 754 if (lost_ext & MSR_FP) {
755 preempt_disable();
719 enable_kernel_fp(); 756 enable_kernel_fp();
720 load_fp_state(&vcpu->arch.fp); 757 load_fp_state(&vcpu->arch.fp);
758 preempt_enable();
721 } 759 }
722#ifdef CONFIG_ALTIVEC 760#ifdef CONFIG_ALTIVEC
723 if (lost_ext & MSR_VEC) { 761 if (lost_ext & MSR_VEC) {
762 preempt_disable();
724 enable_kernel_altivec(); 763 enable_kernel_altivec();
725 load_vr_state(&vcpu->arch.vr); 764 load_vr_state(&vcpu->arch.vr);
765 preempt_enable();
726 } 766 }
727#endif 767#endif
728 current->thread.regs->msr |= lost_ext; 768 current->thread.regs->msr |= lost_ext;
729} 769}
730 770
771#ifdef CONFIG_PPC_BOOK3S_64
772
773static void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac)
774{
775 /* Inject the Interrupt Cause field and trigger a guest interrupt */
776 vcpu->arch.fscr &= ~(0xffULL << 56);
777 vcpu->arch.fscr |= (fac << 56);
778 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
779}
780
781static void kvmppc_emulate_fac(struct kvm_vcpu *vcpu, ulong fac)
782{
783 enum emulation_result er = EMULATE_FAIL;
784
785 if (!(kvmppc_get_msr(vcpu) & MSR_PR))
786 er = kvmppc_emulate_instruction(vcpu->run, vcpu);
787
788 if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) {
789 /* Couldn't emulate, trigger interrupt in guest */
790 kvmppc_trigger_fac_interrupt(vcpu, fac);
791 }
792}
793
794/* Enable facilities (TAR, EBB, DSCR) for the guest */
795static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)
796{
797 bool guest_fac_enabled;
798 BUG_ON(!cpu_has_feature(CPU_FTR_ARCH_207S));
799
800 /*
 801	 * Not every facility is enabled by FSCR bits; check whether the
802 * guest has this facility enabled at all.
803 */
804 switch (fac) {
805 case FSCR_TAR_LG:
806 case FSCR_EBB_LG:
807 guest_fac_enabled = (vcpu->arch.fscr & (1ULL << fac));
808 break;
809 case FSCR_TM_LG:
810 guest_fac_enabled = kvmppc_get_msr(vcpu) & MSR_TM;
811 break;
812 default:
813 guest_fac_enabled = false;
814 break;
815 }
816
817 if (!guest_fac_enabled) {
818 /* Facility not enabled by the guest */
819 kvmppc_trigger_fac_interrupt(vcpu, fac);
820 return RESUME_GUEST;
821 }
822
823 switch (fac) {
824 case FSCR_TAR_LG:
825 /* TAR switching isn't lazy in Linux yet */
826 current->thread.tar = mfspr(SPRN_TAR);
827 mtspr(SPRN_TAR, vcpu->arch.tar);
828 vcpu->arch.shadow_fscr |= FSCR_TAR;
829 break;
830 default:
831 kvmppc_emulate_fac(vcpu, fac);
832 break;
833 }
834
835 return RESUME_GUEST;
836}
837#endif
838
731int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, 839int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
732 unsigned int exit_nr) 840 unsigned int exit_nr)
733{ 841{
@@ -784,7 +892,9 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
784 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); 892 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
785 r = RESUME_GUEST; 893 r = RESUME_GUEST;
786 } else { 894 } else {
787 vcpu->arch.shared->msr |= shadow_srr1 & 0x58000000; 895 u64 msr = kvmppc_get_msr(vcpu);
896 msr |= shadow_srr1 & 0x58000000;
897 kvmppc_set_msr_fast(vcpu, msr);
788 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 898 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
789 r = RESUME_GUEST; 899 r = RESUME_GUEST;
790 } 900 }
@@ -824,8 +934,8 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
824 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); 934 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
825 srcu_read_unlock(&vcpu->kvm->srcu, idx); 935 srcu_read_unlock(&vcpu->kvm->srcu, idx);
826 } else { 936 } else {
827 vcpu->arch.shared->dar = dar; 937 kvmppc_set_dar(vcpu, dar);
828 vcpu->arch.shared->dsisr = fault_dsisr; 938 kvmppc_set_dsisr(vcpu, fault_dsisr);
829 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 939 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
830 r = RESUME_GUEST; 940 r = RESUME_GUEST;
831 } 941 }
@@ -833,7 +943,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
833 } 943 }
834 case BOOK3S_INTERRUPT_DATA_SEGMENT: 944 case BOOK3S_INTERRUPT_DATA_SEGMENT:
835 if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) { 945 if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
836 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); 946 kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
837 kvmppc_book3s_queue_irqprio(vcpu, 947 kvmppc_book3s_queue_irqprio(vcpu,
838 BOOK3S_INTERRUPT_DATA_SEGMENT); 948 BOOK3S_INTERRUPT_DATA_SEGMENT);
839 } 949 }
@@ -871,7 +981,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
871program_interrupt: 981program_interrupt:
872 flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; 982 flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
873 983
874 if (vcpu->arch.shared->msr & MSR_PR) { 984 if (kvmppc_get_msr(vcpu) & MSR_PR) {
875#ifdef EXIT_DEBUG 985#ifdef EXIT_DEBUG
876 printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); 986 printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
877#endif 987#endif
@@ -913,7 +1023,7 @@ program_interrupt:
913 case BOOK3S_INTERRUPT_SYSCALL: 1023 case BOOK3S_INTERRUPT_SYSCALL:
914 if (vcpu->arch.papr_enabled && 1024 if (vcpu->arch.papr_enabled &&
915 (kvmppc_get_last_sc(vcpu) == 0x44000022) && 1025 (kvmppc_get_last_sc(vcpu) == 0x44000022) &&
916 !(vcpu->arch.shared->msr & MSR_PR)) { 1026 !(kvmppc_get_msr(vcpu) & MSR_PR)) {
917 /* SC 1 papr hypercalls */ 1027 /* SC 1 papr hypercalls */
918 ulong cmd = kvmppc_get_gpr(vcpu, 3); 1028 ulong cmd = kvmppc_get_gpr(vcpu, 3);
919 int i; 1029 int i;
@@ -945,7 +1055,7 @@ program_interrupt:
945 gprs[i] = kvmppc_get_gpr(vcpu, i); 1055 gprs[i] = kvmppc_get_gpr(vcpu, i);
946 vcpu->arch.osi_needed = 1; 1056 vcpu->arch.osi_needed = 1;
947 r = RESUME_HOST_NV; 1057 r = RESUME_HOST_NV;
948 } else if (!(vcpu->arch.shared->msr & MSR_PR) && 1058 } else if (!(kvmppc_get_msr(vcpu) & MSR_PR) &&
949 (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { 1059 (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
950 /* KVM PV hypercalls */ 1060 /* KVM PV hypercalls */
951 kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); 1061 kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
@@ -986,14 +1096,26 @@ program_interrupt:
986 } 1096 }
987 case BOOK3S_INTERRUPT_ALIGNMENT: 1097 case BOOK3S_INTERRUPT_ALIGNMENT:
988 if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { 1098 if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
989 vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu, 1099 u32 last_inst = kvmppc_get_last_inst(vcpu);
990 kvmppc_get_last_inst(vcpu)); 1100 u32 dsisr;
991 vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu, 1101 u64 dar;
992 kvmppc_get_last_inst(vcpu)); 1102
1103 dsisr = kvmppc_alignment_dsisr(vcpu, last_inst);
1104 dar = kvmppc_alignment_dar(vcpu, last_inst);
1105
1106 kvmppc_set_dsisr(vcpu, dsisr);
1107 kvmppc_set_dar(vcpu, dar);
1108
993 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 1109 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
994 } 1110 }
995 r = RESUME_GUEST; 1111 r = RESUME_GUEST;
996 break; 1112 break;
1113#ifdef CONFIG_PPC_BOOK3S_64
1114 case BOOK3S_INTERRUPT_FAC_UNAVAIL:
1115 kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
1116 r = RESUME_GUEST;
1117 break;
1118#endif
997 case BOOK3S_INTERRUPT_MACHINE_CHECK: 1119 case BOOK3S_INTERRUPT_MACHINE_CHECK:
998 case BOOK3S_INTERRUPT_TRACE: 1120 case BOOK3S_INTERRUPT_TRACE:
999 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 1121 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
@@ -1054,7 +1176,7 @@ static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu,
1054 } 1176 }
1055 } else { 1177 } else {
1056 for (i = 0; i < 16; i++) 1178 for (i = 0; i < 16; i++)
1057 sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i]; 1179 sregs->u.s.ppc32.sr[i] = kvmppc_get_sr(vcpu, i);
1058 1180
1059 for (i = 0; i < 8; i++) { 1181 for (i = 0; i < 8; i++) {
1060 sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw; 1182 sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
@@ -1110,6 +1232,15 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
1110 case KVM_REG_PPC_HIOR: 1232 case KVM_REG_PPC_HIOR:
1111 *val = get_reg_val(id, to_book3s(vcpu)->hior); 1233 *val = get_reg_val(id, to_book3s(vcpu)->hior);
1112 break; 1234 break;
1235 case KVM_REG_PPC_LPCR:
1236 /*
1237 * We are only interested in the LPCR_ILE bit
1238 */
1239 if (vcpu->arch.intr_msr & MSR_LE)
1240 *val = get_reg_val(id, LPCR_ILE);
1241 else
1242 *val = get_reg_val(id, 0);
1243 break;
1113 default: 1244 default:
1114 r = -EINVAL; 1245 r = -EINVAL;
1115 break; 1246 break;
@@ -1118,6 +1249,14 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
1118 return r; 1249 return r;
1119} 1250}
1120 1251
1252static void kvmppc_set_lpcr_pr(struct kvm_vcpu *vcpu, u64 new_lpcr)
1253{
1254 if (new_lpcr & LPCR_ILE)
1255 vcpu->arch.intr_msr |= MSR_LE;
1256 else
1257 vcpu->arch.intr_msr &= ~MSR_LE;
1258}
1259
1121static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, 1260static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
1122 union kvmppc_one_reg *val) 1261 union kvmppc_one_reg *val)
1123{ 1262{
@@ -1128,6 +1267,9 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
1128 to_book3s(vcpu)->hior = set_reg_val(id, *val); 1267 to_book3s(vcpu)->hior = set_reg_val(id, *val);
1129 to_book3s(vcpu)->hior_explicit = true; 1268 to_book3s(vcpu)->hior_explicit = true;
1130 break; 1269 break;
1270 case KVM_REG_PPC_LPCR:
1271 kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val));
1272 break;
1131 default: 1273 default:
1132 r = -EINVAL; 1274 r = -EINVAL;
1133 break; 1275 break;
@@ -1170,8 +1312,14 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
1170 goto uninit_vcpu; 1312 goto uninit_vcpu;
1171 /* the real shared page fills the last 4k of our page */ 1313 /* the real shared page fills the last 4k of our page */
1172 vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096); 1314 vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
1173
1174#ifdef CONFIG_PPC_BOOK3S_64 1315#ifdef CONFIG_PPC_BOOK3S_64
1316 /* Always start the shared struct in native endian mode */
1317#ifdef __BIG_ENDIAN__
1318 vcpu->arch.shared_big_endian = true;
1319#else
1320 vcpu->arch.shared_big_endian = false;
1321#endif
1322
1175 /* 1323 /*
1176 * Default to the same as the host if we're on sufficiently 1324 * Default to the same as the host if we're on sufficiently
1177 * recent machine that we have 1TB segments; 1325 * recent machine that we have 1TB segments;
@@ -1180,6 +1328,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
1180 vcpu->arch.pvr = 0x3C0301; 1328 vcpu->arch.pvr = 0x3C0301;
1181 if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) 1329 if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
1182 vcpu->arch.pvr = mfspr(SPRN_PVR); 1330 vcpu->arch.pvr = mfspr(SPRN_PVR);
1331 vcpu->arch.intr_msr = MSR_SF;
1183#else 1332#else
1184 /* default to book3s_32 (750) */ 1333 /* default to book3s_32 (750) */
1185 vcpu->arch.pvr = 0x84202; 1334 vcpu->arch.pvr = 0x84202;
@@ -1187,7 +1336,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
1187 kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr); 1336 kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
1188 vcpu->arch.slb_nr = 64; 1337 vcpu->arch.slb_nr = 64;
1189 1338
1190 vcpu->arch.shadow_msr = MSR_USER64; 1339 vcpu->arch.shadow_msr = MSR_USER64 & ~MSR_LE;
1191 1340
1192 err = kvmppc_mmu_init(vcpu); 1341 err = kvmppc_mmu_init(vcpu);
1193 if (err < 0) 1342 if (err < 0)
@@ -1264,7 +1413,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1264#endif 1413#endif
1265 1414
1266 /* Preload FPU if it's enabled */ 1415 /* Preload FPU if it's enabled */
1267 if (vcpu->arch.shared->msr & MSR_FP) 1416 if (kvmppc_get_msr(vcpu) & MSR_FP)
1268 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 1417 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
1269 1418
1270 kvmppc_fix_ee_before_entry(); 1419 kvmppc_fix_ee_before_entry();
@@ -1277,6 +1426,9 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1277 /* Make sure we save the guest FPU/Altivec/VSX state */ 1426 /* Make sure we save the guest FPU/Altivec/VSX state */
1278 kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); 1427 kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
1279 1428
1429 /* Make sure we save the guest TAR/EBB/DSCR state */
1430 kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
1431
1280out: 1432out:
1281 vcpu->mode = OUTSIDE_GUEST_MODE; 1433 vcpu->mode = OUTSIDE_GUEST_MODE;
1282 return ret; 1434 return ret;
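
Among the book3s_pr.c additions above, kvmppc_trigger_fac_interrupt() records which facility faulted by writing its number into the top byte of FSCR, and the exit path recovers it with a shift by 56. A minimal sketch of that field handling; FSCR_TAR_LG is assumed to be 8, as in the kernel headers:

    #include <stdio.h>
    #include <stdint.h>

    #define FSCR_TAR_LG 8   /* assumed value, matching the kernel's definition */

    static uint64_t set_fscr_ic(uint64_t fscr, uint64_t fac)
    {
        fscr &= ~(0xffULL << 56);   /* clear any previous interrupt cause */
        fscr |= fac << 56;          /* record the faulting facility       */
        return fscr;
    }

    int main(void)
    {
        uint64_t fscr = set_fscr_ic(0, FSCR_TAR_LG);

        printf("FSCR=0x%016llx cause=%llu\n",
               (unsigned long long)fscr,
               (unsigned long long)(fscr >> 56));
        return 0;
    }
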
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index 5efa97b993d8..52a63bfe3f07 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -57,7 +57,7 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
57 for (i = 0; ; ++i) { 57 for (i = 0; ; ++i) {
58 if (i == 8) 58 if (i == 8)
59 goto done; 59 goto done;
60 if ((*hpte & HPTE_V_VALID) == 0) 60 if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0)
61 break; 61 break;
62 hpte += 2; 62 hpte += 2;
63 } 63 }
@@ -67,8 +67,8 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
67 goto done; 67 goto done;
68 } 68 }
69 69
70 hpte[0] = kvmppc_get_gpr(vcpu, 6); 70 hpte[0] = cpu_to_be64(kvmppc_get_gpr(vcpu, 6));
71 hpte[1] = kvmppc_get_gpr(vcpu, 7); 71 hpte[1] = cpu_to_be64(kvmppc_get_gpr(vcpu, 7));
72 pteg_addr += i * HPTE_SIZE; 72 pteg_addr += i * HPTE_SIZE;
73 copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE); 73 copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE);
74 kvmppc_set_gpr(vcpu, 4, pte_index | i); 74 kvmppc_set_gpr(vcpu, 4, pte_index | i);
@@ -93,6 +93,8 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
93 pteg = get_pteg_addr(vcpu, pte_index); 93 pteg = get_pteg_addr(vcpu, pte_index);
94 mutex_lock(&vcpu->kvm->arch.hpt_mutex); 94 mutex_lock(&vcpu->kvm->arch.hpt_mutex);
95 copy_from_user(pte, (void __user *)pteg, sizeof(pte)); 95 copy_from_user(pte, (void __user *)pteg, sizeof(pte));
96 pte[0] = be64_to_cpu(pte[0]);
97 pte[1] = be64_to_cpu(pte[1]);
96 98
97 ret = H_NOT_FOUND; 99 ret = H_NOT_FOUND;
98 if ((pte[0] & HPTE_V_VALID) == 0 || 100 if ((pte[0] & HPTE_V_VALID) == 0 ||
@@ -169,6 +171,8 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
169 171
170 pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX); 172 pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
171 copy_from_user(pte, (void __user *)pteg, sizeof(pte)); 173 copy_from_user(pte, (void __user *)pteg, sizeof(pte));
174 pte[0] = be64_to_cpu(pte[0]);
175 pte[1] = be64_to_cpu(pte[1]);
172 176
173 /* tsl = AVPN */ 177 /* tsl = AVPN */
174 flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26; 178 flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26;
@@ -207,6 +211,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
207 pteg = get_pteg_addr(vcpu, pte_index); 211 pteg = get_pteg_addr(vcpu, pte_index);
208 mutex_lock(&vcpu->kvm->arch.hpt_mutex); 212 mutex_lock(&vcpu->kvm->arch.hpt_mutex);
209 copy_from_user(pte, (void __user *)pteg, sizeof(pte)); 213 copy_from_user(pte, (void __user *)pteg, sizeof(pte));
214 pte[0] = be64_to_cpu(pte[0]);
215 pte[1] = be64_to_cpu(pte[1]);
210 216
211 ret = H_NOT_FOUND; 217 ret = H_NOT_FOUND;
212 if ((pte[0] & HPTE_V_VALID) == 0 || 218 if ((pte[0] & HPTE_V_VALID) == 0 ||
@@ -225,6 +231,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
225 231
226 rb = compute_tlbie_rb(v, r, pte_index); 232 rb = compute_tlbie_rb(v, r, pte_index);
227 vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); 233 vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
234 pte[0] = cpu_to_be64(pte[0]);
235 pte[1] = cpu_to_be64(pte[1]);
228 copy_to_user((void __user *)pteg, pte, sizeof(pte)); 236 copy_to_user((void __user *)pteg, pte, sizeof(pte));
229 ret = H_SUCCESS; 237 ret = H_SUCCESS;
230 238
@@ -270,7 +278,7 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
270 case H_PUT_TCE: 278 case H_PUT_TCE:
271 return kvmppc_h_pr_put_tce(vcpu); 279 return kvmppc_h_pr_put_tce(vcpu);
272 case H_CEDE: 280 case H_CEDE:
273 vcpu->arch.shared->msr |= MSR_EE; 281 kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
274 kvm_vcpu_block(vcpu); 282 kvm_vcpu_block(vcpu);
275 clear_bit(KVM_REQ_UNHALT, &vcpu->requests); 283 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
276 vcpu->stat.halt_wakeup++; 284 vcpu->stat.halt_wakeup++;
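
The be64_to_cpu()/cpu_to_be64() calls added above exist because guest HPTEs are stored big-endian in guest memory, while a little-endian host wants native values before testing bits such as HPTE_V_VALID. A user-space sketch of that conversion, using glibc's <endian.h> helpers; the HPTE contents are made up for the demo, and HPTE_V_VALID is assumed to be the low bit of the first doubleword:

    #include <stdio.h>
    #include <stdint.h>
    #include <endian.h>

    #define HPTE_V_VALID 0x1ULL   /* assumed, matching the kernel headers */

    int main(void)
    {
        /* An HPTE pair as it would sit in guest memory: big-endian. */
        uint64_t pte_be[2] = { htobe64(0x8000000000000001ULL), htobe64(0) };
        uint64_t v = be64toh(pte_be[0]);   /* convert before testing bits */

        printf("raw=0x%016llx cpu=0x%016llx valid=%d\n",
               (unsigned long long)pte_be[0],
               (unsigned long long)v,
               (int)(v & HPTE_V_VALID));
        return 0;
    }
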
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 7a053157483b..edb14ba992b3 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -205,6 +205,32 @@ int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
205 return rc; 205 return rc;
206} 206}
207 207
208static void kvmppc_rtas_swap_endian_in(struct rtas_args *args)
209{
210#ifdef __LITTLE_ENDIAN__
211 int i;
212
213 args->token = be32_to_cpu(args->token);
214 args->nargs = be32_to_cpu(args->nargs);
215 args->nret = be32_to_cpu(args->nret);
216 for (i = 0; i < args->nargs; i++)
217 args->args[i] = be32_to_cpu(args->args[i]);
218#endif
219}
220
221static void kvmppc_rtas_swap_endian_out(struct rtas_args *args)
222{
223#ifdef __LITTLE_ENDIAN__
224 int i;
225
226 for (i = 0; i < args->nret; i++)
227 args->args[i] = cpu_to_be32(args->args[i]);
228 args->token = cpu_to_be32(args->token);
229 args->nargs = cpu_to_be32(args->nargs);
230 args->nret = cpu_to_be32(args->nret);
231#endif
232}
233
208int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) 234int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
209{ 235{
210 struct rtas_token_definition *d; 236 struct rtas_token_definition *d;
@@ -223,6 +249,8 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
223 if (rc) 249 if (rc)
224 goto fail; 250 goto fail;
225 251
252 kvmppc_rtas_swap_endian_in(&args);
253
226 /* 254 /*
227 * args->rets is a pointer into args->args. Now that we've 255 * args->rets is a pointer into args->args. Now that we've
228 * copied args we need to fix it up to point into our copy, 256 * copied args we need to fix it up to point into our copy,
@@ -247,6 +275,7 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
247 275
248 if (rc == 0) { 276 if (rc == 0) {
249 args.rets = orig_rets; 277 args.rets = orig_rets;
278 kvmppc_rtas_swap_endian_out(&args);
250 rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args)); 279 rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
251 if (rc) 280 if (rc)
252 goto fail; 281 goto fail;
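
The two swap helpers above convert RTAS arguments between the guest's big-endian layout and the host's native order. A cut-down, hedged model of the inbound conversion; demo_rtas_args is a stand-in struct, not the kernel's rtas_args, and glibc's <endian.h> helpers are assumed:

    #include <stdio.h>
    #include <stdint.h>
    #include <endian.h>

    struct demo_rtas_args {
        uint32_t token;
        uint32_t nargs;
        uint32_t nret;
        uint32_t args[4];
    };

    /* Convert every 32-bit field from big-endian to host order. */
    static void demo_swap_in(struct demo_rtas_args *a)
    {
        uint32_t i;

        a->token = be32toh(a->token);
        a->nargs = be32toh(a->nargs);
        a->nret  = be32toh(a->nret);
        for (i = 0; i < a->nargs && i < 4; i++)
            a->args[i] = be32toh(a->args[i]);
    }

    int main(void)
    {
        struct demo_rtas_args a = {
            .token = htobe32(42), .nargs = htobe32(1),
            .nret = htobe32(1), .args = { htobe32(7) },
        };

        demo_swap_in(&a);
        printf("token=%u nargs=%u arg0=%u\n",
               (unsigned)a.token, (unsigned)a.nargs, (unsigned)a.args[0]);
        return 0;
    }
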
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 1e0cc2adfd40..acee37cde840 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -90,6 +90,15 @@ kvmppc_handler_trampoline_enter:
90 LOAD_GUEST_SEGMENTS 90 LOAD_GUEST_SEGMENTS
91 91
92#ifdef CONFIG_PPC_BOOK3S_64 92#ifdef CONFIG_PPC_BOOK3S_64
93BEGIN_FTR_SECTION
94 /* Save host FSCR */
95 mfspr r8, SPRN_FSCR
96 std r8, HSTATE_HOST_FSCR(r13)
97 /* Set FSCR during guest execution */
98 ld r9, SVCPU_SHADOW_FSCR(r13)
99 mtspr SPRN_FSCR, r9
100END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
101
93 /* Some guests may need to have dcbz set to 32 byte length. 102 /* Some guests may need to have dcbz set to 32 byte length.
94 * 103 *
95 * Usually we ensure that by patching the guest's instructions 104 * Usually we ensure that by patching the guest's instructions
@@ -255,6 +264,10 @@ BEGIN_FTR_SECTION
255 cmpwi r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST 264 cmpwi r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST
256 beq- ld_last_inst 265 beq- ld_last_inst
257END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) 266END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
267BEGIN_FTR_SECTION
268 cmpwi r12, BOOK3S_INTERRUPT_FAC_UNAVAIL
269 beq- ld_last_inst
270END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
258#endif 271#endif
259 272
260 b no_ld_last_inst 273 b no_ld_last_inst
@@ -311,6 +324,18 @@ no_ld_last_inst:
311 324
312no_dcbz32_off: 325no_dcbz32_off:
313 326
327BEGIN_FTR_SECTION
328 /* Save guest FSCR on a FAC_UNAVAIL interrupt */
329 cmpwi r12, BOOK3S_INTERRUPT_FAC_UNAVAIL
330 bne+ no_fscr_save
331 mfspr r7, SPRN_FSCR
332 std r7, SVCPU_SHADOW_FSCR(r13)
333no_fscr_save:
334 /* Restore host FSCR */
335 ld r8, HSTATE_HOST_FSCR(r13)
336 mtspr SPRN_FSCR, r8
337END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
338
314#endif /* CONFIG_PPC_BOOK3S_64 */ 339#endif /* CONFIG_PPC_BOOK3S_64 */
315 340
316 /* 341 /*
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 89b7f821f6c4..002d51764143 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -19,6 +19,7 @@
19#include "booke.h" 19#include "booke.h"
20#include "e500.h" 20#include "e500.h"
21 21
22#define XOP_DCBTLS 166
22#define XOP_MSGSND 206 23#define XOP_MSGSND 206
23#define XOP_MSGCLR 238 24#define XOP_MSGCLR 238
24#define XOP_TLBIVAX 786 25#define XOP_TLBIVAX 786
@@ -103,6 +104,15 @@ static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
103 return emulated; 104 return emulated;
104} 105}
105 106
107static int kvmppc_e500_emul_dcbtls(struct kvm_vcpu *vcpu)
108{
109 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
110
111 /* Always fail to lock the cache */
112 vcpu_e500->l1csr0 |= L1CSR0_CUL;
113 return EMULATE_DONE;
114}
115
106int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, 116int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
107 unsigned int inst, int *advance) 117 unsigned int inst, int *advance)
108{ 118{
@@ -116,6 +126,10 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
116 case 31: 126 case 31:
117 switch (get_xop(inst)) { 127 switch (get_xop(inst)) {
118 128
129 case XOP_DCBTLS:
130 emulated = kvmppc_e500_emul_dcbtls(vcpu);
131 break;
132
119#ifdef CONFIG_KVM_E500MC 133#ifdef CONFIG_KVM_E500MC
120 case XOP_MSGSND: 134 case XOP_MSGSND:
121 emulated = kvmppc_e500_emul_msgsnd(vcpu, rb); 135 emulated = kvmppc_e500_emul_msgsnd(vcpu, rb);
@@ -222,6 +236,7 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va
222 break; 236 break;
223 case SPRN_L1CSR1: 237 case SPRN_L1CSR1:
224 vcpu_e500->l1csr1 = spr_val; 238 vcpu_e500->l1csr1 = spr_val;
239 vcpu_e500->l1csr1 &= ~(L1CSR1_ICFI | L1CSR1_ICLFR);
225 break; 240 break;
226 case SPRN_HID0: 241 case SPRN_HID0:
227 vcpu_e500->hid0 = spr_val; 242 vcpu_e500->hid0 = spr_val;
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index c2b887be2c29..da86d9ba3476 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -97,10 +97,10 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
97 97
98 switch (sprn) { 98 switch (sprn) {
99 case SPRN_SRR0: 99 case SPRN_SRR0:
100 vcpu->arch.shared->srr0 = spr_val; 100 kvmppc_set_srr0(vcpu, spr_val);
101 break; 101 break;
102 case SPRN_SRR1: 102 case SPRN_SRR1:
103 vcpu->arch.shared->srr1 = spr_val; 103 kvmppc_set_srr1(vcpu, spr_val);
104 break; 104 break;
105 105
106 /* XXX We need to context-switch the timebase for 106 /* XXX We need to context-switch the timebase for
@@ -114,16 +114,16 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
114 break; 114 break;
115 115
116 case SPRN_SPRG0: 116 case SPRN_SPRG0:
117 vcpu->arch.shared->sprg0 = spr_val; 117 kvmppc_set_sprg0(vcpu, spr_val);
118 break; 118 break;
119 case SPRN_SPRG1: 119 case SPRN_SPRG1:
120 vcpu->arch.shared->sprg1 = spr_val; 120 kvmppc_set_sprg1(vcpu, spr_val);
121 break; 121 break;
122 case SPRN_SPRG2: 122 case SPRN_SPRG2:
123 vcpu->arch.shared->sprg2 = spr_val; 123 kvmppc_set_sprg2(vcpu, spr_val);
124 break; 124 break;
125 case SPRN_SPRG3: 125 case SPRN_SPRG3:
126 vcpu->arch.shared->sprg3 = spr_val; 126 kvmppc_set_sprg3(vcpu, spr_val);
127 break; 127 break;
128 128
129 /* PIR can legally be written, but we ignore it */ 129 /* PIR can legally be written, but we ignore it */
@@ -150,10 +150,10 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
150 150
151 switch (sprn) { 151 switch (sprn) {
152 case SPRN_SRR0: 152 case SPRN_SRR0:
153 spr_val = vcpu->arch.shared->srr0; 153 spr_val = kvmppc_get_srr0(vcpu);
154 break; 154 break;
155 case SPRN_SRR1: 155 case SPRN_SRR1:
156 spr_val = vcpu->arch.shared->srr1; 156 spr_val = kvmppc_get_srr1(vcpu);
157 break; 157 break;
158 case SPRN_PVR: 158 case SPRN_PVR:
159 spr_val = vcpu->arch.pvr; 159 spr_val = vcpu->arch.pvr;
@@ -173,16 +173,16 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
173 break; 173 break;
174 174
175 case SPRN_SPRG0: 175 case SPRN_SPRG0:
176 spr_val = vcpu->arch.shared->sprg0; 176 spr_val = kvmppc_get_sprg0(vcpu);
177 break; 177 break;
178 case SPRN_SPRG1: 178 case SPRN_SPRG1:
179 spr_val = vcpu->arch.shared->sprg1; 179 spr_val = kvmppc_get_sprg1(vcpu);
180 break; 180 break;
181 case SPRN_SPRG2: 181 case SPRN_SPRG2:
182 spr_val = vcpu->arch.shared->sprg2; 182 spr_val = kvmppc_get_sprg2(vcpu);
183 break; 183 break;
184 case SPRN_SPRG3: 184 case SPRN_SPRG3:
185 spr_val = vcpu->arch.shared->sprg3; 185 spr_val = kvmppc_get_sprg3(vcpu);
186 break; 186 break;
187 /* Note: SPRG4-7 are user-readable, so we don't get 187 /* Note: SPRG4-7 are user-readable, so we don't get
188 * a trap. */ 188 * a trap. */
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index efbd9962a209..b68d0dc9479a 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -126,6 +126,8 @@ static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
126 u32 val, int idx); 126 u32 val, int idx);
127static int openpic_cpu_read_internal(void *opaque, gpa_t addr, 127static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
128 u32 *ptr, int idx); 128 u32 *ptr, int idx);
129static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
130 uint32_t val);
129 131
130enum irq_type { 132enum irq_type {
131 IRQ_TYPE_NORMAL = 0, 133 IRQ_TYPE_NORMAL = 0,
@@ -528,7 +530,6 @@ static void openpic_reset(struct openpic *opp)
528 /* Initialise IRQ sources */ 530 /* Initialise IRQ sources */
529 for (i = 0; i < opp->max_irq; i++) { 531 for (i = 0; i < opp->max_irq; i++) {
530 opp->src[i].ivpr = opp->ivpr_reset; 532 opp->src[i].ivpr = opp->ivpr_reset;
531 opp->src[i].idr = opp->idr_reset;
532 533
533 switch (opp->src[i].type) { 534 switch (opp->src[i].type) {
534 case IRQ_TYPE_NORMAL: 535 case IRQ_TYPE_NORMAL:
@@ -543,6 +544,8 @@ static void openpic_reset(struct openpic *opp)
543 case IRQ_TYPE_FSLSPECIAL: 544 case IRQ_TYPE_FSLSPECIAL:
544 break; 545 break;
545 } 546 }
547
548 write_IRQreg_idr(opp, i, opp->idr_reset);
546 } 549 }
547 /* Initialise IRQ destinations */ 550 /* Initialise IRQ destinations */
548 for (i = 0; i < MAX_CPU; i++) { 551 for (i = 0; i < MAX_CPU; i++) {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 3cf541a53e2a..bab20f410443 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -125,6 +125,27 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
125} 125}
126EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter); 126EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter);
127 127
128#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
129static void kvmppc_swab_shared(struct kvm_vcpu *vcpu)
130{
131 struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared;
132 int i;
133
134 shared->sprg0 = swab64(shared->sprg0);
135 shared->sprg1 = swab64(shared->sprg1);
136 shared->sprg2 = swab64(shared->sprg2);
137 shared->sprg3 = swab64(shared->sprg3);
138 shared->srr0 = swab64(shared->srr0);
139 shared->srr1 = swab64(shared->srr1);
140 shared->dar = swab64(shared->dar);
141 shared->msr = swab64(shared->msr);
142 shared->dsisr = swab32(shared->dsisr);
143 shared->int_pending = swab32(shared->int_pending);
144 for (i = 0; i < ARRAY_SIZE(shared->sr); i++)
145 shared->sr[i] = swab32(shared->sr[i]);
146}
147#endif
148
128int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) 149int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
129{ 150{
130 int nr = kvmppc_get_gpr(vcpu, 11); 151 int nr = kvmppc_get_gpr(vcpu, 11);
@@ -135,7 +156,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
135 unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6); 156 unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6);
136 unsigned long r2 = 0; 157 unsigned long r2 = 0;
137 158
138 if (!(vcpu->arch.shared->msr & MSR_SF)) { 159 if (!(kvmppc_get_msr(vcpu) & MSR_SF)) {
139 /* 32 bit mode */ 160 /* 32 bit mode */
140 param1 &= 0xffffffff; 161 param1 &= 0xffffffff;
141 param2 &= 0xffffffff; 162 param2 &= 0xffffffff;
@@ -146,8 +167,28 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
146 switch (nr) { 167 switch (nr) {
147 case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE): 168 case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE):
148 { 169 {
149 vcpu->arch.magic_page_pa = param1; 170#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
150 vcpu->arch.magic_page_ea = param2; 171 /* Book3S can be little endian, find it out here */
172 int shared_big_endian = true;
173 if (vcpu->arch.intr_msr & MSR_LE)
174 shared_big_endian = false;
175 if (shared_big_endian != vcpu->arch.shared_big_endian)
176 kvmppc_swab_shared(vcpu);
177 vcpu->arch.shared_big_endian = shared_big_endian;
178#endif
179
180 if (!(param2 & MAGIC_PAGE_FLAG_NOT_MAPPED_NX)) {
181 /*
182 * Older versions of the Linux magic page code had
183 * a bug where they would map their trampoline code
184 * NX. If that's the case, remove !PR NX capability.
185 */
186 vcpu->arch.disable_kernel_nx = true;
187 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
188 }
189
190 vcpu->arch.magic_page_pa = param1 & ~0xfffULL;
191 vcpu->arch.magic_page_ea = param2 & ~0xfffULL;
151 192
152 r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; 193 r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
153 194
@@ -375,6 +416,7 @@ int kvm_dev_ioctl_check_extension(long ext)
375 case KVM_CAP_SPAPR_TCE: 416 case KVM_CAP_SPAPR_TCE:
376 case KVM_CAP_PPC_ALLOC_HTAB: 417 case KVM_CAP_PPC_ALLOC_HTAB:
377 case KVM_CAP_PPC_RTAS: 418 case KVM_CAP_PPC_RTAS:
419 case KVM_CAP_PPC_FIXUP_HCALL:
378#ifdef CONFIG_KVM_XICS 420#ifdef CONFIG_KVM_XICS
379 case KVM_CAP_IRQ_XICS: 421 case KVM_CAP_IRQ_XICS:
380#endif 422#endif
@@ -1015,10 +1057,10 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
1015 u32 inst_nop = 0x60000000; 1057 u32 inst_nop = 0x60000000;
1016#ifdef CONFIG_KVM_BOOKE_HV 1058#ifdef CONFIG_KVM_BOOKE_HV
1017 u32 inst_sc1 = 0x44000022; 1059 u32 inst_sc1 = 0x44000022;
1018 pvinfo->hcall[0] = inst_sc1; 1060 pvinfo->hcall[0] = cpu_to_be32(inst_sc1);
1019 pvinfo->hcall[1] = inst_nop; 1061 pvinfo->hcall[1] = cpu_to_be32(inst_nop);
1020 pvinfo->hcall[2] = inst_nop; 1062 pvinfo->hcall[2] = cpu_to_be32(inst_nop);
1021 pvinfo->hcall[3] = inst_nop; 1063 pvinfo->hcall[3] = cpu_to_be32(inst_nop);
1022#else 1064#else
1023 u32 inst_lis = 0x3c000000; 1065 u32 inst_lis = 0x3c000000;
1024 u32 inst_ori = 0x60000000; 1066 u32 inst_ori = 0x60000000;
@@ -1034,10 +1076,10 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
1034 * sc 1076 * sc
1035 * nop 1077 * nop
1036 */ 1078 */
1037 pvinfo->hcall[0] = inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask); 1079 pvinfo->hcall[0] = cpu_to_be32(inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask));
1038 pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask); 1080 pvinfo->hcall[1] = cpu_to_be32(inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask));
1039 pvinfo->hcall[2] = inst_sc; 1081 pvinfo->hcall[2] = cpu_to_be32(inst_sc);
1040 pvinfo->hcall[3] = inst_nop; 1082 pvinfo->hcall[3] = cpu_to_be32(inst_nop);
1041#endif 1083#endif
1042 1084
1043 pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE; 1085 pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
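
kvmppc_swab_shared(), added at the top of this file's hunk, byte-swaps every field of the shared (magic) page when the guest re-registers it expecting the opposite endianness. A minimal model of that decision and swap, with a two-field stand-in struct and GCC bswap builtins; the field values are invented for the demo:

    #include <stdio.h>
    #include <stdint.h>

    struct demo_shared {
        uint64_t msr;
        uint32_t dsisr;
    };

    /* Swap every field in place, as kvmppc_swab_shared() does for the real struct. */
    static void demo_swab_shared(struct demo_shared *s)
    {
        s->msr = __builtin_bswap64(s->msr);
        s->dsisr = __builtin_bswap32(s->dsisr);
    }

    int main(void)
    {
        struct demo_shared s = { .msr = 0x8000000000000001ULL, .dsisr = 0x42000000 };
        int shared_big_endian = 1, guest_wants_big_endian = 0;

        if (guest_wants_big_endian != shared_big_endian)
            demo_swab_shared(&s);
        printf("msr=0x%016llx dsisr=0x%08x\n",
               (unsigned long long)s.msr, (unsigned)s.dsisr);
        return 0;
    }
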
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index 8b22e4748344..e1357cd8dc1f 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -255,7 +255,7 @@ TRACE_EVENT(kvm_exit,
255 __entry->exit_nr = exit_nr; 255 __entry->exit_nr = exit_nr;
256 __entry->pc = kvmppc_get_pc(vcpu); 256 __entry->pc = kvmppc_get_pc(vcpu);
257 __entry->dar = kvmppc_get_fault_dar(vcpu); 257 __entry->dar = kvmppc_get_fault_dar(vcpu);
258 __entry->msr = vcpu->arch.shared->msr; 258 __entry->msr = kvmppc_get_msr(vcpu);
259 __entry->srr1 = vcpu->arch.shadow_srr1; 259 __entry->srr1 = vcpu->arch.shadow_srr1;
260 __entry->last_inst = vcpu->arch.last_inst; 260 __entry->last_inst = vcpu->arch.last_inst;
261 ), 261 ),
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 9d1d33cd2be5..964a5f61488a 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -97,7 +97,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize,
97static void __slb_flush_and_rebolt(void) 97static void __slb_flush_and_rebolt(void)
98{ 98{
99 /* If you change this make sure you change SLB_NUM_BOLTED 99 /* If you change this make sure you change SLB_NUM_BOLTED
100 * appropriately too. */ 100 * and PR KVM appropriately too. */
101 unsigned long linear_llp, vmalloc_llp, lflags, vflags; 101 unsigned long linear_llp, vmalloc_llp, lflags, vflags;
102 unsigned long ksp_esid_data, ksp_vsid_data; 102 unsigned long ksp_esid_data, ksp_vsid_data;
103 103
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 4e63f1a13600..31ab9f346d7e 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -57,6 +57,20 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
57void smp_ctl_set_bit(int cr, int bit); 57void smp_ctl_set_bit(int cr, int bit);
58void smp_ctl_clear_bit(int cr, int bit); 58void smp_ctl_clear_bit(int cr, int bit);
59 59
60union ctlreg0 {
61 unsigned long val;
62 struct {
63#ifdef CONFIG_64BIT
64 unsigned long : 32;
65#endif
66 unsigned long : 3;
67 unsigned long lap : 1; /* Low-address-protection control */
68 unsigned long : 4;
69 unsigned long edat : 1; /* Enhanced-DAT-enablement control */
70 unsigned long : 23;
71 };
72};
73
60#ifdef CONFIG_SMP 74#ifdef CONFIG_SMP
61# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit) 75# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
62# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit) 76# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 154b60089be9..4181d7baabba 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -32,16 +32,26 @@
32#define KVM_NR_IRQCHIPS 1 32#define KVM_NR_IRQCHIPS 1
33#define KVM_IRQCHIP_NUM_PINS 4096 33#define KVM_IRQCHIP_NUM_PINS 4096
34 34
35#define SIGP_CTRL_C 0x00800000
36
35struct sca_entry { 37struct sca_entry {
36 atomic_t scn; 38 atomic_t ctrl;
37 __u32 reserved; 39 __u32 reserved;
38 __u64 sda; 40 __u64 sda;
39 __u64 reserved2[2]; 41 __u64 reserved2[2];
40} __attribute__((packed)); 42} __attribute__((packed));
41 43
44union ipte_control {
45 unsigned long val;
46 struct {
47 unsigned long k : 1;
48 unsigned long kh : 31;
49 unsigned long kg : 32;
50 };
51};
42 52
43struct sca_block { 53struct sca_block {
44 __u64 ipte_control; 54 union ipte_control ipte_control;
45 __u64 reserved[5]; 55 __u64 reserved[5];
46 __u64 mcn; 56 __u64 mcn;
47 __u64 reserved2; 57 __u64 reserved2;
@@ -64,6 +74,7 @@ struct sca_block {
64#define CPUSTAT_ZARCH 0x00000800 74#define CPUSTAT_ZARCH 0x00000800
65#define CPUSTAT_MCDS 0x00000100 75#define CPUSTAT_MCDS 0x00000100
66#define CPUSTAT_SM 0x00000080 76#define CPUSTAT_SM 0x00000080
77#define CPUSTAT_IBS 0x00000040
67#define CPUSTAT_G 0x00000008 78#define CPUSTAT_G 0x00000008
68#define CPUSTAT_GED 0x00000004 79#define CPUSTAT_GED 0x00000004
69#define CPUSTAT_J 0x00000002 80#define CPUSTAT_J 0x00000002
@@ -71,7 +82,9 @@ struct sca_block {
71 82
72struct kvm_s390_sie_block { 83struct kvm_s390_sie_block {
73 atomic_t cpuflags; /* 0x0000 */ 84 atomic_t cpuflags; /* 0x0000 */
74 __u32 prefix; /* 0x0004 */ 85 __u32 : 1; /* 0x0004 */
86 __u32 prefix : 18;
87 __u32 : 13;
75 __u8 reserved08[4]; /* 0x0008 */ 88 __u8 reserved08[4]; /* 0x0008 */
76#define PROG_IN_SIE (1<<0) 89#define PROG_IN_SIE (1<<0)
77 __u32 prog0c; /* 0x000c */ 90 __u32 prog0c; /* 0x000c */
@@ -85,12 +98,27 @@ struct kvm_s390_sie_block {
85 __u8 reserved40[4]; /* 0x0040 */ 98 __u8 reserved40[4]; /* 0x0040 */
86#define LCTL_CR0 0x8000 99#define LCTL_CR0 0x8000
87#define LCTL_CR6 0x0200 100#define LCTL_CR6 0x0200
101#define LCTL_CR9 0x0040
102#define LCTL_CR10 0x0020
103#define LCTL_CR11 0x0010
88#define LCTL_CR14 0x0002 104#define LCTL_CR14 0x0002
89 __u16 lctl; /* 0x0044 */ 105 __u16 lctl; /* 0x0044 */
90 __s16 icpua; /* 0x0046 */ 106 __s16 icpua; /* 0x0046 */
91#define ICTL_LPSW 0x00400000 107#define ICTL_PINT 0x20000000
108#define ICTL_LPSW 0x00400000
109#define ICTL_STCTL 0x00040000
110#define ICTL_ISKE 0x00004000
111#define ICTL_SSKE 0x00002000
112#define ICTL_RRBE 0x00001000
113#define ICTL_TPROT 0x00000200
92 __u32 ictl; /* 0x0048 */ 114 __u32 ictl; /* 0x0048 */
93 __u32 eca; /* 0x004c */ 115 __u32 eca; /* 0x004c */
116#define ICPT_INST 0x04
117#define ICPT_PROGI 0x08
118#define ICPT_INSTPROGI 0x0C
119#define ICPT_OPEREXC 0x2C
120#define ICPT_PARTEXEC 0x38
121#define ICPT_IOINST 0x40
94 __u8 icptcode; /* 0x0050 */ 122 __u8 icptcode; /* 0x0050 */
95 __u8 reserved51; /* 0x0051 */ 123 __u8 reserved51; /* 0x0051 */
96 __u16 ihcpu; /* 0x0052 */ 124 __u16 ihcpu; /* 0x0052 */
@@ -109,9 +137,24 @@ struct kvm_s390_sie_block {
109 psw_t gpsw; /* 0x0090 */ 137 psw_t gpsw; /* 0x0090 */
110 __u64 gg14; /* 0x00a0 */ 138 __u64 gg14; /* 0x00a0 */
111 __u64 gg15; /* 0x00a8 */ 139 __u64 gg15; /* 0x00a8 */
112 __u8 reservedb0[30]; /* 0x00b0 */ 140 __u8 reservedb0[20]; /* 0x00b0 */
113 __u16 iprcc; /* 0x00ce */ 141 __u16 extcpuaddr; /* 0x00c4 */
114 __u8 reservedd0[48]; /* 0x00d0 */ 142 __u16 eic; /* 0x00c6 */
143 __u32 reservedc8; /* 0x00c8 */
144 __u16 pgmilc; /* 0x00cc */
145 __u16 iprcc; /* 0x00ce */
146 __u32 dxc; /* 0x00d0 */
147 __u16 mcn; /* 0x00d4 */
148 __u8 perc; /* 0x00d6 */
149 __u8 peratmid; /* 0x00d7 */
150 __u64 peraddr; /* 0x00d8 */
151 __u8 eai; /* 0x00e0 */
152 __u8 peraid; /* 0x00e1 */
153 __u8 oai; /* 0x00e2 */
154 __u8 armid; /* 0x00e3 */
155 __u8 reservede4[4]; /* 0x00e4 */
156 __u64 tecmc; /* 0x00e8 */
157 __u8 reservedf0[16]; /* 0x00f0 */
115 __u64 gcr[16]; /* 0x0100 */ 158 __u64 gcr[16]; /* 0x0100 */
116 __u64 gbea; /* 0x0180 */ 159 __u64 gbea; /* 0x0180 */
117 __u8 reserved188[24]; /* 0x0188 */ 160 __u8 reserved188[24]; /* 0x0188 */
@@ -146,6 +189,8 @@ struct kvm_vcpu_stat {
146 u32 exit_instruction; 189 u32 exit_instruction;
147 u32 instruction_lctl; 190 u32 instruction_lctl;
148 u32 instruction_lctlg; 191 u32 instruction_lctlg;
192 u32 instruction_stctl;
193 u32 instruction_stctg;
149 u32 exit_program_interruption; 194 u32 exit_program_interruption;
150 u32 exit_instr_and_program; 195 u32 exit_instr_and_program;
151 u32 deliver_external_call; 196 u32 deliver_external_call;
@@ -164,6 +209,7 @@ struct kvm_vcpu_stat {
164 u32 instruction_stpx; 209 u32 instruction_stpx;
165 u32 instruction_stap; 210 u32 instruction_stap;
166 u32 instruction_storage_key; 211 u32 instruction_storage_key;
212 u32 instruction_ipte_interlock;
167 u32 instruction_stsch; 213 u32 instruction_stsch;
168 u32 instruction_chsc; 214 u32 instruction_chsc;
169 u32 instruction_stsi; 215 u32 instruction_stsi;
@@ -183,13 +229,58 @@ struct kvm_vcpu_stat {
183 u32 diagnose_9c; 229 u32 diagnose_9c;
184}; 230};
185 231
186#define PGM_OPERATION 0x01 232#define PGM_OPERATION 0x01
187#define PGM_PRIVILEGED_OP 0x02 233#define PGM_PRIVILEGED_OP 0x02
188#define PGM_EXECUTE 0x03 234#define PGM_EXECUTE 0x03
189#define PGM_PROTECTION 0x04 235#define PGM_PROTECTION 0x04
190#define PGM_ADDRESSING 0x05 236#define PGM_ADDRESSING 0x05
191#define PGM_SPECIFICATION 0x06 237#define PGM_SPECIFICATION 0x06
192#define PGM_DATA 0x07 238#define PGM_DATA 0x07
239#define PGM_FIXED_POINT_OVERFLOW 0x08
240#define PGM_FIXED_POINT_DIVIDE 0x09
241#define PGM_DECIMAL_OVERFLOW 0x0a
242#define PGM_DECIMAL_DIVIDE 0x0b
243#define PGM_HFP_EXPONENT_OVERFLOW 0x0c
244#define PGM_HFP_EXPONENT_UNDERFLOW 0x0d
245#define PGM_HFP_SIGNIFICANCE 0x0e
246#define PGM_HFP_DIVIDE 0x0f
247#define PGM_SEGMENT_TRANSLATION 0x10
248#define PGM_PAGE_TRANSLATION 0x11
249#define PGM_TRANSLATION_SPEC 0x12
250#define PGM_SPECIAL_OPERATION 0x13
251#define PGM_OPERAND 0x15
252#define PGM_TRACE_TABEL 0x16
253#define PGM_SPACE_SWITCH 0x1c
254#define PGM_HFP_SQUARE_ROOT 0x1d
255#define PGM_PC_TRANSLATION_SPEC 0x1f
256#define PGM_AFX_TRANSLATION 0x20
257#define PGM_ASX_TRANSLATION 0x21
258#define PGM_LX_TRANSLATION 0x22
259#define PGM_EX_TRANSLATION 0x23
260#define PGM_PRIMARY_AUTHORITY 0x24
261#define PGM_SECONDARY_AUTHORITY 0x25
262#define PGM_LFX_TRANSLATION 0x26
263#define PGM_LSX_TRANSLATION 0x27
264#define PGM_ALET_SPECIFICATION 0x28
265#define PGM_ALEN_TRANSLATION 0x29
266#define PGM_ALE_SEQUENCE 0x2a
267#define PGM_ASTE_VALIDITY 0x2b
268#define PGM_ASTE_SEQUENCE 0x2c
269#define PGM_EXTENDED_AUTHORITY 0x2d
270#define PGM_LSTE_SEQUENCE 0x2e
271#define PGM_ASTE_INSTANCE 0x2f
272#define PGM_STACK_FULL 0x30
273#define PGM_STACK_EMPTY 0x31
274#define PGM_STACK_SPECIFICATION 0x32
275#define PGM_STACK_TYPE 0x33
276#define PGM_STACK_OPERATION 0x34
277#define PGM_ASCE_TYPE 0x38
278#define PGM_REGION_FIRST_TRANS 0x39
279#define PGM_REGION_SECOND_TRANS 0x3a
280#define PGM_REGION_THIRD_TRANS 0x3b
281#define PGM_MONITOR 0x40
282#define PGM_PER 0x80
283#define PGM_CRYPTO_OPERATION 0x119
193 284
194struct kvm_s390_interrupt_info { 285struct kvm_s390_interrupt_info {
195 struct list_head list; 286 struct list_head list;
@@ -229,6 +320,45 @@ struct kvm_s390_float_interrupt {
229 unsigned int irq_count; 320 unsigned int irq_count;
230}; 321};
231 322
323struct kvm_hw_wp_info_arch {
324 unsigned long addr;
325 unsigned long phys_addr;
326 int len;
327 char *old_data;
328};
329
330struct kvm_hw_bp_info_arch {
331 unsigned long addr;
332 int len;
333};
334
335/*
336 * Only the upper 16 bits of kvm_guest_debug->control are arch specific.
337 * Further KVM_GUESTDBG flags which can be used from userspace can be found in
338 * arch/s390/include/uapi/asm/kvm.h
339 */
340#define KVM_GUESTDBG_EXIT_PENDING 0x10000000
341
342#define guestdbg_enabled(vcpu) \
343 (vcpu->guest_debug & KVM_GUESTDBG_ENABLE)
344#define guestdbg_sstep_enabled(vcpu) \
345 (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
346#define guestdbg_hw_bp_enabled(vcpu) \
347 (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
348#define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \
349 (vcpu->guest_debug & KVM_GUESTDBG_EXIT_PENDING))
350
351struct kvm_guestdbg_info_arch {
352 unsigned long cr0;
353 unsigned long cr9;
354 unsigned long cr10;
355 unsigned long cr11;
356 struct kvm_hw_bp_info_arch *hw_bp_info;
357 struct kvm_hw_wp_info_arch *hw_wp_info;
358 int nr_hw_bp;
359 int nr_hw_wp;
360 unsigned long last_bp;
361};
232 362
233struct kvm_vcpu_arch { 363struct kvm_vcpu_arch {
234 struct kvm_s390_sie_block *sie_block; 364 struct kvm_s390_sie_block *sie_block;
@@ -238,11 +368,13 @@ struct kvm_vcpu_arch {
238 struct kvm_s390_local_interrupt local_int; 368 struct kvm_s390_local_interrupt local_int;
239 struct hrtimer ckc_timer; 369 struct hrtimer ckc_timer;
240 struct tasklet_struct tasklet; 370 struct tasklet_struct tasklet;
371 struct kvm_s390_pgm_info pgm;
241 union { 372 union {
242 struct cpuid cpu_id; 373 struct cpuid cpu_id;
243 u64 stidp_data; 374 u64 stidp_data;
244 }; 375 };
245 struct gmap *gmap; 376 struct gmap *gmap;
377 struct kvm_guestdbg_info_arch guestdbg;
246#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL) 378#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
247 unsigned long pfault_token; 379 unsigned long pfault_token;
248 unsigned long pfault_select; 380 unsigned long pfault_select;
@@ -285,7 +417,10 @@ struct kvm_arch{
285 struct gmap *gmap; 417 struct gmap *gmap;
286 int css_support; 418 int css_support;
287 int use_irqchip; 419 int use_irqchip;
420 int use_cmma;
288 struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; 421 struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
422 wait_queue_head_t ipte_wq;
423 spinlock_t start_stop_lock;
289}; 424};
290 425
291#define KVM_HVA_ERR_BAD (-1UL) 426#define KVM_HVA_ERR_BAD (-1UL)
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 2070cad80e9e..4349197ab9df 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -56,13 +56,14 @@ struct _lowcore {
56 __u16 pgm_code; /* 0x008e */ 56 __u16 pgm_code; /* 0x008e */
57 __u32 trans_exc_code; /* 0x0090 */ 57 __u32 trans_exc_code; /* 0x0090 */
58 __u16 mon_class_num; /* 0x0094 */ 58 __u16 mon_class_num; /* 0x0094 */
59 __u16 per_perc_atmid; /* 0x0096 */ 59 __u8 per_code; /* 0x0096 */
60 __u8 per_atmid; /* 0x0097 */
60 __u32 per_address; /* 0x0098 */ 61 __u32 per_address; /* 0x0098 */
61 __u32 monitor_code; /* 0x009c */ 62 __u32 monitor_code; /* 0x009c */
62 __u8 exc_access_id; /* 0x00a0 */ 63 __u8 exc_access_id; /* 0x00a0 */
63 __u8 per_access_id; /* 0x00a1 */ 64 __u8 per_access_id; /* 0x00a1 */
64 __u8 op_access_id; /* 0x00a2 */ 65 __u8 op_access_id; /* 0x00a2 */
65 __u8 ar_access_id; /* 0x00a3 */ 66 __u8 ar_mode_id; /* 0x00a3 */
66 __u8 pad_0x00a4[0x00b8-0x00a4]; /* 0x00a4 */ 67 __u8 pad_0x00a4[0x00b8-0x00a4]; /* 0x00a4 */
67 __u16 subchannel_id; /* 0x00b8 */ 68 __u16 subchannel_id; /* 0x00b8 */
68 __u16 subchannel_nr; /* 0x00ba */ 69 __u16 subchannel_nr; /* 0x00ba */
@@ -195,12 +196,13 @@ struct _lowcore {
195 __u16 pgm_code; /* 0x008e */ 196 __u16 pgm_code; /* 0x008e */
196 __u32 data_exc_code; /* 0x0090 */ 197 __u32 data_exc_code; /* 0x0090 */
197 __u16 mon_class_num; /* 0x0094 */ 198 __u16 mon_class_num; /* 0x0094 */
198 __u16 per_perc_atmid; /* 0x0096 */ 199 __u8 per_code; /* 0x0096 */
200 __u8 per_atmid; /* 0x0097 */
199 __u64 per_address; /* 0x0098 */ 201 __u64 per_address; /* 0x0098 */
200 __u8 exc_access_id; /* 0x00a0 */ 202 __u8 exc_access_id; /* 0x00a0 */
201 __u8 per_access_id; /* 0x00a1 */ 203 __u8 per_access_id; /* 0x00a1 */
202 __u8 op_access_id; /* 0x00a2 */ 204 __u8 op_access_id; /* 0x00a2 */
203 __u8 ar_access_id; /* 0x00a3 */ 205 __u8 ar_mode_id; /* 0x00a3 */
204 __u8 pad_0x00a4[0x00a8-0x00a4]; /* 0x00a4 */ 206 __u8 pad_0x00a4[0x00a8-0x00a4]; /* 0x00a4 */
205 __u64 trans_exc_code; /* 0x00a8 */ 207 __u64 trans_exc_code; /* 0x00a8 */
206 __u64 monitor_code; /* 0x00b0 */ 208 __u64 monitor_code; /* 0x00b0 */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index f77695a82f64..a5e656260a70 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -16,6 +16,8 @@ typedef struct {
16 unsigned long vdso_base; 16 unsigned long vdso_base;
17 /* The mmu context has extended page tables. */ 17 /* The mmu context has extended page tables. */
18 unsigned int has_pgste:1; 18 unsigned int has_pgste:1;
19 /* The mmu context uses storage keys. */
20 unsigned int use_skey:1;
19} mm_context_t; 21} mm_context_t;
20 22
21#define INIT_MM_CONTEXT(name) \ 23#define INIT_MM_CONTEXT(name) \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 056d7eff2a16..c28f32a45af5 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -23,6 +23,7 @@ static inline int init_new_context(struct task_struct *tsk,
23 mm->context.asce_bits |= _ASCE_TYPE_REGION3; 23 mm->context.asce_bits |= _ASCE_TYPE_REGION3;
24#endif 24#endif
25 mm->context.has_pgste = 0; 25 mm->context.has_pgste = 0;
26 mm->context.use_skey = 0;
26 mm->context.asce_limit = STACK_TOP_MAX; 27 mm->context.asce_limit = STACK_TOP_MAX;
27 crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); 28 crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
28 return 0; 29 return 0;
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 884017cbfa9f..9e18a61d3df3 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -22,7 +22,8 @@ unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
22void page_table_free(struct mm_struct *, unsigned long *); 22void page_table_free(struct mm_struct *, unsigned long *);
23void page_table_free_rcu(struct mmu_gather *, unsigned long *); 23void page_table_free_rcu(struct mmu_gather *, unsigned long *);
24 24
25void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long); 25void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
26 bool init_skey);
26int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, 27int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
27 unsigned long key, bool nq); 28 unsigned long key, bool nq);
28 29
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 12f75313e086..fcba5e03839f 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -309,7 +309,8 @@ extern unsigned long MODULES_END;
309#define PGSTE_HC_BIT 0x00200000UL 309#define PGSTE_HC_BIT 0x00200000UL
310#define PGSTE_GR_BIT 0x00040000UL 310#define PGSTE_GR_BIT 0x00040000UL
311#define PGSTE_GC_BIT 0x00020000UL 311#define PGSTE_GC_BIT 0x00020000UL
312#define PGSTE_IN_BIT 0x00008000UL /* IPTE notify bit */ 312#define PGSTE_UC_BIT 0x00008000UL /* user dirty (migration) */
313#define PGSTE_IN_BIT 0x00004000UL /* IPTE notify bit */
313 314
314#else /* CONFIG_64BIT */ 315#else /* CONFIG_64BIT */
315 316
@@ -391,7 +392,8 @@ extern unsigned long MODULES_END;
391#define PGSTE_HC_BIT 0x0020000000000000UL 392#define PGSTE_HC_BIT 0x0020000000000000UL
392#define PGSTE_GR_BIT 0x0004000000000000UL 393#define PGSTE_GR_BIT 0x0004000000000000UL
393#define PGSTE_GC_BIT 0x0002000000000000UL 394#define PGSTE_GC_BIT 0x0002000000000000UL
394#define PGSTE_IN_BIT 0x0000800000000000UL /* IPTE notify bit */ 395#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */
396#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */
395 397
396#endif /* CONFIG_64BIT */ 398#endif /* CONFIG_64BIT */
397 399
@@ -466,6 +468,16 @@ static inline int mm_has_pgste(struct mm_struct *mm)
466#endif 468#endif
467 return 0; 469 return 0;
468} 470}
471
472static inline int mm_use_skey(struct mm_struct *mm)
473{
474#ifdef CONFIG_PGSTE
475 if (mm->context.use_skey)
476 return 1;
477#endif
478 return 0;
479}
480
469/* 481/*
470 * pgd/pmd/pte query functions 482 * pgd/pmd/pte query functions
471 */ 483 */
@@ -699,26 +711,17 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste)
699#endif 711#endif
700} 712}
701 713
702static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) 714static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
715 struct mm_struct *mm)
703{ 716{
704#ifdef CONFIG_PGSTE 717#ifdef CONFIG_PGSTE
705 unsigned long address, bits, skey; 718 unsigned long address, bits, skey;
706 719
707 if (pte_val(*ptep) & _PAGE_INVALID) 720 if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
708 return pgste; 721 return pgste;
709 address = pte_val(*ptep) & PAGE_MASK; 722 address = pte_val(*ptep) & PAGE_MASK;
710 skey = (unsigned long) page_get_storage_key(address); 723 skey = (unsigned long) page_get_storage_key(address);
711 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); 724 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
712 if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) {
713 /* Transfer dirty + referenced bit to host bits in pgste */
714 pgste_val(pgste) |= bits << 52;
715 page_set_storage_key(address, skey ^ bits, 0);
716 } else if (!(pgste_val(pgste) & PGSTE_HR_BIT) &&
717 (bits & _PAGE_REFERENCED)) {
718 /* Transfer referenced bit to host bit in pgste */
719 pgste_val(pgste) |= PGSTE_HR_BIT;
720 page_reset_referenced(address);
721 }
722 /* Transfer page changed & referenced bit to guest bits in pgste */ 725 /* Transfer page changed & referenced bit to guest bits in pgste */
723 pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ 726 pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
724 /* Copy page access key and fetch protection bit to pgste */ 727 /* Copy page access key and fetch protection bit to pgste */
@@ -729,25 +732,14 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
729 732
730} 733}
731 734
732static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) 735static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
733{ 736 struct mm_struct *mm)
734#ifdef CONFIG_PGSTE
735 if (pte_val(*ptep) & _PAGE_INVALID)
736 return pgste;
737 /* Get referenced bit from storage key */
738 if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
739 pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT;
740#endif
741 return pgste;
742}
743
744static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
745{ 737{
746#ifdef CONFIG_PGSTE 738#ifdef CONFIG_PGSTE
747 unsigned long address; 739 unsigned long address;
748 unsigned long nkey; 740 unsigned long nkey;
749 741
750 if (pte_val(entry) & _PAGE_INVALID) 742 if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
751 return; 743 return;
752 VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); 744 VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
753 address = pte_val(entry) & PAGE_MASK; 745 address = pte_val(entry) & PAGE_MASK;
@@ -757,23 +749,30 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
757 * key C/R to 0. 749 * key C/R to 0.
758 */ 750 */
759 nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; 751 nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
752 nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
760 page_set_storage_key(address, nkey, 0); 753 page_set_storage_key(address, nkey, 0);
761#endif 754#endif
762} 755}
763 756
764static inline void pgste_set_pte(pte_t *ptep, pte_t entry) 757static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
765{ 758{
766 if (!MACHINE_HAS_ESOP && 759 if ((pte_val(entry) & _PAGE_PRESENT) &&
767 (pte_val(entry) & _PAGE_PRESENT) && 760 (pte_val(entry) & _PAGE_WRITE) &&
768 (pte_val(entry) & _PAGE_WRITE)) { 761 !(pte_val(entry) & _PAGE_INVALID)) {
769 /* 762 if (!MACHINE_HAS_ESOP) {
770 * Without enhanced suppression-on-protection force 763 /*
771 * the dirty bit on for all writable ptes. 764 * Without enhanced suppression-on-protection force
772 */ 765 * the dirty bit on for all writable ptes.
773 pte_val(entry) |= _PAGE_DIRTY; 766 */
774 pte_val(entry) &= ~_PAGE_PROTECT; 767 pte_val(entry) |= _PAGE_DIRTY;
768 pte_val(entry) &= ~_PAGE_PROTECT;
769 }
770 if (!(pte_val(entry) & _PAGE_PROTECT))
771 /* This pte allows write access, set user-dirty */
772 pgste_val(pgste) |= PGSTE_UC_BIT;
775 } 773 }
776 *ptep = entry; 774 *ptep = entry;
775 return pgste;
777} 776}
778 777
779/** 778/**
@@ -839,6 +838,8 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *);
839unsigned long gmap_fault(unsigned long address, struct gmap *); 838unsigned long gmap_fault(unsigned long address, struct gmap *);
840void gmap_discard(unsigned long from, unsigned long to, struct gmap *); 839void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
841void __gmap_zap(unsigned long address, struct gmap *); 840void __gmap_zap(unsigned long address, struct gmap *);
841bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
842
842 843
843void gmap_register_ipte_notifier(struct gmap_notifier *); 844void gmap_register_ipte_notifier(struct gmap_notifier *);
844void gmap_unregister_ipte_notifier(struct gmap_notifier *); 845void gmap_unregister_ipte_notifier(struct gmap_notifier *);
@@ -870,8 +871,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
870 if (mm_has_pgste(mm)) { 871 if (mm_has_pgste(mm)) {
871 pgste = pgste_get_lock(ptep); 872 pgste = pgste_get_lock(ptep);
872 pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; 873 pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
873 pgste_set_key(ptep, pgste, entry); 874 pgste_set_key(ptep, pgste, entry, mm);
874 pgste_set_pte(ptep, entry); 875 pgste = pgste_set_pte(ptep, pgste, entry);
875 pgste_set_unlock(ptep, pgste); 876 pgste_set_unlock(ptep, pgste);
876 } else { 877 } else {
877 if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1) 878 if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1)
@@ -1017,45 +1018,6 @@ static inline pte_t pte_mkhuge(pte_t pte)
1017} 1018}
1018#endif 1019#endif
1019 1020
1020/*
1021 * Get (and clear) the user dirty bit for a pte.
1022 */
1023static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
1024 pte_t *ptep)
1025{
1026 pgste_t pgste;
1027 int dirty = 0;
1028
1029 if (mm_has_pgste(mm)) {
1030 pgste = pgste_get_lock(ptep);
1031 pgste = pgste_update_all(ptep, pgste);
1032 dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT);
1033 pgste_val(pgste) &= ~PGSTE_HC_BIT;
1034 pgste_set_unlock(ptep, pgste);
1035 return dirty;
1036 }
1037 return dirty;
1038}
1039
1040/*
1041 * Get (and clear) the user referenced bit for a pte.
1042 */
1043static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
1044 pte_t *ptep)
1045{
1046 pgste_t pgste;
1047 int young = 0;
1048
1049 if (mm_has_pgste(mm)) {
1050 pgste = pgste_get_lock(ptep);
1051 pgste = pgste_update_young(ptep, pgste);
1052 young = !!(pgste_val(pgste) & PGSTE_HR_BIT);
1053 pgste_val(pgste) &= ~PGSTE_HR_BIT;
1054 pgste_set_unlock(ptep, pgste);
1055 }
1056 return young;
1057}
1058
1059static inline void __ptep_ipte(unsigned long address, pte_t *ptep) 1021static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
1060{ 1022{
1061 unsigned long pto = (unsigned long) ptep; 1023 unsigned long pto = (unsigned long) ptep;
@@ -1118,6 +1080,36 @@ static inline void ptep_flush_lazy(struct mm_struct *mm,
1118 atomic_sub(0x10000, &mm->context.attach_count); 1080 atomic_sub(0x10000, &mm->context.attach_count);
1119} 1081}
1120 1082
1083/*
1084 * Get (and clear) the user dirty bit for a pte.
1085 */
1086static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
1087 unsigned long addr,
1088 pte_t *ptep)
1089{
1090 pgste_t pgste;
1091 pte_t pte;
1092 int dirty;
1093
1094 if (!mm_has_pgste(mm))
1095 return 0;
1096 pgste = pgste_get_lock(ptep);
1097 dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
1098 pgste_val(pgste) &= ~PGSTE_UC_BIT;
1099 pte = *ptep;
1100 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
1101 pgste = pgste_ipte_notify(mm, ptep, pgste);
1102 __ptep_ipte(addr, ptep);
1103 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
1104 pte_val(pte) |= _PAGE_PROTECT;
1105 else
1106 pte_val(pte) |= _PAGE_INVALID;
1107 *ptep = pte;
1108 }
1109 pgste_set_unlock(ptep, pgste);
1110 return dirty;
1111}
1112
1121#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG 1113#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
1122static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, 1114static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
1123 unsigned long addr, pte_t *ptep) 1115 unsigned long addr, pte_t *ptep)
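
A sketch, not part of the patch, of how the new user-dirty tracking could feed a migration bitmap: the writable-PTE path above sets PGSTE_UC_BIT, and gmap_test_and_clear_dirty(), declared in the earlier pgtable.h hunk, harvests and clears it per guest page. The helper below and its calling convention are hypothetical.

/* Hypothetical helper, not part of the patch. */
static void collect_dirty_pages(struct gmap *gmap, unsigned long start,
                                unsigned long npages, unsigned long *bitmap)
{
        unsigned long i;

        for (i = 0; i < npages; i++)
                if (gmap_test_and_clear_dirty(start + i * PAGE_SIZE, gmap))
                        set_bit(i, bitmap);
}
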
@@ -1137,7 +1129,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
1137 pte = pte_mkold(pte); 1129 pte = pte_mkold(pte);
1138 1130
1139 if (mm_has_pgste(vma->vm_mm)) { 1131 if (mm_has_pgste(vma->vm_mm)) {
1140 pgste_set_pte(ptep, pte); 1132 pgste = pgste_set_pte(ptep, pgste, pte);
1141 pgste_set_unlock(ptep, pgste); 1133 pgste_set_unlock(ptep, pgste);
1142 } else 1134 } else
1143 *ptep = pte; 1135 *ptep = pte;
@@ -1182,7 +1174,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
1182 pte_val(*ptep) = _PAGE_INVALID; 1174 pte_val(*ptep) = _PAGE_INVALID;
1183 1175
1184 if (mm_has_pgste(mm)) { 1176 if (mm_has_pgste(mm)) {
1185 pgste = pgste_update_all(&pte, pgste); 1177 pgste = pgste_update_all(&pte, pgste, mm);
1186 pgste_set_unlock(ptep, pgste); 1178 pgste_set_unlock(ptep, pgste);
1187 } 1179 }
1188 return pte; 1180 return pte;
@@ -1205,7 +1197,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
1205 ptep_flush_lazy(mm, address, ptep); 1197 ptep_flush_lazy(mm, address, ptep);
1206 1198
1207 if (mm_has_pgste(mm)) { 1199 if (mm_has_pgste(mm)) {
1208 pgste = pgste_update_all(&pte, pgste); 1200 pgste = pgste_update_all(&pte, pgste, mm);
1209 pgste_set(ptep, pgste); 1201 pgste_set(ptep, pgste);
1210 } 1202 }
1211 return pte; 1203 return pte;
@@ -1219,8 +1211,8 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
1219 1211
1220 if (mm_has_pgste(mm)) { 1212 if (mm_has_pgste(mm)) {
1221 pgste = pgste_get(ptep); 1213 pgste = pgste_get(ptep);
1222 pgste_set_key(ptep, pgste, pte); 1214 pgste_set_key(ptep, pgste, pte, mm);
1223 pgste_set_pte(ptep, pte); 1215 pgste = pgste_set_pte(ptep, pgste, pte);
1224 pgste_set_unlock(ptep, pgste); 1216 pgste_set_unlock(ptep, pgste);
1225 } else 1217 } else
1226 *ptep = pte; 1218 *ptep = pte;
@@ -1246,7 +1238,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
1246 if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == 1238 if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
1247 _PGSTE_GPS_USAGE_UNUSED) 1239 _PGSTE_GPS_USAGE_UNUSED)
1248 pte_val(pte) |= _PAGE_UNUSED; 1240 pte_val(pte) |= _PAGE_UNUSED;
1249 pgste = pgste_update_all(&pte, pgste); 1241 pgste = pgste_update_all(&pte, pgste, vma->vm_mm);
1250 pgste_set_unlock(ptep, pgste); 1242 pgste_set_unlock(ptep, pgste);
1251 } 1243 }
1252 return pte; 1244 return pte;
@@ -1278,7 +1270,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
1278 pte_val(*ptep) = _PAGE_INVALID; 1270 pte_val(*ptep) = _PAGE_INVALID;
1279 1271
1280 if (!full && mm_has_pgste(mm)) { 1272 if (!full && mm_has_pgste(mm)) {
1281 pgste = pgste_update_all(&pte, pgste); 1273 pgste = pgste_update_all(&pte, pgste, mm);
1282 pgste_set_unlock(ptep, pgste); 1274 pgste_set_unlock(ptep, pgste);
1283 } 1275 }
1284 return pte; 1276 return pte;
@@ -1301,7 +1293,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
1301 pte = pte_wrprotect(pte); 1293 pte = pte_wrprotect(pte);
1302 1294
1303 if (mm_has_pgste(mm)) { 1295 if (mm_has_pgste(mm)) {
1304 pgste_set_pte(ptep, pte); 1296 pgste = pgste_set_pte(ptep, pgste, pte);
1305 pgste_set_unlock(ptep, pgste); 1297 pgste_set_unlock(ptep, pgste);
1306 } else 1298 } else
1307 *ptep = pte; 1299 *ptep = pte;
@@ -1326,7 +1318,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
1326 ptep_flush_direct(vma->vm_mm, address, ptep); 1318 ptep_flush_direct(vma->vm_mm, address, ptep);
1327 1319
1328 if (mm_has_pgste(vma->vm_mm)) { 1320 if (mm_has_pgste(vma->vm_mm)) {
1329 pgste_set_pte(ptep, entry); 1321 pgste = pgste_set_pte(ptep, pgste, entry);
1330 pgste_set_unlock(ptep, pgste); 1322 pgste_set_unlock(ptep, pgste);
1331 } else 1323 } else
1332 *ptep = entry; 1324 *ptep = entry;
@@ -1734,6 +1726,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
1734extern int vmem_add_mapping(unsigned long start, unsigned long size); 1726extern int vmem_add_mapping(unsigned long start, unsigned long size);
1735extern int vmem_remove_mapping(unsigned long start, unsigned long size); 1727extern int vmem_remove_mapping(unsigned long start, unsigned long size);
1736extern int s390_enable_sie(void); 1728extern int s390_enable_sie(void);
1729extern void s390_enable_skey(void);
1737 1730
1738/* 1731/*
1739 * No page table caches to initialise 1732 * No page table caches to initialise
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 1b5300cd6d22..55d69dd7473c 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -22,6 +22,50 @@
22 PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \ 22 PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
23 PSW_MASK_PSTATE | PSW_ASC_PRIMARY) 23 PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
24 24
25struct psw_bits {
26 unsigned long long : 1;
27 unsigned long long r : 1; /* PER-Mask */
28 unsigned long long : 3;
29 unsigned long long t : 1; /* DAT Mode */
30 unsigned long long i : 1; /* Input/Output Mask */
31 unsigned long long e : 1; /* External Mask */
32 unsigned long long key : 4; /* PSW Key */
33 unsigned long long : 1;
34 unsigned long long m : 1; /* Machine-Check Mask */
35 unsigned long long w : 1; /* Wait State */
36 unsigned long long p : 1; /* Problem State */
37 unsigned long long as : 2; /* Address Space Control */
38 unsigned long long cc : 2; /* Condition Code */
39 unsigned long long pm : 4; /* Program Mask */
40 unsigned long long ri : 1; /* Runtime Instrumentation */
41 unsigned long long : 6;
42 unsigned long long eaba : 2; /* Addressing Mode */
43#ifdef CONFIG_64BIT
44 unsigned long long : 31;
45 unsigned long long ia : 64;/* Instruction Address */
46#else
47 unsigned long long ia : 31;/* Instruction Address */
48#endif
49};
50
51enum {
52 PSW_AMODE_24BIT = 0,
53 PSW_AMODE_31BIT = 1,
54 PSW_AMODE_64BIT = 3
55};
56
57enum {
58 PSW_AS_PRIMARY = 0,
59 PSW_AS_ACCREG = 1,
60 PSW_AS_SECONDARY = 2,
61 PSW_AS_HOME = 3
62};
63
64#define psw_bits(__psw) (*({ \
65 typecheck(psw_t, __psw); \
66 &(*(struct psw_bits *)(&(__psw))); \
67}))
68
25/* 69/*
26 * The pt_regs struct defines the way the registers are stored on 70 * The pt_regs struct defines the way the registers are stored on
27 * the stack during a system call. 71 * the stack during a system call.
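
An illustrative sketch, not part of the patch, of the accessor added above: psw_bits() reinterprets a psw_t in place, so individual PSW fields can be read or written by name. The pt_regs pointer is only an example source of a PSW.

/* Illustrative only; regs is just an example source of a psw_t. */
static bool psw_is_home_space(struct pt_regs *regs)
{
        return psw_bits(regs->psw).t &&
               psw_bits(regs->psw).as == PSW_AS_HOME;
}

static void psw_force_64bit(psw_t *psw)
{
        psw_bits(*psw).eaba = PSW_AMODE_64BIT;
}
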
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 2f5e9932b4de..1aba89b53cb9 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -28,7 +28,11 @@ struct sclp_ipl_info {
28 28
29struct sclp_cpu_entry { 29struct sclp_cpu_entry {
30 u8 address; 30 u8 address;
31 u8 reserved0[13]; 31 u8 reserved0[2];
32 u8 : 3;
33 u8 siif : 1;
34 u8 : 4;
35 u8 reserved2[10];
32 u8 type; 36 u8 type;
33 u8 reserved1; 37 u8 reserved1;
34} __attribute__((packed)); 38} __attribute__((packed));
@@ -61,5 +65,7 @@ int sclp_pci_deconfigure(u32 fid);
61int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode); 65int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
62unsigned long sclp_get_hsa_size(void); 66unsigned long sclp_get_hsa_size(void);
63void sclp_early_detect(void); 67void sclp_early_detect(void);
68int sclp_has_siif(void);
69unsigned int sclp_get_ibc(void);
64 70
65#endif /* _ASM_S390_SCLP_H */ 71#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index c003c6a73b1e..0fc26430a1e5 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -15,6 +15,7 @@
15#include <linux/types.h> 15#include <linux/types.h>
16 16
17#define __KVM_S390 17#define __KVM_S390
18#define __KVM_HAVE_GUEST_DEBUG
18 19
19/* Device control API: s390-specific devices */ 20/* Device control API: s390-specific devices */
20#define KVM_DEV_FLIC_GET_ALL_IRQS 1 21#define KVM_DEV_FLIC_GET_ALL_IRQS 1
@@ -54,6 +55,13 @@ struct kvm_s390_io_adapter_req {
54 __u64 addr; 55 __u64 addr;
55}; 56};
56 57
58/* kvm attr_group on vm fd */
59#define KVM_S390_VM_MEM_CTRL 0
60
61/* kvm attributes for mem_ctrl */
62#define KVM_S390_VM_MEM_ENABLE_CMMA 0
63#define KVM_S390_VM_MEM_CLR_CMMA 1
64
57/* for KVM_GET_REGS and KVM_SET_REGS */ 65/* for KVM_GET_REGS and KVM_SET_REGS */
58struct kvm_regs { 66struct kvm_regs {
59 /* general purpose regs for s390 */ 67 /* general purpose regs for s390 */
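
A userspace sketch, not part of the patch, of enabling CMMA through the new VM attribute group. It assumes the generic KVM_SET_DEVICE_ATTR ioctl is accepted on the VM file descriptor, as the vm.txt addition in this series describes; vm_fd is a placeholder.

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_cmma(int vm_fd)
{
        struct kvm_device_attr attr = {
                .group = KVM_S390_VM_MEM_CTRL,
                .attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
        };

        if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr)) {
                perror("KVM_SET_DEVICE_ATTR");
                return -1;
        }
        return 0;
}
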
@@ -72,11 +80,31 @@ struct kvm_fpu {
72 __u64 fprs[16]; 80 __u64 fprs[16];
73}; 81};
74 82
83#define KVM_GUESTDBG_USE_HW_BP 0x00010000
84
85#define KVM_HW_BP 1
86#define KVM_HW_WP_WRITE 2
87#define KVM_SINGLESTEP 4
88
75struct kvm_debug_exit_arch { 89struct kvm_debug_exit_arch {
90 __u64 addr;
91 __u8 type;
92 __u8 pad[7]; /* Should be set to 0 */
93};
94
95struct kvm_hw_breakpoint {
96 __u64 addr;
97 __u64 phys_addr;
98 __u64 len;
99 __u8 type;
100 __u8 pad[7]; /* Should be set to 0 */
76}; 101};
77 102
78/* for KVM_SET_GUEST_DEBUG */ 103/* for KVM_SET_GUEST_DEBUG */
79struct kvm_guest_debug_arch { 104struct kvm_guest_debug_arch {
105 __u32 nr_hw_bp;
106 __u32 pad; /* Should be set to 0 */
107 struct kvm_hw_breakpoint __user *hw_bp;
80}; 108};
81 109
82#define KVM_SYNC_PREFIX (1UL << 0) 110#define KVM_SYNC_PREFIX (1UL << 0)
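
A userspace sketch, not part of the patch, arming a single hardware breakpoint through the structures defined above; vcpu_fd and bp_addr are placeholders, and KVM_GUESTDBG_ENABLE comes from the generic part of <linux/kvm.h>.

#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_hw_breakpoint(int vcpu_fd, uint64_t bp_addr)
{
        struct kvm_hw_breakpoint bp = {
                .addr = bp_addr,
                .type = KVM_HW_BP,
        };
        struct kvm_guest_debug dbg = {
                .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
                .arch = {
                        .nr_hw_bp = 1,
                        .hw_bp = &bp,
                },
        };

        if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg)) {
                perror("KVM_SET_GUEST_DEBUG");
                return -1;
        }
        return 0;
}
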
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
new file mode 100644
index 000000000000..3d97f610198d
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -0,0 +1,245 @@
1#ifndef _UAPI_ASM_S390_SIE_H
2#define _UAPI_ASM_S390_SIE_H
3
4#include <asm/sigp.h>
5
6#define diagnose_codes \
7 { 0x10, "DIAG (0x10) release pages" }, \
8 { 0x44, "DIAG (0x44) time slice end" }, \
9 { 0x9c, "DIAG (0x9c) time slice end directed" }, \
10 { 0x204, "DIAG (0x204) logical-cpu utilization" }, \
11 { 0x258, "DIAG (0x258) page-reference services" }, \
12 { 0x308, "DIAG (0x308) ipl functions" }, \
13 { 0x500, "DIAG (0x500) KVM virtio functions" }, \
14 { 0x501, "DIAG (0x501) KVM breakpoint" }
15
16#define sigp_order_codes \
17 { SIGP_SENSE, "SIGP sense" }, \
18 { SIGP_EXTERNAL_CALL, "SIGP external call" }, \
19 { SIGP_EMERGENCY_SIGNAL, "SIGP emergency signal" }, \
20 { SIGP_STOP, "SIGP stop" }, \
21 { SIGP_STOP_AND_STORE_STATUS, "SIGP stop and store status" }, \
22 { SIGP_SET_ARCHITECTURE, "SIGP set architecture" }, \
23 { SIGP_SET_PREFIX, "SIGP set prefix" }, \
24 { SIGP_SENSE_RUNNING, "SIGP sense running" }, \
25 { SIGP_RESTART, "SIGP restart" }, \
26 { SIGP_INITIAL_CPU_RESET, "SIGP initial cpu reset" }, \
27 { SIGP_STORE_STATUS_AT_ADDRESS, "SIGP store status at address" }
28
29#define icpt_prog_codes \
30 { 0x0001, "Prog Operation" }, \
31 { 0x0002, "Prog Privileged Operation" }, \
32 { 0x0003, "Prog Execute" }, \
33 { 0x0004, "Prog Protection" }, \
34 { 0x0005, "Prog Addressing" }, \
35 { 0x0006, "Prog Specification" }, \
36 { 0x0007, "Prog Data" }, \
37 { 0x0008, "Prog Fixedpoint overflow" }, \
38 { 0x0009, "Prog Fixedpoint divide" }, \
39 { 0x000A, "Prog Decimal overflow" }, \
40 { 0x000B, "Prog Decimal divide" }, \
41 { 0x000C, "Prog HFP exponent overflow" }, \
42 { 0x000D, "Prog HFP exponent underflow" }, \
43 { 0x000E, "Prog HFP significance" }, \
44 { 0x000F, "Prog HFP divide" }, \
45 { 0x0010, "Prog Segment translation" }, \
46 { 0x0011, "Prog Page translation" }, \
47 { 0x0012, "Prog Translation specification" }, \
48 { 0x0013, "Prog Special operation" }, \
49 { 0x0015, "Prog Operand" }, \
50 { 0x0016, "Prog Trace table" }, \
51 { 0x0017, "Prog ASNtranslation specification" }, \
52 { 0x001C, "Prog Spaceswitch event" }, \
53 { 0x001D, "Prog HFP square root" }, \
54 { 0x001F, "Prog PCtranslation specification" }, \
55 { 0x0020, "Prog AFX translation" }, \
56 { 0x0021, "Prog ASX translation" }, \
57 { 0x0022, "Prog LX translation" }, \
58 { 0x0023, "Prog EX translation" }, \
59 { 0x0024, "Prog Primary authority" }, \
60 { 0x0025, "Prog Secondary authority" }, \
61 { 0x0026, "Prog LFXtranslation exception" }, \
62 { 0x0027, "Prog LSXtranslation exception" }, \
63 { 0x0028, "Prog ALET specification" }, \
64 { 0x0029, "Prog ALEN translation" }, \
65 { 0x002A, "Prog ALE sequence" }, \
66 { 0x002B, "Prog ASTE validity" }, \
67 { 0x002C, "Prog ASTE sequence" }, \
68 { 0x002D, "Prog Extended authority" }, \
69 { 0x002E, "Prog LSTE sequence" }, \
70 { 0x002F, "Prog ASTE instance" }, \
71 { 0x0030, "Prog Stack full" }, \
72 { 0x0031, "Prog Stack empty" }, \
73 { 0x0032, "Prog Stack specification" }, \
74 { 0x0033, "Prog Stack type" }, \
75 { 0x0034, "Prog Stack operation" }, \
76 { 0x0039, "Prog Region first translation" }, \
77 { 0x003A, "Prog Region second translation" }, \
78 { 0x003B, "Prog Region third translation" }, \
79 { 0x0040, "Prog Monitor event" }, \
80 { 0x0080, "Prog PER event" }, \
81 { 0x0119, "Prog Crypto operation" }
82
83#define exit_code_ipa0(ipa0, opcode, mnemonic) \
84 { (ipa0 << 8 | opcode), #ipa0 " " mnemonic }
85#define exit_code(opcode, mnemonic) \
86 { opcode, mnemonic }
87
88#define icpt_insn_codes \
89 exit_code_ipa0(0x01, 0x01, "PR"), \
90 exit_code_ipa0(0x01, 0x04, "PTFF"), \
91 exit_code_ipa0(0x01, 0x07, "SCKPF"), \
92 exit_code_ipa0(0xAA, 0x00, "RINEXT"), \
93 exit_code_ipa0(0xAA, 0x01, "RION"), \
94 exit_code_ipa0(0xAA, 0x02, "TRIC"), \
95 exit_code_ipa0(0xAA, 0x03, "RIOFF"), \
96 exit_code_ipa0(0xAA, 0x04, "RIEMIT"), \
97 exit_code_ipa0(0xB2, 0x02, "STIDP"), \
98 exit_code_ipa0(0xB2, 0x04, "SCK"), \
99 exit_code_ipa0(0xB2, 0x05, "STCK"), \
100 exit_code_ipa0(0xB2, 0x06, "SCKC"), \
101 exit_code_ipa0(0xB2, 0x07, "STCKC"), \
102 exit_code_ipa0(0xB2, 0x08, "SPT"), \
103 exit_code_ipa0(0xB2, 0x09, "STPT"), \
104 exit_code_ipa0(0xB2, 0x0d, "PTLB"), \
105 exit_code_ipa0(0xB2, 0x10, "SPX"), \
106 exit_code_ipa0(0xB2, 0x11, "STPX"), \
107 exit_code_ipa0(0xB2, 0x12, "STAP"), \
108 exit_code_ipa0(0xB2, 0x14, "SIE"), \
109 exit_code_ipa0(0xB2, 0x16, "SETR"), \
110 exit_code_ipa0(0xB2, 0x17, "STETR"), \
111 exit_code_ipa0(0xB2, 0x18, "PC"), \
112 exit_code_ipa0(0xB2, 0x20, "SERVC"), \
113 exit_code_ipa0(0xB2, 0x28, "PT"), \
114 exit_code_ipa0(0xB2, 0x29, "ISKE"), \
115 exit_code_ipa0(0xB2, 0x2a, "RRBE"), \
116 exit_code_ipa0(0xB2, 0x2b, "SSKE"), \
117 exit_code_ipa0(0xB2, 0x2c, "TB"), \
118 exit_code_ipa0(0xB2, 0x2e, "PGIN"), \
119 exit_code_ipa0(0xB2, 0x2f, "PGOUT"), \
120 exit_code_ipa0(0xB2, 0x30, "CSCH"), \
121 exit_code_ipa0(0xB2, 0x31, "HSCH"), \
122 exit_code_ipa0(0xB2, 0x32, "MSCH"), \
123 exit_code_ipa0(0xB2, 0x33, "SSCH"), \
124 exit_code_ipa0(0xB2, 0x34, "STSCH"), \
125 exit_code_ipa0(0xB2, 0x35, "TSCH"), \
126 exit_code_ipa0(0xB2, 0x36, "TPI"), \
127 exit_code_ipa0(0xB2, 0x37, "SAL"), \
128 exit_code_ipa0(0xB2, 0x38, "RSCH"), \
129 exit_code_ipa0(0xB2, 0x39, "STCRW"), \
130 exit_code_ipa0(0xB2, 0x3a, "STCPS"), \
131 exit_code_ipa0(0xB2, 0x3b, "RCHP"), \
132 exit_code_ipa0(0xB2, 0x3c, "SCHM"), \
133 exit_code_ipa0(0xB2, 0x40, "BAKR"), \
134 exit_code_ipa0(0xB2, 0x48, "PALB"), \
135 exit_code_ipa0(0xB2, 0x4c, "TAR"), \
136 exit_code_ipa0(0xB2, 0x50, "CSP"), \
137 exit_code_ipa0(0xB2, 0x54, "MVPG"), \
138 exit_code_ipa0(0xB2, 0x58, "BSG"), \
139 exit_code_ipa0(0xB2, 0x5a, "BSA"), \
140 exit_code_ipa0(0xB2, 0x5f, "CHSC"), \
141 exit_code_ipa0(0xB2, 0x74, "SIGA"), \
142 exit_code_ipa0(0xB2, 0x76, "XSCH"), \
143 exit_code_ipa0(0xB2, 0x78, "STCKE"), \
144 exit_code_ipa0(0xB2, 0x7c, "STCKF"), \
145 exit_code_ipa0(0xB2, 0x7d, "STSI"), \
146 exit_code_ipa0(0xB2, 0xb0, "STFLE"), \
147 exit_code_ipa0(0xB2, 0xb1, "STFL"), \
148 exit_code_ipa0(0xB2, 0xb2, "LPSWE"), \
149 exit_code_ipa0(0xB2, 0xf8, "TEND"), \
150 exit_code_ipa0(0xB2, 0xfc, "TABORT"), \
151 exit_code_ipa0(0xB9, 0x1e, "KMAC"), \
152 exit_code_ipa0(0xB9, 0x28, "PCKMO"), \
153 exit_code_ipa0(0xB9, 0x2a, "KMF"), \
154 exit_code_ipa0(0xB9, 0x2b, "KMO"), \
155 exit_code_ipa0(0xB9, 0x2d, "KMCTR"), \
156 exit_code_ipa0(0xB9, 0x2e, "KM"), \
157 exit_code_ipa0(0xB9, 0x2f, "KMC"), \
158 exit_code_ipa0(0xB9, 0x3e, "KIMD"), \
159 exit_code_ipa0(0xB9, 0x3f, "KLMD"), \
160 exit_code_ipa0(0xB9, 0x8a, "CSPG"), \
161 exit_code_ipa0(0xB9, 0x8d, "EPSW"), \
162 exit_code_ipa0(0xB9, 0x8e, "IDTE"), \
163 exit_code_ipa0(0xB9, 0x8f, "CRDTE"), \
164 exit_code_ipa0(0xB9, 0x9c, "EQBS"), \
165 exit_code_ipa0(0xB9, 0xa2, "PTF"), \
166 exit_code_ipa0(0xB9, 0xab, "ESSA"), \
167 exit_code_ipa0(0xB9, 0xae, "RRBM"), \
168 exit_code_ipa0(0xB9, 0xaf, "PFMF"), \
169 exit_code_ipa0(0xE3, 0x03, "LRAG"), \
170 exit_code_ipa0(0xE3, 0x13, "LRAY"), \
171 exit_code_ipa0(0xE3, 0x25, "NTSTG"), \
172 exit_code_ipa0(0xE5, 0x00, "LASP"), \
173 exit_code_ipa0(0xE5, 0x01, "TPROT"), \
174 exit_code_ipa0(0xE5, 0x60, "TBEGIN"), \
175 exit_code_ipa0(0xE5, 0x61, "TBEGINC"), \
176 exit_code_ipa0(0xEB, 0x25, "STCTG"), \
177 exit_code_ipa0(0xEB, 0x2f, "LCTLG"), \
178 exit_code_ipa0(0xEB, 0x60, "LRIC"), \
179 exit_code_ipa0(0xEB, 0x61, "STRIC"), \
180 exit_code_ipa0(0xEB, 0x62, "MRIC"), \
181 exit_code_ipa0(0xEB, 0x8a, "SQBS"), \
182 exit_code_ipa0(0xC8, 0x01, "ECTG"), \
183 exit_code(0x0a, "SVC"), \
184 exit_code(0x80, "SSM"), \
185 exit_code(0x82, "LPSW"), \
186 exit_code(0x83, "DIAG"), \
187 exit_code(0xae, "SIGP"), \
188 exit_code(0xac, "STNSM"), \
189 exit_code(0xad, "STOSM"), \
190 exit_code(0xb1, "LRA"), \
191 exit_code(0xb6, "STCTL"), \
192 exit_code(0xb7, "LCTL"), \
193 exit_code(0xee, "PLO")
194
195#define sie_intercept_code \
196 { 0x00, "Host interruption" }, \
197 { 0x04, "Instruction" }, \
198 { 0x08, "Program interruption" }, \
199 { 0x0c, "Instruction and program interruption" }, \
200 { 0x10, "External request" }, \
201 { 0x14, "External interruption" }, \
202 { 0x18, "I/O request" }, \
203 { 0x1c, "Wait state" }, \
204 { 0x20, "Validity" }, \
205 { 0x28, "Stop request" }, \
206 { 0x2c, "Operation exception" }, \
207 { 0x38, "Partial-execution" }, \
208 { 0x3c, "I/O interruption" }, \
209 { 0x40, "I/O instruction" }, \
210 { 0x48, "Timing subset" }
211
212/*
213 * This is a simple decoder for interceptable instructions.
214 *
215 * It is intended as a userspace interface and can also be used in places
216 * that do not allow the use of general decoder functions,
217 * such as trace event declarations.
218 *
219 * Some userspace tools may want to parse this code
220 * and would be confused by switch(), if() and other statements,
221 * but they can understand the conditional operator.
222 */
223#define INSN_DECODE_IPA0(ipa0, insn, rshift, mask) \
224 (insn >> 56) == (ipa0) ? \
225 ((ipa0 << 8) | ((insn >> rshift) & mask)) :
226
227#define INSN_DECODE(insn) (insn >> 56)
228
229/*
230 * The macro icpt_insn_decoder() takes an intercepted instruction
231 * and returns a key, which can be used to find a mnemonic name
232 * of the instruction in the icpt_insn_codes table.
233 */
234#define icpt_insn_decoder(insn) \
235 INSN_DECODE_IPA0(0x01, insn, 48, 0xff) \
236 INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f) \
237 INSN_DECODE_IPA0(0xb2, insn, 48, 0xff) \
238 INSN_DECODE_IPA0(0xb9, insn, 48, 0xff) \
239 INSN_DECODE_IPA0(0xe3, insn, 48, 0xff) \
240 INSN_DECODE_IPA0(0xe5, insn, 48, 0xff) \
241 INSN_DECODE_IPA0(0xeb, insn, 16, 0xff) \
242 INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f) \
243 INSN_DECODE(insn)
244
245#endif /* _UAPI_ASM_S390_SIE_H */
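
An illustrative sketch, not part of the patch, of the lookup this header enables. In the kernel the tables are meant to be fed to __print_symbolic() in trace events; a standalone tool can build the same table and resolve the key returned by icpt_insn_decoder(). The input is assumed to be the intercepted instruction's IPA/IPB words packed left-aligned into one 64-bit value, which is what the decoder's shifts expect.

#include <stdint.h>
#include <stddef.h>
#include <asm/sie.h>            /* the header added above */

static const struct {
        unsigned long long code;
        const char *name;
} icpt_names[] = { icpt_insn_codes };

/* insn: IPA/IPB packed most-significant byte first, as assumed above */
static const char *icpt_insn_name(uint64_t insn)
{
        unsigned long long key = icpt_insn_decoder(insn);
        size_t i;

        for (i = 0; i < sizeof(icpt_names) / sizeof(icpt_names[0]); i++)
                if (icpt_names[i].code == key)
                        return icpt_names[i].name;
        return "unknown";
}
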
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 0c070c44cde2..afe1715a4eb7 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -90,16 +90,22 @@ int main(void)
90 DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc)); 90 DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
91 DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code)); 91 DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
92 DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code)); 92 DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
93 DEFINE(__LC_PER_CAUSE, offsetof(struct _lowcore, per_perc_atmid)); 93 DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num));
94 DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code));
95 DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid));
94 DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address)); 96 DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
95 DEFINE(__LC_PER_PAID, offsetof(struct _lowcore, per_access_id)); 97 DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id));
96 DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_access_id)); 98 DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
99 DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id));
100 DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
101 DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code));
97 DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id)); 102 DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
98 DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr)); 103 DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
99 DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm)); 104 DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));
100 DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word)); 105 DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
101 DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list)); 106 DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
102 DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code)); 107 DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
108 DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code));
103 DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw)); 109 DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
104 DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw)); 110 DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
105 DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw)); 111 DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
@@ -157,6 +163,8 @@ int main(void)
157#ifdef CONFIG_32BIT 163#ifdef CONFIG_32BIT
158 DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); 164 DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr));
159#else /* CONFIG_32BIT */ 165#else /* CONFIG_32BIT */
166 DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
167 DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
160 DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2)); 168 DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
161 DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area)); 169 DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
162 DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste)); 170 DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 18e5af848f9a..70203265196f 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -389,8 +389,8 @@ ENTRY(pgm_check_handler)
389 jz pgm_kprobe 389 jz pgm_kprobe
390 oi __PT_FLAGS+3(%r11),_PIF_PER_TRAP 390 oi __PT_FLAGS+3(%r11),_PIF_PER_TRAP
391 mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS 391 mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS
392 mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE 392 mvc __THREAD_per_cause(2,%r1),__LC_PER_CODE
393 mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID 393 mvc __THREAD_per_paid(1,%r1),__LC_PER_ACCESS_ID
3940: REENABLE_IRQS 3940: REENABLE_IRQS
395 xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) 395 xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
396 l %r1,BASED(.Ljump_table) 396 l %r1,BASED(.Ljump_table)
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index c41f3f906720..f2e674c702e1 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -420,8 +420,8 @@ ENTRY(pgm_check_handler)
420 jz pgm_kprobe 420 jz pgm_kprobe
421 oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP 421 oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP
422 mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS 422 mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
423 mvc __THREAD_per_cause(2,%r14),__LC_PER_CAUSE 423 mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
424 mvc __THREAD_per_paid(1,%r14),__LC_PER_PAID 424 mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
4250: REENABLE_IRQS 4250: REENABLE_IRQS
426 xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) 426 xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
427 larl %r1,pgm_check_table 427 larl %r1,pgm_check_table
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index d3adb37e93a4..b3b553469650 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -11,5 +11,7 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch
11 11
12ccflags-y := -Ivirt/kvm -Iarch/s390/kvm 12ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
13 13
14kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o diag.o 14kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
15kvm-objs += diag.o gaccess.o guestdbg.o
16
15obj-$(CONFIG_KVM) += kvm.o 17obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 08dfc839a6cf..0161675878a2 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -23,7 +23,7 @@
23static int diag_release_pages(struct kvm_vcpu *vcpu) 23static int diag_release_pages(struct kvm_vcpu *vcpu)
24{ 24{
25 unsigned long start, end; 25 unsigned long start, end;
26 unsigned long prefix = vcpu->arch.sie_block->prefix; 26 unsigned long prefix = kvm_s390_get_prefix(vcpu);
27 27
28 start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; 28 start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
29 end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; 29 end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
@@ -64,12 +64,12 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
64 int rc; 64 int rc;
65 u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4; 65 u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
66 u16 ry = (vcpu->arch.sie_block->ipa & 0x0f); 66 u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
67 unsigned long hva_token = KVM_HVA_ERR_BAD;
68 67
69 if (vcpu->run->s.regs.gprs[rx] & 7) 68 if (vcpu->run->s.regs.gprs[rx] & 7)
70 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 69 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
71 if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm))) 70 rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
72 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 71 if (rc)
72 return kvm_s390_inject_prog_cond(vcpu, rc);
73 if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258) 73 if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
74 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 74 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
75 75
@@ -89,8 +89,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
89 parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL) 89 parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
90 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 90 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
91 91
92 hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr)); 92 if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
93 if (kvm_is_error_hva(hva_token))
94 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 93 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
95 94
96 vcpu->arch.pfault_token = parm.token_addr; 95 vcpu->arch.pfault_token = parm.token_addr;
@@ -167,23 +166,17 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
167 166
168 VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode); 167 VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
169 switch (subcode) { 168 switch (subcode) {
170 case 0:
171 case 1:
172 page_table_reset_pgste(current->mm, 0, TASK_SIZE);
173 return -EOPNOTSUPP;
174 case 3: 169 case 3:
175 vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; 170 vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
176 page_table_reset_pgste(current->mm, 0, TASK_SIZE);
177 break; 171 break;
178 case 4: 172 case 4:
179 vcpu->run->s390_reset_flags = 0; 173 vcpu->run->s390_reset_flags = 0;
180 page_table_reset_pgste(current->mm, 0, TASK_SIZE);
181 break; 174 break;
182 default: 175 default:
183 return -EOPNOTSUPP; 176 return -EOPNOTSUPP;
184 } 177 }
185 178
186 atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); 179 kvm_s390_vcpu_stop(vcpu);
187 vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; 180 vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
188 vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; 181 vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
189 vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; 182 vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
new file mode 100644
index 000000000000..4653ac6e182b
--- /dev/null
+++ b/arch/s390/kvm/gaccess.c
@@ -0,0 +1,726 @@
1/*
2 * guest access functions
3 *
4 * Copyright IBM Corp. 2014
5 *
6 */
7
8#include <linux/vmalloc.h>
9#include <linux/err.h>
10#include <asm/pgtable.h>
11#include "kvm-s390.h"
12#include "gaccess.h"
13
14union asce {
15 unsigned long val;
16 struct {
17 unsigned long origin : 52; /* Region- or Segment-Table Origin */
18 unsigned long : 2;
19 unsigned long g : 1; /* Subspace Group Control */
20 unsigned long p : 1; /* Private Space Control */
21 unsigned long s : 1; /* Storage-Alteration-Event Control */
22 unsigned long x : 1; /* Space-Switch-Event Control */
23 unsigned long r : 1; /* Real-Space Control */
24 unsigned long : 1;
25 unsigned long dt : 2; /* Designation-Type Control */
26 unsigned long tl : 2; /* Region- or Segment-Table Length */
27 };
28};
29
30enum {
31 ASCE_TYPE_SEGMENT = 0,
32 ASCE_TYPE_REGION3 = 1,
33 ASCE_TYPE_REGION2 = 2,
34 ASCE_TYPE_REGION1 = 3
35};
36
37union region1_table_entry {
38 unsigned long val;
39 struct {
40 unsigned long rto: 52;/* Region-Table Origin */
41 unsigned long : 2;
42 unsigned long p : 1; /* DAT-Protection Bit */
43 unsigned long : 1;
44 unsigned long tf : 2; /* Region-Second-Table Offset */
45 unsigned long i : 1; /* Region-Invalid Bit */
46 unsigned long : 1;
47 unsigned long tt : 2; /* Table-Type Bits */
48 unsigned long tl : 2; /* Region-Second-Table Length */
49 };
50};
51
52union region2_table_entry {
53 unsigned long val;
54 struct {
55 unsigned long rto: 52;/* Region-Table Origin */
56 unsigned long : 2;
57 unsigned long p : 1; /* DAT-Protection Bit */
58 unsigned long : 1;
59 unsigned long tf : 2; /* Region-Third-Table Offset */
60 unsigned long i : 1; /* Region-Invalid Bit */
61 unsigned long : 1;
62 unsigned long tt : 2; /* Table-Type Bits */
63 unsigned long tl : 2; /* Region-Third-Table Length */
64 };
65};
66
67struct region3_table_entry_fc0 {
68 unsigned long sto: 52;/* Segment-Table Origin */
69 unsigned long : 1;
70 unsigned long fc : 1; /* Format-Control */
71 unsigned long p : 1; /* DAT-Protection Bit */
72 unsigned long : 1;
73 unsigned long tf : 2; /* Segment-Table Offset */
74 unsigned long i : 1; /* Region-Invalid Bit */
75 unsigned long cr : 1; /* Common-Region Bit */
76 unsigned long tt : 2; /* Table-Type Bits */
77 unsigned long tl : 2; /* Segment-Table Length */
78};
79
80struct region3_table_entry_fc1 {
81 unsigned long rfaa : 33; /* Region-Frame Absolute Address */
82 unsigned long : 14;
83 unsigned long av : 1; /* ACCF-Validity Control */
84 unsigned long acc: 4; /* Access-Control Bits */
85 unsigned long f : 1; /* Fetch-Protection Bit */
86 unsigned long fc : 1; /* Format-Control */
87 unsigned long p : 1; /* DAT-Protection Bit */
88 unsigned long co : 1; /* Change-Recording Override */
89 unsigned long : 2;
90 unsigned long i : 1; /* Region-Invalid Bit */
91 unsigned long cr : 1; /* Common-Region Bit */
92 unsigned long tt : 2; /* Table-Type Bits */
93 unsigned long : 2;
94};
95
96union region3_table_entry {
97 unsigned long val;
98 struct region3_table_entry_fc0 fc0;
99 struct region3_table_entry_fc1 fc1;
100 struct {
101 unsigned long : 53;
102 unsigned long fc : 1; /* Format-Control */
103 unsigned long : 4;
104 unsigned long i : 1; /* Region-Invalid Bit */
105 unsigned long cr : 1; /* Common-Region Bit */
106 unsigned long tt : 2; /* Table-Type Bits */
107 unsigned long : 2;
108 };
109};
110
111struct segment_entry_fc0 {
112 unsigned long pto: 53;/* Page-Table Origin */
113 unsigned long fc : 1; /* Format-Control */
114 unsigned long p : 1; /* DAT-Protection Bit */
115 unsigned long : 3;
116 unsigned long i : 1; /* Segment-Invalid Bit */
117 unsigned long cs : 1; /* Common-Segment Bit */
118 unsigned long tt : 2; /* Table-Type Bits */
119 unsigned long : 2;
120};
121
122struct segment_entry_fc1 {
123 unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
124 unsigned long : 3;
125 unsigned long av : 1; /* ACCF-Validity Control */
126 unsigned long acc: 4; /* Access-Control Bits */
127 unsigned long f : 1; /* Fetch-Protection Bit */
128 unsigned long fc : 1; /* Format-Control */
129 unsigned long p : 1; /* DAT-Protection Bit */
130 unsigned long co : 1; /* Change-Recording Override */
131 unsigned long : 2;
132 unsigned long i : 1; /* Segment-Invalid Bit */
133 unsigned long cs : 1; /* Common-Segment Bit */
134 unsigned long tt : 2; /* Table-Type Bits */
135 unsigned long : 2;
136};
137
138union segment_table_entry {
139 unsigned long val;
140 struct segment_entry_fc0 fc0;
141 struct segment_entry_fc1 fc1;
142 struct {
143 unsigned long : 53;
144 unsigned long fc : 1; /* Format-Control */
145 unsigned long : 4;
146 unsigned long i : 1; /* Segment-Invalid Bit */
147 unsigned long cs : 1; /* Common-Segment Bit */
148 unsigned long tt : 2; /* Table-Type Bits */
149 unsigned long : 2;
150 };
151};
152
153enum {
154 TABLE_TYPE_SEGMENT = 0,
155 TABLE_TYPE_REGION3 = 1,
156 TABLE_TYPE_REGION2 = 2,
157 TABLE_TYPE_REGION1 = 3
158};
159
160union page_table_entry {
161 unsigned long val;
162 struct {
163 unsigned long pfra : 52; /* Page-Frame Real Address */
164 unsigned long z : 1; /* Zero Bit */
165 unsigned long i : 1; /* Page-Invalid Bit */
166 unsigned long p : 1; /* DAT-Protection Bit */
167 unsigned long co : 1; /* Change-Recording Override */
168 unsigned long : 8;
169 };
170};
171
172/*
173 * vaddress union used to easily decode a virtual address into its
174 * region-first index, region-second index etc. parts.
175 */
176union vaddress {
177 unsigned long addr;
178 struct {
179 unsigned long rfx : 11;
180 unsigned long rsx : 11;
181 unsigned long rtx : 11;
182 unsigned long sx : 11;
183 unsigned long px : 8;
184 unsigned long bx : 12;
185 };
186 struct {
187 unsigned long rfx01 : 2;
188 unsigned long : 9;
189 unsigned long rsx01 : 2;
190 unsigned long : 9;
191 unsigned long rtx01 : 2;
192 unsigned long : 9;
193 unsigned long sx01 : 2;
194 unsigned long : 29;
195 };
196};
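To illustrate the layout above, here is a minimal standalone sketch, not part of the patch, that decodes the same index fields with plain shifts and masks; it assumes the MSB-first bit-field allocation used on s390, and the widths 11/11/11/11/8/12 add up to 64 bits:

#include <assert.h>
#include <stdint.h>

/* Illustrative only: same field widths as union vaddress above. */
static void decode_vaddr(uint64_t addr)
{
	uint64_t rfx = (addr >> 53) & 0x7ff;	/* region-first index,  11 bits */
	uint64_t rsx = (addr >> 42) & 0x7ff;	/* region-second index, 11 bits */
	uint64_t rtx = (addr >> 31) & 0x7ff;	/* region-third index,  11 bits */
	uint64_t sx  = (addr >> 20) & 0x7ff;	/* segment index,       11 bits */
	uint64_t px  = (addr >> 12) & 0xff;	/* page index,           8 bits */
	uint64_t bx  = addr & 0xfff;		/* byte index,          12 bits */

	/* recombining the pieces yields the original address */
	assert(addr == ((rfx << 53) | (rsx << 42) | (rtx << 31) |
			(sx << 20) | (px << 12) | bx));
}

int main(void)
{
	decode_vaddr(0x123456789abcdef0ULL);
	return 0;
}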
197
198/*
199 * raddress union which will contain the result (real or absolute address)
200 * after a page table walk. The rfaa, sfaa and pfra members allow the value
201 * of a region, segment or page table entry to be assigned directly.
202 */
203union raddress {
204 unsigned long addr;
205 unsigned long rfaa : 33; /* Region-Frame Absolute Address */
206 unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
207 unsigned long pfra : 52; /* Page-Frame Real Address */
208};
209
210static int ipte_lock_count;
211static DEFINE_MUTEX(ipte_mutex);
212
213int ipte_lock_held(struct kvm_vcpu *vcpu)
214{
215 union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control;
216
217 if (vcpu->arch.sie_block->eca & 1)
218 return ic->kh != 0;
219 return ipte_lock_count != 0;
220}
221
222static void ipte_lock_simple(struct kvm_vcpu *vcpu)
223{
224 union ipte_control old, new, *ic;
225
226 mutex_lock(&ipte_mutex);
227 ipte_lock_count++;
228 if (ipte_lock_count > 1)
229 goto out;
230 ic = &vcpu->kvm->arch.sca->ipte_control;
231 do {
232 old = ACCESS_ONCE(*ic);
233 while (old.k) {
234 cond_resched();
235 old = ACCESS_ONCE(*ic);
236 }
237 new = old;
238 new.k = 1;
239 } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
240out:
241 mutex_unlock(&ipte_mutex);
242}
243
244static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
245{
246 union ipte_control old, new, *ic;
247
248 mutex_lock(&ipte_mutex);
249 ipte_lock_count--;
250 if (ipte_lock_count)
251 goto out;
252 ic = &vcpu->kvm->arch.sca->ipte_control;
253 do {
254 new = old = ACCESS_ONCE(*ic);
255 new.k = 0;
256 } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
257 if (!ipte_lock_count)
258 wake_up(&vcpu->kvm->arch.ipte_wq);
259out:
260 mutex_unlock(&ipte_mutex);
261}
262
263static void ipte_lock_siif(struct kvm_vcpu *vcpu)
264{
265 union ipte_control old, new, *ic;
266
267 ic = &vcpu->kvm->arch.sca->ipte_control;
268 do {
269 old = ACCESS_ONCE(*ic);
270 while (old.kg) {
271 cond_resched();
272 old = ACCESS_ONCE(*ic);
273 }
274 new = old;
275 new.k = 1;
276 new.kh++;
277 } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
278}
279
280static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
281{
282 union ipte_control old, new, *ic;
283
284 ic = &vcpu->kvm->arch.sca->ipte_control;
285 do {
286 new = old = ACCESS_ONCE(*ic);
287 new.kh--;
288 if (!new.kh)
289 new.k = 0;
290 } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
291 if (!new.kh)
292 wake_up(&vcpu->kvm->arch.ipte_wq);
293}
294
295void ipte_lock(struct kvm_vcpu *vcpu)
296{
297 if (vcpu->arch.sie_block->eca & 1)
298 ipte_lock_siif(vcpu);
299 else
300 ipte_lock_simple(vcpu);
301}
302
303void ipte_unlock(struct kvm_vcpu *vcpu)
304{
305 if (vcpu->arch.sie_block->eca & 1)
306 ipte_unlock_siif(vcpu);
307 else
308 ipte_unlock_simple(vcpu);
309}
310
311static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu)
312{
313 switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
314 case PSW_AS_PRIMARY:
315 return vcpu->arch.sie_block->gcr[1];
316 case PSW_AS_SECONDARY:
317 return vcpu->arch.sie_block->gcr[7];
318 case PSW_AS_HOME:
319 return vcpu->arch.sie_block->gcr[13];
320 }
321 return 0;
322}
323
324static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
325{
326 return kvm_read_guest(kvm, gpa, val, sizeof(*val));
327}
328
329/**
330 * guest_translate - translate a guest virtual into a guest absolute address
331 * @vcpu: virtual cpu
332 * @gva: guest virtual address
333 * @gpa: points to where guest physical (absolute) address should be stored
334 * @write: indicates if access is a write access
335 *
336 * Translate a guest virtual address into a guest absolute address by means
337 * of dynamic address translation as specified by the architecture.
338 * If the resulting absolute address is not available in the configuration
339 * an addressing exception is indicated and @gpa will not be changed.
340 *
341 * Returns: - zero on success; @gpa contains the resulting absolute address
342 * - a negative value if guest access failed due to e.g. broken
343 * guest mapping
344 * - a positive value if an access exception happened. In this case
345 * the returned value is the program interruption code as defined
346 * by the architecture
347 */
348static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
349 unsigned long *gpa, int write)
350{
351 union vaddress vaddr = {.addr = gva};
352 union raddress raddr = {.addr = gva};
353 union page_table_entry pte;
354 int dat_protection = 0;
355 union ctlreg0 ctlreg0;
356 unsigned long ptr;
357 int edat1, edat2;
358 union asce asce;
359
360 ctlreg0.val = vcpu->arch.sie_block->gcr[0];
361 edat1 = ctlreg0.edat && test_vfacility(8);
362 edat2 = edat1 && test_vfacility(78);
363 asce.val = get_vcpu_asce(vcpu);
364 if (asce.r)
365 goto real_address;
366 ptr = asce.origin * 4096;
367 switch (asce.dt) {
368 case ASCE_TYPE_REGION1:
369 if (vaddr.rfx01 > asce.tl)
370 return PGM_REGION_FIRST_TRANS;
371 ptr += vaddr.rfx * 8;
372 break;
373 case ASCE_TYPE_REGION2:
374 if (vaddr.rfx)
375 return PGM_ASCE_TYPE;
376 if (vaddr.rsx01 > asce.tl)
377 return PGM_REGION_SECOND_TRANS;
378 ptr += vaddr.rsx * 8;
379 break;
380 case ASCE_TYPE_REGION3:
381 if (vaddr.rfx || vaddr.rsx)
382 return PGM_ASCE_TYPE;
383 if (vaddr.rtx01 > asce.tl)
384 return PGM_REGION_THIRD_TRANS;
385 ptr += vaddr.rtx * 8;
386 break;
387 case ASCE_TYPE_SEGMENT:
388 if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
389 return PGM_ASCE_TYPE;
390 if (vaddr.sx01 > asce.tl)
391 return PGM_SEGMENT_TRANSLATION;
392 ptr += vaddr.sx * 8;
393 break;
394 }
395 switch (asce.dt) {
396 case ASCE_TYPE_REGION1: {
397 union region1_table_entry rfte;
398
399 if (kvm_is_error_gpa(vcpu->kvm, ptr))
400 return PGM_ADDRESSING;
401 if (deref_table(vcpu->kvm, ptr, &rfte.val))
402 return -EFAULT;
403 if (rfte.i)
404 return PGM_REGION_FIRST_TRANS;
405 if (rfte.tt != TABLE_TYPE_REGION1)
406 return PGM_TRANSLATION_SPEC;
407 if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
408 return PGM_REGION_SECOND_TRANS;
409 if (edat1)
410 dat_protection |= rfte.p;
411 ptr = rfte.rto * 4096 + vaddr.rsx * 8;
412 }
413 /* fallthrough */
414 case ASCE_TYPE_REGION2: {
415 union region2_table_entry rste;
416
417 if (kvm_is_error_gpa(vcpu->kvm, ptr))
418 return PGM_ADDRESSING;
419 if (deref_table(vcpu->kvm, ptr, &rste.val))
420 return -EFAULT;
421 if (rste.i)
422 return PGM_REGION_SECOND_TRANS;
423 if (rste.tt != TABLE_TYPE_REGION2)
424 return PGM_TRANSLATION_SPEC;
425 if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
426 return PGM_REGION_THIRD_TRANS;
427 if (edat1)
428 dat_protection |= rste.p;
429 ptr = rste.rto * 4096 + vaddr.rtx * 8;
430 }
431 /* fallthrough */
432 case ASCE_TYPE_REGION3: {
433 union region3_table_entry rtte;
434
435 if (kvm_is_error_gpa(vcpu->kvm, ptr))
436 return PGM_ADDRESSING;
437 if (deref_table(vcpu->kvm, ptr, &rtte.val))
438 return -EFAULT;
439 if (rtte.i)
440 return PGM_REGION_THIRD_TRANS;
441 if (rtte.tt != TABLE_TYPE_REGION3)
442 return PGM_TRANSLATION_SPEC;
443 if (rtte.cr && asce.p && edat2)
444 return PGM_TRANSLATION_SPEC;
445 if (rtte.fc && edat2) {
446 dat_protection |= rtte.fc1.p;
447 raddr.rfaa = rtte.fc1.rfaa;
448 goto absolute_address;
449 }
450 if (vaddr.sx01 < rtte.fc0.tf)
451 return PGM_SEGMENT_TRANSLATION;
452 if (vaddr.sx01 > rtte.fc0.tl)
453 return PGM_SEGMENT_TRANSLATION;
454 if (edat1)
455 dat_protection |= rtte.fc0.p;
456 ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8;
457 }
458 /* fallthrough */
459 case ASCE_TYPE_SEGMENT: {
460 union segment_table_entry ste;
461
462 if (kvm_is_error_gpa(vcpu->kvm, ptr))
463 return PGM_ADDRESSING;
464 if (deref_table(vcpu->kvm, ptr, &ste.val))
465 return -EFAULT;
466 if (ste.i)
467 return PGM_SEGMENT_TRANSLATION;
468 if (ste.tt != TABLE_TYPE_SEGMENT)
469 return PGM_TRANSLATION_SPEC;
470 if (ste.cs && asce.p)
471 return PGM_TRANSLATION_SPEC;
472 if (ste.fc && edat1) {
473 dat_protection |= ste.fc1.p;
474 raddr.sfaa = ste.fc1.sfaa;
475 goto absolute_address;
476 }
477 dat_protection |= ste.fc0.p;
478 ptr = ste.fc0.pto * 2048 + vaddr.px * 8;
479 }
480 }
481 if (kvm_is_error_gpa(vcpu->kvm, ptr))
482 return PGM_ADDRESSING;
483 if (deref_table(vcpu->kvm, ptr, &pte.val))
484 return -EFAULT;
485 if (pte.i)
486 return PGM_PAGE_TRANSLATION;
487 if (pte.z)
488 return PGM_TRANSLATION_SPEC;
489 if (pte.co && !edat1)
490 return PGM_TRANSLATION_SPEC;
491 dat_protection |= pte.p;
492 raddr.pfra = pte.pfra;
493real_address:
494 raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
495absolute_address:
496 if (write && dat_protection)
497 return PGM_PROTECTION;
498 if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
499 return PGM_ADDRESSING;
500 *gpa = raddr.addr;
501 return 0;
502}
503
504static inline int is_low_address(unsigned long ga)
505{
506 /* Check for address ranges 0..511 and 4096..4607 */
507 return (ga & ~0x11fful) == 0;
508}
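As a quick sanity check of the mask trick, the following minimal standalone sketch, not part of the patch, confirms that the single test above matches exactly the two ranges named in the comment:

#include <assert.h>

/* Same test as is_low_address() above. */
static int is_low(unsigned long ga)
{
	return (ga & ~0x11fful) == 0;
}

int main(void)
{
	unsigned long ga;

	for (ga = 0; ga < 0x3000; ga++)
		assert(is_low(ga) ==
		       (ga <= 511 || (ga >= 4096 && ga <= 4607)));
	return 0;
}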
509
510static int low_address_protection_enabled(struct kvm_vcpu *vcpu)
511{
512 union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
513 psw_t *psw = &vcpu->arch.sie_block->gpsw;
514 union asce asce;
515
516 if (!ctlreg0.lap)
517 return 0;
518 asce.val = get_vcpu_asce(vcpu);
519 if (psw_bits(*psw).t && asce.p)
520 return 0;
521 return 1;
522}
523
524struct trans_exc_code_bits {
525 unsigned long addr : 52; /* Translation-exception Address */
526 unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */
527 unsigned long : 7;
528 unsigned long b61 : 1;
529 unsigned long as : 2; /* ASCE Identifier */
530};
531
532enum {
533 FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
534 FSI_STORE = 1, /* Exception was due to store operation */
535 FSI_FETCH = 2 /* Exception was due to fetch operation */
536};
537
538static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
539 unsigned long *pages, unsigned long nr_pages,
540 int write)
541{
542 struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
543 psw_t *psw = &vcpu->arch.sie_block->gpsw;
544 struct trans_exc_code_bits *tec_bits;
545 int lap_enabled, rc;
546
547 memset(pgm, 0, sizeof(*pgm));
548 tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
549 tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
550 tec_bits->as = psw_bits(*psw).as;
551 lap_enabled = low_address_protection_enabled(vcpu);
552 while (nr_pages) {
553 ga = kvm_s390_logical_to_effective(vcpu, ga);
554 tec_bits->addr = ga >> PAGE_SHIFT;
555 if (write && lap_enabled && is_low_address(ga)) {
556 pgm->code = PGM_PROTECTION;
557 return pgm->code;
558 }
559 ga &= PAGE_MASK;
560 if (psw_bits(*psw).t) {
561 rc = guest_translate(vcpu, ga, pages, write);
562 if (rc < 0)
563 return rc;
564 if (rc == PGM_PROTECTION)
565 tec_bits->b61 = 1;
566 if (rc)
567 pgm->code = rc;
568 } else {
569 *pages = kvm_s390_real_to_abs(vcpu, ga);
570 if (kvm_is_error_gpa(vcpu->kvm, *pages))
571 pgm->code = PGM_ADDRESSING;
572 }
573 if (pgm->code)
574 return pgm->code;
575 ga += PAGE_SIZE;
576 pages++;
577 nr_pages--;
578 }
579 return 0;
580}
581
582int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
583 unsigned long len, int write)
584{
585 psw_t *psw = &vcpu->arch.sie_block->gpsw;
586 unsigned long _len, nr_pages, gpa, idx;
587 unsigned long pages_array[2];
588 unsigned long *pages;
589 int need_ipte_lock;
590 union asce asce;
591 int rc;
592
593 if (!len)
594 return 0;
595 /* Access register mode is not supported yet. */
596 if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
597 return -EOPNOTSUPP;
598 nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
599 pages = pages_array;
600 if (nr_pages > ARRAY_SIZE(pages_array))
601 pages = vmalloc(nr_pages * sizeof(unsigned long));
602 if (!pages)
603 return -ENOMEM;
604 asce.val = get_vcpu_asce(vcpu);
605 need_ipte_lock = psw_bits(*psw).t && !asce.r;
606 if (need_ipte_lock)
607 ipte_lock(vcpu);
608 rc = guest_page_range(vcpu, ga, pages, nr_pages, write);
609 for (idx = 0; idx < nr_pages && !rc; idx++) {
610 gpa = *(pages + idx) + (ga & ~PAGE_MASK);
611 _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
612 if (write)
613 rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
614 else
615 rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
616 len -= _len;
617 ga += _len;
618 data += _len;
619 }
620 if (need_ipte_lock)
621 ipte_unlock(vcpu);
622 if (nr_pages > ARRAY_SIZE(pages_array))
623 vfree(pages);
624 return rc;
625}
626
627int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
628 void *data, unsigned long len, int write)
629{
630 unsigned long _len, gpa;
631 int rc = 0;
632
633 while (len && !rc) {
634 gpa = kvm_s390_real_to_abs(vcpu, gra);
635 _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
636 if (write)
637 rc = write_guest_abs(vcpu, gpa, data, _len);
638 else
639 rc = read_guest_abs(vcpu, gpa, data, _len);
640 len -= _len;
641 gra += _len;
642 data += _len;
643 }
644 return rc;
645}
646
647/**
648 * guest_translate_address - translate guest logical into guest absolute address
649 *
650 * Parameter semantics are the same as the ones from guest_translate.
651 * The memory contents at the guest address are not changed.
652 *
653 * Note: The IPTE lock is not taken during this function, so the caller
654 * has to take care of this.
655 */
656int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
657 unsigned long *gpa, int write)
658{
659 struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
660 psw_t *psw = &vcpu->arch.sie_block->gpsw;
661 struct trans_exc_code_bits *tec;
662 union asce asce;
663 int rc;
664
665 /* Access register mode is not supported yet. */
666 if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
667 return -EOPNOTSUPP;
668
669 gva = kvm_s390_logical_to_effective(vcpu, gva);
670 memset(pgm, 0, sizeof(*pgm));
671 tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
672 tec->as = psw_bits(*psw).as;
673 tec->fsi = write ? FSI_STORE : FSI_FETCH;
674 tec->addr = gva >> PAGE_SHIFT;
675 if (is_low_address(gva) && low_address_protection_enabled(vcpu)) {
676 if (write) {
677 rc = pgm->code = PGM_PROTECTION;
678 return rc;
679 }
680 }
681
682 asce.val = get_vcpu_asce(vcpu);
683 if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
684 rc = guest_translate(vcpu, gva, gpa, write);
685 if (rc > 0) {
686 if (rc == PGM_PROTECTION)
687 tec->b61 = 1;
688 pgm->code = rc;
689 }
690 } else {
691 rc = 0;
692 *gpa = kvm_s390_real_to_abs(vcpu, gva);
693 if (kvm_is_error_gpa(vcpu->kvm, *gpa))
694 rc = pgm->code = PGM_ADDRESSING;
695 }
696
697 return rc;
698}
699
700/**
701 * kvm_s390_check_low_addr_protection - check for low-address protection
702 * @ga: Guest address
703 *
704 * Checks whether an address is subject to low-address protection and sets
705 * up vcpu->arch.pgm accordingly if necessary.
706 *
707 * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
708 */
709int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga)
710{
711 struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
712 psw_t *psw = &vcpu->arch.sie_block->gpsw;
713 struct trans_exc_code_bits *tec_bits;
714
715 if (!is_low_address(ga) || !low_address_protection_enabled(vcpu))
716 return 0;
717
718 memset(pgm, 0, sizeof(*pgm));
719 tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
720 tec_bits->fsi = FSI_STORE;
721 tec_bits->as = psw_bits(*psw).as;
722 tec_bits->addr = ga >> PAGE_SHIFT;
723 pgm->code = PGM_PROTECTION;
724
725 return pgm->code;
726}
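A hedged usage sketch follows; the handler and its arguments are hypothetical, while kvm_s390_check_low_addr_protection() fills vcpu->arch.pgm, which can then be handed to kvm_s390_inject_prog_irq() as done elsewhere in this series:

/* Hypothetical handler sketch, not part of this patch. */
static int handle_store_sketch(struct kvm_vcpu *vcpu, unsigned long ga)
{
	int rc;

	rc = kvm_s390_check_low_addr_protection(vcpu, ga);
	if (rc)
		/* vcpu->arch.pgm was set up by the check above */
		return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
	/* ... perform the store via write_guest() ... */
	return 0;
}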
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 374a439ccc60..0149cf15058a 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * access guest memory 2 * access guest memory
3 * 3 *
4 * Copyright IBM Corp. 2008, 2009 4 * Copyright IBM Corp. 2008, 2014
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only) 7 * it under the terms of the GNU General Public License (version 2 only)
@@ -15,100 +15,321 @@
15 15
16#include <linux/compiler.h> 16#include <linux/compiler.h>
17#include <linux/kvm_host.h> 17#include <linux/kvm_host.h>
18#include <asm/uaccess.h> 18#include <linux/uaccess.h>
19#include <linux/ptrace.h>
19#include "kvm-s390.h" 20#include "kvm-s390.h"
20 21
21/* Convert real to absolute address by applying the prefix of the CPU */ 22/**
23 * kvm_s390_real_to_abs - convert guest real address to guest absolute address
24 * @vcpu - guest virtual cpu
25 * @gra - guest real address
26 *
27 * Returns the guest absolute address that corresponds to the passed guest real
28 * address @gra of a virtual guest cpu by applying its prefix.
29 */
22static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu, 30static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
23 unsigned long gaddr) 31 unsigned long gra)
24{ 32{
25 unsigned long prefix = vcpu->arch.sie_block->prefix; 33 unsigned long prefix = kvm_s390_get_prefix(vcpu);
26 if (gaddr < 2 * PAGE_SIZE) 34
27 gaddr += prefix; 35 if (gra < 2 * PAGE_SIZE)
28 else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE) 36 gra += prefix;
29 gaddr -= prefix; 37 else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE)
30 return gaddr; 38 gra -= prefix;
39 return gra;
31} 40}
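A minimal standalone sketch, not part of the patch, restates the prefix-swap rule with the prefix passed in explicitly, so the mapping can be checked in isolation; the prefix value 0x20000 is made up:

#include <assert.h>

#define PAGE_SIZE 4096UL

/* Same rule as kvm_s390_real_to_abs(), with the prefix as a parameter. */
static unsigned long real_to_abs(unsigned long gra, unsigned long prefix)
{
	if (gra < 2 * PAGE_SIZE)
		gra += prefix;
	else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE)
		gra -= prefix;
	return gra;
}

int main(void)
{
	assert(real_to_abs(0x00000, 0x20000) == 0x20000);	/* low pages map to the prefix area */
	assert(real_to_abs(0x01000, 0x20000) == 0x21000);
	assert(real_to_abs(0x20000, 0x20000) == 0x00000);	/* prefix area maps back to page 0/1 */
	assert(real_to_abs(0x30000, 0x20000) == 0x30000);	/* everything else is unchanged */
	return 0;
}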
32 41
33static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, 42/**
34 void __user *gptr, 43 * kvm_s390_logical_to_effective - convert guest logical to effective address
35 int prefixing) 44 * @vcpu: guest virtual cpu
45 * @ga: guest logical address
46 *
47 * Convert a guest vcpu logical address to a guest vcpu effective address by
48 * applying the rules of the vcpu's addressing mode defined by PSW bits 31
49 * and 32 (extended/basic addressing mode).
50 *
51 * Depending on the vcpu's addressing mode the upper 40 bits (24 bit addressing
52 * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing mode)
53 * of @ga will be zeroed and the remaining bits will be returned.
54 */
55static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
56 unsigned long ga)
36{ 57{
37 unsigned long gaddr = (unsigned long) gptr; 58 psw_t *psw = &vcpu->arch.sie_block->gpsw;
38 unsigned long uaddr; 59
39 60 if (psw_bits(*psw).eaba == PSW_AMODE_64BIT)
40 if (prefixing) 61 return ga;
41 gaddr = kvm_s390_real_to_abs(vcpu, gaddr); 62 if (psw_bits(*psw).eaba == PSW_AMODE_31BIT)
42 uaddr = gmap_fault(gaddr, vcpu->arch.gmap); 63 return ga & ((1UL << 31) - 1);
43 if (IS_ERR_VALUE(uaddr)) 64 return ga & ((1UL << 24) - 1);
44 uaddr = -EFAULT;
45 return (void __user *)uaddr;
46} 65}
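A minimal standalone sketch, not part of the patch, shows the masking performed for the three addressing modes, using an arbitrary example address:

#include <assert.h>

int main(void)
{
	unsigned long ga = 0x123456789abcdef0UL;

	/* 31-bit mode: upper 33 bits are zeroed */
	assert((ga & ((1UL << 31) - 1)) == 0x1abcdef0UL);
	/* 24-bit mode: upper 40 bits are zeroed */
	assert((ga & ((1UL << 24) - 1)) == 0xbcdef0UL);
	/* 64-bit mode: the address is used unchanged */
	return 0;
}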
47 66
48#define get_guest(vcpu, x, gptr) \ 67/*
49({ \ 68 * put_guest_lc, read_guest_lc and write_guest_lc are guest access functions
50 __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ 69 * which shall only be used to access the lowcore of a vcpu.
51 int __mask = sizeof(__typeof__(*(gptr))) - 1; \ 70 * These functions should be used for e.g. interrupt handlers where no
52 int __ret; \ 71 * guest memory access protection facilities, like key or low address
53 \ 72 * protection, are applicable.
54 if (IS_ERR((void __force *)__uptr)) { \ 73 * At a later point guest vcpu lowcore access should happen via pinned
55 __ret = PTR_ERR((void __force *)__uptr); \ 74 * prefix pages, so that these pages can be accessed directly via the
56 } else { \ 75 * kernel mapping. All of these *_lc functions can be removed then.
57 BUG_ON((unsigned long)__uptr & __mask); \ 76 */
58 __ret = get_user(x, __uptr); \
59 } \
60 __ret; \
61})
62 77
63#define put_guest(vcpu, x, gptr) \ 78/**
79 * put_guest_lc - write a simple variable to a guest vcpu's lowcore
80 * @vcpu: virtual cpu
81 * @x: value to copy to guest
82 * @gra: vcpu's destination guest real address
83 *
84 * Copies a simple value from kernel space to a guest vcpu's lowcore.
85 * The size of the variable may be 1, 2, 4 or 8 bytes. The destination
86 * must be located in the vcpu's lowcore. Otherwise the result is undefined.
87 *
88 * Returns zero on success or -EFAULT on error.
89 *
90 * Note: an error indicates that either the kernel is out of memory or
91 * the guest memory mapping is broken. In any case the best solution
92 * would be to terminate the guest.
93 * It is wrong to inject a guest exception.
94 */
95#define put_guest_lc(vcpu, x, gra) \
64({ \ 96({ \
65 __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ 97 struct kvm_vcpu *__vcpu = (vcpu); \
66 int __mask = sizeof(__typeof__(*(gptr))) - 1; \ 98 __typeof__(*(gra)) __x = (x); \
67 int __ret; \ 99 unsigned long __gpa; \
68 \ 100 \
69 if (IS_ERR((void __force *)__uptr)) { \ 101 __gpa = (unsigned long)(gra); \
70 __ret = PTR_ERR((void __force *)__uptr); \ 102 __gpa += kvm_s390_get_prefix(__vcpu); \
71 } else { \ 103 kvm_write_guest(__vcpu->kvm, __gpa, &__x, sizeof(__x)); \
72 BUG_ON((unsigned long)__uptr & __mask); \
73 __ret = put_user(x, __uptr); \
74 } \
75 __ret; \
76}) 104})
77 105
78static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to, 106/**
79 unsigned long from, unsigned long len, 107 * write_guest_lc - copy data from kernel space to guest vcpu's lowcore
80 int to_guest, int prefixing) 108 * @vcpu: virtual cpu
109 * @gra: vcpu's destination guest real address
110 * @data: source address in kernel space
111 * @len: number of bytes to copy
112 *
113 * Copy data from kernel space to guest vcpu's lowcore. The entire range must
114 * be located within the vcpu's lowcore, otherwise the result is undefined.
115 *
116 * Returns zero on success or -EFAULT on error.
117 *
118 * Note: an error indicates that either the kernel is out of memory or
119 * the guest memory mapping is broken. In any case the best solution
120 * would be to terminate the guest.
121 * It is wrong to inject a guest exception.
122 */
123static inline __must_check
124int write_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
125 unsigned long len)
126{
127 unsigned long gpa = gra + kvm_s390_get_prefix(vcpu);
128
129 return kvm_write_guest(vcpu->kvm, gpa, data, len);
130}
131
132/**
133 * read_guest_lc - copy data from guest vcpu's lowcore to kernel space
134 * @vcpu: virtual cpu
135 * @gra: vcpu's source guest real address
136 * @data: destination address in kernel space
137 * @len: number of bytes to copy
138 *
139 * Copy data from guest vcpu's lowcore to kernel space. The entire range must
140 * be located within the vcpu's lowcore, otherwise the result is undefined.
141 *
142 * Returns zero on success or -EFAULT on error.
143 *
144 * Note: an error indicates that either the kernel is out of memory or
145 * the guest memory mapping is broken. In any case the best solution
146 * would be to terminate the guest.
147 * It is wrong to inject a guest exception.
148 */
149static inline __must_check
150int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
151 unsigned long len)
152{
153 unsigned long gpa = gra + kvm_s390_get_prefix(vcpu);
154
155 return kvm_read_guest(vcpu->kvm, gpa, data, len);
156}
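A hedged usage sketch of the lowcore helpers; the delivery context is hypothetical, and the __LC_EXT_INT_CODE / __LC_EXT_NEW_PSW offsets are assumed to come from asm-offsets.h as used elsewhere in this series:

/* Hypothetical delivery sketch, not part of this patch. */
static int deliver_ext_irq_sketch(struct kvm_vcpu *vcpu, u16 code)
{
	psw_t new_psw;
	int rc;

	/* store the external interruption code into the guest lowcore */
	rc = put_guest_lc(vcpu, code, (u16 *)__LC_EXT_INT_CODE);
	if (rc)
		return rc;
	/* fetch the external new PSW from the guest lowcore */
	return read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &new_psw, sizeof(psw_t));
}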
157
158int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
159 unsigned long *gpa, int write);
160
161int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
162 unsigned long len, int write);
163
164int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
165 void *data, unsigned long len, int write);
166
167/**
168 * write_guest - copy data from kernel space to guest space
169 * @vcpu: virtual cpu
170 * @ga: guest address
171 * @data: source address in kernel space
172 * @len: number of bytes to copy
173 *
174 * Copy @len bytes from @data (kernel space) to @ga (guest address).
175 * In order to copy data to guest space the PSW of the vcpu is inspected:
176 * If DAT is off data will be copied to guest real or absolute memory.
177 * If DAT is on data will be copied to the address space as specified by
178 * the address space bits of the PSW:
179 * Primary, secondary or home space (access register mode is currently not
180 * implemented).
181 * The addressing mode of the PSW is also inspected, so that address wrap
182 * around is taken into account for 24-, 31- and 64-bit addressing mode,
183 * if the data to be copied crosses page boundaries in guest address space.
184 * In addition also low address and DAT protection are inspected before
185 * copying any data (key protection is currently not implemented).
186 *
187 * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu.
188 * In case of an access exception (e.g. protection exception) pgm will contain
189 * all data necessary so that a subsequent call to 'kvm_s390_inject_prog_vcpu()'
190 * will inject a correct exception into the guest.
191 * If no access exception happened, the contents of pgm are undefined when
192 * this function returns.
193 *
194 * Returns: - zero on success
195 * - a negative value if e.g. the guest mapping is broken or in
196 * case of out-of-memory. In this case the contents of pgm are
197 * undefined. Also parts of @data may have been copied to guest
198 * space.
199 * - a positive value if an access exception happened. In this case
200 * the returned value is the program interruption code and the
201 * contents of pgm may be used to inject an exception into the
202 * guest. No data has been copied to guest space.
203 *
204 * Note: in case an access exception is recognized no data has been copied to
205 * guest space (this is also true if the data to be copied would cross
206 * one or more page boundaries in guest space).
207 * Therefore this function may be used for nullifying and suppressing
208 * instruction emulation.
209 * It may also be used for terminating instructions, if it is undefined
210 * whether data has been changed in guest space in case of an exception.
211 */
212static inline __must_check
213int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
214 unsigned long len)
215{
216 return access_guest(vcpu, ga, data, len, 1);
217}
218
219/**
220 * read_guest - copy data from guest space to kernel space
221 * @vcpu: virtual cpu
222 * @ga: guest address
223 * @data: destination address in kernel space
224 * @len: number of bytes to copy
225 *
226 * Copy @len bytes from @ga (guest address) to @data (kernel space).
227 *
228 * The behaviour of read_guest is identical to write_guest, except that
229 * data will be copied from guest space to kernel space.
230 */
231static inline __must_check
232int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
233 unsigned long len)
234{
235 return access_guest(vcpu, ga, data, len, 0);
236}
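A hedged sketch of the intended calling convention; the handler is hypothetical, and kvm_s390_inject_prog_cond() is the helper used elsewhere in this series to turn a non-zero return code into either a host error or an injected access exception:

/* Hypothetical handler sketch, not part of this patch. */
static int fetch_operand_sketch(struct kvm_vcpu *vcpu, unsigned long ga)
{
	u64 operand;
	int rc;

	rc = read_guest(vcpu, ga, &operand, sizeof(operand));
	if (rc)		/* < 0: host failure, > 0: program interruption code */
		return kvm_s390_inject_prog_cond(vcpu, rc);
	/* ... emulate the instruction using operand ... */
	return 0;
}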
237
238/**
239 * write_guest_abs - copy data from kernel space to guest space absolute
240 * @vcpu: virtual cpu
241 * @gpa: guest physical (absolute) address
242 * @data: source address in kernel space
243 * @len: number of bytes to copy
244 *
245 * Copy @len bytes from @data (kernel space) to @gpa (guest absolute address).
246 * It is up to the caller to ensure that the entire guest memory range is
247 * valid memory before calling this function.
248 * Guest low address and key protection are not checked.
249 *
250 * Returns zero on success or -EFAULT on error.
251 *
252 * If an error occurs data may have been copied partially to guest memory.
253 */
254static inline __must_check
255int write_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
256 unsigned long len)
257{
258 return kvm_write_guest(vcpu->kvm, gpa, data, len);
259}
260
261/**
262 * read_guest_abs - copy data from guest space absolute to kernel space
263 * @vcpu: virtual cpu
264 * @gpa: guest physical (absolute) address
265 * @data: destination address in kernel space
266 * @len: number of bytes to copy
267 *
268 * Copy @len bytes from @gpa (guest absolute address) to @data (kernel space).
269 * It is up to the caller to ensure that the entire guest memory range is
270 * valid memory before calling this function.
271 * Guest key protection is not checked.
272 *
273 * Returns zero on success or -EFAULT on error.
274 *
275 * If an error occurs data may have been copied partially to kernel space.
276 */
277static inline __must_check
278int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
279 unsigned long len)
280{
281 return kvm_read_guest(vcpu->kvm, gpa, data, len);
282}
283
284/**
285 * write_guest_real - copy data from kernel space to guest space real
286 * @vcpu: virtual cpu
287 * @gra: guest real address
288 * @data: source address in kernel space
289 * @len: number of bytes to copy
290 *
291 * Copy @len bytes from @data (kernel space) to @gra (guest real address).
292 * It is up to the caller to ensure that the entire guest memory range is
293 * valid memory before calling this function.
294 * Guest low address and key protection are not checked.
295 *
296 * Returns zero on success or -EFAULT on error.
297 *
298 * If an error occurs data may have been copied partially to guest memory.
299 */
300static inline __must_check
301int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
302 unsigned long len)
303{
304 return access_guest_real(vcpu, gra, data, len, 1);
305}
306
307/**
308 * read_guest_real - copy data from guest space real to kernel space
309 * @vcpu: virtual cpu
310 * @gra: guest real address
311 * @data: destination address in kernel space
312 * @len: number of bytes to copy
313 *
314 * Copy @len bytes from @gra (guest real address) to @data (kernel space).
315 * It is up to the caller to ensure that the entire guest memory range is
316 * valid memory before calling this function.
317 * Guest key protection is not checked.
318 *
319 * Returns zero on success or -EFAULT on error.
320 *
321 * If an error occurs data may have been copied partially to kernel space.
322 */
323static inline __must_check
324int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
325 unsigned long len)
81{ 326{
82 unsigned long _len, rc; 327 return access_guest_real(vcpu, gra, data, len, 0);
83 void __user *uptr;
84
85 while (len) {
86 uptr = to_guest ? (void __user *)to : (void __user *)from;
87 uptr = __gptr_to_uptr(vcpu, uptr, prefixing);
88 if (IS_ERR((void __force *)uptr))
89 return -EFAULT;
90 _len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1));
91 _len = min(_len, len);
92 if (to_guest)
93 rc = copy_to_user((void __user *) uptr, (void *)from, _len);
94 else
95 rc = copy_from_user((void *)to, (void __user *)uptr, _len);
96 if (rc)
97 return -EFAULT;
98 len -= _len;
99 from += _len;
100 to += _len;
101 }
102 return 0;
103} 328}
104 329
105#define copy_to_guest(vcpu, to, from, size) \ 330void ipte_lock(struct kvm_vcpu *vcpu);
106 __copy_guest(vcpu, to, (unsigned long)from, size, 1, 1) 331void ipte_unlock(struct kvm_vcpu *vcpu);
107#define copy_from_guest(vcpu, to, from, size) \ 332int ipte_lock_held(struct kvm_vcpu *vcpu);
108 __copy_guest(vcpu, (unsigned long)to, from, size, 0, 1) 333int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga);
109#define copy_to_guest_absolute(vcpu, to, from, size) \
110 __copy_guest(vcpu, to, (unsigned long)from, size, 1, 0)
111#define copy_from_guest_absolute(vcpu, to, from, size) \
112 __copy_guest(vcpu, (unsigned long)to, from, size, 0, 0)
113 334
114#endif /* __KVM_S390_GACCESS_H */ 335#endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
new file mode 100644
index 000000000000..3e8d4092ce30
--- /dev/null
+++ b/arch/s390/kvm/guestdbg.c
@@ -0,0 +1,482 @@
1/*
2 * kvm guest debug support
3 *
4 * Copyright IBM Corp. 2014
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
11 */
12#include <linux/kvm_host.h>
13#include <linux/errno.h>
14#include "kvm-s390.h"
15#include "gaccess.h"
16
17/*
18 * Extends the address range given by *start and *stop to include the address
19 * range starting with estart and the length len. Takes care of overflowing
20 * intervals and tries to minimize the overall interval size.
21 */
22static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len)
23{
24 u64 estop;
25
26 if (len > 0)
27 len--;
28 else
29 len = 0;
30
31 estop = estart + len;
32
33 /* 0-0 range represents "not set" */
34 if ((*start == 0) && (*stop == 0)) {
35 *start = estart;
36 *stop = estop;
37 } else if (*start <= *stop) {
38 /* increase the existing range */
39 if (estart < *start)
40 *start = estart;
41 if (estop > *stop)
42 *stop = estop;
43 } else {
44 /* "overflowing" interval, whereby *stop > *start */
45 if (estart <= *stop) {
46 if (estop > *stop)
47 *stop = estop;
48 } else if (estop > *start) {
49 if (estart < *start)
50 *start = estart;
51 }
52 /* minimize the range */
53 else if ((estop - *stop) < (*start - estart))
54 *stop = estop;
55 else
56 *start = estart;
57 }
58}
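An illustrative self-test sketch, not part of the patch; it would have to be compiled into this file, since extend_address_range() is static:

/* Illustrative only. */
static void __maybe_unused test_extend_address_range(void)
{
	u64 start = 0, stop = 0;

	/* 0-0 means "not set": the first range is taken over as-is */
	extend_address_range(&start, &stop, 0x1000, 0x100);
	WARN_ON(start != 0x1000 || stop != 0x10ff);

	/* a second, lower range only moves the start downwards */
	extend_address_range(&start, &stop, 0xf00, 0x10);
	WARN_ON(start != 0xf00 || stop != 0x10ff);
}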
59
60#define MAX_INST_SIZE 6
61
62static void enable_all_hw_bp(struct kvm_vcpu *vcpu)
63{
64 unsigned long start, len;
65 u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
66 u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
67 u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
68 int i;
69
70 if (vcpu->arch.guestdbg.nr_hw_bp <= 0 ||
71 vcpu->arch.guestdbg.hw_bp_info == NULL)
72 return;
73
74 /*
75 * If the guest is not interested in branching events, we can safely
76 * limit them to the PER address range.
77 */
78 if (!(*cr9 & PER_EVENT_BRANCH))
79 *cr9 |= PER_CONTROL_BRANCH_ADDRESS;
80 *cr9 |= PER_EVENT_IFETCH | PER_EVENT_BRANCH;
81
82 for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
83 start = vcpu->arch.guestdbg.hw_bp_info[i].addr;
84 len = vcpu->arch.guestdbg.hw_bp_info[i].len;
85
86 /*
87 * The instruction in front of the desired bp has to
88 * report instruction-fetching events
89 */
90 if (start < MAX_INST_SIZE) {
91 len += start;
92 start = 0;
93 } else {
94 start -= MAX_INST_SIZE;
95 len += MAX_INST_SIZE;
96 }
97
98 extend_address_range(cr10, cr11, start, len);
99 }
100}
101
102static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
103{
104 unsigned long start, len;
105 u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
106 u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
107 u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
108 int i;
109
110 if (vcpu->arch.guestdbg.nr_hw_wp <= 0 ||
111 vcpu->arch.guestdbg.hw_wp_info == NULL)
112 return;
113
114 /* if host uses storage alteration for special address
115 * spaces, enable all events and give all to the guest */
116 if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
117 *cr9 &= ~PER_CONTROL_ALTERATION;
118 *cr10 = 0;
119 *cr11 = PSW_ADDR_INSN;
120 } else {
121 *cr9 &= ~PER_CONTROL_ALTERATION;
122 *cr9 |= PER_EVENT_STORE;
123
124 for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
125 start = vcpu->arch.guestdbg.hw_wp_info[i].addr;
126 len = vcpu->arch.guestdbg.hw_wp_info[i].len;
127
128 extend_address_range(cr10, cr11, start, len);
129 }
130 }
131}
132
133void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu)
134{
135 vcpu->arch.guestdbg.cr0 = vcpu->arch.sie_block->gcr[0];
136 vcpu->arch.guestdbg.cr9 = vcpu->arch.sie_block->gcr[9];
137 vcpu->arch.guestdbg.cr10 = vcpu->arch.sie_block->gcr[10];
138 vcpu->arch.guestdbg.cr11 = vcpu->arch.sie_block->gcr[11];
139}
140
141void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu)
142{
143 vcpu->arch.sie_block->gcr[0] = vcpu->arch.guestdbg.cr0;
144 vcpu->arch.sie_block->gcr[9] = vcpu->arch.guestdbg.cr9;
145 vcpu->arch.sie_block->gcr[10] = vcpu->arch.guestdbg.cr10;
146 vcpu->arch.sie_block->gcr[11] = vcpu->arch.guestdbg.cr11;
147}
148
149void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
150{
151 /*
152 * TODO: if guest psw has per enabled, otherwise 0s!
153 * This reduces the amount of reported events.
154 * Need to intercept all psw changes!
155 */
156
157 if (guestdbg_sstep_enabled(vcpu)) {
158 /* disable timer (clock-comparator) interrupts */
159 vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
160 vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
161 vcpu->arch.sie_block->gcr[10] = 0;
162 vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
163 }
164
165 if (guestdbg_hw_bp_enabled(vcpu)) {
166 enable_all_hw_bp(vcpu);
167 enable_all_hw_wp(vcpu);
168 }
169
170 /* TODO: Instruction-fetching-nullification not allowed for now */
171 if (vcpu->arch.sie_block->gcr[9] & PER_EVENT_NULLIFICATION)
172 vcpu->arch.sie_block->gcr[9] &= ~PER_EVENT_NULLIFICATION;
173}
174
175#define MAX_WP_SIZE 100
176
177static int __import_wp_info(struct kvm_vcpu *vcpu,
178 struct kvm_hw_breakpoint *bp_data,
179 struct kvm_hw_wp_info_arch *wp_info)
180{
181 int ret = 0;
182 wp_info->len = bp_data->len;
183 wp_info->addr = bp_data->addr;
184 wp_info->phys_addr = bp_data->phys_addr;
185 wp_info->old_data = NULL;
186
187 if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE)
188 return -EINVAL;
189
190 wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL);
191 if (!wp_info->old_data)
192 return -ENOMEM;
193 /* try to backup the original value */
194 ret = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data,
195 wp_info->len);
196 if (ret) {
197 kfree(wp_info->old_data);
198 wp_info->old_data = NULL;
199 }
200
201 return ret;
202}
203
204#define MAX_BP_COUNT 50
205
206int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
207 struct kvm_guest_debug *dbg)
208{
209 int ret = 0, nr_wp = 0, nr_bp = 0, i, size;
210 struct kvm_hw_breakpoint *bp_data = NULL;
211 struct kvm_hw_wp_info_arch *wp_info = NULL;
212 struct kvm_hw_bp_info_arch *bp_info = NULL;
213
214 if (dbg->arch.nr_hw_bp <= 0 || !dbg->arch.hw_bp)
215 return 0;
216 else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
217 return -EINVAL;
218
219 size = dbg->arch.nr_hw_bp * sizeof(struct kvm_hw_breakpoint);
220 bp_data = kmalloc(size, GFP_KERNEL);
221 if (!bp_data) {
222 ret = -ENOMEM;
223 goto error;
224 }
225
226 if (copy_from_user(bp_data, dbg->arch.hw_bp, size)) {
227 ret = -EFAULT;
228 goto error;
229 }
230
231 for (i = 0; i < dbg->arch.nr_hw_bp; i++) {
232 switch (bp_data[i].type) {
233 case KVM_HW_WP_WRITE:
234 nr_wp++;
235 break;
236 case KVM_HW_BP:
237 nr_bp++;
238 break;
239 default:
240 break;
241 }
242 }
243
244 size = nr_wp * sizeof(struct kvm_hw_wp_info_arch);
245 if (size > 0) {
246 wp_info = kmalloc(size, GFP_KERNEL);
247 if (!wp_info) {
248 ret = -ENOMEM;
249 goto error;
250 }
251 }
252 size = nr_bp * sizeof(struct kvm_hw_bp_info_arch);
253 if (size > 0) {
254 bp_info = kmalloc(size, GFP_KERNEL);
255 if (!bp_info) {
256 ret = -ENOMEM;
257 goto error;
258 }
259 }
260
261 for (nr_wp = 0, nr_bp = 0, i = 0; i < dbg->arch.nr_hw_bp; i++) {
262 switch (bp_data[i].type) {
263 case KVM_HW_WP_WRITE:
264 ret = __import_wp_info(vcpu, &bp_data[i],
265 &wp_info[nr_wp]);
266 if (ret)
267 goto error;
268 nr_wp++;
269 break;
270 case KVM_HW_BP:
271 bp_info[nr_bp].len = bp_data[i].len;
272 bp_info[nr_bp].addr = bp_data[i].addr;
273 nr_bp++;
274 break;
275 }
276 }
277
278 vcpu->arch.guestdbg.nr_hw_bp = nr_bp;
279 vcpu->arch.guestdbg.hw_bp_info = bp_info;
280 vcpu->arch.guestdbg.nr_hw_wp = nr_wp;
281 vcpu->arch.guestdbg.hw_wp_info = wp_info;
282 return 0;
283error:
284 kfree(bp_data);
285 kfree(wp_info);
286 kfree(bp_info);
287 return ret;
288}
289
290void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu)
291{
292 int i;
293 struct kvm_hw_wp_info_arch *hw_wp_info = NULL;
294
295 for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
296 hw_wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
297 kfree(hw_wp_info->old_data);
298 hw_wp_info->old_data = NULL;
299 }
300 kfree(vcpu->arch.guestdbg.hw_wp_info);
301 vcpu->arch.guestdbg.hw_wp_info = NULL;
302
303 kfree(vcpu->arch.guestdbg.hw_bp_info);
304 vcpu->arch.guestdbg.hw_bp_info = NULL;
305
306 vcpu->arch.guestdbg.nr_hw_wp = 0;
307 vcpu->arch.guestdbg.nr_hw_bp = 0;
308}
309
310static inline int in_addr_range(u64 addr, u64 a, u64 b)
311{
312 if (a <= b)
313 return (addr >= a) && (addr <= b);
314 else
315 /* "overflowing" interval */
316 return (addr >= a) || (addr <= b);
317}
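An illustrative self-test sketch, not part of the patch, for the corrected wrap-around check above:

/* Illustrative only. */
static void __maybe_unused test_in_addr_range(void)
{
	/* plain interval [0x1000, 0x2000] */
	WARN_ON(!in_addr_range(0x1500, 0x1000, 0x2000));
	WARN_ON(in_addr_range(0x2500, 0x1000, 0x2000));
	/* wrapping interval: top of the address space from 0xff00, plus 0..0x00ff */
	WARN_ON(!in_addr_range(0xff80, 0xff00, 0x00ff));
	WARN_ON(!in_addr_range(0x0080, 0xff00, 0x00ff));
	WARN_ON(in_addr_range(0x8000, 0xff00, 0x00ff));
}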
318
319#define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1)
320
321static struct kvm_hw_bp_info_arch *find_hw_bp(struct kvm_vcpu *vcpu,
322 unsigned long addr)
323{
324 struct kvm_hw_bp_info_arch *bp_info = vcpu->arch.guestdbg.hw_bp_info;
325 int i;
326
327 if (vcpu->arch.guestdbg.nr_hw_bp == 0)
328 return NULL;
329
330 for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
331 /* addr is directly the start or in the range of a bp */
332 if (addr == bp_info->addr)
333 goto found;
334 if (bp_info->len > 0 &&
335 in_addr_range(addr, bp_info->addr, end_of_range(bp_info)))
336 goto found;
337
338 bp_info++;
339 }
340
341 return NULL;
342found:
343 return bp_info;
344}
345
346static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
347{
348 int i;
349 struct kvm_hw_wp_info_arch *wp_info = NULL;
350 void *temp = NULL;
351
352 if (vcpu->arch.guestdbg.nr_hw_wp == 0)
353 return NULL;
354
355 for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
356 wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
357 if (!wp_info || !wp_info->old_data || wp_info->len <= 0)
358 continue;
359
360 temp = kmalloc(wp_info->len, GFP_KERNEL);
361 if (!temp)
362 continue;
363
364 /* refetch the wp data and compare it to the old value */
365 if (!read_guest(vcpu, wp_info->phys_addr, temp,
366 wp_info->len)) {
367 if (memcmp(temp, wp_info->old_data, wp_info->len)) {
368 kfree(temp);
369 return wp_info;
370 }
371 }
372 kfree(temp);
373 temp = NULL;
374 }
375
376 return NULL;
377}
378
379void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu)
380{
381 vcpu->run->exit_reason = KVM_EXIT_DEBUG;
382 vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
383}
384
385#define per_bp_event(code) \
386 (code & (PER_EVENT_IFETCH | PER_EVENT_BRANCH))
387#define per_write_wp_event(code) \
388 (code & (PER_EVENT_STORE | PER_EVENT_STORE_REAL))
389
390static int debug_exit_required(struct kvm_vcpu *vcpu)
391{
392 u32 perc = (vcpu->arch.sie_block->perc << 24);
393 struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
394 struct kvm_hw_wp_info_arch *wp_info = NULL;
395 struct kvm_hw_bp_info_arch *bp_info = NULL;
396 unsigned long addr = vcpu->arch.sie_block->gpsw.addr;
397 unsigned long peraddr = vcpu->arch.sie_block->peraddr;
398
399 if (guestdbg_hw_bp_enabled(vcpu)) {
400 if (per_write_wp_event(perc) &&
401 vcpu->arch.guestdbg.nr_hw_wp > 0) {
402 wp_info = any_wp_changed(vcpu);
403 if (wp_info) {
404 debug_exit->addr = wp_info->addr;
405 debug_exit->type = KVM_HW_WP_WRITE;
406 goto exit_required;
407 }
408 }
409 if (per_bp_event(perc) &&
410 vcpu->arch.guestdbg.nr_hw_bp > 0) {
411 bp_info = find_hw_bp(vcpu, addr);
412 /* remove duplicate events if PC==PER address */
413 if (bp_info && (addr != peraddr)) {
414 debug_exit->addr = addr;
415 debug_exit->type = KVM_HW_BP;
416 vcpu->arch.guestdbg.last_bp = addr;
417 goto exit_required;
418 }
419 /* breakpoint missed */
420 bp_info = find_hw_bp(vcpu, peraddr);
421 if (bp_info && vcpu->arch.guestdbg.last_bp != peraddr) {
422 debug_exit->addr = peraddr;
423 debug_exit->type = KVM_HW_BP;
424 goto exit_required;
425 }
426 }
427 }
428 if (guestdbg_sstep_enabled(vcpu) && per_bp_event(perc)) {
429 debug_exit->addr = addr;
430 debug_exit->type = KVM_SINGLESTEP;
431 goto exit_required;
432 }
433
434 return 0;
435exit_required:
436 return 1;
437}
438
439#define guest_per_enabled(vcpu) \
440 (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER)
441
442static void filter_guest_per_event(struct kvm_vcpu *vcpu)
443{
444 u32 perc = vcpu->arch.sie_block->perc << 24;
445 u64 peraddr = vcpu->arch.sie_block->peraddr;
446 u64 addr = vcpu->arch.sie_block->gpsw.addr;
447 u64 cr9 = vcpu->arch.sie_block->gcr[9];
448 u64 cr10 = vcpu->arch.sie_block->gcr[10];
449 u64 cr11 = vcpu->arch.sie_block->gcr[11];
450 /* filter all events, demanded by the guest */
451 u32 guest_perc = perc & cr9 & PER_EVENT_MASK;
452
453 if (!guest_per_enabled(vcpu))
454 guest_perc = 0;
455
456 /* filter "successful-branching" events */
457 if (guest_perc & PER_EVENT_BRANCH &&
458 cr9 & PER_CONTROL_BRANCH_ADDRESS &&
459 !in_addr_range(addr, cr10, cr11))
460 guest_perc &= ~PER_EVENT_BRANCH;
461
462 /* filter "instruction-fetching" events */
463 if (guest_perc & PER_EVENT_IFETCH &&
464 !in_addr_range(peraddr, cr10, cr11))
465 guest_perc &= ~PER_EVENT_IFETCH;
466
467 /* All other PER events will be given to the guest */
468 /* TODO: Check altered address/address space */
469
470 vcpu->arch.sie_block->perc = guest_perc >> 24;
471
472 if (!guest_perc)
473 vcpu->arch.sie_block->iprcc &= ~PGM_PER;
474}
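For illustration, with symbolic values only and not part of the patch: the filter boils down to a bitwise AND of what the hardware reported with what the guest enabled in cr9:

/* Sketch: the hardware reported ifetch and store events, but the guest's
 * cr9 only enables ifetch, so only the ifetch event survives the filter
 * and is forwarded to the guest. */
static unsigned long __maybe_unused per_filter_example(void)
{
	unsigned long perc      = PER_EVENT_IFETCH | PER_EVENT_STORE;
	unsigned long guest_cr9 = PER_EVENT_IFETCH;

	return perc & guest_cr9 & PER_EVENT_MASK;	/* == PER_EVENT_IFETCH */
}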
475
476void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
477{
478 if (debug_exit_required(vcpu))
479 vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
480
481 filter_guest_per_event(vcpu);
482}
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index eeb1ac7d8fa4..a0b586c1913c 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * in-kernel handling for sie intercepts 2 * in-kernel handling for sie intercepts
3 * 3 *
4 * Copyright IBM Corp. 2008, 2009 4 * Copyright IBM Corp. 2008, 2014
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only) 7 * it under the terms of the GNU General Public License (version 2 only)
@@ -16,6 +16,8 @@
16#include <linux/pagemap.h> 16#include <linux/pagemap.h>
17 17
18#include <asm/kvm_host.h> 18#include <asm/kvm_host.h>
19#include <asm/asm-offsets.h>
20#include <asm/irq.h>
19 21
20#include "kvm-s390.h" 22#include "kvm-s390.h"
21#include "gaccess.h" 23#include "gaccess.h"
@@ -29,6 +31,7 @@ static const intercept_handler_t instruction_handlers[256] = {
29 [0x83] = kvm_s390_handle_diag, 31 [0x83] = kvm_s390_handle_diag,
30 [0xae] = kvm_s390_handle_sigp, 32 [0xae] = kvm_s390_handle_sigp,
31 [0xb2] = kvm_s390_handle_b2, 33 [0xb2] = kvm_s390_handle_b2,
34 [0xb6] = kvm_s390_handle_stctl,
32 [0xb7] = kvm_s390_handle_lctl, 35 [0xb7] = kvm_s390_handle_lctl,
33 [0xb9] = kvm_s390_handle_b9, 36 [0xb9] = kvm_s390_handle_b9,
34 [0xe5] = kvm_s390_handle_e5, 37 [0xe5] = kvm_s390_handle_e5,
@@ -44,9 +47,6 @@ static int handle_noop(struct kvm_vcpu *vcpu)
44 case 0x10: 47 case 0x10:
45 vcpu->stat.exit_external_request++; 48 vcpu->stat.exit_external_request++;
46 break; 49 break;
47 case 0x14:
48 vcpu->stat.exit_external_interrupt++;
49 break;
50 default: 50 default:
51 break; /* nothing */ 51 break; /* nothing */
52 } 52 }
@@ -63,8 +63,7 @@ static int handle_stop(struct kvm_vcpu *vcpu)
63 trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits); 63 trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
64 64
65 if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { 65 if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
66 atomic_set_mask(CPUSTAT_STOPPED, 66 kvm_s390_vcpu_stop(vcpu);
67 &vcpu->arch.sie_block->cpuflags);
68 vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; 67 vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
69 VCPU_EVENT(vcpu, 3, "%s", "cpu stopped"); 68 VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
70 rc = -EOPNOTSUPP; 69 rc = -EOPNOTSUPP;
@@ -109,22 +108,120 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
109 return -EOPNOTSUPP; 108 return -EOPNOTSUPP;
110} 109}
111 110
111static void __extract_prog_irq(struct kvm_vcpu *vcpu,
112 struct kvm_s390_pgm_info *pgm_info)
113{
114 memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info));
115 pgm_info->code = vcpu->arch.sie_block->iprcc;
116
117 switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
118 case PGM_AFX_TRANSLATION:
119 case PGM_ASX_TRANSLATION:
120 case PGM_EX_TRANSLATION:
121 case PGM_LFX_TRANSLATION:
122 case PGM_LSTE_SEQUENCE:
123 case PGM_LSX_TRANSLATION:
124 case PGM_LX_TRANSLATION:
125 case PGM_PRIMARY_AUTHORITY:
126 case PGM_SECONDARY_AUTHORITY:
127 case PGM_SPACE_SWITCH:
128 pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
129 break;
130 case PGM_ALEN_TRANSLATION:
131 case PGM_ALE_SEQUENCE:
132 case PGM_ASTE_INSTANCE:
133 case PGM_ASTE_SEQUENCE:
134 case PGM_ASTE_VALIDITY:
135 case PGM_EXTENDED_AUTHORITY:
136 pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
137 break;
138 case PGM_ASCE_TYPE:
139 case PGM_PAGE_TRANSLATION:
140 case PGM_REGION_FIRST_TRANS:
141 case PGM_REGION_SECOND_TRANS:
142 case PGM_REGION_THIRD_TRANS:
143 case PGM_SEGMENT_TRANSLATION:
144 pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
145 pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
146 pgm_info->op_access_id = vcpu->arch.sie_block->oai;
147 break;
148 case PGM_MONITOR:
149 pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
150 pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
151 break;
152 case PGM_DATA:
153 pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
154 break;
155 case PGM_PROTECTION:
156 pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
157 pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
158 break;
159 default:
160 break;
161 }
162
163 if (vcpu->arch.sie_block->iprcc & PGM_PER) {
164 pgm_info->per_code = vcpu->arch.sie_block->perc;
165 pgm_info->per_atmid = vcpu->arch.sie_block->peratmid;
166 pgm_info->per_address = vcpu->arch.sie_block->peraddr;
167 pgm_info->per_access_id = vcpu->arch.sie_block->peraid;
168 }
169}
170
171/*
172 * restore ITDB to program-interruption TDB in guest lowcore
173 * and set TX abort indication if required
174*/
175static int handle_itdb(struct kvm_vcpu *vcpu)
176{
177 struct kvm_s390_itdb *itdb;
178 int rc;
179
180 if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu))
181 return 0;
182 if (current->thread.per_flags & PER_FLAG_NO_TE)
183 return 0;
184 itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
185 rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
186 if (rc)
187 return rc;
188 memset(itdb, 0, sizeof(*itdb));
189
190 return 0;
191}
192
193#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
194
112static int handle_prog(struct kvm_vcpu *vcpu) 195static int handle_prog(struct kvm_vcpu *vcpu)
113{ 196{
197 struct kvm_s390_pgm_info pgm_info;
198 psw_t psw;
199 int rc;
200
114 vcpu->stat.exit_program_interruption++; 201 vcpu->stat.exit_program_interruption++;
115 202
116 /* Restore ITDB to Program-Interruption TDB in guest memory */ 203 if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
117 if (IS_TE_ENABLED(vcpu) && 204 kvm_s390_handle_per_event(vcpu);
118 !(current->thread.per_flags & PER_FLAG_NO_TE) && 205 /* the interrupt might have been filtered out completely */
119 IS_ITDB_VALID(vcpu)) { 206 if (vcpu->arch.sie_block->iprcc == 0)
120 copy_to_guest(vcpu, TDB_ADDR, vcpu->arch.sie_block->itdba, 207 return 0;
121 sizeof(struct kvm_s390_itdb));
122 memset((void *) vcpu->arch.sie_block->itdba, 0,
123 sizeof(struct kvm_s390_itdb));
124 } 208 }
125 209
126 trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc); 210 trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
127 return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc); 211 if (vcpu->arch.sie_block->iprcc == PGM_SPECIFICATION) {
212 rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &psw, sizeof(psw_t));
213 if (rc)
214 return rc;
215 /* Avoid endless loops of specification exceptions */
216 if (!is_valid_psw(&psw))
217 return -EOPNOTSUPP;
218 }
219 rc = handle_itdb(vcpu);
220 if (rc)
221 return rc;
222
223 __extract_prog_irq(vcpu, &pgm_info);
224 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
128} 225}
129 226
130static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) 227static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
@@ -142,17 +239,110 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
142 return rc2; 239 return rc2;
143} 240}
144 241
242/**
243 * handle_external_interrupt - used for external interruption interceptions
244 *
245 * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
246 * the new PSW does not have external interrupts disabled. In the first case,
247 * we've got to deliver the interrupt manually, and in the second case, we
248 * drop to userspace to handle the situation there.
249 */
250static int handle_external_interrupt(struct kvm_vcpu *vcpu)
251{
252 u16 eic = vcpu->arch.sie_block->eic;
253 struct kvm_s390_interrupt irq;
254 psw_t newpsw;
255 int rc;
256
257 vcpu->stat.exit_external_interrupt++;
258
259 rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
260 if (rc)
261 return rc;
262 /* We can not handle clock comparator or timer interrupt with bad PSW */
263 if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
264 (newpsw.mask & PSW_MASK_EXT))
265 return -EOPNOTSUPP;
266
267 switch (eic) {
268 case EXT_IRQ_CLK_COMP:
269 irq.type = KVM_S390_INT_CLOCK_COMP;
270 break;
271 case EXT_IRQ_CPU_TIMER:
272 irq.type = KVM_S390_INT_CPU_TIMER;
273 break;
274 case EXT_IRQ_EXTERNAL_CALL:
275 if (kvm_s390_si_ext_call_pending(vcpu))
276 return 0;
277 irq.type = KVM_S390_INT_EXTERNAL_CALL;
278 irq.parm = vcpu->arch.sie_block->extcpuaddr;
279 break;
280 default:
281 return -EOPNOTSUPP;
282 }
283
284 return kvm_s390_inject_vcpu(vcpu, &irq);
285}
286
287/**
288 * Handle MOVE PAGE partial execution interception.
289 *
290 * This interception can only happen for guests with DAT disabled and
291 * addresses that are currently not mapped in the host. Thus we try to
292 * set up the mappings for the corresponding user pages here (or throw
293 * addressing exceptions in case of illegal guest addresses).
294 */
295static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
296{
297 psw_t *psw = &vcpu->arch.sie_block->gpsw;
298 unsigned long srcaddr, dstaddr;
299 int reg1, reg2, rc;
300
301 kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
302
303 /* Make sure that the source is paged-in */
304 srcaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg2]);
305 if (kvm_is_error_gpa(vcpu->kvm, srcaddr))
306 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
307 rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
308 if (rc != 0)
309 return rc;
310
311 /* Make sure that the destination is paged-in */
312 dstaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg1]);
313 if (kvm_is_error_gpa(vcpu->kvm, dstaddr))
314 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
315 rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
316 if (rc != 0)
317 return rc;
318
319 psw->addr = __rewind_psw(*psw, 4);
320
321 return 0;
322}
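
The final step above rewinds the guest PSW by the 4-byte length of MVPG so that the instruction is executed again once both pages are mapped in. Below is a minimal standalone sketch of such a rewind, assuming only that the subtraction has to wrap within the active addressing range (24, 31 or 64 bit); the helper name and sample values are illustrative and not the kernel's __rewind_psw.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative only: step an instruction address back by "ilen" bytes,
 * wrapping within a 24-, 31- or 64-bit addressing range, so that the
 * instruction is executed again on the next guest entry.
 */
static uint64_t rewind_addr(uint64_t addr, unsigned int ilen, unsigned int addr_bits)
{
	uint64_t mask = (addr_bits == 64) ? ~0ULL : ((1ULL << addr_bits) - 1);

	return (addr - ilen) & mask;
}

int main(void)
{
	/* MVPG is 4 bytes long; re-run it once source and destination are mapped */
	printf("%#llx\n", (unsigned long long)rewind_addr(0x2001000, 4, 64));
	printf("%#llx\n", (unsigned long long)rewind_addr(0x0, 4, 31)); /* wraps */
	return 0;
}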
323
324static int handle_partial_execution(struct kvm_vcpu *vcpu)
325{
326 if (vcpu->arch.sie_block->ipa == 0xb254) /* MVPG */
327 return handle_mvpg_pei(vcpu);
328 if (vcpu->arch.sie_block->ipa >> 8 == 0xae) /* SIGP */
329 return kvm_s390_handle_sigp_pei(vcpu);
330
331 return -EOPNOTSUPP;
332}
333
145static const intercept_handler_t intercept_funcs[] = { 334static const intercept_handler_t intercept_funcs[] = {
146 [0x00 >> 2] = handle_noop, 335 [0x00 >> 2] = handle_noop,
147 [0x04 >> 2] = handle_instruction, 336 [0x04 >> 2] = handle_instruction,
148 [0x08 >> 2] = handle_prog, 337 [0x08 >> 2] = handle_prog,
149 [0x0C >> 2] = handle_instruction_and_prog, 338 [0x0C >> 2] = handle_instruction_and_prog,
150 [0x10 >> 2] = handle_noop, 339 [0x10 >> 2] = handle_noop,
151 [0x14 >> 2] = handle_noop, 340 [0x14 >> 2] = handle_external_interrupt,
152 [0x18 >> 2] = handle_noop, 341 [0x18 >> 2] = handle_noop,
153 [0x1C >> 2] = kvm_s390_handle_wait, 342 [0x1C >> 2] = kvm_s390_handle_wait,
154 [0x20 >> 2] = handle_validity, 343 [0x20 >> 2] = handle_validity,
155 [0x28 >> 2] = handle_stop, 344 [0x28 >> 2] = handle_stop,
345 [0x38 >> 2] = handle_partial_execution,
156}; 346};
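
The table above is indexed with the SIE intercept code shifted right by two; kvm_handle_sie_intercept (truncated below) looks up the handler that way and falls back to -EOPNOTSUPP, which later causes the exit to be handed to userspace. The following self-contained sketch models that dispatch pattern with toy handlers; names and demo codes are illustrative only, not the kernel's.

#include <stdio.h>
#include <errno.h>

typedef int (*intercept_handler_t)(int vcpu_id);

static int handle_noop(int vcpu_id)
{
	(void)vcpu_id;
	return 0;
}

static int handle_demo(int vcpu_id)
{
	printf("vcpu %d: demo intercept handled in kernel\n", vcpu_id);
	return 0;
}

/* sparse table indexed by (intercept code >> 2), like intercept_funcs above */
static const intercept_handler_t handlers[] = {
	[0x00 >> 2] = handle_noop,
	[0x08 >> 2] = handle_demo,
	[0x38 >> 2] = handle_demo,
};

static int dispatch(int vcpu_id, unsigned char code)
{
	intercept_handler_t func;

	/* intercept codes are multiples of four and must fit the table */
	if ((code & 3) || (code >> 2) >= sizeof(handlers) / sizeof(handlers[0]))
		return -EOPNOTSUPP;
	func = handlers[code >> 2];
	return func ? func(vcpu_id) : -EOPNOTSUPP; /* unhandled: let userspace decide */
}

int main(void)
{
	printf("code 0x08 -> %d\n", dispatch(0, 0x08));
	printf("code 0x3c -> %d\n", dispatch(0, 0x3c)); /* no handler registered */
	return 0;
}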
157 347
158int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) 348int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 200a8f9390b6..90c8de22a2a0 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -27,6 +27,8 @@
27#define IOINT_CSSID_MASK 0x03fc0000 27#define IOINT_CSSID_MASK 0x03fc0000
28#define IOINT_AI_MASK 0x04000000 28#define IOINT_AI_MASK 0x04000000
29 29
30static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
31
30static int is_ioint(u64 type) 32static int is_ioint(u64 type)
31{ 33{
32 return ((type & 0xfffe0000u) != 0xfffe0000u); 34 return ((type & 0xfffe0000u) != 0xfffe0000u);
@@ -56,6 +58,17 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
56 return 1; 58 return 1;
57} 59}
58 60
61static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
62{
63 if (psw_extint_disabled(vcpu) ||
64 !(vcpu->arch.sie_block->gcr[0] & 0x800ul))
65 return 0;
66 if (guestdbg_enabled(vcpu) && guestdbg_sstep_enabled(vcpu))
67 /* No timer interrupts when single stepping */
68 return 0;
69 return 1;
70}
71
59static u64 int_word_to_isc_bits(u32 int_word) 72static u64 int_word_to_isc_bits(u32 int_word)
60{ 73{
61 u8 isc = (int_word & 0x38000000) >> 27; 74 u8 isc = (int_word & 0x38000000) >> 27;
@@ -78,6 +91,14 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
78 if (vcpu->arch.sie_block->gcr[0] & 0x4000ul) 91 if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
79 return 1; 92 return 1;
80 return 0; 93 return 0;
94 case KVM_S390_INT_CLOCK_COMP:
95 return ckc_interrupts_enabled(vcpu);
96 case KVM_S390_INT_CPU_TIMER:
97 if (psw_extint_disabled(vcpu))
98 return 0;
99 if (vcpu->arch.sie_block->gcr[0] & 0x400ul)
100 return 1;
101 return 0;
81 case KVM_S390_INT_SERVICE: 102 case KVM_S390_INT_SERVICE:
82 case KVM_S390_INT_PFAULT_INIT: 103 case KVM_S390_INT_PFAULT_INIT:
83 case KVM_S390_INT_PFAULT_DONE: 104 case KVM_S390_INT_PFAULT_DONE:
@@ -127,11 +148,16 @@ static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
127 148
128static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) 149static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
129{ 150{
130 atomic_clear_mask(CPUSTAT_ECALL_PEND | 151 atomic_clear_mask(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
131 CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, 152 &vcpu->arch.sie_block->cpuflags);
132 &vcpu->arch.sie_block->cpuflags);
133 vcpu->arch.sie_block->lctl = 0x0000; 153 vcpu->arch.sie_block->lctl = 0x0000;
134 vcpu->arch.sie_block->ictl &= ~ICTL_LPSW; 154 vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
155
156 if (guestdbg_enabled(vcpu)) {
157 vcpu->arch.sie_block->lctl |= (LCTL_CR0 | LCTL_CR9 |
158 LCTL_CR10 | LCTL_CR11);
159 vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT);
160 }
135} 161}
136 162
137static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) 163static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
@@ -149,6 +175,8 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
149 case KVM_S390_INT_PFAULT_INIT: 175 case KVM_S390_INT_PFAULT_INIT:
150 case KVM_S390_INT_PFAULT_DONE: 176 case KVM_S390_INT_PFAULT_DONE:
151 case KVM_S390_INT_VIRTIO: 177 case KVM_S390_INT_VIRTIO:
178 case KVM_S390_INT_CLOCK_COMP:
179 case KVM_S390_INT_CPU_TIMER:
152 if (psw_extint_disabled(vcpu)) 180 if (psw_extint_disabled(vcpu))
153 __set_cpuflag(vcpu, CPUSTAT_EXT_INT); 181 __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
154 else 182 else
@@ -174,6 +202,106 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
174 } 202 }
175} 203}
176 204
205static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
206 struct kvm_s390_pgm_info *pgm_info)
207{
208 const unsigned short table[] = { 2, 4, 4, 6 };
209 int rc = 0;
210
211 switch (pgm_info->code & ~PGM_PER) {
212 case PGM_AFX_TRANSLATION:
213 case PGM_ASX_TRANSLATION:
214 case PGM_EX_TRANSLATION:
215 case PGM_LFX_TRANSLATION:
216 case PGM_LSTE_SEQUENCE:
217 case PGM_LSX_TRANSLATION:
218 case PGM_LX_TRANSLATION:
219 case PGM_PRIMARY_AUTHORITY:
220 case PGM_SECONDARY_AUTHORITY:
221 case PGM_SPACE_SWITCH:
222 rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
223 (u64 *)__LC_TRANS_EXC_CODE);
224 break;
225 case PGM_ALEN_TRANSLATION:
226 case PGM_ALE_SEQUENCE:
227 case PGM_ASTE_INSTANCE:
228 case PGM_ASTE_SEQUENCE:
229 case PGM_ASTE_VALIDITY:
230 case PGM_EXTENDED_AUTHORITY:
231 rc = put_guest_lc(vcpu, pgm_info->exc_access_id,
232 (u8 *)__LC_EXC_ACCESS_ID);
233 break;
234 case PGM_ASCE_TYPE:
235 case PGM_PAGE_TRANSLATION:
236 case PGM_REGION_FIRST_TRANS:
237 case PGM_REGION_SECOND_TRANS:
238 case PGM_REGION_THIRD_TRANS:
239 case PGM_SEGMENT_TRANSLATION:
240 rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
241 (u64 *)__LC_TRANS_EXC_CODE);
242 rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
243 (u8 *)__LC_EXC_ACCESS_ID);
244 rc |= put_guest_lc(vcpu, pgm_info->op_access_id,
245 (u8 *)__LC_OP_ACCESS_ID);
246 break;
247 case PGM_MONITOR:
248 rc = put_guest_lc(vcpu, pgm_info->mon_class_nr,
249 (u64 *)__LC_MON_CLASS_NR);
250 rc |= put_guest_lc(vcpu, pgm_info->mon_code,
251 (u64 *)__LC_MON_CODE);
252 break;
253 case PGM_DATA:
254 rc = put_guest_lc(vcpu, pgm_info->data_exc_code,
255 (u32 *)__LC_DATA_EXC_CODE);
256 break;
257 case PGM_PROTECTION:
258 rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
259 (u64 *)__LC_TRANS_EXC_CODE);
260 rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
261 (u8 *)__LC_EXC_ACCESS_ID);
262 break;
263 }
264
265 if (pgm_info->code & PGM_PER) {
266 rc |= put_guest_lc(vcpu, pgm_info->per_code,
267 (u8 *) __LC_PER_CODE);
268 rc |= put_guest_lc(vcpu, pgm_info->per_atmid,
269 (u8 *)__LC_PER_ATMID);
270 rc |= put_guest_lc(vcpu, pgm_info->per_address,
271 (u64 *) __LC_PER_ADDRESS);
272 rc |= put_guest_lc(vcpu, pgm_info->per_access_id,
273 (u8 *) __LC_PER_ACCESS_ID);
274 }
275
276 switch (vcpu->arch.sie_block->icptcode) {
277 case ICPT_INST:
278 case ICPT_INSTPROGI:
279 case ICPT_OPEREXC:
280 case ICPT_PARTEXEC:
281 case ICPT_IOINST:
282 /* last instruction only stored for these icptcodes */
283 rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
284 (u16 *) __LC_PGM_ILC);
285 break;
286 case ICPT_PROGI:
287 rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc,
288 (u16 *) __LC_PGM_ILC);
289 break;
290 default:
291 rc |= put_guest_lc(vcpu, 0,
292 (u16 *) __LC_PGM_ILC);
293 }
294
295 rc |= put_guest_lc(vcpu, pgm_info->code,
296 (u16 *)__LC_PGM_INT_CODE);
297 rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
298 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
299 rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
300 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
301
302 return rc;
303}
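
The table[] = { 2, 4, 4, 6 } lookup above recovers the program-interruption instruction-length code from the two most significant bits of the intercepted instruction's first halfword: 00 selects 2 bytes, 01 and 10 select 4 bytes, 11 selects 6 bytes. A standalone sketch of that mapping, with sample opcodes purely for illustration:

#include <stdio.h>
#include <stdint.h>

/* length in bytes of an s390 instruction, from the first halfword's top two bits */
static unsigned int insn_length(uint16_t first_halfword)
{
	static const unsigned short table[] = { 2, 4, 4, 6 }; /* same as table[] above */

	return table[first_halfword >> 14];
}

int main(void)
{
	/* 0x07xx (BCR) is 2 bytes, 0xb254 (MVPG) is 4 bytes, 0xe3xx (RXY) is 6 bytes */
	printf("%u %u %u\n",
	       insn_length(0x0700), insn_length(0xb254), insn_length(0xe300));
	return 0;
}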
304
177static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, 305static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
178 struct kvm_s390_interrupt_info *inti) 306 struct kvm_s390_interrupt_info *inti)
179{ 307{
@@ -186,26 +314,46 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
186 vcpu->stat.deliver_emergency_signal++; 314 vcpu->stat.deliver_emergency_signal++;
187 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 315 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
188 inti->emerg.code, 0); 316 inti->emerg.code, 0);
189 rc = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE); 317 rc = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
190 rc |= put_guest(vcpu, inti->emerg.code, 318 rc |= put_guest_lc(vcpu, inti->emerg.code,
191 (u16 __user *)__LC_EXT_CPU_ADDR); 319 (u16 *)__LC_EXT_CPU_ADDR);
192 rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, 320 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
321 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
322 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
193 &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 323 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
194 rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
195 __LC_EXT_NEW_PSW, sizeof(psw_t));
196 break; 324 break;
197 case KVM_S390_INT_EXTERNAL_CALL: 325 case KVM_S390_INT_EXTERNAL_CALL:
198 VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); 326 VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
199 vcpu->stat.deliver_external_call++; 327 vcpu->stat.deliver_external_call++;
200 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 328 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
201 inti->extcall.code, 0); 329 inti->extcall.code, 0);
202 rc = put_guest(vcpu, 0x1202, (u16 __user *)__LC_EXT_INT_CODE); 330 rc = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE);
203 rc |= put_guest(vcpu, inti->extcall.code, 331 rc |= put_guest_lc(vcpu, inti->extcall.code,
204 (u16 __user *)__LC_EXT_CPU_ADDR); 332 (u16 *)__LC_EXT_CPU_ADDR);
205 rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, 333 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
334 &vcpu->arch.sie_block->gpsw,
335 sizeof(psw_t));
336 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
337 &vcpu->arch.sie_block->gpsw,
338 sizeof(psw_t));
339 break;
340 case KVM_S390_INT_CLOCK_COMP:
341 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
342 inti->ext.ext_params, 0);
343 deliver_ckc_interrupt(vcpu);
344 break;
345 case KVM_S390_INT_CPU_TIMER:
346 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
347 inti->ext.ext_params, 0);
348 rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
349 (u16 *)__LC_EXT_INT_CODE);
350 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
351 &vcpu->arch.sie_block->gpsw,
352 sizeof(psw_t));
353 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
206 &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 354 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
207 rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, 355 rc |= put_guest_lc(vcpu, inti->ext.ext_params,
208 __LC_EXT_NEW_PSW, sizeof(psw_t)); 356 (u32 *)__LC_EXT_PARAMS);
209 break; 357 break;
210 case KVM_S390_INT_SERVICE: 358 case KVM_S390_INT_SERVICE:
211 VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", 359 VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
@@ -213,37 +361,39 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
213 vcpu->stat.deliver_service_signal++; 361 vcpu->stat.deliver_service_signal++;
214 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 362 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
215 inti->ext.ext_params, 0); 363 inti->ext.ext_params, 0);
216 rc = put_guest(vcpu, 0x2401, (u16 __user *)__LC_EXT_INT_CODE); 364 rc = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE);
217 rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, 365 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
366 &vcpu->arch.sie_block->gpsw,
367 sizeof(psw_t));
368 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
218 &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 369 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
219 rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, 370 rc |= put_guest_lc(vcpu, inti->ext.ext_params,
220 __LC_EXT_NEW_PSW, sizeof(psw_t)); 371 (u32 *)__LC_EXT_PARAMS);
221 rc |= put_guest(vcpu, inti->ext.ext_params,
222 (u32 __user *)__LC_EXT_PARAMS);
223 break; 372 break;
224 case KVM_S390_INT_PFAULT_INIT: 373 case KVM_S390_INT_PFAULT_INIT:
225 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, 374 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
226 inti->ext.ext_params2); 375 inti->ext.ext_params2);
227 rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE); 376 rc = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE);
228 rc |= put_guest(vcpu, 0x0600, (u16 __user *) __LC_EXT_CPU_ADDR); 377 rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR);
229 rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, 378 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
379 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
380 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
230 &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 381 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
231 rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, 382 rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
232 __LC_EXT_NEW_PSW, sizeof(psw_t)); 383 (u64 *) __LC_EXT_PARAMS2);
233 rc |= put_guest(vcpu, inti->ext.ext_params2,
234 (u64 __user *) __LC_EXT_PARAMS2);
235 break; 384 break;
236 case KVM_S390_INT_PFAULT_DONE: 385 case KVM_S390_INT_PFAULT_DONE:
237 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, 386 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
238 inti->ext.ext_params2); 387 inti->ext.ext_params2);
239 rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE); 388 rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
240 rc |= put_guest(vcpu, 0x0680, (u16 __user *) __LC_EXT_CPU_ADDR); 389 rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR);
241 rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, 390 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
391 &vcpu->arch.sie_block->gpsw,
392 sizeof(psw_t));
393 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
242 &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 394 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
243 rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, 395 rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
244 __LC_EXT_NEW_PSW, sizeof(psw_t)); 396 (u64 *)__LC_EXT_PARAMS2);
245 rc |= put_guest(vcpu, inti->ext.ext_params2,
246 (u64 __user *) __LC_EXT_PARAMS2);
247 break; 397 break;
248 case KVM_S390_INT_VIRTIO: 398 case KVM_S390_INT_VIRTIO:
249 VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", 399 VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
@@ -252,16 +402,17 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
252 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 402 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
253 inti->ext.ext_params, 403 inti->ext.ext_params,
254 inti->ext.ext_params2); 404 inti->ext.ext_params2);
255 rc = put_guest(vcpu, 0x2603, (u16 __user *)__LC_EXT_INT_CODE); 405 rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
256 rc |= put_guest(vcpu, 0x0d00, (u16 __user *)__LC_EXT_CPU_ADDR); 406 rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR);
257 rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, 407 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
408 &vcpu->arch.sie_block->gpsw,
409 sizeof(psw_t));
410 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
258 &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 411 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
259 rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, 412 rc |= put_guest_lc(vcpu, inti->ext.ext_params,
260 __LC_EXT_NEW_PSW, sizeof(psw_t)); 413 (u32 *)__LC_EXT_PARAMS);
261 rc |= put_guest(vcpu, inti->ext.ext_params, 414 rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
262 (u32 __user *)__LC_EXT_PARAMS); 415 (u64 *)__LC_EXT_PARAMS2);
263 rc |= put_guest(vcpu, inti->ext.ext_params2,
264 (u64 __user *)__LC_EXT_PARAMS2);
265 break; 416 break;
266 case KVM_S390_SIGP_STOP: 417 case KVM_S390_SIGP_STOP:
267 VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); 418 VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
@@ -285,13 +436,12 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
285 vcpu->stat.deliver_restart_signal++; 436 vcpu->stat.deliver_restart_signal++;
286 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 437 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
287 0, 0); 438 0, 0);
288 rc = copy_to_guest(vcpu, 439 rc = write_guest_lc(vcpu,
289 offsetof(struct _lowcore, restart_old_psw), 440 offsetof(struct _lowcore, restart_old_psw),
290 &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 441 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
291 rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, 442 rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
292 offsetof(struct _lowcore, restart_psw), 443 &vcpu->arch.sie_block->gpsw,
293 sizeof(psw_t)); 444 sizeof(psw_t));
294 atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
295 break; 445 break;
296 case KVM_S390_PROGRAM_INT: 446 case KVM_S390_PROGRAM_INT:
297 VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", 447 VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
@@ -300,13 +450,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
300 vcpu->stat.deliver_program_int++; 450 vcpu->stat.deliver_program_int++;
301 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 451 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
302 inti->pgm.code, 0); 452 inti->pgm.code, 0);
303 rc = put_guest(vcpu, inti->pgm.code, (u16 __user *)__LC_PGM_INT_CODE); 453 rc = __deliver_prog_irq(vcpu, &inti->pgm);
304 rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
305 (u16 __user *)__LC_PGM_ILC);
306 rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
307 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
308 rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
309 __LC_PGM_NEW_PSW, sizeof(psw_t));
310 break; 454 break;
311 455
312 case KVM_S390_MCHK: 456 case KVM_S390_MCHK:
@@ -317,11 +461,12 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
317 inti->mchk.mcic); 461 inti->mchk.mcic);
318 rc = kvm_s390_vcpu_store_status(vcpu, 462 rc = kvm_s390_vcpu_store_status(vcpu,
319 KVM_S390_STORE_STATUS_PREFIXED); 463 KVM_S390_STORE_STATUS_PREFIXED);
320 rc |= put_guest(vcpu, inti->mchk.mcic, (u64 __user *) __LC_MCCK_CODE); 464 rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE);
321 rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW, 465 rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
466 &vcpu->arch.sie_block->gpsw,
467 sizeof(psw_t));
468 rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
322 &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 469 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
323 rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
324 __LC_MCK_NEW_PSW, sizeof(psw_t));
325 break; 470 break;
326 471
327 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: 472 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
@@ -334,18 +479,20 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
334 vcpu->stat.deliver_io_int++; 479 vcpu->stat.deliver_io_int++;
335 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 480 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
336 param0, param1); 481 param0, param1);
337 rc = put_guest(vcpu, inti->io.subchannel_id, 482 rc = put_guest_lc(vcpu, inti->io.subchannel_id,
338 (u16 __user *) __LC_SUBCHANNEL_ID); 483 (u16 *)__LC_SUBCHANNEL_ID);
339 rc |= put_guest(vcpu, inti->io.subchannel_nr, 484 rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
340 (u16 __user *) __LC_SUBCHANNEL_NR); 485 (u16 *)__LC_SUBCHANNEL_NR);
341 rc |= put_guest(vcpu, inti->io.io_int_parm, 486 rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
342 (u32 __user *) __LC_IO_INT_PARM); 487 (u32 *)__LC_IO_INT_PARM);
343 rc |= put_guest(vcpu, inti->io.io_int_word, 488 rc |= put_guest_lc(vcpu, inti->io.io_int_word,
344 (u32 __user *) __LC_IO_INT_WORD); 489 (u32 *)__LC_IO_INT_WORD);
345 rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW, 490 rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
346 &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 491 &vcpu->arch.sie_block->gpsw,
347 rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, 492 sizeof(psw_t));
348 __LC_IO_NEW_PSW, sizeof(psw_t)); 493 rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
494 &vcpu->arch.sie_block->gpsw,
495 sizeof(psw_t));
349 break; 496 break;
350 } 497 }
351 default: 498 default:
@@ -358,25 +505,35 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
358 } 505 }
359} 506}
360 507
361static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) 508static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
362{ 509{
363 int rc; 510 int rc;
364 511
365 if (psw_extint_disabled(vcpu)) 512 rc = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
366 return 0; 513 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
367 if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) 514 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
368 return 0; 515 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
369 rc = put_guest(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE); 516 &vcpu->arch.sie_block->gpsw,
370 rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, 517 sizeof(psw_t));
371 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
372 rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
373 __LC_EXT_NEW_PSW, sizeof(psw_t));
374 if (rc) { 518 if (rc) {
375 printk("kvm: The guest lowcore is not mapped during interrupt " 519 printk("kvm: The guest lowcore is not mapped during interrupt "
376 "delivery, killing userspace\n"); 520 "delivery, killing userspace\n");
377 do_exit(SIGKILL); 521 do_exit(SIGKILL);
378 } 522 }
379 return 1; 523}
524
525/* Check whether SIGP interpretation facility has an external call pending */
526int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
527{
528 atomic_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl;
529
530 if (!psw_extint_disabled(vcpu) &&
531 (vcpu->arch.sie_block->gcr[0] & 0x2000ul) &&
532 (atomic_read(sigp_ctrl) & SIGP_CTRL_C) &&
533 (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND))
534 return 1;
535
536 return 0;
380} 537}
381 538
382int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) 539int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
@@ -406,19 +563,23 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
406 spin_unlock(&fi->lock); 563 spin_unlock(&fi->lock);
407 } 564 }
408 565
409 if ((!rc) && (vcpu->arch.sie_block->ckc < 566 if (!rc && kvm_cpu_has_pending_timer(vcpu))
410 get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) { 567 rc = 1;
411 if ((!psw_extint_disabled(vcpu)) && 568
412 (vcpu->arch.sie_block->gcr[0] & 0x800ul)) 569 if (!rc && kvm_s390_si_ext_call_pending(vcpu))
413 rc = 1; 570 rc = 1;
414 }
415 571
416 return rc; 572 return rc;
417} 573}
418 574
419int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 575int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
420{ 576{
421 return 0; 577 if (!(vcpu->arch.sie_block->ckc <
578 get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
579 return 0;
580 if (!ckc_interrupts_enabled(vcpu))
581 return 0;
582 return 1;
422} 583}
423 584
424int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) 585int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
@@ -441,8 +602,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
441 return -EOPNOTSUPP; /* disabled wait */ 602 return -EOPNOTSUPP; /* disabled wait */
442 } 603 }
443 604
444 if (psw_extint_disabled(vcpu) || 605 if (!ckc_interrupts_enabled(vcpu)) {
445 (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) {
446 VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); 606 VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
447 goto no_timer; 607 goto no_timer;
448 } 608 }
@@ -465,7 +625,8 @@ no_timer:
465 while (list_empty(&vcpu->arch.local_int.list) && 625 while (list_empty(&vcpu->arch.local_int.list) &&
466 list_empty(&vcpu->arch.local_int.float_int->list) && 626 list_empty(&vcpu->arch.local_int.float_int->list) &&
467 (!vcpu->arch.local_int.timer_due) && 627 (!vcpu->arch.local_int.timer_due) &&
468 !signal_pending(current)) { 628 !signal_pending(current) &&
629 !kvm_s390_si_ext_call_pending(vcpu)) {
469 set_current_state(TASK_INTERRUPTIBLE); 630 set_current_state(TASK_INTERRUPTIBLE);
470 spin_unlock_bh(&vcpu->arch.local_int.lock); 631 spin_unlock_bh(&vcpu->arch.local_int.lock);
471 spin_unlock(&vcpu->arch.local_int.float_int->lock); 632 spin_unlock(&vcpu->arch.local_int.float_int->lock);
@@ -522,6 +683,11 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
522 } 683 }
523 atomic_set(&li->active, 0); 684 atomic_set(&li->active, 0);
524 spin_unlock_bh(&li->lock); 685 spin_unlock_bh(&li->lock);
686
687 /* clear pending external calls set by sigp interpretation facility */
688 atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
689 atomic_clear_mask(SIGP_CTRL_C,
690 &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
525} 691}
526 692
527void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) 693void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
@@ -554,9 +720,8 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
554 } while (deliver); 720 } while (deliver);
555 } 721 }
556 722
557 if ((vcpu->arch.sie_block->ckc < 723 if (kvm_cpu_has_pending_timer(vcpu))
558 get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) 724 deliver_ckc_interrupt(vcpu);
559 __try_deliver_ckc_interrupt(vcpu);
560 725
561 if (atomic_read(&fi->active)) { 726 if (atomic_read(&fi->active)) {
562 do { 727 do {
@@ -660,6 +825,31 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
660 return 0; 825 return 0;
661} 826}
662 827
828int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
829 struct kvm_s390_pgm_info *pgm_info)
830{
831 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
832 struct kvm_s390_interrupt_info *inti;
833
834 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
835 if (!inti)
836 return -ENOMEM;
837
838 VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
839 pgm_info->code);
840 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
841 pgm_info->code, 0, 1);
842
843 inti->type = KVM_S390_PROGRAM_INT;
844 memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
845 spin_lock_bh(&li->lock);
846 list_add(&inti->list, &li->list);
847 atomic_set(&li->active, 1);
848 BUG_ON(waitqueue_active(li->wq));
849 spin_unlock_bh(&li->lock);
850 return 0;
851}
852
663struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, 853struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
664 u64 cr6, u64 schid) 854 u64 cr6, u64 schid)
665{ 855{
@@ -810,6 +1000,12 @@ int kvm_s390_inject_vm(struct kvm *kvm,
810 return __inject_vm(kvm, inti); 1000 return __inject_vm(kvm, inti);
811} 1001}
812 1002
1003void kvm_s390_reinject_io_int(struct kvm *kvm,
1004 struct kvm_s390_interrupt_info *inti)
1005{
1006 __inject_vm(kvm, inti);
1007}
1008
813int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, 1009int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
814 struct kvm_s390_interrupt *s390int) 1010 struct kvm_s390_interrupt *s390int)
815{ 1011{
@@ -839,6 +1035,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
839 break; 1035 break;
840 case KVM_S390_SIGP_STOP: 1036 case KVM_S390_SIGP_STOP:
841 case KVM_S390_RESTART: 1037 case KVM_S390_RESTART:
1038 case KVM_S390_INT_CLOCK_COMP:
1039 case KVM_S390_INT_CPU_TIMER:
842 VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); 1040 VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
843 inti->type = s390int->type; 1041 inti->type = s390int->type;
844 break; 1042 break;
@@ -900,7 +1098,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
900 return 0; 1098 return 0;
901} 1099}
902 1100
903static void clear_floating_interrupts(struct kvm *kvm) 1101void kvm_s390_clear_float_irqs(struct kvm *kvm)
904{ 1102{
905 struct kvm_s390_float_interrupt *fi; 1103 struct kvm_s390_float_interrupt *fi;
906 struct kvm_s390_interrupt_info *n, *inti = NULL; 1104 struct kvm_s390_interrupt_info *n, *inti = NULL;
@@ -1246,7 +1444,7 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1246 break; 1444 break;
1247 case KVM_DEV_FLIC_CLEAR_IRQS: 1445 case KVM_DEV_FLIC_CLEAR_IRQS:
1248 r = 0; 1446 r = 0;
1249 clear_floating_interrupts(dev->kvm); 1447 kvm_s390_clear_float_irqs(dev->kvm);
1250 break; 1448 break;
1251 case KVM_DEV_FLIC_APF_ENABLE: 1449 case KVM_DEV_FLIC_APF_ENABLE:
1252 dev->kvm->arch.gmap->pfault_enabled = 1; 1450 dev->kvm->arch.gmap->pfault_enabled = 1;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 825fe7bf95a6..2f3e14fe91a4 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -11,6 +11,7 @@
11 * Christian Borntraeger <borntraeger@de.ibm.com> 11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com> 12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com> 13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
14 */ 15 */
15 16
16#include <linux/compiler.h> 17#include <linux/compiler.h>
@@ -51,6 +52,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
51 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, 52 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
52 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, 53 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
53 { "instruction_lctl", VCPU_STAT(instruction_lctl) }, 54 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
55 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
56 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
54 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, 57 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
55 { "deliver_external_call", VCPU_STAT(deliver_external_call) }, 58 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
56 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, 59 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
@@ -66,6 +69,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
66 { "instruction_stpx", VCPU_STAT(instruction_stpx) }, 69 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
67 { "instruction_stap", VCPU_STAT(instruction_stap) }, 70 { "instruction_stap", VCPU_STAT(instruction_stap) },
68 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) }, 71 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
72 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
69 { "instruction_stsch", VCPU_STAT(instruction_stsch) }, 73 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
70 { "instruction_chsc", VCPU_STAT(instruction_chsc) }, 74 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
71 { "instruction_essa", VCPU_STAT(instruction_essa) }, 75 { "instruction_essa", VCPU_STAT(instruction_essa) },
@@ -90,7 +94,7 @@ unsigned long *vfacilities;
90static struct gmap_notifier gmap_notifier; 94static struct gmap_notifier gmap_notifier;
91 95
92/* test availability of vfacility */ 96/* test availability of vfacility */
93static inline int test_vfacility(unsigned long nr) 97int test_vfacility(unsigned long nr)
94{ 98{
95 return __test_facility(nr, (void *) vfacilities); 99 return __test_facility(nr, (void *) vfacilities);
96} 100}
@@ -162,6 +166,7 @@ int kvm_dev_ioctl_check_extension(long ext)
162 case KVM_CAP_IOEVENTFD: 166 case KVM_CAP_IOEVENTFD:
163 case KVM_CAP_DEVICE_CTRL: 167 case KVM_CAP_DEVICE_CTRL:
164 case KVM_CAP_ENABLE_CAP_VM: 168 case KVM_CAP_ENABLE_CAP_VM:
169 case KVM_CAP_VM_ATTRIBUTES:
165 r = 1; 170 r = 1;
166 break; 171 break;
167 case KVM_CAP_NR_VCPUS: 172 case KVM_CAP_NR_VCPUS:
@@ -180,6 +185,25 @@ int kvm_dev_ioctl_check_extension(long ext)
180 return r; 185 return r;
181} 186}
182 187
188static void kvm_s390_sync_dirty_log(struct kvm *kvm,
189 struct kvm_memory_slot *memslot)
190{
191 gfn_t cur_gfn, last_gfn;
192 unsigned long address;
193 struct gmap *gmap = kvm->arch.gmap;
194
195 down_read(&gmap->mm->mmap_sem);
196 /* Loop over all guest pages */
197 last_gfn = memslot->base_gfn + memslot->npages;
198	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
199 address = gfn_to_hva_memslot(memslot, cur_gfn);
200
201 if (gmap_test_and_clear_dirty(address, gmap))
202 mark_page_dirty(kvm, cur_gfn);
203 }
204 up_read(&gmap->mm->mmap_sem);
205}
206
183/* Section: vm related */ 207/* Section: vm related */
184/* 208/*
185 * Get (and clear) the dirty memory log for a memory slot. 209 * Get (and clear) the dirty memory log for a memory slot.
@@ -187,7 +211,36 @@ int kvm_dev_ioctl_check_extension(long ext)
187int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 211int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
188 struct kvm_dirty_log *log) 212 struct kvm_dirty_log *log)
189{ 213{
190 return 0; 214 int r;
215 unsigned long n;
216 struct kvm_memory_slot *memslot;
217 int is_dirty = 0;
218
219 mutex_lock(&kvm->slots_lock);
220
221 r = -EINVAL;
222 if (log->slot >= KVM_USER_MEM_SLOTS)
223 goto out;
224
225 memslot = id_to_memslot(kvm->memslots, log->slot);
226 r = -ENOENT;
227 if (!memslot->dirty_bitmap)
228 goto out;
229
230 kvm_s390_sync_dirty_log(kvm, memslot);
231 r = kvm_get_dirty_log(kvm, log, &is_dirty);
232 if (r)
233 goto out;
234
235 /* Clear the dirty log */
236 if (is_dirty) {
237 n = kvm_dirty_bitmap_bytes(memslot);
238 memset(memslot->dirty_bitmap, 0, n);
239 }
240 r = 0;
241out:
242 mutex_unlock(&kvm->slots_lock);
243 return r;
191} 244}
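
With kvm_vm_ioctl_get_dirty_log implemented, userspace can retrieve and clear the per-memslot dirty bitmap through the standard KVM_GET_DIRTY_LOG ioctl. The sketch below is a hedged example of a caller; it assumes vm_fd is a KVM VM file descriptor and that the given slot was registered with KVM_MEM_LOG_DIRTY_PAGES and covers npages guest pages. Error handling is kept minimal.

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

int fetch_dirty_log(int vm_fd, __u32 slot, __u64 npages)
{
	size_t bitmap_bytes = ((npages + 63) / 64) * 8;	/* 64-bit aligned bitmap */
	void *bitmap = calloc(1, bitmap_bytes);
	struct kvm_dirty_log log;
	int ret;

	if (!bitmap)
		return -1;
	memset(&log, 0, sizeof(log));
	log.slot = slot;
	log.dirty_bitmap = bitmap;

	ret = ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
	if (ret)
		perror("KVM_GET_DIRTY_LOG");
	/* ... scan bitmap: bit N set means page N of the slot was written ... */
	free(bitmap);
	return ret;
}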
192 245
193static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) 246static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
@@ -209,11 +262,86 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
209 return r; 262 return r;
210} 263}
211 264
265static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
266{
267 int ret;
268 unsigned int idx;
269 switch (attr->attr) {
270 case KVM_S390_VM_MEM_ENABLE_CMMA:
271 ret = -EBUSY;
272 mutex_lock(&kvm->lock);
273 if (atomic_read(&kvm->online_vcpus) == 0) {
274 kvm->arch.use_cmma = 1;
275 ret = 0;
276 }
277 mutex_unlock(&kvm->lock);
278 break;
279 case KVM_S390_VM_MEM_CLR_CMMA:
280 mutex_lock(&kvm->lock);
281 idx = srcu_read_lock(&kvm->srcu);
282 page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false);
283 srcu_read_unlock(&kvm->srcu, idx);
284 mutex_unlock(&kvm->lock);
285 ret = 0;
286 break;
287 default:
288 ret = -ENXIO;
289 break;
290 }
291 return ret;
292}
293
294static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
295{
296 int ret;
297
298 switch (attr->group) {
299 case KVM_S390_VM_MEM_CTRL:
300 ret = kvm_s390_mem_control(kvm, attr);
301 break;
302 default:
303 ret = -ENXIO;
304 break;
305 }
306
307 return ret;
308}
309
310static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
311{
312 return -ENXIO;
313}
314
315static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
316{
317 int ret;
318
319 switch (attr->group) {
320 case KVM_S390_VM_MEM_CTRL:
321 switch (attr->attr) {
322 case KVM_S390_VM_MEM_ENABLE_CMMA:
323 case KVM_S390_VM_MEM_CLR_CMMA:
324 ret = 0;
325 break;
326 default:
327 ret = -ENXIO;
328 break;
329 }
330 break;
331 default:
332 ret = -ENXIO;
333 break;
334 }
335
336 return ret;
337}
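
The new VM attribute group is driven from userspace through KVM_HAS_DEVICE_ATTR / KVM_SET_DEVICE_ATTR on the VM file descriptor, wired up in kvm_arch_vm_ioctl below. The following hedged sketch enables CMMA; it assumes uapi headers that already carry the KVM_S390_VM_MEM_* constants introduced by this series, and that no vCPU has been created yet (kvm_s390_mem_control returns -EBUSY otherwise).

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <string.h>
#include <stdio.h>

int enable_cmma(int vm_fd)
{
	struct kvm_device_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.group = KVM_S390_VM_MEM_CTRL;
	attr.attr  = KVM_S390_VM_MEM_ENABLE_CMMA;

	/* probe first, so older kernels fail gracefully */
	if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr)) {
		fprintf(stderr, "CMMA attribute not supported\n");
		return -1;
	}
	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}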
338
212long kvm_arch_vm_ioctl(struct file *filp, 339long kvm_arch_vm_ioctl(struct file *filp,
213 unsigned int ioctl, unsigned long arg) 340 unsigned int ioctl, unsigned long arg)
214{ 341{
215 struct kvm *kvm = filp->private_data; 342 struct kvm *kvm = filp->private_data;
216 void __user *argp = (void __user *)arg; 343 void __user *argp = (void __user *)arg;
344 struct kvm_device_attr attr;
217 int r; 345 int r;
218 346
219 switch (ioctl) { 347 switch (ioctl) {
@@ -246,6 +374,27 @@ long kvm_arch_vm_ioctl(struct file *filp,
246 } 374 }
247 break; 375 break;
248 } 376 }
377 case KVM_SET_DEVICE_ATTR: {
378 r = -EFAULT;
379 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
380 break;
381 r = kvm_s390_vm_set_attr(kvm, &attr);
382 break;
383 }
384 case KVM_GET_DEVICE_ATTR: {
385 r = -EFAULT;
386 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
387 break;
388 r = kvm_s390_vm_get_attr(kvm, &attr);
389 break;
390 }
391 case KVM_HAS_DEVICE_ATTR: {
392 r = -EFAULT;
393 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
394 break;
395 r = kvm_s390_vm_has_attr(kvm, &attr);
396 break;
397 }
249 default: 398 default:
250 r = -ENOTTY; 399 r = -ENOTTY;
251 } 400 }
@@ -292,6 +441,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
292 441
293 spin_lock_init(&kvm->arch.float_int.lock); 442 spin_lock_init(&kvm->arch.float_int.lock);
294 INIT_LIST_HEAD(&kvm->arch.float_int.list); 443 INIT_LIST_HEAD(&kvm->arch.float_int.list);
444 init_waitqueue_head(&kvm->arch.ipte_wq);
295 445
296 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 446 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
297 VM_EVENT(kvm, 3, "%s", "vm created"); 447 VM_EVENT(kvm, 3, "%s", "vm created");
@@ -309,6 +459,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
309 kvm->arch.css_support = 0; 459 kvm->arch.css_support = 0;
310 kvm->arch.use_irqchip = 0; 460 kvm->arch.use_irqchip = 0;
311 461
462 spin_lock_init(&kvm->arch.start_stop_lock);
463
312 return 0; 464 return 0;
313out_nogmap: 465out_nogmap:
314 debug_unregister(kvm->arch.dbf); 466 debug_unregister(kvm->arch.dbf);
@@ -322,6 +474,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
322{ 474{
323 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 475 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
324 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 476 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
477 kvm_s390_clear_local_irqs(vcpu);
325 kvm_clear_async_pf_completion_queue(vcpu); 478 kvm_clear_async_pf_completion_queue(vcpu);
326 if (!kvm_is_ucontrol(vcpu->kvm)) { 479 if (!kvm_is_ucontrol(vcpu->kvm)) {
327 clear_bit(63 - vcpu->vcpu_id, 480 clear_bit(63 - vcpu->vcpu_id,
@@ -335,9 +488,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
335 if (kvm_is_ucontrol(vcpu->kvm)) 488 if (kvm_is_ucontrol(vcpu->kvm))
336 gmap_free(vcpu->arch.gmap); 489 gmap_free(vcpu->arch.gmap);
337 490
338 if (vcpu->arch.sie_block->cbrlo) 491 if (kvm_s390_cmma_enabled(vcpu->kvm))
339 __free_page(__pfn_to_page( 492 kvm_s390_vcpu_unsetup_cmma(vcpu);
340 vcpu->arch.sie_block->cbrlo >> PAGE_SHIFT));
341 free_page((unsigned long)(vcpu->arch.sie_block)); 493 free_page((unsigned long)(vcpu->arch.sie_block));
342 494
343 kvm_vcpu_uninit(vcpu); 495 kvm_vcpu_uninit(vcpu);
@@ -372,6 +524,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
372 if (!kvm_is_ucontrol(kvm)) 524 if (!kvm_is_ucontrol(kvm))
373 gmap_free(kvm->arch.gmap); 525 gmap_free(kvm->arch.gmap);
374 kvm_s390_destroy_adapters(kvm); 526 kvm_s390_destroy_adapters(kvm);
527 kvm_s390_clear_float_irqs(kvm);
375} 528}
376 529
377/* Section: vcpu related */ 530/* Section: vcpu related */
@@ -442,7 +595,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
442 vcpu->arch.sie_block->pp = 0; 595 vcpu->arch.sie_block->pp = 0;
443 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 596 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
444 kvm_clear_async_pf_completion_queue(vcpu); 597 kvm_clear_async_pf_completion_queue(vcpu);
445 atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); 598 kvm_s390_vcpu_stop(vcpu);
446 kvm_s390_clear_local_irqs(vcpu); 599 kvm_s390_clear_local_irqs(vcpu);
447} 600}
448 601
@@ -451,9 +604,26 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
451 return 0; 604 return 0;
452} 605}
453 606
607void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
608{
609 free_page(vcpu->arch.sie_block->cbrlo);
610 vcpu->arch.sie_block->cbrlo = 0;
611}
612
613int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
614{
615 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
616 if (!vcpu->arch.sie_block->cbrlo)
617 return -ENOMEM;
618
619 vcpu->arch.sie_block->ecb2 |= 0x80;
620 vcpu->arch.sie_block->ecb2 &= ~0x08;
621 return 0;
622}
623
454int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 624int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
455{ 625{
456 struct page *cbrl; 626 int rc = 0;
457 627
458 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 628 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
459 CPUSTAT_SM | 629 CPUSTAT_SM |
@@ -464,15 +634,17 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
464 vcpu->arch.sie_block->ecb |= 0x10; 634 vcpu->arch.sie_block->ecb |= 0x10;
465 635
466 vcpu->arch.sie_block->ecb2 = 8; 636 vcpu->arch.sie_block->ecb2 = 8;
467 vcpu->arch.sie_block->eca = 0xC1002001U; 637 vcpu->arch.sie_block->eca = 0xD1002000U;
638 if (sclp_has_siif())
639 vcpu->arch.sie_block->eca |= 1;
468 vcpu->arch.sie_block->fac = (int) (long) vfacilities; 640 vcpu->arch.sie_block->fac = (int) (long) vfacilities;
469 if (kvm_enabled_cmma()) { 641 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
470 cbrl = alloc_page(GFP_KERNEL | __GFP_ZERO); 642 ICTL_TPROT;
471 if (cbrl) { 643
472 vcpu->arch.sie_block->ecb2 |= 0x80; 644 if (kvm_s390_cmma_enabled(vcpu->kvm)) {
473 vcpu->arch.sie_block->ecb2 &= ~0x08; 645 rc = kvm_s390_vcpu_setup_cmma(vcpu);
474 vcpu->arch.sie_block->cbrlo = page_to_phys(cbrl); 646 if (rc)
475 } 647 return rc;
476 } 648 }
477 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); 649 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
478 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, 650 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
@@ -480,7 +652,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
480 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 652 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
481 get_cpu_id(&vcpu->arch.cpu_id); 653 get_cpu_id(&vcpu->arch.cpu_id);
482 vcpu->arch.cpu_id.version = 0xff; 654 vcpu->arch.cpu_id.version = 0xff;
483 return 0; 655 return rc;
484} 656}
485 657
486struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 658struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
@@ -584,7 +756,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
584 756
585 kvm_for_each_vcpu(i, vcpu, kvm) { 757 kvm_for_each_vcpu(i, vcpu, kvm) {
586 /* match against both prefix pages */ 758 /* match against both prefix pages */
587 if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) { 759 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
588 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address); 760 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
589 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 761 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
590 exit_sie_sync(vcpu); 762 exit_sie_sync(vcpu);
@@ -769,10 +941,40 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
769 return -EINVAL; /* not implemented yet */ 941 return -EINVAL; /* not implemented yet */
770} 942}
771 943
944#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
945 KVM_GUESTDBG_USE_HW_BP | \
946 KVM_GUESTDBG_ENABLE)
947
772int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 948int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
773 struct kvm_guest_debug *dbg) 949 struct kvm_guest_debug *dbg)
774{ 950{
775 return -EINVAL; /* not implemented yet */ 951 int rc = 0;
952
953 vcpu->guest_debug = 0;
954 kvm_s390_clear_bp_data(vcpu);
955
956 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
957 return -EINVAL;
958
959 if (dbg->control & KVM_GUESTDBG_ENABLE) {
960 vcpu->guest_debug = dbg->control;
961 /* enforce guest PER */
962 atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
963
964 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
965 rc = kvm_s390_import_bp_data(vcpu, dbg);
966 } else {
967 atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
968 vcpu->arch.guestdbg.last_bp = 0;
969 }
970
971 if (rc) {
972 vcpu->guest_debug = 0;
973 kvm_s390_clear_bp_data(vcpu);
974 atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
975 }
976
977 return rc;
776} 978}
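
Userspace turns the new guest debug support on with the generic KVM_SET_GUEST_DEBUG vCPU ioctl. A hedged sketch enabling single stepping follows; only the generic control flags are set here, while hardware breakpoints would additionally need the s390-specific arch part that kvm_s390_import_bp_data consumes.

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <string.h>

int enable_singlestep(int vcpu_fd)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

	/* on success, subsequent KVM_RUN calls exit with KVM_EXIT_DEBUG per step */
	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}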
777 979
778int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 980int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
@@ -787,8 +989,27 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
787 return -EINVAL; /* not implemented yet */ 989 return -EINVAL; /* not implemented yet */
788} 990}
789 991
992bool kvm_s390_cmma_enabled(struct kvm *kvm)
993{
994 if (!MACHINE_IS_LPAR)
995 return false;
996 /* only enable for z10 and later */
997 if (!MACHINE_HAS_EDAT1)
998 return false;
999 if (!kvm->arch.use_cmma)
1000 return false;
1001 return true;
1002}
1003
1004static bool ibs_enabled(struct kvm_vcpu *vcpu)
1005{
1006 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1007}
1008
790static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 1009static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
791{ 1010{
1011retry:
1012 s390_vcpu_unblock(vcpu);
792 /* 1013 /*
793 * We use MMU_RELOAD just to re-arm the ipte notifier for the 1014 * We use MMU_RELOAD just to re-arm the ipte notifier for the
794 * guest prefix page. gmap_ipte_notify will wait on the ptl lock. 1015 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
@@ -796,27 +1017,61 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
796 * already finished. We might race against a second unmapper that 1017 * already finished. We might race against a second unmapper that
797 * wants to set the blocking bit. Let's just retry the request loop. 1018 * wants to set the blocking bit. Let's just retry the request loop.
798 */ 1019 */
799 while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 1020 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
800 int rc; 1021 int rc;
801 rc = gmap_ipte_notify(vcpu->arch.gmap, 1022 rc = gmap_ipte_notify(vcpu->arch.gmap,
802 vcpu->arch.sie_block->prefix, 1023 kvm_s390_get_prefix(vcpu),
803 PAGE_SIZE * 2); 1024 PAGE_SIZE * 2);
804 if (rc) 1025 if (rc)
805 return rc; 1026 return rc;
806 s390_vcpu_unblock(vcpu); 1027 goto retry;
1028 }
1029
1030 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1031 if (!ibs_enabled(vcpu)) {
1032 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1033 atomic_set_mask(CPUSTAT_IBS,
1034 &vcpu->arch.sie_block->cpuflags);
1035 }
1036 goto retry;
807 } 1037 }
1038
1039 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1040 if (ibs_enabled(vcpu)) {
1041 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1042 atomic_clear_mask(CPUSTAT_IBS,
1043 &vcpu->arch.sie_block->cpuflags);
1044 }
1045 goto retry;
1046 }
1047
808 return 0; 1048 return 0;
809} 1049}
810 1050
811static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu) 1051/**
1052 * kvm_arch_fault_in_page - fault-in guest page if necessary
1053 * @vcpu: The corresponding virtual cpu
1054 * @gpa: Guest physical address
1055 * @writable: Whether the page should be writable or not
1056 *
1057 * Make sure that a guest page has been faulted-in on the host.
1058 *
1059 * Return: Zero on success, negative error code otherwise.
1060 */
1061long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
812{ 1062{
813 long rc;
814 hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
815 struct mm_struct *mm = current->mm; 1063 struct mm_struct *mm = current->mm;
1064 hva_t hva;
1065 long rc;
1066
1067 hva = gmap_fault(gpa, vcpu->arch.gmap);
1068 if (IS_ERR_VALUE(hva))
1069 return (long)hva;
816 down_read(&mm->mmap_sem); 1070 down_read(&mm->mmap_sem);
817 rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL); 1071 rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL);
818 up_read(&mm->mmap_sem); 1072 up_read(&mm->mmap_sem);
819 return rc; 1073
1074 return rc < 0 ? rc : 0;
820} 1075}
821 1076
822static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 1077static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
@@ -883,8 +1138,9 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
883 if (!vcpu->arch.gmap->pfault_enabled) 1138 if (!vcpu->arch.gmap->pfault_enabled)
884 return 0; 1139 return 0;
885 1140
886 hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap); 1141 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
887 if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8)) 1142 hva += current->thread.gmap_addr & ~PAGE_MASK;
1143 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
888 return 0; 1144 return 0;
889 1145
890 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 1146 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
@@ -917,6 +1173,11 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
917 if (rc) 1173 if (rc)
918 return rc; 1174 return rc;
919 1175
1176 if (guestdbg_enabled(vcpu)) {
1177 kvm_s390_backup_guest_per_regs(vcpu);
1178 kvm_s390_patch_guest_per_regs(vcpu);
1179 }
1180
920 vcpu->arch.sie_block->icptcode = 0; 1181 vcpu->arch.sie_block->icptcode = 0;
921 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 1182 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
922 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 1183 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
@@ -933,6 +1194,9 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
933 vcpu->arch.sie_block->icptcode); 1194 vcpu->arch.sie_block->icptcode);
934 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 1195 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
935 1196
1197 if (guestdbg_enabled(vcpu))
1198 kvm_s390_restore_guest_per_regs(vcpu);
1199
936 if (exit_reason >= 0) { 1200 if (exit_reason >= 0) {
937 rc = 0; 1201 rc = 0;
938 } else if (kvm_is_ucontrol(vcpu->kvm)) { 1202 } else if (kvm_is_ucontrol(vcpu->kvm)) {
@@ -945,9 +1209,12 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
945 } else if (current->thread.gmap_pfault) { 1209 } else if (current->thread.gmap_pfault) {
946 trace_kvm_s390_major_guest_pfault(vcpu); 1210 trace_kvm_s390_major_guest_pfault(vcpu);
947 current->thread.gmap_pfault = 0; 1211 current->thread.gmap_pfault = 0;
948 if (kvm_arch_setup_async_pf(vcpu) || 1212 if (kvm_arch_setup_async_pf(vcpu)) {
949 (kvm_arch_fault_in_sync(vcpu) >= 0))
950 rc = 0; 1213 rc = 0;
1214 } else {
1215 gpa_t gpa = current->thread.gmap_addr;
1216 rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
1217 }
951 } 1218 }
952 1219
953 if (rc == -1) { 1220 if (rc == -1) {
@@ -969,16 +1236,6 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
969 return rc; 1236 return rc;
970} 1237}
971 1238
972bool kvm_enabled_cmma(void)
973{
974 if (!MACHINE_IS_LPAR)
975 return false;
976 /* only enable for z10 and later */
977 if (!MACHINE_HAS_EDAT1)
978 return false;
979 return true;
980}
981
982static int __vcpu_run(struct kvm_vcpu *vcpu) 1239static int __vcpu_run(struct kvm_vcpu *vcpu)
983{ 1240{
984 int rc, exit_reason; 1241 int rc, exit_reason;
@@ -1008,7 +1265,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
1008 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 1265 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1009 1266
1010 rc = vcpu_post_run(vcpu, exit_reason); 1267 rc = vcpu_post_run(vcpu, exit_reason);
1011 } while (!signal_pending(current) && !rc); 1268 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
1012 1269
1013 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 1270 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
1014 return rc; 1271 return rc;
@@ -1019,10 +1276,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1019 int rc; 1276 int rc;
1020 sigset_t sigsaved; 1277 sigset_t sigsaved;
1021 1278
1279 if (guestdbg_exit_pending(vcpu)) {
1280 kvm_s390_prepare_debug_exit(vcpu);
1281 return 0;
1282 }
1283
1022 if (vcpu->sigset_active) 1284 if (vcpu->sigset_active)
1023 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 1285 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
1024 1286
1025 atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); 1287 kvm_s390_vcpu_start(vcpu);
1026 1288
1027 switch (kvm_run->exit_reason) { 1289 switch (kvm_run->exit_reason) {
1028 case KVM_EXIT_S390_SIEIC: 1290 case KVM_EXIT_S390_SIEIC:
@@ -1031,6 +1293,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1031 case KVM_EXIT_S390_RESET: 1293 case KVM_EXIT_S390_RESET:
1032 case KVM_EXIT_S390_UCONTROL: 1294 case KVM_EXIT_S390_UCONTROL:
1033 case KVM_EXIT_S390_TSCH: 1295 case KVM_EXIT_S390_TSCH:
1296 case KVM_EXIT_DEBUG:
1034 break; 1297 break;
1035 default: 1298 default:
1036 BUG(); 1299 BUG();
@@ -1056,6 +1319,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1056 rc = -EINTR; 1319 rc = -EINTR;
1057 } 1320 }
1058 1321
1322 if (guestdbg_exit_pending(vcpu) && !rc) {
1323 kvm_s390_prepare_debug_exit(vcpu);
1324 rc = 0;
1325 }
1326
1059 if (rc == -EOPNOTSUPP) { 1327 if (rc == -EOPNOTSUPP) {
1060 /* intercept cannot be handled in-kernel, prepare kvm-run */ 1328 /* intercept cannot be handled in-kernel, prepare kvm-run */
1061 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; 1329 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
@@ -1073,7 +1341,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1073 1341
1074 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 1342 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
1075 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 1343 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
1076 kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix; 1344 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
1077 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 1345 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
1078 1346
1079 if (vcpu->sigset_active) 1347 if (vcpu->sigset_active)
@@ -1083,83 +1351,52 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1083 return rc; 1351 return rc;
1084} 1352}
1085 1353
1086static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
1087 unsigned long n, int prefix)
1088{
1089 if (prefix)
1090 return copy_to_guest(vcpu, guestdest, from, n);
1091 else
1092 return copy_to_guest_absolute(vcpu, guestdest, from, n);
1093}
1094
1095/* 1354/*
1096 * store status at address 1355 * store status at address
1097 * we have two special cases: 1356 * we have two special cases:
1098 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 1357 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
1099 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 1358 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
1100 */ 1359 */
1101int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr) 1360int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
1102{ 1361{
1103 unsigned char archmode = 1; 1362 unsigned char archmode = 1;
1104 int prefix; 1363 unsigned int px;
1105 u64 clkcomp; 1364 u64 clkcomp;
1365 int rc;
1106 1366
1107 if (addr == KVM_S390_STORE_STATUS_NOADDR) { 1367 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
1108 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1)) 1368 if (write_guest_abs(vcpu, 163, &archmode, 1))
1109 return -EFAULT; 1369 return -EFAULT;
1110 addr = SAVE_AREA_BASE; 1370 gpa = SAVE_AREA_BASE;
1111 prefix = 0; 1371 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
1112 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) { 1372 if (write_guest_real(vcpu, 163, &archmode, 1))
1113 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
1114 return -EFAULT; 1373 return -EFAULT;
1115 addr = SAVE_AREA_BASE; 1374 gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
1116 prefix = 1; 1375 }
1117 } else 1376 rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
1118 prefix = 0; 1377 vcpu->arch.guest_fpregs.fprs, 128);
1119 1378 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
1120 if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs), 1379 vcpu->run->s.regs.gprs, 128);
1121 vcpu->arch.guest_fpregs.fprs, 128, prefix)) 1380 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
1122 return -EFAULT; 1381 &vcpu->arch.sie_block->gpsw, 16);
1123 1382 px = kvm_s390_get_prefix(vcpu);
1124 if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs), 1383 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
1125 vcpu->run->s.regs.gprs, 128, prefix)) 1384 &px, 4);
1126 return -EFAULT; 1385 rc |= write_guest_abs(vcpu,
1127 1386 gpa + offsetof(struct save_area, fp_ctrl_reg),
1128 if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw), 1387 &vcpu->arch.guest_fpregs.fpc, 4);
1129 &vcpu->arch.sie_block->gpsw, 16, prefix)) 1388 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
1130 return -EFAULT; 1389 &vcpu->arch.sie_block->todpr, 4);
1131 1390 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
1132 if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg), 1391 &vcpu->arch.sie_block->cputm, 8);
1133 &vcpu->arch.sie_block->prefix, 4, prefix))
1134 return -EFAULT;
1135
1136 if (__guestcopy(vcpu,
1137 addr + offsetof(struct save_area, fp_ctrl_reg),
1138 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
1139 return -EFAULT;
1140
1141 if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
1142 &vcpu->arch.sie_block->todpr, 4, prefix))
1143 return -EFAULT;
1144
1145 if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
1146 &vcpu->arch.sie_block->cputm, 8, prefix))
1147 return -EFAULT;
1148
1149 clkcomp = vcpu->arch.sie_block->ckc >> 8; 1392 clkcomp = vcpu->arch.sie_block->ckc >> 8;
1150 if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp), 1393 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
1151 &clkcomp, 8, prefix)) 1394 &clkcomp, 8);
1152 return -EFAULT; 1395 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
1153 1396 &vcpu->run->s.regs.acrs, 64);
1154 if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs), 1397 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
1155 &vcpu->run->s.regs.acrs, 64, prefix)) 1398 &vcpu->arch.sie_block->gcr, 128);
1156 return -EFAULT; 1399 return rc ? -EFAULT : 0;
1157
1158 if (__guestcopy(vcpu,
1159 addr + offsetof(struct save_area, ctrl_regs),
1160 &vcpu->arch.sie_block->gcr, 128, prefix))
1161 return -EFAULT;
1162 return 0;
1163} 1400}
1164 1401
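The rewritten store-status path above accumulates the return codes of the individual write_guest_abs() calls and only converts the combined result to -EFAULT at the very end. The following standalone sketch (not part of the patch; the save area layout and write_abs() helper are simplified stand-ins) illustrates that accumulation pattern:

    #include <errno.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* hypothetical, trimmed-down save area -- not the real struct save_area */
    struct save_area {
            uint64_t fp_regs[16];
            uint64_t gp_regs[16];
            uint8_t  psw[16];
            uint32_t pref_reg;
    };

    static uint8_t guest_mem[4096];        /* stand-in for guest absolute storage */

    /* stand-in for write_guest_abs(): 0 on success, nonzero on fault */
    static int write_abs(unsigned long gpa, const void *from, size_t len)
    {
            if (gpa + len > sizeof(guest_mem))
                    return 1;
            memcpy(guest_mem + gpa, from, len);
            return 0;
    }

    static int store_status(unsigned long gpa, const uint64_t *fprs,
                            const uint64_t *gprs, uint32_t prefix)
    {
            int rc;

            /* accumulate errors; any failed write makes rc nonzero */
            rc  = write_abs(gpa + offsetof(struct save_area, fp_regs), fprs, 128);
            rc |= write_abs(gpa + offsetof(struct save_area, gp_regs), gprs, 128);
            rc |= write_abs(gpa + offsetof(struct save_area, pref_reg), &prefix, 4);
            return rc ? -EFAULT : 0;
    }

    int main(void)
    {
            uint64_t fprs[16] = { 0 }, gprs[16] = { 0 };

            return store_status(0, fprs, gprs, 0x2000) ? 1 : 0;
    }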
1165int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 1402int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
@@ -1176,6 +1413,109 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
1176 return kvm_s390_store_status_unloaded(vcpu, addr); 1413 return kvm_s390_store_status_unloaded(vcpu, addr);
1177} 1414}
1178 1415
1416static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
1417{
1418 return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
1419}
1420
1421static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
1422{
1423 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
1424 kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
1425 exit_sie_sync(vcpu);
1426}
1427
1428static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
1429{
1430 unsigned int i;
1431 struct kvm_vcpu *vcpu;
1432
1433 kvm_for_each_vcpu(i, vcpu, kvm) {
1434 __disable_ibs_on_vcpu(vcpu);
1435 }
1436}
1437
1438static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
1439{
1440 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
1441 kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
1442 exit_sie_sync(vcpu);
1443}
1444
1445void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
1446{
1447 int i, online_vcpus, started_vcpus = 0;
1448
1449 if (!is_vcpu_stopped(vcpu))
1450 return;
1451
1452 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
1453 /* Only one cpu at a time may enter/leave the STOPPED state. */
1454 spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
1455 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
1456
1457 for (i = 0; i < online_vcpus; i++) {
1458 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
1459 started_vcpus++;
1460 }
1461
1462 if (started_vcpus == 0) {
1463 /* we're the only active VCPU -> speed it up */
1464 __enable_ibs_on_vcpu(vcpu);
1465 } else if (started_vcpus == 1) {
1466 /*
1467 * As we are starting a second VCPU, we have to disable
1468 * the IBS facility on all VCPUs to remove potentially
1469 * outstanding ENABLE requests.
1470 */
1471 __disable_ibs_on_all_vcpus(vcpu->kvm);
1472 }
1473
1474 atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
1475 /*
1476 * Another VCPU might have used IBS while we were offline.
1477 * Let's play safe and flush the VCPU at startup.
1478 */
1479 vcpu->arch.sie_block->ihcpu = 0xffff;
1480 spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
1481 return;
1482}
1483
1484void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
1485{
1486 int i, online_vcpus, started_vcpus = 0;
1487 struct kvm_vcpu *started_vcpu = NULL;
1488
1489 if (is_vcpu_stopped(vcpu))
1490 return;
1491
1492 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
1493 /* Only one cpu at a time may enter/leave the STOPPED state. */
1494 spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
1495 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
1496
1497 atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
1498 __disable_ibs_on_vcpu(vcpu);
1499
1500 for (i = 0; i < online_vcpus; i++) {
1501 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
1502 started_vcpus++;
1503 started_vcpu = vcpu->kvm->vcpus[i];
1504 }
1505 }
1506
1507 if (started_vcpus == 1) {
1508 /*
1509 * As we only have one VCPU left, we want to enable the
1510 * IBS facility for that VCPU to speed it up.
1511 */
1512 __enable_ibs_on_vcpu(started_vcpu);
1513 }
1514
1515 spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
1516 return;
1517}
1518
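kvm_s390_vcpu_start()/stop() decide whether interlocking-and-broadcasting suppression (IBS) may stay on by counting how many other VCPUs are already running: a lone VCPU gets IBS enabled, and the moment a second one starts, IBS is dropped everywhere. A minimal standalone model of that decision (plain booleans instead of SIE control blocks; all names are illustrative only):

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_VCPUS 4

    static bool stopped[MAX_VCPUS] = { true, true, true, true };
    static bool ibs_enabled[MAX_VCPUS];

    static int count_started(void)
    {
            int n = 0;

            for (int i = 0; i < MAX_VCPUS; i++)
                    if (!stopped[i])
                            n++;
            return n;
    }

    static void vcpu_start(int id)
    {
            int started = count_started();

            if (started == 0) {
                    /* we're the only active VCPU -> IBS speeds it up */
                    ibs_enabled[id] = true;
            } else if (started == 1) {
                    /* a second VCPU is coming up -> drop IBS everywhere */
                    for (int i = 0; i < MAX_VCPUS; i++)
                            ibs_enabled[i] = false;
            }
            stopped[id] = false;
    }

    int main(void)
    {
            vcpu_start(0);          /* IBS on for vcpu 0 */
            vcpu_start(1);          /* IBS dropped on all vcpus */
            printf("ibs0=%d ibs1=%d\n", ibs_enabled[0], ibs_enabled[1]);
            return 0;
    }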
1179static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 1519static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
1180 struct kvm_enable_cap *cap) 1520 struct kvm_enable_cap *cap)
1181{ 1521{
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 3c1e2274d9ea..a8655ed31616 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -28,7 +28,6 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
28 28
29/* Transactional Memory Execution related macros */ 29/* Transactional Memory Execution related macros */
30#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) 30#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10))
31#define TDB_ADDR 0x1800UL
32#define TDB_FORMAT1 1 31#define TDB_FORMAT1 1
33#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) 32#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
34 33
@@ -62,9 +61,15 @@ static inline int kvm_is_ucontrol(struct kvm *kvm)
62#endif 61#endif
63} 62}
64 63
64#define GUEST_PREFIX_SHIFT 13
65static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
66{
67 return vcpu->arch.sie_block->prefix << GUEST_PREFIX_SHIFT;
68}
69
65static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) 70static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
66{ 71{
67 vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u; 72 vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
68 vcpu->arch.sie_block->ihcpu = 0xffff; 73 vcpu->arch.sie_block->ihcpu = 0xffff;
69 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 74 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
70} 75}
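With GUEST_PREFIX_SHIFT the SIE block now stores the prefix as a 13-bit-shifted value, so kvm_s390_get_prefix()/kvm_s390_set_prefix() simply shift back and forth; because prefix addresses are 8 KB aligned, no information is lost. A short sketch of that round trip (field width and the sample address are illustrative):

    #include <assert.h>
    #include <stdint.h>

    #define GUEST_PREFIX_SHIFT 13

    static uint32_t sie_prefix;     /* stand-in for sie_block->prefix */

    static void set_prefix(uint32_t prefix)
    {
            /* prefix addresses are 8 KB aligned, so the low 13 bits are zero */
            sie_prefix = prefix >> GUEST_PREFIX_SHIFT;
    }

    static uint32_t get_prefix(void)
    {
            return sie_prefix << GUEST_PREFIX_SHIFT;
    }

    int main(void)
    {
            uint32_t addr = 0x12344000u;    /* 8 KB aligned, below 2 GB */

            set_prefix(addr);
            assert(get_prefix() == addr);   /* shifting is lossless here */
            return 0;
    }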
@@ -130,6 +135,7 @@ void kvm_s390_tasklet(unsigned long parm);
130void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); 135void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
131void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); 136void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
132void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); 137void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
138void kvm_s390_clear_float_irqs(struct kvm *kvm);
133int __must_check kvm_s390_inject_vm(struct kvm *kvm, 139int __must_check kvm_s390_inject_vm(struct kvm *kvm,
134 struct kvm_s390_interrupt *s390int); 140 struct kvm_s390_interrupt *s390int);
135int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, 141int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
@@ -137,35 +143,94 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
137int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); 143int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
138struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, 144struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
139 u64 cr6, u64 schid); 145 u64 cr6, u64 schid);
146void kvm_s390_reinject_io_int(struct kvm *kvm,
147 struct kvm_s390_interrupt_info *inti);
140int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); 148int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
141 149
142/* implemented in priv.c */ 150/* implemented in priv.c */
151int is_valid_psw(psw_t *psw);
143int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); 152int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
144int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); 153int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
145int kvm_s390_handle_01(struct kvm_vcpu *vcpu); 154int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
146int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); 155int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
147int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); 156int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
157int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
148int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu); 158int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
149int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); 159int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
150 160
151/* implemented in sigp.c */ 161/* implemented in sigp.c */
152int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); 162int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
163int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
153 164
154/* implemented in kvm-s390.c */ 165/* implemented in kvm-s390.c */
166long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
155int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); 167int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
156int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); 168int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
169void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
170void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
157void s390_vcpu_block(struct kvm_vcpu *vcpu); 171void s390_vcpu_block(struct kvm_vcpu *vcpu);
158void s390_vcpu_unblock(struct kvm_vcpu *vcpu); 172void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
159void exit_sie(struct kvm_vcpu *vcpu); 173void exit_sie(struct kvm_vcpu *vcpu);
160void exit_sie_sync(struct kvm_vcpu *vcpu); 174void exit_sie_sync(struct kvm_vcpu *vcpu);
161/* are we going to support cmma? */ 175int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
162bool kvm_enabled_cmma(void); 176void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
177/* is cmma enabled */
178bool kvm_s390_cmma_enabled(struct kvm *kvm);
179int test_vfacility(unsigned long nr);
180
163/* implemented in diag.c */ 181/* implemented in diag.c */
164int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); 182int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
183/* implemented in interrupt.c */
184int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
185 struct kvm_s390_pgm_info *pgm_info);
186
187/**
188 * kvm_s390_inject_prog_cond - conditionally inject a program check
189 * @vcpu: virtual cpu
190 * @rc: original return/error code
191 *
192 * This function is supposed to be used after regular guest access functions
193 * failed, to conditionally inject a program check to a vcpu. The typical
194 * pattern would look like
195 *
196 * rc = write_guest(vcpu, addr, data, len);
197 * if (rc)
198 * return kvm_s390_inject_prog_cond(vcpu, rc);
199 *
200 * A negative return code from guest access functions implies an internal error
201 * e.g. out of memory. In these cases no program check should be injected
202 * to the guest.
203 * A positive value implies that an exception happened while accessing a guest's
204 * memory. In this case all data belonging to the corresponding program check
205 * has been stored in vcpu->arch.pgm and can be injected with
206 * kvm_s390_inject_prog_irq().
207 *
208 * Returns: - the original @rc value if @rc was negative (internal error)
209 * - zero if @rc was already zero
210 * - zero or error code from injecting if @rc was positive
211 * (program check injected to @vcpu)
212 */
213static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
214{
215 if (rc <= 0)
216 return rc;
217 return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
218}
165 219
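The kvm_s390_inject_prog_cond() helper documented above encodes a simple convention: a negative rc is a host-side error and is passed through, zero means success, and a positive rc means a guest exception that should now be injected. A standalone model of that convention (inject() is a placeholder for kvm_s390_inject_prog_irq()):

    #include <stdio.h>

    /* placeholder for kvm_s390_inject_prog_irq() */
    static int inject(void)
    {
            printf("program check injected\n");
            return 0;
    }

    static int inject_prog_cond(int rc)
    {
            if (rc <= 0)            /* internal error (<0) or success (0): pass through */
                    return rc;
            return inject();        /* positive: exception data is ready, inject it */
    }

    int main(void)
    {
            printf("%d\n", inject_prog_cond(-12));  /* -ENOMEM-style error: -12 */
            printf("%d\n", inject_prog_cond(0));    /* success: 0 */
            printf("%d\n", inject_prog_cond(4));    /* guest exception: injected, 0 */
            return 0;
    }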
166/* implemented in interrupt.c */ 220/* implemented in interrupt.c */
167int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); 221int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
168int psw_extint_disabled(struct kvm_vcpu *vcpu); 222int psw_extint_disabled(struct kvm_vcpu *vcpu);
169void kvm_s390_destroy_adapters(struct kvm *kvm); 223void kvm_s390_destroy_adapters(struct kvm *kvm);
224int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu);
225
226/* implemented in guestdbg.c */
227void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
228void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu);
229void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu);
230int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
231 struct kvm_guest_debug *dbg);
232void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
233void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
234void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
170 235
171#endif 236#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 476e9e218f43..f89c1cd67751 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -35,8 +35,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
35{ 35{
36 struct kvm_vcpu *cpup; 36 struct kvm_vcpu *cpup;
37 s64 hostclk, val; 37 s64 hostclk, val;
38 int i, rc;
38 u64 op2; 39 u64 op2;
39 int i;
40 40
41 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 41 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
42 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 42 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -44,8 +44,9 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
44 op2 = kvm_s390_get_base_disp_s(vcpu); 44 op2 = kvm_s390_get_base_disp_s(vcpu);
45 if (op2 & 7) /* Operand must be on a doubleword boundary */ 45 if (op2 & 7) /* Operand must be on a doubleword boundary */
46 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 46 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
47 if (get_guest(vcpu, val, (u64 __user *) op2)) 47 rc = read_guest(vcpu, op2, &val, sizeof(val));
48 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 48 if (rc)
49 return kvm_s390_inject_prog_cond(vcpu, rc);
49 50
50 if (store_tod_clock(&hostclk)) { 51 if (store_tod_clock(&hostclk)) {
51 kvm_s390_set_psw_cc(vcpu, 3); 52 kvm_s390_set_psw_cc(vcpu, 3);
@@ -65,8 +66,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
65static int handle_set_prefix(struct kvm_vcpu *vcpu) 66static int handle_set_prefix(struct kvm_vcpu *vcpu)
66{ 67{
67 u64 operand2; 68 u64 operand2;
68 u32 address = 0; 69 u32 address;
69 u8 tmp; 70 int rc;
70 71
71 vcpu->stat.instruction_spx++; 72 vcpu->stat.instruction_spx++;
72 73
@@ -80,14 +81,18 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
80 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 81 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
81 82
82 /* get the value */ 83 /* get the value */
83 if (get_guest(vcpu, address, (u32 __user *) operand2)) 84 rc = read_guest(vcpu, operand2, &address, sizeof(address));
84 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 85 if (rc)
86 return kvm_s390_inject_prog_cond(vcpu, rc);
85 87
86 address = address & 0x7fffe000u; 88 address &= 0x7fffe000u;
87 89
88 /* make sure that the new value is valid memory */ 90 /*
89 if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || 91 * Make sure the new value is valid memory. We only need to check the
90 (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) 92 * first page, since address is 8k aligned and memory pieces are always
93 * at least 1MB aligned and have at least a size of 1MB.
94 */
95 if (kvm_is_error_gpa(vcpu->kvm, address))
91 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 96 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
92 97
93 kvm_s390_set_prefix(vcpu, address); 98 kvm_s390_set_prefix(vcpu, address);
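handle_set_prefix() masks the operand with 0x7fffe000 before validating it, which both 8 KB-aligns the address and clears the top bit, keeping it below 2 GB; that is why checking only the first page of the new prefix area is sufficient. A tiny sketch of the masking (the operand value is made up):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t operand = 0x89abcdefu;
            uint32_t prefix  = operand & 0x7fffe000u;

            /* top bit cleared -> below 2 GB, low 13 bits cleared -> 8 KB aligned */
            printf("prefix=0x%08x aligned=%d\n", prefix, (prefix & 0x1fff) == 0);
            return 0;
    }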
@@ -101,6 +106,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
101{ 106{
102 u64 operand2; 107 u64 operand2;
103 u32 address; 108 u32 address;
109 int rc;
104 110
105 vcpu->stat.instruction_stpx++; 111 vcpu->stat.instruction_stpx++;
106 112
@@ -113,12 +119,12 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
113 if (operand2 & 3) 119 if (operand2 & 3)
114 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 120 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
115 121
116 address = vcpu->arch.sie_block->prefix; 122 address = kvm_s390_get_prefix(vcpu);
117 address = address & 0x7fffe000u;
118 123
119 /* get the value */ 124 /* get the value */
120 if (put_guest(vcpu, address, (u32 __user *)operand2)) 125 rc = write_guest(vcpu, operand2, &address, sizeof(address));
121 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 126 if (rc)
127 return kvm_s390_inject_prog_cond(vcpu, rc);
122 128
123 VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); 129 VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
124 trace_kvm_s390_handle_prefix(vcpu, 0, address); 130 trace_kvm_s390_handle_prefix(vcpu, 0, address);
@@ -127,28 +133,44 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
127 133
128static int handle_store_cpu_address(struct kvm_vcpu *vcpu) 134static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
129{ 135{
130 u64 useraddr; 136 u16 vcpu_id = vcpu->vcpu_id;
137 u64 ga;
138 int rc;
131 139
132 vcpu->stat.instruction_stap++; 140 vcpu->stat.instruction_stap++;
133 141
134 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 142 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
135 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 143 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
136 144
137 useraddr = kvm_s390_get_base_disp_s(vcpu); 145 ga = kvm_s390_get_base_disp_s(vcpu);
138 146
139 if (useraddr & 1) 147 if (ga & 1)
140 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 148 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
141 149
142 if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr)) 150 rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id));
143 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 151 if (rc)
152 return kvm_s390_inject_prog_cond(vcpu, rc);
144 153
145 VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr); 154 VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga);
146 trace_kvm_s390_handle_stap(vcpu, useraddr); 155 trace_kvm_s390_handle_stap(vcpu, ga);
147 return 0; 156 return 0;
148} 157}
149 158
159static void __skey_check_enable(struct kvm_vcpu *vcpu)
160{
161 if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)))
162 return;
163
164 s390_enable_skey();
165 trace_kvm_s390_skey_related_inst(vcpu);
166 vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
167}
168
169
150static int handle_skey(struct kvm_vcpu *vcpu) 170static int handle_skey(struct kvm_vcpu *vcpu)
151{ 171{
172 __skey_check_enable(vcpu);
173
152 vcpu->stat.instruction_storage_key++; 174 vcpu->stat.instruction_storage_key++;
153 175
154 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 176 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
@@ -160,9 +182,21 @@ static int handle_skey(struct kvm_vcpu *vcpu)
160 return 0; 182 return 0;
161} 183}
162 184
185static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
186{
187 psw_t *psw = &vcpu->arch.sie_block->gpsw;
188
189 vcpu->stat.instruction_ipte_interlock++;
190 if (psw_bits(*psw).p)
191 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
192 wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
193 psw->addr = __rewind_psw(*psw, 4);
194 VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
195 return 0;
196}
197
163static int handle_test_block(struct kvm_vcpu *vcpu) 198static int handle_test_block(struct kvm_vcpu *vcpu)
164{ 199{
165 unsigned long hva;
166 gpa_t addr; 200 gpa_t addr;
167 int reg2; 201 int reg2;
168 202
@@ -171,16 +205,18 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
171 205
172 kvm_s390_get_regs_rre(vcpu, NULL, &reg2); 206 kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
173 addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; 207 addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
208 addr = kvm_s390_logical_to_effective(vcpu, addr);
209 if (kvm_s390_check_low_addr_protection(vcpu, addr))
210 return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
174 addr = kvm_s390_real_to_abs(vcpu, addr); 211 addr = kvm_s390_real_to_abs(vcpu, addr);
175 212
176 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); 213 if (kvm_is_error_gpa(vcpu->kvm, addr))
177 if (kvm_is_error_hva(hva))
178 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 214 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
179 /* 215 /*
180 * We don't expect errors on modern systems, and do not care 216 * We don't expect errors on modern systems, and do not care
181 * about storage keys (yet), so let's just clear the page. 217 * about storage keys (yet), so let's just clear the page.
182 */ 218 */
183 if (clear_user((void __user *)hva, PAGE_SIZE) != 0) 219 if (kvm_clear_guest(vcpu->kvm, addr, PAGE_SIZE))
184 return -EFAULT; 220 return -EFAULT;
185 kvm_s390_set_psw_cc(vcpu, 0); 221 kvm_s390_set_psw_cc(vcpu, 0);
186 vcpu->run->s.regs.gprs[0] = 0; 222 vcpu->run->s.regs.gprs[0] = 0;
@@ -190,9 +226,12 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
190static int handle_tpi(struct kvm_vcpu *vcpu) 226static int handle_tpi(struct kvm_vcpu *vcpu)
191{ 227{
192 struct kvm_s390_interrupt_info *inti; 228 struct kvm_s390_interrupt_info *inti;
229 unsigned long len;
230 u32 tpi_data[3];
231 int cc, rc;
193 u64 addr; 232 u64 addr;
194 int cc;
195 233
234 rc = 0;
196 addr = kvm_s390_get_base_disp_s(vcpu); 235 addr = kvm_s390_get_base_disp_s(vcpu);
197 if (addr & 3) 236 if (addr & 3)
198 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 237 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -201,30 +240,41 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
201 if (!inti) 240 if (!inti)
202 goto no_interrupt; 241 goto no_interrupt;
203 cc = 1; 242 cc = 1;
243 tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
244 tpi_data[1] = inti->io.io_int_parm;
245 tpi_data[2] = inti->io.io_int_word;
204 if (addr) { 246 if (addr) {
205 /* 247 /*
206 * Store the two-word I/O interruption code into the 248 * Store the two-word I/O interruption code into the
207 * provided area. 249 * provided area.
208 */ 250 */
209 if (put_guest(vcpu, inti->io.subchannel_id, (u16 __user *)addr) 251 len = sizeof(tpi_data) - 4;
210 || put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *)(addr + 2)) 252 rc = write_guest(vcpu, addr, &tpi_data, len);
211 || put_guest(vcpu, inti->io.io_int_parm, (u32 __user *)(addr + 4))) 253 if (rc)
212 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 254 return kvm_s390_inject_prog_cond(vcpu, rc);
213 } else { 255 } else {
214 /* 256 /*
215 * Store the three-word I/O interruption code into 257 * Store the three-word I/O interruption code into
216 * the appropriate lowcore area. 258 * the appropriate lowcore area.
217 */ 259 */
218 put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) __LC_SUBCHANNEL_ID); 260 len = sizeof(tpi_data);
219 put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) __LC_SUBCHANNEL_NR); 261 if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len))
220 put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) __LC_IO_INT_PARM); 262 rc = -EFAULT;
221 put_guest(vcpu, inti->io.io_int_word, (u32 __user *) __LC_IO_INT_WORD);
222 } 263 }
223 kfree(inti); 264 /*
265 * If we encounter a problem storing the interruption code, the
266 * instruction is suppressed from the guest's view: reinject the
267 * interrupt.
268 */
269 if (!rc)
270 kfree(inti);
271 else
272 kvm_s390_reinject_io_int(vcpu->kvm, inti);
224no_interrupt: 273no_interrupt:
225 /* Set condition code and we're done. */ 274 /* Set condition code and we're done. */
226 kvm_s390_set_psw_cc(vcpu, cc); 275 if (!rc)
227 return 0; 276 kvm_s390_set_psw_cc(vcpu, cc);
277 return rc ? -EFAULT : 0;
228} 278}
229 279
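The reworked handle_tpi() builds the full three-word I/O interruption code in a local tpi_data[3] array and then stores either the first two words (TPI with an operand address) or all three words (lowcore case) with a single guest write. A sketch of that packing and length choice, with made-up field values:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint16_t subchannel_id = 0x0001, subchannel_nr = 0x2345;
            uint32_t io_int_parm = 0xdeadbeef, io_int_word = 0xc0400000;
            uint32_t tpi_data[3];

            tpi_data[0] = (uint32_t)subchannel_id << 16 | subchannel_nr;
            tpi_data[1] = io_int_parm;
            tpi_data[2] = io_int_word;

            /* operand given: store only the two-word code */
            size_t len_two_word = sizeof(tpi_data) - 4;
            /* no operand: store the full three-word code into lowcore */
            size_t len_three_word = sizeof(tpi_data);

            printf("word0=0x%08x two=%zu three=%zu\n",
                   tpi_data[0], len_two_word, len_three_word);
            return 0;
    }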
230static int handle_tsch(struct kvm_vcpu *vcpu) 280static int handle_tsch(struct kvm_vcpu *vcpu)
@@ -292,10 +342,10 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
292 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 342 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
293 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 343 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
294 344
295 rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), 345 rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list),
296 vfacilities, 4); 346 vfacilities, 4);
297 if (rc) 347 if (rc)
298 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 348 return rc;
299 VCPU_EVENT(vcpu, 5, "store facility list value %x", 349 VCPU_EVENT(vcpu, 5, "store facility list value %x",
300 *(unsigned int *) vfacilities); 350 *(unsigned int *) vfacilities);
301 trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities); 351 trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities);
@@ -314,7 +364,8 @@ static void handle_new_psw(struct kvm_vcpu *vcpu)
314#define PSW_ADDR_24 0x0000000000ffffffUL 364#define PSW_ADDR_24 0x0000000000ffffffUL
315#define PSW_ADDR_31 0x000000007fffffffUL 365#define PSW_ADDR_31 0x000000007fffffffUL
316 366
317static int is_valid_psw(psw_t *psw) { 367int is_valid_psw(psw_t *psw)
368{
318 if (psw->mask & PSW_MASK_UNASSIGNED) 369 if (psw->mask & PSW_MASK_UNASSIGNED)
319 return 0; 370 return 0;
320 if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) { 371 if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) {
@@ -325,6 +376,8 @@ static int is_valid_psw(psw_t *psw) {
325 return 0; 376 return 0;
326 if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA) 377 if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA)
327 return 0; 378 return 0;
379 if (psw->addr & 1)
380 return 0;
328 return 1; 381 return 1;
329} 382}
330 383
@@ -333,6 +386,7 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
333 psw_t *gpsw = &vcpu->arch.sie_block->gpsw; 386 psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
334 psw_compat_t new_psw; 387 psw_compat_t new_psw;
335 u64 addr; 388 u64 addr;
389 int rc;
336 390
337 if (gpsw->mask & PSW_MASK_PSTATE) 391 if (gpsw->mask & PSW_MASK_PSTATE)
338 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 392 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -340,8 +394,10 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
340 addr = kvm_s390_get_base_disp_s(vcpu); 394 addr = kvm_s390_get_base_disp_s(vcpu);
341 if (addr & 7) 395 if (addr & 7)
342 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 396 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
343 if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) 397
344 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 398 rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
399 if (rc)
400 return kvm_s390_inject_prog_cond(vcpu, rc);
345 if (!(new_psw.mask & PSW32_MASK_BASE)) 401 if (!(new_psw.mask & PSW32_MASK_BASE))
346 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 402 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
347 gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32; 403 gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32;
@@ -357,6 +413,7 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
357{ 413{
358 psw_t new_psw; 414 psw_t new_psw;
359 u64 addr; 415 u64 addr;
416 int rc;
360 417
361 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 418 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
362 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 419 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -364,8 +421,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
364 addr = kvm_s390_get_base_disp_s(vcpu); 421 addr = kvm_s390_get_base_disp_s(vcpu);
365 if (addr & 7) 422 if (addr & 7)
366 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 423 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
367 if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) 424 rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
368 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 425 if (rc)
426 return kvm_s390_inject_prog_cond(vcpu, rc);
369 vcpu->arch.sie_block->gpsw = new_psw; 427 vcpu->arch.sie_block->gpsw = new_psw;
370 if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) 428 if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
371 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 429 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -375,7 +433,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
375 433
376static int handle_stidp(struct kvm_vcpu *vcpu) 434static int handle_stidp(struct kvm_vcpu *vcpu)
377{ 435{
436 u64 stidp_data = vcpu->arch.stidp_data;
378 u64 operand2; 437 u64 operand2;
438 int rc;
379 439
380 vcpu->stat.instruction_stidp++; 440 vcpu->stat.instruction_stidp++;
381 441
@@ -387,8 +447,9 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
387 if (operand2 & 7) 447 if (operand2 & 7)
388 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 448 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
389 449
390 if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2)) 450 rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data));
391 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 451 if (rc)
452 return kvm_s390_inject_prog_cond(vcpu, rc);
392 453
393 VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); 454 VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
394 return 0; 455 return 0;
@@ -474,9 +535,10 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
474 break; 535 break;
475 } 536 }
476 537
477 if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { 538 rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE);
478 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 539 if (rc) {
479 goto out_exception; 540 rc = kvm_s390_inject_prog_cond(vcpu, rc);
541 goto out;
480 } 542 }
481 trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); 543 trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
482 free_page(mem); 544 free_page(mem);
@@ -485,7 +547,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
485 return 0; 547 return 0;
486out_no_data: 548out_no_data:
487 kvm_s390_set_psw_cc(vcpu, 3); 549 kvm_s390_set_psw_cc(vcpu, 3);
488out_exception: 550out:
489 free_page(mem); 551 free_page(mem);
490 return rc; 552 return rc;
491} 553}
@@ -496,6 +558,7 @@ static const intercept_handler_t b2_handlers[256] = {
496 [0x10] = handle_set_prefix, 558 [0x10] = handle_set_prefix,
497 [0x11] = handle_store_prefix, 559 [0x11] = handle_store_prefix,
498 [0x12] = handle_store_cpu_address, 560 [0x12] = handle_store_cpu_address,
561 [0x21] = handle_ipte_interlock,
499 [0x29] = handle_skey, 562 [0x29] = handle_skey,
500 [0x2a] = handle_skey, 563 [0x2a] = handle_skey,
501 [0x2b] = handle_skey, 564 [0x2b] = handle_skey,
@@ -513,6 +576,7 @@ static const intercept_handler_t b2_handlers[256] = {
513 [0x3a] = handle_io_inst, 576 [0x3a] = handle_io_inst,
514 [0x3b] = handle_io_inst, 577 [0x3b] = handle_io_inst,
515 [0x3c] = handle_io_inst, 578 [0x3c] = handle_io_inst,
579 [0x50] = handle_ipte_interlock,
516 [0x5f] = handle_io_inst, 580 [0x5f] = handle_io_inst,
517 [0x74] = handle_io_inst, 581 [0x74] = handle_io_inst,
518 [0x76] = handle_io_inst, 582 [0x76] = handle_io_inst,
@@ -591,6 +655,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
591 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 655 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
592 656
593 start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; 657 start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
658 if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
659 if (kvm_s390_check_low_addr_protection(vcpu, start))
660 return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
661 }
662
594 switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { 663 switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
595 case 0x00000000: 664 case 0x00000000:
596 end = (start + (1UL << 12)) & ~((1UL << 12) - 1); 665 end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
@@ -606,10 +675,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
606 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 675 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
607 } 676 }
608 while (start < end) { 677 while (start < end) {
609 unsigned long useraddr; 678 unsigned long useraddr, abs_addr;
610 679
611 useraddr = gmap_translate(start, vcpu->arch.gmap); 680 /* Translate guest address to host address */
612 if (IS_ERR((void *)useraddr)) 681 if ((vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) == 0)
682 abs_addr = kvm_s390_real_to_abs(vcpu, start);
683 else
684 abs_addr = start;
685 useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(abs_addr));
686 if (kvm_is_error_hva(useraddr))
613 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 687 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
614 688
615 if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { 689 if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
@@ -618,6 +692,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
618 } 692 }
619 693
620 if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { 694 if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
695 __skey_check_enable(vcpu);
621 if (set_guest_storage_key(current->mm, useraddr, 696 if (set_guest_storage_key(current->mm, useraddr,
622 vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, 697 vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
623 vcpu->run->s.regs.gprs[reg1] & PFMF_NQ)) 698 vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
@@ -642,7 +717,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
642 VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries); 717 VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries);
643 gmap = vcpu->arch.gmap; 718 gmap = vcpu->arch.gmap;
644 vcpu->stat.instruction_essa++; 719 vcpu->stat.instruction_essa++;
645 if (!kvm_enabled_cmma() || !vcpu->arch.sie_block->cbrlo) 720 if (!kvm_s390_cmma_enabled(vcpu->kvm))
646 return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); 721 return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
647 722
648 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 723 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
@@ -672,7 +747,10 @@ static int handle_essa(struct kvm_vcpu *vcpu)
672} 747}
673 748
674static const intercept_handler_t b9_handlers[256] = { 749static const intercept_handler_t b9_handlers[256] = {
750 [0x8a] = handle_ipte_interlock,
675 [0x8d] = handle_epsw, 751 [0x8d] = handle_epsw,
752 [0x8e] = handle_ipte_interlock,
753 [0x8f] = handle_ipte_interlock,
676 [0xab] = handle_essa, 754 [0xab] = handle_essa,
677 [0xaf] = handle_pfmf, 755 [0xaf] = handle_pfmf,
678}; 756};
@@ -693,32 +771,67 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
693{ 771{
694 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; 772 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
695 int reg3 = vcpu->arch.sie_block->ipa & 0x000f; 773 int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
696 u64 useraddr;
697 u32 val = 0; 774 u32 val = 0;
698 int reg, rc; 775 int reg, rc;
776 u64 ga;
699 777
700 vcpu->stat.instruction_lctl++; 778 vcpu->stat.instruction_lctl++;
701 779
702 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 780 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
703 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 781 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
704 782
705 useraddr = kvm_s390_get_base_disp_rs(vcpu); 783 ga = kvm_s390_get_base_disp_rs(vcpu);
706 784
707 if (useraddr & 3) 785 if (ga & 3)
708 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 786 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
709 787
710 VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, 788 VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
711 useraddr); 789 trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
712 trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);
713 790
714 reg = reg1; 791 reg = reg1;
715 do { 792 do {
716 rc = get_guest(vcpu, val, (u32 __user *) useraddr); 793 rc = read_guest(vcpu, ga, &val, sizeof(val));
717 if (rc) 794 if (rc)
718 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 795 return kvm_s390_inject_prog_cond(vcpu, rc);
719 vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; 796 vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
720 vcpu->arch.sie_block->gcr[reg] |= val; 797 vcpu->arch.sie_block->gcr[reg] |= val;
721 useraddr += 4; 798 ga += 4;
799 if (reg == reg3)
800 break;
801 reg = (reg + 1) % 16;
802 } while (1);
803
804 return 0;
805}
806
807int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
808{
809 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
810 int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
811 u64 ga;
812 u32 val;
813 int reg, rc;
814
815 vcpu->stat.instruction_stctl++;
816
817 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
818 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
819
820 ga = kvm_s390_get_base_disp_rs(vcpu);
821
822 if (ga & 3)
823 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
824
825 VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
826 trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
827
828 reg = reg1;
829 do {
830 val = vcpu->arch.sie_block->gcr[reg] & 0x00000000fffffffful;
831 rc = write_guest(vcpu, ga, &val, sizeof(val));
832 if (rc)
833 return kvm_s390_inject_prog_cond(vcpu, rc);
834 ga += 4;
722 if (reg == reg3) 835 if (reg == reg3)
723 break; 836 break;
724 reg = (reg + 1) % 16; 837 reg = (reg + 1) % 16;
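LCTL/STCTL-style instructions operate on the control register range r1..r3 with wraparound at register 15, which is exactly what the do/while loop with `reg = (reg + 1) % 16` implements in both kvm_s390_handle_lctl() and kvm_s390_handle_stctl(). A standalone version of that iteration:

    #include <stdio.h>

    static void for_each_cr(int reg1, int reg3)
    {
            int reg = reg1;

            do {
                    printf("cr%d ", reg);   /* load or store control register 'reg' here */
                    if (reg == reg3)
                            break;
                    reg = (reg + 1) % 16;
            } while (1);
            printf("\n");
    }

    int main(void)
    {
            for_each_cr(1, 3);      /* cr1 cr2 cr3 */
            for_each_cr(14, 1);     /* wraps around: cr14 cr15 cr0 cr1 */
            return 0;
    }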
@@ -731,7 +844,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
731{ 844{
732 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; 845 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
733 int reg3 = vcpu->arch.sie_block->ipa & 0x000f; 846 int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
734 u64 useraddr; 847 u64 ga, val;
735 int reg, rc; 848 int reg, rc;
736 849
737 vcpu->stat.instruction_lctlg++; 850 vcpu->stat.instruction_lctlg++;
@@ -739,23 +852,58 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
739 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 852 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
740 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 853 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
741 854
742 useraddr = kvm_s390_get_base_disp_rsy(vcpu); 855 ga = kvm_s390_get_base_disp_rsy(vcpu);
743 856
744 if (useraddr & 7) 857 if (ga & 7)
745 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 858 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
746 859
747 reg = reg1; 860 reg = reg1;
748 861
749 VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, 862 VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
750 useraddr); 863 trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
751 trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
752 864
753 do { 865 do {
754 rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], 866 rc = read_guest(vcpu, ga, &val, sizeof(val));
755 (u64 __user *) useraddr);
756 if (rc) 867 if (rc)
757 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 868 return kvm_s390_inject_prog_cond(vcpu, rc);
758 useraddr += 8; 869 vcpu->arch.sie_block->gcr[reg] = val;
870 ga += 8;
871 if (reg == reg3)
872 break;
873 reg = (reg + 1) % 16;
874 } while (1);
875
876 return 0;
877}
878
879static int handle_stctg(struct kvm_vcpu *vcpu)
880{
881 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
882 int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
883 u64 ga, val;
884 int reg, rc;
885
886 vcpu->stat.instruction_stctg++;
887
888 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
889 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
890
891 ga = kvm_s390_get_base_disp_rsy(vcpu);
892
893 if (ga & 7)
894 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
895
896 reg = reg1;
897
898 VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
899 trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
900
901 do {
902 val = vcpu->arch.sie_block->gcr[reg];
903 rc = write_guest(vcpu, ga, &val, sizeof(val));
904 if (rc)
905 return kvm_s390_inject_prog_cond(vcpu, rc);
906 ga += 8;
759 if (reg == reg3) 907 if (reg == reg3)
760 break; 908 break;
761 reg = (reg + 1) % 16; 909 reg = (reg + 1) % 16;
@@ -766,6 +914,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
766 914
767static const intercept_handler_t eb_handlers[256] = { 915static const intercept_handler_t eb_handlers[256] = {
768 [0x2f] = handle_lctlg, 916 [0x2f] = handle_lctlg,
917 [0x25] = handle_stctg,
769}; 918};
770 919
771int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) 920int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
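The B2, B9 and EB intercepts are all dispatched through sparse 256-entry function-pointer tables indexed by an instruction byte; opcodes without a handler stay NULL and fall back to -EOPNOTSUPP. A minimal sketch of that dispatch scheme (the error value and handler bodies are illustrative):

    #include <stdio.h>

    #define EOPNOTSUPP 95   /* Linux value, used here only for illustration */

    typedef int (*handler_t)(void);

    static int handle_lctlg(void) { return 0; }
    static int handle_stctg(void) { return 0; }

    /* sparse table: only a few opcodes are populated */
    static const handler_t eb_handlers[256] = {
            [0x2f] = handle_lctlg,
            [0x25] = handle_stctg,
    };

    static int dispatch(unsigned char op)
    {
            handler_t h = eb_handlers[op];

            return h ? h() : -EOPNOTSUPP;
    }

    int main(void)
    {
            printf("%d %d\n", dispatch(0x2f), dispatch(0x42));      /* 0 -95 */
            return 0;
    }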
@@ -781,8 +930,9 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
781static int handle_tprot(struct kvm_vcpu *vcpu) 930static int handle_tprot(struct kvm_vcpu *vcpu)
782{ 931{
783 u64 address1, address2; 932 u64 address1, address2;
784 struct vm_area_struct *vma; 933 unsigned long hva, gpa;
785 unsigned long user_address; 934 int ret = 0, cc = 0;
935 bool writable;
786 936
787 vcpu->stat.instruction_tprot++; 937 vcpu->stat.instruction_tprot++;
788 938
@@ -793,32 +943,41 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
793 943
794 /* we only handle the Linux memory detection case: 944 /* we only handle the Linux memory detection case:
795 * access key == 0 945 * access key == 0
796 * guest DAT == off
797 * everything else goes to userspace. */ 946 * everything else goes to userspace. */
798 if (address2 & 0xf0) 947 if (address2 & 0xf0)
799 return -EOPNOTSUPP; 948 return -EOPNOTSUPP;
800 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) 949 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
801 return -EOPNOTSUPP; 950 ipte_lock(vcpu);
802 951 ret = guest_translate_address(vcpu, address1, &gpa, 1);
803 down_read(&current->mm->mmap_sem); 952 if (ret == PGM_PROTECTION) {
804 user_address = __gmap_translate(address1, vcpu->arch.gmap); 953 /* Write protected? Try again with read-only... */
805 if (IS_ERR_VALUE(user_address)) 954 cc = 1;
806 goto out_inject; 955 ret = guest_translate_address(vcpu, address1, &gpa, 0);
807 vma = find_vma(current->mm, user_address); 956 }
808 if (!vma) 957 if (ret) {
809 goto out_inject; 958 if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
810 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); 959 ret = kvm_s390_inject_program_int(vcpu, ret);
811 if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ)) 960 } else if (ret > 0) {
812 vcpu->arch.sie_block->gpsw.mask |= (1ul << 44); 961 /* Translation not available */
813 if (!(vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_READ)) 962 kvm_s390_set_psw_cc(vcpu, 3);
814 vcpu->arch.sie_block->gpsw.mask |= (2ul << 44); 963 ret = 0;
815 964 }
816 up_read(&current->mm->mmap_sem); 965 goto out_unlock;
817 return 0; 966 }
818 967
819out_inject: 968 hva = gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable);
820 up_read(&current->mm->mmap_sem); 969 if (kvm_is_error_hva(hva)) {
821 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 970 ret = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
971 } else {
972 if (!writable)
973 cc = 1; /* Write not permitted ==> read-only */
974 kvm_s390_set_psw_cc(vcpu, cc);
975 /* Note: CC2 only occurs for storage keys (not supported yet) */
976 }
977out_unlock:
978 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
979 ipte_unlock(vcpu);
980 return ret;
822} 981}
823 982
824int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) 983int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
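The reworked handle_tprot() maps the outcome of the address translation onto TPROT condition codes: cc 0 if the page is readable and writable, cc 1 if it is read-only, cc 3 if no translation is available. A small model of that mapping, with the translation step stubbed out (guest_translate_address()/gfn_to_hva_prot() are replaced by a toy function):

    #include <stdio.h>

    enum access { NONE, READ_ONLY, READ_WRITE };

    /* toy stand-in for the real translation/protection lookup */
    static enum access translate(unsigned long addr)
    {
            if (addr >= 0x10000000ul)
                    return NONE;
            return (addr & 0x1000) ? READ_ONLY : READ_WRITE;
    }

    static int tprot_cc(unsigned long addr)
    {
            switch (translate(addr)) {
            case READ_WRITE:
                    return 0;       /* fetch and store permitted */
            case READ_ONLY:
                    return 1;       /* fetch permitted, store not permitted */
            default:
                    return 3;       /* translation not available */
            }
    }

    int main(void)
    {
            printf("%d %d %d\n", tprot_cc(0x2000), tprot_cc(0x3000),
                   tprot_cc(0x20000000));  /* 0 1 3 */
            return 0;
    }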
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 26caeb530a78..43079a48cc98 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -54,33 +54,23 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
54 54
55static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) 55static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
56{ 56{
57 struct kvm_s390_local_interrupt *li; 57 struct kvm_s390_interrupt s390int = {
58 struct kvm_s390_interrupt_info *inti; 58 .type = KVM_S390_INT_EMERGENCY,
59 .parm = vcpu->vcpu_id,
60 };
59 struct kvm_vcpu *dst_vcpu = NULL; 61 struct kvm_vcpu *dst_vcpu = NULL;
62 int rc = 0;
60 63
61 if (cpu_addr < KVM_MAX_VCPUS) 64 if (cpu_addr < KVM_MAX_VCPUS)
62 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); 65 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
63 if (!dst_vcpu) 66 if (!dst_vcpu)
64 return SIGP_CC_NOT_OPERATIONAL; 67 return SIGP_CC_NOT_OPERATIONAL;
65 68
66 inti = kzalloc(sizeof(*inti), GFP_KERNEL); 69 rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
67 if (!inti) 70 if (!rc)
68 return -ENOMEM; 71 VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
69
70 inti->type = KVM_S390_INT_EMERGENCY;
71 inti->emerg.code = vcpu->vcpu_id;
72
73 li = &dst_vcpu->arch.local_int;
74 spin_lock_bh(&li->lock);
75 list_add_tail(&inti->list, &li->list);
76 atomic_set(&li->active, 1);
77 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
78 if (waitqueue_active(li->wq))
79 wake_up_interruptible(li->wq);
80 spin_unlock_bh(&li->lock);
81 VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
82 72
83 return SIGP_CC_ORDER_CODE_ACCEPTED; 73 return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
84} 74}
85 75
86static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, 76static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
@@ -116,33 +106,23 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
116 106
117static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) 107static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
118{ 108{
119 struct kvm_s390_local_interrupt *li; 109 struct kvm_s390_interrupt s390int = {
120 struct kvm_s390_interrupt_info *inti; 110 .type = KVM_S390_INT_EXTERNAL_CALL,
111 .parm = vcpu->vcpu_id,
112 };
121 struct kvm_vcpu *dst_vcpu = NULL; 113 struct kvm_vcpu *dst_vcpu = NULL;
114 int rc;
122 115
123 if (cpu_addr < KVM_MAX_VCPUS) 116 if (cpu_addr < KVM_MAX_VCPUS)
124 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); 117 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
125 if (!dst_vcpu) 118 if (!dst_vcpu)
126 return SIGP_CC_NOT_OPERATIONAL; 119 return SIGP_CC_NOT_OPERATIONAL;
127 120
128 inti = kzalloc(sizeof(*inti), GFP_KERNEL); 121 rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
129 if (!inti) 122 if (!rc)
130 return -ENOMEM; 123 VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
131 124
132 inti->type = KVM_S390_INT_EXTERNAL_CALL; 125 return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
133 inti->extcall.code = vcpu->vcpu_id;
134
135 li = &dst_vcpu->arch.local_int;
136 spin_lock_bh(&li->lock);
137 list_add_tail(&inti->list, &li->list);
138 atomic_set(&li->active, 1);
139 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
140 if (waitqueue_active(li->wq))
141 wake_up_interruptible(li->wq);
142 spin_unlock_bh(&li->lock);
143 VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
144
145 return SIGP_CC_ORDER_CODE_ACCEPTED;
146} 126}
147 127
148static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) 128static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
@@ -235,7 +215,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
235 struct kvm_vcpu *dst_vcpu = NULL; 215 struct kvm_vcpu *dst_vcpu = NULL;
236 struct kvm_s390_interrupt_info *inti; 216 struct kvm_s390_interrupt_info *inti;
237 int rc; 217 int rc;
238 u8 tmp;
239 218
240 if (cpu_addr < KVM_MAX_VCPUS) 219 if (cpu_addr < KVM_MAX_VCPUS)
241 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); 220 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
@@ -243,10 +222,13 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
243 return SIGP_CC_NOT_OPERATIONAL; 222 return SIGP_CC_NOT_OPERATIONAL;
244 li = &dst_vcpu->arch.local_int; 223 li = &dst_vcpu->arch.local_int;
245 224
246 /* make sure that the new value is valid memory */ 225 /*
247 address = address & 0x7fffe000u; 226 * Make sure the new value is valid memory. We only need to check the
248 if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || 227 * first page, since address is 8k aligned and memory pieces are always
249 copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)) { 228 * at least 1MB aligned and have at least a size of 1MB.
229 */
230 address &= 0x7fffe000u;
231 if (kvm_is_error_gpa(vcpu->kvm, address)) {
250 *reg &= 0xffffffff00000000UL; 232 *reg &= 0xffffffff00000000UL;
251 *reg |= SIGP_STATUS_INVALID_PARAMETER; 233 *reg |= SIGP_STATUS_INVALID_PARAMETER;
252 return SIGP_CC_STATUS_STORED; 234 return SIGP_CC_STATUS_STORED;
@@ -456,3 +438,38 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
456 kvm_s390_set_psw_cc(vcpu, rc); 438 kvm_s390_set_psw_cc(vcpu, rc);
457 return 0; 439 return 0;
458} 440}
441
442/*
443 * Handle SIGP partial execution interception.
444 *
445 * This interception will occur at the source cpu when a source cpu sends an
446 * external call to a target cpu and the target cpu has the WAIT bit set in
447 * its cpuflags. Interception will occur after the interrupt indicator bits at
448 * the target cpu have been set. All error cases will lead to instruction
449 * interception, therefore nothing is to be checked or prepared.
450 */
451int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
452{
453 int r3 = vcpu->arch.sie_block->ipa & 0x000f;
454 u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
455 struct kvm_vcpu *dest_vcpu;
456 u8 order_code = kvm_s390_get_base_disp_rs(vcpu);
457
458 trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
459
460 if (order_code == SIGP_EXTERNAL_CALL) {
461 dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
462 BUG_ON(dest_vcpu == NULL);
463
464 spin_lock_bh(&dest_vcpu->arch.local_int.lock);
465 if (waitqueue_active(&dest_vcpu->wq))
466 wake_up_interruptible(&dest_vcpu->wq);
467 dest_vcpu->preempted = true;
468 spin_unlock_bh(&dest_vcpu->arch.local_int.lock);
469
470 kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED);
471 return 0;
472 }
473
474 return -EOPNOTSUPP;
475}
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 13f30f58a2df..647e9d6a4818 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -68,6 +68,27 @@ TRACE_EVENT(kvm_s390_destroy_vcpu,
68 ); 68 );
69 69
70/* 70/*
71 * Trace point for start and stop of vcpus.
72 */
73TRACE_EVENT(kvm_s390_vcpu_start_stop,
74 TP_PROTO(unsigned int id, int state),
75 TP_ARGS(id, state),
76
77 TP_STRUCT__entry(
78 __field(unsigned int, id)
79 __field(int, state)
80 ),
81
82 TP_fast_assign(
83 __entry->id = id;
84 __entry->state = state;
85 ),
86
87 TP_printk("%s cpu %d", __entry->state ? "starting" : "stopping",
88 __entry->id)
89 );
90
91/*
71 * Trace points for injection of interrupts, either per machine or 92 * Trace points for injection of interrupts, either per machine or
72 * per vcpu. 93 * per vcpu.
73 */ 94 */
@@ -223,6 +244,28 @@ TRACE_EVENT(kvm_s390_enable_css,
223 __entry->kvm) 244 __entry->kvm)
224 ); 245 );
225 246
247/*
248 * Trace point for enabling and disabling interlocking-and-broadcasting
249 * suppression.
250 */
251TRACE_EVENT(kvm_s390_enable_disable_ibs,
252 TP_PROTO(unsigned int id, int state),
253 TP_ARGS(id, state),
254
255 TP_STRUCT__entry(
256 __field(unsigned int, id)
257 __field(int, state)
258 ),
259
260 TP_fast_assign(
261 __entry->id = id;
262 __entry->state = state;
263 ),
264
265 TP_printk("%s ibs on cpu %d",
266 __entry->state ? "enabling" : "disabling", __entry->id)
267 );
268
226 269
227#endif /* _TRACE_KVMS390_H */ 270#endif /* _TRACE_KVMS390_H */
228 271
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index e8e7213d4cc5..916834d7a73a 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -2,7 +2,7 @@
2#define _TRACE_KVM_H 2#define _TRACE_KVM_H
3 3
4#include <linux/tracepoint.h> 4#include <linux/tracepoint.h>
5#include <asm/sigp.h> 5#include <asm/sie.h>
6#include <asm/debug.h> 6#include <asm/debug.h>
7#include <asm/dis.h> 7#include <asm/dis.h>
8 8
@@ -30,6 +30,20 @@
30 TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \ 30 TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \
31 __entry->pswmask, __entry->pswaddr, p_args) 31 __entry->pswmask, __entry->pswaddr, p_args)
32 32
33TRACE_EVENT(kvm_s390_skey_related_inst,
34 TP_PROTO(VCPU_PROTO_COMMON),
35 TP_ARGS(VCPU_ARGS_COMMON),
36
37 TP_STRUCT__entry(
38 VCPU_FIELD_COMMON
39 ),
40
41 TP_fast_assign(
42 VCPU_ASSIGN_COMMON
43 ),
44 VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu")
45 );
46
33TRACE_EVENT(kvm_s390_major_guest_pfault, 47TRACE_EVENT(kvm_s390_major_guest_pfault,
34 TP_PROTO(VCPU_PROTO_COMMON), 48 TP_PROTO(VCPU_PROTO_COMMON),
35 TP_ARGS(VCPU_ARGS_COMMON), 49 TP_ARGS(VCPU_ARGS_COMMON),
@@ -111,17 +125,6 @@ TRACE_EVENT(kvm_s390_sie_fault,
111 VCPU_TP_PRINTK("%s", "fault in sie instruction") 125 VCPU_TP_PRINTK("%s", "fault in sie instruction")
112 ); 126 );
113 127
114#define sie_intercept_code \
115 {0x04, "Instruction"}, \
116 {0x08, "Program interruption"}, \
117 {0x0C, "Instruction and program interruption"}, \
118 {0x10, "External request"}, \
119 {0x14, "External interruption"}, \
120 {0x18, "I/O request"}, \
121 {0x1C, "Wait state"}, \
122 {0x20, "Validity"}, \
123 {0x28, "Stop request"}
124
125TRACE_EVENT(kvm_s390_sie_exit, 128TRACE_EVENT(kvm_s390_sie_exit,
126 TP_PROTO(VCPU_PROTO_COMMON, u8 icptcode), 129 TP_PROTO(VCPU_PROTO_COMMON, u8 icptcode),
127 TP_ARGS(VCPU_ARGS_COMMON, icptcode), 130 TP_ARGS(VCPU_ARGS_COMMON, icptcode),
@@ -151,7 +154,6 @@ TRACE_EVENT(kvm_s390_intercept_instruction,
151 TP_STRUCT__entry( 154 TP_STRUCT__entry(
152 VCPU_FIELD_COMMON 155 VCPU_FIELD_COMMON
153 __field(__u64, instruction) 156 __field(__u64, instruction)
154 __field(char, insn[8])
155 ), 157 ),
156 158
157 TP_fast_assign( 159 TP_fast_assign(
@@ -162,10 +164,8 @@ TRACE_EVENT(kvm_s390_intercept_instruction,
162 164
163 VCPU_TP_PRINTK("intercepted instruction %016llx (%s)", 165 VCPU_TP_PRINTK("intercepted instruction %016llx (%s)",
164 __entry->instruction, 166 __entry->instruction,
165 insn_to_mnemonic((unsigned char *) 167 __print_symbolic(icpt_insn_decoder(__entry->instruction),
166 &__entry->instruction, 168 icpt_insn_codes))
167 __entry->insn, sizeof(__entry->insn)) ?
168 "unknown" : __entry->insn)
169 ); 169 );
170 170
171/* 171/*
@@ -213,18 +213,6 @@ TRACE_EVENT(kvm_s390_intercept_validity,
213 * Trace points for instructions that are of special interest. 213 * Trace points for instructions that are of special interest.
214 */ 214 */
215 215
216#define sigp_order_codes \
217 {SIGP_SENSE, "sense"}, \
218 {SIGP_EXTERNAL_CALL, "external call"}, \
219 {SIGP_EMERGENCY_SIGNAL, "emergency signal"}, \
220 {SIGP_STOP, "stop"}, \
221 {SIGP_STOP_AND_STORE_STATUS, "stop and store status"}, \
222 {SIGP_SET_ARCHITECTURE, "set architecture"}, \
223 {SIGP_SET_PREFIX, "set prefix"}, \
224 {SIGP_STORE_STATUS_AT_ADDRESS, "store status at addr"}, \
225 {SIGP_SENSE_RUNNING, "sense running"}, \
226 {SIGP_RESTART, "restart"}
227
228TRACE_EVENT(kvm_s390_handle_sigp, 216TRACE_EVENT(kvm_s390_handle_sigp,
229 TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr, \ 217 TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr, \
230 __u32 parameter), 218 __u32 parameter),
@@ -251,12 +239,28 @@ TRACE_EVENT(kvm_s390_handle_sigp,
251 __entry->cpu_addr, __entry->parameter) 239 __entry->cpu_addr, __entry->parameter)
252 ); 240 );
253 241
254#define diagnose_codes \ 242TRACE_EVENT(kvm_s390_handle_sigp_pei,
255 {0x10, "release pages"}, \ 243 TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr),
256 {0x44, "time slice end"}, \ 244 TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr),
257 {0x308, "ipl functions"}, \ 245
258 {0x500, "kvm hypercall"}, \ 246 TP_STRUCT__entry(
259 {0x501, "kvm breakpoint"} 247 VCPU_FIELD_COMMON
248 __field(__u8, order_code)
249 __field(__u16, cpu_addr)
250 ),
251
252 TP_fast_assign(
253 VCPU_ASSIGN_COMMON
254 __entry->order_code = order_code;
255 __entry->cpu_addr = cpu_addr;
256 ),
257
258 VCPU_TP_PRINTK("handle sigp pei order %02x (%s), cpu address %04x",
259 __entry->order_code,
260 __print_symbolic(__entry->order_code,
261 sigp_order_codes),
262 __entry->cpu_addr)
263 );
260 264
261TRACE_EVENT(kvm_s390_handle_diag, 265TRACE_EVENT(kvm_s390_handle_diag,
262 TP_PROTO(VCPU_PROTO_COMMON, __u16 code), 266 TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
@@ -301,6 +305,31 @@ TRACE_EVENT(kvm_s390_handle_lctl,
301 __entry->reg1, __entry->reg3, __entry->addr) 305 __entry->reg1, __entry->reg3, __entry->addr)
302 ); 306 );
303 307
308TRACE_EVENT(kvm_s390_handle_stctl,
309 TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
310 TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
311
312 TP_STRUCT__entry(
313 VCPU_FIELD_COMMON
314 __field(int, g)
315 __field(int, reg1)
316 __field(int, reg3)
317 __field(u64, addr)
318 ),
319
320 TP_fast_assign(
321 VCPU_ASSIGN_COMMON
322 __entry->g = g;
323 __entry->reg1 = reg1;
324 __entry->reg3 = reg3;
325 __entry->addr = addr;
326 ),
327
328 VCPU_TP_PRINTK("%s: storing cr %x-%x to %016llx",
329 __entry->g ? "stctg" : "stctl",
330 __entry->reg1, __entry->reg3, __entry->addr)
331 );
332
304TRACE_EVENT(kvm_s390_handle_prefix, 333TRACE_EVENT(kvm_s390_handle_prefix,
305 TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address), 334 TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address),
306 TP_ARGS(VCPU_ARGS_COMMON, set, address), 335 TP_ARGS(VCPU_ARGS_COMMON, set, address),
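Note (not part of the patch): the open-coded code/name tables (sie_intercept_code, sigp_order_codes, diagnose_codes) are removed here in favour of definitions shared via asm/sie.h, and the intercepted-instruction tracepoint now decodes opcodes with __print_symbolic(). The standalone sketch below mirrors that table-lookup style of decoding; the values are taken from the removed diagnose_codes table and the code is illustrative only.

/* Standalone sketch of a code -> name lookup in the style of
 * __print_symbolic(); the values come from the removed diagnose_codes
 * table in this diff. */
#include <stdio.h>

struct code_name { unsigned int code; const char *name; };

static const struct code_name diag_codes[] = {
        { 0x10,  "release pages"  },
        { 0x44,  "time slice end" },
        { 0x308, "ipl functions"  },
        { 0x500, "kvm hypercall"  },
        { 0x501, "kvm breakpoint" },
};

static const char *diag_name(unsigned int code)
{
        for (size_t i = 0; i < sizeof(diag_codes) / sizeof(diag_codes[0]); i++)
                if (diag_codes[i].code == code)
                        return diag_codes[i].name;
        return "unknown";
}

int main(void)
{
        printf("diag 0x500: %s\n", diag_name(0x500)); /* kvm hypercall */
        printf("diag 0x123: %s\n", diag_name(0x123)); /* unknown */
        return 0;
}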
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 7881d4eb8b6b..37b8241ec784 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -834,6 +834,7 @@ void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte)
834 } 834 }
835 spin_unlock(&gmap_notifier_lock); 835 spin_unlock(&gmap_notifier_lock);
836} 836}
837EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
837 838
838static inline int page_table_with_pgste(struct page *page) 839static inline int page_table_with_pgste(struct page *page)
839{ 840{
@@ -866,8 +867,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
866 atomic_set(&page->_mapcount, 0); 867 atomic_set(&page->_mapcount, 0);
867 table = (unsigned long *) page_to_phys(page); 868 table = (unsigned long *) page_to_phys(page);
868 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); 869 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
869 clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT, 870 clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
870 PAGE_SIZE/2);
871 return table; 871 return table;
872} 872}
873 873
@@ -885,8 +885,8 @@ static inline void page_table_free_pgste(unsigned long *table)
885 __free_page(page); 885 __free_page(page);
886} 886}
887 887
888static inline unsigned long page_table_reset_pte(struct mm_struct *mm, 888static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd,
889 pmd_t *pmd, unsigned long addr, unsigned long end) 889 unsigned long addr, unsigned long end, bool init_skey)
890{ 890{
891 pte_t *start_pte, *pte; 891 pte_t *start_pte, *pte;
892 spinlock_t *ptl; 892 spinlock_t *ptl;
@@ -897,6 +897,22 @@ static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
897 do { 897 do {
898 pgste = pgste_get_lock(pte); 898 pgste = pgste_get_lock(pte);
899 pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; 899 pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
900 if (init_skey) {
901 unsigned long address;
902
903 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
904 PGSTE_GR_BIT | PGSTE_GC_BIT);
905
906 /* skip invalid and not writable pages */
907 if (pte_val(*pte) & _PAGE_INVALID ||
908 !(pte_val(*pte) & _PAGE_WRITE)) {
909 pgste_set_unlock(pte, pgste);
910 continue;
911 }
912
913 address = pte_val(*pte) & PAGE_MASK;
914 page_set_storage_key(address, PAGE_DEFAULT_KEY, 1);
915 }
900 pgste_set_unlock(pte, pgste); 916 pgste_set_unlock(pte, pgste);
901 } while (pte++, addr += PAGE_SIZE, addr != end); 917 } while (pte++, addr += PAGE_SIZE, addr != end);
902 pte_unmap_unlock(start_pte, ptl); 918 pte_unmap_unlock(start_pte, ptl);
@@ -904,8 +920,8 @@ static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
904 return addr; 920 return addr;
905} 921}
906 922
907static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, 923static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud,
908 pud_t *pud, unsigned long addr, unsigned long end) 924 unsigned long addr, unsigned long end, bool init_skey)
909{ 925{
910 unsigned long next; 926 unsigned long next;
911 pmd_t *pmd; 927 pmd_t *pmd;
@@ -915,14 +931,14 @@ static inline unsigned long page_table_reset_pmd(struct mm_struct *mm,
915 next = pmd_addr_end(addr, end); 931 next = pmd_addr_end(addr, end);
916 if (pmd_none_or_clear_bad(pmd)) 932 if (pmd_none_or_clear_bad(pmd))
917 continue; 933 continue;
918 next = page_table_reset_pte(mm, pmd, addr, next); 934 next = page_table_reset_pte(mm, pmd, addr, next, init_skey);
919 } while (pmd++, addr = next, addr != end); 935 } while (pmd++, addr = next, addr != end);
920 936
921 return addr; 937 return addr;
922} 938}
923 939
924static inline unsigned long page_table_reset_pud(struct mm_struct *mm, 940static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd,
925 pgd_t *pgd, unsigned long addr, unsigned long end) 941 unsigned long addr, unsigned long end, bool init_skey)
926{ 942{
927 unsigned long next; 943 unsigned long next;
928 pud_t *pud; 944 pud_t *pud;
@@ -932,28 +948,33 @@ static inline unsigned long page_table_reset_pud(struct mm_struct *mm,
932 next = pud_addr_end(addr, end); 948 next = pud_addr_end(addr, end);
933 if (pud_none_or_clear_bad(pud)) 949 if (pud_none_or_clear_bad(pud))
934 continue; 950 continue;
935 next = page_table_reset_pmd(mm, pud, addr, next); 951 next = page_table_reset_pmd(mm, pud, addr, next, init_skey);
936 } while (pud++, addr = next, addr != end); 952 } while (pud++, addr = next, addr != end);
937 953
938 return addr; 954 return addr;
939} 955}
940 956
941void page_table_reset_pgste(struct mm_struct *mm, 957void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
942 unsigned long start, unsigned long end) 958 unsigned long end, bool init_skey)
943{ 959{
944 unsigned long addr, next; 960 unsigned long addr, next;
945 pgd_t *pgd; 961 pgd_t *pgd;
946 962
963 down_write(&mm->mmap_sem);
964 if (init_skey && mm_use_skey(mm))
965 goto out_up;
947 addr = start; 966 addr = start;
948 down_read(&mm->mmap_sem);
949 pgd = pgd_offset(mm, addr); 967 pgd = pgd_offset(mm, addr);
950 do { 968 do {
951 next = pgd_addr_end(addr, end); 969 next = pgd_addr_end(addr, end);
952 if (pgd_none_or_clear_bad(pgd)) 970 if (pgd_none_or_clear_bad(pgd))
953 continue; 971 continue;
954 next = page_table_reset_pud(mm, pgd, addr, next); 972 next = page_table_reset_pud(mm, pgd, addr, next, init_skey);
955 } while (pgd++, addr = next, addr != end); 973 } while (pgd++, addr = next, addr != end);
956 up_read(&mm->mmap_sem); 974 if (init_skey)
975 current->mm->context.use_skey = 1;
976out_up:
977 up_write(&mm->mmap_sem);
957} 978}
958EXPORT_SYMBOL(page_table_reset_pgste); 979EXPORT_SYMBOL(page_table_reset_pgste);
959 980
@@ -991,7 +1012,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
991 /* changing the guest storage key is considered a change of the page */ 1012 /* changing the guest storage key is considered a change of the page */
992 if ((pgste_val(new) ^ pgste_val(old)) & 1013 if ((pgste_val(new) ^ pgste_val(old)) &
993 (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) 1014 (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
994 pgste_val(new) |= PGSTE_HC_BIT; 1015 pgste_val(new) |= PGSTE_UC_BIT;
995 1016
996 pgste_set_unlock(ptep, new); 1017 pgste_set_unlock(ptep, new);
997 pte_unmap_unlock(*ptep, ptl); 1018 pte_unmap_unlock(*ptep, ptl);
@@ -1013,6 +1034,11 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
1013 return NULL; 1034 return NULL;
1014} 1035}
1015 1036
1037void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
1038 unsigned long end, bool init_skey)
1039{
1040}
1041
1016static inline void page_table_free_pgste(unsigned long *table) 1042static inline void page_table_free_pgste(unsigned long *table)
1017{ 1043{
1018} 1044}
@@ -1359,6 +1385,37 @@ int s390_enable_sie(void)
1359} 1385}
1360EXPORT_SYMBOL_GPL(s390_enable_sie); 1386EXPORT_SYMBOL_GPL(s390_enable_sie);
1361 1387
1388/*
1389 * Enable storage key handling from now on and initialize the storage
1390 * keys with the default key.
1391 */
1392void s390_enable_skey(void)
1393{
1394 page_table_reset_pgste(current->mm, 0, TASK_SIZE, true);
1395}
1396EXPORT_SYMBOL_GPL(s390_enable_skey);
1397
1398/*
1399 * Test and reset if a guest page is dirty
1400 */
1401bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
1402{
1403 pte_t *pte;
1404 spinlock_t *ptl;
1405 bool dirty = false;
1406
1407 pte = get_locked_pte(gmap->mm, address, &ptl);
1408 if (unlikely(!pte))
1409 return false;
1410
1411 if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
1412 dirty = true;
1413
1414 spin_unlock(ptl);
1415 return dirty;
1416}
1417EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);
1418
1362#ifdef CONFIG_TRANSPARENT_HUGEPAGE 1419#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1363int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, 1420int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
1364 pmd_t *pmdp) 1421 pmd_t *pmdp)
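Note (not part of the patch): with init_skey set, page_table_reset_pte() clears the PGSTE key bits and assigns the default storage key to every valid, writable page, skipping the rest, and s390_enable_skey() drives one such walk over the whole address space before marking the mm as using keys. The snippet below is a userspace sketch of just the "skip invalid and not writable pages" filter; PAGE_INVALID and PAGE_WRITE are placeholder bit values, not the real s390 definitions.

#include <stdbool.h>
#include <stdio.h>

#define PAGE_INVALID (1u << 0)   /* placeholder, not the s390 value */
#define PAGE_WRITE   (1u << 1)   /* placeholder, not the s390 value */

/* Mirror of the pte filter in page_table_reset_pte(): only valid,
 * writable pages get the default storage key. */
static bool wants_default_key(unsigned int pte_flags)
{
        if ((pte_flags & PAGE_INVALID) || !(pte_flags & PAGE_WRITE))
                return false;
        return true;
}

int main(void)
{
        printf("%d\n", wants_default_key(PAGE_WRITE));                /* 1 */
        printf("%d\n", wants_default_key(PAGE_INVALID | PAGE_WRITE)); /* 0 */
        return 0;
}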
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 24ec1216596e..a04fe4eb237d 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -189,7 +189,6 @@ struct x86_emulate_ops {
189 void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); 189 void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
190 ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); 190 ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
191 int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); 191 int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
192 void (*set_rflags)(struct x86_emulate_ctxt *ctxt, ulong val);
193 int (*cpl)(struct x86_emulate_ctxt *ctxt); 192 int (*cpl)(struct x86_emulate_ctxt *ctxt);
194 int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); 193 int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
195 int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); 194 int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7de069afb382..49314155b66c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -50,11 +50,7 @@
50 | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ 50 | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
51 | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) 51 | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
52 52
53#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) 53#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
54#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
55#define CR3_PCID_ENABLED_RESERVED_BITS 0xFFFFFF0000000000ULL
56#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
57 0xFFFFFF0000000000ULL)
58#define CR4_RESERVED_BITS \ 54#define CR4_RESERVED_BITS \
59 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ 55 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
60 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ 56 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
@@ -134,7 +130,6 @@ enum kvm_reg_ex {
134 VCPU_EXREG_PDPTR = NR_VCPU_REGS, 130 VCPU_EXREG_PDPTR = NR_VCPU_REGS,
135 VCPU_EXREG_CR3, 131 VCPU_EXREG_CR3,
136 VCPU_EXREG_RFLAGS, 132 VCPU_EXREG_RFLAGS,
137 VCPU_EXREG_CPL,
138 VCPU_EXREG_SEGMENTS, 133 VCPU_EXREG_SEGMENTS,
139}; 134};
140 135
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 58d66fe06b61..8ba18842c48e 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -74,6 +74,11 @@ dotraplinkage void do_general_protection(struct pt_regs *, long);
74dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); 74dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
75#ifdef CONFIG_TRACING 75#ifdef CONFIG_TRACING
76dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long); 76dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long);
77#else
78static inline void trace_do_page_fault(struct pt_regs *regs, unsigned long error)
79{
80 do_page_fault(regs, error);
81}
77#endif 82#endif
78dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long); 83dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
79dotraplinkage void do_coprocessor_error(struct pt_regs *, long); 84dotraplinkage void do_coprocessor_error(struct pt_regs *, long);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 0331cb389d68..7e97371387fd 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -259,7 +259,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
259 259
260 switch (kvm_read_and_reset_pf_reason()) { 260 switch (kvm_read_and_reset_pf_reason()) {
261 default: 261 default:
262 do_page_fault(regs, error_code); 262 trace_do_page_fault(regs, error_code);
263 break; 263 break;
264 case KVM_PV_REASON_PAGE_NOT_PRESENT: 264 case KVM_PV_REASON_PAGE_NOT_PRESENT:
265 /* page is swapped out by the host. */ 265 /* page is swapped out by the host. */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index f47a104a749c..38a0afe83c6b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -283,6 +283,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
283 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); 283 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
284 /* cpuid 1.ecx */ 284 /* cpuid 1.ecx */
285 const u32 kvm_supported_word4_x86_features = 285 const u32 kvm_supported_word4_x86_features =
286 /* NOTE: MONITOR (and MWAIT) are emulated as NOP,
287 * but *not* advertised to guests via CPUID ! */
286 F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | 288 F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
287 0 /* DS-CPL, VMX, SMX, EST */ | 289 0 /* DS-CPL, VMX, SMX, EST */ |
288 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | 290 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
@@ -495,6 +497,13 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
495 entry->ecx &= kvm_supported_word6_x86_features; 497 entry->ecx &= kvm_supported_word6_x86_features;
496 cpuid_mask(&entry->ecx, 6); 498 cpuid_mask(&entry->ecx, 6);
497 break; 499 break;
500 case 0x80000007: /* Advanced power management */
501 /* invariant TSC is CPUID.80000007H:EDX[8] */
502 entry->edx &= (1 << 8);
503 /* mask against host */
504 entry->edx &= boot_cpu_data.x86_power;
505 entry->eax = entry->ebx = entry->ecx = 0;
506 break;
498 case 0x80000008: { 507 case 0x80000008: {
499 unsigned g_phys_as = (entry->eax >> 16) & 0xff; 508 unsigned g_phys_as = (entry->eax >> 16) & 0xff;
500 unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); 509 unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
@@ -525,7 +534,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
525 case 3: /* Processor serial number */ 534 case 3: /* Processor serial number */
526 case 5: /* MONITOR/MWAIT */ 535 case 5: /* MONITOR/MWAIT */
527 case 6: /* Thermal management */ 536 case 6: /* Thermal management */
528 case 0x80000007: /* Advanced power management */
529 case 0xC0000002: 537 case 0xC0000002:
530 case 0xC0000003: 538 case 0xC0000003:
531 case 0xC0000004: 539 case 0xC0000004:
@@ -726,6 +734,7 @@ int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
726not_found: 734not_found:
727 return 36; 735 return 36;
728} 736}
737EXPORT_SYMBOL_GPL(cpuid_maxphyaddr);
729 738
730/* 739/*
731 * If no match is found, check whether we exceed the vCPU's limit 740 * If no match is found, check whether we exceed the vCPU's limit
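Note (not part of the patch): the new 0x80000007 case keeps only EDX bit 8 (invariant TSC), masks it against the host's power-management features, and zeroes EAX/EBX/ECX. The standalone program below shows the same masking against the host's own leaf; it uses the compiler's __get_cpuid() helper rather than anything KVM-specific.

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx))
                return 1;

        /* Keep only the invariant TSC bit, CPUID.80000007H:EDX[8],
         * and only if the host reports it as well. */
        unsigned int guest_edx = (1u << 8) & edx;

        printf("invariant TSC exposed to guest: %s\n",
               guest_edx ? "yes" : "no");
        return 0;
}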
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index eeecbed26ac7..f9087315e0cd 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -88,4 +88,11 @@ static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
88 return best && (best->ecx & bit(X86_FEATURE_X2APIC)); 88 return best && (best->ecx & bit(X86_FEATURE_X2APIC));
89} 89}
90 90
91static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu)
92{
93 struct kvm_cpuid_entry2 *best;
94
95 best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
96 return best && (best->edx & bit(X86_FEATURE_GBPAGES));
97}
91#endif 98#endif
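Note (not part of the patch): guest_cpuid_has_gbpages() checks the 1GB-pages feature in the guest's copy of CPUID leaf 0x80000001; the MMU change further down uses it to decide whether bit 7 of a PDPTE is reserved. The sketch below performs the equivalent check against the host CPUID, assuming the usual encoding where 1GB-page support is EDX bit 26 of that leaf.

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx))
                return 1;

        /* 1GB pages: CPUID.80000001H:EDX[26] */
        printf("1GB pages supported: %s\n",
               (edx & (1u << 26)) ? "yes" : "no");
        return 0;
}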
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 205b17eed93c..e4e833d3d7d7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -161,6 +161,7 @@
161#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ 161#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
162#define NoWrite ((u64)1 << 45) /* No writeback */ 162#define NoWrite ((u64)1 << 45) /* No writeback */
163#define SrcWrite ((u64)1 << 46) /* Write back src operand */ 163#define SrcWrite ((u64)1 << 46) /* Write back src operand */
164#define NoMod ((u64)1 << 47) /* Mod field is ignored */
164 165
165#define DstXacc (DstAccLo | SrcAccHi | SrcWrite) 166#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
166 167
@@ -1077,7 +1078,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1077 ctxt->modrm_rm |= (ctxt->modrm & 0x07); 1078 ctxt->modrm_rm |= (ctxt->modrm & 0x07);
1078 ctxt->modrm_seg = VCPU_SREG_DS; 1079 ctxt->modrm_seg = VCPU_SREG_DS;
1079 1080
1080 if (ctxt->modrm_mod == 3) { 1081 if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1081 op->type = OP_REG; 1082 op->type = OP_REG;
1082 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; 1083 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1083 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1084 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
@@ -1324,7 +1325,8 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1324 rc->end = n * size; 1325 rc->end = n * size;
1325 } 1326 }
1326 1327
1327 if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) { 1328 if (ctxt->rep_prefix && (ctxt->d & String) &&
1329 !(ctxt->eflags & EFLG_DF)) {
1328 ctxt->dst.data = rc->data + rc->pos; 1330 ctxt->dst.data = rc->data + rc->pos;
1329 ctxt->dst.type = OP_MEM_STR; 1331 ctxt->dst.type = OP_MEM_STR;
1330 ctxt->dst.count = (rc->end - rc->pos) / size; 1332 ctxt->dst.count = (rc->end - rc->pos) / size;
@@ -1409,11 +1411,11 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1409} 1411}
1410 1412
1411/* Does not support long mode */ 1413/* Does not support long mode */
1412static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, 1414static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1413 u16 selector, int seg) 1415 u16 selector, int seg, u8 cpl, bool in_task_switch)
1414{ 1416{
1415 struct desc_struct seg_desc, old_desc; 1417 struct desc_struct seg_desc, old_desc;
1416 u8 dpl, rpl, cpl; 1418 u8 dpl, rpl;
1417 unsigned err_vec = GP_VECTOR; 1419 unsigned err_vec = GP_VECTOR;
1418 u32 err_code = 0; 1420 u32 err_code = 0;
1419 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ 1421 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
@@ -1441,7 +1443,6 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1441 } 1443 }
1442 1444
1443 rpl = selector & 3; 1445 rpl = selector & 3;
1444 cpl = ctxt->ops->cpl(ctxt);
1445 1446
1446 /* NULL selector is not valid for TR, CS and SS (except for long mode) */ 1447 /* NULL selector is not valid for TR, CS and SS (except for long mode) */
1447 if ((seg == VCPU_SREG_CS 1448 if ((seg == VCPU_SREG_CS
@@ -1486,6 +1487,9 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1486 goto exception; 1487 goto exception;
1487 break; 1488 break;
1488 case VCPU_SREG_CS: 1489 case VCPU_SREG_CS:
1490 if (in_task_switch && rpl != dpl)
1491 goto exception;
1492
1489 if (!(seg_desc.type & 8)) 1493 if (!(seg_desc.type & 8))
1490 goto exception; 1494 goto exception;
1491 1495
@@ -1543,6 +1547,13 @@ exception:
1543 return X86EMUL_PROPAGATE_FAULT; 1547 return X86EMUL_PROPAGATE_FAULT;
1544} 1548}
1545 1549
1550static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1551 u16 selector, int seg)
1552{
1553 u8 cpl = ctxt->ops->cpl(ctxt);
1554 return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
1555}
1556
1546static void write_register_operand(struct operand *op) 1557static void write_register_operand(struct operand *op)
1547{ 1558{
1548 /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ 1559 /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
@@ -2404,6 +2415,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2404 struct tss_segment_16 *tss) 2415 struct tss_segment_16 *tss)
2405{ 2416{
2406 int ret; 2417 int ret;
2418 u8 cpl;
2407 2419
2408 ctxt->_eip = tss->ip; 2420 ctxt->_eip = tss->ip;
2409 ctxt->eflags = tss->flag | 2; 2421 ctxt->eflags = tss->flag | 2;
@@ -2426,23 +2438,25 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2426 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); 2438 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2427 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); 2439 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2428 2440
2441 cpl = tss->cs & 3;
2442
2429 /* 2443 /*
2430 * Now load segment descriptors. If fault happens at this stage 2444 * Now load segment descriptors. If fault happens at this stage
2431 * it is handled in a context of new task 2445 * it is handled in a context of new task
2432 */ 2446 */
2433 ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR); 2447 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
2434 if (ret != X86EMUL_CONTINUE) 2448 if (ret != X86EMUL_CONTINUE)
2435 return ret; 2449 return ret;
2436 ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES); 2450 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
2437 if (ret != X86EMUL_CONTINUE) 2451 if (ret != X86EMUL_CONTINUE)
2438 return ret; 2452 return ret;
2439 ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS); 2453 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
2440 if (ret != X86EMUL_CONTINUE) 2454 if (ret != X86EMUL_CONTINUE)
2441 return ret; 2455 return ret;
2442 ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS); 2456 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
2443 if (ret != X86EMUL_CONTINUE) 2457 if (ret != X86EMUL_CONTINUE)
2444 return ret; 2458 return ret;
2445 ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS); 2459 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
2446 if (ret != X86EMUL_CONTINUE) 2460 if (ret != X86EMUL_CONTINUE)
2447 return ret; 2461 return ret;
2448 2462
@@ -2496,7 +2510,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
2496static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, 2510static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2497 struct tss_segment_32 *tss) 2511 struct tss_segment_32 *tss)
2498{ 2512{
2499 tss->cr3 = ctxt->ops->get_cr(ctxt, 3); 2513 /* CR3 and ldt selector are not saved intentionally */
2500 tss->eip = ctxt->_eip; 2514 tss->eip = ctxt->_eip;
2501 tss->eflags = ctxt->eflags; 2515 tss->eflags = ctxt->eflags;
2502 tss->eax = reg_read(ctxt, VCPU_REGS_RAX); 2516 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
@@ -2514,13 +2528,13 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2514 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); 2528 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2515 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS); 2529 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
2516 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS); 2530 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
2517 tss->ldt_selector = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2518} 2531}
2519 2532
2520static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, 2533static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2521 struct tss_segment_32 *tss) 2534 struct tss_segment_32 *tss)
2522{ 2535{
2523 int ret; 2536 int ret;
2537 u8 cpl;
2524 2538
2525 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3)) 2539 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
2526 return emulate_gp(ctxt, 0); 2540 return emulate_gp(ctxt, 0);
@@ -2539,7 +2553,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2539 2553
2540 /* 2554 /*
2541 * SDM says that segment selectors are loaded before segment 2555 * SDM says that segment selectors are loaded before segment
2542 * descriptors 2556 * descriptors. This is important because CPL checks will
2557 * use CS.RPL.
2543 */ 2558 */
2544 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); 2559 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
2545 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); 2560 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
@@ -2553,43 +2568,38 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2553 * If we're switching between Protected Mode and VM86, we need to make 2568 * If we're switching between Protected Mode and VM86, we need to make
2554 * sure to update the mode before loading the segment descriptors so 2569 * sure to update the mode before loading the segment descriptors so
2555 * that the selectors are interpreted correctly. 2570 * that the selectors are interpreted correctly.
2556 *
2557 * Need to get rflags to the vcpu struct immediately because it
2558 * influences the CPL which is checked at least when loading the segment
2559 * descriptors and when pushing an error code to the new kernel stack.
2560 *
2561 * TODO Introduce a separate ctxt->ops->set_cpl callback
2562 */ 2571 */
2563 if (ctxt->eflags & X86_EFLAGS_VM) 2572 if (ctxt->eflags & X86_EFLAGS_VM) {
2564 ctxt->mode = X86EMUL_MODE_VM86; 2573 ctxt->mode = X86EMUL_MODE_VM86;
2565 else 2574 cpl = 3;
2575 } else {
2566 ctxt->mode = X86EMUL_MODE_PROT32; 2576 ctxt->mode = X86EMUL_MODE_PROT32;
2567 2577 cpl = tss->cs & 3;
2568 ctxt->ops->set_rflags(ctxt, ctxt->eflags); 2578 }
2569 2579
2570 /* 2580 /*
2571 * Now load segment descriptors. If fault happenes at this stage 2581 * Now load segment descriptors. If fault happenes at this stage
2572 * it is handled in a context of new task 2582 * it is handled in a context of new task
2573 */ 2583 */
2574 ret = load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); 2584 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
2575 if (ret != X86EMUL_CONTINUE) 2585 if (ret != X86EMUL_CONTINUE)
2576 return ret; 2586 return ret;
2577 ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES); 2587 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
2578 if (ret != X86EMUL_CONTINUE) 2588 if (ret != X86EMUL_CONTINUE)
2579 return ret; 2589 return ret;
2580 ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS); 2590 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
2581 if (ret != X86EMUL_CONTINUE) 2591 if (ret != X86EMUL_CONTINUE)
2582 return ret; 2592 return ret;
2583 ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS); 2593 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
2584 if (ret != X86EMUL_CONTINUE) 2594 if (ret != X86EMUL_CONTINUE)
2585 return ret; 2595 return ret;
2586 ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS); 2596 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
2587 if (ret != X86EMUL_CONTINUE) 2597 if (ret != X86EMUL_CONTINUE)
2588 return ret; 2598 return ret;
2589 ret = load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS); 2599 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
2590 if (ret != X86EMUL_CONTINUE) 2600 if (ret != X86EMUL_CONTINUE)
2591 return ret; 2601 return ret;
2592 ret = load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS); 2602 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
2593 if (ret != X86EMUL_CONTINUE) 2603 if (ret != X86EMUL_CONTINUE)
2594 return ret; 2604 return ret;
2595 2605
@@ -2604,6 +2614,8 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2604 struct tss_segment_32 tss_seg; 2614 struct tss_segment_32 tss_seg;
2605 int ret; 2615 int ret;
2606 u32 new_tss_base = get_desc_base(new_desc); 2616 u32 new_tss_base = get_desc_base(new_desc);
2617 u32 eip_offset = offsetof(struct tss_segment_32, eip);
2618 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
2607 2619
2608 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, 2620 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2609 &ctxt->exception); 2621 &ctxt->exception);
@@ -2613,8 +2625,9 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2613 2625
2614 save_state_to_tss32(ctxt, &tss_seg); 2626 save_state_to_tss32(ctxt, &tss_seg);
2615 2627
2616 ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, 2628 /* Only GP registers and segment selectors are saved */
2617 &ctxt->exception); 2629 ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
2630 ldt_sel_offset - eip_offset, &ctxt->exception);
2618 if (ret != X86EMUL_CONTINUE) 2631 if (ret != X86EMUL_CONTINUE)
2619 /* FIXME: need to provide precise fault address */ 2632 /* FIXME: need to provide precise fault address */
2620 return ret; 2633 return ret;
@@ -3386,10 +3399,6 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
3386 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); 3399 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3387 if (efer & EFER_LMA) 3400 if (efer & EFER_LMA)
3388 rsvd = CR3_L_MODE_RESERVED_BITS; 3401 rsvd = CR3_L_MODE_RESERVED_BITS;
3389 else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE)
3390 rsvd = CR3_PAE_RESERVED_BITS;
3391 else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG)
3392 rsvd = CR3_NONPAE_RESERVED_BITS;
3393 3402
3394 if (new_val & rsvd) 3403 if (new_val & rsvd)
3395 return emulate_gp(ctxt, 0); 3404 return emulate_gp(ctxt, 0);
@@ -3869,10 +3878,12 @@ static const struct opcode twobyte_table[256] = {
3869 N, N, N, N, N, N, N, N, 3878 N, N, N, N, N, N, N, N,
3870 D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM), 3879 D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
3871 /* 0x20 - 0x2F */ 3880 /* 0x20 - 0x2F */
3872 DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), 3881 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
3873 DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), 3882 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
3874 IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write), 3883 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
3875 IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write), 3884 check_cr_write),
3885 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
3886 check_dr_write),
3876 N, N, N, N, 3887 N, N, N, N,
3877 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29), 3888 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
3878 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29), 3889 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
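Note (not part of the patch): during a task switch the emulator now computes the CPL once from the incoming state, 3 in VM86 mode and otherwise the RPL of the TSS's CS selector, and passes it to every __load_segment_descriptor() call instead of reading the old CPL from the vcpu. A minimal standalone sketch of that derivation (X86_EFLAGS_VM is EFLAGS bit 17):

#include <stdio.h>

#define X86_EFLAGS_VM (1u << 17)   /* virtual-8086 mode flag */

/* CPL used while loading segments from the new TSS, as in
 * load_state_from_tss32(): 3 in VM86 mode, else CS selector RPL. */
static unsigned int task_switch_cpl(unsigned int eflags, unsigned int tss_cs)
{
        if (eflags & X86_EFLAGS_VM)
                return 3;
        return tss_cs & 3;
}

int main(void)
{
        printf("%u\n", task_switch_cpl(X86_EFLAGS_VM, 0x08)); /* 3 */
        printf("%u\n", task_switch_cpl(0, 0x08));             /* 0 */
        printf("%u\n", task_switch_cpl(0, 0x23));             /* 3 */
        return 0;
}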
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 484bc874688b..bd0da433e6d7 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -113,6 +113,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
113 113
114 return kvm_get_apic_interrupt(v); /* APIC */ 114 return kvm_get_apic_interrupt(v); /* APIC */
115} 115}
116EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
116 117
117void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) 118void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
118{ 119{
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9736529ade08..006911858174 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -360,6 +360,8 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
360 360
361static inline void apic_set_isr(int vec, struct kvm_lapic *apic) 361static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
362{ 362{
363 /* Note that we never get here with APIC virtualization enabled. */
364
363 if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) 365 if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
364 ++apic->isr_count; 366 ++apic->isr_count;
365 BUG_ON(apic->isr_count > MAX_APIC_VECTOR); 367 BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
@@ -371,12 +373,48 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
371 apic->highest_isr_cache = vec; 373 apic->highest_isr_cache = vec;
372} 374}
373 375
376static inline int apic_find_highest_isr(struct kvm_lapic *apic)
377{
378 int result;
379
380 /*
381 * Note that isr_count is always 1, and highest_isr_cache
382 * is always -1, with APIC virtualization enabled.
383 */
384 if (!apic->isr_count)
385 return -1;
386 if (likely(apic->highest_isr_cache != -1))
387 return apic->highest_isr_cache;
388
389 result = find_highest_vector(apic->regs + APIC_ISR);
390 ASSERT(result == -1 || result >= 16);
391
392 return result;
393}
394
374static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) 395static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
375{ 396{
376 if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) 397 struct kvm_vcpu *vcpu;
398 if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
399 return;
400
401 vcpu = apic->vcpu;
402
403 /*
 404	 * We do get here with APIC virtualization enabled if the guest
405 * uses the Hyper-V APIC enlightenment. In this case we may need
406 * to trigger a new interrupt delivery by writing the SVI field;
407 * on the other hand isr_count and highest_isr_cache are unused
408 * and must be left alone.
409 */
410 if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
411 kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
412 apic_find_highest_isr(apic));
413 else {
377 --apic->isr_count; 414 --apic->isr_count;
378 BUG_ON(apic->isr_count < 0); 415 BUG_ON(apic->isr_count < 0);
379 apic->highest_isr_cache = -1; 416 apic->highest_isr_cache = -1;
417 }
380} 418}
381 419
382int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) 420int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
@@ -456,22 +494,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
456 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 494 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
457} 495}
458 496
459static inline int apic_find_highest_isr(struct kvm_lapic *apic)
460{
461 int result;
462
463 /* Note that isr_count is always 1 with vid enabled */
464 if (!apic->isr_count)
465 return -1;
466 if (likely(apic->highest_isr_cache != -1))
467 return apic->highest_isr_cache;
468
469 result = find_highest_vector(apic->regs + APIC_ISR);
470 ASSERT(result == -1 || result >= 16);
471
472 return result;
473}
474
475void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr) 497void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
476{ 498{
477 struct kvm_lapic *apic = vcpu->arch.apic; 499 struct kvm_lapic *apic = vcpu->arch.apic;
@@ -1605,6 +1627,8 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
1605 int vector = kvm_apic_has_interrupt(vcpu); 1627 int vector = kvm_apic_has_interrupt(vcpu);
1606 struct kvm_lapic *apic = vcpu->arch.apic; 1628 struct kvm_lapic *apic = vcpu->arch.apic;
1607 1629
1630 /* Note that we never get here with APIC virtualization enabled. */
1631
1608 if (vector == -1) 1632 if (vector == -1)
1609 return -1; 1633 return -1;
1610 1634
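Note (not part of the patch): apic_clear_isr() now branches, with APIC-register virtualization it refreshes the hardware SVI field from the recomputed highest in-service vector, while the software path keeps maintaining isr_count and highest_isr_cache. The standalone sketch below shows the "highest set vector" scan that apic_find_highest_isr() relies on, over a simplified flat 256-bit bitmap (the real APIC ISR is spread across several 32-bit registers).

#include <stdio.h>
#include <stdint.h>

/* Highest set vector in a flat 256-bit bitmap, -1 if none set.
 * Simplified stand-in for find_highest_vector(). */
static int find_highest_vector(const uint32_t isr[8])
{
        for (int word = 7; word >= 0; word--) {
                if (!isr[word])
                        continue;
                for (int bit = 31; bit >= 0; bit--)
                        if (isr[word] & (1u << bit))
                                return word * 32 + bit;
        }
        return -1;
}

int main(void)
{
        uint32_t isr[8] = { 0 };

        printf("%d\n", find_highest_vector(isr));   /* -1 */
        isr[1] |= 1u << 3;                           /* vector 35 */
        isr[2] |= 1u << 0;                           /* vector 64 */
        printf("%d\n", find_highest_vector(isr));   /* 64 */
        return 0;
}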
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 813d31038b93..931467881da7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -22,6 +22,7 @@
22#include "mmu.h" 22#include "mmu.h"
23#include "x86.h" 23#include "x86.h"
24#include "kvm_cache_regs.h" 24#include "kvm_cache_regs.h"
25#include "cpuid.h"
25 26
26#include <linux/kvm_host.h> 27#include <linux/kvm_host.h>
27#include <linux/types.h> 28#include <linux/types.h>
@@ -595,7 +596,8 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
595 * we always atomicly update it, see the comments in 596 * we always atomicly update it, see the comments in
596 * spte_has_volatile_bits(). 597 * spte_has_volatile_bits().
597 */ 598 */
598 if (is_writable_pte(old_spte) && !is_writable_pte(new_spte)) 599 if (spte_is_locklessly_modifiable(old_spte) &&
600 !is_writable_pte(new_spte))
599 ret = true; 601 ret = true;
600 602
601 if (!shadow_accessed_mask) 603 if (!shadow_accessed_mask)
@@ -1176,8 +1178,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
1176 1178
1177/* 1179/*
1178 * Write-protect on the specified @sptep, @pt_protect indicates whether 1180 * Write-protect on the specified @sptep, @pt_protect indicates whether
1179 * spte writ-protection is caused by protecting shadow page table. 1181 * spte write-protection is caused by protecting shadow page table.
1180 * @flush indicates whether tlb need be flushed.
1181 * 1182 *
1182 * Note: write protection is difference between drity logging and spte 1183 * Note: write protection is difference between drity logging and spte
1183 * protection: 1184 * protection:
@@ -1186,10 +1187,9 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
1186 * - for spte protection, the spte can be writable only after unsync-ing 1187 * - for spte protection, the spte can be writable only after unsync-ing
1187 * shadow page. 1188 * shadow page.
1188 * 1189 *
1189 * Return true if the spte is dropped. 1190 * Return true if tlb need be flushed.
1190 */ 1191 */
1191static bool 1192static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect)
1192spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
1193{ 1193{
1194 u64 spte = *sptep; 1194 u64 spte = *sptep;
1195 1195
@@ -1199,17 +1199,11 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
1199 1199
1200 rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep); 1200 rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
1201 1201
1202 if (__drop_large_spte(kvm, sptep)) {
1203 *flush |= true;
1204 return true;
1205 }
1206
1207 if (pt_protect) 1202 if (pt_protect)
1208 spte &= ~SPTE_MMU_WRITEABLE; 1203 spte &= ~SPTE_MMU_WRITEABLE;
1209 spte = spte & ~PT_WRITABLE_MASK; 1204 spte = spte & ~PT_WRITABLE_MASK;
1210 1205
1211 *flush |= mmu_spte_update(sptep, spte); 1206 return mmu_spte_update(sptep, spte);
1212 return false;
1213} 1207}
1214 1208
1215static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, 1209static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
@@ -1221,11 +1215,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
1221 1215
1222 for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { 1216 for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
1223 BUG_ON(!(*sptep & PT_PRESENT_MASK)); 1217 BUG_ON(!(*sptep & PT_PRESENT_MASK));
1224 if (spte_write_protect(kvm, sptep, &flush, pt_protect)) {
1225 sptep = rmap_get_first(*rmapp, &iter);
1226 continue;
1227 }
1228 1218
1219 flush |= spte_write_protect(kvm, sptep, pt_protect);
1229 sptep = rmap_get_next(&iter); 1220 sptep = rmap_get_next(&iter);
1230 } 1221 }
1231 1222
@@ -2802,9 +2793,9 @@ static bool page_fault_can_be_fast(u32 error_code)
2802} 2793}
2803 2794
2804static bool 2795static bool
2805fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 spte) 2796fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
2797 u64 *sptep, u64 spte)
2806{ 2798{
2807 struct kvm_mmu_page *sp = page_header(__pa(sptep));
2808 gfn_t gfn; 2799 gfn_t gfn;
2809 2800
2810 WARN_ON(!sp->role.direct); 2801 WARN_ON(!sp->role.direct);
@@ -2830,6 +2821,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
2830 u32 error_code) 2821 u32 error_code)
2831{ 2822{
2832 struct kvm_shadow_walk_iterator iterator; 2823 struct kvm_shadow_walk_iterator iterator;
2824 struct kvm_mmu_page *sp;
2833 bool ret = false; 2825 bool ret = false;
2834 u64 spte = 0ull; 2826 u64 spte = 0ull;
2835 2827
@@ -2853,7 +2845,8 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
2853 goto exit; 2845 goto exit;
2854 } 2846 }
2855 2847
2856 if (!is_last_spte(spte, level)) 2848 sp = page_header(__pa(iterator.sptep));
2849 if (!is_last_spte(spte, sp->role.level))
2857 goto exit; 2850 goto exit;
2858 2851
2859 /* 2852 /*
@@ -2875,11 +2868,24 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
2875 goto exit; 2868 goto exit;
2876 2869
2877 /* 2870 /*
 2871 * Do not fix write permission on a large spte, since we only mark
 2872 * the first page dirty in the bitmap in fast_pf_fix_direct_spte(),
 2873 * which means the other pages would be missed if the slot is dirty-logged.
2874 *
2875 * Instead, we let the slow page fault path create a normal spte to
2876 * fix the access.
2877 *
2878 * See the comments in kvm_arch_commit_memory_region().
2879 */
2880 if (sp->role.level > PT_PAGE_TABLE_LEVEL)
2881 goto exit;
2882
2883 /*
2878 * Currently, fast page fault only works for direct mapping since 2884 * Currently, fast page fault only works for direct mapping since
2879 * the gfn is not stable for indirect shadow page. 2885 * the gfn is not stable for indirect shadow page.
2880 * See Documentation/virtual/kvm/locking.txt to get more detail. 2886 * See Documentation/virtual/kvm/locking.txt to get more detail.
2881 */ 2887 */
2882 ret = fast_pf_fix_direct_spte(vcpu, iterator.sptep, spte); 2888 ret = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte);
2883exit: 2889exit:
2884 trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, 2890 trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep,
2885 spte, ret); 2891 spte, ret);
@@ -3511,11 +3517,14 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3511{ 3517{
3512 int maxphyaddr = cpuid_maxphyaddr(vcpu); 3518 int maxphyaddr = cpuid_maxphyaddr(vcpu);
3513 u64 exb_bit_rsvd = 0; 3519 u64 exb_bit_rsvd = 0;
3520 u64 gbpages_bit_rsvd = 0;
3514 3521
3515 context->bad_mt_xwr = 0; 3522 context->bad_mt_xwr = 0;
3516 3523
3517 if (!context->nx) 3524 if (!context->nx)
3518 exb_bit_rsvd = rsvd_bits(63, 63); 3525 exb_bit_rsvd = rsvd_bits(63, 63);
3526 if (!guest_cpuid_has_gbpages(vcpu))
3527 gbpages_bit_rsvd = rsvd_bits(7, 7);
3519 switch (context->root_level) { 3528 switch (context->root_level) {
3520 case PT32_ROOT_LEVEL: 3529 case PT32_ROOT_LEVEL:
3521 /* no rsvd bits for 2 level 4K page table entries */ 3530 /* no rsvd bits for 2 level 4K page table entries */
@@ -3538,7 +3547,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3538 case PT32E_ROOT_LEVEL: 3547 case PT32E_ROOT_LEVEL:
3539 context->rsvd_bits_mask[0][2] = 3548 context->rsvd_bits_mask[0][2] =
3540 rsvd_bits(maxphyaddr, 63) | 3549 rsvd_bits(maxphyaddr, 63) |
3541 rsvd_bits(7, 8) | rsvd_bits(1, 2); /* PDPTE */ 3550 rsvd_bits(5, 8) | rsvd_bits(1, 2); /* PDPTE */
3542 context->rsvd_bits_mask[0][1] = exb_bit_rsvd | 3551 context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
3543 rsvd_bits(maxphyaddr, 62); /* PDE */ 3552 rsvd_bits(maxphyaddr, 62); /* PDE */
3544 context->rsvd_bits_mask[0][0] = exb_bit_rsvd | 3553 context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
@@ -3550,16 +3559,16 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3550 break; 3559 break;
3551 case PT64_ROOT_LEVEL: 3560 case PT64_ROOT_LEVEL:
3552 context->rsvd_bits_mask[0][3] = exb_bit_rsvd | 3561 context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
3553 rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); 3562 rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 7);
3554 context->rsvd_bits_mask[0][2] = exb_bit_rsvd | 3563 context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
3555 rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); 3564 gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
3556 context->rsvd_bits_mask[0][1] = exb_bit_rsvd | 3565 context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
3557 rsvd_bits(maxphyaddr, 51); 3566 rsvd_bits(maxphyaddr, 51);
3558 context->rsvd_bits_mask[0][0] = exb_bit_rsvd | 3567 context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
3559 rsvd_bits(maxphyaddr, 51); 3568 rsvd_bits(maxphyaddr, 51);
3560 context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; 3569 context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
3561 context->rsvd_bits_mask[1][2] = exb_bit_rsvd | 3570 context->rsvd_bits_mask[1][2] = exb_bit_rsvd |
3562 rsvd_bits(maxphyaddr, 51) | 3571 gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) |
3563 rsvd_bits(13, 29); 3572 rsvd_bits(13, 29);
3564 context->rsvd_bits_mask[1][1] = exb_bit_rsvd | 3573 context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
3565 rsvd_bits(maxphyaddr, 51) | 3574 rsvd_bits(maxphyaddr, 51) |
@@ -4304,15 +4313,32 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
4304 if (*rmapp) 4313 if (*rmapp)
4305 __rmap_write_protect(kvm, rmapp, false); 4314 __rmap_write_protect(kvm, rmapp, false);
4306 4315
4307 if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { 4316 if (need_resched() || spin_needbreak(&kvm->mmu_lock))
4308 kvm_flush_remote_tlbs(kvm);
4309 cond_resched_lock(&kvm->mmu_lock); 4317 cond_resched_lock(&kvm->mmu_lock);
4310 }
4311 } 4318 }
4312 } 4319 }
4313 4320
4314 kvm_flush_remote_tlbs(kvm);
4315 spin_unlock(&kvm->mmu_lock); 4321 spin_unlock(&kvm->mmu_lock);
4322
4323 /*
4324 * kvm_mmu_slot_remove_write_access() and kvm_vm_ioctl_get_dirty_log()
4325 * which do tlb flush out of mmu-lock should be serialized by
 4326 * kvm->slots_lock; otherwise a tlb flush could be missed.
4327 */
4328 lockdep_assert_held(&kvm->slots_lock);
4329
4330 /*
 4331 * We can flush all the TLBs out of the mmu lock without TLB
 4332 * corruption since we only change the spte from writable to
 4333 * read-only, so the only case we need to care about is a spte
 4334 * changing from present to present (changing a spte from present
 4335 * to nonpresent flushes all the TLBs immediately). In other
 4336 * words, the only case we care about is mmu_spte_update(), which
 4337 * now checks SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE
 4338 * instead of PT_WRITABLE_MASK, so it no longer depends
 4339 * on PT_WRITABLE_MASK.
4340 */
4341 kvm_flush_remote_tlbs(kvm);
4316} 4342}
4317 4343
4318#define BATCH_ZAP_PAGES 10 4344#define BATCH_ZAP_PAGES 10
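Note (not part of the patch): reset_rsvds_bits_mask() now treats bit 7 of a PDPTE as reserved whenever the guest's CPUID does not advertise 1GB pages, on top of the usual bits above the guest's maxphyaddr. The standalone sketch below rebuilds such a mask with a rsvd_bits() helper written in the same spirit as KVM's; maxphyaddr = 40 is just an example value, and the real masks also fold in the NX-related exb bit.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Reserved-bit mask covering bit positions s..e inclusive,
 * in the same spirit as KVM's rsvd_bits() helper. */
static uint64_t rsvd_bits(int s, int e)
{
        return ((1ULL << (e - s + 1)) - 1) << s;
}

int main(void)
{
        int maxphyaddr = 40;            /* example value */
        bool guest_has_gbpages = false;

        uint64_t pdpte_rsvd = rsvd_bits(maxphyaddr, 51);
        if (!guest_has_gbpages)
                pdpte_rsvd |= rsvd_bits(7, 7);   /* PS bit becomes reserved */

        printf("PDPTE reserved bits: 0x%016llx\n",
               (unsigned long long)pdpte_rsvd);
        return 0;
}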
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 3842e70bdb7c..b982112d2ca5 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -104,6 +104,39 @@ static inline int is_present_gpte(unsigned long pte)
104 return pte & PT_PRESENT_MASK; 104 return pte & PT_PRESENT_MASK;
105} 105}
106 106
107/*
 108 * Currently we have two sorts of write-protection: a) the first kind
 109 * write-protects guest pages to synchronize guest modifications, b) the
 110 * second kind is used to sync the dirty bitmap for KVM_GET_DIRTY_LOG.
 111 * The differences between the two are:
 112 * 1) the first case clears the SPTE_MMU_WRITEABLE bit.
 113 * 2) the first case requires flushing the tlb immediately to avoid
 114 * corrupting the shadow page table between vcpus, so it must run under
 115 * mmu-lock. The second case does not need to flush the tlb until the
 116 * dirty bitmap is returned to userspace, since it only write-protects
 117 * pages logged in the bitmap; no page in the dirty bitmap is missed,
 118 * so it can flush the tlb out of mmu-lock.
 119 *
 120 * This creates a problem: the first case can see a corrupted (stale
 121 * writable) tlb entry left by the second case, which write-protects
 122 * pages without flushing the tlb immediately. To handle this, the first
 123 * case flushes the tlb whenever it write-protects a spte whose
 124 * SPTE_MMU_WRITEABLE bit is set; the second case never touches that bit.
 125 *
 126 * Whenever a spte is updated in place (only permission and status bits
 127 * change) we need to check whether a spte with SPTE_MMU_WRITEABLE set
 128 * becomes read-only; if that happens, the tlb must be flushed.
 129 * Fortunately, mmu_spte_update() already handles this.
 130 *
 131 * The rules for using SPTE_MMU_WRITEABLE and PT_WRITABLE_MASK:
 132 * - to check whether a writable tlb entry may exist, or whether the spte
 133 * can be made writable in the mmu mapping, test SPTE_MMU_WRITEABLE;
 134 * this is the common case, otherwise
 135 * - when fixing a page fault on the spte or write-protecting for dirty
 136 * logging, test PT_WRITABLE_MASK.
137 *
138 * TODO: introduce APIs to split these two cases.
139 */
107static inline int is_writable_pte(unsigned long pte) 140static inline int is_writable_pte(unsigned long pte)
108{ 141{
109 return pte & PT_WRITABLE_MASK; 142 return pte & PT_WRITABLE_MASK;
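Note (not part of the patch): the comment added above pairs with the mmu_spte_update() change earlier in this diff, whose rule is "flush if a locklessly-modifiable spte loses PT_WRITABLE_MASK". The standalone sketch below restates that predicate; the three bit positions are placeholders, not the real KVM spte layout.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Placeholder bit positions, not the real KVM spte layout. */
#define PT_WRITABLE_MASK     (1ULL << 1)
#define SPTE_HOST_WRITEABLE  (1ULL << 9)
#define SPTE_MMU_WRITEABLE   (1ULL << 10)

static bool spte_is_locklessly_modifiable(uint64_t spte)
{
        return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
               (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
}

/* Flush decision used when an spte is updated in place, mirroring the
 * mmu_spte_update() change in this diff. */
static bool needs_tlb_flush(uint64_t old_spte, uint64_t new_spte)
{
        return spte_is_locklessly_modifiable(old_spte) &&
               !(new_spte & PT_WRITABLE_MASK);
}

int main(void)
{
        uint64_t writable = PT_WRITABLE_MASK | SPTE_HOST_WRITEABLE |
                            SPTE_MMU_WRITEABLE;

        printf("%d\n", needs_tlb_flush(writable, writable & ~PT_WRITABLE_MASK)); /* 1 */
        printf("%d\n", needs_tlb_flush(PT_WRITABLE_MASK, 0));                    /* 0 */
        return 0;
}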
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 123efd3ec29f..410776528265 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -913,8 +913,7 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
913 * and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't 913 * and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't
914 * used by guest then tlbs are not flushed, so guest is allowed to access the 914 * used by guest then tlbs are not flushed, so guest is allowed to access the
915 * freed pages. 915 * freed pages.
916 * We set tlbs_dirty to let the notifier know this change and delay the flush 916 * And we increase kvm->tlbs_dirty to delay tlbs flush in this case.
917 * until such a case actually happens.
918 */ 917 */
919static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 918static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
920{ 919{
@@ -943,7 +942,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
943 return -EINVAL; 942 return -EINVAL;
944 943
945 if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { 944 if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
946 vcpu->kvm->tlbs_dirty = true; 945 vcpu->kvm->tlbs_dirty++;
947 continue; 946 continue;
948 } 947 }
949 948
@@ -958,7 +957,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
958 957
959 if (gfn != sp->gfns[i]) { 958 if (gfn != sp->gfns[i]) {
960 drop_spte(vcpu->kvm, &sp->spt[i]); 959 drop_spte(vcpu->kvm, &sp->spt[i]);
961 vcpu->kvm->tlbs_dirty = true; 960 vcpu->kvm->tlbs_dirty++;
962 continue; 961 continue;
963 } 962 }
964 963
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 5c4f63151b4d..cbecaa90399c 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -108,7 +108,10 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
108{ 108{
109 struct kvm_pmc *pmc = perf_event->overflow_handler_context; 109 struct kvm_pmc *pmc = perf_event->overflow_handler_context;
110 struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; 110 struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
111 __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); 111 if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
112 __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
113 kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
114 }
112} 115}
113 116
114static void kvm_perf_overflow_intr(struct perf_event *perf_event, 117static void kvm_perf_overflow_intr(struct perf_event *perf_event,
@@ -117,7 +120,7 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event,
117 struct kvm_pmc *pmc = perf_event->overflow_handler_context; 120 struct kvm_pmc *pmc = perf_event->overflow_handler_context;
118 struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; 121 struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
119 if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) { 122 if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
120 kvm_perf_overflow(perf_event, data, regs); 123 __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
121 kvm_make_request(KVM_REQ_PMU, pmc->vcpu); 124 kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
122 /* 125 /*
123 * Inject PMI. If vcpu was in a guest mode during NMI PMI 126 * Inject PMI. If vcpu was in a guest mode during NMI PMI
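Note (not part of the patch): both PMU overflow handlers now set the counter's global-status bit and request KVM_REQ_PMU only when the counter's reprogram_pmi bit was not already set, so repeated overflows collapse into one pending request. A standalone sketch of that test-and-set gating using C11 atomics (the kernel uses test_and_set_bit()):

#include <stdio.h>
#include <stdatomic.h>

static atomic_ulong reprogram_pmi;   /* one bit per counter */

/* Returns 1 the first time an overflow is recorded for a counter,
 * 0 for further overflows until the bit is cleared again. */
static int record_overflow(int idx)
{
        unsigned long bit = 1UL << idx;
        unsigned long old = atomic_fetch_or(&reprogram_pmi, bit);

        return !(old & bit);
}

int main(void)
{
        printf("%d\n", record_overflow(3));   /* 1: request work */
        printf("%d\n", record_overflow(3));   /* 0: already pending */
        printf("%d\n", record_overflow(5));   /* 1 */
        return 0;
}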
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7f4f9c2badae..ec8366c5cfea 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1338,21 +1338,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1338 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); 1338 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1339} 1339}
1340 1340
1341static void svm_update_cpl(struct kvm_vcpu *vcpu)
1342{
1343 struct vcpu_svm *svm = to_svm(vcpu);
1344 int cpl;
1345
1346 if (!is_protmode(vcpu))
1347 cpl = 0;
1348 else if (svm->vmcb->save.rflags & X86_EFLAGS_VM)
1349 cpl = 3;
1350 else
1351 cpl = svm->vmcb->save.cs.selector & 0x3;
1352
1353 svm->vmcb->save.cpl = cpl;
1354}
1355
1356static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) 1341static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1357{ 1342{
1358 return to_svm(vcpu)->vmcb->save.rflags; 1343 return to_svm(vcpu)->vmcb->save.rflags;
@@ -1360,11 +1345,12 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1360 1345
1361static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 1346static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1362{ 1347{
1363 unsigned long old_rflags = to_svm(vcpu)->vmcb->save.rflags; 1348 /*
1364						 1349	/*
							 1349	 * Any change of EFLAGS.VM is accompanied by a reload of SS
1350 * (caused by either a task switch or an inter-privilege IRET),
1351 * so we do not need to update the CPL here.
1352 */
1365 to_svm(vcpu)->vmcb->save.rflags = rflags; 1353 to_svm(vcpu)->vmcb->save.rflags = rflags;
1366 if ((old_rflags ^ rflags) & X86_EFLAGS_VM)
1367 svm_update_cpl(vcpu);
1368} 1354}
1369 1355
1370static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) 1356static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
@@ -1631,8 +1617,15 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
1631 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; 1617 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
1632 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; 1618 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1633 } 1619 }
1634 if (seg == VCPU_SREG_CS) 1620
1635 svm_update_cpl(vcpu); 1621 /*
1622 * This is always accurate, except if SYSRET returned to a segment
1623 * with SS.DPL != 3. Intel does not have this quirk, and always
1624 * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it
1625 * would entail passing the CPL to userspace and back.
1626 */
1627 if (seg == VCPU_SREG_SS)
1628 svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
1636 1629
1637 mark_dirty(svm->vmcb, VMCB_SEG); 1630 mark_dirty(svm->vmcb, VMCB_SEG);
1638} 1631}
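
The svm_set_segment() hunk above now keeps save.cpl in sync with SS.DPL instead of recomputing it from CS and RFLAGS. A minimal user-space sketch of that derivation follows; the SVM_SELECTOR_DPL_SHIFT value of 5 and the sample attribute words are assumptions mirroring the VMCB segment-attribute layout, not taken from this diff.

#include <stdint.h>
#include <stdio.h>

#define SVM_SELECTOR_DPL_SHIFT 5        /* DPL field inside the packed attrib word */

static uint8_t cpl_from_ss_attrib(uint16_t ss_attrib)
{
        /* CPL tracks SS.DPL: bits [6:5] of the attribute word. */
        return (ss_attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
}

int main(void)
{
        uint16_t user_ss = 0x00f3;      /* present, DPL=3, writable data */
        uint16_t kern_ss = 0x0093;      /* present, DPL=0, writable data */

        printf("user SS   -> CPL %u\n", cpl_from_ss_attrib(user_ss));
        printf("kernel SS -> CPL %u\n", cpl_from_ss_attrib(kern_ss));
        return 0;
}
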
@@ -2770,12 +2763,6 @@ static int xsetbv_interception(struct vcpu_svm *svm)
2770 return 1; 2763 return 1;
2771} 2764}
2772 2765
2773static int invalid_op_interception(struct vcpu_svm *svm)
2774{
2775 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2776 return 1;
2777}
2778
2779static int task_switch_interception(struct vcpu_svm *svm) 2766static int task_switch_interception(struct vcpu_svm *svm)
2780{ 2767{
2781 u16 tss_selector; 2768 u16 tss_selector;
@@ -3287,6 +3274,24 @@ static int pause_interception(struct vcpu_svm *svm)
3287 return 1; 3274 return 1;
3288} 3275}
3289 3276
3277static int nop_interception(struct vcpu_svm *svm)
3278{
3279 skip_emulated_instruction(&(svm->vcpu));
3280 return 1;
3281}
3282
3283static int monitor_interception(struct vcpu_svm *svm)
3284{
3285 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
3286 return nop_interception(svm);
3287}
3288
3289static int mwait_interception(struct vcpu_svm *svm)
3290{
3291 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
3292 return nop_interception(svm);
3293}
3294
3290static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { 3295static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
3291 [SVM_EXIT_READ_CR0] = cr_interception, 3296 [SVM_EXIT_READ_CR0] = cr_interception,
3292 [SVM_EXIT_READ_CR3] = cr_interception, 3297 [SVM_EXIT_READ_CR3] = cr_interception,
@@ -3344,8 +3349,8 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
3344 [SVM_EXIT_CLGI] = clgi_interception, 3349 [SVM_EXIT_CLGI] = clgi_interception,
3345 [SVM_EXIT_SKINIT] = skinit_interception, 3350 [SVM_EXIT_SKINIT] = skinit_interception,
3346 [SVM_EXIT_WBINVD] = emulate_on_interception, 3351 [SVM_EXIT_WBINVD] = emulate_on_interception,
3347 [SVM_EXIT_MONITOR] = invalid_op_interception, 3352 [SVM_EXIT_MONITOR] = monitor_interception,
3348 [SVM_EXIT_MWAIT] = invalid_op_interception, 3353 [SVM_EXIT_MWAIT] = mwait_interception,
3349 [SVM_EXIT_XSETBV] = xsetbv_interception, 3354 [SVM_EXIT_XSETBV] = xsetbv_interception,
3350 [SVM_EXIT_NPF] = pf_interception, 3355 [SVM_EXIT_NPF] = pf_interception,
3351}; 3356};
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 545245d7cc63..33574c95220d 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -91,16 +91,21 @@ TRACE_EVENT(kvm_hv_hypercall,
91/* 91/*
92 * Tracepoint for PIO. 92 * Tracepoint for PIO.
93 */ 93 */
94
95#define KVM_PIO_IN 0
96#define KVM_PIO_OUT 1
97
94TRACE_EVENT(kvm_pio, 98TRACE_EVENT(kvm_pio,
95 TP_PROTO(unsigned int rw, unsigned int port, unsigned int size, 99 TP_PROTO(unsigned int rw, unsigned int port, unsigned int size,
96 unsigned int count), 100 unsigned int count, void *data),
97 TP_ARGS(rw, port, size, count), 101 TP_ARGS(rw, port, size, count, data),
98 102
99 TP_STRUCT__entry( 103 TP_STRUCT__entry(
100 __field( unsigned int, rw ) 104 __field( unsigned int, rw )
101 __field( unsigned int, port ) 105 __field( unsigned int, port )
102 __field( unsigned int, size ) 106 __field( unsigned int, size )
103 __field( unsigned int, count ) 107 __field( unsigned int, count )
108 __field( unsigned int, val )
104 ), 109 ),
105 110
106 TP_fast_assign( 111 TP_fast_assign(
@@ -108,11 +113,18 @@ TRACE_EVENT(kvm_pio,
108 __entry->port = port; 113 __entry->port = port;
109 __entry->size = size; 114 __entry->size = size;
110 __entry->count = count; 115 __entry->count = count;
116 if (size == 1)
117 __entry->val = *(unsigned char *)data;
118 else if (size == 2)
119 __entry->val = *(unsigned short *)data;
120 else
121 __entry->val = *(unsigned int *)data;
111 ), 122 ),
112 123
113 TP_printk("pio_%s at 0x%x size %d count %d", 124 TP_printk("pio_%s at 0x%x size %d count %d val 0x%x %s",
114 __entry->rw ? "write" : "read", 125 __entry->rw ? "write" : "read",
115 __entry->port, __entry->size, __entry->count) 126 __entry->port, __entry->size, __entry->count, __entry->val,
127 __entry->count > 1 ? "(...)" : "")
116); 128);
117 129
118/* 130/*
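
The TP_fast_assign() above records only the first transferred element, reading 1, 2 or 4 bytes from the PIO data page. A small stand-alone sketch of that width-dependent read, with a plain byte buffer standing in for vcpu->arch.pio_data; the printed values assume a little-endian host, as on x86.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Read the first PIO element from a raw buffer, as the tracepoint does. */
static unsigned int first_pio_val(const void *data, unsigned int size)
{
        if (size == 1) {
                uint8_t v;
                memcpy(&v, data, sizeof(v));
                return v;
        } else if (size == 2) {
                uint16_t v;
                memcpy(&v, data, sizeof(v));
                return v;
        } else {
                uint32_t v;
                memcpy(&v, data, sizeof(v));
                return v;
        }
}

int main(void)
{
        uint8_t buf[8] = { 0x34, 0x12, 0x78, 0x56 };

        printf("size 1: 0x%x\n", first_pio_val(buf, 1));   /* 0x34 */
        printf("size 2: 0x%x\n", first_pio_val(buf, 2));   /* 0x1234 */
        printf("size 4: 0x%x\n", first_pio_val(buf, 4));   /* 0x56781234 */
        return 0;
}
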
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 138ceffc6377..801332edefc3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -354,6 +354,7 @@ struct vmcs02_list {
354struct nested_vmx { 354struct nested_vmx {
355 /* Has the level1 guest done vmxon? */ 355 /* Has the level1 guest done vmxon? */
356 bool vmxon; 356 bool vmxon;
357 gpa_t vmxon_ptr;
357 358
358 /* The guest-physical address of the current VMCS L1 keeps for L2 */ 359 /* The guest-physical address of the current VMCS L1 keeps for L2 */
359 gpa_t current_vmptr; 360 gpa_t current_vmptr;
@@ -413,7 +414,6 @@ struct vcpu_vmx {
413 struct kvm_vcpu vcpu; 414 struct kvm_vcpu vcpu;
414 unsigned long host_rsp; 415 unsigned long host_rsp;
415 u8 fail; 416 u8 fail;
416 u8 cpl;
417 bool nmi_known_unmasked; 417 bool nmi_known_unmasked;
418 u32 exit_intr_info; 418 u32 exit_intr_info;
419 u32 idt_vectoring_info; 419 u32 idt_vectoring_info;
@@ -2283,7 +2283,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2283 rdmsr(MSR_IA32_VMX_EXIT_CTLS, 2283 rdmsr(MSR_IA32_VMX_EXIT_CTLS,
2284 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); 2284 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high);
2285 nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; 2285 nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
2286 /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ 2286
2287 nested_vmx_exit_ctls_high &= 2287 nested_vmx_exit_ctls_high &=
2288#ifdef CONFIG_X86_64 2288#ifdef CONFIG_X86_64
2289 VM_EXIT_HOST_ADDR_SPACE_SIZE | 2289 VM_EXIT_HOST_ADDR_SPACE_SIZE |
@@ -2291,7 +2291,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2291 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; 2291 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
2292 nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | 2292 nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
2293 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | 2293 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
2294 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; 2294 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
2295
2295 if (vmx_mpx_supported()) 2296 if (vmx_mpx_supported())
2296 nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; 2297 nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
2297 2298
@@ -2353,12 +2354,11 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2353 VMX_EPT_INVEPT_BIT; 2354 VMX_EPT_INVEPT_BIT;
2354 nested_vmx_ept_caps &= vmx_capability.ept; 2355 nested_vmx_ept_caps &= vmx_capability.ept;
2355 /* 2356 /*
2356 * Since invept is completely emulated we support both global 2357 * For nested guests, we don't do anything specific
2357 * and context invalidation independent of what host cpu 2358 * for single context invalidation. Hence, only advertise
2358 * supports 2359 * support for global context invalidation.
2359 */ 2360 */
2360 nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | 2361 nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT;
2361 VMX_EPT_EXTENT_CONTEXT_BIT;
2362 } else 2362 } else
2363 nested_vmx_ept_caps = 0; 2363 nested_vmx_ept_caps = 0;
2364 2364
@@ -3186,10 +3186,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
3186 fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); 3186 fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
3187 fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); 3187 fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
3188 fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); 3188 fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
3189
3190 /* CPL is always 0 when CPU enters protected mode */
3191 __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
3192 vmx->cpl = 0;
3193} 3189}
3194 3190
3195static void fix_rmode_seg(int seg, struct kvm_segment *save) 3191static void fix_rmode_seg(int seg, struct kvm_segment *save)
@@ -3591,22 +3587,14 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu)
3591{ 3587{
3592 struct vcpu_vmx *vmx = to_vmx(vcpu); 3588 struct vcpu_vmx *vmx = to_vmx(vcpu);
3593 3589
3594 if (!is_protmode(vcpu)) 3590 if (unlikely(vmx->rmode.vm86_active))
3595 return 0; 3591 return 0;
3596 3592 else {
3597 if (!is_long_mode(vcpu) 3593 int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
3598 && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ 3594 return AR_DPL(ar);
3599 return 3;
3600
3601 if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
3602 __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
3603 vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3;
3604 } 3595 }
3605
3606 return vmx->cpl;
3607} 3596}
3608 3597
3609
3610static u32 vmx_segment_access_rights(struct kvm_segment *var) 3598static u32 vmx_segment_access_rights(struct kvm_segment *var)
3611{ 3599{
3612 u32 ar; 3600 u32 ar;
@@ -3634,8 +3622,6 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
3634 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 3622 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3635 3623
3636 vmx_segment_cache_clear(vmx); 3624 vmx_segment_cache_clear(vmx);
3637 if (seg == VCPU_SREG_CS)
3638 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
3639 3625
3640 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { 3626 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3641 vmx->rmode.segs[seg] = *var; 3627 vmx->rmode.segs[seg] = *var;
@@ -4564,6 +4550,16 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
4564 PIN_BASED_EXT_INTR_MASK; 4550 PIN_BASED_EXT_INTR_MASK;
4565} 4551}
4566 4552
4553/*
4554 * In nested virtualization, check if L1 has set
4555 * VM_EXIT_ACK_INTR_ON_EXIT
4556 */
4557static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
4558{
4559 return get_vmcs12(vcpu)->vm_exit_controls &
4560 VM_EXIT_ACK_INTR_ON_EXIT;
4561}
4562
4567static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) 4563static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
4568{ 4564{
4569 return get_vmcs12(vcpu)->pin_based_vm_exec_control & 4565 return get_vmcs12(vcpu)->pin_based_vm_exec_control &
@@ -4878,6 +4874,9 @@ static int handle_exception(struct kvm_vcpu *vcpu)
4878 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { 4874 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
4879 vcpu->arch.dr6 &= ~15; 4875 vcpu->arch.dr6 &= ~15;
4880 vcpu->arch.dr6 |= dr6; 4876 vcpu->arch.dr6 |= dr6;
4877 if (!(dr6 & ~DR6_RESERVED)) /* icebp */
4878 skip_emulated_instruction(vcpu);
4879
4881 kvm_queue_exception(vcpu, DB_VECTOR); 4880 kvm_queue_exception(vcpu, DB_VECTOR);
4882 return 1; 4881 return 1;
4883 } 4882 }
@@ -5166,7 +5165,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
5166 return 1; 5165 return 1;
5167 kvm_register_write(vcpu, reg, val); 5166 kvm_register_write(vcpu, reg, val);
5168 } else 5167 } else
5169 if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg])) 5168 if (kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg)))
5170 return 1; 5169 return 1;
5171 5170
5172 skip_emulated_instruction(vcpu); 5171 skip_emulated_instruction(vcpu);
@@ -5439,7 +5438,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
5439 } 5438 }
5440 5439
5441 /* clear all local breakpoint enable flags */ 5440 /* clear all local breakpoint enable flags */
5442 vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); 5441 vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55);
5443 5442
5444 /* 5443 /*
5445 * TODO: What about debug traps on tss switch? 5444 * TODO: What about debug traps on tss switch?
@@ -5565,6 +5564,10 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
5565 gpa_t gpa; 5564 gpa_t gpa;
5566 5565
5567 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); 5566 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
5567 if (!kvm_io_bus_write(vcpu->kvm, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
5568 skip_emulated_instruction(vcpu);
5569 return 1;
5570 }
5568 5571
5569 ret = handle_mmio_page_fault_common(vcpu, gpa, true); 5572 ret = handle_mmio_page_fault_common(vcpu, gpa, true);
5570 if (likely(ret == RET_MMIO_PF_EMULATE)) 5573 if (likely(ret == RET_MMIO_PF_EMULATE))
@@ -5669,12 +5672,24 @@ static int handle_pause(struct kvm_vcpu *vcpu)
5669 return 1; 5672 return 1;
5670} 5673}
5671 5674
5672static int handle_invalid_op(struct kvm_vcpu *vcpu) 5675static int handle_nop(struct kvm_vcpu *vcpu)
5673{ 5676{
5674 kvm_queue_exception(vcpu, UD_VECTOR); 5677 skip_emulated_instruction(vcpu);
5675 return 1; 5678 return 1;
5676} 5679}
5677 5680
5681static int handle_mwait(struct kvm_vcpu *vcpu)
5682{
5683 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
5684 return handle_nop(vcpu);
5685}
5686
5687static int handle_monitor(struct kvm_vcpu *vcpu)
5688{
5689 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
5690 return handle_nop(vcpu);
5691}
5692
5678/* 5693/*
5679 * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. 5694 * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
5680 * We could reuse a single VMCS for all the L2 guests, but we also want the 5695 * We could reuse a single VMCS for all the L2 guests, but we also want the
@@ -5812,6 +5827,154 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
5812} 5827}
5813 5828
5814/* 5829/*
5830 * Decode the memory-address operand of a vmx instruction, as recorded on an
5831 * exit caused by such an instruction (run by a guest hypervisor).
5832 * On success, returns 0. When the operand is invalid, returns 1 and throws
5833 * #UD or #GP.
5834 */
5835static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
5836 unsigned long exit_qualification,
5837 u32 vmx_instruction_info, gva_t *ret)
5838{
5839 /*
5840 * According to Vol. 3B, "Information for VM Exits Due to Instruction
5841 * Execution", on an exit, vmx_instruction_info holds most of the
5842 * addressing components of the operand. Only the displacement part
5843 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
5844 * For how an actual address is calculated from all these components,
5845 * refer to Vol. 1, "Operand Addressing".
5846 */
5847 int scaling = vmx_instruction_info & 3;
5848 int addr_size = (vmx_instruction_info >> 7) & 7;
5849 bool is_reg = vmx_instruction_info & (1u << 10);
5850 int seg_reg = (vmx_instruction_info >> 15) & 7;
5851 int index_reg = (vmx_instruction_info >> 18) & 0xf;
5852 bool index_is_valid = !(vmx_instruction_info & (1u << 22));
5853 int base_reg = (vmx_instruction_info >> 23) & 0xf;
5854 bool base_is_valid = !(vmx_instruction_info & (1u << 27));
5855
5856 if (is_reg) {
5857 kvm_queue_exception(vcpu, UD_VECTOR);
5858 return 1;
5859 }
5860
5861 /* Addr = segment_base + offset */
5862 /* offset = base + [index * scale] + displacement */
5863 *ret = vmx_get_segment_base(vcpu, seg_reg);
5864 if (base_is_valid)
5865 *ret += kvm_register_read(vcpu, base_reg);
5866 if (index_is_valid)
5867 *ret += kvm_register_read(vcpu, index_reg)<<scaling;
5868 *ret += exit_qualification; /* holds the displacement */
5869
5870 if (addr_size == 1) /* 32 bit */
5871 *ret &= 0xffffffff;
5872
5873 /*
5874 * TODO: throw #GP (and return 1) in various cases that the VM*
5875 * instructions require it - e.g., offset beyond segment limit,
5876 * unusable or unreadable/unwritable segment, non-canonical 64-bit
5877 * address, and so on. Currently these are not checked.
5878 */
5879 return 0;
5880}
5881
5882/*
5883 * This function performs the various checks including
5884 * - if it's 4KB aligned
5885 * - No bits beyond the physical address width are set
5886 * - Returns 0 on success or else 1
5887 * (Intel SDM Section 30.3)
5888 */
5889static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
5890 gpa_t *vmpointer)
5891{
5892 gva_t gva;
5893 gpa_t vmptr;
5894 struct x86_exception e;
5895 struct page *page;
5896 struct vcpu_vmx *vmx = to_vmx(vcpu);
5897 int maxphyaddr = cpuid_maxphyaddr(vcpu);
5898
5899 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
5900 vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
5901 return 1;
5902
5903 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
5904 sizeof(vmptr), &e)) {
5905 kvm_inject_page_fault(vcpu, &e);
5906 return 1;
5907 }
5908
5909 switch (exit_reason) {
5910 case EXIT_REASON_VMON:
5911 /*
5912 * SDM 3: 24.11.5
5913 * The first 4 bytes of VMXON region contain the supported
5914 * VMCS revision identifier
5915 *
5916 * Note - IA32_VMX_BASIC[48] will never be 1
5917 * for the nested case;
5918 * which replaces physical address width with 32
5919 *
5920 */
5921 if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) {
5922 nested_vmx_failInvalid(vcpu);
5923 skip_emulated_instruction(vcpu);
5924 return 1;
5925 }
5926
5927 page = nested_get_page(vcpu, vmptr);
5928 if (page == NULL ||
5929 *(u32 *)kmap(page) != VMCS12_REVISION) {
5930 nested_vmx_failInvalid(vcpu);
5931 kunmap(page);
5932 skip_emulated_instruction(vcpu);
5933 return 1;
5934 }
5935 kunmap(page);
5936 vmx->nested.vmxon_ptr = vmptr;
5937 break;
5938 case EXIT_REASON_VMCLEAR:
5939 if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) {
5940 nested_vmx_failValid(vcpu,
5941 VMXERR_VMCLEAR_INVALID_ADDRESS);
5942 skip_emulated_instruction(vcpu);
5943 return 1;
5944 }
5945
5946 if (vmptr == vmx->nested.vmxon_ptr) {
5947 nested_vmx_failValid(vcpu,
5948 VMXERR_VMCLEAR_VMXON_POINTER);
5949 skip_emulated_instruction(vcpu);
5950 return 1;
5951 }
5952 break;
5953 case EXIT_REASON_VMPTRLD:
5954 if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) {
5955 nested_vmx_failValid(vcpu,
5956 VMXERR_VMPTRLD_INVALID_ADDRESS);
5957 skip_emulated_instruction(vcpu);
5958 return 1;
5959 }
5960
5961 if (vmptr == vmx->nested.vmxon_ptr) {
5962 nested_vmx_failValid(vcpu,
5963 VMXERR_VMCLEAR_VMXON_POINTER);
5964 skip_emulated_instruction(vcpu);
5965 return 1;
5966 }
5967 break;
5968 default:
5969 return 1; /* shouldn't happen */
5970 }
5971
5972 if (vmpointer)
5973 *vmpointer = vmptr;
5974 return 0;
5975}
5976
5977/*
5815 * Emulate the VMXON instruction. 5978 * Emulate the VMXON instruction.
5816 * Currently, we just remember that VMX is active, and do not save or even 5979 * Currently, we just remember that VMX is active, and do not save or even
5817 * inspect the argument to VMXON (the so-called "VMXON pointer") because we 5980 * inspect the argument to VMXON (the so-called "VMXON pointer") because we
@@ -5849,6 +6012,10 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
5849 kvm_inject_gp(vcpu, 0); 6012 kvm_inject_gp(vcpu, 0);
5850 return 1; 6013 return 1;
5851 } 6014 }
6015
6016 if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL))
6017 return 1;
6018
5852 if (vmx->nested.vmxon) { 6019 if (vmx->nested.vmxon) {
5853 nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); 6020 nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
5854 skip_emulated_instruction(vcpu); 6021 skip_emulated_instruction(vcpu);
@@ -5971,87 +6138,19 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
5971 return 1; 6138 return 1;
5972} 6139}
5973 6140
5974/*
5975 * Decode the memory-address operand of a vmx instruction, as recorded on an
5976 * exit caused by such an instruction (run by a guest hypervisor).
5977 * On success, returns 0. When the operand is invalid, returns 1 and throws
5978 * #UD or #GP.
5979 */
5980static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
5981 unsigned long exit_qualification,
5982 u32 vmx_instruction_info, gva_t *ret)
5983{
5984 /*
5985 * According to Vol. 3B, "Information for VM Exits Due to Instruction
5986 * Execution", on an exit, vmx_instruction_info holds most of the
5987 * addressing components of the operand. Only the displacement part
5988 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
5989 * For how an actual address is calculated from all these components,
5990 * refer to Vol. 1, "Operand Addressing".
5991 */
5992 int scaling = vmx_instruction_info & 3;
5993 int addr_size = (vmx_instruction_info >> 7) & 7;
5994 bool is_reg = vmx_instruction_info & (1u << 10);
5995 int seg_reg = (vmx_instruction_info >> 15) & 7;
5996 int index_reg = (vmx_instruction_info >> 18) & 0xf;
5997 bool index_is_valid = !(vmx_instruction_info & (1u << 22));
5998 int base_reg = (vmx_instruction_info >> 23) & 0xf;
5999 bool base_is_valid = !(vmx_instruction_info & (1u << 27));
6000
6001 if (is_reg) {
6002 kvm_queue_exception(vcpu, UD_VECTOR);
6003 return 1;
6004 }
6005
6006 /* Addr = segment_base + offset */
6007 /* offset = base + [index * scale] + displacement */
6008 *ret = vmx_get_segment_base(vcpu, seg_reg);
6009 if (base_is_valid)
6010 *ret += kvm_register_read(vcpu, base_reg);
6011 if (index_is_valid)
6012 *ret += kvm_register_read(vcpu, index_reg)<<scaling;
6013 *ret += exit_qualification; /* holds the displacement */
6014
6015 if (addr_size == 1) /* 32 bit */
6016 *ret &= 0xffffffff;
6017
6018 /*
6019 * TODO: throw #GP (and return 1) in various cases that the VM*
6020 * instructions require it - e.g., offset beyond segment limit,
6021 * unusable or unreadable/unwritable segment, non-canonical 64-bit
6022 * address, and so on. Currently these are not checked.
6023 */
6024 return 0;
6025}
6026
6027/* Emulate the VMCLEAR instruction */ 6141/* Emulate the VMCLEAR instruction */
6028static int handle_vmclear(struct kvm_vcpu *vcpu) 6142static int handle_vmclear(struct kvm_vcpu *vcpu)
6029{ 6143{
6030 struct vcpu_vmx *vmx = to_vmx(vcpu); 6144 struct vcpu_vmx *vmx = to_vmx(vcpu);
6031 gva_t gva;
6032 gpa_t vmptr; 6145 gpa_t vmptr;
6033 struct vmcs12 *vmcs12; 6146 struct vmcs12 *vmcs12;
6034 struct page *page; 6147 struct page *page;
6035 struct x86_exception e;
6036 6148
6037 if (!nested_vmx_check_permission(vcpu)) 6149 if (!nested_vmx_check_permission(vcpu))
6038 return 1; 6150 return 1;
6039 6151
6040 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), 6152 if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr))
6041 vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
6042 return 1;
6043
6044 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
6045 sizeof(vmptr), &e)) {
6046 kvm_inject_page_fault(vcpu, &e);
6047 return 1;
6048 }
6049
6050 if (!IS_ALIGNED(vmptr, PAGE_SIZE)) {
6051 nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
6052 skip_emulated_instruction(vcpu);
6053 return 1; 6153 return 1;
6054 }
6055 6154
6056 if (vmptr == vmx->nested.current_vmptr) { 6155 if (vmptr == vmx->nested.current_vmptr) {
6057 nested_release_vmcs12(vmx); 6156 nested_release_vmcs12(vmx);
@@ -6372,29 +6471,14 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
6372static int handle_vmptrld(struct kvm_vcpu *vcpu) 6471static int handle_vmptrld(struct kvm_vcpu *vcpu)
6373{ 6472{
6374 struct vcpu_vmx *vmx = to_vmx(vcpu); 6473 struct vcpu_vmx *vmx = to_vmx(vcpu);
6375 gva_t gva;
6376 gpa_t vmptr; 6474 gpa_t vmptr;
6377 struct x86_exception e;
6378 u32 exec_control; 6475 u32 exec_control;
6379 6476
6380 if (!nested_vmx_check_permission(vcpu)) 6477 if (!nested_vmx_check_permission(vcpu))
6381 return 1; 6478 return 1;
6382 6479
6383 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), 6480 if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMPTRLD, &vmptr))
6384 vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
6385 return 1;
6386
6387 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
6388 sizeof(vmptr), &e)) {
6389 kvm_inject_page_fault(vcpu, &e);
6390 return 1;
6391 }
6392
6393 if (!IS_ALIGNED(vmptr, PAGE_SIZE)) {
6394 nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
6395 skip_emulated_instruction(vcpu);
6396 return 1; 6481 return 1;
6397 }
6398 6482
6399 if (vmx->nested.current_vmptr != vmptr) { 6483 if (vmx->nested.current_vmptr != vmptr) {
6400 struct vmcs12 *new_vmcs12; 6484 struct vmcs12 *new_vmcs12;
@@ -6471,7 +6555,6 @@ static int handle_invept(struct kvm_vcpu *vcpu)
6471 struct { 6555 struct {
6472 u64 eptp, gpa; 6556 u64 eptp, gpa;
6473 } operand; 6557 } operand;
6474 u64 eptp_mask = ((1ull << 51) - 1) & PAGE_MASK;
6475 6558
6476 if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || 6559 if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) ||
6477 !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) { 6560 !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
@@ -6511,16 +6594,13 @@ static int handle_invept(struct kvm_vcpu *vcpu)
6511 } 6594 }
6512 6595
6513 switch (type) { 6596 switch (type) {
6514 case VMX_EPT_EXTENT_CONTEXT:
6515 if ((operand.eptp & eptp_mask) !=
6516 (nested_ept_get_cr3(vcpu) & eptp_mask))
6517 break;
6518 case VMX_EPT_EXTENT_GLOBAL: 6597 case VMX_EPT_EXTENT_GLOBAL:
6519 kvm_mmu_sync_roots(vcpu); 6598 kvm_mmu_sync_roots(vcpu);
6520 kvm_mmu_flush_tlb(vcpu); 6599 kvm_mmu_flush_tlb(vcpu);
6521 nested_vmx_succeed(vcpu); 6600 nested_vmx_succeed(vcpu);
6522 break; 6601 break;
6523 default: 6602 default:
6603 /* Trap single context invalidation invept calls */
6524 BUG_ON(1); 6604 BUG_ON(1);
6525 break; 6605 break;
6526 } 6606 }
@@ -6571,8 +6651,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
6571 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, 6651 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
6572 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, 6652 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
6573 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, 6653 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
6574 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, 6654 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
6575 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, 6655 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
6576 [EXIT_REASON_INVEPT] = handle_invept, 6656 [EXIT_REASON_INVEPT] = handle_invept,
6577}; 6657};
6578 6658
@@ -7413,7 +7493,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
7413 7493
7414 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) 7494 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
7415 | (1 << VCPU_EXREG_RFLAGS) 7495 | (1 << VCPU_EXREG_RFLAGS)
7416 | (1 << VCPU_EXREG_CPL)
7417 | (1 << VCPU_EXREG_PDPTR) 7496 | (1 << VCPU_EXREG_PDPTR)
7418 | (1 << VCPU_EXREG_SEGMENTS) 7497 | (1 << VCPU_EXREG_SEGMENTS)
7419 | (1 << VCPU_EXREG_CR3)); 7498 | (1 << VCPU_EXREG_CR3));
@@ -8601,6 +8680,14 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8601 prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, 8680 prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
8602 exit_qualification); 8681 exit_qualification);
8603 8682
8683 if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
8684 && nested_exit_intr_ack_set(vcpu)) {
8685 int irq = kvm_cpu_get_interrupt(vcpu);
8686 WARN_ON(irq < 0);
8687 vmcs12->vm_exit_intr_info = irq |
8688 INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
8689 }
8690
8604 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, 8691 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
8605 vmcs12->exit_qualification, 8692 vmcs12->exit_qualification,
8606 vmcs12->idt_vectoring_info_field, 8693 vmcs12->idt_vectoring_info_field,
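
The get_vmx_mem_address() function added in the hunks above pulls the operand's addressing components out of the VMX-instruction-information field. A self-contained sketch of that decode; the bit positions are copied from the code in the hunk, and the sample value is arbitrary, purely for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vmx_mem_operand {
        int  scaling;           /* bits 1:0   - shift applied to the index register */
        int  addr_size;         /* bits 9:7   - 0=16, 1=32, 2=64 bit */
        bool is_reg;            /* bit  10    - register operand, no memory address */
        int  seg_reg;           /* bits 17:15 - segment register */
        int  index_reg;         /* bits 21:18 */
        bool index_valid;       /* bit  22 clear means the index is valid */
        int  base_reg;          /* bits 26:23 */
        bool base_valid;        /* bit  27 clear means the base is valid */
};

static struct vmx_mem_operand decode_vmx_instruction_info(uint32_t info)
{
        struct vmx_mem_operand op = {
                .scaling     = info & 3,
                .addr_size   = (info >> 7) & 7,
                .is_reg      = info & (1u << 10),
                .seg_reg     = (info >> 15) & 7,
                .index_reg   = (info >> 18) & 0xf,
                .index_valid = !(info & (1u << 22)),
                .base_reg    = (info >> 23) & 0xf,
                .base_valid  = !(info & (1u << 27)),
        };
        return op;
}

int main(void)
{
        struct vmx_mem_operand op = decode_vmx_instruction_info(0x08400381);

        printf("scaling=%d addr_size=%d seg=%d base_valid=%d index_valid=%d\n",
               op.scaling, op.addr_size, op.seg_reg, op.base_valid, op.index_valid);
        return 0;
}
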
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 20316c67b824..f32a02578c0d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -704,25 +704,11 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
704 } 704 }
705 705
706 if (is_long_mode(vcpu)) { 706 if (is_long_mode(vcpu)) {
707 if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) { 707 if (cr3 & CR3_L_MODE_RESERVED_BITS)
708 if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS) 708 return 1;
709 return 1; 709 } else if (is_pae(vcpu) && is_paging(vcpu) &&
710 } else 710 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
711 if (cr3 & CR3_L_MODE_RESERVED_BITS) 711 return 1;
712 return 1;
713 } else {
714 if (is_pae(vcpu)) {
715 if (cr3 & CR3_PAE_RESERVED_BITS)
716 return 1;
717 if (is_paging(vcpu) &&
718 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
719 return 1;
720 }
721 /*
722 * We don't check reserved bits in nonpae mode, because
723 * this isn't enforced, and VMware depends on this.
724 */
725 }
726 712
727 vcpu->arch.cr3 = cr3; 713 vcpu->arch.cr3 = cr3;
728 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); 714 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
@@ -1935,6 +1921,8 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1935 1921
1936 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { 1922 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
1937 vcpu->arch.hv_vapic = data; 1923 vcpu->arch.hv_vapic = data;
1924 if (kvm_lapic_enable_pv_eoi(vcpu, 0))
1925 return 1;
1938 break; 1926 break;
1939 } 1927 }
1940 gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT; 1928 gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
@@ -1945,6 +1933,8 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1945 return 1; 1933 return 1;
1946 vcpu->arch.hv_vapic = data; 1934 vcpu->arch.hv_vapic = data;
1947 mark_page_dirty(vcpu->kvm, gfn); 1935 mark_page_dirty(vcpu->kvm, gfn);
1936 if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
1937 return 1;
1948 break; 1938 break;
1949 } 1939 }
1950 case HV_X64_MSR_EOI: 1940 case HV_X64_MSR_EOI:
@@ -2647,6 +2637,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2647 case KVM_CAP_IRQ_INJECT_STATUS: 2637 case KVM_CAP_IRQ_INJECT_STATUS:
2648 case KVM_CAP_IRQFD: 2638 case KVM_CAP_IRQFD:
2649 case KVM_CAP_IOEVENTFD: 2639 case KVM_CAP_IOEVENTFD:
2640 case KVM_CAP_IOEVENTFD_NO_LENGTH:
2650 case KVM_CAP_PIT2: 2641 case KVM_CAP_PIT2:
2651 case KVM_CAP_PIT_STATE2: 2642 case KVM_CAP_PIT_STATE2:
2652 case KVM_CAP_SET_IDENTITY_MAP_ADDR: 2643 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
@@ -3649,11 +3640,19 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
3649 offset = i * BITS_PER_LONG; 3640 offset = i * BITS_PER_LONG;
3650 kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask); 3641 kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
3651 } 3642 }
3652 if (is_dirty)
3653 kvm_flush_remote_tlbs(kvm);
3654 3643
3655 spin_unlock(&kvm->mmu_lock); 3644 spin_unlock(&kvm->mmu_lock);
3656 3645
3646 /* See the comments in kvm_mmu_slot_remove_write_access(). */
3647 lockdep_assert_held(&kvm->slots_lock);
3648
3649 /*
3650 * All the TLBs can be flushed out of mmu lock, see the comments in
3651 * kvm_mmu_slot_remove_write_access().
3652 */
3653 if (is_dirty)
3654 kvm_flush_remote_tlbs(kvm);
3655
3657 r = -EFAULT; 3656 r = -EFAULT;
3658 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) 3657 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
3659 goto out; 3658 goto out;
@@ -4489,8 +4488,6 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
4489 unsigned short port, void *val, 4488 unsigned short port, void *val,
4490 unsigned int count, bool in) 4489 unsigned int count, bool in)
4491{ 4490{
4492 trace_kvm_pio(!in, port, size, count);
4493
4494 vcpu->arch.pio.port = port; 4491 vcpu->arch.pio.port = port;
4495 vcpu->arch.pio.in = in; 4492 vcpu->arch.pio.in = in;
4496 vcpu->arch.pio.count = count; 4493 vcpu->arch.pio.count = count;
@@ -4525,6 +4522,7 @@ static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
4525 if (ret) { 4522 if (ret) {
4526data_avail: 4523data_avail:
4527 memcpy(val, vcpu->arch.pio_data, size * count); 4524 memcpy(val, vcpu->arch.pio_data, size * count);
4525 trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
4528 vcpu->arch.pio.count = 0; 4526 vcpu->arch.pio.count = 0;
4529 return 1; 4527 return 1;
4530 } 4528 }
@@ -4539,6 +4537,7 @@ static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
4539 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 4537 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4540 4538
4541 memcpy(vcpu->arch.pio_data, val, size * count); 4539 memcpy(vcpu->arch.pio_data, val, size * count);
4540 trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
4542 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false); 4541 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
4543} 4542}
4544 4543
@@ -4650,11 +4649,6 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
4650 return res; 4649 return res;
4651} 4650}
4652 4651
4653static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val)
4654{
4655 kvm_set_rflags(emul_to_vcpu(ctxt), val);
4656}
4657
4658static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) 4652static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
4659{ 4653{
4660 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); 4654 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
@@ -4839,7 +4833,6 @@ static const struct x86_emulate_ops emulate_ops = {
4839 .set_idt = emulator_set_idt, 4833 .set_idt = emulator_set_idt,
4840 .get_cr = emulator_get_cr, 4834 .get_cr = emulator_get_cr,
4841 .set_cr = emulator_set_cr, 4835 .set_cr = emulator_set_cr,
4842 .set_rflags = emulator_set_rflags,
4843 .cpl = emulator_get_cpl, 4836 .cpl = emulator_get_cpl,
4844 .get_dr = emulator_get_dr, 4837 .get_dr = emulator_get_dr,
4845 .set_dr = emulator_set_dr, 4838 .set_dr = emulator_set_dr,
@@ -4905,7 +4898,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
4905 ctxt->eip = kvm_rip_read(vcpu); 4898 ctxt->eip = kvm_rip_read(vcpu);
4906 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : 4899 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4907 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 : 4900 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
4908 cs_l ? X86EMUL_MODE_PROT64 : 4901 (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
4909 cs_db ? X86EMUL_MODE_PROT32 : 4902 cs_db ? X86EMUL_MODE_PROT32 :
4910 X86EMUL_MODE_PROT16; 4903 X86EMUL_MODE_PROT16;
4911 ctxt->guest_mode = is_guest_mode(vcpu); 4904 ctxt->guest_mode = is_guest_mode(vcpu);
@@ -7333,8 +7326,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
7333 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); 7326 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
7334 /* 7327 /*
7335 * Write protect all pages for dirty logging. 7328 * Write protect all pages for dirty logging.
7336 * Existing largepage mappings are destroyed here and new ones will 7329 *
7337 * not be created until the end of the logging. 7330 * All the sptes including the large sptes which point to this
7331 * slot are set to readonly. We can not create any new large
7332 * spte on this slot until the end of the logging.
7333 *
7334 * See the comments in fast_page_fault().
7338 */ 7335 */
7339 if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) 7336 if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
7340 kvm_mmu_slot_remove_write_access(kvm, mem->slot); 7337 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
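
The init_emulate_ctxt() change above stops selecting 64-bit emulation on CS.L alone and additionally requires the guest to be in long mode. A compact restatement of that decision tree; select_emul_mode() and its boolean arguments are illustrative stand-ins for the vcpu state helpers, not kernel API.

#include <stdbool.h>
#include <stdio.h>

enum emul_mode {
        MODE_REAL, MODE_VM86, MODE_PROT16, MODE_PROT32, MODE_PROT64,
};

/*
 * protmode  - CR0.PE set
 * vm86      - EFLAGS.VM set
 * cs_l      - CS.L (64-bit code segment) set
 * long_mode - EFER.LMA set; without it CS.L must be ignored
 * cs_db     - CS.D/B set (32-bit default operand size)
 */
static enum emul_mode select_emul_mode(bool protmode, bool vm86,
                                       bool cs_l, bool long_mode, bool cs_db)
{
        if (!protmode)
                return MODE_REAL;
        if (vm86)
                return MODE_VM86;
        if (cs_l && long_mode)
                return MODE_PROT64;
        return cs_db ? MODE_PROT32 : MODE_PROT16;
}

int main(void)
{
        /* CS.L set but the guest is not in long mode: must not pick PROT64. */
        printf("%d\n", select_emul_mode(true, false, true, false, true) == MODE_PROT32);
        printf("%d\n", select_emul_mode(true, false, true, true,  true) == MODE_PROT64);
        return 0;
}
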
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index 14196ea0fdf3..1918d9dff45d 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -22,11 +22,14 @@ struct read_info_sccb {
22 u8 rnsize; /* 10 */ 22 u8 rnsize; /* 10 */
23 u8 _reserved0[16 - 11]; /* 11-15 */ 23 u8 _reserved0[16 - 11]; /* 11-15 */
24 u16 ncpurl; /* 16-17 */ 24 u16 ncpurl; /* 16-17 */
25 u8 _reserved7[24 - 18]; /* 18-23 */ 25 u16 cpuoff; /* 18-19 */
26 u8 _reserved7[24 - 20]; /* 20-23 */
26 u8 loadparm[8]; /* 24-31 */ 27 u8 loadparm[8]; /* 24-31 */
27 u8 _reserved1[48 - 32]; /* 32-47 */ 28 u8 _reserved1[48 - 32]; /* 32-47 */
28 u64 facilities; /* 48-55 */ 29 u64 facilities; /* 48-55 */
29 u8 _reserved2[84 - 56]; /* 56-83 */ 30 u8 _reserved2a[76 - 56]; /* 56-75 */
31 u32 ibc; /* 76-79 */
32 u8 _reserved2b[84 - 80]; /* 80-83 */
30 u8 fac84; /* 84 */ 33 u8 fac84; /* 84 */
31 u8 fac85; /* 85 */ 34 u8 fac85; /* 85 */
32 u8 _reserved3[91 - 86]; /* 86-90 */ 35 u8 _reserved3[91 - 86]; /* 86-90 */
@@ -45,6 +48,8 @@ static unsigned int sclp_con_has_linemode __initdata;
45static unsigned long sclp_hsa_size; 48static unsigned long sclp_hsa_size;
46static unsigned int sclp_max_cpu; 49static unsigned int sclp_max_cpu;
47static struct sclp_ipl_info sclp_ipl_info; 50static struct sclp_ipl_info sclp_ipl_info;
51static unsigned char sclp_siif;
52static u32 sclp_ibc;
48 53
49u64 sclp_facilities; 54u64 sclp_facilities;
50u8 sclp_fac84; 55u8 sclp_fac84;
@@ -96,6 +101,9 @@ static int __init sclp_read_info_early(struct read_info_sccb *sccb)
96 101
97static void __init sclp_facilities_detect(struct read_info_sccb *sccb) 102static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
98{ 103{
104 struct sclp_cpu_entry *cpue;
105 u16 boot_cpu_address, cpu;
106
99 if (sclp_read_info_early(sccb)) 107 if (sclp_read_info_early(sccb))
100 return; 108 return;
101 109
@@ -106,6 +114,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
106 sclp_rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; 114 sclp_rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
107 sclp_rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; 115 sclp_rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2;
108 sclp_rzm <<= 20; 116 sclp_rzm <<= 20;
117 sclp_ibc = sccb->ibc;
109 118
110 if (!sccb->hcpua) { 119 if (!sccb->hcpua) {
111 if (MACHINE_IS_VM) 120 if (MACHINE_IS_VM)
@@ -116,6 +125,15 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
116 sclp_max_cpu = sccb->hcpua + 1; 125 sclp_max_cpu = sccb->hcpua + 1;
117 } 126 }
118 127
128 boot_cpu_address = stap();
129 cpue = (void *)sccb + sccb->cpuoff;
130 for (cpu = 0; cpu < sccb->ncpurl; cpue++, cpu++) {
131 if (boot_cpu_address != cpue->address)
132 continue;
133 sclp_siif = cpue->siif;
134 break;
135 }
136
119 /* Save IPL information */ 137 /* Save IPL information */
120 sclp_ipl_info.is_valid = 1; 138 sclp_ipl_info.is_valid = 1;
121 if (sccb->flags & 0x2) 139 if (sccb->flags & 0x2)
@@ -148,6 +166,18 @@ unsigned int sclp_get_max_cpu(void)
148 return sclp_max_cpu; 166 return sclp_max_cpu;
149} 167}
150 168
169int sclp_has_siif(void)
170{
171 return sclp_siif;
172}
173EXPORT_SYMBOL(sclp_has_siif);
174
175unsigned int sclp_get_ibc(void)
176{
177 return sclp_ibc;
178}
179EXPORT_SYMBOL(sclp_get_ibc);
180
151/* 181/*
152 * This function will be called after sclp_facilities_detect(), which gets 182 * This function will be called after sclp_facilities_detect(), which gets
153 * called from early.c code. The sclp_facilities_detect() function retrieves 183 * called from early.c code. The sclp_facilities_detect() function retrieves
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7d21cf9f4380..970c68197c69 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -134,6 +134,8 @@ static inline bool is_error_page(struct page *page)
134#define KVM_REQ_EPR_EXIT 20 134#define KVM_REQ_EPR_EXIT 20
135#define KVM_REQ_SCAN_IOAPIC 21 135#define KVM_REQ_SCAN_IOAPIC 21
136#define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 136#define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
137#define KVM_REQ_ENABLE_IBS 23
138#define KVM_REQ_DISABLE_IBS 24
137 139
138#define KVM_USERSPACE_IRQ_SOURCE_ID 0 140#define KVM_USERSPACE_IRQ_SOURCE_ID 0
139#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 141#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
@@ -163,6 +165,7 @@ enum kvm_bus {
163 KVM_MMIO_BUS, 165 KVM_MMIO_BUS,
164 KVM_PIO_BUS, 166 KVM_PIO_BUS,
165 KVM_VIRTIO_CCW_NOTIFY_BUS, 167 KVM_VIRTIO_CCW_NOTIFY_BUS,
168 KVM_FAST_MMIO_BUS,
166 KVM_NR_BUSES 169 KVM_NR_BUSES
167}; 170};
168 171
@@ -367,6 +370,7 @@ struct kvm {
367 struct mm_struct *mm; /* userspace tied to this vm */ 370 struct mm_struct *mm; /* userspace tied to this vm */
368 struct kvm_memslots *memslots; 371 struct kvm_memslots *memslots;
369 struct srcu_struct srcu; 372 struct srcu_struct srcu;
373 struct srcu_struct irq_srcu;
370#ifdef CONFIG_KVM_APIC_ARCHITECTURE 374#ifdef CONFIG_KVM_APIC_ARCHITECTURE
371 u32 bsp_vcpu_id; 375 u32 bsp_vcpu_id;
372#endif 376#endif
@@ -410,9 +414,7 @@ struct kvm {
410 unsigned long mmu_notifier_seq; 414 unsigned long mmu_notifier_seq;
411 long mmu_notifier_count; 415 long mmu_notifier_count;
412#endif 416#endif
413 /* Protected by mmu_lock */ 417 long tlbs_dirty;
414 bool tlbs_dirty;
415
416 struct list_head devices; 418 struct list_head devices;
417}; 419};
418 420
@@ -879,6 +881,13 @@ static inline hpa_t pfn_to_hpa(pfn_t pfn)
879 return (hpa_t)pfn << PAGE_SHIFT; 881 return (hpa_t)pfn << PAGE_SHIFT;
880} 882}
881 883
884static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
885{
886 unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
887
888 return kvm_is_error_hva(hva);
889}
890
882static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) 891static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
883{ 892{
884 set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); 893 set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 6929571b79b0..24e9033f8b3f 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -317,6 +317,7 @@ header-y += ppp-ioctl.h
317header-y += ppp_defs.h 317header-y += ppp_defs.h
318header-y += pps.h 318header-y += pps.h
319header-y += prctl.h 319header-y += prctl.h
320header-y += psci.h
320header-y += ptp_clock.h 321header-y += ptp_clock.h
321header-y += ptrace.h 322header-y += ptrace.h
322header-y += qnx4_fs.h 323header-y += qnx4_fs.h
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a8f4ee5d2e82..e11d8f170a62 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -171,6 +171,7 @@ struct kvm_pit_config {
171#define KVM_EXIT_WATCHDOG 21 171#define KVM_EXIT_WATCHDOG 21
172#define KVM_EXIT_S390_TSCH 22 172#define KVM_EXIT_S390_TSCH 22
173#define KVM_EXIT_EPR 23 173#define KVM_EXIT_EPR 23
174#define KVM_EXIT_SYSTEM_EVENT 24
174 175
175/* For KVM_EXIT_INTERNAL_ERROR */ 176/* For KVM_EXIT_INTERNAL_ERROR */
176/* Emulate instruction failed. */ 177/* Emulate instruction failed. */
@@ -301,6 +302,13 @@ struct kvm_run {
301 struct { 302 struct {
302 __u32 epr; 303 __u32 epr;
303 } epr; 304 } epr;
305 /* KVM_EXIT_SYSTEM_EVENT */
306 struct {
307#define KVM_SYSTEM_EVENT_SHUTDOWN 1
308#define KVM_SYSTEM_EVENT_RESET 2
309 __u32 type;
310 __u64 flags;
311 } system_event;
304 /* Fix the size of the union. */ 312 /* Fix the size of the union. */
305 char padding[256]; 313 char padding[256];
306 }; 314 };
@@ -416,6 +424,8 @@ struct kvm_s390_psw {
416#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u 424#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u
417#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u 425#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u
418#define KVM_S390_MCHK 0xfffe1000u 426#define KVM_S390_MCHK 0xfffe1000u
427#define KVM_S390_INT_CLOCK_COMP 0xffff1004u
428#define KVM_S390_INT_CPU_TIMER 0xffff1005u
419#define KVM_S390_INT_VIRTIO 0xffff2603u 429#define KVM_S390_INT_VIRTIO 0xffff2603u
420#define KVM_S390_INT_SERVICE 0xffff2401u 430#define KVM_S390_INT_SERVICE 0xffff2401u
421#define KVM_S390_INT_EMERGENCY 0xffff1201u 431#define KVM_S390_INT_EMERGENCY 0xffff1201u
@@ -515,6 +525,7 @@ enum {
515 kvm_ioeventfd_flag_nr_pio, 525 kvm_ioeventfd_flag_nr_pio,
516 kvm_ioeventfd_flag_nr_deassign, 526 kvm_ioeventfd_flag_nr_deassign,
517 kvm_ioeventfd_flag_nr_virtio_ccw_notify, 527 kvm_ioeventfd_flag_nr_virtio_ccw_notify,
528 kvm_ioeventfd_flag_nr_fast_mmio,
518 kvm_ioeventfd_flag_nr_max, 529 kvm_ioeventfd_flag_nr_max,
519}; 530};
520 531
@@ -529,7 +540,7 @@ enum {
529struct kvm_ioeventfd { 540struct kvm_ioeventfd {
530 __u64 datamatch; 541 __u64 datamatch;
531 __u64 addr; /* legal pio/mmio address */ 542 __u64 addr; /* legal pio/mmio address */
532 __u32 len; /* 1, 2, 4, or 8 bytes */ 543 __u32 len; /* 1, 2, 4, or 8 bytes; or 0 to ignore length */
533 __s32 fd; 544 __s32 fd;
534 __u32 flags; 545 __u32 flags;
535 __u8 pad[36]; 546 __u8 pad[36];
@@ -743,6 +754,10 @@ struct kvm_ppc_smmu_info {
743#define KVM_CAP_IOAPIC_POLARITY_IGNORED 97 754#define KVM_CAP_IOAPIC_POLARITY_IGNORED 97
744#define KVM_CAP_ENABLE_CAP_VM 98 755#define KVM_CAP_ENABLE_CAP_VM 98
745#define KVM_CAP_S390_IRQCHIP 99 756#define KVM_CAP_S390_IRQCHIP 99
757#define KVM_CAP_IOEVENTFD_NO_LENGTH 100
758#define KVM_CAP_VM_ATTRIBUTES 101
759#define KVM_CAP_ARM_PSCI_0_2 102
760#define KVM_CAP_PPC_FIXUP_HCALL 103
746 761
747#ifdef KVM_CAP_IRQ_ROUTING 762#ifdef KVM_CAP_IRQ_ROUTING
748 763
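
KVM_CAP_IOEVENTFD_NO_LENGTH and the relaxed len field above let userspace register an ioeventfd that matches on address only. A hedged sketch of such a registration; it assumes a kernel with these uapi additions, elides all VM setup (vm_fd is a previously created VM file descriptor), and register_fast_mmio_eventfd() is a made-up helper name.

#include <linux/kvm.h>
#include <stdio.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Register an address-only (len = 0) MMIO ioeventfd on an existing VM fd. */
static int register_fast_mmio_eventfd(int vm_fd, __u64 gpa)
{
        struct kvm_ioeventfd ioev;
        int efd = eventfd(0, 0);

        if (efd < 0)
                return -1;

        memset(&ioev, 0, sizeof(ioev));
        ioev.addr  = gpa;
        ioev.len   = 0;         /* 0: ignore length, match on address only */
        ioev.fd    = efd;
        ioev.flags = 0;         /* MMIO; PIO and DATAMATCH are rejected with len 0 */

        if (ioctl(vm_fd, KVM_IOEVENTFD, &ioev) < 0) {
                perror("KVM_IOEVENTFD");
                close(efd);
                return -1;
        }
        return efd;
}
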
diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h
new file mode 100644
index 000000000000..310d83e0a91b
--- /dev/null
+++ b/include/uapi/linux/psci.h
@@ -0,0 +1,90 @@
1/*
2 * ARM Power State and Coordination Interface (PSCI) header
3 *
4 * This header holds common PSCI defines and macros shared
5 * by: ARM kernel, ARM64 kernel, KVM ARM/ARM64 and user space.
6 *
7 * Copyright (C) 2014 Linaro Ltd.
8 * Author: Anup Patel <anup.patel@linaro.org>
9 */
10
11#ifndef _UAPI_LINUX_PSCI_H
12#define _UAPI_LINUX_PSCI_H
13
14/*
15 * PSCI v0.1 interface
16 *
17 * The PSCI v0.1 function numbers are implementation defined.
18 *
19 * Only PSCI return values such as: SUCCESS, NOT_SUPPORTED,
20 * INVALID_PARAMS, and DENIED defined below are applicable
21 * to PSCI v0.1.
22 */
23
24/* PSCI v0.2 interface */
25#define PSCI_0_2_FN_BASE 0x84000000
26#define PSCI_0_2_FN(n) (PSCI_0_2_FN_BASE + (n))
27#define PSCI_0_2_64BIT 0x40000000
28#define PSCI_0_2_FN64_BASE \
29 (PSCI_0_2_FN_BASE + PSCI_0_2_64BIT)
30#define PSCI_0_2_FN64(n) (PSCI_0_2_FN64_BASE + (n))
31
32#define PSCI_0_2_FN_PSCI_VERSION PSCI_0_2_FN(0)
33#define PSCI_0_2_FN_CPU_SUSPEND PSCI_0_2_FN(1)
34#define PSCI_0_2_FN_CPU_OFF PSCI_0_2_FN(2)
35#define PSCI_0_2_FN_CPU_ON PSCI_0_2_FN(3)
36#define PSCI_0_2_FN_AFFINITY_INFO PSCI_0_2_FN(4)
37#define PSCI_0_2_FN_MIGRATE PSCI_0_2_FN(5)
38#define PSCI_0_2_FN_MIGRATE_INFO_TYPE PSCI_0_2_FN(6)
39#define PSCI_0_2_FN_MIGRATE_INFO_UP_CPU PSCI_0_2_FN(7)
40#define PSCI_0_2_FN_SYSTEM_OFF PSCI_0_2_FN(8)
41#define PSCI_0_2_FN_SYSTEM_RESET PSCI_0_2_FN(9)
42
43#define PSCI_0_2_FN64_CPU_SUSPEND PSCI_0_2_FN64(1)
44#define PSCI_0_2_FN64_CPU_ON PSCI_0_2_FN64(3)
45#define PSCI_0_2_FN64_AFFINITY_INFO PSCI_0_2_FN64(4)
46#define PSCI_0_2_FN64_MIGRATE PSCI_0_2_FN64(5)
47#define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7)
48
49/* PSCI v0.2 power state encoding for CPU_SUSPEND function */
50#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff
51#define PSCI_0_2_POWER_STATE_ID_SHIFT 0
52#define PSCI_0_2_POWER_STATE_TYPE_SHIFT 16
53#define PSCI_0_2_POWER_STATE_TYPE_MASK \
54 (0x1 << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
55#define PSCI_0_2_POWER_STATE_AFFL_SHIFT 24
56#define PSCI_0_2_POWER_STATE_AFFL_MASK \
57 (0x3 << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
58
59/* PSCI v0.2 affinity level state returned by AFFINITY_INFO */
60#define PSCI_0_2_AFFINITY_LEVEL_ON 0
61#define PSCI_0_2_AFFINITY_LEVEL_OFF 1
62#define PSCI_0_2_AFFINITY_LEVEL_ON_PENDING 2
63
64/* PSCI v0.2 multicore support in Trusted OS returned by MIGRATE_INFO_TYPE */
65#define PSCI_0_2_TOS_UP_MIGRATE 0
66#define PSCI_0_2_TOS_UP_NO_MIGRATE 1
67#define PSCI_0_2_TOS_MP 2
68
69/* PSCI version decoding (independent of PSCI version) */
70#define PSCI_VERSION_MAJOR_SHIFT 16
71#define PSCI_VERSION_MINOR_MASK \
72 ((1U << PSCI_VERSION_MAJOR_SHIFT) - 1)
73#define PSCI_VERSION_MAJOR_MASK ~PSCI_VERSION_MINOR_MASK
74#define PSCI_VERSION_MAJOR(ver) \
75 (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT)
76#define PSCI_VERSION_MINOR(ver) \
77 ((ver) & PSCI_VERSION_MINOR_MASK)
78
79/* PSCI return values (inclusive of all PSCI versions) */
80#define PSCI_RET_SUCCESS 0
81#define PSCI_RET_NOT_SUPPORTED -1
82#define PSCI_RET_INVALID_PARAMS -2
83#define PSCI_RET_DENIED -3
84#define PSCI_RET_ALREADY_ON -4
85#define PSCI_RET_ON_PENDING -5
86#define PSCI_RET_INTERNAL_FAILURE -6
87#define PSCI_RET_NOT_PRESENT -7
88#define PSCI_RET_DISABLED -8
89
90#endif /* _UAPI_LINUX_PSCI_H */
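
The version macros in the new header split the 32-bit PSCI version word into a 16/16 major/minor pair. A tiny self-contained check, with the macros copied from the header above and sample values chosen for illustration (PSCI 0.2 firmware is expected to report 0x00000002).

#include <stdio.h>

#define PSCI_VERSION_MAJOR_SHIFT	16
#define PSCI_VERSION_MINOR_MASK		((1U << PSCI_VERSION_MAJOR_SHIFT) - 1)
#define PSCI_VERSION_MAJOR_MASK		~PSCI_VERSION_MINOR_MASK
#define PSCI_VERSION_MAJOR(ver)		\
	(((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT)
#define PSCI_VERSION_MINOR(ver)		((ver) & PSCI_VERSION_MINOR_MASK)

int main(void)
{
	unsigned int v0_2 = 2;                  /* 0x00000002 -> 0.2 */
	unsigned int v1_0 = (1U << 16) | 0;     /* a hypothetical 1.0 would be 0x00010000 */

	printf("%u.%u\n", PSCI_VERSION_MAJOR(v0_2), PSCI_VERSION_MINOR(v0_2));
	printf("%u.%u\n", PSCI_VERSION_MAJOR(v1_0), PSCI_VERSION_MINOR(v1_0));
	return 0;
}
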
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 06e6401d6ef4..d6a3d0993d88 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,12 +80,10 @@ static void async_pf_execute(struct work_struct *work)
80 80
81 might_sleep(); 81 might_sleep();
82 82
83 use_mm(mm);
84 down_read(&mm->mmap_sem); 83 down_read(&mm->mmap_sem);
85 get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); 84 get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
86 up_read(&mm->mmap_sem); 85 up_read(&mm->mmap_sem);
87 kvm_async_page_present_sync(vcpu, apf); 86 kvm_async_page_present_sync(vcpu, apf);
88 unuse_mm(mm);
89 87
90 spin_lock(&vcpu->async_pf.lock); 88 spin_lock(&vcpu->async_pf.lock);
91 list_add_tail(&apf->link, &vcpu->async_pf.done); 89 list_add_tail(&apf->link, &vcpu->async_pf.done);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 29c2a04e036e..20c3af7692c5 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -31,6 +31,7 @@
31#include <linux/list.h> 31#include <linux/list.h>
32#include <linux/eventfd.h> 32#include <linux/eventfd.h>
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/srcu.h>
34#include <linux/slab.h> 35#include <linux/slab.h>
35 36
36#include "iodev.h" 37#include "iodev.h"
@@ -118,19 +119,22 @@ static void
118irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) 119irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
119{ 120{
120 struct _irqfd_resampler *resampler; 121 struct _irqfd_resampler *resampler;
122 struct kvm *kvm;
121 struct _irqfd *irqfd; 123 struct _irqfd *irqfd;
124 int idx;
122 125
123 resampler = container_of(kian, struct _irqfd_resampler, notifier); 126 resampler = container_of(kian, struct _irqfd_resampler, notifier);
127 kvm = resampler->kvm;
124 128
125 kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, 129 kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
126 resampler->notifier.gsi, 0, false); 130 resampler->notifier.gsi, 0, false);
127 131
128 rcu_read_lock(); 132 idx = srcu_read_lock(&kvm->irq_srcu);
129 133
130 list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) 134 list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
131 eventfd_signal(irqfd->resamplefd, 1); 135 eventfd_signal(irqfd->resamplefd, 1);
132 136
133 rcu_read_unlock(); 137 srcu_read_unlock(&kvm->irq_srcu, idx);
134} 138}
135 139
136static void 140static void
@@ -142,7 +146,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
142 mutex_lock(&kvm->irqfds.resampler_lock); 146 mutex_lock(&kvm->irqfds.resampler_lock);
143 147
144 list_del_rcu(&irqfd->resampler_link); 148 list_del_rcu(&irqfd->resampler_link);
145 synchronize_rcu(); 149 synchronize_srcu(&kvm->irq_srcu);
146 150
147 if (list_empty(&resampler->list)) { 151 if (list_empty(&resampler->list)) {
148 list_del(&resampler->link); 152 list_del(&resampler->link);
@@ -221,17 +225,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
221 unsigned long flags = (unsigned long)key; 225 unsigned long flags = (unsigned long)key;
222 struct kvm_kernel_irq_routing_entry *irq; 226 struct kvm_kernel_irq_routing_entry *irq;
223 struct kvm *kvm = irqfd->kvm; 227 struct kvm *kvm = irqfd->kvm;
228 int idx;
224 229
225 if (flags & POLLIN) { 230 if (flags & POLLIN) {
226 rcu_read_lock(); 231 idx = srcu_read_lock(&kvm->irq_srcu);
227 irq = rcu_dereference(irqfd->irq_entry); 232 irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu);
228 /* An event has been signaled, inject an interrupt */ 233 /* An event has been signaled, inject an interrupt */
229 if (irq) 234 if (irq)
230 kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, 235 kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
231 false); 236 false);
232 else 237 else
233 schedule_work(&irqfd->inject); 238 schedule_work(&irqfd->inject);
234 rcu_read_unlock(); 239 srcu_read_unlock(&kvm->irq_srcu, idx);
235 } 240 }
236 241
237 if (flags & POLLHUP) { 242 if (flags & POLLHUP) {
@@ -363,7 +368,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
363 } 368 }
364 369
365 list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); 370 list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
366 synchronize_rcu(); 371 synchronize_srcu(&kvm->irq_srcu);
367 372
368 mutex_unlock(&kvm->irqfds.resampler_lock); 373 mutex_unlock(&kvm->irqfds.resampler_lock);
369 } 374 }
@@ -465,7 +470,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
465 * another thread calls kvm_irq_routing_update before 470 * another thread calls kvm_irq_routing_update before
466 * we flush workqueue below (we synchronize with 471 * we flush workqueue below (we synchronize with
467 * kvm_irq_routing_update using irqfds.lock). 472 * kvm_irq_routing_update using irqfds.lock).
468 * It is paired with synchronize_rcu done by caller 473 * It is paired with synchronize_srcu done by caller
469 * of that function. 474 * of that function.
470 */ 475 */
471 rcu_assign_pointer(irqfd->irq_entry, NULL); 476 rcu_assign_pointer(irqfd->irq_entry, NULL);
@@ -524,7 +529,7 @@ kvm_irqfd_release(struct kvm *kvm)
524 529
525/* 530/*
526 * Change irq_routing and irqfd. 531 * Change irq_routing and irqfd.
527 * Caller must invoke synchronize_rcu afterwards. 532 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
528 */ 533 */
529void kvm_irq_routing_update(struct kvm *kvm, 534void kvm_irq_routing_update(struct kvm *kvm,
530 struct kvm_irq_routing_table *irq_rt) 535 struct kvm_irq_routing_table *irq_rt)
@@ -600,7 +605,15 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
600{ 605{
601 u64 _val; 606 u64 _val;
602 607
603 if (!(addr == p->addr && len == p->length)) 608 if (addr != p->addr)
609 /* address must be precise for a hit */
610 return false;
611
612 if (!p->length)
613 /* length = 0 means only look at the address, so always a hit */
614 return true;
615
616 if (len != p->length)
604 /* address-range must be precise for a hit */ 617 /* address-range must be precise for a hit */
605 return false; 618 return false;
606 619
@@ -671,9 +684,11 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
671 684
672 list_for_each_entry(_p, &kvm->ioeventfds, list) 685 list_for_each_entry(_p, &kvm->ioeventfds, list)
673 if (_p->bus_idx == p->bus_idx && 686 if (_p->bus_idx == p->bus_idx &&
674 _p->addr == p->addr && _p->length == p->length && 687 _p->addr == p->addr &&
675 (_p->wildcard || p->wildcard || 688 (!_p->length || !p->length ||
676 _p->datamatch == p->datamatch)) 689 (_p->length == p->length &&
690 (_p->wildcard || p->wildcard ||
691 _p->datamatch == p->datamatch))))
677 return true; 692 return true;
678 693
679 return false; 694 return false;
@@ -697,8 +712,9 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
697 int ret; 712 int ret;
698 713
699 bus_idx = ioeventfd_bus_from_flags(args->flags); 714 bus_idx = ioeventfd_bus_from_flags(args->flags);
700 /* must be natural-word sized */ 715 /* must be natural-word sized, or 0 to ignore length */
701 switch (args->len) { 716 switch (args->len) {
717 case 0:
702 case 1: 718 case 1:
703 case 2: 719 case 2:
704 case 4: 720 case 4:
@@ -716,6 +732,12 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
716 if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) 732 if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
717 return -EINVAL; 733 return -EINVAL;
718 734
735 /* ioeventfd with no length can't be combined with DATAMATCH */
736 if (!args->len &&
737 args->flags & (KVM_IOEVENTFD_FLAG_PIO |
738 KVM_IOEVENTFD_FLAG_DATAMATCH))
739 return -EINVAL;
740
719 eventfd = eventfd_ctx_fdget(args->fd); 741 eventfd = eventfd_ctx_fdget(args->fd);
720 if (IS_ERR(eventfd)) 742 if (IS_ERR(eventfd))
721 return PTR_ERR(eventfd); 743 return PTR_ERR(eventfd);
@@ -753,6 +775,16 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
753 if (ret < 0) 775 if (ret < 0)
754 goto unlock_fail; 776 goto unlock_fail;
755 777
778 /* When length is ignored, MMIO is also put on a separate bus, for
779 * faster lookups.
780 */
781 if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) {
782 ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS,
783 p->addr, 0, &p->dev);
784 if (ret < 0)
785 goto register_fail;
786 }
787
756 kvm->buses[bus_idx]->ioeventfd_count++; 788 kvm->buses[bus_idx]->ioeventfd_count++;
757 list_add_tail(&p->list, &kvm->ioeventfds); 789 list_add_tail(&p->list, &kvm->ioeventfds);
758 790
@@ -760,6 +792,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
760 792
761 return 0; 793 return 0;
762 794
795register_fail:
796 kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
763unlock_fail: 797unlock_fail:
764 mutex_unlock(&kvm->slots_lock); 798 mutex_unlock(&kvm->slots_lock);
765 799
@@ -799,6 +833,10 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
799 continue; 833 continue;
800 834
801 kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); 835 kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
836 if (!p->length) {
837 kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS,
838 &p->dev);
839 }
802 kvm->buses[bus_idx]->ioeventfd_count--; 840 kvm->buses[bus_idx]->ioeventfd_count--;
803 ioeventfd_release(p); 841 ioeventfd_release(p);
804 ret = 0; 842 ret = 0;
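
The ioeventfd_in_range() change above makes a zero-length registration match on address alone, while length and datamatch still apply otherwise. A simplified restatement of that predicate, with struct ioeventfd_match standing in for the kernel's struct _ioeventfd and only the fields the check uses.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct ioeventfd_match {
        uint64_t addr;
        int      length;        /* 0 means "ignore length and data" */
        bool     wildcard;      /* true: no datamatch required */
        uint64_t datamatch;
};

static bool ioeventfd_hit(const struct ioeventfd_match *p,
                          uint64_t addr, int len, uint64_t val)
{
        if (addr != p->addr)
                return false;           /* address must be exact */
        if (!p->length)
                return true;            /* address-only registration */
        if (len != p->length)
                return false;           /* length must match */
        return p->wildcard || p->datamatch == val;
}

int main(void)
{
        struct ioeventfd_match fast = { .addr = 0xfe000000, .length = 0 };

        /* Any access size or value at the registered address is a hit. */
        printf("%d %d\n",
               ioeventfd_hit(&fast, 0xfe000000, 4, 0x1),
               ioeventfd_hit(&fast, 0xfe000000, 1, 0xff));
        return 0;
}
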
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e2e6b4473a96..ced4a542a031 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -163,6 +163,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
163 struct kvm_kernel_irq_routing_entry *e; 163 struct kvm_kernel_irq_routing_entry *e;
164 int ret = -EINVAL; 164 int ret = -EINVAL;
165 struct kvm_irq_routing_table *irq_rt; 165 struct kvm_irq_routing_table *irq_rt;
166 int idx;
166 167
167 trace_kvm_set_irq(irq, level, irq_source_id); 168 trace_kvm_set_irq(irq, level, irq_source_id);
168 169
@@ -174,8 +175,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
174 * Since there's no easy way to do this, we only support injecting MSI 175 * Since there's no easy way to do this, we only support injecting MSI
175 * which is limited to 1:1 GSI mapping. 176 * which is limited to 1:1 GSI mapping.
176 */ 177 */
177 rcu_read_lock(); 178 idx = srcu_read_lock(&kvm->irq_srcu);
178 irq_rt = rcu_dereference(kvm->irq_routing); 179 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
179 if (irq < irq_rt->nr_rt_entries) 180 if (irq < irq_rt->nr_rt_entries)
180 hlist_for_each_entry(e, &irq_rt->map[irq], link) { 181 hlist_for_each_entry(e, &irq_rt->map[irq], link) {
181 if (likely(e->type == KVM_IRQ_ROUTING_MSI)) 182 if (likely(e->type == KVM_IRQ_ROUTING_MSI))
@@ -184,7 +185,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
184 ret = -EWOULDBLOCK; 185 ret = -EWOULDBLOCK;
185 break; 186 break;
186 } 187 }
187 rcu_read_unlock(); 188 srcu_read_unlock(&kvm->irq_srcu, idx);
188 return ret; 189 return ret;
189} 190}
190 191
@@ -253,22 +254,22 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
253 mutex_lock(&kvm->irq_lock); 254 mutex_lock(&kvm->irq_lock);
254 hlist_del_rcu(&kimn->link); 255 hlist_del_rcu(&kimn->link);
255 mutex_unlock(&kvm->irq_lock); 256 mutex_unlock(&kvm->irq_lock);
256 synchronize_rcu(); 257 synchronize_srcu(&kvm->irq_srcu);
257} 258}
258 259
259void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, 260void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
260 bool mask) 261 bool mask)
261{ 262{
262 struct kvm_irq_mask_notifier *kimn; 263 struct kvm_irq_mask_notifier *kimn;
263 int gsi; 264 int idx, gsi;
264 265
265 rcu_read_lock(); 266 idx = srcu_read_lock(&kvm->irq_srcu);
266 gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; 267 gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
267 if (gsi != -1) 268 if (gsi != -1)
268 hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) 269 hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
269 if (kimn->irq == gsi) 270 if (kimn->irq == gsi)
270 kimn->func(kimn, mask); 271 kimn->func(kimn, mask);
271 rcu_read_unlock(); 272 srcu_read_unlock(&kvm->irq_srcu, idx);
272} 273}
273 274
274int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, 275int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
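The irq_comm.c conversion above swaps the plain RCU read side for the dedicated kvm->irq_srcu domain. The shape of that read side is worth spelling out, because srcu_read_lock() returns an index that must be handed back to srcu_read_unlock(). The sketch below uses made-up names (my_vm, routing_table); only the locking pattern mirrors the code above.

#include <linux/srcu.h>
#include <linux/rcupdate.h>

struct routing_table {
        int nr_entries;
        /* ... */
};

struct my_vm {
        struct srcu_struct irq_srcu;            /* protects irq_routing readers */
        struct routing_table __rcu *irq_routing;
};

static int count_routes(struct my_vm *vm)
{
        struct routing_table *rt;
        int idx, nr;

        idx = srcu_read_lock(&vm->irq_srcu);    /* enter the SRCU read side */
        rt = srcu_dereference(vm->irq_routing, &vm->irq_srcu);
        nr = rt->nr_entries;                    /* rt stays valid until unlock */
        srcu_read_unlock(&vm->irq_srcu, idx);   /* hand back the index from lock */
        return nr;
}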
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 20dc9e4a8f6c..b43c275775cd 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -26,6 +26,7 @@
26 26
27#include <linux/kvm_host.h> 27#include <linux/kvm_host.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/srcu.h>
29#include <linux/export.h> 30#include <linux/export.h>
30#include <trace/events/kvm.h> 31#include <trace/events/kvm.h>
31#include "irq.h" 32#include "irq.h"
@@ -33,19 +34,19 @@
33bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) 34bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
34{ 35{
35 struct kvm_irq_ack_notifier *kian; 36 struct kvm_irq_ack_notifier *kian;
36 int gsi; 37 int gsi, idx;
37 38
38 rcu_read_lock(); 39 idx = srcu_read_lock(&kvm->irq_srcu);
39 gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; 40 gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
40 if (gsi != -1) 41 if (gsi != -1)
41 hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, 42 hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
42 link) 43 link)
43 if (kian->gsi == gsi) { 44 if (kian->gsi == gsi) {
44 rcu_read_unlock(); 45 srcu_read_unlock(&kvm->irq_srcu, idx);
45 return true; 46 return true;
46 } 47 }
47 48
48 rcu_read_unlock(); 49 srcu_read_unlock(&kvm->irq_srcu, idx);
49 50
50 return false; 51 return false;
51} 52}
@@ -54,18 +55,18 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
54void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) 55void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
55{ 56{
56 struct kvm_irq_ack_notifier *kian; 57 struct kvm_irq_ack_notifier *kian;
57 int gsi; 58 int gsi, idx;
58 59
59 trace_kvm_ack_irq(irqchip, pin); 60 trace_kvm_ack_irq(irqchip, pin);
60 61
61 rcu_read_lock(); 62 idx = srcu_read_lock(&kvm->irq_srcu);
62 gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; 63 gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
63 if (gsi != -1) 64 if (gsi != -1)
64 hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, 65 hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
65 link) 66 link)
66 if (kian->gsi == gsi) 67 if (kian->gsi == gsi)
67 kian->irq_acked(kian); 68 kian->irq_acked(kian);
68 rcu_read_unlock(); 69 srcu_read_unlock(&kvm->irq_srcu, idx);
69} 70}
70 71
71void kvm_register_irq_ack_notifier(struct kvm *kvm, 72void kvm_register_irq_ack_notifier(struct kvm *kvm,
@@ -85,7 +86,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
85 mutex_lock(&kvm->irq_lock); 86 mutex_lock(&kvm->irq_lock);
86 hlist_del_init_rcu(&kian->link); 87 hlist_del_init_rcu(&kian->link);
87 mutex_unlock(&kvm->irq_lock); 88 mutex_unlock(&kvm->irq_lock);
88 synchronize_rcu(); 89 synchronize_srcu(&kvm->irq_srcu);
89#ifdef __KVM_HAVE_IOAPIC 90#ifdef __KVM_HAVE_IOAPIC
90 kvm_vcpu_request_scan_ioapic(kvm); 91 kvm_vcpu_request_scan_ioapic(kvm);
91#endif 92#endif
@@ -115,7 +116,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
115 bool line_status) 116 bool line_status)
116{ 117{
117 struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; 118 struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
118 int ret = -1, i = 0; 119 int ret = -1, i = 0, idx;
119 struct kvm_irq_routing_table *irq_rt; 120 struct kvm_irq_routing_table *irq_rt;
120 121
121 trace_kvm_set_irq(irq, level, irq_source_id); 122 trace_kvm_set_irq(irq, level, irq_source_id);
@@ -124,12 +125,12 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
124 * IOAPIC. So set the bit in both. The guest will ignore 125 * IOAPIC. So set the bit in both. The guest will ignore
125 * writes to the unused one. 126 * writes to the unused one.
126 */ 127 */
127 rcu_read_lock(); 128 idx = srcu_read_lock(&kvm->irq_srcu);
128 irq_rt = rcu_dereference(kvm->irq_routing); 129 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
129 if (irq < irq_rt->nr_rt_entries) 130 if (irq < irq_rt->nr_rt_entries)
130 hlist_for_each_entry(e, &irq_rt->map[irq], link) 131 hlist_for_each_entry(e, &irq_rt->map[irq], link)
131 irq_set[i++] = *e; 132 irq_set[i++] = *e;
132 rcu_read_unlock(); 133 srcu_read_unlock(&kvm->irq_srcu, idx);
133 134
134 while(i--) { 135 while(i--) {
135 int r; 136 int r;
@@ -226,7 +227,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
226 kvm_irq_routing_update(kvm, new); 227 kvm_irq_routing_update(kvm, new);
227 mutex_unlock(&kvm->irq_lock); 228 mutex_unlock(&kvm->irq_lock);
228 229
229 synchronize_rcu(); 230 synchronize_srcu_expedited(&kvm->irq_srcu);
230 231
231 new = old; 232 new = old;
232 r = 0; 233 r = 0;
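On the update side, kvm_set_irq_routing() now only has to wait for readers of this one SRCU domain, so it can use synchronize_srcu_expedited() instead of a full synchronize_rcu() before the old table is recycled, which keeps frequent routing updates cheap. Continuing the illustrative names from the previous sketch, the publish-then-wait-then-free sequence looks roughly like this (real code would pass a proper lockdep condition instead of 1):

#include <linux/slab.h>

/* Caller is assumed to hold the lock that serializes routing updates. */
static void install_routes(struct my_vm *vm, struct routing_table *new)
{
        struct routing_table *old;

        old = rcu_dereference_protected(vm->irq_routing, 1);
        rcu_assign_pointer(vm->irq_routing, new);       /* publish the new table */
        synchronize_srcu_expedited(&vm->irq_srcu);      /* wait out current readers */
        kfree(old);                                     /* no reader can still see it */
}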
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 56baae8c2f56..c86be0f983db 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -186,9 +186,12 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
186 186
187void kvm_flush_remote_tlbs(struct kvm *kvm) 187void kvm_flush_remote_tlbs(struct kvm *kvm)
188{ 188{
189 long dirty_count = kvm->tlbs_dirty;
190
191 smp_mb();
189 if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) 192 if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
190 ++kvm->stat.remote_tlb_flush; 193 ++kvm->stat.remote_tlb_flush;
191 kvm->tlbs_dirty = false; 194 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
192} 195}
193EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); 196EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
194 197
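The kvm_flush_remote_tlbs() hunk above replaces an unconditional "tlbs_dirty = false" with a snapshot taken before the flush and a cmpxchg() afterwards: if another CPU dirties TLBs while the flush is in flight, the counter no longer matches the snapshot, the cmpxchg() fails, and that dirtying is not silently discarded. A stripped-down sketch of the pattern, with a hypothetical flush_everything() standing in for make_all_cpus_request():

#include <linux/atomic.h>

extern void flush_everything(void);     /* placeholder for the real flush */

static void flush_and_maybe_clear(long *tlbs_dirty)
{
        long snapshot = *tlbs_dirty;    /* remember how dirty things were */

        smp_mb();                       /* order the snapshot before the flush */
        flush_everything();
        /* Clear only if nothing was dirtied since the snapshot. */
        cmpxchg(tlbs_dirty, snapshot, 0);
}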
@@ -454,11 +457,11 @@ static struct kvm *kvm_create_vm(unsigned long type)
454 457
455 r = kvm_arch_init_vm(kvm, type); 458 r = kvm_arch_init_vm(kvm, type);
456 if (r) 459 if (r)
457 goto out_err_nodisable; 460 goto out_err_no_disable;
458 461
459 r = hardware_enable_all(); 462 r = hardware_enable_all();
460 if (r) 463 if (r)
461 goto out_err_nodisable; 464 goto out_err_no_disable;
462 465
463#ifdef CONFIG_HAVE_KVM_IRQCHIP 466#ifdef CONFIG_HAVE_KVM_IRQCHIP
464 INIT_HLIST_HEAD(&kvm->mask_notifier_list); 467 INIT_HLIST_HEAD(&kvm->mask_notifier_list);
@@ -470,10 +473,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
470 r = -ENOMEM; 473 r = -ENOMEM;
471 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 474 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
472 if (!kvm->memslots) 475 if (!kvm->memslots)
473 goto out_err_nosrcu; 476 goto out_err_no_srcu;
474 kvm_init_memslots_id(kvm); 477 kvm_init_memslots_id(kvm);
475 if (init_srcu_struct(&kvm->srcu)) 478 if (init_srcu_struct(&kvm->srcu))
476 goto out_err_nosrcu; 479 goto out_err_no_srcu;
480 if (init_srcu_struct(&kvm->irq_srcu))
481 goto out_err_no_irq_srcu;
477 for (i = 0; i < KVM_NR_BUSES; i++) { 482 for (i = 0; i < KVM_NR_BUSES; i++) {
478 kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), 483 kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
479 GFP_KERNEL); 484 GFP_KERNEL);
@@ -502,10 +507,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
502 return kvm; 507 return kvm;
503 508
504out_err: 509out_err:
510 cleanup_srcu_struct(&kvm->irq_srcu);
511out_err_no_irq_srcu:
505 cleanup_srcu_struct(&kvm->srcu); 512 cleanup_srcu_struct(&kvm->srcu);
506out_err_nosrcu: 513out_err_no_srcu:
507 hardware_disable_all(); 514 hardware_disable_all();
508out_err_nodisable: 515out_err_no_disable:
509 for (i = 0; i < KVM_NR_BUSES; i++) 516 for (i = 0; i < KVM_NR_BUSES; i++)
510 kfree(kvm->buses[i]); 517 kfree(kvm->buses[i]);
511 kfree(kvm->memslots); 518 kfree(kvm->memslots);
@@ -601,6 +608,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
601 kvm_arch_destroy_vm(kvm); 608 kvm_arch_destroy_vm(kvm);
602 kvm_destroy_devices(kvm); 609 kvm_destroy_devices(kvm);
603 kvm_free_physmem(kvm); 610 kvm_free_physmem(kvm);
611 cleanup_srcu_struct(&kvm->irq_srcu);
604 cleanup_srcu_struct(&kvm->srcu); 612 cleanup_srcu_struct(&kvm->srcu);
605 kvm_arch_free_vm(kvm); 613 kvm_arch_free_vm(kvm);
606 hardware_disable_all(); 614 hardware_disable_all();
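The kvm_create_vm()/kvm_destroy_vm() hunks add the usual lifetime plumbing for the new srcu_struct: initialize it after kvm->srcu, tear it down first, and give the error path one label per initialization step so the unwind runs in reverse order (the renamed out_err_no_* labels make that ordering readable). A compressed sketch of the same pairing, with illustrative names:

#include <linux/errno.h>
#include <linux/srcu.h>

struct two_domains {
        struct srcu_struct srcu;
        struct srcu_struct irq_srcu;
};

static int two_domains_init(struct two_domains *d)
{
        if (init_srcu_struct(&d->srcu))
                goto out_err_no_srcu;
        if (init_srcu_struct(&d->irq_srcu))
                goto out_err_no_irq_srcu;
        return 0;

out_err_no_irq_srcu:
        cleanup_srcu_struct(&d->srcu);  /* undo only what succeeded */
out_err_no_srcu:
        return -ENOMEM;
}

static void two_domains_destroy(struct two_domains *d)
{
        cleanup_srcu_struct(&d->irq_srcu);
        cleanup_srcu_struct(&d->srcu);
}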
@@ -637,14 +645,12 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
637 */ 645 */
638static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) 646static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
639{ 647{
640#ifndef CONFIG_S390
641 unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); 648 unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
642 649
643 memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes); 650 memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes);
644 if (!memslot->dirty_bitmap) 651 if (!memslot->dirty_bitmap)
645 return -ENOMEM; 652 return -ENOMEM;
646 653
647#endif /* !CONFIG_S390 */
648 return 0; 654 return 0;
649} 655}
650 656
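Dropping the CONFIG_S390 guard means s390 now allocates a dirty bitmap like every other architecture. The allocation stays at twice kvm_dirty_bitmap_bytes(); in this kernel the second half serves as a temporary copy buffer when the dirty log is read out (see x86's kvm_vm_ioctl_get_dirty_log()). For reference, the sizing works out as below; kvm_dirty_bitmap_bytes() is expanded inline here purely for illustration.

#include <linux/kernel.h>       /* ALIGN, BITS_PER_LONG */

/* One bit per guest page, rounded up to a whole number of longs,
 * expressed in bytes. */
static unsigned long dirty_bitmap_bytes(unsigned long npages)
{
        return ALIGN(npages, BITS_PER_LONG) / 8;
}

/* kvm_create_dirty_bitmap() then allocates 2 * dirty_bitmap_bytes(npages). */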
@@ -2922,6 +2928,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
2922 2928
2923 return -EOPNOTSUPP; 2929 return -EOPNOTSUPP;
2924} 2930}
2931EXPORT_SYMBOL_GPL(kvm_io_bus_write);
2925 2932
2926/* kvm_io_bus_read - called under kvm->slots_lock */ 2933/* kvm_io_bus_read - called under kvm->slots_lock */
2927int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 2934int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,