diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-16 12:55:35 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-16 12:55:35 -0400 |
| commit | 10dc3747661bea9215417b659449bb7b8ed3df2c (patch) | |
| tree | d943974b4941203a7db2fabe4896852cf0f16bc4 /Documentation/virtual | |
| parent | 047486d8e7c2a7e8d75b068b69cb67b47364f5d4 (diff) | |
| parent | f958ee745f70b60d0e41927cab2c073104bc70c2 (diff) | |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"One of the largest releases for KVM... Hardly any generic
changes, but lots of architecture-specific updates.
ARM:
- VHE support so that we can run the kernel at EL2 on ARMv8.1 systems
- PMU support for guests
- 32bit world switch rewritten in C
- various optimizations to the vgic save/restore code.
PPC:
- enabled KVM-VFIO integration ("VFIO device")
- optimizations to speed up IPIs between vcpus
- in-kernel handling of IOMMU hypercalls
- support for dynamic DMA windows (DDW).
s390:
- provide the floating point registers via sync regs;
- separated instruction vs. data accesses
- dirty log improvements for huge guests
- bugfixes and documentation improvements.
x86:
- Hyper-V VMBus hypercall userspace exit
- alternative implementation of lowest-priority interrupts using
vector hashing (for better VT-d posted interrupt support)
- fixed guest debugging with nested virtualization
- improved interrupt tracking in the in-kernel IOAPIC
- generic infrastructure for tracking writes to guest
memory - currently its only use is to speed up the legacy shadow
paging (pre-EPT) case, but in the future it will be used for
virtual GPUs as well
- much cleanup (LAPIC, kvmclock, MMU, PIT), including ubsan fixes"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (217 commits)
KVM: x86: remove eager_fpu field of struct kvm_vcpu_arch
KVM: x86: disable MPX if host did not enable MPX XSAVE features
arm64: KVM: vgic-v3: Only wipe LRs on vcpu exit
arm64: KVM: vgic-v3: Reset LRs at boot time
arm64: KVM: vgic-v3: Do not save an LR known to be empty
arm64: KVM: vgic-v3: Save maintenance interrupt state only if required
arm64: KVM: vgic-v3: Avoid accessing ICH registers
KVM: arm/arm64: vgic-v2: Make GICD_SGIR quicker to hit
KVM: arm/arm64: vgic-v2: Only wipe LRs on vcpu exit
KVM: arm/arm64: vgic-v2: Reset LRs at boot time
KVM: arm/arm64: vgic-v2: Do not save an LR known to be empty
KVM: arm/arm64: vgic-v2: Move GICH_ELRSR saving to its own function
KVM: arm/arm64: vgic-v2: Save maintenance interrupt state only if required
KVM: arm/arm64: vgic-v2: Avoid accessing GICH registers
KVM: s390: allocate only one DMA page per VM
KVM: s390: enable STFLE interpretation only if enabled for the guest
KVM: s390: wake up when the VCPU cpu timer expires
KVM: s390: step the VCPU timer while in enabled wait
KVM: s390: protect VCPU cpu timer with a seqcount
KVM: s390: step VCPU cpu timer during kvm_run ioctl
...
Diffstat (limited to 'Documentation/virtual')
| -rw-r--r-- | Documentation/virtual/kvm/api.txt | 99 | ||||
| -rw-r--r-- | Documentation/virtual/kvm/devices/s390_flic.txt | 2 | ||||
| -rw-r--r-- | Documentation/virtual/kvm/devices/vcpu.txt | 33 | ||||
| -rw-r--r-- | Documentation/virtual/kvm/devices/vm.txt | 52 | ||||
| -rw-r--r-- | Documentation/virtual/kvm/mmu.txt | 6 |
5 files changed, 185 insertions, 7 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 07e4cdf02407..4d0542c5206b 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
| @@ -2507,8 +2507,9 @@ struct kvm_create_device { | |||
| 2507 | 2507 | ||
| 2508 | 4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR | 2508 | 4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR |
| 2509 | 2509 | ||
| 2510 | Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device | 2510 | Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device, |
| 2511 | Type: device ioctl, vm ioctl | 2511 | KVM_CAP_VCPU_ATTRIBUTES for vcpu device |
| 2512 | Type: device ioctl, vm ioctl, vcpu ioctl | ||
| 2512 | Parameters: struct kvm_device_attr | 2513 | Parameters: struct kvm_device_attr |
| 2513 | Returns: 0 on success, -1 on error | 2514 | Returns: 0 on success, -1 on error |
| 2514 | Errors: | 2515 | Errors: |
| @@ -2533,8 +2534,9 @@ struct kvm_device_attr { | |||
| 2533 | 2534 | ||
| 2534 | 4.81 KVM_HAS_DEVICE_ATTR | 2535 | 4.81 KVM_HAS_DEVICE_ATTR |
| 2535 | 2536 | ||
| 2536 | Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device | 2537 | Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device, |
| 2537 | Type: device ioctl, vm ioctl | 2538 | KVM_CAP_VCPU_ATTRIBUTES for vcpu device |
| 2539 | Type: device ioctl, vm ioctl, vcpu ioctl | ||
| 2538 | Parameters: struct kvm_device_attr | 2540 | Parameters: struct kvm_device_attr |
| 2539 | Returns: 0 on success, -1 on error | 2541 | Returns: 0 on success, -1 on error |
| 2540 | Errors: | 2542 | Errors: |
| @@ -2577,6 +2579,8 @@ Possible features: | |||
| 2577 | Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). | 2579 | Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). |
| 2578 | - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU. | 2580 | - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU. |
| 2579 | Depends on KVM_CAP_ARM_PSCI_0_2. | 2581 | Depends on KVM_CAP_ARM_PSCI_0_2. |
| 2582 | - KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU. | ||
| 2583 | Depends on KVM_CAP_ARM_PMU_V3. | ||
| 2580 | 2584 | ||
| 2581 | 2585 | ||
| 2582 | 4.83 KVM_ARM_PREFERRED_TARGET | 2586 | 4.83 KVM_ARM_PREFERRED_TARGET |
| @@ -3035,6 +3039,87 @@ Returns: 0 on success, -1 on error | |||
| 3035 | 3039 | ||
| 3036 | Queues an SMI on the thread's vcpu. | 3040 | Queues an SMI on the thread's vcpu. |
| 3037 | 3041 | ||
| 3042 | 4.97 KVM_CAP_PPC_MULTITCE | ||
| 3043 | |||
| 3044 | Capability: KVM_CAP_PPC_MULTITCE | ||
| 3045 | Architectures: ppc | ||
| 3046 | Type: vm | ||
| 3047 | |||
| 3048 | This capability means the kernel is capable of handling hypercalls | ||
| 3049 | H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user | ||
| 3050 | space. This significantly accelerates DMA operations for PPC KVM guests. | ||
| 3051 | User space should expect that its handlers for these hypercalls | ||
| 3052 | are not going to be called if user space previously registered LIOBN | ||
| 3053 | in KVM (via KVM_CREATE_SPAPR_TCE or similar calls). | ||
| 3054 | |||
| 3055 | In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest, | ||
| 3056 | user space might have to advertise it for the guest. For example, | ||
| 3057 | IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is | ||
| 3058 | present in the "ibm,hypertas-functions" device-tree property. | ||
| 3059 | |||
| 3060 | The hypercalls mentioned above may or may not be processed successfully | ||
| 3061 | in the kernel based fast path. If they can not be handled by the kernel, | ||
| 3062 | they will get passed on to user space. So user space still has to have | ||
| 3063 | an implementation for these despite the in kernel acceleration. | ||
| 3064 | |||
| 3065 | This capability is always enabled. | ||
| 3066 | |||
| 3067 | 4.98 KVM_CREATE_SPAPR_TCE_64 | ||
| 3068 | |||
| 3069 | Capability: KVM_CAP_SPAPR_TCE_64 | ||
| 3070 | Architectures: powerpc | ||
| 3071 | Type: vm ioctl | ||
| 3072 | Parameters: struct kvm_create_spapr_tce_64 (in) | ||
| 3073 | Returns: file descriptor for manipulating the created TCE table | ||
| 3074 | |||
| 3075 | This is an extension for KVM_CAP_SPAPR_TCE which only supports 32bit | ||
| 3076 | windows, described in 4.62 KVM_CREATE_SPAPR_TCE. | ||
| 3077 | |||
| 3078 | This capability uses an extended struct in the ioctl interface: | ||
| 3079 | |||
| 3080 | /* for KVM_CAP_SPAPR_TCE_64 */ | ||
| 3081 | struct kvm_create_spapr_tce_64 { | ||
| 3082 | __u64 liobn; | ||
| 3083 | __u32 page_shift; | ||
| 3084 | __u32 flags; | ||
| 3085 | __u64 offset; /* in pages */ | ||
| 3086 | __u64 size; /* in pages */ | ||
| 3087 | }; | ||
| 3088 | |||
| 3089 | The aim of the extension is to support an additional, bigger DMA window with | ||
| 3090 | a variable page size. | ||
| 3091 | KVM_CREATE_SPAPR_TCE_64 receives a 64bit window size, an IOMMU page shift and | ||
| 3092 | a bus offset of the corresponding DMA window; @size and @offset are numbers | ||
| 3093 | of IOMMU pages. | ||
| 3094 | |||
| 3095 | @flags are not used at the moment. | ||
| 3096 | |||
| 3097 | The rest of the functionality is identical to KVM_CREATE_SPAPR_TCE. | ||
| 3098 | |||
| 3099 | 4.99 KVM_REINJECT_CONTROL | ||
| 3100 | |||
| 3101 | Capability: KVM_CAP_REINJECT_CONTROL | ||
| 3102 | Architectures: x86 | ||
| 3103 | Type: vm ioctl | ||
| 3104 | Parameters: struct kvm_reinject_control (in) | ||
| 3105 | Returns: 0 on success, | ||
| 3106 | -EFAULT if struct kvm_reinject_control cannot be read, | ||
| 3107 | -ENXIO if KVM_CREATE_PIT or KVM_CREATE_PIT2 didn't succeed earlier. | ||
| 3108 | |||
| 3109 | i8254 (PIT) has two modes, reinject and !reinject. The default is reinject, | ||
| 3110 | where KVM queues elapsed i8254 ticks and monitors completion of interrupt from | ||
| 3111 | vector(s) that i8254 injects. Reinject mode dequeues a tick and injects its | ||
| 3112 | interrupt whenever there isn't a pending interrupt from i8254. | ||
| 3113 | !reinject mode injects an interrupt as soon as a tick arrives. | ||
| 3114 | |||
| 3115 | struct kvm_reinject_control { | ||
| 3116 | __u8 pit_reinject; | ||
| 3117 | __u8 reserved[31]; | ||
| 3118 | }; | ||
| 3119 | |||
| 3120 | pit_reinject = 0 (!reinject mode) is recommended, unless running an old | ||
| 3121 | operating system that uses the PIT for timing (e.g. Linux 2.4.x). | ||
| 3122 | |||
| 3038 | 5. The kvm_run structure | 3123 | 5. The kvm_run structure |
| 3039 | ------------------------ | 3124 | ------------------------ |
| 3040 | 3125 | ||
| @@ -3339,6 +3424,7 @@ EOI was received. | |||
| 3339 | 3424 | ||
| 3340 | struct kvm_hyperv_exit { | 3425 | struct kvm_hyperv_exit { |
| 3341 | #define KVM_EXIT_HYPERV_SYNIC 1 | 3426 | #define KVM_EXIT_HYPERV_SYNIC 1 |
| 3427 | #define KVM_EXIT_HYPERV_HCALL 2 | ||
| 3342 | __u32 type; | 3428 | __u32 type; |
| 3343 | union { | 3429 | union { |
| 3344 | struct { | 3430 | struct { |
| @@ -3347,6 +3433,11 @@ EOI was received. | |||
| 3347 | __u64 evt_page; | 3433 | __u64 evt_page; |
| 3348 | __u64 msg_page; | 3434 | __u64 msg_page; |
| 3349 | } synic; | 3435 | } synic; |
| 3436 | struct { | ||
| 3437 | __u64 input; | ||
| 3438 | __u64 result; | ||
| 3439 | __u64 params[2]; | ||
| 3440 | } hcall; | ||
| 3350 | } u; | 3441 | } u; |
| 3351 | }; | 3442 | }; |
| 3352 | /* KVM_EXIT_HYPERV */ | 3443 | /* KVM_EXIT_HYPERV */ |
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt index d1ad9d5cae46..e3e314cb83e8 100644 --- a/Documentation/virtual/kvm/devices/s390_flic.txt +++ b/Documentation/virtual/kvm/devices/s390_flic.txt | |||
| @@ -88,6 +88,8 @@ struct kvm_s390_io_adapter_req { | |||
| 88 | perform a gmap translation for the guest address provided in addr, | 88 | perform a gmap translation for the guest address provided in addr, |
| 89 | pin a userspace page for the translated address and add it to the | 89 | pin a userspace page for the translated address and add it to the |
| 90 | list of mappings | 90 | list of mappings |
| 91 | Note: A new mapping will be created unconditionally; therefore, | ||
| 92 | the calling code should avoid making duplicate mappings. | ||
| 91 | 93 | ||
| 92 | KVM_S390_IO_ADAPTER_UNMAP | 94 | KVM_S390_IO_ADAPTER_UNMAP |
| 93 | release a userspace page for the translated address specified in addr | 95 | release a userspace page for the translated address specified in addr |
diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virtual/kvm/devices/vcpu.txt new file mode 100644 index 000000000000..c04165868faf --- /dev/null +++ b/Documentation/virtual/kvm/devices/vcpu.txt | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | Generic vcpu interface | ||
| 2 | ==================================== | ||
| 3 | |||
| 4 | The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR, | ||
| 5 | KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct | ||
| 6 | kvm_device_attr as other devices, but targets VCPU-wide settings and controls. | ||
| 7 | |||
| 8 | The groups and attributes per virtual cpu, if any, are architecture specific. | ||
| 9 | |||
| 10 | 1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL | ||
| 11 | Architectures: ARM64 | ||
| 12 | |||
| 13 | 1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ | ||
| 14 | Parameters: kvm_device_attr.addr holds a pointer to an int containing the | ||
| 15 | PMU overflow interrupt number | ||
| 16 | Returns: -EBUSY: The PMU overflow interrupt is already set | ||
| 17 | -ENXIO: The overflow interrupt is not set when attempting to get it | ||
| 18 | -ENODEV: PMUv3 not supported | ||
| 19 | -EINVAL: Invalid PMU overflow interrupt number supplied | ||
| 20 | |||
| 21 | A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt | ||
| 22 | number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt | ||
| 23 | type must be the same for each vcpu. As a PPI, the interrupt number is the same for | ||
| 24 | all vcpus, while as an SPI it must be a separate number per vcpu. | ||
| 25 | |||
| 26 | 1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT | ||
| 27 | Parameters: no additional parameter in kvm_device_attr.addr | ||
| 28 | Returns: -ENODEV: PMUv3 not supported | ||
| 29 | -ENXIO: PMUv3 not properly configured as required prior to calling this | ||
| 30 | attribute | ||
| 31 | -EBUSY: PMUv3 already initialized | ||
| 32 | |||
| 33 | Request the initialization of the PMUv3. | ||
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index f083a168eb35..a9ea8774a45f 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt | |||
| @@ -84,3 +84,55 @@ Returns: -EBUSY in case 1 or more vcpus are already activated (only in write | |||
| 84 | -EFAULT if the given address is not accessible from kernel space | 84 | -EFAULT if the given address is not accessible from kernel space |
| 85 | -ENOMEM if not enough memory is available to process the ioctl | 85 | -ENOMEM if not enough memory is available to process the ioctl |
| 86 | 0 in case of success | 86 | 0 in case of success |
| 87 | |||
| 88 | 3. GROUP: KVM_S390_VM_TOD | ||
| 89 | Architectures: s390 | ||
| 90 | |||
| 91 | 3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH | ||
| 92 | |||
| 93 | Allows user space to set/get the TOD clock extension (u8). | ||
| 94 | |||
| 95 | Parameters: address of a buffer in user space to store the data (u8) to | ||
| 96 | Returns: -EFAULT if the given address is not accessible from kernel space | ||
| 97 | -EINVAL if setting the TOD clock extension to != 0 is not supported | ||
| 98 | |||
| 99 | 3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW | ||
| 100 | |||
| 101 | Allows user space to set/get bits 0-63 of the TOD clock register as defined in | ||
| 102 | the POP (u64). | ||
| 103 | |||
| 104 | Parameters: address of a buffer in user space to store the data (u64) to | ||
| 105 | Returns: -EFAULT if the given address is not accessible from kernel space | ||
| 106 | |||
| 107 | 4. GROUP: KVM_S390_VM_CRYPTO | ||
| 108 | Architectures: s390 | ||
| 109 | |||
| 110 | 4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o) | ||
| 111 | |||
| 112 | Allows user space to enable aes key wrapping, including generating a new | ||
| 113 | wrapping key. | ||
| 114 | |||
| 115 | Parameters: none | ||
| 116 | Returns: 0 | ||
| 117 | |||
| 118 | 4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o) | ||
| 119 | |||
| 120 | Allows user space to enable dea key wrapping, including generating a new | ||
| 121 | wrapping key. | ||
| 122 | |||
| 123 | Parameters: none | ||
| 124 | Returns: 0 | ||
| 125 | |||
| 126 | 4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o) | ||
| 127 | |||
| 128 | Allows user space to disable aes key wrapping, clearing the wrapping key. | ||
| 129 | |||
| 130 | Parameters: none | ||
| 131 | Returns: 0 | ||
| 132 | |||
| 133 | 4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o) | ||
| 134 | |||
| 135 | Allows user space to disable dea key wrapping, clearing the wrapping key. | ||
| 136 | |||
| 137 | Parameters: none | ||
| 138 | Returns: 0 | ||
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index c81731096a43..481b6a9c25d5 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt | |||
| @@ -392,11 +392,11 @@ To instantiate a large spte, four constraints must be satisfied: | |||
| 392 | write-protected pages | 392 | write-protected pages |
| 393 | - the guest page must be wholly contained by a single memory slot | 393 | - the guest page must be wholly contained by a single memory slot |
| 394 | 394 | ||
| 395 | To check the last two conditions, the mmu maintains a ->write_count set of | 395 | To check the last two conditions, the mmu maintains a ->disallow_lpage set of |
| 396 | arrays for each memory slot and large page size. Every write protected page | 396 | arrays for each memory slot and large page size. Every write protected page |
| 397 | causes its write_count to be incremented, thus preventing instantiation of | 397 | causes its disallow_lpage to be incremented, thus preventing instantiation of |
| 398 | a large spte. The frames at the end of an unaligned memory slot have | 398 | a large spte. The frames at the end of an unaligned memory slot have |
| 399 | artificially inflated ->write_counts so they can never be instantiated. | 399 | artificially inflated ->disallow_lpages so they can never be instantiated. |
| 400 | 400 | ||
| 401 | Zapping all pages (page generation count) | 401 | Zapping all pages (page generation count) |
| 402 | ========================================= | 402 | ========================================= |
