author     Linus Torvalds <torvalds@linux-foundation.org>   2015-11-05 19:26:26 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-11-05 19:26:26 -0500
commit     933425fb0010bd02bd459b41e63082756818ffce
tree       1cbc6c2035b9dcff8cb265c9ac562cbee7c6bb82
parent     a3e7531535a0c6e5acbaa5436f37933bb471aa95
parent     a3eaa8649e4c6a6afdafaa04b9114fb230617bb1
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"First batch of KVM changes for 4.4.
s390:
A bunch of fixes and optimizations for interrupt and time handling.
PPC:
Mostly bug fixes.
ARM:
No big features, but many small fixes and prerequisites including:
- a number of fixes for the arch-timer
- introducing proper level-triggered semantics for the arch-timers
- a series of patches to synchronously halt a guest (prerequisite
for IRQ forwarding)
- some tracepoint improvements
- a tweak for the EL2 panic handlers
- some more VGIC cleanups getting rid of redundant state
x86:
Quite a few changes:
- support for VT-d posted interrupts (i.e. PCI devices can inject
interrupts directly into vCPUs). This introduces a new
component (in virt/lib/) that connects VFIO and KVM together.
The same infrastructure will be used for ARM interrupt
forwarding as well.
- more Hyper-V features, though the main one, the Hyper-V synthetic
interrupt controller, will have to wait for 4.5. These will let
KVM expose Hyper-V devices.
- nested virtualization now supports VPID (same as PCID but for
vCPUs) which makes it quite a bit faster
- for future hardware that supports NVDIMM, there is support for
clflushopt, clwb, pcommit
- support for "split irqchip", i.e. LAPIC in kernel +
IOAPIC/PIC/PIT in userspace, which reduces the attack surface of
the hypervisor
- obligatory smattering of SMM fixes
- on the guest side, stable scheduler clock support was rewritten
to not require help from the hypervisor"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (123 commits)
KVM: VMX: Fix commit which broke PML
KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in kvm_set_cr0()
KVM: x86: allow RSM from 64-bit mode
KVM: VMX: fix SMEP and SMAP without EPT
KVM: x86: move kvm_set_irq_inatomic to legacy device assignment
KVM: device assignment: remove pointless #ifdefs
KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic
KVM: x86: zero apic_arb_prio on reset
drivers/hv: share Hyper-V SynIC constants with userspace
KVM: x86: handle SMBASE as physical address in RSM
KVM: x86: add read_phys to x86_emulate_ops
KVM: x86: removing unused variable
KVM: don't pointlessly leave KVM_COMPAT=y in non-KVM configs
KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr()
KVM: arm/arm64: Clean up vgic_retire_lr() and surroundings
KVM: arm/arm64: Optimize away redundant LR tracking
KVM: s390: use simple switch statement as multiplexer
KVM: s390: drop useless newline in debugging data
KVM: s390: SCA must not cross page boundaries
KVM: arm: Do not indent the arguments of DECLARE_BITMAP
...
89 files changed, 2956 insertions, 1029 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 688760f790b1..816bf2fe55f5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1585,6 +1585,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 		nosid	disable Source ID checking
 		no_x2apic_optout
 			BIOS x2APIC opt-out request will be ignored
+		nopost	disable Interrupt Posting
 
 	iomem=		Disable strict checking of access to MMIO memory
 			strict	regions from userspace.
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 29ece601008e..092ee9fbaf2b 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -401,10 +401,9 @@ Capability: basic
 Architectures: x86, ppc, mips
 Type: vcpu ioctl
 Parameters: struct kvm_interrupt (in)
-Returns: 0 on success, -1 on error
+Returns: 0 on success, negative on failure.
 
-Queues a hardware interrupt vector to be injected. This is only
-useful if in-kernel local APIC or equivalent is not used.
+Queues a hardware interrupt vector to be injected.
 
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
@@ -414,7 +413,14 @@ struct kvm_interrupt {
 
 X86:
 
-Note 'irq' is an interrupt vector, not an interrupt pin or line.
+Returns: 0 on success,
+         -EEXIST if an interrupt is already enqueued
+         -EINVAL if the irq number is invalid
+         -ENXIO if the PIC is in the kernel
+         -EFAULT if the pointer is invalid
+
+Note 'irq' is an interrupt vector, not an interrupt pin or line. This
+ioctl is useful if the in-kernel PIC is not used.
 
 PPC:
 
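As a purely illustrative aside (not part of the patch text above), a userspace VMM holding a vCPU file descriptor might queue a vector through this ioctl roughly as follows; in the x86 case described above, failures surface through errno as the codes listed (EEXIST, EINVAL, ENXIO, EFAULT). vcpu_fd is assumed to come from KVM_CREATE_VCPU elsewhere.

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Sketch: inject an interrupt vector into a vCPU that runs without an
 * in-kernel PIC/LAPIC handling this vector. */
static int inject_vector(int vcpu_fd, unsigned int vector)
{
	struct kvm_interrupt irq = { .irq = vector };

	/* 0 on success, -1 with errno set on failure */
	return ioctl(vcpu_fd, KVM_INTERRUPT, &irq);
}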
@@ -1598,7 +1604,7 @@ provided event instead of triggering an exit.
 struct kvm_ioeventfd {
 	__u64 datamatch;
 	__u64 addr;        /* legal pio/mmio address */
-	__u32 len;         /* 1, 2, 4, or 8 bytes */
+	__u32 len;         /* 0, 1, 2, 4, or 8 bytes */
 	__s32 fd;
 	__u32 flags;
 	__u8  pad[36];
@@ -1621,6 +1627,10 @@ to the registered address is equal to datamatch in struct kvm_ioeventfd.
 For virtio-ccw devices, addr contains the subchannel id and datamatch the
 virtqueue index.
 
+With KVM_CAP_IOEVENTFD_ANY_LENGTH, a zero length ioeventfd is allowed, and
+the kernel will ignore the length of the guest write and may get a faster
+vmexit.  The speedup may only apply to specific architectures, but the
+ioeventfd will work anyway.
 
 4.60 KVM_DIRTY_TLB
 
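A rough userspace sketch of the new zero-length mode, assuming vm_fd and an eventfd efd already exist and KVM_CHECK_EXTENSION reported KVM_CAP_IOEVENTFD_ANY_LENGTH; doorbell_gpa is a made-up guest physical address used only for illustration.

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int register_doorbell(int vm_fd, int efd, unsigned long long doorbell_gpa)
{
	struct kvm_ioeventfd ioevent = {
		.addr  = doorbell_gpa,	/* MMIO address to watch */
		.len   = 0,		/* zero length: any access width matches */
		.fd    = efd,
		.flags = 0,		/* wildcard, no datamatch */
	};

	return ioctl(vm_fd, KVM_IOEVENTFD, &ioevent);
}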
@@ -3309,6 +3319,18 @@ Valid values for 'type' are:
 	   to ignore the request, or to gather VM memory core dump and/or
 	   reset/shutdown of the VM.
 
+		/* KVM_EXIT_IOAPIC_EOI */
+		struct {
+			__u8 vector;
+		} eoi;
+
+Indicates that the VCPU's in-kernel local APIC received an EOI for a
+level-triggered IOAPIC interrupt.  This exit only triggers when the
+IOAPIC is implemented in userspace (i.e. KVM_CAP_SPLIT_IRQCHIP is enabled);
+the userspace IOAPIC should process the EOI and retrigger the interrupt if
+it is still asserted.  Vector is the LAPIC interrupt vector for which the
+EOI was received.
+
 		/* Fix the size of the union. */
 		char padding[256];
 	};
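For illustration only, a VMM run loop that implements the userspace IOAPIC might react to this exit along these lines; ioapic_set_eoi() is a hypothetical helper of that VMM, not a KVM interface.

#include <linux/kvm.h>

/* hypothetical userspace IOAPIC model: clears Remote IRR and re-injects
 * the line if it is still asserted */
extern void ioapic_set_eoi(unsigned char vector);

void handle_vcpu_exit(struct kvm_run *run)	/* run is mmap()ed from the vcpu fd */
{
	switch (run->exit_reason) {
	case KVM_EXIT_IOAPIC_EOI:
		ioapic_set_eoi(run->eoi.vector);
		break;
	default:
		break;
	}
}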
@@ -3627,6 +3649,26 @@ struct {
 
 KVM handlers should exit to userspace with rc = -EREMOTE.
 
+7.5 KVM_CAP_SPLIT_IRQCHIP
+
+Architectures: x86
+Parameters: args[0] - number of routes reserved for userspace IOAPICs
+Returns: 0 on success, -1 on error
+
+Create a local apic for each processor in the kernel.  This can be used
+instead of KVM_CREATE_IRQCHIP if the userspace VMM wishes to emulate the
+IOAPIC and PIC (and also the PIT, even though this has to be enabled
+separately).
+
+This capability also enables in-kernel routing of interrupt requests;
+when KVM_CAP_SPLIT_IRQCHIP is enabled, only routes of KVM_IRQ_ROUTING_MSI
+type are used in the IRQ routing table.  The first args[0] MSI routes are
+reserved for the IOAPIC pins.  Whenever the LAPIC receives an EOI for these
+routes, a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
+
+Fails if VCPU has already been created, or if the irqchip is already in the
+kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
+
 
 8. Other capabilities.
 ----------------------
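As a sketch of how a VMM might opt in (again, not part of the patch), the capability is enabled on the VM fd before any vCPU is created; 24 is used here simply because it is the usual number of IOAPIC pins, not a value mandated by KVM.

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int enable_split_irqchip(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_SPLIT_IRQCHIP,
		.args[0] = 24,	/* MSI routes reserved for the userspace IOAPIC pins */
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}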
diff --git a/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt b/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
new file mode 100644
index 000000000000..38bca2835278
--- /dev/null
+++ b/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
@@ -0,0 +1,187 @@
+KVM/ARM VGIC Forwarded Physical Interrupts
+==========================================
+
+The KVM/ARM code implements software support for the ARM Generic
+Interrupt Controller's (GIC's) hardware support for virtualization by
+allowing software to inject virtual interrupts to a VM, which the guest
+OS sees as regular interrupts.  The code is famously known as the VGIC.
+
+Some of these virtual interrupts, however, correspond to physical
+interrupts from real physical devices.  One example could be the
+architected timer, which itself supports virtualization, and therefore
+lets a guest OS program the hardware device directly to raise an
+interrupt at some point in time.  When such an interrupt is raised, the
+host OS initially handles the interrupt and must somehow signal this
+event as a virtual interrupt to the guest.  Another example could be a
+passthrough device, where the physical interrupts are initially handled
+by the host, but the device driver for the device lives in the guest OS
+and KVM must therefore somehow inject a virtual interrupt on behalf of
+the physical one to the guest OS.
+
+These virtual interrupts corresponding to a physical interrupt on the
+host are called forwarded physical interrupts, but are also sometimes
+referred to as 'virtualized physical interrupts' and 'mapped interrupts'.
+
+Forwarded physical interrupts are handled slightly differently compared
+to virtual interrupts generated purely by a software emulated device.
+
+
+The HW bit
+----------
+Virtual interrupts are signalled to the guest by programming the List
+Registers (LRs) on the GIC before running a VCPU.  The LR is programmed
+with the virtual IRQ number and the state of the interrupt (Pending,
+Active, or Pending+Active).  When the guest ACKs and EOIs a virtual
+interrupt, the LR state moves from Pending to Active, and finally to
+inactive.
+
+The LRs include an extra bit, called the HW bit.  When this bit is set,
+KVM must also program an additional field in the LR, the physical IRQ
+number, to link the virtual with the physical IRQ.
+
+When the HW bit is set, KVM must EITHER set the Pending OR the Active
+bit, never both at the same time.
+
+Setting the HW bit causes the hardware to deactivate the physical
+interrupt on the physical distributor when the guest deactivates the
+corresponding virtual interrupt.
+
+
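To make the HW-bit description concrete, here is a small illustrative encoding helper; the field layout below follows the GICv2 list-register format but is written out locally as an assumption rather than quoting the kernel's own GICH_LR_* macros.

/* Sketch: build a GICv2 list-register value for a forwarded physical IRQ.
 * With the HW bit set, only Pending (or only Active) may be programmed. */
#define LR_VIRTUALID_MASK	0x3ffU		/* bits [9:0]  : virtual IRQ  */
#define LR_PHYSID_SHIFT		10		/* bits [19:10]: physical IRQ */
#define LR_STATE_PENDING	(1U << 28)
#define LR_HW			(1U << 31)

static unsigned int make_forwarded_lr(unsigned int virq, unsigned int pirq)
{
	return (virq & LR_VIRTUALID_MASK) |
	       ((pirq & LR_VIRTUALID_MASK) << LR_PHYSID_SHIFT) |
	       LR_STATE_PENDING | LR_HW;
}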
+
+Forwarded Physical Interrupts Life Cycle
+----------------------------------------
+
+The state of forwarded physical interrupts is managed in the following way:
+
+  - The physical interrupt is acked by the host, and becomes active on
+    the physical distributor (*).
+  - KVM sets the LR.Pending bit, because this is the only way the GICV
+    interface is going to present it to the guest.
+  - LR.Pending will stay set as long as the guest has not acked the interrupt.
+  - LR.Pending transitions to LR.Active on the guest read of the IAR, as
+    expected.
+  - On guest EOI, the *physical distributor* active bit gets cleared,
+    but the LR.Active is left untouched (set).
+  - KVM clears the LR on VM exits when the physical distributor
+    active state has been cleared.
+
+(*): The host handling is slightly more complicated.  For some forwarded
+interrupts (shared), KVM directly sets the active state on the physical
+distributor before entering the guest, because the interrupt is never actually
+handled on the host (see details on the timer as an example below).  For other
+forwarded interrupts (non-shared) the host does not deactivate the interrupt
+when the host ISR completes, but leaves the interrupt active until the guest
+deactivates it.  Leaving the interrupt active is allowed, because Linux
+configures the physical GIC with EOIMode=1, which causes EOI operations to
+perform a priority drop allowing the GIC to receive other interrupts of the
+default priority.
+
+
+Forwarded Edge and Level Triggered PPIs and SPIs
+------------------------------------------------
+Forwarded physical interrupts injected should always be active on the
+physical distributor when injected to a guest.
+
+Level-triggered interrupts will keep the interrupt line to the GIC
+asserted, typically until the guest programs the device to deassert the
+line.  This means that the interrupt will remain pending on the physical
+distributor until the guest has reprogrammed the device.  Since we
+always run the VM with interrupts enabled on the CPU, a pending
+interrupt will exit the guest as soon as we switch into the guest,
+preventing the guest from ever making progress as the process repeats
+over and over.  Therefore, the active state on the physical distributor
+must be set when entering the guest, preventing the GIC from forwarding
+the pending interrupt to the CPU.  As soon as the guest deactivates the
+interrupt, the physical line is sampled by the hardware again and the host
+takes a new interrupt if and only if the physical line is still asserted.
+
+Edge-triggered interrupts do not exhibit the same problem with
+preventing guest execution that level-triggered interrupts do.  One
+option is to not use the HW bit at all, and inject edge-triggered interrupts
+from a physical device as pure virtual interrupts.  But that would
+potentially slow down handling of the interrupt in the guest, because a
+physical interrupt occurring in the middle of the guest ISR would
+preempt the guest for the host to handle the interrupt.  Additionally,
+if you configure the system to handle interrupts on a separate physical
+core from that running your VCPU, you still have to interrupt the VCPU
+to queue the pending state onto the LR, even though the guest won't use
+this information until the guest ISR completes.  Therefore, the HW
+bit should always be set for forwarded edge-triggered interrupts.  With
+the HW bit set, the virtual interrupt is injected and additional
+physical interrupts occurring before the guest deactivates the interrupt
+simply mark the state on the physical distributor as Pending+Active.  As
+soon as the guest deactivates the interrupt, the host takes another
+interrupt if and only if there was a physical interrupt between injecting
+the forwarded interrupt to the guest and the guest deactivating the
+interrupt.
+
+Consequently, whenever we schedule a VCPU with one or more LRs with the
+HW bit set, the interrupt must also be active on the physical
+distributor.
+
+
+Forwarded LPIs
+--------------
+LPIs, introduced in GICv3, are always edge-triggered and do not have an
+active state.  They become pending when a device signals them, and as
+soon as they are acked by the CPU, they are inactive again.
+
+It therefore doesn't make sense, and is not supported, to set the HW bit
+for physical LPIs that are forwarded to a VM as virtual interrupts,
+typically virtual SPIs.
+
+For LPIs, there is no other choice than to preempt the VCPU thread if
+necessary, and queue the pending state onto the LR.
+
+
+Putting It Together: The Architected Timer
+------------------------------------------
+The architected timer is a device that signals interrupts with level
+triggered semantics.  The timer hardware is directly accessed by VCPUs
+which program the timer to fire at some point in time.  Each VCPU on a
+system programs the timer to fire at different times, and therefore the
+hardware is multiplexed between multiple VCPUs.  This is implemented by
+context-switching the timer state along with each VCPU thread.
+
+However, this means that a scenario like the following is entirely
+possible, and in fact, typical:
+
+1.  KVM runs the VCPU
+2.  The guest programs the timer to fire in T+100
+3.  The guest is idle and calls WFI (wait-for-interrupts)
+4.  The hardware traps to the host
+5.  KVM stores the timer state to memory and disables the hardware timer
+6.  KVM schedules a soft timer to fire in T+(100 - time since step 2)
+7.  KVM puts the VCPU thread to sleep (on a waitqueue)
+8.  The soft timer fires, waking up the VCPU thread
+9.  KVM reprograms the timer hardware with the VCPU's values
+10. KVM marks the timer interrupt as active on the physical distributor
+11. KVM injects a forwarded physical interrupt to the guest
+12. KVM runs the VCPU
+
+Notice that KVM injects a forwarded physical interrupt in step 11 without
+the corresponding interrupt having actually fired on the host.  That is
+exactly why we mark the timer interrupt as active in step 10, because
+the active state on the physical distributor is part of the state
+belonging to the timer hardware, which is context-switched along with
+the VCPU thread.
+
+If the guest does not idle because it is busy, the flow looks like this
+instead:
+
+1.  KVM runs the VCPU
+2.  The guest programs the timer to fire in T+100
+3.  At T+100 the timer fires and a physical IRQ causes the VM to exit
+    (note that this initially only traps to EL2 and does not run the host ISR
+    until KVM has returned to the host).
+4.  With interrupts still disabled on the CPU coming back from the guest, KVM
+    stores the virtual timer state to memory and disables the virtual hw timer.
+5.  KVM looks at the timer state (in memory) and injects a forwarded physical
+    interrupt because it concludes the timer has expired.
+6.  KVM marks the timer interrupt as active on the physical distributor
+7.  KVM enables the timer, enables interrupts, and runs the VCPU
+
+Notice that again the forwarded physical interrupt is injected to the
+guest without having actually been handled on the host.  In this case it
+is because the physical interrupt is never actually seen by the host because
+the timer is disabled upon guest return, and the virtual forwarded interrupt
+is injected on the KVM guest entry path.
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index 3fb905429e8a..59541d49e15c 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -44,28 +44,29 @@ Groups:
   Attributes:
     The attr field of kvm_device_attr encodes two values:
     bits:   | 63   ....   40 | 39 ..  32  | 31   ....    0 |
-    values: |    reserved    |   cpu id   |     offset     |
+    values: |    reserved    | vcpu_index |     offset     |
 
   All distributor regs are (rw, 32-bit)
 
   The offset is relative to the "Distributor base address" as defined in the
   GICv2 specs.  Getting or setting such a register has the same effect as
-  reading or writing the register on the actual hardware from the cpu
-  specified with cpu id field.  Note that most distributor fields are not
-  banked, but return the same value regardless of the cpu id used to access
-  the register.
+  reading or writing the register on the actual hardware from the cpu whose
+  index is specified with the vcpu_index field.  Note that most distributor
+  fields are not banked, but return the same value regardless of the
+  vcpu_index used to access the register.
   Limitations:
     - Priorities are not implemented, and registers are RAZ/WI
     - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
-    -ENODEV: Getting or setting this register is not yet supported
+    -ENXIO: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
+    -EINVAL: Invalid vcpu_index supplied
 
   KVM_DEV_ARM_VGIC_GRP_CPU_REGS
   Attributes:
     The attr field of kvm_device_attr encodes two values:
     bits:   | 63   ....   40 | 39 ..  32  | 31   ....    0 |
-    values: |    reserved    |   cpu id   |     offset     |
+    values: |    reserved    | vcpu_index |     offset     |
 
   All CPU interface regs are (rw, 32-bit)
 
@@ -91,8 +92,9 @@ Groups:
     - Priorities are not implemented, and registers are RAZ/WI
     - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
-    -ENODEV: Getting or setting this register is not yet supported
+    -ENXIO: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
+    -EINVAL: Invalid vcpu_index supplied
 
   KVM_DEV_ARM_VGIC_GRP_NR_IRQS
   Attributes:
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index d68af4dc3006..19f94a6b9bb0 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -166,3 +166,15 @@ Comment:	The srcu read lock must be held while accessing memslots (e.g.
 		MMIO/PIO address->device structure mapping (kvm->buses).
 		The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
 		if it is needed by multiple functions.
+
+Name:		blocked_vcpu_on_cpu_lock
+Type:		spinlock_t
+Arch:		x86
+Protects:	blocked_vcpu_on_cpu
+Comment:	This is a per-CPU lock and it is used for VT-d posted-interrupts.
+		When VT-d posted-interrupts are supported and the VM has
+		assigned devices, we put the blocked vCPU on the list
+		blocked_vcpu_on_cpu protected by blocked_vcpu_on_cpu_lock.
+		When the VT-d hardware issues a wakeup notification event
+		(because an external interrupt from an assigned device has
+		arrived), we find the vCPU on the list and wake it up.
diff --git a/MAINTAINERS b/MAINTAINERS
index 1ef6adc990a6..7301ae17ec63 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11348,6 +11348,13 @@ L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/via/via-velocity.*
 
+VIRT LIB
+M:	Alex Williamson <alex.williamson@redhat.com>
+M:	Paolo Bonzini <pbonzini@redhat.com>
+L:	kvm@vger.kernel.org
+S:	Supported
+F:	virt/lib/
+
 VIVID VIRTUAL VIDEO DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
 L:	linux-media@vger.kernel.org
diff --git a/Makefile b/Makefile
@@ -550,6 +550,7 @@ drivers-y	:= drivers/ sound/ firmware/
 net-y		:= net/
 libs-y		:= lib/
 core-y		:= usr/
+virt-y		:= virt/
 endif # KBUILD_EXTMOD
 
 ifeq ($(dot-config),1)
@@ -882,10 +883,10 @@ core-y		+= kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
 
 vmlinux-dirs	:= $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
 		     $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
-		     $(net-y) $(net-m) $(libs-y) $(libs-m)))
+		     $(net-y) $(net-m) $(libs-y) $(libs-m) $(virt-y)))
 
 vmlinux-alldirs	:= $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \
-		     $(init-) $(core-) $(drivers-) $(net-) $(libs-))))
+		     $(init-) $(core-) $(drivers-) $(net-) $(libs-) $(virt-))))
 
 init-y		:= $(patsubst %/, %/built-in.o, $(init-y))
 core-y		:= $(patsubst %/, %/built-in.o, $(core-y))
@@ -894,14 +895,15 @@ net-y		:= $(patsubst %/, %/built-in.o, $(net-y))
 libs-y1		:= $(patsubst %/, %/lib.a, $(libs-y))
 libs-y2		:= $(patsubst %/, %/built-in.o, $(libs-y))
 libs-y		:= $(libs-y1) $(libs-y2)
+virt-y		:= $(patsubst %/, %/built-in.o, $(virt-y))
 
 # Externally visible symbols (used by link-vmlinux.sh)
 export KBUILD_VMLINUX_INIT := $(head-y) $(init-y)
-export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y)
+export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) $(virt-y)
 export KBUILD_LDS          := arch/$(SRCARCH)/kernel/vmlinux.lds
 export LDFLAGS_vmlinux
 # used by scripts/pacmage/Makefile
-export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools virt)
+export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools)
 
 vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN)
 
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index d995821f1698..dc641ddf0784 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -218,4 +218,24 @@
 #define HSR_DABT_CM		(1U << 8)
 #define HSR_DABT_EA		(1U << 9)
 
+#define kvm_arm_exception_type	\
+	{0, "RESET" },		\
+	{1, "UNDEFINED" },	\
+	{2, "SOFTWARE" },	\
+	{3, "PREF_ABORT" },	\
+	{4, "DATA_ABORT" },	\
+	{5, "IRQ" },		\
+	{6, "FIQ" },		\
+	{7, "HVC" }
+
+#define HSRECN(x) { HSR_EC_##x, #x }
+
+#define kvm_arm_exception_class \
+	HSRECN(UNKNOWN), HSRECN(WFI), HSRECN(CP15_32), HSRECN(CP15_64), \
+	HSRECN(CP14_MR), HSRECN(CP14_LS), HSRECN(CP_0_13), HSRECN(CP10_ID), \
+	HSRECN(JAZELLE), HSRECN(BXJ), HSRECN(CP14_64), HSRECN(SVC_HYP), \
+	HSRECN(HVC), HSRECN(SMC), HSRECN(IABT), HSRECN(IABT_HYP), \
+	HSRECN(DABT), HSRECN(DABT_HYP)
+
+
 #endif /* __ARM_KVM_ARM_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index c4072d9f32c7..6692982c9b57 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -126,7 +126,10 @@ struct kvm_vcpu_arch {
 	 * here.
 	 */
 
-	/* Don't run the guest on this vcpu */
+	/* vcpu power-off state */
+	bool power_off;
+
+	/* Don't run the guest (internal implementation need) */
 	bool pause;
 
 	/* IO related fields */
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 356970f3b25e..95a000515e43 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -46,4 +46,6 @@ config KVM_ARM_HOST
 	---help---
 	  Provides host support for ARM processors.
 
+source drivers/vhost/Kconfig
+
 endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 78b286994577..eab83b2435b8 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -271,6 +271,16 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 	return kvm_timer_should_fire(vcpu);
 }
 
+void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
+{
+	kvm_timer_schedule(vcpu);
+}
+
+void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
+{
+	kvm_timer_unschedule(vcpu);
+}
+
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	/* Force users to call KVM_ARM_VCPU_INIT */
@@ -308,7 +318,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
-	if (vcpu->arch.pause)
+	if (vcpu->arch.power_off)
 		mp_state->mp_state = KVM_MP_STATE_STOPPED;
 	else
 		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
@@ -321,10 +331,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 {
 	switch (mp_state->mp_state) {
 	case KVM_MP_STATE_RUNNABLE:
-		vcpu->arch.pause = false;
+		vcpu->arch.power_off = false;
 		break;
 	case KVM_MP_STATE_STOPPED:
-		vcpu->arch.pause = true;
+		vcpu->arch.power_off = true;
 		break;
 	default:
 		return -EINVAL;
@@ -342,7 +352,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
  */
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-	return !!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v);
+	return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v))
+		&& !v->arch.power_off && !v->arch.pause);
 }
 
 /* Just ensure a guest exit from a particular CPU */
@@ -468,11 +479,38 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
 	return vgic_initialized(kvm);
 }
 
-static void vcpu_pause(struct kvm_vcpu *vcpu)
+static void kvm_arm_halt_guest(struct kvm *kvm) __maybe_unused;
+static void kvm_arm_resume_guest(struct kvm *kvm) __maybe_unused;
+
+static void kvm_arm_halt_guest(struct kvm *kvm)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		vcpu->arch.pause = true;
+	force_vm_exit(cpu_all_mask);
+}
+
+static void kvm_arm_resume_guest(struct kvm *kvm)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+
+		vcpu->arch.pause = false;
+		wake_up_interruptible(wq);
+	}
+}
+
+static void vcpu_sleep(struct kvm_vcpu *vcpu)
 {
 	wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
 
-	wait_event_interruptible(*wq, !vcpu->arch.pause);
+	wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
+				       (!vcpu->arch.pause)));
 }
 
 static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
@@ -522,8 +560,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 		update_vttbr(vcpu->kvm);
 
-		if (vcpu->arch.pause)
-			vcpu_pause(vcpu);
+		if (vcpu->arch.power_off || vcpu->arch.pause)
+			vcpu_sleep(vcpu);
 
 		/*
 		 * Disarming the background timer must be done in a
@@ -549,11 +587,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			run->exit_reason = KVM_EXIT_INTR;
 		}
 
-		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
+		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
+			vcpu->arch.power_off || vcpu->arch.pause) {
 			local_irq_enable();
+			kvm_timer_sync_hwstate(vcpu);
 			kvm_vgic_sync_hwstate(vcpu);
 			preempt_enable();
-			kvm_timer_sync_hwstate(vcpu);
 			continue;
 		}
 
@@ -596,14 +635,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 * guest time.
 		 */
 		kvm_guest_exit();
-		trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+
+		/*
+		 * We must sync the timer state before the vgic state so that
+		 * the vgic can properly sample the updated state of the
+		 * interrupt line.
+		 */
+		kvm_timer_sync_hwstate(vcpu);
 
 		kvm_vgic_sync_hwstate(vcpu);
 
 		preempt_enable();
 
-		kvm_timer_sync_hwstate(vcpu);
-
 		ret = handle_exit(vcpu, run, ret);
 	}
 
@@ -765,12 +809,12 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	vcpu_reset_hcr(vcpu);
 
 	/*
-	 * Handle the "start in power-off" case by marking the VCPU as paused.
+	 * Handle the "start in power-off" case.
 	 */
 	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
-		vcpu->arch.pause = true;
+		vcpu->arch.power_off = true;
 	else
-		vcpu->arch.pause = false;
+		vcpu->arch.power_off = false;
 
 	return 0;
 }
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index ad6f6424f1d1..0b556968a6da 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -63,7 +63,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
 
 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.pause = true;
+	vcpu->arch.power_off = true;
 }
 
 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
@@ -87,7 +87,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 */
 	if (!vcpu)
 		return PSCI_RET_INVALID_PARAMS;
-	if (!vcpu->arch.pause) {
+	if (!vcpu->arch.power_off) {
 		if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
 			return PSCI_RET_ALREADY_ON;
 		else
@@ -115,7 +115,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 * the general puspose registers are undefined upon CPU_ON.
 	 */
 	*vcpu_reg(vcpu, 0) = context_id;
-	vcpu->arch.pause = false;
+	vcpu->arch.power_off = false;
 	smp_mb();		/* Make sure the above is visible */
 
 	wq = kvm_arch_vcpu_wq(vcpu);
@@ -153,7 +153,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 		mpidr = kvm_vcpu_get_mpidr_aff(tmp);
 		if ((mpidr & target_affinity_mask) == target_affinity) {
 			matching_cpus++;
-			if (!tmp->arch.pause)
+			if (!tmp->arch.power_off)
 				return PSCI_0_2_AFFINITY_LEVEL_ON;
 		}
 	}
@@ -179,7 +179,7 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
 	 * re-initialized.
 	 */
 	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
-		tmp->arch.pause = true;
+		tmp->arch.power_off = true;
 		kvm_vcpu_kick(tmp);
 	}
 
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index 0ec35392d208..c25a88598eb0 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -25,21 +25,25 @@ TRACE_EVENT(kvm_entry,
 );
 
 TRACE_EVENT(kvm_exit,
-	TP_PROTO(unsigned int exit_reason, unsigned long vcpu_pc),
-	TP_ARGS(exit_reason, vcpu_pc),
+	TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc),
+	TP_ARGS(idx, exit_reason, vcpu_pc),
 
 	TP_STRUCT__entry(
+		__field(	int,		idx		)
 		__field(	unsigned int,	exit_reason	)
 		__field(	unsigned long,	vcpu_pc		)
 	),
 
 	TP_fast_assign(
+		__entry->idx = idx;
 		__entry->exit_reason = exit_reason;
 		__entry->vcpu_pc = vcpu_pc;
 	),
 
-	TP_printk("HSR_EC: 0x%04x, PC: 0x%08lx",
+	TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
+		  __print_symbolic(__entry->idx, kvm_arm_exception_type),
 		  __entry->exit_reason,
+		  __print_symbolic(__entry->exit_reason, kvm_arm_exception_class),
 		  __entry->vcpu_pc)
 );
 
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 9694f2654593..5e6857b6bdc4 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -200,4 +200,20 @@
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK	(~UL(0xf))
 
+#define kvm_arm_exception_type	\
+	{0, "IRQ" },		\
+	{1, "TRAP" }
+
+#define ECN(x) { ESR_ELx_EC_##x, #x }
+
+#define kvm_arm_exception_class \
+	ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \
+	ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(CP14_64), ECN(SVC64), \
+	ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(IMP_DEF), ECN(IABT_LOW), \
+	ECN(IABT_CUR), ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \
+	ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
+	ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
+	ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
+	ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
+
 #endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index ed039688c221..a35ce7266aac 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -149,7 +149,10 @@ struct kvm_vcpu_arch {
 		u32	mdscr_el1;
 	} guest_debug_preserved;
 
-	/* Don't run the guest */
+	/* vcpu power-off state */
+	bool power_off;
+
+	/* Don't run the guest (internal implementation need) */
 	bool pause;
 
 	/* IO related fields */
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index c9d1f34daab1..a5272c07d1cb 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -48,4 +48,6 @@ config KVM_ARM_HOST
 	---help---
 	  Provides host support for ARM processors.
 
+source drivers/vhost/Kconfig
+
 endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index e5836138ec42..1599701ef044 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -880,6 +880,14 @@ __kvm_hyp_panic:
 
 	bl __restore_sysregs
 
+	/*
+	 * Make sure we have a valid host stack, and don't leave junk in the
+	 * frame pointer that will give us a misleading host stack unwinding.
+	 */
+	ldr	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+	msr	sp_el1, x22
+	mov	x29, xzr
+
 1:	adr	x0, __hyp_panic_str
 	adr	x1, 2f
 	ldp	x2, x3, [x1]
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 5a1a882e0a75..6ded8d347af9 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -847,5 +847,7 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 		struct kvm_memory_slot *slot) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
 #endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
index 6330a61b875a..4852e849128b 100644
--- a/arch/powerpc/include/asm/disassemble.h
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -42,6 +42,11 @@ static inline unsigned int get_dcrn(u32 inst)
 	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
 }
 
+static inline unsigned int get_tmrn(u32 inst)
+{
+	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
 static inline unsigned int get_rt(u32 inst)
 {
 	return (inst >> 21) & 0x1f;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 887c259556df..cfa758c6b4f6 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -716,5 +716,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot
 static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_exit(void) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 16547efa2d5a..2fef74b474f0 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -742,6 +742,12 @@
 #define MMUBE1_VBE4		0x00000002
 #define MMUBE1_VBE5		0x00000001
 
+#define TMRN_TMCFG0	16	/* Thread Management Configuration Register 0 */
+#define TMRN_TMCFG0_NPRIBITS	0x003f0000 /* Bits of thread priority */
+#define TMRN_TMCFG0_NPRIBITS_SHIFT	16
+#define TMRN_TMCFG0_NATHRD	0x00003f00 /* Number of active threads */
+#define TMRN_TMCFG0_NATHRD_SHIFT	8
+#define TMRN_TMCFG0_NTHRD	0x0000003f /* Number of threads */
 #define TMRN_IMSR0	0x120	/* Initial MSR Register 0 (e6500) */
 #define TMRN_IMSR1	0x121	/* Initial MSR Register 1 (e6500) */
 #define TMRN_INIA0	0x140	/* Next Instruction Address Register 0 */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 1f9c0a17f445..10722b1e38b5 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -70,7 +70,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 	}
 
 	/* Lastly try successively smaller sizes from the page allocator */
-	while (!hpt && order > PPC_MIN_HPT_ORDER) {
+	/* Only do this if userspace didn't specify a size via ioctl */
+	while (!hpt && order > PPC_MIN_HPT_ORDER && !htab_orderp) {
 		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
 				       __GFP_NOWARN, order - PAGE_SHIFT);
 		if (!hpt)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index c1df9bb1e413..97e7f8c853d8 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -470,6 +470,8 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 	note_hpte_modification(kvm, rev);
 	unlock_hpte(hpte, 0);
 
+	if (v & HPTE_V_ABSENT)
+		v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
 	hpret[0] = v;
 	hpret[1] = r;
 	return H_SUCCESS;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b98889e9851d..b1dab8d1d885 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -150,6 +150,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	cmpwi	cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
 	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
 	beq	11f
+	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
+	beq	15f	/* Invoke the H_DOORBELL handler */
 	cmpwi	cr2, r12, BOOK3S_INTERRUPT_HMI
 	beq	cr2, 14f			/* HMI check */
 
@@ -174,6 +176,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mtspr	SPRN_HSRR1, r7
 	b	hmi_exception_after_realmode
 
+15:	mtspr	SPRN_HSRR0, r8
+	mtspr	SPRN_HSRR1, r7
+	ba	0xe80
+
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
@@ -2377,7 +2383,6 @@ machine_check_realmode:
 	mr	r3, r9		/* get vcpu pointer */
 	bl	kvmppc_realmode_machine_check
 	nop
-	cmpdi	r3, 0		/* Did we handle MCE ? */
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
 	/*
@@ -2390,13 +2395,18 @@ machine_check_realmode:
 	 * The old code used to return to host for unhandled errors which
 	 * was causing guest to hang with soft lockups inside guest and
 	 * makes it difficult to recover guest instance.
+	 *
+	 * if we receive machine check with MSR(RI=0) then deliver it to
+	 * guest as machine check causing guest to crash.
 	 */
-	ld	r10, VCPU_PC(r9)
 	ld	r11, VCPU_MSR(r9)
+	andi.	r10, r11, MSR_RI	/* check for unrecoverable exception */
+	beq	1f			/* Deliver a machine check to guest */
+	ld	r10, VCPU_PC(r9)
+	cmpdi	r3, 0		/* Did we handle MCE ? */
 	bne	2f		/* Continue guest execution. */
 	/* If not, deliver a machine check.  SRR0/1 are already set */
-	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
-	ld	r11, VCPU_MSR(r9)
+1:	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
 	bl	kvmppc_msr_interrupt
 2:	b	fast_interrupt_c_return
 
@@ -2436,14 +2446,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 	/* hypervisor doorbell */
 3:	li	r12, BOOK3S_INTERRUPT_H_DOORBELL
+
+	/*
+	 * Clear the doorbell as we will invoke the handler
+	 * explicitly in the guest exit path.
+	 */
+	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
+	PPC_MSGCLR(6)
 	/* see if it's a host IPI */
 	li	r3, 1
 	lbz	r0, HSTATE_HOST_IPI(r13)
 	cmpwi	r0, 0
 	bnelr
-	/* if not, clear it and return -1 */
-	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
-	PPC_MSGCLR(6)
+	/* if not, return -1 */
 	li	r3, -1
 	blr
 
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index b29ce752c7d6..32fdab57d604 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -237,7 +237,8 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
 			   struct kvm_book3e_206_tlb_entry *gtlbe)
 {
 	struct vcpu_id_table *idt = vcpu_e500->idt;
-	unsigned int pr, tid, ts, pid;
+	unsigned int pr, tid, ts;
+	int pid;
 	u32 val, eaddr;
 	unsigned long flags;
 
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index ce7291c79f6c..990db69a1d0b 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <asm/kvm_ppc.h> | 15 | #include <asm/kvm_ppc.h> |
16 | #include <asm/disassemble.h> | 16 | #include <asm/disassemble.h> |
17 | #include <asm/dbell.h> | 17 | #include <asm/dbell.h> |
18 | #include <asm/reg_booke.h> | ||
18 | 19 | ||
19 | #include "booke.h" | 20 | #include "booke.h" |
20 | #include "e500.h" | 21 | #include "e500.h" |
@@ -22,6 +23,7 @@ | |||
22 | #define XOP_DCBTLS 166 | 23 | #define XOP_DCBTLS 166 |
23 | #define XOP_MSGSND 206 | 24 | #define XOP_MSGSND 206 |
24 | #define XOP_MSGCLR 238 | 25 | #define XOP_MSGCLR 238 |
26 | #define XOP_MFTMR 366 | ||
25 | #define XOP_TLBIVAX 786 | 27 | #define XOP_TLBIVAX 786 |
26 | #define XOP_TLBSX 914 | 28 | #define XOP_TLBSX 914 |
27 | #define XOP_TLBRE 946 | 29 | #define XOP_TLBRE 946 |
@@ -113,6 +115,19 @@ static int kvmppc_e500_emul_dcbtls(struct kvm_vcpu *vcpu) | |||
113 | return EMULATE_DONE; | 115 | return EMULATE_DONE; |
114 | } | 116 | } |
115 | 117 | ||
118 | static int kvmppc_e500_emul_mftmr(struct kvm_vcpu *vcpu, unsigned int inst, | ||
119 | int rt) | ||
120 | { | ||
121 | /* Expose one thread per vcpu */ | ||
122 | if (get_tmrn(inst) == TMRN_TMCFG0) { | ||
123 | kvmppc_set_gpr(vcpu, rt, | ||
124 | 1 | (1 << TMRN_TMCFG0_NATHRD_SHIFT)); | ||
125 | return EMULATE_DONE; | ||
126 | } | ||
127 | |||
128 | return EMULATE_FAIL; | ||
129 | } | ||
130 | |||
116 | int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, | 131 | int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, |
117 | unsigned int inst, int *advance) | 132 | unsigned int inst, int *advance) |
118 | { | 133 | { |
@@ -165,6 +180,10 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
165 | emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); | 180 | emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); |
166 | break; | 181 | break; |
167 | 182 | ||
183 | case XOP_MFTMR: | ||
184 | emulated = kvmppc_e500_emul_mftmr(vcpu, inst, rt); | ||
185 | break; | ||
186 | |||
168 | case XOP_EHPRIV: | 187 | case XOP_EHPRIV: |
169 | emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst, | 188 | emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst, |
170 | advance); | 189 | advance); |
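The new XOP_MFTMR case only emulates reads of TMCFG0 and reports a single active thread per vcpu. A small sketch of the value the guest sees when it executes mftmr (the shift below is a placeholder; the real TMRN_TMCFG0_NATHRD_SHIFT comes from asm/reg_booke.h):

```c
#include <stdint.h>
#include <stdio.h>

/* Placeholder only -- the real value is defined in asm/reg_booke.h. */
#define TMRN_TMCFG0_NATHRD_SHIFT_SKETCH 24

int main(void)
{
	/* Mirrors kvmppc_e500_emul_mftmr(): bit 0 reports one implemented
	 * thread, the NATHRD field reports one active thread. */
	uint32_t tmcfg0 = 1u | (1u << TMRN_TMCFG0_NATHRD_SHIFT_SKETCH);

	printf("guest mftmr TMCFG0 -> 0x%08x\n", tmcfg0);
	return 0;
}
```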
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 4d33e199edcc..5e2102c19586 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c | |||
@@ -406,7 +406,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | |||
406 | 406 | ||
407 | for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { | 407 | for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { |
408 | unsigned long gfn_start, gfn_end; | 408 | unsigned long gfn_start, gfn_end; |
409 | tsize_pages = 1 << (tsize - 2); | 409 | tsize_pages = 1UL << (tsize - 2); |
410 | 410 | ||
411 | gfn_start = gfn & ~(tsize_pages - 1); | 411 | gfn_start = gfn & ~(tsize_pages - 1); |
412 | gfn_end = gfn_start + tsize_pages; | 412 | gfn_end = gfn_start + tsize_pages; |
@@ -447,7 +447,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | |||
447 | } | 447 | } |
448 | 448 | ||
449 | if (likely(!pfnmap)) { | 449 | if (likely(!pfnmap)) { |
450 | tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); | 450 | tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT); |
451 | pfn = gfn_to_pfn_memslot(slot, gfn); | 451 | pfn = gfn_to_pfn_memslot(slot, gfn); |
452 | if (is_error_noslot_pfn(pfn)) { | 452 | if (is_error_noslot_pfn(pfn)) { |
453 | if (printk_ratelimit()) | 453 | if (printk_ratelimit()) |
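Both hunks change the shift to 1UL so that tsize_pages is computed in unsigned long: with a plain 1 the shift happens in int, which misbehaves once the exponent reaches bit 31 for very large TLB page sizes. A standalone illustration of the difference on a typical 64-bit target:

```c
#include <stdio.h>

int main(void)
{
	unsigned int shift = 31;	/* e.g. tsize + 10 - PAGE_SHIFT for a huge mapping */

	/* Old form: the shift is evaluated in (signed) int; bit 31 makes the
	 * value negative on common compilers, and it sign-extends when stored
	 * in an unsigned long. */
	unsigned long old_form = (unsigned long)(int)(1u << shift);

	/* Fixed form: evaluated directly in unsigned long. */
	unsigned long new_form = 1UL << shift;

	printf("old: 0x%lx\nnew: 0x%lx\n", old_form, new_form);
	return 0;
}
```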
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2e51289610e4..6fd2405c7f4a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -559,6 +559,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
559 | else | 559 | else |
560 | r = num_online_cpus(); | 560 | r = num_online_cpus(); |
561 | break; | 561 | break; |
562 | case KVM_CAP_NR_MEMSLOTS: | ||
563 | r = KVM_USER_MEM_SLOTS; | ||
564 | break; | ||
562 | case KVM_CAP_MAX_VCPUS: | 565 | case KVM_CAP_MAX_VCPUS: |
563 | r = KVM_MAX_VCPUS; | 566 | r = KVM_MAX_VCPUS; |
564 | break; | 567 | break; |
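With KVM_CAP_NR_MEMSLOTS wired up, userspace on PPC can now query the memslot limit instead of getting 0 back. A minimal sketch of the query through the standard KVM_CHECK_EXTENSION ioctl:

```c
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* Returns KVM_USER_MEM_SLOTS on kernels with this change. */
	int slots = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_NR_MEMSLOTS);
	printf("KVM_CAP_NR_MEMSLOTS = %d\n", slots);

	close(kvm);
	return 0;
}
```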
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 7f654308817c..efaac2c3bb77 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
@@ -644,5 +644,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot | |||
644 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} | 644 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} |
645 | static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | 645 | static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, |
646 | struct kvm_memory_slot *slot) {} | 646 | struct kvm_memory_slot *slot) {} |
647 | static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} | ||
648 | static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} | ||
647 | 649 | ||
648 | #endif | 650 | #endif |
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 7365e8a46032..b4a5aa110cec 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
@@ -336,28 +336,28 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) | |||
336 | return -EOPNOTSUPP; | 336 | return -EOPNOTSUPP; |
337 | } | 337 | } |
338 | 338 | ||
339 | static const intercept_handler_t intercept_funcs[] = { | ||
340 | [0x00 >> 2] = handle_noop, | ||
341 | [0x04 >> 2] = handle_instruction, | ||
342 | [0x08 >> 2] = handle_prog, | ||
343 | [0x10 >> 2] = handle_noop, | ||
344 | [0x14 >> 2] = handle_external_interrupt, | ||
345 | [0x18 >> 2] = handle_noop, | ||
346 | [0x1C >> 2] = kvm_s390_handle_wait, | ||
347 | [0x20 >> 2] = handle_validity, | ||
348 | [0x28 >> 2] = handle_stop, | ||
349 | [0x38 >> 2] = handle_partial_execution, | ||
350 | }; | ||
351 | |||
352 | int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) | 339 | int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) |
353 | { | 340 | { |
354 | intercept_handler_t func; | 341 | switch (vcpu->arch.sie_block->icptcode) { |
355 | u8 code = vcpu->arch.sie_block->icptcode; | 342 | case 0x00: |
356 | 343 | case 0x10: | |
357 | if (code & 3 || (code >> 2) >= ARRAY_SIZE(intercept_funcs)) | 344 | case 0x18: |
345 | return handle_noop(vcpu); | ||
346 | case 0x04: | ||
347 | return handle_instruction(vcpu); | ||
348 | case 0x08: | ||
349 | return handle_prog(vcpu); | ||
350 | case 0x14: | ||
351 | return handle_external_interrupt(vcpu); | ||
352 | case 0x1c: | ||
353 | return kvm_s390_handle_wait(vcpu); | ||
354 | case 0x20: | ||
355 | return handle_validity(vcpu); | ||
356 | case 0x28: | ||
357 | return handle_stop(vcpu); | ||
358 | case 0x38: | ||
359 | return handle_partial_execution(vcpu); | ||
360 | default: | ||
358 | return -EOPNOTSUPP; | 361 | return -EOPNOTSUPP; |
359 | func = intercept_funcs[code >> 2]; | 362 | } |
360 | if (func) | ||
361 | return func(vcpu); | ||
362 | return -EOPNOTSUPP; | ||
363 | } | 363 | } |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5c2c169395c3..373e32346d68 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
@@ -51,11 +51,9 @@ static int psw_mchk_disabled(struct kvm_vcpu *vcpu) | |||
51 | 51 | ||
52 | static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) | 52 | static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) |
53 | { | 53 | { |
54 | if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) || | 54 | return psw_extint_disabled(vcpu) && |
55 | (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) || | 55 | psw_ioint_disabled(vcpu) && |
56 | (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT)) | 56 | psw_mchk_disabled(vcpu); |
57 | return 0; | ||
58 | return 1; | ||
59 | } | 57 | } |
60 | 58 | ||
61 | static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) | 59 | static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) |
@@ -71,13 +69,8 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) | |||
71 | 69 | ||
72 | static int ckc_irq_pending(struct kvm_vcpu *vcpu) | 70 | static int ckc_irq_pending(struct kvm_vcpu *vcpu) |
73 | { | 71 | { |
74 | preempt_disable(); | 72 | if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm)) |
75 | if (!(vcpu->arch.sie_block->ckc < | ||
76 | get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) { | ||
77 | preempt_enable(); | ||
78 | return 0; | 73 | return 0; |
79 | } | ||
80 | preempt_enable(); | ||
81 | return ckc_interrupts_enabled(vcpu); | 74 | return ckc_interrupts_enabled(vcpu); |
82 | } | 75 | } |
83 | 76 | ||
@@ -109,14 +102,10 @@ static inline u8 int_word_to_isc(u32 int_word) | |||
109 | return (int_word & 0x38000000) >> 27; | 102 | return (int_word & 0x38000000) >> 27; |
110 | } | 103 | } |
111 | 104 | ||
112 | static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu) | 105 | static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) |
113 | { | 106 | { |
114 | return vcpu->kvm->arch.float_int.pending_irqs; | 107 | return vcpu->kvm->arch.float_int.pending_irqs | |
115 | } | 108 | vcpu->arch.local_int.pending_irqs; |
116 | |||
117 | static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu) | ||
118 | { | ||
119 | return vcpu->arch.local_int.pending_irqs; | ||
120 | } | 109 | } |
121 | 110 | ||
122 | static unsigned long disable_iscs(struct kvm_vcpu *vcpu, | 111 | static unsigned long disable_iscs(struct kvm_vcpu *vcpu, |
@@ -135,8 +124,7 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) | |||
135 | { | 124 | { |
136 | unsigned long active_mask; | 125 | unsigned long active_mask; |
137 | 126 | ||
138 | active_mask = pending_local_irqs(vcpu); | 127 | active_mask = pending_irqs(vcpu); |
139 | active_mask |= pending_floating_irqs(vcpu); | ||
140 | if (!active_mask) | 128 | if (!active_mask) |
141 | return 0; | 129 | return 0; |
142 | 130 | ||
@@ -204,7 +192,7 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) | |||
204 | 192 | ||
205 | static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) | 193 | static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) |
206 | { | 194 | { |
207 | if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK)) | 195 | if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK)) |
208 | return; | 196 | return; |
209 | else if (psw_ioint_disabled(vcpu)) | 197 | else if (psw_ioint_disabled(vcpu)) |
210 | __set_cpuflag(vcpu, CPUSTAT_IO_INT); | 198 | __set_cpuflag(vcpu, CPUSTAT_IO_INT); |
@@ -214,7 +202,7 @@ static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) | |||
214 | 202 | ||
215 | static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) | 203 | static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) |
216 | { | 204 | { |
217 | if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK)) | 205 | if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK)) |
218 | return; | 206 | return; |
219 | if (psw_extint_disabled(vcpu)) | 207 | if (psw_extint_disabled(vcpu)) |
220 | __set_cpuflag(vcpu, CPUSTAT_EXT_INT); | 208 | __set_cpuflag(vcpu, CPUSTAT_EXT_INT); |
@@ -224,7 +212,7 @@ static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) | |||
224 | 212 | ||
225 | static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu) | 213 | static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu) |
226 | { | 214 | { |
227 | if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK)) | 215 | if (!(pending_irqs(vcpu) & IRQ_PEND_MCHK_MASK)) |
228 | return; | 216 | return; |
229 | if (psw_mchk_disabled(vcpu)) | 217 | if (psw_mchk_disabled(vcpu)) |
230 | vcpu->arch.sie_block->ictl |= ICTL_LPSW; | 218 | vcpu->arch.sie_block->ictl |= ICTL_LPSW; |
@@ -815,23 +803,21 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) | |||
815 | 803 | ||
816 | int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) | 804 | int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) |
817 | { | 805 | { |
818 | int rc; | 806 | if (deliverable_irqs(vcpu)) |
807 | return 1; | ||
819 | 808 | ||
820 | rc = !!deliverable_irqs(vcpu); | 809 | if (kvm_cpu_has_pending_timer(vcpu)) |
821 | 810 | return 1; | |
822 | if (!rc && kvm_cpu_has_pending_timer(vcpu)) | ||
823 | rc = 1; | ||
824 | 811 | ||
825 | /* external call pending and deliverable */ | 812 | /* external call pending and deliverable */ |
826 | if (!rc && kvm_s390_ext_call_pending(vcpu) && | 813 | if (kvm_s390_ext_call_pending(vcpu) && |
827 | !psw_extint_disabled(vcpu) && | 814 | !psw_extint_disabled(vcpu) && |
828 | (vcpu->arch.sie_block->gcr[0] & 0x2000ul)) | 815 | (vcpu->arch.sie_block->gcr[0] & 0x2000ul)) |
829 | rc = 1; | 816 | return 1; |
830 | |||
831 | if (!rc && !exclude_stop && kvm_s390_is_stop_irq_pending(vcpu)) | ||
832 | rc = 1; | ||
833 | 817 | ||
834 | return rc; | 818 | if (!exclude_stop && kvm_s390_is_stop_irq_pending(vcpu)) |
819 | return 1; | ||
820 | return 0; | ||
835 | } | 821 | } |
836 | 822 | ||
837 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | 823 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) |
@@ -846,7 +832,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) | |||
846 | vcpu->stat.exit_wait_state++; | 832 | vcpu->stat.exit_wait_state++; |
847 | 833 | ||
848 | /* fast path */ | 834 | /* fast path */ |
849 | if (kvm_cpu_has_pending_timer(vcpu) || kvm_arch_vcpu_runnable(vcpu)) | 835 | if (kvm_arch_vcpu_runnable(vcpu)) |
850 | return 0; | 836 | return 0; |
851 | 837 | ||
852 | if (psw_interrupts_disabled(vcpu)) { | 838 | if (psw_interrupts_disabled(vcpu)) { |
@@ -860,9 +846,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) | |||
860 | goto no_timer; | 846 | goto no_timer; |
861 | } | 847 | } |
862 | 848 | ||
863 | preempt_disable(); | 849 | now = kvm_s390_get_tod_clock_fast(vcpu->kvm); |
864 | now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; | ||
865 | preempt_enable(); | ||
866 | sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); | 850 | sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); |
867 | 851 | ||
868 | /* underflow */ | 852 | /* underflow */ |
@@ -901,9 +885,7 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) | |||
901 | u64 now, sltime; | 885 | u64 now, sltime; |
902 | 886 | ||
903 | vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); | 887 | vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); |
904 | preempt_disable(); | 888 | now = kvm_s390_get_tod_clock_fast(vcpu->kvm); |
905 | now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; | ||
906 | preempt_enable(); | ||
907 | sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); | 889 | sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); |
908 | 890 | ||
909 | /* | 891 | /* |
@@ -981,39 +963,30 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | |||
981 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, | 963 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, |
982 | irq->u.pgm.code, 0); | 964 | irq->u.pgm.code, 0); |
983 | 965 | ||
984 | li->irq.pgm = irq->u.pgm; | 966 | if (irq->u.pgm.code == PGM_PER) { |
967 | li->irq.pgm.code |= PGM_PER; | ||
968 | /* only modify PER related information */ | ||
969 | li->irq.pgm.per_address = irq->u.pgm.per_address; | ||
970 | li->irq.pgm.per_code = irq->u.pgm.per_code; | ||
971 | li->irq.pgm.per_atmid = irq->u.pgm.per_atmid; | ||
972 | li->irq.pgm.per_access_id = irq->u.pgm.per_access_id; | ||
973 | } else if (!(irq->u.pgm.code & PGM_PER)) { | ||
974 | li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) | | ||
975 | irq->u.pgm.code; | ||
976 | /* only modify non-PER information */ | ||
977 | li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code; | ||
978 | li->irq.pgm.mon_code = irq->u.pgm.mon_code; | ||
979 | li->irq.pgm.data_exc_code = irq->u.pgm.data_exc_code; | ||
980 | li->irq.pgm.mon_class_nr = irq->u.pgm.mon_class_nr; | ||
981 | li->irq.pgm.exc_access_id = irq->u.pgm.exc_access_id; | ||
982 | li->irq.pgm.op_access_id = irq->u.pgm.op_access_id; | ||
983 | } else { | ||
984 | li->irq.pgm = irq->u.pgm; | ||
985 | } | ||
985 | set_bit(IRQ_PEND_PROG, &li->pending_irqs); | 986 | set_bit(IRQ_PEND_PROG, &li->pending_irqs); |
986 | return 0; | 987 | return 0; |
987 | } | 988 | } |
988 | 989 | ||
989 | int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) | ||
990 | { | ||
991 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | ||
992 | struct kvm_s390_irq irq; | ||
993 | |||
994 | spin_lock(&li->lock); | ||
995 | irq.u.pgm.code = code; | ||
996 | __inject_prog(vcpu, &irq); | ||
997 | BUG_ON(waitqueue_active(li->wq)); | ||
998 | spin_unlock(&li->lock); | ||
999 | return 0; | ||
1000 | } | ||
1001 | |||
1002 | int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, | ||
1003 | struct kvm_s390_pgm_info *pgm_info) | ||
1004 | { | ||
1005 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | ||
1006 | struct kvm_s390_irq irq; | ||
1007 | int rc; | ||
1008 | |||
1009 | spin_lock(&li->lock); | ||
1010 | irq.u.pgm = *pgm_info; | ||
1011 | rc = __inject_prog(vcpu, &irq); | ||
1012 | BUG_ON(waitqueue_active(li->wq)); | ||
1013 | spin_unlock(&li->lock); | ||
1014 | return rc; | ||
1015 | } | ||
1016 | |||
1017 | static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | 990 | static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) |
1018 | { | 991 | { |
1019 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 992 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
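The rewritten __inject_prog() merges a newly injected program interrupt with one that is already pending instead of overwriting it: a pure PER event only refreshes the PER fields, a non-PER condition keeps a pending PER bit and refreshes the remaining fields, and a combined event replaces the record. A simplified sketch of those merge rules (the struct and bit value are illustrative stand-ins, not the real kvm_s390_pgm_info and PGM_PER):

```c
#include <stdint.h>

#define PGM_PER_SKETCH 0x80u	/* stand-in for PGM_PER */

struct pgm_sketch {
	uint16_t code;
	uint64_t per_address;		/* representative PER-related field */
	uint64_t trans_exc_code;	/* representative non-PER field */
};

static void merge_pgm(struct pgm_sketch *pending, const struct pgm_sketch *irq)
{
	if (irq->code == PGM_PER_SKETCH) {
		/* pure PER event: only touch the PER information */
		pending->code |= PGM_PER_SKETCH;
		pending->per_address = irq->per_address;
	} else if (!(irq->code & PGM_PER_SKETCH)) {
		/* non-PER condition: keep a pending PER bit, refresh the rest */
		pending->code = (pending->code & PGM_PER_SKETCH) | irq->code;
		pending->trans_exc_code = irq->trans_exc_code;
	} else {
		/* combined PER + other condition: take the new record as-is */
		*pending = *irq;
	}
}
```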
@@ -1390,12 +1363,9 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) | |||
1390 | 1363 | ||
1391 | static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | 1364 | static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) |
1392 | { | 1365 | { |
1393 | struct kvm_s390_float_interrupt *fi; | ||
1394 | u64 type = READ_ONCE(inti->type); | 1366 | u64 type = READ_ONCE(inti->type); |
1395 | int rc; | 1367 | int rc; |
1396 | 1368 | ||
1397 | fi = &kvm->arch.float_int; | ||
1398 | |||
1399 | switch (type) { | 1369 | switch (type) { |
1400 | case KVM_S390_MCHK: | 1370 | case KVM_S390_MCHK: |
1401 | rc = __inject_float_mchk(kvm, inti); | 1371 | rc = __inject_float_mchk(kvm, inti); |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c6b4063fce29..8fe2f1c722dc 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -514,35 +514,20 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) | |||
514 | 514 | ||
515 | if (gtod_high != 0) | 515 | if (gtod_high != 0) |
516 | return -EINVAL; | 516 | return -EINVAL; |
517 | VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high); | 517 | VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high); |
518 | 518 | ||
519 | return 0; | 519 | return 0; |
520 | } | 520 | } |
521 | 521 | ||
522 | static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) | 522 | static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) |
523 | { | 523 | { |
524 | struct kvm_vcpu *cur_vcpu; | 524 | u64 gtod; |
525 | unsigned int vcpu_idx; | ||
526 | u64 host_tod, gtod; | ||
527 | int r; | ||
528 | 525 | ||
529 | if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod))) | 526 | if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod))) |
530 | return -EFAULT; | 527 | return -EFAULT; |
531 | 528 | ||
532 | r = store_tod_clock(&host_tod); | 529 | kvm_s390_set_tod_clock(kvm, gtod); |
533 | if (r) | 530 | VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod); |
534 | return r; | ||
535 | |||
536 | mutex_lock(&kvm->lock); | ||
537 | preempt_disable(); | ||
538 | kvm->arch.epoch = gtod - host_tod; | ||
539 | kvm_s390_vcpu_block_all(kvm); | ||
540 | kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) | ||
541 | cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch; | ||
542 | kvm_s390_vcpu_unblock_all(kvm); | ||
543 | preempt_enable(); | ||
544 | mutex_unlock(&kvm->lock); | ||
545 | VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod); | ||
546 | return 0; | 531 | return 0; |
547 | } | 532 | } |
548 | 533 | ||
@@ -574,26 +559,19 @@ static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) | |||
574 | if (copy_to_user((void __user *)attr->addr, &gtod_high, | 559 | if (copy_to_user((void __user *)attr->addr, &gtod_high, |
575 | sizeof(gtod_high))) | 560 | sizeof(gtod_high))) |
576 | return -EFAULT; | 561 | return -EFAULT; |
577 | VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high); | 562 | VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high); |
578 | 563 | ||
579 | return 0; | 564 | return 0; |
580 | } | 565 | } |
581 | 566 | ||
582 | static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) | 567 | static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) |
583 | { | 568 | { |
584 | u64 host_tod, gtod; | 569 | u64 gtod; |
585 | int r; | ||
586 | 570 | ||
587 | r = store_tod_clock(&host_tod); | 571 | gtod = kvm_s390_get_tod_clock_fast(kvm); |
588 | if (r) | ||
589 | return r; | ||
590 | |||
591 | preempt_disable(); | ||
592 | gtod = host_tod + kvm->arch.epoch; | ||
593 | preempt_enable(); | ||
594 | if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod))) | 572 | if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod))) |
595 | return -EFAULT; | 573 | return -EFAULT; |
596 | VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod); | 574 | VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod); |
597 | 575 | ||
598 | return 0; | 576 | return 0; |
599 | } | 577 | } |
@@ -1120,7 +1098,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
1120 | if (!kvm->arch.sca) | 1098 | if (!kvm->arch.sca) |
1121 | goto out_err; | 1099 | goto out_err; |
1122 | spin_lock(&kvm_lock); | 1100 | spin_lock(&kvm_lock); |
1123 | sca_offset = (sca_offset + 16) & 0x7f0; | 1101 | sca_offset += 16; |
1102 | if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE) | ||
1103 | sca_offset = 0; | ||
1124 | kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); | 1104 | kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); |
1125 | spin_unlock(&kvm_lock); | 1105 | spin_unlock(&kvm_lock); |
1126 | 1106 | ||
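The old update masked the offset into 0..0x7f0, which lets an SCA larger than 16 bytes spill across the 4 KiB page boundary; the new update wraps to 0 as soon as the next SCA would not fit in the current page. A small illustration of the difference (the SCA size used here is made up, not the real sizeof(struct sca_block)):

```c
#include <stdio.h>

#define PAGE_SIZE_SKETCH 4096u
#define SCA_SIZE_SKETCH  2112u	/* illustrative; not the real sizeof(struct sca_block) */

/* Old update: cycles through 0..0x7f0, which can leave less than a full
 * SCA before the end of the page. */
static unsigned int old_update(unsigned int off)
{
	return (off + 16) & 0x7f0;
}

/* New update: wrap to 0 as soon as the SCA would cross the page boundary. */
static unsigned int new_update(unsigned int off)
{
	off += 16;
	if (off + SCA_SIZE_SKETCH > PAGE_SIZE_SKETCH)
		off = 0;
	return off;
}

int main(void)
{
	unsigned int off = 0x7e0;

	printf("old: offset 0x%x -> end 0x%x (crosses 0x1000)\n",
	       old_update(off), old_update(off) + SCA_SIZE_SKETCH);
	printf("new: offset 0x%x -> end 0x%x (stays in page)\n",
	       new_update(off), new_update(off) + SCA_SIZE_SKETCH);
	return 0;
}
```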
@@ -1911,6 +1891,22 @@ retry: | |||
1911 | return 0; | 1891 | return 0; |
1912 | } | 1892 | } |
1913 | 1893 | ||
1894 | void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod) | ||
1895 | { | ||
1896 | struct kvm_vcpu *vcpu; | ||
1897 | int i; | ||
1898 | |||
1899 | mutex_lock(&kvm->lock); | ||
1900 | preempt_disable(); | ||
1901 | kvm->arch.epoch = tod - get_tod_clock(); | ||
1902 | kvm_s390_vcpu_block_all(kvm); | ||
1903 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
1904 | vcpu->arch.sie_block->epoch = kvm->arch.epoch; | ||
1905 | kvm_s390_vcpu_unblock_all(kvm); | ||
1906 | preempt_enable(); | ||
1907 | mutex_unlock(&kvm->lock); | ||
1908 | } | ||
1909 | |||
1914 | /** | 1910 | /** |
1915 | * kvm_arch_fault_in_page - fault-in guest page if necessary | 1911 | * kvm_arch_fault_in_page - fault-in guest page if necessary |
1916 | * @vcpu: The corresponding virtual cpu | 1912 | * @vcpu: The corresponding virtual cpu |
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index c446aabf60d3..1e70e00d3c5e 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -175,6 +175,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) | |||
175 | return kvm->arch.user_cpu_state_ctrl != 0; | 175 | return kvm->arch.user_cpu_state_ctrl != 0; |
176 | } | 176 | } |
177 | 177 | ||
178 | /* implemented in interrupt.c */ | ||
178 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); | 179 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); |
179 | void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); | 180 | void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); |
180 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); | 181 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); |
@@ -185,7 +186,25 @@ int __must_check kvm_s390_inject_vm(struct kvm *kvm, | |||
185 | struct kvm_s390_interrupt *s390int); | 186 | struct kvm_s390_interrupt *s390int); |
186 | int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, | 187 | int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, |
187 | struct kvm_s390_irq *irq); | 188 | struct kvm_s390_irq *irq); |
188 | int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); | 189 | static inline int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, |
190 | struct kvm_s390_pgm_info *pgm_info) | ||
191 | { | ||
192 | struct kvm_s390_irq irq = { | ||
193 | .type = KVM_S390_PROGRAM_INT, | ||
194 | .u.pgm = *pgm_info, | ||
195 | }; | ||
196 | |||
197 | return kvm_s390_inject_vcpu(vcpu, &irq); | ||
198 | } | ||
199 | static inline int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) | ||
200 | { | ||
201 | struct kvm_s390_irq irq = { | ||
202 | .type = KVM_S390_PROGRAM_INT, | ||
203 | .u.pgm.code = code, | ||
204 | }; | ||
205 | |||
206 | return kvm_s390_inject_vcpu(vcpu, &irq); | ||
207 | } | ||
189 | struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, | 208 | struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, |
190 | u64 isc_mask, u32 schid); | 209 | u64 isc_mask, u32 schid); |
191 | int kvm_s390_reinject_io_int(struct kvm *kvm, | 210 | int kvm_s390_reinject_io_int(struct kvm *kvm, |
@@ -212,6 +231,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); | |||
212 | int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); | 231 | int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); |
213 | 232 | ||
214 | /* implemented in kvm-s390.c */ | 233 | /* implemented in kvm-s390.c */ |
234 | void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod); | ||
215 | long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); | 235 | long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); |
216 | int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); | 236 | int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); |
217 | int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu, | 237 | int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu, |
@@ -231,9 +251,6 @@ extern unsigned long kvm_s390_fac_list_mask[]; | |||
231 | 251 | ||
232 | /* implemented in diag.c */ | 252 | /* implemented in diag.c */ |
233 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); | 253 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); |
234 | /* implemented in interrupt.c */ | ||
235 | int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, | ||
236 | struct kvm_s390_pgm_info *pgm_info); | ||
237 | 254 | ||
238 | static inline void kvm_s390_vcpu_block_all(struct kvm *kvm) | 255 | static inline void kvm_s390_vcpu_block_all(struct kvm *kvm) |
239 | { | 256 | { |
@@ -254,6 +271,16 @@ static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm) | |||
254 | kvm_s390_vcpu_unblock(vcpu); | 271 | kvm_s390_vcpu_unblock(vcpu); |
255 | } | 272 | } |
256 | 273 | ||
274 | static inline u64 kvm_s390_get_tod_clock_fast(struct kvm *kvm) | ||
275 | { | ||
276 | u64 rc; | ||
277 | |||
278 | preempt_disable(); | ||
279 | rc = get_tod_clock_fast() + kvm->arch.epoch; | ||
280 | preempt_enable(); | ||
281 | return rc; | ||
282 | } | ||
283 | |||
257 | /** | 284 | /** |
258 | * kvm_s390_inject_prog_cond - conditionally inject a program check | 285 | * kvm_s390_inject_prog_cond - conditionally inject a program check |
259 | * @vcpu: virtual cpu | 286 | * @vcpu: virtual cpu |
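kvm_s390_get_tod_clock_fast() now gives every caller (ckc_irq_pending, kvm_s390_handle_wait, the idle wakeup timer, the TOD attribute getter) the same preemption-safe "host TOD plus per-VM epoch" computation, and kvm_s390_set_tod_clock() is its counterpart that derives the epoch from a requested guest TOD. A hedged sketch of that relationship (the TOD source below is a fake stand-in for get_tod_clock_fast()):

```c
#include <stdint.h>
#include <stdio.h>

/* Fake stand-in for the STCK-based get_tod_clock_fast(). */
static uint64_t host_tod;
static uint64_t get_host_tod(void) { return host_tod; }

struct kvm_sketch { uint64_t epoch; };

/* kvm_s390_set_tod_clock(): remember the guest/host difference once... */
static void set_guest_tod(struct kvm_sketch *kvm, uint64_t guest_tod)
{
	kvm->epoch = guest_tod - get_host_tod();
}

/* ...kvm_s390_get_tod_clock_fast(): reapply it on every read. */
static uint64_t get_guest_tod(const struct kvm_sketch *kvm)
{
	return get_host_tod() + kvm->epoch;
}

int main(void)
{
	struct kvm_sketch kvm = { 0 };

	host_tod = 1000;
	set_guest_tod(&kvm, 5000);	/* guest wants its TOD to read 5000 now */
	host_tod += 250;		/* host clock keeps ticking */
	printf("guest TOD = %llu\n", (unsigned long long)get_guest_tod(&kvm));
	return 0;
}
```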
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 4d21dc4d1a84..77191b85ea7a 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
@@ -33,11 +33,9 @@ | |||
33 | /* Handle SCK (SET CLOCK) interception */ | 33 | /* Handle SCK (SET CLOCK) interception */ |
34 | static int handle_set_clock(struct kvm_vcpu *vcpu) | 34 | static int handle_set_clock(struct kvm_vcpu *vcpu) |
35 | { | 35 | { |
36 | struct kvm_vcpu *cpup; | 36 | int rc; |
37 | s64 hostclk, val; | ||
38 | int i, rc; | ||
39 | ar_t ar; | 37 | ar_t ar; |
40 | u64 op2; | 38 | u64 op2, val; |
41 | 39 | ||
42 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | 40 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) |
43 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | 41 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); |
@@ -49,19 +47,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) | |||
49 | if (rc) | 47 | if (rc) |
50 | return kvm_s390_inject_prog_cond(vcpu, rc); | 48 | return kvm_s390_inject_prog_cond(vcpu, rc); |
51 | 49 | ||
52 | if (store_tod_clock(&hostclk)) { | ||
53 | kvm_s390_set_psw_cc(vcpu, 3); | ||
54 | return 0; | ||
55 | } | ||
56 | VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val); | 50 | VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val); |
57 | val = (val - hostclk) & ~0x3fUL; | 51 | kvm_s390_set_tod_clock(vcpu->kvm, val); |
58 | |||
59 | mutex_lock(&vcpu->kvm->lock); | ||
60 | preempt_disable(); | ||
61 | kvm_for_each_vcpu(i, cpup, vcpu->kvm) | ||
62 | cpup->arch.sie_block->epoch = val; | ||
63 | preempt_enable(); | ||
64 | mutex_unlock(&vcpu->kvm->lock); | ||
65 | 52 | ||
66 | kvm_s390_set_psw_cc(vcpu, 0); | 53 | kvm_s390_set_psw_cc(vcpu, 0); |
67 | return 0; | 54 | return 0; |
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 046c7fb1ca43..a210eba2727c 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h | |||
@@ -33,6 +33,11 @@ enum irq_remap_cap { | |||
33 | IRQ_POSTING_CAP = 0, | 33 | IRQ_POSTING_CAP = 0, |
34 | }; | 34 | }; |
35 | 35 | ||
36 | struct vcpu_data { | ||
37 | u64 pi_desc_addr; /* Physical address of PI Descriptor */ | ||
38 | u32 vector; /* Guest vector of the interrupt */ | ||
39 | }; | ||
40 | |||
36 | #ifdef CONFIG_IRQ_REMAP | 41 | #ifdef CONFIG_IRQ_REMAP |
37 | 42 | ||
38 | extern bool irq_remapping_cap(enum irq_remap_cap cap); | 43 | extern bool irq_remapping_cap(enum irq_remap_cap cap); |
@@ -58,11 +63,6 @@ static inline struct irq_domain *arch_get_ir_parent_domain(void) | |||
58 | return x86_vector_domain; | 63 | return x86_vector_domain; |
59 | } | 64 | } |
60 | 65 | ||
61 | struct vcpu_data { | ||
62 | u64 pi_desc_addr; /* Physical address of PI Descriptor */ | ||
63 | u32 vector; /* Guest vector of the interrupt */ | ||
64 | }; | ||
65 | |||
66 | #else /* CONFIG_IRQ_REMAP */ | 66 | #else /* CONFIG_IRQ_REMAP */ |
67 | 67 | ||
68 | static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; } | 68 | static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; } |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index e16466ec473c..e9cd7befcb76 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -112,6 +112,16 @@ struct x86_emulate_ops { | |||
112 | struct x86_exception *fault); | 112 | struct x86_exception *fault); |
113 | 113 | ||
114 | /* | 114 | /* |
115 | * read_phys: Read bytes of standard (non-emulated/special) memory. | ||
116 | * Used for descriptor reading. | ||
117 | * @addr: [IN ] Physical address from which to read. | ||
118 | * @val: [OUT] Value read from memory. | ||
119 | * @bytes: [IN ] Number of bytes to read from memory. | ||
120 | */ | ||
121 | int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr, | ||
122 | void *val, unsigned int bytes); | ||
123 | |||
124 | /* | ||
115 | * write_std: Write bytes of standard (non-emulated/special) memory. | 125 | * write_std: Write bytes of standard (non-emulated/special) memory. |
116 | * Used for descriptor writing. | 126 | * Used for descriptor writing. |
117 | * @addr: [IN ] Linear address to which to write. | 127 | * @addr: [IN ] Linear address to which to write. |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3a36ee704c30..9265196e877f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/perf_event.h> | 24 | #include <linux/perf_event.h> |
25 | #include <linux/pvclock_gtod.h> | 25 | #include <linux/pvclock_gtod.h> |
26 | #include <linux/clocksource.h> | 26 | #include <linux/clocksource.h> |
27 | #include <linux/irqbypass.h> | ||
27 | 28 | ||
28 | #include <asm/pvclock-abi.h> | 29 | #include <asm/pvclock-abi.h> |
29 | #include <asm/desc.h> | 30 | #include <asm/desc.h> |
@@ -176,6 +177,8 @@ enum { | |||
176 | */ | 177 | */ |
177 | #define KVM_APIC_PV_EOI_PENDING 1 | 178 | #define KVM_APIC_PV_EOI_PENDING 1 |
178 | 179 | ||
180 | struct kvm_kernel_irq_routing_entry; | ||
181 | |||
179 | /* | 182 | /* |
180 | * We don't want allocation failures within the mmu code, so we preallocate | 183 | * We don't want allocation failures within the mmu code, so we preallocate |
181 | * enough memory for a single page fault in a cache. | 184 | * enough memory for a single page fault in a cache. |
@@ -374,6 +377,7 @@ struct kvm_mtrr { | |||
374 | /* Hyper-V per vcpu emulation context */ | 377 | /* Hyper-V per vcpu emulation context */ |
375 | struct kvm_vcpu_hv { | 378 | struct kvm_vcpu_hv { |
376 | u64 hv_vapic; | 379 | u64 hv_vapic; |
380 | s64 runtime_offset; | ||
377 | }; | 381 | }; |
378 | 382 | ||
379 | struct kvm_vcpu_arch { | 383 | struct kvm_vcpu_arch { |
@@ -396,6 +400,7 @@ struct kvm_vcpu_arch { | |||
396 | u64 efer; | 400 | u64 efer; |
397 | u64 apic_base; | 401 | u64 apic_base; |
398 | struct kvm_lapic *apic; /* kernel irqchip context */ | 402 | struct kvm_lapic *apic; /* kernel irqchip context */ |
403 | u64 eoi_exit_bitmap[4]; | ||
399 | unsigned long apic_attention; | 404 | unsigned long apic_attention; |
400 | int32_t apic_arb_prio; | 405 | int32_t apic_arb_prio; |
401 | int mp_state; | 406 | int mp_state; |
@@ -573,6 +578,9 @@ struct kvm_vcpu_arch { | |||
573 | struct { | 578 | struct { |
574 | bool pv_unhalted; | 579 | bool pv_unhalted; |
575 | } pv; | 580 | } pv; |
581 | |||
582 | int pending_ioapic_eoi; | ||
583 | int pending_external_vector; | ||
576 | }; | 584 | }; |
577 | 585 | ||
578 | struct kvm_lpage_info { | 586 | struct kvm_lpage_info { |
@@ -683,6 +691,9 @@ struct kvm_arch { | |||
683 | u32 bsp_vcpu_id; | 691 | u32 bsp_vcpu_id; |
684 | 692 | ||
685 | u64 disabled_quirks; | 693 | u64 disabled_quirks; |
694 | |||
695 | bool irqchip_split; | ||
696 | u8 nr_reserved_ioapic_pins; | ||
686 | }; | 697 | }; |
687 | 698 | ||
688 | struct kvm_vm_stat { | 699 | struct kvm_vm_stat { |
@@ -819,10 +830,10 @@ struct kvm_x86_ops { | |||
819 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); | 830 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
820 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); | 831 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); |
821 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); | 832 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); |
822 | int (*vm_has_apicv)(struct kvm *kvm); | 833 | int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu); |
823 | void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); | 834 | void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); |
824 | void (*hwapic_isr_update)(struct kvm *kvm, int isr); | 835 | void (*hwapic_isr_update)(struct kvm *kvm, int isr); |
825 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); | 836 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu); |
826 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); | 837 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); |
827 | void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); | 838 | void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); |
828 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); | 839 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); |
@@ -887,6 +898,20 @@ struct kvm_x86_ops { | |||
887 | gfn_t offset, unsigned long mask); | 898 | gfn_t offset, unsigned long mask); |
888 | /* pmu operations of sub-arch */ | 899 | /* pmu operations of sub-arch */ |
889 | const struct kvm_pmu_ops *pmu_ops; | 900 | const struct kvm_pmu_ops *pmu_ops; |
901 | |||
902 | /* | ||
903 | * Architecture specific hooks for vCPU blocking due to | ||
904 | * HLT instruction. | ||
905 | * Returns for .pre_block(): | ||
906 | * - 0 means continue to block the vCPU. | ||
907 | * - 1 means we cannot block the vCPU since some event | ||
908 | * happens during this period, such as, 'ON' bit in | ||
909 | * posted-interrupts descriptor is set. | ||
910 | */ | ||
911 | int (*pre_block)(struct kvm_vcpu *vcpu); | ||
912 | void (*post_block)(struct kvm_vcpu *vcpu); | ||
913 | int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, | ||
914 | uint32_t guest_irq, bool set); | ||
890 | }; | 915 | }; |
891 | 916 | ||
892 | struct kvm_arch_async_pf { | 917 | struct kvm_arch_async_pf { |
@@ -1231,4 +1256,13 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); | |||
1231 | bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); | 1256 | bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); |
1232 | bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); | 1257 | bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); |
1233 | 1258 | ||
1259 | bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, | ||
1260 | struct kvm_vcpu **dest_vcpu); | ||
1261 | |||
1262 | void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, | ||
1263 | struct kvm_lapic_irq *irq); | ||
1264 | |||
1265 | static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} | ||
1266 | static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} | ||
1267 | |||
1234 | #endif /* _ASM_X86_KVM_HOST_H */ | 1268 | #endif /* _ASM_X86_KVM_HOST_H */ |
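Besides the posted-interrupt plumbing (kvm_intr_is_single_vcpu, kvm_set_msi_irq, update_pi_irte), kvm_x86_ops gains pre_block/post_block hooks around HLT-induced blocking; pre_block returning 1 means an event such as the posted-interrupt descriptor's ON bit arrived and the vCPU must not block. A rough sketch of how a blocking path would use them (the surrounding flow is invented; only the hook semantics come from the header above):

```c
/* Sketch only: the real caller is KVM's vcpu blocking path and differs in
 * detail. */
struct vcpu_sketch;

struct x86_ops_sketch {
	int  (*pre_block)(struct vcpu_sketch *vcpu);
	void (*post_block)(struct vcpu_sketch *vcpu);
};

static void block_vcpu(struct x86_ops_sketch *ops, struct vcpu_sketch *vcpu)
{
	if (ops->pre_block && ops->pre_block(vcpu))
		return;		/* 1: an event arrived (e.g. PI descriptor ON bit), don't block */

	/* ... wait here until the vCPU becomes runnable ... */

	if (ops->post_block)
		ops->post_block(vcpu);
}
```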
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 448b7ca61aee..aa336ff3e03e 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -72,7 +72,7 @@ | |||
72 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 | 72 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 |
73 | #define SECONDARY_EXEC_ENABLE_PML 0x00020000 | 73 | #define SECONDARY_EXEC_ENABLE_PML 0x00020000 |
74 | #define SECONDARY_EXEC_XSAVES 0x00100000 | 74 | #define SECONDARY_EXEC_XSAVES 0x00100000 |
75 | 75 | #define SECONDARY_EXEC_PCOMMIT 0x00200000 | |
76 | 76 | ||
77 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 | 77 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 |
78 | #define PIN_BASED_NMI_EXITING 0x00000008 | 78 | #define PIN_BASED_NMI_EXITING 0x00000008 |
@@ -416,6 +416,7 @@ enum vmcs_field { | |||
416 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | 416 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) |
417 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | 417 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) |
418 | 418 | ||
419 | #define VMX_VPID_INVVPID_BIT (1ull << 0) /* (32 - 32) */ | ||
419 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */ | 420 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */ |
420 | #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ | 421 | #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ |
421 | 422 | ||
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index f0412c50c47b..040d4083c24f 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h | |||
@@ -153,6 +153,12 @@ | |||
153 | /* MSR used to provide vcpu index */ | 153 | /* MSR used to provide vcpu index */ |
154 | #define HV_X64_MSR_VP_INDEX 0x40000002 | 154 | #define HV_X64_MSR_VP_INDEX 0x40000002 |
155 | 155 | ||
156 | /* MSR used to reset the guest OS. */ | ||
157 | #define HV_X64_MSR_RESET 0x40000003 | ||
158 | |||
159 | /* MSR used to provide vcpu runtime in 100ns units */ | ||
160 | #define HV_X64_MSR_VP_RUNTIME 0x40000010 | ||
161 | |||
156 | /* MSR used to read the per-partition time reference counter */ | 162 | /* MSR used to read the per-partition time reference counter */ |
157 | #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 | 163 | #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 |
158 | 164 | ||
@@ -251,4 +257,16 @@ typedef struct _HV_REFERENCE_TSC_PAGE { | |||
251 | __s64 tsc_offset; | 257 | __s64 tsc_offset; |
252 | } HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; | 258 | } HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; |
253 | 259 | ||
260 | /* Define the number of synthetic interrupt sources. */ | ||
261 | #define HV_SYNIC_SINT_COUNT (16) | ||
262 | /* Define the expected SynIC version. */ | ||
263 | #define HV_SYNIC_VERSION_1 (0x1) | ||
264 | |||
265 | #define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) | ||
266 | #define HV_SYNIC_SIMP_ENABLE (1ULL << 0) | ||
267 | #define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) | ||
268 | #define HV_SYNIC_SINT_MASKED (1ULL << 16) | ||
269 | #define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) | ||
270 | #define HV_SYNIC_SINT_VECTOR_MASK (0xFF) | ||
271 | |||
254 | #endif | 272 | #endif |
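The SynIC constants describe the layout of the per-source SINT MSRs that the synthetic interrupt controller (landing after 4.4) will use: an 8-bit vector plus mask and auto-EOI flags. A tiny decode example using just the constants added above (the sample MSR value is made up):

```c
#include <stdint.h>
#include <stdio.h>

/* Constants as added to uapi/asm/hyperv.h above. */
#define HV_SYNIC_SINT_MASKED      (1ULL << 16)
#define HV_SYNIC_SINT_AUTO_EOI    (1ULL << 17)
#define HV_SYNIC_SINT_VECTOR_MASK (0xFF)

int main(void)
{
	/* Illustrative SINT MSR value a guest might program: vector 0x5e,
	 * unmasked, auto-EOI enabled. */
	uint64_t sint = 0x5e | HV_SYNIC_SINT_AUTO_EOI;

	printf("vector=0x%llx masked=%d auto_eoi=%d\n",
	       (unsigned long long)(sint & HV_SYNIC_SINT_VECTOR_MASK),
	       !!(sint & HV_SYNIC_SINT_MASKED),
	       !!(sint & HV_SYNIC_SINT_AUTO_EOI));
	return 0;
}
```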
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 37fee272618f..5b15d94a33f8 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h | |||
@@ -78,6 +78,7 @@ | |||
78 | #define EXIT_REASON_PML_FULL 62 | 78 | #define EXIT_REASON_PML_FULL 62 |
79 | #define EXIT_REASON_XSAVES 63 | 79 | #define EXIT_REASON_XSAVES 63 |
80 | #define EXIT_REASON_XRSTORS 64 | 80 | #define EXIT_REASON_XRSTORS 64 |
81 | #define EXIT_REASON_PCOMMIT 65 | ||
81 | 82 | ||
82 | #define VMX_EXIT_REASONS \ | 83 | #define VMX_EXIT_REASONS \ |
83 | { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ | 84 | { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ |
@@ -126,7 +127,8 @@ | |||
126 | { EXIT_REASON_INVVPID, "INVVPID" }, \ | 127 | { EXIT_REASON_INVVPID, "INVVPID" }, \ |
127 | { EXIT_REASON_INVPCID, "INVPCID" }, \ | 128 | { EXIT_REASON_INVPCID, "INVPCID" }, \ |
128 | { EXIT_REASON_XSAVES, "XSAVES" }, \ | 129 | { EXIT_REASON_XSAVES, "XSAVES" }, \ |
129 | { EXIT_REASON_XRSTORS, "XRSTORS" } | 130 | { EXIT_REASON_XRSTORS, "XRSTORS" }, \ |
131 | { EXIT_REASON_PCOMMIT, "PCOMMIT" } | ||
130 | 132 | ||
131 | #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 | 133 | #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 |
132 | #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 | 134 | #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 2c7aafa70702..2bd81e302427 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -32,6 +32,7 @@ | |||
32 | static int kvmclock = 1; | 32 | static int kvmclock = 1; |
33 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | 33 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; |
34 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; | 34 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; |
35 | static cycle_t kvm_sched_clock_offset; | ||
35 | 36 | ||
36 | static int parse_no_kvmclock(char *arg) | 37 | static int parse_no_kvmclock(char *arg) |
37 | { | 38 | { |
@@ -92,6 +93,29 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs) | |||
92 | return kvm_clock_read(); | 93 | return kvm_clock_read(); |
93 | } | 94 | } |
94 | 95 | ||
96 | static cycle_t kvm_sched_clock_read(void) | ||
97 | { | ||
98 | return kvm_clock_read() - kvm_sched_clock_offset; | ||
99 | } | ||
100 | |||
101 | static inline void kvm_sched_clock_init(bool stable) | ||
102 | { | ||
103 | if (!stable) { | ||
104 | pv_time_ops.sched_clock = kvm_clock_read; | ||
105 | return; | ||
106 | } | ||
107 | |||
108 | kvm_sched_clock_offset = kvm_clock_read(); | ||
109 | pv_time_ops.sched_clock = kvm_sched_clock_read; | ||
110 | set_sched_clock_stable(); | ||
111 | |||
112 | printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n", | ||
113 | kvm_sched_clock_offset); | ||
114 | |||
115 | BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) > | ||
116 | sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time)); | ||
117 | } | ||
118 | |||
95 | /* | 119 | /* |
96 | * If we don't do that, there is the possibility that the guest | 120 | * If we don't do that, there is the possibility that the guest |
97 | * will calibrate under heavy load - thus, getting a lower lpj - | 121 | * will calibrate under heavy load - thus, getting a lower lpj - |
@@ -248,7 +272,17 @@ void __init kvmclock_init(void) | |||
248 | memblock_free(mem, size); | 272 | memblock_free(mem, size); |
249 | return; | 273 | return; |
250 | } | 274 | } |
251 | pv_time_ops.sched_clock = kvm_clock_read; | 275 | |
276 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) | ||
277 | pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); | ||
278 | |||
279 | cpu = get_cpu(); | ||
280 | vcpu_time = &hv_clock[cpu].pvti; | ||
281 | flags = pvclock_read_flags(vcpu_time); | ||
282 | |||
283 | kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT); | ||
284 | put_cpu(); | ||
285 | |||
252 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; | 286 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; |
253 | x86_platform.get_wallclock = kvm_get_wallclock; | 287 | x86_platform.get_wallclock = kvm_get_wallclock; |
254 | x86_platform.set_wallclock = kvm_set_wallclock; | 288 | x86_platform.set_wallclock = kvm_set_wallclock; |
@@ -265,16 +299,6 @@ void __init kvmclock_init(void) | |||
265 | kvm_get_preset_lpj(); | 299 | kvm_get_preset_lpj(); |
266 | clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); | 300 | clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); |
267 | pv_info.name = "KVM"; | 301 | pv_info.name = "KVM"; |
268 | |||
269 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) | ||
270 | pvclock_set_flags(~0); | ||
271 | |||
272 | cpu = get_cpu(); | ||
273 | vcpu_time = &hv_clock[cpu].pvti; | ||
274 | flags = pvclock_read_flags(vcpu_time); | ||
275 | if (flags & PVCLOCK_COUNTS_FROM_ZERO) | ||
276 | set_sched_clock_stable(); | ||
277 | put_cpu(); | ||
278 | } | 302 | } |
279 | 303 | ||
280 | int __init kvm_setup_vsyscall_timeinfo(void) | 304 | int __init kvm_setup_vsyscall_timeinfo(void) |
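The rewritten kvmclock init registers a sched_clock that subtracts the value read at boot, so the scheduler clock starts near zero and can be declared stable whenever PVCLOCK_TSC_STABLE_BIT is set, without any help from the hypervisor. A user-space sketch of the offset arithmetic (the clock source here is faked):

```c
#include <stdint.h>
#include <stdio.h>

/* Stand-in for kvm_clock_read(): monotonic nanoseconds from the pvclock. */
static uint64_t fake_kvm_clock;
static uint64_t kvm_clock_read_sketch(void) { return fake_kvm_clock; }

static uint64_t sched_clock_offset;

/* Mirrors kvm_sched_clock_init(stable=true): remember the boot-time reading... */
static void sched_clock_init_sketch(void)
{
	sched_clock_offset = kvm_clock_read_sketch();
}

/* ...mirrors kvm_sched_clock_read(): report time since that point, so the
 * scheduler clock starts near zero. */
static uint64_t sched_clock_sketch(void)
{
	return kvm_clock_read_sketch() - sched_clock_offset;
}

int main(void)
{
	fake_kvm_clock = 123456789;	/* guest had already been running at init */
	sched_clock_init_sketch();
	fake_kvm_clock += 1000;		/* 1000 ns later */
	printf("sched_clock = %llu ns\n",
	       (unsigned long long)sched_clock_sketch());
	return 0;
}
```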
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index d8a1d56276e1..639a6e34500c 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -28,6 +28,8 @@ config KVM | |||
28 | select ANON_INODES | 28 | select ANON_INODES |
29 | select HAVE_KVM_IRQCHIP | 29 | select HAVE_KVM_IRQCHIP |
30 | select HAVE_KVM_IRQFD | 30 | select HAVE_KVM_IRQFD |
31 | select IRQ_BYPASS_MANAGER | ||
32 | select HAVE_KVM_IRQ_BYPASS | ||
31 | select HAVE_KVM_IRQ_ROUTING | 33 | select HAVE_KVM_IRQ_ROUTING |
32 | select HAVE_KVM_EVENTFD | 34 | select HAVE_KVM_EVENTFD |
33 | select KVM_APIC_ARCHITECTURE | 35 | select KVM_APIC_ARCHITECTURE |
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c index d090ecf08809..9dc091acd5fb 100644 --- a/arch/x86/kvm/assigned-dev.c +++ b/arch/x86/kvm/assigned-dev.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include "irq.h" | 22 | #include "irq.h" |
23 | #include "assigned-dev.h" | 23 | #include "assigned-dev.h" |
24 | #include "trace/events/kvm.h" | ||
24 | 25 | ||
25 | struct kvm_assigned_dev_kernel { | 26 | struct kvm_assigned_dev_kernel { |
26 | struct kvm_irq_ack_notifier ack_notifier; | 27 | struct kvm_irq_ack_notifier ack_notifier; |
@@ -131,7 +132,42 @@ static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) | |||
131 | return IRQ_HANDLED; | 132 | return IRQ_HANDLED; |
132 | } | 133 | } |
133 | 134 | ||
134 | #ifdef __KVM_HAVE_MSI | 135 | /* |
136 | * Deliver an IRQ in an atomic context if we can, or return a failure, | ||
137 | * user can retry in a process context. | ||
138 | * Return value: | ||
139 | * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. | ||
140 | * Other values - No need to retry. | ||
141 | */ | ||
142 | static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, | ||
143 | int level) | ||
144 | { | ||
145 | struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; | ||
146 | struct kvm_kernel_irq_routing_entry *e; | ||
147 | int ret = -EINVAL; | ||
148 | int idx; | ||
149 | |||
150 | trace_kvm_set_irq(irq, level, irq_source_id); | ||
151 | |||
152 | /* | ||
153 | * Injection into either PIC or IOAPIC might need to scan all CPUs, | ||
154 | * which would need to be retried from thread context; when same GSI | ||
155 | * is connected to both PIC and IOAPIC, we'd have to report a | ||
156 | * partial failure here. | ||
157 | * Since there's no easy way to do this, we only support injecting MSI | ||
158 | * which is limited to 1:1 GSI mapping. | ||
159 | */ | ||
160 | idx = srcu_read_lock(&kvm->irq_srcu); | ||
161 | if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { | ||
162 | e = &entries[0]; | ||
163 | ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id, | ||
164 | irq, level); | ||
165 | } | ||
166 | srcu_read_unlock(&kvm->irq_srcu, idx); | ||
167 | return ret; | ||
168 | } | ||
169 | |||
170 | |||
135 | static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) | 171 | static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) |
136 | { | 172 | { |
137 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | 173 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; |
@@ -150,9 +186,7 @@ static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id) | |||
150 | 186 | ||
151 | return IRQ_HANDLED; | 187 | return IRQ_HANDLED; |
152 | } | 188 | } |
153 | #endif | ||
154 | 189 | ||
155 | #ifdef __KVM_HAVE_MSIX | ||
156 | static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id) | 190 | static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id) |
157 | { | 191 | { |
158 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | 192 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; |
@@ -183,7 +217,6 @@ static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) | |||
183 | 217 | ||
184 | return IRQ_HANDLED; | 218 | return IRQ_HANDLED; |
185 | } | 219 | } |
186 | #endif | ||
187 | 220 | ||
188 | /* Ack the irq line for an assigned device */ | 221 | /* Ack the irq line for an assigned device */ |
189 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | 222 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) |
@@ -386,7 +419,6 @@ static int assigned_device_enable_host_intx(struct kvm *kvm, | |||
386 | return 0; | 419 | return 0; |
387 | } | 420 | } |
388 | 421 | ||
389 | #ifdef __KVM_HAVE_MSI | ||
390 | static int assigned_device_enable_host_msi(struct kvm *kvm, | 422 | static int assigned_device_enable_host_msi(struct kvm *kvm, |
391 | struct kvm_assigned_dev_kernel *dev) | 423 | struct kvm_assigned_dev_kernel *dev) |
392 | { | 424 | { |
@@ -408,9 +440,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, | |||
408 | 440 | ||
409 | return 0; | 441 | return 0; |
410 | } | 442 | } |
411 | #endif | ||
412 | 443 | ||
413 | #ifdef __KVM_HAVE_MSIX | ||
414 | static int assigned_device_enable_host_msix(struct kvm *kvm, | 444 | static int assigned_device_enable_host_msix(struct kvm *kvm, |
415 | struct kvm_assigned_dev_kernel *dev) | 445 | struct kvm_assigned_dev_kernel *dev) |
416 | { | 446 | { |
@@ -443,8 +473,6 @@ err: | |||
443 | return r; | 473 | return r; |
444 | } | 474 | } |
445 | 475 | ||
446 | #endif | ||
447 | |||
448 | static int assigned_device_enable_guest_intx(struct kvm *kvm, | 476 | static int assigned_device_enable_guest_intx(struct kvm *kvm, |
449 | struct kvm_assigned_dev_kernel *dev, | 477 | struct kvm_assigned_dev_kernel *dev, |
450 | struct kvm_assigned_irq *irq) | 478 | struct kvm_assigned_irq *irq) |
@@ -454,7 +482,6 @@ static int assigned_device_enable_guest_intx(struct kvm *kvm, | |||
454 | return 0; | 482 | return 0; |
455 | } | 483 | } |
456 | 484 | ||
457 | #ifdef __KVM_HAVE_MSI | ||
458 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | 485 | static int assigned_device_enable_guest_msi(struct kvm *kvm, |
459 | struct kvm_assigned_dev_kernel *dev, | 486 | struct kvm_assigned_dev_kernel *dev, |
460 | struct kvm_assigned_irq *irq) | 487 | struct kvm_assigned_irq *irq) |
@@ -463,9 +490,7 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm, | |||
463 | dev->ack_notifier.gsi = -1; | 490 | dev->ack_notifier.gsi = -1; |
464 | return 0; | 491 | return 0; |
465 | } | 492 | } |
466 | #endif | ||
467 | 493 | ||
468 | #ifdef __KVM_HAVE_MSIX | ||
469 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | 494 | static int assigned_device_enable_guest_msix(struct kvm *kvm, |
470 | struct kvm_assigned_dev_kernel *dev, | 495 | struct kvm_assigned_dev_kernel *dev, |
471 | struct kvm_assigned_irq *irq) | 496 | struct kvm_assigned_irq *irq) |
@@ -474,7 +499,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm, | |||
474 | dev->ack_notifier.gsi = -1; | 499 | dev->ack_notifier.gsi = -1; |
475 | return 0; | 500 | return 0; |
476 | } | 501 | } |
477 | #endif | ||
478 | 502 | ||
479 | static int assign_host_irq(struct kvm *kvm, | 503 | static int assign_host_irq(struct kvm *kvm, |
480 | struct kvm_assigned_dev_kernel *dev, | 504 | struct kvm_assigned_dev_kernel *dev, |
@@ -492,16 +516,12 @@ static int assign_host_irq(struct kvm *kvm, | |||
492 | case KVM_DEV_IRQ_HOST_INTX: | 516 | case KVM_DEV_IRQ_HOST_INTX: |
493 | r = assigned_device_enable_host_intx(kvm, dev); | 517 | r = assigned_device_enable_host_intx(kvm, dev); |
494 | break; | 518 | break; |
495 | #ifdef __KVM_HAVE_MSI | ||
496 | case KVM_DEV_IRQ_HOST_MSI: | 519 | case KVM_DEV_IRQ_HOST_MSI: |
497 | r = assigned_device_enable_host_msi(kvm, dev); | 520 | r = assigned_device_enable_host_msi(kvm, dev); |
498 | break; | 521 | break; |
499 | #endif | ||
500 | #ifdef __KVM_HAVE_MSIX | ||
501 | case KVM_DEV_IRQ_HOST_MSIX: | 522 | case KVM_DEV_IRQ_HOST_MSIX: |
502 | r = assigned_device_enable_host_msix(kvm, dev); | 523 | r = assigned_device_enable_host_msix(kvm, dev); |
503 | break; | 524 | break; |
504 | #endif | ||
505 | default: | 525 | default: |
506 | r = -EINVAL; | 526 | r = -EINVAL; |
507 | } | 527 | } |
@@ -534,16 +554,12 @@ static int assign_guest_irq(struct kvm *kvm, | |||
534 | case KVM_DEV_IRQ_GUEST_INTX: | 554 | case KVM_DEV_IRQ_GUEST_INTX: |
535 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | 555 | r = assigned_device_enable_guest_intx(kvm, dev, irq); |
536 | break; | 556 | break; |
537 | #ifdef __KVM_HAVE_MSI | ||
538 | case KVM_DEV_IRQ_GUEST_MSI: | 557 | case KVM_DEV_IRQ_GUEST_MSI: |
539 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | 558 | r = assigned_device_enable_guest_msi(kvm, dev, irq); |
540 | break; | 559 | break; |
541 | #endif | ||
542 | #ifdef __KVM_HAVE_MSIX | ||
543 | case KVM_DEV_IRQ_GUEST_MSIX: | 560 | case KVM_DEV_IRQ_GUEST_MSIX: |
544 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | 561 | r = assigned_device_enable_guest_msix(kvm, dev, irq); |
545 | break; | 562 | break; |
546 | #endif | ||
547 | default: | 563 | default: |
548 | r = -EINVAL; | 564 | r = -EINVAL; |
549 | } | 565 | } |
@@ -826,7 +842,6 @@ out: | |||
826 | } | 842 | } |
827 | 843 | ||
828 | 844 | ||
829 | #ifdef __KVM_HAVE_MSIX | ||
830 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | 845 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, |
831 | struct kvm_assigned_msix_nr *entry_nr) | 846 | struct kvm_assigned_msix_nr *entry_nr) |
832 | { | 847 | { |
@@ -906,7 +921,6 @@ msix_entry_out: | |||
906 | 921 | ||
907 | return r; | 922 | return r; |
908 | } | 923 | } |
909 | #endif | ||
910 | 924 | ||
911 | static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, | 925 | static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, |
912 | struct kvm_assigned_pci_dev *assigned_dev) | 926 | struct kvm_assigned_pci_dev *assigned_dev) |
@@ -1012,7 +1026,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |||
1012 | goto out; | 1026 | goto out; |
1013 | break; | 1027 | break; |
1014 | } | 1028 | } |
1015 | #ifdef __KVM_HAVE_MSIX | ||
1016 | case KVM_ASSIGN_SET_MSIX_NR: { | 1029 | case KVM_ASSIGN_SET_MSIX_NR: { |
1017 | struct kvm_assigned_msix_nr entry_nr; | 1030 | struct kvm_assigned_msix_nr entry_nr; |
1018 | r = -EFAULT; | 1031 | r = -EFAULT; |
@@ -1033,7 +1046,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |||
1033 | goto out; | 1046 | goto out; |
1034 | break; | 1047 | break; |
1035 | } | 1048 | } |
1036 | #endif | ||
1037 | case KVM_ASSIGN_SET_INTX_MASK: { | 1049 | case KVM_ASSIGN_SET_INTX_MASK: { |
1038 | struct kvm_assigned_pci_dev assigned_dev; | 1050 | struct kvm_assigned_pci_dev assigned_dev; |
1039 | 1051 | ||
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 156441bcaac8..6525e926f566 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -348,7 +348,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
348 | F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | | 348 | F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | |
349 | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | | 349 | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | |
350 | F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) | | 350 | F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) | |
351 | F(AVX512CD); | 351 | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT); |
352 | 352 | ||
353 | /* cpuid 0xD.1.eax */ | 353 | /* cpuid 0xD.1.eax */ |
354 | const u32 kvm_supported_word10_x86_features = | 354 | const u32 kvm_supported_word10_x86_features = |
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index dd05b9cef6ae..06332cb7e7d1 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h | |||
@@ -133,4 +133,41 @@ static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) | |||
133 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | 133 | best = kvm_find_cpuid_entry(vcpu, 7, 0); |
134 | return best && (best->ebx & bit(X86_FEATURE_MPX)); | 134 | return best && (best->ebx & bit(X86_FEATURE_MPX)); |
135 | } | 135 | } |
136 | |||
137 | static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) | ||
138 | { | ||
139 | struct kvm_cpuid_entry2 *best; | ||
140 | |||
141 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
142 | return best && (best->ebx & bit(X86_FEATURE_PCOMMIT)); | ||
143 | } | ||
144 | |||
145 | static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) | ||
146 | { | ||
147 | struct kvm_cpuid_entry2 *best; | ||
148 | |||
149 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
150 | return best && (best->edx & bit(X86_FEATURE_RDTSCP)); | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 | ||
155 | */ | ||
156 | #define BIT_NRIPS 3 | ||
157 | |||
158 | static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu) | ||
159 | { | ||
160 | struct kvm_cpuid_entry2 *best; | ||
161 | |||
162 | best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0); | ||
163 | |||
164 | /* | ||
165 | * NRIPS is a scattered cpuid feature, so we can't use | ||
166 | * X86_FEATURE_NRIPS here (X86_FEATURE_NRIPS would be bit | ||
167 | * position 8, not 3). | ||
168 | */ | ||
169 | return best && (best->edx & bit(BIT_NRIPS)); | ||
170 | } | ||
171 | #undef BIT_NRIPS | ||
172 | |||
136 | #endif | 173 | #endif |
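The helpers added above all follow the same pattern: look the leaf up with kvm_find_cpuid_entry() and test one bit. On hot paths the result is normally cached once when the guest's CPUID is updated rather than re-walked on every exit; the nrips_enabled flag added to struct vcpu_svm later in this diff is an example of that. A minimal sketch of the caching pattern, with a hypothetical function name:

/* Sketch only (hypothetical function name): cache a guest CPUID lookup at
 * CPUID-update time so hot paths test a one-bit field instead of walking
 * the guest's CPUID entries again.  Uses guest_cpuid_has_nrips() above. */
static void svm_cache_guest_cpuid(struct kvm_vcpu *vcpu)
{
        struct vcpu_svm *svm = to_svm(vcpu);

        svm->nrips_enabled = !!guest_cpuid_has_nrips(vcpu);
}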
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9da95b9daf8d..1505587d06e9 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -2272,8 +2272,8 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) | |||
2272 | #define GET_SMSTATE(type, smbase, offset) \ | 2272 | #define GET_SMSTATE(type, smbase, offset) \ |
2273 | ({ \ | 2273 | ({ \ |
2274 | type __val; \ | 2274 | type __val; \ |
2275 | int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val, \ | 2275 | int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \ |
2276 | sizeof(__val), NULL); \ | 2276 | sizeof(__val)); \ |
2277 | if (r != X86EMUL_CONTINUE) \ | 2277 | if (r != X86EMUL_CONTINUE) \ |
2278 | return X86EMUL_UNHANDLEABLE; \ | 2278 | return X86EMUL_UNHANDLEABLE; \ |
2279 | __val; \ | 2279 | __val; \ |
@@ -2484,17 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) | |||
2484 | 2484 | ||
2485 | /* | 2485 | /* |
2486 | * Get back to real mode, to prepare a safe state in which to load | 2486 | * Get back to real mode, to prepare a safe state in which to load |
2487 | * CR0/CR3/CR4/EFER. Also this will ensure that addresses passed | 2487 | * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU |
2488 | * to read_std/write_std are not virtual. | 2488 | * supports long mode. |
2489 | * | ||
2490 | * CR4.PCIDE must be zero, because it is a 64-bit mode only feature. | ||
2491 | */ | 2489 | */ |
2490 | cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2491 | if (emulator_has_longmode(ctxt)) { | ||
2492 | struct desc_struct cs_desc; | ||
2493 | |||
2494 | /* Zero CR4.PCIDE before CR0.PG. */ | ||
2495 | if (cr4 & X86_CR4_PCIDE) { | ||
2496 | ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); | ||
2497 | cr4 &= ~X86_CR4_PCIDE; | ||
2498 | } | ||
2499 | |||
2500 | /* A 32-bit code segment is required to clear EFER.LMA. */ | ||
2501 | memset(&cs_desc, 0, sizeof(cs_desc)); | ||
2502 | cs_desc.type = 0xb; | ||
2503 | cs_desc.s = cs_desc.g = cs_desc.p = 1; | ||
2504 | ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS); | ||
2505 | } | ||
2506 | |||
2507 | /* For the 64-bit case, this will clear EFER.LMA. */ | ||
2492 | cr0 = ctxt->ops->get_cr(ctxt, 0); | 2508 | cr0 = ctxt->ops->get_cr(ctxt, 0); |
2493 | if (cr0 & X86_CR0_PE) | 2509 | if (cr0 & X86_CR0_PE) |
2494 | ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); | 2510 | ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); |
2495 | cr4 = ctxt->ops->get_cr(ctxt, 4); | 2511 | |
2512 | /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */ | ||
2496 | if (cr4 & X86_CR4_PAE) | 2513 | if (cr4 & X86_CR4_PAE) |
2497 | ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); | 2514 | ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); |
2515 | |||
2516 | /* And finally go back to 32-bit mode. */ | ||
2498 | efer = 0; | 2517 | efer = 0; |
2499 | ctxt->ops->set_msr(ctxt, MSR_EFER, efer); | 2518 | ctxt->ops->set_msr(ctxt, MSR_EFER, efer); |
2500 | 2519 | ||
@@ -4455,7 +4474,7 @@ static const struct opcode twobyte_table[256] = { | |||
4455 | F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, | 4474 | F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, |
4456 | /* 0xA8 - 0xAF */ | 4475 | /* 0xA8 - 0xAF */ |
4457 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), | 4476 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), |
4458 | II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm), | 4477 | II(EmulateOnUD | ImplicitOps, em_rsm, rsm), |
4459 | F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), | 4478 | F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), |
4460 | F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), | 4479 | F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), |
4461 | F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), | 4480 | F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), |
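The GET_SMSTATE change above switches the SMM state-save reads from read_std, which takes a virtual address, to the new read_phys callback, since SMBASE is a physical address. A minimal sketch of such a callback on the KVM side, assuming emul_to_vcpu() and kvm_vcpu_read_guest() as used elsewhere in x86.c; this illustrates the idea rather than reproducing the exact implementation:

/* Sketch: read guest memory by guest-physical address, skipping the MMU
 * walk that read_std performs for virtual addresses. */
static int emulator_read_phys(struct x86_emulate_ctxt *ctxt, unsigned long addr,
                              void *val, unsigned int bytes)
{
        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
        int rc = kvm_vcpu_read_guest(vcpu, addr, val, bytes);

        if (rc < 0)
                return X86EMUL_IO_NEEDED;
        return X86EMUL_CONTINUE;
}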
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index a8160d2ae362..62cf8c915e95 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c | |||
@@ -41,6 +41,7 @@ static bool kvm_hv_msr_partition_wide(u32 msr) | |||
41 | case HV_X64_MSR_TIME_REF_COUNT: | 41 | case HV_X64_MSR_TIME_REF_COUNT: |
42 | case HV_X64_MSR_CRASH_CTL: | 42 | case HV_X64_MSR_CRASH_CTL: |
43 | case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: | 43 | case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: |
44 | case HV_X64_MSR_RESET: | ||
44 | r = true; | 45 | r = true; |
45 | break; | 46 | break; |
46 | } | 47 | } |
@@ -163,6 +164,12 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, | |||
163 | data); | 164 | data); |
164 | case HV_X64_MSR_CRASH_CTL: | 165 | case HV_X64_MSR_CRASH_CTL: |
165 | return kvm_hv_msr_set_crash_ctl(vcpu, data, host); | 166 | return kvm_hv_msr_set_crash_ctl(vcpu, data, host); |
167 | case HV_X64_MSR_RESET: | ||
168 | if (data == 1) { | ||
169 | vcpu_debug(vcpu, "hyper-v reset requested\n"); | ||
170 | kvm_make_request(KVM_REQ_HV_RESET, vcpu); | ||
171 | } | ||
172 | break; | ||
166 | default: | 173 | default: |
167 | vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", | 174 | vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", |
168 | msr, data); | 175 | msr, data); |
@@ -171,7 +178,16 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, | |||
171 | return 0; | 178 | return 0; |
172 | } | 179 | } |
173 | 180 | ||
174 | static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 181 | /* Calculate cpu time spent by current task in 100ns units */ |
182 | static u64 current_task_runtime_100ns(void) | ||
183 | { | ||
184 | cputime_t utime, stime; | ||
185 | |||
186 | task_cputime_adjusted(current, &utime, &stime); | ||
187 | return div_u64(cputime_to_nsecs(utime + stime), 100); | ||
188 | } | ||
189 | |||
190 | static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) | ||
175 | { | 191 | { |
176 | struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; | 192 | struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; |
177 | 193 | ||
@@ -205,6 +221,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
205 | return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); | 221 | return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); |
206 | case HV_X64_MSR_TPR: | 222 | case HV_X64_MSR_TPR: |
207 | return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); | 223 | return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); |
224 | case HV_X64_MSR_VP_RUNTIME: | ||
225 | if (!host) | ||
226 | return 1; | ||
227 | hv->runtime_offset = data - current_task_runtime_100ns(); | ||
228 | break; | ||
208 | default: | 229 | default: |
209 | vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", | 230 | vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", |
210 | msr, data); | 231 | msr, data); |
@@ -241,6 +262,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
241 | pdata); | 262 | pdata); |
242 | case HV_X64_MSR_CRASH_CTL: | 263 | case HV_X64_MSR_CRASH_CTL: |
243 | return kvm_hv_msr_get_crash_ctl(vcpu, pdata); | 264 | return kvm_hv_msr_get_crash_ctl(vcpu, pdata); |
265 | case HV_X64_MSR_RESET: | ||
266 | data = 0; | ||
267 | break; | ||
244 | default: | 268 | default: |
245 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 269 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
246 | return 1; | 270 | return 1; |
@@ -277,6 +301,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
277 | case HV_X64_MSR_APIC_ASSIST_PAGE: | 301 | case HV_X64_MSR_APIC_ASSIST_PAGE: |
278 | data = hv->hv_vapic; | 302 | data = hv->hv_vapic; |
279 | break; | 303 | break; |
304 | case HV_X64_MSR_VP_RUNTIME: | ||
305 | data = current_task_runtime_100ns() + hv->runtime_offset; | ||
306 | break; | ||
280 | default: | 307 | default: |
281 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 308 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
282 | return 1; | 309 | return 1; |
@@ -295,7 +322,7 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) | |||
295 | mutex_unlock(&vcpu->kvm->lock); | 322 | mutex_unlock(&vcpu->kvm->lock); |
296 | return r; | 323 | return r; |
297 | } else | 324 | } else |
298 | return kvm_hv_set_msr(vcpu, msr, data); | 325 | return kvm_hv_set_msr(vcpu, msr, data, host); |
299 | } | 326 | } |
300 | 327 | ||
301 | int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | 328 | int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) |
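The HV_X64_MSR_VP_RUNTIME handling above stores only an offset: reads return current_task_runtime_100ns() plus runtime_offset, so the guest-visible counter tracks the host task's accumulated CPU time in 100 ns units while still honoring a host-initiated write. A stand-alone model of the offset trick:

/* Stand-alone model of the VP_RUNTIME offset handling above.
 * All values are in 100 ns units. */
#include <stdint.h>

struct vp_runtime_model {
        uint64_t offset;
};

static void vp_runtime_write(struct vp_runtime_model *m, uint64_t now, uint64_t data)
{
        m->offset = data - now;         /* as in kvm_hv_set_msr() */
}

static uint64_t vp_runtime_read(const struct vp_runtime_model *m, uint64_t now)
{
        return now + m->offset;         /* as in kvm_hv_get_msr() */
}

Writing 0 at some instant makes later reads return exactly the CPU time the task has consumed since that write; the unsigned subtraction and addition cancel even across wraparound.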
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index f90952f64e79..08116ff227cc 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/kvm_host.h> | 35 | #include <linux/kvm_host.h> |
36 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
37 | 37 | ||
38 | #include "ioapic.h" | ||
38 | #include "irq.h" | 39 | #include "irq.h" |
39 | #include "i8254.h" | 40 | #include "i8254.h" |
40 | #include "x86.h" | 41 | #include "x86.h" |
@@ -333,7 +334,8 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) | |||
333 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; | 334 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; |
334 | s64 interval; | 335 | s64 interval; |
335 | 336 | ||
336 | if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) | 337 | if (!ioapic_in_kernel(kvm) || |
338 | ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) | ||
337 | return; | 339 | return; |
338 | 340 | ||
339 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); | 341 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); |
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 856f79105bb5..88d0a92d3f94 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c | |||
@@ -233,21 +233,7 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) | |||
233 | } | 233 | } |
234 | 234 | ||
235 | 235 | ||
236 | static void update_handled_vectors(struct kvm_ioapic *ioapic) | 236 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) |
237 | { | ||
238 | DECLARE_BITMAP(handled_vectors, 256); | ||
239 | int i; | ||
240 | |||
241 | memset(handled_vectors, 0, sizeof(handled_vectors)); | ||
242 | for (i = 0; i < IOAPIC_NUM_PINS; ++i) | ||
243 | __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors); | ||
244 | memcpy(ioapic->handled_vectors, handled_vectors, | ||
245 | sizeof(handled_vectors)); | ||
246 | smp_wmb(); | ||
247 | } | ||
248 | |||
249 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, | ||
250 | u32 *tmr) | ||
251 | { | 237 | { |
252 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; | 238 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; |
253 | union kvm_ioapic_redirect_entry *e; | 239 | union kvm_ioapic_redirect_entry *e; |
@@ -260,13 +246,11 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, | |||
260 | kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) || | 246 | kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) || |
261 | index == RTC_GSI) { | 247 | index == RTC_GSI) { |
262 | if (kvm_apic_match_dest(vcpu, NULL, 0, | 248 | if (kvm_apic_match_dest(vcpu, NULL, 0, |
263 | e->fields.dest_id, e->fields.dest_mode)) { | 249 | e->fields.dest_id, e->fields.dest_mode) || |
250 | (e->fields.trig_mode == IOAPIC_EDGE_TRIG && | ||
251 | kvm_apic_pending_eoi(vcpu, e->fields.vector))) | ||
264 | __set_bit(e->fields.vector, | 252 | __set_bit(e->fields.vector, |
265 | (unsigned long *)eoi_exit_bitmap); | 253 | (unsigned long *)eoi_exit_bitmap); |
266 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG) | ||
267 | __set_bit(e->fields.vector, | ||
268 | (unsigned long *)tmr); | ||
269 | } | ||
270 | } | 254 | } |
271 | } | 255 | } |
272 | spin_unlock(&ioapic->lock); | 256 | spin_unlock(&ioapic->lock); |
@@ -315,7 +299,6 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | |||
315 | e->bits |= (u32) val; | 299 | e->bits |= (u32) val; |
316 | e->fields.remote_irr = 0; | 300 | e->fields.remote_irr = 0; |
317 | } | 301 | } |
318 | update_handled_vectors(ioapic); | ||
319 | mask_after = e->fields.mask; | 302 | mask_after = e->fields.mask; |
320 | if (mask_before != mask_after) | 303 | if (mask_before != mask_after) |
321 | kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); | 304 | kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); |
@@ -599,7 +582,6 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) | |||
599 | ioapic->id = 0; | 582 | ioapic->id = 0; |
600 | memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); | 583 | memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); |
601 | rtc_irq_eoi_tracking_reset(ioapic); | 584 | rtc_irq_eoi_tracking_reset(ioapic); |
602 | update_handled_vectors(ioapic); | ||
603 | } | 585 | } |
604 | 586 | ||
605 | static const struct kvm_io_device_ops ioapic_mmio_ops = { | 587 | static const struct kvm_io_device_ops ioapic_mmio_ops = { |
@@ -628,8 +610,10 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
628 | if (ret < 0) { | 610 | if (ret < 0) { |
629 | kvm->arch.vioapic = NULL; | 611 | kvm->arch.vioapic = NULL; |
630 | kfree(ioapic); | 612 | kfree(ioapic); |
613 | return ret; | ||
631 | } | 614 | } |
632 | 615 | ||
616 | kvm_vcpu_request_scan_ioapic(kvm); | ||
633 | return ret; | 617 | return ret; |
634 | } | 618 | } |
635 | 619 | ||
@@ -666,7 +650,6 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | |||
666 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); | 650 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); |
667 | ioapic->irr = 0; | 651 | ioapic->irr = 0; |
668 | ioapic->irr_delivered = 0; | 652 | ioapic->irr_delivered = 0; |
669 | update_handled_vectors(ioapic); | ||
670 | kvm_vcpu_request_scan_ioapic(kvm); | 653 | kvm_vcpu_request_scan_ioapic(kvm); |
671 | kvm_ioapic_inject_all(ioapic, state->irr); | 654 | kvm_ioapic_inject_all(ioapic, state->irr); |
672 | spin_unlock(&ioapic->lock); | 655 | spin_unlock(&ioapic->lock); |
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index ca0b0b4e6256..084617d37c74 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h | |||
@@ -9,6 +9,7 @@ struct kvm; | |||
9 | struct kvm_vcpu; | 9 | struct kvm_vcpu; |
10 | 10 | ||
11 | #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS | 11 | #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS |
12 | #define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES | ||
12 | #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ | 13 | #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ |
13 | #define IOAPIC_EDGE_TRIG 0 | 14 | #define IOAPIC_EDGE_TRIG 0 |
14 | #define IOAPIC_LEVEL_TRIG 1 | 15 | #define IOAPIC_LEVEL_TRIG 1 |
@@ -73,7 +74,6 @@ struct kvm_ioapic { | |||
73 | struct kvm *kvm; | 74 | struct kvm *kvm; |
74 | void (*ack_notifier)(void *opaque, int irq); | 75 | void (*ack_notifier)(void *opaque, int irq); |
75 | spinlock_t lock; | 76 | spinlock_t lock; |
76 | DECLARE_BITMAP(handled_vectors, 256); | ||
77 | struct rtc_status rtc_status; | 77 | struct rtc_status rtc_status; |
78 | struct delayed_work eoi_inject; | 78 | struct delayed_work eoi_inject; |
79 | u32 irq_eoi[IOAPIC_NUM_PINS]; | 79 | u32 irq_eoi[IOAPIC_NUM_PINS]; |
@@ -98,11 +98,12 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) | |||
98 | return kvm->arch.vioapic; | 98 | return kvm->arch.vioapic; |
99 | } | 99 | } |
100 | 100 | ||
101 | static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) | 101 | static inline int ioapic_in_kernel(struct kvm *kvm) |
102 | { | 102 | { |
103 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 103 | int ret; |
104 | smp_rmb(); | 104 | |
105 | return test_bit(vector, ioapic->handled_vectors); | 105 | ret = (ioapic_irqchip(kvm) != NULL); |
106 | return ret; | ||
106 | } | 107 | } |
107 | 108 | ||
108 | void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); | 109 | void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); |
@@ -120,7 +121,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
120 | struct kvm_lapic_irq *irq, unsigned long *dest_map); | 121 | struct kvm_lapic_irq *irq, unsigned long *dest_map); |
121 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | 122 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); |
122 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | 123 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); |
123 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, | 124 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); |
124 | u32 *tmr); | 125 | void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); |
125 | 126 | ||
126 | #endif | 127 | #endif |
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index a1ec6a50a05a..097060e33bd6 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
@@ -38,14 +38,27 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | |||
38 | EXPORT_SYMBOL(kvm_cpu_has_pending_timer); | 38 | EXPORT_SYMBOL(kvm_cpu_has_pending_timer); |
39 | 39 | ||
40 | /* | 40 | /* |
41 | * check if there is a pending userspace external interrupt | ||
42 | */ | ||
43 | static int pending_userspace_extint(struct kvm_vcpu *v) | ||
44 | { | ||
45 | return v->arch.pending_external_vector != -1; | ||
46 | } | ||
47 | |||
48 | /* | ||
41 | * check if there is pending interrupt from | 49 | * check if there is pending interrupt from |
42 | * non-APIC source without intack. | 50 | * non-APIC source without intack. |
43 | */ | 51 | */ |
44 | static int kvm_cpu_has_extint(struct kvm_vcpu *v) | 52 | static int kvm_cpu_has_extint(struct kvm_vcpu *v) |
45 | { | 53 | { |
46 | if (kvm_apic_accept_pic_intr(v)) | 54 | u8 accept = kvm_apic_accept_pic_intr(v); |
47 | return pic_irqchip(v->kvm)->output; /* PIC */ | 55 | |
48 | else | 56 | if (accept) { |
57 | if (irqchip_split(v->kvm)) | ||
58 | return pending_userspace_extint(v); | ||
59 | else | ||
60 | return pic_irqchip(v->kvm)->output; | ||
61 | } else | ||
49 | return 0; | 62 | return 0; |
50 | } | 63 | } |
51 | 64 | ||
@@ -57,13 +70,13 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v) | |||
57 | */ | 70 | */ |
58 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) | 71 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) |
59 | { | 72 | { |
60 | if (!irqchip_in_kernel(v->kvm)) | 73 | if (!lapic_in_kernel(v)) |
61 | return v->arch.interrupt.pending; | 74 | return v->arch.interrupt.pending; |
62 | 75 | ||
63 | if (kvm_cpu_has_extint(v)) | 76 | if (kvm_cpu_has_extint(v)) |
64 | return 1; | 77 | return 1; |
65 | 78 | ||
66 | if (kvm_apic_vid_enabled(v->kvm)) | 79 | if (kvm_vcpu_apic_vid_enabled(v)) |
67 | return 0; | 80 | return 0; |
68 | 81 | ||
69 | return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ | 82 | return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ |
@@ -75,7 +88,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) | |||
75 | */ | 88 | */ |
76 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v) | 89 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v) |
77 | { | 90 | { |
78 | if (!irqchip_in_kernel(v->kvm)) | 91 | if (!lapic_in_kernel(v)) |
79 | return v->arch.interrupt.pending; | 92 | return v->arch.interrupt.pending; |
80 | 93 | ||
81 | if (kvm_cpu_has_extint(v)) | 94 | if (kvm_cpu_has_extint(v)) |
@@ -91,9 +104,16 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); | |||
91 | */ | 104 | */ |
92 | static int kvm_cpu_get_extint(struct kvm_vcpu *v) | 105 | static int kvm_cpu_get_extint(struct kvm_vcpu *v) |
93 | { | 106 | { |
94 | if (kvm_cpu_has_extint(v)) | 107 | if (kvm_cpu_has_extint(v)) { |
95 | return kvm_pic_read_irq(v->kvm); /* PIC */ | 108 | if (irqchip_split(v->kvm)) { |
96 | return -1; | 109 | int vector = v->arch.pending_external_vector; |
110 | |||
111 | v->arch.pending_external_vector = -1; | ||
112 | return vector; | ||
113 | } else | ||
114 | return kvm_pic_read_irq(v->kvm); /* PIC */ | ||
115 | } else | ||
116 | return -1; | ||
97 | } | 117 | } |
98 | 118 | ||
99 | /* | 119 | /* |
@@ -103,7 +123,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) | |||
103 | { | 123 | { |
104 | int vector; | 124 | int vector; |
105 | 125 | ||
106 | if (!irqchip_in_kernel(v->kvm)) | 126 | if (!lapic_in_kernel(v)) |
107 | return v->arch.interrupt.nr; | 127 | return v->arch.interrupt.nr; |
108 | 128 | ||
109 | vector = kvm_cpu_get_extint(v); | 129 | vector = kvm_cpu_get_extint(v); |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 3d782a2c336a..ae5c78f2337d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -83,13 +83,38 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) | |||
83 | return kvm->arch.vpic; | 83 | return kvm->arch.vpic; |
84 | } | 84 | } |
85 | 85 | ||
86 | static inline int pic_in_kernel(struct kvm *kvm) | ||
87 | { | ||
88 | int ret; | ||
89 | |||
90 | ret = (pic_irqchip(kvm) != NULL); | ||
91 | return ret; | ||
92 | } | ||
93 | |||
94 | static inline int irqchip_split(struct kvm *kvm) | ||
95 | { | ||
96 | return kvm->arch.irqchip_split; | ||
97 | } | ||
98 | |||
86 | static inline int irqchip_in_kernel(struct kvm *kvm) | 99 | static inline int irqchip_in_kernel(struct kvm *kvm) |
87 | { | 100 | { |
88 | struct kvm_pic *vpic = pic_irqchip(kvm); | 101 | struct kvm_pic *vpic = pic_irqchip(kvm); |
102 | bool ret; | ||
103 | |||
104 | ret = (vpic != NULL); | ||
105 | ret |= irqchip_split(kvm); | ||
89 | 106 | ||
90 | /* Read vpic before kvm->irq_routing. */ | 107 | /* Read vpic before kvm->irq_routing. */ |
91 | smp_rmb(); | 108 | smp_rmb(); |
92 | return vpic != NULL; | 109 | return ret; |
110 | } | ||
111 | |||
112 | static inline int lapic_in_kernel(struct kvm_vcpu *vcpu) | ||
113 | { | ||
114 | /* Same as irqchip_in_kernel(vcpu->kvm), but with less | ||
115 | * pointer chasing and no unnecessary memory barriers. | ||
116 | */ | ||
117 | return vcpu->arch.apic != NULL; | ||
93 | } | 118 | } |
94 | 119 | ||
95 | void kvm_pic_reset(struct kvm_kpic_state *s); | 120 | void kvm_pic_reset(struct kvm_kpic_state *s); |
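With the split-irqchip option there are now three possible configurations, and the predicates above (plus ioapic_in_kernel() in ioapic.h) distinguish them: pic_in_kernel(), irqchip_split(), irqchip_in_kernel() and the cheaper per-vCPU lapic_in_kernel(). The helper below is illustrative only, not part of the patch, and just spells out how they relate:

/* Illustrative only: classify the irqchip configuration from the
 * predicates defined in irq.h and ioapic.h. */
enum irqchip_mode {
        IRQCHIP_NONE,   /* PIC, IOAPIC and LAPIC all emulated in userspace */
        IRQCHIP_SPLIT,  /* LAPIC in kernel; PIC and IOAPIC in userspace */
        IRQCHIP_FULL,   /* PIC, IOAPIC and LAPIC all in kernel */
};

static inline enum irqchip_mode kvm_irqchip_mode(struct kvm *kvm)
{
        if (irqchip_split(kvm))
                return IRQCHIP_SPLIT;   /* irqchip_in_kernel() is also true here */
        if (pic_in_kernel(kvm))
                return IRQCHIP_FULL;    /* full KVM_CREATE_IRQCHIP case */
        return IRQCHIP_NONE;            /* all of the predicates return 0 */
}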
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 9efff9e5b58c..84b96d319909 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c | |||
@@ -91,8 +91,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
91 | return r; | 91 | return r; |
92 | } | 92 | } |
93 | 93 | ||
94 | static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, | 94 | void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, |
95 | struct kvm_lapic_irq *irq) | 95 | struct kvm_lapic_irq *irq) |
96 | { | 96 | { |
97 | trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); | 97 | trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); |
98 | 98 | ||
@@ -108,6 +108,7 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, | |||
108 | irq->level = 1; | 108 | irq->level = 1; |
109 | irq->shorthand = 0; | 109 | irq->shorthand = 0; |
110 | } | 110 | } |
111 | EXPORT_SYMBOL_GPL(kvm_set_msi_irq); | ||
111 | 112 | ||
112 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | 113 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, |
113 | struct kvm *kvm, int irq_source_id, int level, bool line_status) | 114 | struct kvm *kvm, int irq_source_id, int level, bool line_status) |
@@ -123,12 +124,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | |||
123 | } | 124 | } |
124 | 125 | ||
125 | 126 | ||
126 | static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, | 127 | int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, |
127 | struct kvm *kvm) | 128 | struct kvm *kvm, int irq_source_id, int level, |
129 | bool line_status) | ||
128 | { | 130 | { |
129 | struct kvm_lapic_irq irq; | 131 | struct kvm_lapic_irq irq; |
130 | int r; | 132 | int r; |
131 | 133 | ||
134 | if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) | ||
135 | return -EWOULDBLOCK; | ||
136 | |||
132 | kvm_set_msi_irq(e, &irq); | 137 | kvm_set_msi_irq(e, &irq); |
133 | 138 | ||
134 | if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) | 139 | if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) |
@@ -137,42 +142,6 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, | |||
137 | return -EWOULDBLOCK; | 142 | return -EWOULDBLOCK; |
138 | } | 143 | } |
139 | 144 | ||
140 | /* | ||
141 | * Deliver an IRQ in an atomic context if we can, or return a failure, | ||
142 | * user can retry in a process context. | ||
143 | * Return value: | ||
144 | * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. | ||
145 | * Other values - No need to retry. | ||
146 | */ | ||
147 | int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) | ||
148 | { | ||
149 | struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; | ||
150 | struct kvm_kernel_irq_routing_entry *e; | ||
151 | int ret = -EINVAL; | ||
152 | int idx; | ||
153 | |||
154 | trace_kvm_set_irq(irq, level, irq_source_id); | ||
155 | |||
156 | /* | ||
157 | * Injection into either PIC or IOAPIC might need to scan all CPUs, | ||
158 | * which would need to be retried from thread context; when same GSI | ||
159 | * is connected to both PIC and IOAPIC, we'd have to report a | ||
160 | * partial failure here. | ||
161 | * Since there's no easy way to do this, we only support injecting MSI | ||
162 | * which is limited to 1:1 GSI mapping. | ||
163 | */ | ||
164 | idx = srcu_read_lock(&kvm->irq_srcu); | ||
165 | if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { | ||
166 | e = &entries[0]; | ||
167 | if (likely(e->type == KVM_IRQ_ROUTING_MSI)) | ||
168 | ret = kvm_set_msi_inatomic(e, kvm); | ||
169 | else | ||
170 | ret = -EWOULDBLOCK; | ||
171 | } | ||
172 | srcu_read_unlock(&kvm->irq_srcu, idx); | ||
173 | return ret; | ||
174 | } | ||
175 | |||
176 | int kvm_request_irq_source_id(struct kvm *kvm) | 145 | int kvm_request_irq_source_id(struct kvm *kvm) |
177 | { | 146 | { |
178 | unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; | 147 | unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; |
@@ -208,7 +177,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) | |||
208 | goto unlock; | 177 | goto unlock; |
209 | } | 178 | } |
210 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); | 179 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); |
211 | if (!irqchip_in_kernel(kvm)) | 180 | if (!ioapic_in_kernel(kvm)) |
212 | goto unlock; | 181 | goto unlock; |
213 | 182 | ||
214 | kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); | 183 | kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); |
@@ -297,6 +266,33 @@ out: | |||
297 | return r; | 266 | return r; |
298 | } | 267 | } |
299 | 268 | ||
269 | bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, | ||
270 | struct kvm_vcpu **dest_vcpu) | ||
271 | { | ||
272 | int i, r = 0; | ||
273 | struct kvm_vcpu *vcpu; | ||
274 | |||
275 | if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu)) | ||
276 | return true; | ||
277 | |||
278 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
279 | if (!kvm_apic_present(vcpu)) | ||
280 | continue; | ||
281 | |||
282 | if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, | ||
283 | irq->dest_id, irq->dest_mode)) | ||
284 | continue; | ||
285 | |||
286 | if (++r == 2) | ||
287 | return false; | ||
288 | |||
289 | *dest_vcpu = vcpu; | ||
290 | } | ||
291 | |||
292 | return r == 1; | ||
293 | } | ||
294 | EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu); | ||
295 | |||
300 | #define IOAPIC_ROUTING_ENTRY(irq) \ | 296 | #define IOAPIC_ROUTING_ENTRY(irq) \ |
301 | { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ | 297 | { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ |
302 | .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } | 298 | .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } |
@@ -328,3 +324,54 @@ int kvm_setup_default_irq_routing(struct kvm *kvm) | |||
328 | return kvm_set_irq_routing(kvm, default_routing, | 324 | return kvm_set_irq_routing(kvm, default_routing, |
329 | ARRAY_SIZE(default_routing), 0); | 325 | ARRAY_SIZE(default_routing), 0); |
330 | } | 326 | } |
327 | |||
328 | static const struct kvm_irq_routing_entry empty_routing[] = {}; | ||
329 | |||
330 | int kvm_setup_empty_irq_routing(struct kvm *kvm) | ||
331 | { | ||
332 | return kvm_set_irq_routing(kvm, empty_routing, 0, 0); | ||
333 | } | ||
334 | |||
335 | void kvm_arch_irq_routing_update(struct kvm *kvm) | ||
336 | { | ||
337 | if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm)) | ||
338 | return; | ||
339 | kvm_make_scan_ioapic_request(kvm); | ||
340 | } | ||
341 | |||
342 | void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | ||
343 | { | ||
344 | struct kvm *kvm = vcpu->kvm; | ||
345 | struct kvm_kernel_irq_routing_entry *entry; | ||
346 | struct kvm_irq_routing_table *table; | ||
347 | u32 i, nr_ioapic_pins; | ||
348 | int idx; | ||
349 | |||
350 | /* kvm->irq_routing must be read after clearing | ||
351 | * KVM_SCAN_IOAPIC. */ | ||
352 | smp_mb(); | ||
353 | idx = srcu_read_lock(&kvm->irq_srcu); | ||
354 | table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); | ||
355 | nr_ioapic_pins = min_t(u32, table->nr_rt_entries, | ||
356 | kvm->arch.nr_reserved_ioapic_pins); | ||
357 | for (i = 0; i < nr_ioapic_pins; ++i) { | ||
358 | hlist_for_each_entry(entry, &table->map[i], link) { | ||
359 | u32 dest_id, dest_mode; | ||
360 | bool level; | ||
361 | |||
362 | if (entry->type != KVM_IRQ_ROUTING_MSI) | ||
363 | continue; | ||
364 | dest_id = (entry->msi.address_lo >> 12) & 0xff; | ||
365 | dest_mode = (entry->msi.address_lo >> 2) & 0x1; | ||
366 | level = entry->msi.data & MSI_DATA_TRIGGER_LEVEL; | ||
367 | if (level && kvm_apic_match_dest(vcpu, NULL, 0, | ||
368 | dest_id, dest_mode)) { | ||
369 | u32 vector = entry->msi.data & 0xff; | ||
370 | |||
371 | __set_bit(vector, | ||
372 | (unsigned long *) eoi_exit_bitmap); | ||
373 | } | ||
374 | } | ||
375 | } | ||
376 | srcu_read_unlock(&kvm->irq_srcu, idx); | ||
377 | } | ||
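kvm_scan_ioapic_routes() above pulls the destination ID, destination mode, trigger mode and vector straight out of each MSI routing entry's address and data words. For reference, a stand-alone user-space sketch of that decoding with the same shifts and masks (standard MSI layout; MSI_DATA_TRIGGER_LEVEL mirrors the <asm/msidef.h> definition):

/* Stand-alone sketch of the MSI address/data decoding used above. */
#include <stdbool.h>
#include <stdint.h>

#define MSI_DATA_TRIGGER_LEVEL  (1 << 15)

struct msi_fields {
        uint32_t dest_id;       /* address_lo bits 19:12 */
        uint32_t dest_mode;     /* address_lo bit 2: 0 = physical, 1 = logical */
        uint32_t vector;        /* data bits 7:0 */
        bool     level;         /* data bit 15: level-triggered */
};

static struct msi_fields msi_decode(uint32_t address_lo, uint32_t data)
{
        struct msi_fields f = {
                .dest_id   = (address_lo >> 12) & 0xff,
                .dest_mode = (address_lo >> 2) & 0x1,
                .vector    = data & 0xff,
                .level     = !!(data & MSI_DATA_TRIGGER_LEVEL),
        };
        return f;
}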
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8d9013c5e1ee..ecd4ea1d28a8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -209,7 +209,7 @@ out: | |||
209 | if (old) | 209 | if (old) |
210 | kfree_rcu(old, rcu); | 210 | kfree_rcu(old, rcu); |
211 | 211 | ||
212 | kvm_vcpu_request_scan_ioapic(kvm); | 212 | kvm_make_scan_ioapic_request(kvm); |
213 | } | 213 | } |
214 | 214 | ||
215 | static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) | 215 | static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) |
@@ -348,6 +348,8 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) | |||
348 | struct kvm_lapic *apic = vcpu->arch.apic; | 348 | struct kvm_lapic *apic = vcpu->arch.apic; |
349 | 349 | ||
350 | __kvm_apic_update_irr(pir, apic->regs); | 350 | __kvm_apic_update_irr(pir, apic->regs); |
351 | |||
352 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
351 | } | 353 | } |
352 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); | 354 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); |
353 | 355 | ||
@@ -390,7 +392,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) | |||
390 | 392 | ||
391 | vcpu = apic->vcpu; | 393 | vcpu = apic->vcpu; |
392 | 394 | ||
393 | if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) { | 395 | if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) { |
394 | /* try to update RVI */ | 396 | /* try to update RVI */ |
395 | apic_clear_vector(vec, apic->regs + APIC_IRR); | 397 | apic_clear_vector(vec, apic->regs + APIC_IRR); |
396 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 398 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
@@ -551,15 +553,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) | |||
551 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | 553 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); |
552 | } | 554 | } |
553 | 555 | ||
554 | void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr) | ||
555 | { | ||
556 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
557 | int i; | ||
558 | |||
559 | for (i = 0; i < 8; i++) | ||
560 | apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]); | ||
561 | } | ||
562 | |||
563 | static void apic_update_ppr(struct kvm_lapic *apic) | 556 | static void apic_update_ppr(struct kvm_lapic *apic) |
564 | { | 557 | { |
565 | u32 tpr, isrv, ppr, old_ppr; | 558 | u32 tpr, isrv, ppr, old_ppr; |
@@ -764,6 +757,65 @@ out: | |||
764 | return ret; | 757 | return ret; |
765 | } | 758 | } |
766 | 759 | ||
760 | bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, | ||
761 | struct kvm_vcpu **dest_vcpu) | ||
762 | { | ||
763 | struct kvm_apic_map *map; | ||
764 | bool ret = false; | ||
765 | struct kvm_lapic *dst = NULL; | ||
766 | |||
767 | if (irq->shorthand) | ||
768 | return false; | ||
769 | |||
770 | rcu_read_lock(); | ||
771 | map = rcu_dereference(kvm->arch.apic_map); | ||
772 | |||
773 | if (!map) | ||
774 | goto out; | ||
775 | |||
776 | if (irq->dest_mode == APIC_DEST_PHYSICAL) { | ||
777 | if (irq->dest_id == 0xFF) | ||
778 | goto out; | ||
779 | |||
780 | if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) | ||
781 | goto out; | ||
782 | |||
783 | dst = map->phys_map[irq->dest_id]; | ||
784 | if (dst && kvm_apic_present(dst->vcpu)) | ||
785 | *dest_vcpu = dst->vcpu; | ||
786 | else | ||
787 | goto out; | ||
788 | } else { | ||
789 | u16 cid; | ||
790 | unsigned long bitmap = 1; | ||
791 | int i, r = 0; | ||
792 | |||
793 | if (!kvm_apic_logical_map_valid(map)) | ||
794 | goto out; | ||
795 | |||
796 | apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); | ||
797 | |||
798 | if (cid >= ARRAY_SIZE(map->logical_map)) | ||
799 | goto out; | ||
800 | |||
801 | for_each_set_bit(i, &bitmap, 16) { | ||
802 | dst = map->logical_map[cid][i]; | ||
803 | if (++r == 2) | ||
804 | goto out; | ||
805 | } | ||
806 | |||
807 | if (dst && kvm_apic_present(dst->vcpu)) | ||
808 | *dest_vcpu = dst->vcpu; | ||
809 | else | ||
810 | goto out; | ||
811 | } | ||
812 | |||
813 | ret = true; | ||
814 | out: | ||
815 | rcu_read_unlock(); | ||
816 | return ret; | ||
817 | } | ||
818 | |||
767 | /* | 819 | /* |
768 | * Add a pending IRQ into lapic. | 820 | * Add a pending IRQ into lapic. |
769 | * Return 1 if successfully added and 0 if discarded. | 821 | * Return 1 if successfully added and 0 if discarded. |
@@ -781,6 +833,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
781 | case APIC_DM_LOWEST: | 833 | case APIC_DM_LOWEST: |
782 | vcpu->arch.apic_arb_prio++; | 834 | vcpu->arch.apic_arb_prio++; |
783 | case APIC_DM_FIXED: | 835 | case APIC_DM_FIXED: |
836 | if (unlikely(trig_mode && !level)) | ||
837 | break; | ||
838 | |||
784 | /* FIXME add logic for vcpu on reset */ | 839 | /* FIXME add logic for vcpu on reset */ |
785 | if (unlikely(!apic_enabled(apic))) | 840 | if (unlikely(!apic_enabled(apic))) |
786 | break; | 841 | break; |
@@ -790,6 +845,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
790 | if (dest_map) | 845 | if (dest_map) |
791 | __set_bit(vcpu->vcpu_id, dest_map); | 846 | __set_bit(vcpu->vcpu_id, dest_map); |
792 | 847 | ||
848 | if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { | ||
849 | if (trig_mode) | ||
850 | apic_set_vector(vector, apic->regs + APIC_TMR); | ||
851 | else | ||
852 | apic_clear_vector(vector, apic->regs + APIC_TMR); | ||
853 | } | ||
854 | |||
793 | if (kvm_x86_ops->deliver_posted_interrupt) | 855 | if (kvm_x86_ops->deliver_posted_interrupt) |
794 | kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); | 856 | kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); |
795 | else { | 857 | else { |
@@ -868,16 +930,32 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) | |||
868 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; | 930 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; |
869 | } | 931 | } |
870 | 932 | ||
933 | static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) | ||
934 | { | ||
935 | return test_bit(vector, (ulong *)apic->vcpu->arch.eoi_exit_bitmap); | ||
936 | } | ||
937 | |||
871 | static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) | 938 | static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) |
872 | { | 939 | { |
873 | if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { | 940 | int trigger_mode; |
874 | int trigger_mode; | 941 | |
875 | if (apic_test_vector(vector, apic->regs + APIC_TMR)) | 942 | /* Eoi the ioapic only if the ioapic doesn't own the vector. */ |
876 | trigger_mode = IOAPIC_LEVEL_TRIG; | 943 | if (!kvm_ioapic_handles_vector(apic, vector)) |
877 | else | 944 | return; |
878 | trigger_mode = IOAPIC_EDGE_TRIG; | 945 | |
879 | kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); | 946 | /* Request a KVM exit to inform the userspace IOAPIC. */ |
947 | if (irqchip_split(apic->vcpu->kvm)) { | ||
948 | apic->vcpu->arch.pending_ioapic_eoi = vector; | ||
949 | kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu); | ||
950 | return; | ||
880 | } | 951 | } |
952 | |||
953 | if (apic_test_vector(vector, apic->regs + APIC_TMR)) | ||
954 | trigger_mode = IOAPIC_LEVEL_TRIG; | ||
955 | else | ||
956 | trigger_mode = IOAPIC_EDGE_TRIG; | ||
957 | |||
958 | kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); | ||
881 | } | 959 | } |
882 | 960 | ||
883 | static int apic_set_eoi(struct kvm_lapic *apic) | 961 | static int apic_set_eoi(struct kvm_lapic *apic) |
@@ -1615,7 +1693,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
1615 | apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); | 1693 | apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); |
1616 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); | 1694 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); |
1617 | } | 1695 | } |
1618 | apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm); | 1696 | apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu); |
1619 | apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0; | 1697 | apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0; |
1620 | apic->highest_isr_cache = -1; | 1698 | apic->highest_isr_cache = -1; |
1621 | update_divide_count(apic); | 1699 | update_divide_count(apic); |
@@ -1838,7 +1916,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, | |||
1838 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, | 1916 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, |
1839 | apic_find_highest_isr(apic)); | 1917 | apic_find_highest_isr(apic)); |
1840 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 1918 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
1841 | kvm_rtc_eoi_tracking_restore_one(vcpu); | 1919 | if (ioapic_in_kernel(vcpu->kvm)) |
1920 | kvm_rtc_eoi_tracking_restore_one(vcpu); | ||
1921 | |||
1922 | vcpu->arch.apic_arb_prio = 0; | ||
1842 | } | 1923 | } |
1843 | 1924 | ||
1844 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | 1925 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) |
@@ -1922,7 +2003,7 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu, | |||
1922 | /* Cache not set: could be safe but we don't bother. */ | 2003 | /* Cache not set: could be safe but we don't bother. */ |
1923 | apic->highest_isr_cache == -1 || | 2004 | apic->highest_isr_cache == -1 || |
1924 | /* Need EOI to update ioapic. */ | 2005 | /* Need EOI to update ioapic. */ |
1925 | kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) { | 2006 | kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) { |
1926 | /* | 2007 | /* |
1927 | * PV EOI was disabled by apic_sync_pv_eoi_from_guest | 2008 | * PV EOI was disabled by apic_sync_pv_eoi_from_guest |
1928 | * so we need not do anything here. | 2009 | * so we need not do anything here. |
@@ -1978,7 +2059,7 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1978 | struct kvm_lapic *apic = vcpu->arch.apic; | 2059 | struct kvm_lapic *apic = vcpu->arch.apic; |
1979 | u32 reg = (msr - APIC_BASE_MSR) << 4; | 2060 | u32 reg = (msr - APIC_BASE_MSR) << 4; |
1980 | 2061 | ||
1981 | if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) | 2062 | if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) |
1982 | return 1; | 2063 | return 1; |
1983 | 2064 | ||
1984 | if (reg == APIC_ICR2) | 2065 | if (reg == APIC_ICR2) |
@@ -1995,7 +2076,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) | |||
1995 | struct kvm_lapic *apic = vcpu->arch.apic; | 2076 | struct kvm_lapic *apic = vcpu->arch.apic; |
1996 | u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; | 2077 | u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; |
1997 | 2078 | ||
1998 | if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) | 2079 | if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) |
1999 | return 1; | 2080 | return 1; |
2000 | 2081 | ||
2001 | if (reg == APIC_DFR || reg == APIC_ICR2) { | 2082 | if (reg == APIC_DFR || reg == APIC_ICR2) { |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 764037991d26..fde8e35d5850 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -57,7 +57,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); | |||
57 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); | 57 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); |
58 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); | 58 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); |
59 | 59 | ||
60 | void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); | ||
61 | void __kvm_apic_update_irr(u32 *pir, void *regs); | 60 | void __kvm_apic_update_irr(u32 *pir, void *regs); |
62 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); | 61 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); |
63 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, | 62 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, |
@@ -144,9 +143,9 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) | |||
144 | return apic->vcpu->arch.apic_base & X2APIC_ENABLE; | 143 | return apic->vcpu->arch.apic_base & X2APIC_ENABLE; |
145 | } | 144 | } |
146 | 145 | ||
147 | static inline bool kvm_apic_vid_enabled(struct kvm *kvm) | 146 | static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu) |
148 | { | 147 | { |
149 | return kvm_x86_ops->vm_has_apicv(kvm); | 148 | return kvm_x86_ops->cpu_uses_apicv(vcpu); |
150 | } | 149 | } |
151 | 150 | ||
152 | static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) | 151 | static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) |
@@ -169,4 +168,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); | |||
169 | 168 | ||
170 | void wait_lapic_expire(struct kvm_vcpu *vcpu); | 169 | void wait_lapic_expire(struct kvm_vcpu *vcpu); |
171 | 170 | ||
171 | bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, | ||
172 | struct kvm_vcpu **dest_vcpu); | ||
172 | #endif | 173 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ff606f507913..7d85bcae3332 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -818,14 +818,11 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
818 | kvm->arch.indirect_shadow_pages--; | 818 | kvm->arch.indirect_shadow_pages--; |
819 | } | 819 | } |
820 | 820 | ||
821 | static int has_wrprotected_page(struct kvm_vcpu *vcpu, | 821 | static int __has_wrprotected_page(gfn_t gfn, int level, |
822 | gfn_t gfn, | 822 | struct kvm_memory_slot *slot) |
823 | int level) | ||
824 | { | 823 | { |
825 | struct kvm_memory_slot *slot; | ||
826 | struct kvm_lpage_info *linfo; | 824 | struct kvm_lpage_info *linfo; |
827 | 825 | ||
828 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | ||
829 | if (slot) { | 826 | if (slot) { |
830 | linfo = lpage_info_slot(gfn, slot, level); | 827 | linfo = lpage_info_slot(gfn, slot, level); |
831 | return linfo->write_count; | 828 | return linfo->write_count; |
@@ -834,6 +831,14 @@ static int has_wrprotected_page(struct kvm_vcpu *vcpu, | |||
834 | return 1; | 831 | return 1; |
835 | } | 832 | } |
836 | 833 | ||
834 | static int has_wrprotected_page(struct kvm_vcpu *vcpu, gfn_t gfn, int level) | ||
835 | { | ||
836 | struct kvm_memory_slot *slot; | ||
837 | |||
838 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | ||
839 | return __has_wrprotected_page(gfn, level, slot); | ||
840 | } | ||
841 | |||
837 | static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | 842 | static int host_mapping_level(struct kvm *kvm, gfn_t gfn) |
838 | { | 843 | { |
839 | unsigned long page_size; | 844 | unsigned long page_size; |
@@ -851,6 +856,17 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | |||
851 | return ret; | 856 | return ret; |
852 | } | 857 | } |
853 | 858 | ||
859 | static inline bool memslot_valid_for_gpte(struct kvm_memory_slot *slot, | ||
860 | bool no_dirty_log) | ||
861 | { | ||
862 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) | ||
863 | return false; | ||
864 | if (no_dirty_log && slot->dirty_bitmap) | ||
865 | return false; | ||
866 | |||
867 | return true; | ||
868 | } | ||
869 | |||
854 | static struct kvm_memory_slot * | 870 | static struct kvm_memory_slot * |
855 | gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, | 871 | gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, |
856 | bool no_dirty_log) | 872 | bool no_dirty_log) |
@@ -858,21 +874,25 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
858 | struct kvm_memory_slot *slot; | 874 | struct kvm_memory_slot *slot; |
859 | 875 | ||
860 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | 876 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); |
861 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID || | 877 | if (!memslot_valid_for_gpte(slot, no_dirty_log)) |
862 | (no_dirty_log && slot->dirty_bitmap)) | ||
863 | slot = NULL; | 878 | slot = NULL; |
864 | 879 | ||
865 | return slot; | 880 | return slot; |
866 | } | 881 | } |
867 | 882 | ||
868 | static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 883 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, |
869 | { | 884 | bool *force_pt_level) |
870 | return !gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true); | ||
871 | } | ||
872 | |||
873 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | ||
874 | { | 885 | { |
875 | int host_level, level, max_level; | 886 | int host_level, level, max_level; |
887 | struct kvm_memory_slot *slot; | ||
888 | |||
889 | if (unlikely(*force_pt_level)) | ||
890 | return PT_PAGE_TABLE_LEVEL; | ||
891 | |||
892 | slot = kvm_vcpu_gfn_to_memslot(vcpu, large_gfn); | ||
893 | *force_pt_level = !memslot_valid_for_gpte(slot, true); | ||
894 | if (unlikely(*force_pt_level)) | ||
895 | return PT_PAGE_TABLE_LEVEL; | ||
876 | 896 | ||
877 | host_level = host_mapping_level(vcpu->kvm, large_gfn); | 897 | host_level = host_mapping_level(vcpu->kvm, large_gfn); |
878 | 898 | ||
@@ -882,7 +902,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | |||
882 | max_level = min(kvm_x86_ops->get_lpage_level(), host_level); | 902 | max_level = min(kvm_x86_ops->get_lpage_level(), host_level); |
883 | 903 | ||
884 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) | 904 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) |
885 | if (has_wrprotected_page(vcpu, large_gfn, level)) | 905 | if (__has_wrprotected_page(large_gfn, level, slot)) |
886 | break; | 906 | break; |
887 | 907 | ||
888 | return level - 1; | 908 | return level - 1; |
@@ -2962,14 +2982,13 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | |||
2962 | { | 2982 | { |
2963 | int r; | 2983 | int r; |
2964 | int level; | 2984 | int level; |
2965 | int force_pt_level; | 2985 | bool force_pt_level = false; |
2966 | pfn_t pfn; | 2986 | pfn_t pfn; |
2967 | unsigned long mmu_seq; | 2987 | unsigned long mmu_seq; |
2968 | bool map_writable, write = error_code & PFERR_WRITE_MASK; | 2988 | bool map_writable, write = error_code & PFERR_WRITE_MASK; |
2969 | 2989 | ||
2970 | force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); | 2990 | level = mapping_level(vcpu, gfn, &force_pt_level); |
2971 | if (likely(!force_pt_level)) { | 2991 | if (likely(!force_pt_level)) { |
2972 | level = mapping_level(vcpu, gfn); | ||
2973 | /* | 2992 | /* |
2974 | * This path builds a PAE pagetable - so we can map | 2993 | * This path builds a PAE pagetable - so we can map |
2975 | * 2mb pages at maximum. Therefore check if the level | 2994 | * 2mb pages at maximum. Therefore check if the level |
@@ -2979,8 +2998,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | |||
2979 | level = PT_DIRECTORY_LEVEL; | 2998 | level = PT_DIRECTORY_LEVEL; |
2980 | 2999 | ||
2981 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | 3000 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); |
2982 | } else | 3001 | } |
2983 | level = PT_PAGE_TABLE_LEVEL; | ||
2984 | 3002 | ||
2985 | if (fast_page_fault(vcpu, v, level, error_code)) | 3003 | if (fast_page_fault(vcpu, v, level, error_code)) |
2986 | return 0; | 3004 | return 0; |
@@ -3427,7 +3445,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) | |||
3427 | 3445 | ||
3428 | static bool can_do_async_pf(struct kvm_vcpu *vcpu) | 3446 | static bool can_do_async_pf(struct kvm_vcpu *vcpu) |
3429 | { | 3447 | { |
3430 | if (unlikely(!irqchip_in_kernel(vcpu->kvm) || | 3448 | if (unlikely(!lapic_in_kernel(vcpu) || |
3431 | kvm_event_needs_reinjection(vcpu))) | 3449 | kvm_event_needs_reinjection(vcpu))) |
3432 | return false; | 3450 | return false; |
3433 | 3451 | ||
@@ -3476,7 +3494,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
3476 | pfn_t pfn; | 3494 | pfn_t pfn; |
3477 | int r; | 3495 | int r; |
3478 | int level; | 3496 | int level; |
3479 | int force_pt_level; | 3497 | bool force_pt_level; |
3480 | gfn_t gfn = gpa >> PAGE_SHIFT; | 3498 | gfn_t gfn = gpa >> PAGE_SHIFT; |
3481 | unsigned long mmu_seq; | 3499 | unsigned long mmu_seq; |
3482 | int write = error_code & PFERR_WRITE_MASK; | 3500 | int write = error_code & PFERR_WRITE_MASK; |
@@ -3495,20 +3513,15 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
3495 | if (r) | 3513 | if (r) |
3496 | return r; | 3514 | return r; |
3497 | 3515 | ||
3498 | if (mapping_level_dirty_bitmap(vcpu, gfn) || | 3516 | force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn, |
3499 | !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL)) | 3517 | PT_DIRECTORY_LEVEL); |
3500 | force_pt_level = 1; | 3518 | level = mapping_level(vcpu, gfn, &force_pt_level); |
3501 | else | ||
3502 | force_pt_level = 0; | ||
3503 | |||
3504 | if (likely(!force_pt_level)) { | 3519 | if (likely(!force_pt_level)) { |
3505 | level = mapping_level(vcpu, gfn); | ||
3506 | if (level > PT_DIRECTORY_LEVEL && | 3520 | if (level > PT_DIRECTORY_LEVEL && |
3507 | !check_hugepage_cache_consistency(vcpu, gfn, level)) | 3521 | !check_hugepage_cache_consistency(vcpu, gfn, level)) |
3508 | level = PT_DIRECTORY_LEVEL; | 3522 | level = PT_DIRECTORY_LEVEL; |
3509 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | 3523 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); |
3510 | } else | 3524 | } |
3511 | level = PT_PAGE_TABLE_LEVEL; | ||
3512 | 3525 | ||
3513 | if (fast_page_fault(vcpu, gpa, level, error_code)) | 3526 | if (fast_page_fault(vcpu, gpa, level, error_code)) |
3514 | return 0; | 3527 | return 0; |
@@ -3706,7 +3719,7 @@ static void | |||
3706 | __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, | 3719 | __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, |
3707 | int maxphyaddr, bool execonly) | 3720 | int maxphyaddr, bool execonly) |
3708 | { | 3721 | { |
3709 | int pte; | 3722 | u64 bad_mt_xwr; |
3710 | 3723 | ||
3711 | rsvd_check->rsvd_bits_mask[0][3] = | 3724 | rsvd_check->rsvd_bits_mask[0][3] = |
3712 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); | 3725 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); |
@@ -3724,14 +3737,16 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, | |||
3724 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); | 3737 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); |
3725 | rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; | 3738 | rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; |
3726 | 3739 | ||
3727 | for (pte = 0; pte < 64; pte++) { | 3740 | bad_mt_xwr = 0xFFull << (2 * 8); /* bits 3..5 must not be 2 */ |
3728 | int rwx_bits = pte & 7; | 3741 | bad_mt_xwr |= 0xFFull << (3 * 8); /* bits 3..5 must not be 3 */ |
3729 | int mt = pte >> 3; | 3742 | bad_mt_xwr |= 0xFFull << (7 * 8); /* bits 3..5 must not be 7 */ |
3730 | if (mt == 0x2 || mt == 0x3 || mt == 0x7 || | 3743 | bad_mt_xwr |= REPEAT_BYTE(1ull << 2); /* bits 0..2 must not be 010 */ |
3731 | rwx_bits == 0x2 || rwx_bits == 0x6 || | 3744 | bad_mt_xwr |= REPEAT_BYTE(1ull << 6); /* bits 0..2 must not be 110 */ |
3732 | (rwx_bits == 0x4 && !execonly)) | 3745 | if (!execonly) { |
3733 | rsvd_check->bad_mt_xwr |= (1ull << pte); | 3746 | /* bits 0..2 must not be 100 unless VMX capabilities allow it */ |
3747 | bad_mt_xwr |= REPEAT_BYTE(1ull << 4); | ||
3734 | } | 3748 | } |
3749 | rsvd_check->bad_mt_xwr = bad_mt_xwr; | ||
3735 | } | 3750 | } |
3736 | 3751 | ||
3737 | static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, | 3752 | static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, |
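The __reset_rsvds_bits_mask_ept() hunk above replaces a 64-iteration loop with a few REPEAT_BYTE() masks. The bitmap is indexed by the low six PTE bits (bits 0..2 are the read/write/execute permissions, bits 3..5 the EPT memory type), so a 0xFF byte marks one memory type bad for every permission pattern, and one bit per byte marks one permission pattern bad for every memory type. A small user-space check, sketched here with REPEAT_BYTE written out as in linux/kernel.h, shows the two constructions agree:

/* Sketch: verify the old per-PTE loop and the new mask construction
 * build the same bad_mt_xwr bitmap. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define REPEAT_BYTE(x)  ((~0ull / 0xff) * (x))  /* 0x0101...0101 * x */

static uint64_t bad_mt_xwr_loop(bool execonly)
{
        uint64_t map = 0;

        for (int pte = 0; pte < 64; pte++) {
                int rwx = pte & 7;      /* bits 0..2: read/write/execute */
                int mt = pte >> 3;      /* bits 3..5: EPT memory type */

                if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
                    rwx == 0x2 || rwx == 0x6 ||
                    (rwx == 0x4 && !execonly))
                        map |= 1ull << pte;
        }
        return map;
}

static uint64_t bad_mt_xwr_masks(bool execonly)
{
        uint64_t map;

        map  = 0xFFull << (2 * 8);              /* memory type 2 is reserved */
        map |= 0xFFull << (3 * 8);              /* memory type 3 is reserved */
        map |= 0xFFull << (7 * 8);              /* memory type 7 is reserved */
        map |= REPEAT_BYTE(1ull << 2);          /* write-only (010) is reserved */
        map |= REPEAT_BYTE(1ull << 6);          /* write+execute (110) is reserved */
        if (!execonly)
                map |= REPEAT_BYTE(1ull << 4);  /* execute-only (100) needs support */
        return map;
}

int main(void)
{
        assert(bad_mt_xwr_loop(false) == bad_mt_xwr_masks(false));
        assert(bad_mt_xwr_loop(true)  == bad_mt_xwr_masks(true));
        printf("loop and mask constructions match\n");
        return 0;
}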
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 736e6ab8784d..b41faa91a6f9 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -698,7 +698,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
698 | int r; | 698 | int r; |
699 | pfn_t pfn; | 699 | pfn_t pfn; |
700 | int level = PT_PAGE_TABLE_LEVEL; | 700 | int level = PT_PAGE_TABLE_LEVEL; |
701 | int force_pt_level; | 701 | bool force_pt_level = false; |
702 | unsigned long mmu_seq; | 702 | unsigned long mmu_seq; |
703 | bool map_writable, is_self_change_mapping; | 703 | bool map_writable, is_self_change_mapping; |
704 | 704 | ||
@@ -743,15 +743,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
743 | is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, | 743 | is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, |
744 | &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); | 744 | &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); |
745 | 745 | ||
746 | if (walker.level >= PT_DIRECTORY_LEVEL) | 746 | if (walker.level >= PT_DIRECTORY_LEVEL && !is_self_change_mapping) { |
747 | force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn) | 747 | level = mapping_level(vcpu, walker.gfn, &force_pt_level); |
748 | || is_self_change_mapping; | 748 | if (likely(!force_pt_level)) { |
749 | else | 749 | level = min(walker.level, level); |
750 | force_pt_level = 1; | 750 | walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); |
751 | if (!force_pt_level) { | 751 | } |
752 | level = min(walker.level, mapping_level(vcpu, walker.gfn)); | 752 | } else |
753 | walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); | 753 | force_pt_level = true; |
754 | } | ||
755 | 754 | ||
756 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 755 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
757 | smp_rmb(); | 756 | smp_rmb(); |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2f9ed1ff0632..f2c8e4917688 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -159,6 +159,9 @@ struct vcpu_svm { | |||
159 | u32 apf_reason; | 159 | u32 apf_reason; |
160 | 160 | ||
161 | u64 tsc_ratio; | 161 | u64 tsc_ratio; |
162 | |||
163 | /* cached guest cpuid flags for faster access */ | ||
164 | bool nrips_enabled : 1; | ||
162 | }; | 165 | }; |
163 | 166 | ||
164 | static DEFINE_PER_CPU(u64, current_tsc_ratio); | 167 | static DEFINE_PER_CPU(u64, current_tsc_ratio); |
@@ -1086,7 +1089,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) | |||
1086 | return target_tsc - tsc; | 1089 | return target_tsc - tsc; |
1087 | } | 1090 | } |
1088 | 1091 | ||
1089 | static void init_vmcb(struct vcpu_svm *svm, bool init_event) | 1092 | static void init_vmcb(struct vcpu_svm *svm) |
1090 | { | 1093 | { |
1091 | struct vmcb_control_area *control = &svm->vmcb->control; | 1094 | struct vmcb_control_area *control = &svm->vmcb->control; |
1092 | struct vmcb_save_area *save = &svm->vmcb->save; | 1095 | struct vmcb_save_area *save = &svm->vmcb->save; |
@@ -1157,8 +1160,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) | |||
1157 | init_sys_seg(&save->ldtr, SEG_TYPE_LDT); | 1160 | init_sys_seg(&save->ldtr, SEG_TYPE_LDT); |
1158 | init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); | 1161 | init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); |
1159 | 1162 | ||
1160 | if (!init_event) | 1163 | svm_set_efer(&svm->vcpu, 0); |
1161 | svm_set_efer(&svm->vcpu, 0); | ||
1162 | save->dr6 = 0xffff0ff0; | 1164 | save->dr6 = 0xffff0ff0; |
1163 | kvm_set_rflags(&svm->vcpu, 2); | 1165 | kvm_set_rflags(&svm->vcpu, 2); |
1164 | save->rip = 0x0000fff0; | 1166 | save->rip = 0x0000fff0; |
@@ -1212,7 +1214,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
1212 | if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) | 1214 | if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) |
1213 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; | 1215 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; |
1214 | } | 1216 | } |
1215 | init_vmcb(svm, init_event); | 1217 | init_vmcb(svm); |
1216 | 1218 | ||
1217 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); | 1219 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); |
1218 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); | 1220 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); |
@@ -1268,7 +1270,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
1268 | clear_page(svm->vmcb); | 1270 | clear_page(svm->vmcb); |
1269 | svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; | 1271 | svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; |
1270 | svm->asid_generation = 0; | 1272 | svm->asid_generation = 0; |
1271 | init_vmcb(svm, false); | 1273 | init_vmcb(svm); |
1272 | 1274 | ||
1273 | svm_init_osvw(&svm->vcpu); | 1275 | svm_init_osvw(&svm->vcpu); |
1274 | 1276 | ||
@@ -1890,7 +1892,7 @@ static int shutdown_interception(struct vcpu_svm *svm) | |||
1890 | * so reinitialize it. | 1892 | * so reinitialize it. |
1891 | */ | 1893 | */ |
1892 | clear_page(svm->vmcb); | 1894 | clear_page(svm->vmcb); |
1893 | init_vmcb(svm, false); | 1895 | init_vmcb(svm); |
1894 | 1896 | ||
1895 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | 1897 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; |
1896 | return 0; | 1898 | return 0; |
@@ -2365,7 +2367,9 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
2365 | nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; | 2367 | nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; |
2366 | nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; | 2368 | nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; |
2367 | nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; | 2369 | nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; |
2368 | nested_vmcb->control.next_rip = vmcb->control.next_rip; | 2370 | |
2371 | if (svm->nrips_enabled) | ||
2372 | nested_vmcb->control.next_rip = vmcb->control.next_rip; | ||
2369 | 2373 | ||
2370 | /* | 2374 | /* |
2371 | * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have | 2375 | * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have |
@@ -3060,7 +3064,7 @@ static int cr8_write_interception(struct vcpu_svm *svm) | |||
3060 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); | 3064 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); |
3061 | /* instruction emulation calls kvm_set_cr8() */ | 3065 | /* instruction emulation calls kvm_set_cr8() */ |
3062 | r = cr_interception(svm); | 3066 | r = cr_interception(svm); |
3063 | if (irqchip_in_kernel(svm->vcpu.kvm)) | 3067 | if (lapic_in_kernel(&svm->vcpu)) |
3064 | return r; | 3068 | return r; |
3065 | if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) | 3069 | if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) |
3066 | return r; | 3070 | return r; |
@@ -3294,24 +3298,11 @@ static int msr_interception(struct vcpu_svm *svm) | |||
3294 | 3298 | ||
3295 | static int interrupt_window_interception(struct vcpu_svm *svm) | 3299 | static int interrupt_window_interception(struct vcpu_svm *svm) |
3296 | { | 3300 | { |
3297 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
3298 | |||
3299 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); | 3301 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); |
3300 | svm_clear_vintr(svm); | 3302 | svm_clear_vintr(svm); |
3301 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | 3303 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; |
3302 | mark_dirty(svm->vmcb, VMCB_INTR); | 3304 | mark_dirty(svm->vmcb, VMCB_INTR); |
3303 | ++svm->vcpu.stat.irq_window_exits; | 3305 | ++svm->vcpu.stat.irq_window_exits; |
3304 | /* | ||
3305 | * If the user space waits to inject interrupts, exit as soon as | ||
3306 | * possible | ||
3307 | */ | ||
3308 | if (!irqchip_in_kernel(svm->vcpu.kvm) && | ||
3309 | kvm_run->request_interrupt_window && | ||
3310 | !kvm_cpu_has_interrupt(&svm->vcpu)) { | ||
3311 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | ||
3312 | return 0; | ||
3313 | } | ||
3314 | |||
3315 | return 1; | 3306 | return 1; |
3316 | } | 3307 | } |
3317 | 3308 | ||
@@ -3659,12 +3650,12 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | |||
3659 | return; | 3650 | return; |
3660 | } | 3651 | } |
3661 | 3652 | ||
3662 | static int svm_vm_has_apicv(struct kvm *kvm) | 3653 | static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu) |
3663 | { | 3654 | { |
3664 | return 0; | 3655 | return 0; |
3665 | } | 3656 | } |
3666 | 3657 | ||
3667 | static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | 3658 | static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu) |
3668 | { | 3659 | { |
3669 | return; | 3660 | return; |
3670 | } | 3661 | } |
@@ -4098,6 +4089,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
4098 | 4089 | ||
4099 | static void svm_cpuid_update(struct kvm_vcpu *vcpu) | 4090 | static void svm_cpuid_update(struct kvm_vcpu *vcpu) |
4100 | { | 4091 | { |
4092 | struct vcpu_svm *svm = to_svm(vcpu); | ||
4093 | |||
4094 | /* Update nrips enabled cache */ | ||
4095 | svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu); | ||
4101 | } | 4096 | } |
4102 | 4097 | ||
4103 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | 4098 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) |
@@ -4425,7 +4420,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4425 | .enable_irq_window = enable_irq_window, | 4420 | .enable_irq_window = enable_irq_window, |
4426 | .update_cr8_intercept = update_cr8_intercept, | 4421 | .update_cr8_intercept = update_cr8_intercept, |
4427 | .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, | 4422 | .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, |
4428 | .vm_has_apicv = svm_vm_has_apicv, | 4423 | .cpu_uses_apicv = svm_cpu_uses_apicv, |
4429 | .load_eoi_exitmap = svm_load_eoi_exitmap, | 4424 | .load_eoi_exitmap = svm_load_eoi_exitmap, |
4430 | .sync_pir_to_irr = svm_sync_pir_to_irr, | 4425 | .sync_pir_to_irr = svm_sync_pir_to_irr, |
4431 | 4426 | ||
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 4eae7c35ddf5..120302511802 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -129,6 +129,24 @@ TRACE_EVENT(kvm_pio, | |||
129 | ); | 129 | ); |
130 | 130 | ||
131 | /* | 131 | /* |
132 | * Tracepoint for fast mmio. | ||
133 | */ | ||
134 | TRACE_EVENT(kvm_fast_mmio, | ||
135 | TP_PROTO(u64 gpa), | ||
136 | TP_ARGS(gpa), | ||
137 | |||
138 | TP_STRUCT__entry( | ||
139 | __field(u64, gpa) | ||
140 | ), | ||
141 | |||
142 | TP_fast_assign( | ||
143 | __entry->gpa = gpa; | ||
144 | ), | ||
145 | |||
146 | TP_printk("fast mmio at gpa 0x%llx", __entry->gpa) | ||
147 | ); | ||
148 | |||
149 | /* | ||
132 | * Tracepoint for cpuid. | 150 | * Tracepoint for cpuid. |
133 | */ | 151 | */ |
134 | TRACE_EVENT(kvm_cpuid, | 152 | TRACE_EVENT(kvm_cpuid, |
@@ -974,6 +992,39 @@ TRACE_EVENT(kvm_enter_smm, | |||
974 | __entry->smbase) | 992 | __entry->smbase) |
975 | ); | 993 | ); |
976 | 994 | ||
995 | /* | ||
996 | * Tracepoint for VT-d posted-interrupts. | ||
997 | */ | ||
998 | TRACE_EVENT(kvm_pi_irte_update, | ||
999 | TP_PROTO(unsigned int vcpu_id, unsigned int gsi, | ||
1000 | unsigned int gvec, u64 pi_desc_addr, bool set), | ||
1001 | TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set), | ||
1002 | |||
1003 | TP_STRUCT__entry( | ||
1004 | __field( unsigned int, vcpu_id ) | ||
1005 | __field( unsigned int, gsi ) | ||
1006 | __field( unsigned int, gvec ) | ||
1007 | __field( u64, pi_desc_addr ) | ||
1008 | __field( bool, set ) | ||
1009 | ), | ||
1010 | |||
1011 | TP_fast_assign( | ||
1012 | __entry->vcpu_id = vcpu_id; | ||
1013 | __entry->gsi = gsi; | ||
1014 | __entry->gvec = gvec; | ||
1015 | __entry->pi_desc_addr = pi_desc_addr; | ||
1016 | __entry->set = set; | ||
1017 | ), | ||
1018 | |||
1019 | TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, " | ||
1020 | "gvec: 0x%x, pi_desc_addr: 0x%llx", | ||
1021 | __entry->set ? "enabled and being updated" : "disabled", | ||
1022 | __entry->vcpu_id, | ||
1023 | __entry->gsi, | ||
1024 | __entry->gvec, | ||
1025 | __entry->pi_desc_addr) | ||
1026 | ); | ||
1027 | |||
977 | #endif /* _TRACE_KVM_H */ | 1028 | #endif /* _TRACE_KVM_H */ |
978 | 1029 | ||
979 | #undef TRACE_INCLUDE_PATH | 1030 | #undef TRACE_INCLUDE_PATH |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6a8bc64566ab..5eb56ed77c1f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include "kvm_cache_regs.h" | 35 | #include "kvm_cache_regs.h" |
36 | #include "x86.h" | 36 | #include "x86.h" |
37 | 37 | ||
38 | #include <asm/cpu.h> | ||
38 | #include <asm/io.h> | 39 | #include <asm/io.h> |
39 | #include <asm/desc.h> | 40 | #include <asm/desc.h> |
40 | #include <asm/vmx.h> | 41 | #include <asm/vmx.h> |
@@ -45,6 +46,7 @@ | |||
45 | #include <asm/debugreg.h> | 46 | #include <asm/debugreg.h> |
46 | #include <asm/kexec.h> | 47 | #include <asm/kexec.h> |
47 | #include <asm/apic.h> | 48 | #include <asm/apic.h> |
49 | #include <asm/irq_remapping.h> | ||
48 | 50 | ||
49 | #include "trace.h" | 51 | #include "trace.h" |
50 | #include "pmu.h" | 52 | #include "pmu.h" |
@@ -424,6 +426,9 @@ struct nested_vmx { | |||
424 | /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ | 426 | /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ |
425 | u64 vmcs01_debugctl; | 427 | u64 vmcs01_debugctl; |
426 | 428 | ||
429 | u16 vpid02; | ||
430 | u16 last_vpid; | ||
431 | |||
427 | u32 nested_vmx_procbased_ctls_low; | 432 | u32 nested_vmx_procbased_ctls_low; |
428 | u32 nested_vmx_procbased_ctls_high; | 433 | u32 nested_vmx_procbased_ctls_high; |
429 | u32 nested_vmx_true_procbased_ctls_low; | 434 | u32 nested_vmx_true_procbased_ctls_low; |
@@ -440,14 +445,33 @@ struct nested_vmx { | |||
440 | u32 nested_vmx_misc_low; | 445 | u32 nested_vmx_misc_low; |
441 | u32 nested_vmx_misc_high; | 446 | u32 nested_vmx_misc_high; |
442 | u32 nested_vmx_ept_caps; | 447 | u32 nested_vmx_ept_caps; |
448 | u32 nested_vmx_vpid_caps; | ||
443 | }; | 449 | }; |
444 | 450 | ||
445 | #define POSTED_INTR_ON 0 | 451 | #define POSTED_INTR_ON 0 |
452 | #define POSTED_INTR_SN 1 | ||
453 | |||
446 | /* Posted-Interrupt Descriptor */ | 454 | /* Posted-Interrupt Descriptor */ |
447 | struct pi_desc { | 455 | struct pi_desc { |
448 | u32 pir[8]; /* Posted interrupt requested */ | 456 | u32 pir[8]; /* Posted interrupt requested */ |
449 | u32 control; /* bit 0 of control is outstanding notification bit */ | 457 | union { |
450 | u32 rsvd[7]; | 458 | struct { |
459 | /* bit 256 - Outstanding Notification */ | ||
460 | u16 on : 1, | ||
461 | /* bit 257 - Suppress Notification */ | ||
462 | sn : 1, | ||
463 | /* bit 271:258 - Reserved */ | ||
464 | rsvd_1 : 14; | ||
465 | /* bit 279:272 - Notification Vector */ | ||
466 | u8 nv; | ||
467 | /* bit 287:280 - Reserved */ | ||
468 | u8 rsvd_2; | ||
469 | /* bit 319:288 - Notification Destination */ | ||
470 | u32 ndst; | ||
471 | }; | ||
472 | u64 control; | ||
473 | }; | ||
474 | u32 rsvd[6]; | ||
451 | } __aligned(64); | 475 | } __aligned(64); |
452 | 476 | ||
453 | static bool pi_test_and_set_on(struct pi_desc *pi_desc) | 477 | static bool pi_test_and_set_on(struct pi_desc *pi_desc) |
@@ -467,6 +491,30 @@ static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) | |||
467 | return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); | 491 | return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); |
468 | } | 492 | } |
469 | 493 | ||
494 | static inline void pi_clear_sn(struct pi_desc *pi_desc) | ||
495 | { | ||
496 | return clear_bit(POSTED_INTR_SN, | ||
497 | (unsigned long *)&pi_desc->control); | ||
498 | } | ||
499 | |||
500 | static inline void pi_set_sn(struct pi_desc *pi_desc) | ||
501 | { | ||
502 | return set_bit(POSTED_INTR_SN, | ||
503 | (unsigned long *)&pi_desc->control); | ||
504 | } | ||
505 | |||
506 | static inline int pi_test_on(struct pi_desc *pi_desc) | ||
507 | { | ||
508 | return test_bit(POSTED_INTR_ON, | ||
509 | (unsigned long *)&pi_desc->control); | ||
510 | } | ||
511 | |||
512 | static inline int pi_test_sn(struct pi_desc *pi_desc) | ||
513 | { | ||
514 | return test_bit(POSTED_INTR_SN, | ||
515 | (unsigned long *)&pi_desc->control); | ||
516 | } | ||
517 | |||
470 | struct vcpu_vmx { | 518 | struct vcpu_vmx { |
471 | struct kvm_vcpu vcpu; | 519 | struct kvm_vcpu vcpu; |
472 | unsigned long host_rsp; | 520 | unsigned long host_rsp; |
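The reworked descriptor above overlays named fields on the 64-bit control word: ON (descriptor bit 256, bit 0 of control), SN (bit 257), the notification vector NV and the notification destination NDST, while pir[] keeps bits 0..255. A minimal userspace model of that layout (illustrative only; it assumes the usual x86-64 little-endian bitfield ordering and is not the kernel struct) can be used to sanity-check the size and the SN bit position:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Userspace model of the posted-interrupt descriptor layout shown above. */
struct pi_desc_model {
    uint32_t pir[8];                 /* bits 0..255: posted-interrupt requests */
    union {
        struct {
            uint16_t on     : 1;     /* bit 256: outstanding notification */
            uint16_t sn     : 1;     /* bit 257: suppress notification    */
            uint16_t rsvd_1 : 14;
            uint8_t  nv;             /* bits 272..279: notification vector */
            uint8_t  rsvd_2;
            uint32_t ndst;           /* bits 288..319: notification dest.  */
        };
        uint64_t control;
    };
    uint32_t rsvd[6];
} __attribute__((aligned(64)));

int main(void)
{
    struct pi_desc_model d = { { 0 } };

    d.sn = 1;   /* what vmx_vcpu_pi_put() does when the vCPU is preempted */

    /* Expect: size 64, control 0x2 (SN is bit 1 of the control word). */
    printf("size = %zu, control = %#llx\n",
           sizeof(d), (unsigned long long)d.control);
    return 0;
}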
@@ -532,8 +580,6 @@ struct vcpu_vmx { | |||
532 | s64 vnmi_blocked_time; | 580 | s64 vnmi_blocked_time; |
533 | u32 exit_reason; | 581 | u32 exit_reason; |
534 | 582 | ||
535 | bool rdtscp_enabled; | ||
536 | |||
537 | /* Posted interrupt descriptor */ | 583 | /* Posted interrupt descriptor */ |
538 | struct pi_desc pi_desc; | 584 | struct pi_desc pi_desc; |
539 | 585 | ||
@@ -563,6 +609,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
563 | return container_of(vcpu, struct vcpu_vmx, vcpu); | 609 | return container_of(vcpu, struct vcpu_vmx, vcpu); |
564 | } | 610 | } |
565 | 611 | ||
612 | static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) | ||
613 | { | ||
614 | return &(to_vmx(vcpu)->pi_desc); | ||
615 | } | ||
616 | |||
566 | #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x) | 617 | #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x) |
567 | #define FIELD(number, name) [number] = VMCS12_OFFSET(name) | 618 | #define FIELD(number, name) [number] = VMCS12_OFFSET(name) |
568 | #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ | 619 | #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ |
@@ -809,7 +860,7 @@ static void kvm_cpu_vmxon(u64 addr); | |||
809 | static void kvm_cpu_vmxoff(void); | 860 | static void kvm_cpu_vmxoff(void); |
810 | static bool vmx_mpx_supported(void); | 861 | static bool vmx_mpx_supported(void); |
811 | static bool vmx_xsaves_supported(void); | 862 | static bool vmx_xsaves_supported(void); |
812 | static int vmx_vm_has_apicv(struct kvm *kvm); | 863 | static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu); |
813 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | 864 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); |
814 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | 865 | static void vmx_set_segment(struct kvm_vcpu *vcpu, |
815 | struct kvm_segment *var, int seg); | 866 | struct kvm_segment *var, int seg); |
@@ -831,6 +882,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | |||
831 | static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); | 882 | static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); |
832 | static DEFINE_PER_CPU(struct desc_ptr, host_gdt); | 883 | static DEFINE_PER_CPU(struct desc_ptr, host_gdt); |
833 | 884 | ||
885 | /* | ||
886 | * We maintain a per-CPU linked list of vCPUs, so in wakeup_handler() we | ||
887 | * can find which vCPU should be woken up. | ||
888 | */ | ||
889 | static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); | ||
890 | static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); | ||
891 | |||
834 | static unsigned long *vmx_io_bitmap_a; | 892 | static unsigned long *vmx_io_bitmap_a; |
835 | static unsigned long *vmx_io_bitmap_b; | 893 | static unsigned long *vmx_io_bitmap_b; |
836 | static unsigned long *vmx_msr_bitmap_legacy; | 894 | static unsigned long *vmx_msr_bitmap_legacy; |
@@ -946,9 +1004,9 @@ static inline bool cpu_has_vmx_tpr_shadow(void) | |||
946 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; | 1004 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; |
947 | } | 1005 | } |
948 | 1006 | ||
949 | static inline bool vm_need_tpr_shadow(struct kvm *kvm) | 1007 | static inline bool cpu_need_tpr_shadow(struct kvm_vcpu *vcpu) |
950 | { | 1008 | { |
951 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); | 1009 | return cpu_has_vmx_tpr_shadow() && lapic_in_kernel(vcpu); |
952 | } | 1010 | } |
953 | 1011 | ||
954 | static inline bool cpu_has_secondary_exec_ctrls(void) | 1012 | static inline bool cpu_has_secondary_exec_ctrls(void) |
@@ -983,7 +1041,8 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) | |||
983 | 1041 | ||
984 | static inline bool cpu_has_vmx_posted_intr(void) | 1042 | static inline bool cpu_has_vmx_posted_intr(void) |
985 | { | 1043 | { |
986 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; | 1044 | return IS_ENABLED(CONFIG_X86_LOCAL_APIC) && |
1045 | vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; | ||
987 | } | 1046 | } |
988 | 1047 | ||
989 | static inline bool cpu_has_vmx_apicv(void) | 1048 | static inline bool cpu_has_vmx_apicv(void) |
@@ -1062,9 +1121,9 @@ static inline bool cpu_has_vmx_ple(void) | |||
1062 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 1121 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
1063 | } | 1122 | } |
1064 | 1123 | ||
1065 | static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) | 1124 | static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu) |
1066 | { | 1125 | { |
1067 | return flexpriority_enabled && irqchip_in_kernel(kvm); | 1126 | return flexpriority_enabled && lapic_in_kernel(vcpu); |
1068 | } | 1127 | } |
1069 | 1128 | ||
1070 | static inline bool cpu_has_vmx_vpid(void) | 1129 | static inline bool cpu_has_vmx_vpid(void) |
@@ -1157,6 +1216,11 @@ static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12) | |||
1157 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); | 1216 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); |
1158 | } | 1217 | } |
1159 | 1218 | ||
1219 | static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12) | ||
1220 | { | ||
1221 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID); | ||
1222 | } | ||
1223 | |||
1160 | static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12) | 1224 | static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12) |
1161 | { | 1225 | { |
1162 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT); | 1226 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT); |
@@ -1337,13 +1401,13 @@ static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) | |||
1337 | __loaded_vmcs_clear, loaded_vmcs, 1); | 1401 | __loaded_vmcs_clear, loaded_vmcs, 1); |
1338 | } | 1402 | } |
1339 | 1403 | ||
1340 | static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx) | 1404 | static inline void vpid_sync_vcpu_single(int vpid) |
1341 | { | 1405 | { |
1342 | if (vmx->vpid == 0) | 1406 | if (vpid == 0) |
1343 | return; | 1407 | return; |
1344 | 1408 | ||
1345 | if (cpu_has_vmx_invvpid_single()) | 1409 | if (cpu_has_vmx_invvpid_single()) |
1346 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); | 1410 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0); |
1347 | } | 1411 | } |
1348 | 1412 | ||
1349 | static inline void vpid_sync_vcpu_global(void) | 1413 | static inline void vpid_sync_vcpu_global(void) |
@@ -1352,10 +1416,10 @@ static inline void vpid_sync_vcpu_global(void) | |||
1352 | __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); | 1416 | __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); |
1353 | } | 1417 | } |
1354 | 1418 | ||
1355 | static inline void vpid_sync_context(struct vcpu_vmx *vmx) | 1419 | static inline void vpid_sync_context(int vpid) |
1356 | { | 1420 | { |
1357 | if (cpu_has_vmx_invvpid_single()) | 1421 | if (cpu_has_vmx_invvpid_single()) |
1358 | vpid_sync_vcpu_single(vmx); | 1422 | vpid_sync_vcpu_single(vpid); |
1359 | else | 1423 | else |
1360 | vpid_sync_vcpu_global(); | 1424 | vpid_sync_vcpu_global(); |
1361 | } | 1425 | } |
@@ -1895,6 +1959,52 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx) | |||
1895 | preempt_enable(); | 1959 | preempt_enable(); |
1896 | } | 1960 | } |
1897 | 1961 | ||
1962 | static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) | ||
1963 | { | ||
1964 | struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); | ||
1965 | struct pi_desc old, new; | ||
1966 | unsigned int dest; | ||
1967 | |||
1968 | if (!kvm_arch_has_assigned_device(vcpu->kvm) || | ||
1969 | !irq_remapping_cap(IRQ_POSTING_CAP)) | ||
1970 | return; | ||
1971 | |||
1972 | do { | ||
1973 | old.control = new.control = pi_desc->control; | ||
1974 | |||
1975 | /* | ||
1976 | * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there | ||
1977 | * are two possible cases: | ||
1978 | * 1. After running 'pre_block', a context switch | ||
1979 | * happened. In that case, 'sn' was set in | ||
1980 | * vmx_vcpu_put(), so we need to clear it here. | ||
1981 | * 2. After running 'pre_block', we were blocked | ||
1982 | * and then woken up by something else. In that | ||
1983 | * case we need to do nothing here, because | ||
1984 | * 'pi_post_block' will do everything for us. | ||
1985 | * However, we cannot tell whether it is case #1 | ||
1986 | * or case #2 here (and may not need to), so we | ||
1987 | * clear 'sn' in both cases; that is harmless. | ||
1988 | */ | ||
1989 | if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) { | ||
1990 | if (vcpu->cpu != cpu) { | ||
1991 | dest = cpu_physical_id(cpu); | ||
1992 | |||
1993 | if (x2apic_enabled()) | ||
1994 | new.ndst = dest; | ||
1995 | else | ||
1996 | new.ndst = (dest << 8) & 0xFF00; | ||
1997 | } | ||
1998 | |||
1999 | /* set 'NV' to 'notification vector' */ | ||
2000 | new.nv = POSTED_INTR_VECTOR; | ||
2001 | } | ||
2002 | |||
2003 | /* Allow posting non-urgent interrupts */ | ||
2004 | new.sn = 0; | ||
2005 | } while (cmpxchg(&pi_desc->control, old.control, | ||
2006 | new.control) != old.control); | ||
2007 | } | ||
1898 | /* | 2008 | /* |
1899 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes | 2009 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes |
1900 | * vcpu mutex is already taken. | 2010 | * vcpu mutex is already taken. |
@@ -1945,10 +2055,27 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1945 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ | 2055 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ |
1946 | vmx->loaded_vmcs->cpu = cpu; | 2056 | vmx->loaded_vmcs->cpu = cpu; |
1947 | } | 2057 | } |
2058 | |||
2059 | vmx_vcpu_pi_load(vcpu, cpu); | ||
2060 | } | ||
2061 | |||
2062 | static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) | ||
2063 | { | ||
2064 | struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); | ||
2065 | |||
2066 | if (!kvm_arch_has_assigned_device(vcpu->kvm) || | ||
2067 | !irq_remapping_cap(IRQ_POSTING_CAP)) | ||
2068 | return; | ||
2069 | |||
2070 | /* Set SN when the vCPU is preempted */ | ||
2071 | if (vcpu->preempted) | ||
2072 | pi_set_sn(pi_desc); | ||
1948 | } | 2073 | } |
1949 | 2074 | ||
1950 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | 2075 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) |
1951 | { | 2076 | { |
2077 | vmx_vcpu_pi_put(vcpu); | ||
2078 | |||
1952 | __vmx_load_host_state(to_vmx(vcpu)); | 2079 | __vmx_load_host_state(to_vmx(vcpu)); |
1953 | if (!vmm_exclusive) { | 2080 | if (!vmm_exclusive) { |
1954 | __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs); | 2081 | __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs); |
@@ -2207,7 +2334,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
2207 | if (index >= 0) | 2334 | if (index >= 0) |
2208 | move_msr_up(vmx, index, save_nmsrs++); | 2335 | move_msr_up(vmx, index, save_nmsrs++); |
2209 | index = __find_msr_index(vmx, MSR_TSC_AUX); | 2336 | index = __find_msr_index(vmx, MSR_TSC_AUX); |
2210 | if (index >= 0 && vmx->rdtscp_enabled) | 2337 | if (index >= 0 && guest_cpuid_has_rdtscp(&vmx->vcpu)) |
2211 | move_msr_up(vmx, index, save_nmsrs++); | 2338 | move_msr_up(vmx, index, save_nmsrs++); |
2212 | /* | 2339 | /* |
2213 | * MSR_STAR is only needed on long mode guests, and only | 2340 | * MSR_STAR is only needed on long mode guests, and only |
@@ -2377,7 +2504,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2377 | vmx->nested.nested_vmx_pinbased_ctls_high |= | 2504 | vmx->nested.nested_vmx_pinbased_ctls_high |= |
2378 | PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | | 2505 | PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | |
2379 | PIN_BASED_VMX_PREEMPTION_TIMER; | 2506 | PIN_BASED_VMX_PREEMPTION_TIMER; |
2380 | if (vmx_vm_has_apicv(vmx->vcpu.kvm)) | 2507 | if (vmx_cpu_uses_apicv(&vmx->vcpu)) |
2381 | vmx->nested.nested_vmx_pinbased_ctls_high |= | 2508 | vmx->nested.nested_vmx_pinbased_ctls_high |= |
2382 | PIN_BASED_POSTED_INTR; | 2509 | PIN_BASED_POSTED_INTR; |
2383 | 2510 | ||
@@ -2471,10 +2598,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2471 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 2598 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
2472 | SECONDARY_EXEC_RDTSCP | | 2599 | SECONDARY_EXEC_RDTSCP | |
2473 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | 2600 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | |
2601 | SECONDARY_EXEC_ENABLE_VPID | | ||
2474 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | 2602 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
2475 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 2603 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
2476 | SECONDARY_EXEC_WBINVD_EXITING | | 2604 | SECONDARY_EXEC_WBINVD_EXITING | |
2477 | SECONDARY_EXEC_XSAVES; | 2605 | SECONDARY_EXEC_XSAVES | |
2606 | SECONDARY_EXEC_PCOMMIT; | ||
2478 | 2607 | ||
2479 | if (enable_ept) { | 2608 | if (enable_ept) { |
2480 | /* nested EPT: emulate EPT also to L1 */ | 2609 | /* nested EPT: emulate EPT also to L1 */ |
@@ -2493,6 +2622,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2493 | } else | 2622 | } else |
2494 | vmx->nested.nested_vmx_ept_caps = 0; | 2623 | vmx->nested.nested_vmx_ept_caps = 0; |
2495 | 2624 | ||
2625 | if (enable_vpid) | ||
2626 | vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | | ||
2627 | VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; | ||
2628 | else | ||
2629 | vmx->nested.nested_vmx_vpid_caps = 0; | ||
2630 | |||
2496 | if (enable_unrestricted_guest) | 2631 | if (enable_unrestricted_guest) |
2497 | vmx->nested.nested_vmx_secondary_ctls_high |= | 2632 | vmx->nested.nested_vmx_secondary_ctls_high |= |
2498 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 2633 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
@@ -2608,7 +2743,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2608 | break; | 2743 | break; |
2609 | case MSR_IA32_VMX_EPT_VPID_CAP: | 2744 | case MSR_IA32_VMX_EPT_VPID_CAP: |
2610 | /* Currently, no nested vpid support */ | 2745 | /* Currently, no nested vpid support */ |
2611 | *pdata = vmx->nested.nested_vmx_ept_caps; | 2746 | *pdata = vmx->nested.nested_vmx_ept_caps | |
2747 | ((u64)vmx->nested.nested_vmx_vpid_caps << 32); | ||
2612 | break; | 2748 | break; |
2613 | default: | 2749 | default: |
2614 | return 1; | 2750 | return 1; |
@@ -2673,7 +2809,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2673 | msr_info->data = vcpu->arch.ia32_xss; | 2809 | msr_info->data = vcpu->arch.ia32_xss; |
2674 | break; | 2810 | break; |
2675 | case MSR_TSC_AUX: | 2811 | case MSR_TSC_AUX: |
2676 | if (!to_vmx(vcpu)->rdtscp_enabled) | 2812 | if (!guest_cpuid_has_rdtscp(vcpu)) |
2677 | return 1; | 2813 | return 1; |
2678 | /* Otherwise falls through */ | 2814 | /* Otherwise falls through */ |
2679 | default: | 2815 | default: |
@@ -2779,7 +2915,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2779 | clear_atomic_switch_msr(vmx, MSR_IA32_XSS); | 2915 | clear_atomic_switch_msr(vmx, MSR_IA32_XSS); |
2780 | break; | 2916 | break; |
2781 | case MSR_TSC_AUX: | 2917 | case MSR_TSC_AUX: |
2782 | if (!vmx->rdtscp_enabled) | 2918 | if (!guest_cpuid_has_rdtscp(vcpu)) |
2783 | return 1; | 2919 | return 1; |
2784 | /* Check reserved bit, higher 32 bits should be zero */ | 2920 | /* Check reserved bit, higher 32 bits should be zero */ |
2785 | if ((data >> 32) != 0) | 2921 | if ((data >> 32) != 0) |
@@ -2874,6 +3010,8 @@ static int hardware_enable(void) | |||
2874 | return -EBUSY; | 3010 | return -EBUSY; |
2875 | 3011 | ||
2876 | INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); | 3012 | INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); |
3013 | INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); | ||
3014 | spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); | ||
2877 | 3015 | ||
2878 | /* | 3016 | /* |
2879 | * Now we can enable the vmclear operation in kdump | 3017 | * Now we can enable the vmclear operation in kdump |
@@ -3015,7 +3153,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
3015 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 3153 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
3016 | SECONDARY_EXEC_SHADOW_VMCS | | 3154 | SECONDARY_EXEC_SHADOW_VMCS | |
3017 | SECONDARY_EXEC_XSAVES | | 3155 | SECONDARY_EXEC_XSAVES | |
3018 | SECONDARY_EXEC_ENABLE_PML; | 3156 | SECONDARY_EXEC_ENABLE_PML | |
3157 | SECONDARY_EXEC_PCOMMIT; | ||
3019 | if (adjust_vmx_controls(min2, opt2, | 3158 | if (adjust_vmx_controls(min2, opt2, |
3020 | MSR_IA32_VMX_PROCBASED_CTLS2, | 3159 | MSR_IA32_VMX_PROCBASED_CTLS2, |
3021 | &_cpu_based_2nd_exec_control) < 0) | 3160 | &_cpu_based_2nd_exec_control) < 0) |
@@ -3441,9 +3580,9 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
3441 | 3580 | ||
3442 | #endif | 3581 | #endif |
3443 | 3582 | ||
3444 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | 3583 | static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) |
3445 | { | 3584 | { |
3446 | vpid_sync_context(to_vmx(vcpu)); | 3585 | vpid_sync_context(vpid); |
3447 | if (enable_ept) { | 3586 | if (enable_ept) { |
3448 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 3587 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
3449 | return; | 3588 | return; |
@@ -3451,6 +3590,11 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | |||
3451 | } | 3590 | } |
3452 | } | 3591 | } |
3453 | 3592 | ||
3593 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | ||
3594 | { | ||
3595 | __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); | ||
3596 | } | ||
3597 | |||
3454 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | 3598 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) |
3455 | { | 3599 | { |
3456 | ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; | 3600 | ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; |
@@ -3644,20 +3788,21 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
3644 | if (!is_paging(vcpu)) { | 3788 | if (!is_paging(vcpu)) { |
3645 | hw_cr4 &= ~X86_CR4_PAE; | 3789 | hw_cr4 &= ~X86_CR4_PAE; |
3646 | hw_cr4 |= X86_CR4_PSE; | 3790 | hw_cr4 |= X86_CR4_PSE; |
3647 | /* | ||
3648 | * SMEP/SMAP is disabled if CPU is in non-paging mode | ||
3649 | * in hardware. However KVM always uses paging mode to | ||
3650 | * emulate guest non-paging mode with TDP. | ||
3651 | * To emulate this behavior, SMEP/SMAP needs to be | ||
3652 | * manually disabled when guest switches to non-paging | ||
3653 | * mode. | ||
3654 | */ | ||
3655 | hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP); | ||
3656 | } else if (!(cr4 & X86_CR4_PAE)) { | 3791 | } else if (!(cr4 & X86_CR4_PAE)) { |
3657 | hw_cr4 &= ~X86_CR4_PAE; | 3792 | hw_cr4 &= ~X86_CR4_PAE; |
3658 | } | 3793 | } |
3659 | } | 3794 | } |
3660 | 3795 | ||
3796 | if (!enable_unrestricted_guest && !is_paging(vcpu)) | ||
3797 | /* | ||
3798 | * SMEP/SMAP are disabled if the CPU is in non-paging mode in | ||
3799 | * hardware. However, without unrestricted guest support, KVM | ||
3800 | * always uses paging mode. | ||
3801 | * To emulate this behavior, SMEP/SMAP need to be manually | ||
3802 | * disabled when the guest switches to non-paging mode. | ||
3803 | */ | ||
3804 | hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP); | ||
3805 | |||
3661 | vmcs_writel(CR4_READ_SHADOW, cr4); | 3806 | vmcs_writel(CR4_READ_SHADOW, cr4); |
3662 | vmcs_writel(GUEST_CR4, hw_cr4); | 3807 | vmcs_writel(GUEST_CR4, hw_cr4); |
3663 | return 0; | 3808 | return 0; |
@@ -4146,29 +4291,28 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
4146 | return r; | 4291 | return r; |
4147 | } | 4292 | } |
4148 | 4293 | ||
4149 | static void allocate_vpid(struct vcpu_vmx *vmx) | 4294 | static int allocate_vpid(void) |
4150 | { | 4295 | { |
4151 | int vpid; | 4296 | int vpid; |
4152 | 4297 | ||
4153 | vmx->vpid = 0; | ||
4154 | if (!enable_vpid) | 4298 | if (!enable_vpid) |
4155 | return; | 4299 | return 0; |
4156 | spin_lock(&vmx_vpid_lock); | 4300 | spin_lock(&vmx_vpid_lock); |
4157 | vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); | 4301 | vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); |
4158 | if (vpid < VMX_NR_VPIDS) { | 4302 | if (vpid < VMX_NR_VPIDS) |
4159 | vmx->vpid = vpid; | ||
4160 | __set_bit(vpid, vmx_vpid_bitmap); | 4303 | __set_bit(vpid, vmx_vpid_bitmap); |
4161 | } | 4304 | else |
4305 | vpid = 0; | ||
4162 | spin_unlock(&vmx_vpid_lock); | 4306 | spin_unlock(&vmx_vpid_lock); |
4307 | return vpid; | ||
4163 | } | 4308 | } |
4164 | 4309 | ||
4165 | static void free_vpid(struct vcpu_vmx *vmx) | 4310 | static void free_vpid(int vpid) |
4166 | { | 4311 | { |
4167 | if (!enable_vpid) | 4312 | if (!enable_vpid || vpid == 0) |
4168 | return; | 4313 | return; |
4169 | spin_lock(&vmx_vpid_lock); | 4314 | spin_lock(&vmx_vpid_lock); |
4170 | if (vmx->vpid != 0) | 4315 | __clear_bit(vpid, vmx_vpid_bitmap); |
4171 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
4172 | spin_unlock(&vmx_vpid_lock); | 4316 | spin_unlock(&vmx_vpid_lock); |
4173 | } | 4317 | } |
4174 | 4318 | ||
@@ -4323,9 +4467,9 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) | |||
4323 | msr, MSR_TYPE_W); | 4467 | msr, MSR_TYPE_W); |
4324 | } | 4468 | } |
4325 | 4469 | ||
4326 | static int vmx_vm_has_apicv(struct kvm *kvm) | 4470 | static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu) |
4327 | { | 4471 | { |
4328 | return enable_apicv && irqchip_in_kernel(kvm); | 4472 | return enable_apicv && lapic_in_kernel(vcpu); |
4329 | } | 4473 | } |
4330 | 4474 | ||
4331 | static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) | 4475 | static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) |
@@ -4369,6 +4513,22 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) | |||
4369 | { | 4513 | { |
4370 | #ifdef CONFIG_SMP | 4514 | #ifdef CONFIG_SMP |
4371 | if (vcpu->mode == IN_GUEST_MODE) { | 4515 | if (vcpu->mode == IN_GUEST_MODE) { |
4516 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4517 | |||
4518 | /* | ||
4519 | * Currently, we don't support urgent interrupts; | ||
4520 | * all interrupts are treated as non-urgent, | ||
4521 | * so we cannot post interrupts while 'SN' | ||
4522 | * is set. | ||
4523 | * | ||
4524 | * If the vcpu is in guest mode, it is running | ||
4525 | * rather than scheduled out and waiting on a | ||
4526 | * run queue, and that is currently the only | ||
4527 | * case in which 'SN' is set, so warn if | ||
4528 | * 'SN' is set here. | ||
4529 | */ | ||
4530 | WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc)); | ||
4531 | |||
4372 | apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), | 4532 | apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), |
4373 | POSTED_INTR_VECTOR); | 4533 | POSTED_INTR_VECTOR); |
4374 | return true; | 4534 | return true; |
@@ -4505,7 +4665,7 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) | |||
4505 | { | 4665 | { |
4506 | u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; | 4666 | u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; |
4507 | 4667 | ||
4508 | if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) | 4668 | if (!vmx_cpu_uses_apicv(&vmx->vcpu)) |
4509 | pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; | 4669 | pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; |
4510 | return pin_based_exec_ctrl; | 4670 | return pin_based_exec_ctrl; |
4511 | } | 4671 | } |
@@ -4517,7 +4677,7 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) | |||
4517 | if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT) | 4677 | if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT) |
4518 | exec_control &= ~CPU_BASED_MOV_DR_EXITING; | 4678 | exec_control &= ~CPU_BASED_MOV_DR_EXITING; |
4519 | 4679 | ||
4520 | if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) { | 4680 | if (!cpu_need_tpr_shadow(&vmx->vcpu)) { |
4521 | exec_control &= ~CPU_BASED_TPR_SHADOW; | 4681 | exec_control &= ~CPU_BASED_TPR_SHADOW; |
4522 | #ifdef CONFIG_X86_64 | 4682 | #ifdef CONFIG_X86_64 |
4523 | exec_control |= CPU_BASED_CR8_STORE_EXITING | | 4683 | exec_control |= CPU_BASED_CR8_STORE_EXITING | |
@@ -4534,7 +4694,7 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) | |||
4534 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | 4694 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) |
4535 | { | 4695 | { |
4536 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; | 4696 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; |
4537 | if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) | 4697 | if (!cpu_need_virtualize_apic_accesses(&vmx->vcpu)) |
4538 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 4698 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
4539 | if (vmx->vpid == 0) | 4699 | if (vmx->vpid == 0) |
4540 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | 4700 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
@@ -4548,7 +4708,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
4548 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 4708 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
4549 | if (!ple_gap) | 4709 | if (!ple_gap) |
4550 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 4710 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
4551 | if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) | 4711 | if (!vmx_cpu_uses_apicv(&vmx->vcpu)) |
4552 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | | 4712 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | |
4553 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | 4713 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); |
4554 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | 4714 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; |
@@ -4558,8 +4718,12 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
4558 | a current VMCS12 | 4718 | a current VMCS12 |
4559 | */ | 4719 | */ |
4560 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; | 4720 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; |
4561 | /* PML is enabled/disabled when creating/destroying a vcpu */ | 4721 |
4562 | exec_control &= ~SECONDARY_EXEC_ENABLE_PML; | 4722 | if (!enable_pml) |
4723 | exec_control &= ~SECONDARY_EXEC_ENABLE_PML; | ||
4724 | |||
4725 | /* Currently, we allow the L1 guest to run the pcommit instruction directly. */ | ||
4726 | exec_control &= ~SECONDARY_EXEC_PCOMMIT; | ||
4563 | 4727 | ||
4564 | return exec_control; | 4728 | return exec_control; |
4565 | } | 4729 | } |
@@ -4604,12 +4768,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4604 | 4768 | ||
4605 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); | 4769 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); |
4606 | 4770 | ||
4607 | if (cpu_has_secondary_exec_ctrls()) { | 4771 | if (cpu_has_secondary_exec_ctrls()) |
4608 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | 4772 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, |
4609 | vmx_secondary_exec_control(vmx)); | 4773 | vmx_secondary_exec_control(vmx)); |
4610 | } | ||
4611 | 4774 | ||
4612 | if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { | 4775 | if (vmx_cpu_uses_apicv(&vmx->vcpu)) { |
4613 | vmcs_write64(EOI_EXIT_BITMAP0, 0); | 4776 | vmcs_write64(EOI_EXIT_BITMAP0, 0); |
4614 | vmcs_write64(EOI_EXIT_BITMAP1, 0); | 4777 | vmcs_write64(EOI_EXIT_BITMAP1, 0); |
4615 | vmcs_write64(EOI_EXIT_BITMAP2, 0); | 4778 | vmcs_write64(EOI_EXIT_BITMAP2, 0); |
@@ -4753,7 +4916,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
4753 | 4916 | ||
4754 | if (cpu_has_vmx_tpr_shadow() && !init_event) { | 4917 | if (cpu_has_vmx_tpr_shadow() && !init_event) { |
4755 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); | 4918 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); |
4756 | if (vm_need_tpr_shadow(vcpu->kvm)) | 4919 | if (cpu_need_tpr_shadow(vcpu)) |
4757 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, | 4920 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, |
4758 | __pa(vcpu->arch.apic->regs)); | 4921 | __pa(vcpu->arch.apic->regs)); |
4759 | vmcs_write32(TPR_THRESHOLD, 0); | 4922 | vmcs_write32(TPR_THRESHOLD, 0); |
@@ -4761,7 +4924,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
4761 | 4924 | ||
4762 | kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); | 4925 | kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); |
4763 | 4926 | ||
4764 | if (vmx_vm_has_apicv(vcpu->kvm)) | 4927 | if (vmx_cpu_uses_apicv(vcpu)) |
4765 | memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); | 4928 | memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); |
4766 | 4929 | ||
4767 | if (vmx->vpid != 0) | 4930 | if (vmx->vpid != 0) |
@@ -4771,12 +4934,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
4771 | vmx_set_cr0(vcpu, cr0); /* enter rmode */ | 4934 | vmx_set_cr0(vcpu, cr0); /* enter rmode */ |
4772 | vmx->vcpu.arch.cr0 = cr0; | 4935 | vmx->vcpu.arch.cr0 = cr0; |
4773 | vmx_set_cr4(vcpu, 0); | 4936 | vmx_set_cr4(vcpu, 0); |
4774 | if (!init_event) | 4937 | vmx_set_efer(vcpu, 0); |
4775 | vmx_set_efer(vcpu, 0); | ||
4776 | vmx_fpu_activate(vcpu); | 4938 | vmx_fpu_activate(vcpu); |
4777 | update_exception_bitmap(vcpu); | 4939 | update_exception_bitmap(vcpu); |
4778 | 4940 | ||
4779 | vpid_sync_context(vmx); | 4941 | vpid_sync_context(vmx->vpid); |
4780 | } | 4942 | } |
4781 | 4943 | ||
4782 | /* | 4944 | /* |
@@ -5296,7 +5458,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
5296 | u8 cr8 = (u8)val; | 5458 | u8 cr8 = (u8)val; |
5297 | err = kvm_set_cr8(vcpu, cr8); | 5459 | err = kvm_set_cr8(vcpu, cr8); |
5298 | kvm_complete_insn_gp(vcpu, err); | 5460 | kvm_complete_insn_gp(vcpu, err); |
5299 | if (irqchip_in_kernel(vcpu->kvm)) | 5461 | if (lapic_in_kernel(vcpu)) |
5300 | return 1; | 5462 | return 1; |
5301 | if (cr8_prev <= cr8) | 5463 | if (cr8_prev <= cr8) |
5302 | return 1; | 5464 | return 1; |
@@ -5510,17 +5672,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) | |||
5510 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5672 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5511 | 5673 | ||
5512 | ++vcpu->stat.irq_window_exits; | 5674 | ++vcpu->stat.irq_window_exits; |
5513 | |||
5514 | /* | ||
5515 | * If the user space waits to inject interrupts, exit as soon as | ||
5516 | * possible | ||
5517 | */ | ||
5518 | if (!irqchip_in_kernel(vcpu->kvm) && | ||
5519 | vcpu->run->request_interrupt_window && | ||
5520 | !kvm_cpu_has_interrupt(vcpu)) { | ||
5521 | vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | ||
5522 | return 0; | ||
5523 | } | ||
5524 | return 1; | 5675 | return 1; |
5525 | } | 5676 | } |
5526 | 5677 | ||
@@ -5753,6 +5904,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) | |||
5753 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | 5904 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); |
5754 | if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { | 5905 | if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { |
5755 | skip_emulated_instruction(vcpu); | 5906 | skip_emulated_instruction(vcpu); |
5907 | trace_kvm_fast_mmio(gpa); | ||
5756 | return 1; | 5908 | return 1; |
5757 | } | 5909 | } |
5758 | 5910 | ||
@@ -5910,6 +6062,25 @@ static void update_ple_window_actual_max(void) | |||
5910 | ple_window_grow, INT_MIN); | 6062 | ple_window_grow, INT_MIN); |
5911 | } | 6063 | } |
5912 | 6064 | ||
6065 | /* | ||
6066 | * Handler for POSTED_INTR_WAKEUP_VECTOR. | ||
6067 | */ | ||
6068 | static void wakeup_handler(void) | ||
6069 | { | ||
6070 | struct kvm_vcpu *vcpu; | ||
6071 | int cpu = smp_processor_id(); | ||
6072 | |||
6073 | spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); | ||
6074 | list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), | ||
6075 | blocked_vcpu_list) { | ||
6076 | struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); | ||
6077 | |||
6078 | if (pi_test_on(pi_desc) == 1) | ||
6079 | kvm_vcpu_kick(vcpu); | ||
6080 | } | ||
6081 | spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); | ||
6082 | } | ||
6083 | |||
5913 | static __init int hardware_setup(void) | 6084 | static __init int hardware_setup(void) |
5914 | { | 6085 | { |
5915 | int r = -ENOMEM, i, msr; | 6086 | int r = -ENOMEM, i, msr; |
@@ -6096,6 +6267,8 @@ static __init int hardware_setup(void) | |||
6096 | kvm_x86_ops->enable_log_dirty_pt_masked = NULL; | 6267 | kvm_x86_ops->enable_log_dirty_pt_masked = NULL; |
6097 | } | 6268 | } |
6098 | 6269 | ||
6270 | kvm_set_posted_intr_wakeup_handler(wakeup_handler); | ||
6271 | |||
6099 | return alloc_kvm_area(); | 6272 | return alloc_kvm_area(); |
6100 | 6273 | ||
6101 | out8: | 6274 | out8: |
@@ -6627,7 +6800,6 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) | |||
6627 | 6800 | ||
6628 | static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) | 6801 | static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) |
6629 | { | 6802 | { |
6630 | u32 exec_control; | ||
6631 | if (vmx->nested.current_vmptr == -1ull) | 6803 | if (vmx->nested.current_vmptr == -1ull) |
6632 | return; | 6804 | return; |
6633 | 6805 | ||
@@ -6640,9 +6812,8 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) | |||
6640 | they were modified */ | 6812 | they were modified */ |
6641 | copy_shadow_to_vmcs12(vmx); | 6813 | copy_shadow_to_vmcs12(vmx); |
6642 | vmx->nested.sync_shadow_vmcs = false; | 6814 | vmx->nested.sync_shadow_vmcs = false; |
6643 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | 6815 | vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, |
6644 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; | 6816 | SECONDARY_EXEC_SHADOW_VMCS); |
6645 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
6646 | vmcs_write64(VMCS_LINK_POINTER, -1ull); | 6817 | vmcs_write64(VMCS_LINK_POINTER, -1ull); |
6647 | } | 6818 | } |
6648 | vmx->nested.posted_intr_nv = -1; | 6819 | vmx->nested.posted_intr_nv = -1; |
@@ -6662,6 +6833,7 @@ static void free_nested(struct vcpu_vmx *vmx) | |||
6662 | return; | 6833 | return; |
6663 | 6834 | ||
6664 | vmx->nested.vmxon = false; | 6835 | vmx->nested.vmxon = false; |
6836 | free_vpid(vmx->nested.vpid02); | ||
6665 | nested_release_vmcs12(vmx); | 6837 | nested_release_vmcs12(vmx); |
6666 | if (enable_shadow_vmcs) | 6838 | if (enable_shadow_vmcs) |
6667 | free_vmcs(vmx->nested.current_shadow_vmcs); | 6839 | free_vmcs(vmx->nested.current_shadow_vmcs); |
@@ -7038,7 +7210,6 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
7038 | { | 7210 | { |
7039 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7211 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7040 | gpa_t vmptr; | 7212 | gpa_t vmptr; |
7041 | u32 exec_control; | ||
7042 | 7213 | ||
7043 | if (!nested_vmx_check_permission(vcpu)) | 7214 | if (!nested_vmx_check_permission(vcpu)) |
7044 | return 1; | 7215 | return 1; |
@@ -7070,9 +7241,8 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
7070 | vmx->nested.current_vmcs12 = new_vmcs12; | 7241 | vmx->nested.current_vmcs12 = new_vmcs12; |
7071 | vmx->nested.current_vmcs12_page = page; | 7242 | vmx->nested.current_vmcs12_page = page; |
7072 | if (enable_shadow_vmcs) { | 7243 | if (enable_shadow_vmcs) { |
7073 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | 7244 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, |
7074 | exec_control |= SECONDARY_EXEC_SHADOW_VMCS; | 7245 | SECONDARY_EXEC_SHADOW_VMCS); |
7075 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
7076 | vmcs_write64(VMCS_LINK_POINTER, | 7246 | vmcs_write64(VMCS_LINK_POINTER, |
7077 | __pa(vmx->nested.current_shadow_vmcs)); | 7247 | __pa(vmx->nested.current_shadow_vmcs)); |
7078 | vmx->nested.sync_shadow_vmcs = true; | 7248 | vmx->nested.sync_shadow_vmcs = true; |
@@ -7178,7 +7348,63 @@ static int handle_invept(struct kvm_vcpu *vcpu) | |||
7178 | 7348 | ||
7179 | static int handle_invvpid(struct kvm_vcpu *vcpu) | 7349 | static int handle_invvpid(struct kvm_vcpu *vcpu) |
7180 | { | 7350 | { |
7181 | kvm_queue_exception(vcpu, UD_VECTOR); | 7351 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7352 | u32 vmx_instruction_info; | ||
7353 | unsigned long type, types; | ||
7354 | gva_t gva; | ||
7355 | struct x86_exception e; | ||
7356 | int vpid; | ||
7357 | |||
7358 | if (!(vmx->nested.nested_vmx_secondary_ctls_high & | ||
7359 | SECONDARY_EXEC_ENABLE_VPID) || | ||
7360 | !(vmx->nested.nested_vmx_vpid_caps & VMX_VPID_INVVPID_BIT)) { | ||
7361 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
7362 | return 1; | ||
7363 | } | ||
7364 | |||
7365 | if (!nested_vmx_check_permission(vcpu)) | ||
7366 | return 1; | ||
7367 | |||
7368 | vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | ||
7369 | type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); | ||
7370 | |||
7371 | types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7; | ||
7372 | |||
7373 | if (!(types & (1UL << type))) { | ||
7374 | nested_vmx_failValid(vcpu, | ||
7375 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); | ||
7376 | return 1; | ||
7377 | } | ||
7378 | |||
7379 | /* according to the Intel VMX instruction reference, the memory | ||
7380 | * operand is read even if it isn't needed (e.g., for type==global) | ||
7381 | */ | ||
7382 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), | ||
7383 | vmx_instruction_info, false, &gva)) | ||
7384 | return 1; | ||
7385 | if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid, | ||
7386 | sizeof(u32), &e)) { | ||
7387 | kvm_inject_page_fault(vcpu, &e); | ||
7388 | return 1; | ||
7389 | } | ||
7390 | |||
7391 | switch (type) { | ||
7392 | case VMX_VPID_EXTENT_ALL_CONTEXT: | ||
7393 | if (get_vmcs12(vcpu)->virtual_processor_id == 0) { | ||
7394 | nested_vmx_failValid(vcpu, | ||
7395 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); | ||
7396 | return 1; | ||
7397 | } | ||
7398 | __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02); | ||
7399 | nested_vmx_succeed(vcpu); | ||
7400 | break; | ||
7401 | default: | ||
7402 | /* Trap single context invalidation invvpid calls */ | ||
7403 | BUG_ON(1); | ||
7404 | break; | ||
7405 | } | ||
7406 | |||
7407 | skip_emulated_instruction(vcpu); | ||
7182 | return 1; | 7408 | return 1; |
7183 | } | 7409 | } |
7184 | 7410 | ||
@@ -7207,6 +7433,13 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) | |||
7207 | return 1; | 7433 | return 1; |
7208 | } | 7434 | } |
7209 | 7435 | ||
7436 | static int handle_pcommit(struct kvm_vcpu *vcpu) | ||
7437 | { | ||
7438 | /* we never intercept the pcommit instruction for the L1 guest. */ | ||
7439 | WARN_ON(1); | ||
7440 | return 1; | ||
7441 | } | ||
7442 | |||
7210 | /* | 7443 | /* |
7211 | * The exit handlers return 1 if the exit was handled fully and guest execution | 7444 | * The exit handlers return 1 if the exit was handled fully and guest execution |
7212 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 7445 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
@@ -7257,6 +7490,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
7257 | [EXIT_REASON_XSAVES] = handle_xsaves, | 7490 | [EXIT_REASON_XSAVES] = handle_xsaves, |
7258 | [EXIT_REASON_XRSTORS] = handle_xrstors, | 7491 | [EXIT_REASON_XRSTORS] = handle_xrstors, |
7259 | [EXIT_REASON_PML_FULL] = handle_pml_full, | 7492 | [EXIT_REASON_PML_FULL] = handle_pml_full, |
7493 | [EXIT_REASON_PCOMMIT] = handle_pcommit, | ||
7260 | }; | 7494 | }; |
7261 | 7495 | ||
7262 | static const int kvm_vmx_max_exit_handlers = | 7496 | static const int kvm_vmx_max_exit_handlers = |
@@ -7558,6 +7792,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
7558 | * the XSS exit bitmap in vmcs12. | 7792 | * the XSS exit bitmap in vmcs12. |
7559 | */ | 7793 | */ |
7560 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); | 7794 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); |
7795 | case EXIT_REASON_PCOMMIT: | ||
7796 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT); | ||
7561 | default: | 7797 | default: |
7562 | return true; | 7798 | return true; |
7563 | } | 7799 | } |
@@ -7569,10 +7805,9 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) | |||
7569 | *info2 = vmcs_read32(VM_EXIT_INTR_INFO); | 7805 | *info2 = vmcs_read32(VM_EXIT_INTR_INFO); |
7570 | } | 7806 | } |
7571 | 7807 | ||
7572 | static int vmx_enable_pml(struct vcpu_vmx *vmx) | 7808 | static int vmx_create_pml_buffer(struct vcpu_vmx *vmx) |
7573 | { | 7809 | { |
7574 | struct page *pml_pg; | 7810 | struct page *pml_pg; |
7575 | u32 exec_control; | ||
7576 | 7811 | ||
7577 | pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); | 7812 | pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); |
7578 | if (!pml_pg) | 7813 | if (!pml_pg) |
@@ -7583,24 +7818,15 @@ static int vmx_enable_pml(struct vcpu_vmx *vmx) | |||
7583 | vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); | 7818 | vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); |
7584 | vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); | 7819 | vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); |
7585 | 7820 | ||
7586 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
7587 | exec_control |= SECONDARY_EXEC_ENABLE_PML; | ||
7588 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
7589 | |||
7590 | return 0; | 7821 | return 0; |
7591 | } | 7822 | } |
7592 | 7823 | ||
7593 | static void vmx_disable_pml(struct vcpu_vmx *vmx) | 7824 | static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) |
7594 | { | 7825 | { |
7595 | u32 exec_control; | 7826 | if (vmx->pml_pg) { |
7596 | 7827 | __free_page(vmx->pml_pg); | |
7597 | ASSERT(vmx->pml_pg); | 7828 | vmx->pml_pg = NULL; |
7598 | __free_page(vmx->pml_pg); | 7829 | } |
7599 | vmx->pml_pg = NULL; | ||
7600 | |||
7601 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
7602 | exec_control &= ~SECONDARY_EXEC_ENABLE_PML; | ||
7603 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
7604 | } | 7830 | } |
7605 | 7831 | ||
7606 | static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) | 7832 | static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) |
@@ -7924,10 +8150,10 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | |||
7924 | * apicv | 8150 | * apicv |
7925 | */ | 8151 | */ |
7926 | if (!cpu_has_vmx_virtualize_x2apic_mode() || | 8152 | if (!cpu_has_vmx_virtualize_x2apic_mode() || |
7927 | !vmx_vm_has_apicv(vcpu->kvm)) | 8153 | !vmx_cpu_uses_apicv(vcpu)) |
7928 | return; | 8154 | return; |
7929 | 8155 | ||
7930 | if (!vm_need_tpr_shadow(vcpu->kvm)) | 8156 | if (!cpu_need_tpr_shadow(vcpu)) |
7931 | return; | 8157 | return; |
7932 | 8158 | ||
7933 | sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | 8159 | sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); |
@@ -8029,9 +8255,10 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) | |||
8029 | } | 8255 | } |
8030 | } | 8256 | } |
8031 | 8257 | ||
8032 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | 8258 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu) |
8033 | { | 8259 | { |
8034 | if (!vmx_vm_has_apicv(vcpu->kvm)) | 8260 | u64 *eoi_exit_bitmap = vcpu->arch.eoi_exit_bitmap; |
8261 | if (!vmx_cpu_uses_apicv(vcpu)) | ||
8035 | return; | 8262 | return; |
8036 | 8263 | ||
8037 | vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); | 8264 | vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); |
@@ -8477,8 +8704,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
8477 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 8704 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
8478 | 8705 | ||
8479 | if (enable_pml) | 8706 | if (enable_pml) |
8480 | vmx_disable_pml(vmx); | 8707 | vmx_destroy_pml_buffer(vmx); |
8481 | free_vpid(vmx); | 8708 | free_vpid(vmx->vpid); |
8482 | leave_guest_mode(vcpu); | 8709 | leave_guest_mode(vcpu); |
8483 | vmx_load_vmcs01(vcpu); | 8710 | vmx_load_vmcs01(vcpu); |
8484 | free_nested(vmx); | 8711 | free_nested(vmx); |
@@ -8497,7 +8724,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
8497 | if (!vmx) | 8724 | if (!vmx) |
8498 | return ERR_PTR(-ENOMEM); | 8725 | return ERR_PTR(-ENOMEM); |
8499 | 8726 | ||
8500 | allocate_vpid(vmx); | 8727 | vmx->vpid = allocate_vpid(); |
8501 | 8728 | ||
8502 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); | 8729 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); |
8503 | if (err) | 8730 | if (err) |
@@ -8530,7 +8757,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
8530 | put_cpu(); | 8757 | put_cpu(); |
8531 | if (err) | 8758 | if (err) |
8532 | goto free_vmcs; | 8759 | goto free_vmcs; |
8533 | if (vm_need_virtualize_apic_accesses(kvm)) { | 8760 | if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { |
8534 | err = alloc_apic_access_page(kvm); | 8761 | err = alloc_apic_access_page(kvm); |
8535 | if (err) | 8762 | if (err) |
8536 | goto free_vmcs; | 8763 | goto free_vmcs; |
@@ -8545,8 +8772,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
8545 | goto free_vmcs; | 8772 | goto free_vmcs; |
8546 | } | 8773 | } |
8547 | 8774 | ||
8548 | if (nested) | 8775 | if (nested) { |
8549 | nested_vmx_setup_ctls_msrs(vmx); | 8776 | nested_vmx_setup_ctls_msrs(vmx); |
8777 | vmx->nested.vpid02 = allocate_vpid(); | ||
8778 | } | ||
8550 | 8779 | ||
8551 | vmx->nested.posted_intr_nv = -1; | 8780 | vmx->nested.posted_intr_nv = -1; |
8552 | vmx->nested.current_vmptr = -1ull; | 8781 | vmx->nested.current_vmptr = -1ull; |
@@ -8559,7 +8788,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
8559 | * for the guest, etc. | 8788 | * for the guest, etc. |
8560 | */ | 8789 | */ |
8561 | if (enable_pml) { | 8790 | if (enable_pml) { |
8562 | err = vmx_enable_pml(vmx); | 8791 | err = vmx_create_pml_buffer(vmx); |
8563 | if (err) | 8792 | if (err) |
8564 | goto free_vmcs; | 8793 | goto free_vmcs; |
8565 | } | 8794 | } |
@@ -8567,13 +8796,14 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
8567 | return &vmx->vcpu; | 8796 | return &vmx->vcpu; |
8568 | 8797 | ||
8569 | free_vmcs: | 8798 | free_vmcs: |
8799 | free_vpid(vmx->nested.vpid02); | ||
8570 | free_loaded_vmcs(vmx->loaded_vmcs); | 8800 | free_loaded_vmcs(vmx->loaded_vmcs); |
8571 | free_msrs: | 8801 | free_msrs: |
8572 | kfree(vmx->guest_msrs); | 8802 | kfree(vmx->guest_msrs); |
8573 | uninit_vcpu: | 8803 | uninit_vcpu: |
8574 | kvm_vcpu_uninit(&vmx->vcpu); | 8804 | kvm_vcpu_uninit(&vmx->vcpu); |
8575 | free_vcpu: | 8805 | free_vcpu: |
8576 | free_vpid(vmx); | 8806 | free_vpid(vmx->vpid); |
8577 | kmem_cache_free(kvm_vcpu_cache, vmx); | 8807 | kmem_cache_free(kvm_vcpu_cache, vmx); |
8578 | return ERR_PTR(err); | 8808 | return ERR_PTR(err); |
8579 | } | 8809 | } |
@@ -8648,49 +8878,67 @@ static int vmx_get_lpage_level(void) | |||
8648 | return PT_PDPE_LEVEL; | 8878 | return PT_PDPE_LEVEL; |
8649 | } | 8879 | } |
8650 | 8880 | ||
8881 | static void vmcs_set_secondary_exec_control(u32 new_ctl) | ||
8882 | { | ||
8883 | /* | ||
8884 | * These bits in the secondary execution controls field | ||
8885 | * are dynamic, the others are mostly based on the hypervisor | ||
8886 | * architecture and the guest's CPUID. Do not touch the | ||
8887 | * dynamic bits. | ||
8888 | */ | ||
8889 | u32 mask = | ||
8890 | SECONDARY_EXEC_SHADOW_VMCS | | ||
8891 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | ||
8892 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
8893 | |||
8894 | u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
8895 | |||
8896 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | ||
8897 | (new_ctl & ~mask) | (cur_ctl & mask)); | ||
8898 | } | ||
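The helper above keeps the dynamic bits (shadow VMCS, x2APIC virtualization, APIC-access virtualization) from the current VMCS value and takes everything else from the freshly computed control word. A minimal, self-contained sketch of the same merge, with purely illustrative bit positions rather than the real VMCS encodings:

#include <stdint.h>
#include <stdio.h>

#define DYN_SHADOW_VMCS (1u << 14)  /* illustrative bit positions only */
#define DYN_VIRT_X2APIC (1u << 4)
#define DYN_VIRT_APIC   (1u << 0)

static uint32_t merge_exec_control(uint32_t cur_ctl, uint32_t new_ctl)
{
    /* Keep the dynamic bits from the current value, take the rest
     * from the recomputed value. */
    uint32_t mask = DYN_SHADOW_VMCS | DYN_VIRT_X2APIC | DYN_VIRT_APIC;

    return (new_ctl & ~mask) | (cur_ctl & mask);
}

int main(void)
{
    uint32_t cur = DYN_VIRT_X2APIC | (1u << 7);   /* x2APIC mode on, "RDTSCP" on */
    uint32_t new = DYN_VIRT_APIC | (1u << 12);    /* recomputed: would flip APIC bits, set "INVPCID" */

    /* x2APIC stays on, APIC-access stays off, bit 7 cleared, bit 12 set */
    printf("merged = 0x%08x\n", merge_exec_control(cur, new));
    return 0;
}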
8899 | |||
8651 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | 8900 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) |
8652 | { | 8901 | { |
8653 | struct kvm_cpuid_entry2 *best; | 8902 | struct kvm_cpuid_entry2 *best; |
8654 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 8903 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
8655 | u32 exec_control; | 8904 | u32 secondary_exec_ctl = vmx_secondary_exec_control(vmx); |
8656 | 8905 | ||
8657 | vmx->rdtscp_enabled = false; | ||
8658 | if (vmx_rdtscp_supported()) { | 8906 | if (vmx_rdtscp_supported()) { |
8659 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | 8907 | bool rdtscp_enabled = guest_cpuid_has_rdtscp(vcpu); |
8660 | if (exec_control & SECONDARY_EXEC_RDTSCP) { | 8908 | if (!rdtscp_enabled) |
8661 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 8909 | secondary_exec_ctl &= ~SECONDARY_EXEC_RDTSCP; |
8662 | if (best && (best->edx & bit(X86_FEATURE_RDTSCP))) | 8910 | |
8663 | vmx->rdtscp_enabled = true; | 8911 | if (nested) { |
8664 | else { | 8912 | if (rdtscp_enabled) |
8665 | exec_control &= ~SECONDARY_EXEC_RDTSCP; | 8913 | vmx->nested.nested_vmx_secondary_ctls_high |= |
8666 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | 8914 | SECONDARY_EXEC_RDTSCP; |
8667 | exec_control); | 8915 | else |
8668 | } | 8916 | vmx->nested.nested_vmx_secondary_ctls_high &= |
8917 | ~SECONDARY_EXEC_RDTSCP; | ||
8669 | } | 8918 | } |
8670 | if (nested && !vmx->rdtscp_enabled) | ||
8671 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
8672 | ~SECONDARY_EXEC_RDTSCP; | ||
8673 | } | 8919 | } |
8674 | 8920 | ||
8675 | /* Exposing INVPCID only when PCID is exposed */ | 8921 | /* Exposing INVPCID only when PCID is exposed */ |
8676 | best = kvm_find_cpuid_entry(vcpu, 0x7, 0); | 8922 | best = kvm_find_cpuid_entry(vcpu, 0x7, 0); |
8677 | if (vmx_invpcid_supported() && | 8923 | if (vmx_invpcid_supported() && |
8678 | best && (best->ebx & bit(X86_FEATURE_INVPCID)) && | 8924 | (!best || !(best->ebx & bit(X86_FEATURE_INVPCID)) || |
8679 | guest_cpuid_has_pcid(vcpu)) { | 8925 | !guest_cpuid_has_pcid(vcpu))) { |
8680 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | 8926 | secondary_exec_ctl &= ~SECONDARY_EXEC_ENABLE_INVPCID; |
8681 | exec_control |= SECONDARY_EXEC_ENABLE_INVPCID; | 8927 | |
8682 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | ||
8683 | exec_control); | ||
8684 | } else { | ||
8685 | if (cpu_has_secondary_exec_ctrls()) { | ||
8686 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
8687 | exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; | ||
8688 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | ||
8689 | exec_control); | ||
8690 | } | ||
8691 | if (best) | 8928 | if (best) |
8692 | best->ebx &= ~bit(X86_FEATURE_INVPCID); | 8929 | best->ebx &= ~bit(X86_FEATURE_INVPCID); |
8693 | } | 8930 | } |
8931 | |||
8932 | vmcs_set_secondary_exec_control(secondary_exec_ctl); | ||
8933 | |||
8934 | if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) { | ||
8935 | if (guest_cpuid_has_pcommit(vcpu)) | ||
8936 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
8937 | SECONDARY_EXEC_PCOMMIT; | ||
8938 | else | ||
8939 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
8940 | ~SECONDARY_EXEC_PCOMMIT; | ||
8941 | } | ||
8694 | } | 8942 | } |
8695 | 8943 | ||
8696 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | 8944 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) |
@@ -9298,13 +9546,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
9298 | 9546 | ||
9299 | if (cpu_has_secondary_exec_ctrls()) { | 9547 | if (cpu_has_secondary_exec_ctrls()) { |
9300 | exec_control = vmx_secondary_exec_control(vmx); | 9548 | exec_control = vmx_secondary_exec_control(vmx); |
9301 | if (!vmx->rdtscp_enabled) | 9549 | |
9302 | exec_control &= ~SECONDARY_EXEC_RDTSCP; | ||
9303 | /* Take the following fields only from vmcs12 */ | 9550 | /* Take the following fields only from vmcs12 */ |
9304 | exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 9551 | exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
9305 | SECONDARY_EXEC_RDTSCP | | 9552 | SECONDARY_EXEC_RDTSCP | |
9306 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 9553 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
9307 | SECONDARY_EXEC_APIC_REGISTER_VIRT); | 9554 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
9555 | SECONDARY_EXEC_PCOMMIT); | ||
9308 | if (nested_cpu_has(vmcs12, | 9556 | if (nested_cpu_has(vmcs12, |
9309 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) | 9557 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) |
9310 | exec_control |= vmcs12->secondary_vm_exec_control; | 9558 | exec_control |= vmcs12->secondary_vm_exec_control; |
@@ -9323,7 +9571,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
9323 | vmcs_write64(APIC_ACCESS_ADDR, | 9571 | vmcs_write64(APIC_ACCESS_ADDR, |
9324 | page_to_phys(vmx->nested.apic_access_page)); | 9572 | page_to_phys(vmx->nested.apic_access_page)); |
9325 | } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) && | 9573 | } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) && |
9326 | (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))) { | 9574 | cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { |
9327 | exec_control |= | 9575 | exec_control |= |
9328 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 9576 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
9329 | kvm_vcpu_reload_apic_access_page(vcpu); | 9577 | kvm_vcpu_reload_apic_access_page(vcpu); |
@@ -9433,12 +9681,24 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
9433 | 9681 | ||
9434 | if (enable_vpid) { | 9682 | if (enable_vpid) { |
9435 | /* | 9683 | /* |
9436 | * Trivially support vpid by letting L2s share their parent | 9684 | * There is no direct mapping between vpid02 and vpid12, the |
9437 | * L1's vpid. TODO: move to a more elaborate solution, giving | 9685 | * vpid02 is per-vCPU for L0 and reused while the value of |
9438 | * each L2 its own vpid and exposing the vpid feature to L1. | 9686 | * vpid12 is changed w/ one invvpid during nested vmentry. |
9687 | * The vpid12 is allocated by L1 for L2, so it will not | ||
9688 | * influence the global bitmap (for vpid01 and vpid02 allocation) | ||
9689 | * even if L1 spawns a lot of nested vCPUs. | ||
9439 | */ | 9690 | */ |
9440 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | 9691 | if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) { |
9441 | vmx_flush_tlb(vcpu); | 9692 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); |
9693 | if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) { | ||
9694 | vmx->nested.last_vpid = vmcs12->virtual_processor_id; | ||
9695 | __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02); | ||
9696 | } | ||
9697 | } else { | ||
9698 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | ||
9699 | vmx_flush_tlb(vcpu); | ||
9700 | } | ||
9701 | |||
9442 | } | 9702 | } |
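This hunk replaces the old "share L1's vpid" scheme: L0 now owns a per-vCPU vpid02 that it reuses across nested entries, flushing the TLB entries tagged with it only when L1 actually changes vpid12 in vmcs12. A rough, self-contained sketch of that decision; the struct and function names here are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct nested_state {
    uint16_t vpid02;     /* tag owned by L0 for this vCPU's L2 context */
    uint16_t last_vpid;  /* last vpid12 value seen from vmcs12 */
};

/* Returns the vpid to program into the VMCS and whether to flush. */
static uint16_t pick_vpid(struct nested_state *n, bool l1_uses_vpid,
                          uint16_t vpid12, uint16_t vpid01, bool *flush)
{
    if (l1_uses_vpid && n->vpid02) {
        /* Reuse vpid02; flush only when L1 switched to a new vpid12. */
        *flush = (vpid12 != n->last_vpid);
        if (*flush)
            n->last_vpid = vpid12;
        return n->vpid02;
    }
    /* Fall back to L1's own tag and flush unconditionally. */
    *flush = true;
    return vpid01;
}

int main(void)
{
    struct nested_state n = { .vpid02 = 2, .last_vpid = 0 };
    bool flush;
    uint16_t v = pick_vpid(&n, true, 5, 1, &flush);
    printf("vpid=%u flush=%d\n", v, flush);   /* vpid=2 flush=1 */
    v = pick_vpid(&n, true, 5, 1, &flush);
    printf("vpid=%u flush=%d\n", v, flush);   /* vpid=2 flush=0 */
    return 0;
}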
9443 | 9703 | ||
9444 | if (nested_cpu_has_ept(vmcs12)) { | 9704 | if (nested_cpu_has_ept(vmcs12)) { |
@@ -10278,6 +10538,201 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, | |||
10278 | kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); | 10538 | kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); |
10279 | } | 10539 | } |
10280 | 10540 | ||
10541 | /* | ||
10542 | * This routine does the following things for vCPU which is going | ||
10543 | * to be blocked if VT-d PI is enabled. | ||
10544 | * - Store the vCPU to the wakeup list, so when interrupts happen | ||
10545 | * we can find the right vCPU to wake up. | ||
10546 | * - Change the Posted-interrupt descriptor as below: | ||
10547 | * 'NDST' <-- vcpu->pre_pcpu | ||
10548 | * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR | ||
10549 | * - If 'ON' is set during this process, it means at least one | ||
10550 | * interrupt has been posted for this vCPU and we cannot block it; | ||
10551 | * in that case return 1, otherwise return 0. | ||
10552 | * | ||
10553 | */ | ||
10554 | static int vmx_pre_block(struct kvm_vcpu *vcpu) | ||
10555 | { | ||
10556 | unsigned long flags; | ||
10557 | unsigned int dest; | ||
10558 | struct pi_desc old, new; | ||
10559 | struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); | ||
10560 | |||
10561 | if (!kvm_arch_has_assigned_device(vcpu->kvm) || | ||
10562 | !irq_remapping_cap(IRQ_POSTING_CAP)) | ||
10563 | return 0; | ||
10564 | |||
10565 | vcpu->pre_pcpu = vcpu->cpu; | ||
10566 | spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, | ||
10567 | vcpu->pre_pcpu), flags); | ||
10568 | list_add_tail(&vcpu->blocked_vcpu_list, | ||
10569 | &per_cpu(blocked_vcpu_on_cpu, | ||
10570 | vcpu->pre_pcpu)); | ||
10571 | spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, | ||
10572 | vcpu->pre_pcpu), flags); | ||
10573 | |||
10574 | do { | ||
10575 | old.control = new.control = pi_desc->control; | ||
10576 | |||
10577 | /* | ||
10578 | * We should not block the vCPU if | ||
10579 | * an interrupt is posted for it. | ||
10580 | */ | ||
10581 | if (pi_test_on(pi_desc) == 1) { | ||
10582 | spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, | ||
10583 | vcpu->pre_pcpu), flags); | ||
10584 | list_del(&vcpu->blocked_vcpu_list); | ||
10585 | spin_unlock_irqrestore( | ||
10586 | &per_cpu(blocked_vcpu_on_cpu_lock, | ||
10587 | vcpu->pre_pcpu), flags); | ||
10588 | vcpu->pre_pcpu = -1; | ||
10589 | |||
10590 | return 1; | ||
10591 | } | ||
10592 | |||
10593 | WARN((pi_desc->sn == 1), | ||
10594 | "Warning: SN field of posted-interrupts " | ||
10595 | "is set before blocking\n"); | ||
10596 | |||
10597 | /* | ||
10598 | * Since vCPU can be preempted during this process, | ||
10599 | * vcpu->cpu could be different with pre_pcpu, we | ||
10600 | * need to set pre_pcpu as the destination of wakeup | ||
10601 | * notification event, then we can find the right vCPU | ||
10602 | * to wakeup in wakeup handler if interrupts happen | ||
10603 | * when the vCPU is in blocked state. | ||
10604 | */ | ||
10605 | dest = cpu_physical_id(vcpu->pre_pcpu); | ||
10606 | |||
10607 | if (x2apic_enabled()) | ||
10608 | new.ndst = dest; | ||
10609 | else | ||
10610 | new.ndst = (dest << 8) & 0xFF00; | ||
10611 | |||
10612 | /* set 'NV' to 'wakeup vector' */ | ||
10613 | new.nv = POSTED_INTR_WAKEUP_VECTOR; | ||
10614 | } while (cmpxchg(&pi_desc->control, old.control, | ||
10615 | new.control) != old.control); | ||
10616 | |||
10617 | return 0; | ||
10618 | } | ||
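The do/while-cmpxchg loop above is the usual lock-free read-modify-write on the posted-interrupt descriptor: snapshot control, edit NDST/NV in the copy, and retry if another agent (e.g. the IOMMU posting an interrupt and setting ON) changed the word in the meantime. A small userspace analogue of the pattern using C11 atomics; the field layout is illustrative, not the real descriptor format:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NV_MASK    0x00ffull
#define NDST_SHIFT 32

static _Atomic uint64_t pi_control;

/* Retarget the notification vector and destination, retrying if the
 * descriptor changed under us. */
static void retarget(uint8_t vector, uint32_t dest)
{
    uint64_t old = atomic_load(&pi_control);
    uint64_t new;

    do {
        new = (old & ~(NV_MASK | ((uint64_t)0xffffffff << NDST_SHIFT)))
              | vector
              | ((uint64_t)dest << NDST_SHIFT);
    } while (!atomic_compare_exchange_weak(&pi_control, &old, new));
}

int main(void)
{
    atomic_store(&pi_control, 0);
    retarget(0xf2, 3);   /* e.g. wakeup vector, destination pre_pcpu 3 */
    printf("control = 0x%016llx\n",
           (unsigned long long)atomic_load(&pi_control));
    return 0;
}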
10619 | |||
10620 | static void vmx_post_block(struct kvm_vcpu *vcpu) | ||
10621 | { | ||
10622 | struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); | ||
10623 | struct pi_desc old, new; | ||
10624 | unsigned int dest; | ||
10625 | unsigned long flags; | ||
10626 | |||
10627 | if (!kvm_arch_has_assigned_device(vcpu->kvm) || | ||
10628 | !irq_remapping_cap(IRQ_POSTING_CAP)) | ||
10629 | return; | ||
10630 | |||
10631 | do { | ||
10632 | old.control = new.control = pi_desc->control; | ||
10633 | |||
10634 | dest = cpu_physical_id(vcpu->cpu); | ||
10635 | |||
10636 | if (x2apic_enabled()) | ||
10637 | new.ndst = dest; | ||
10638 | else | ||
10639 | new.ndst = (dest << 8) & 0xFF00; | ||
10640 | |||
10641 | /* Allow posting non-urgent interrupts */ | ||
10642 | new.sn = 0; | ||
10643 | |||
10644 | /* set 'NV' to 'notification vector' */ | ||
10645 | new.nv = POSTED_INTR_VECTOR; | ||
10646 | } while (cmpxchg(&pi_desc->control, old.control, | ||
10647 | new.control) != old.control); | ||
10648 | |||
10649 | if (vcpu->pre_pcpu != -1) { | ||
10650 | spin_lock_irqsave( | ||
10651 | &per_cpu(blocked_vcpu_on_cpu_lock, | ||
10652 | vcpu->pre_pcpu), flags); | ||
10653 | list_del(&vcpu->blocked_vcpu_list); | ||
10654 | spin_unlock_irqrestore( | ||
10655 | &per_cpu(blocked_vcpu_on_cpu_lock, | ||
10656 | vcpu->pre_pcpu), flags); | ||
10657 | vcpu->pre_pcpu = -1; | ||
10658 | } | ||
10659 | } | ||
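Both blocking paths encode the destination the same way: in x2APIC mode NDST carries the full 32-bit APIC ID, while in xAPIC mode the ID is 8 bits and has to sit in bits 8-15 of the field, hence the (dest << 8) & 0xFF00. A tiny sketch of that encoding:

#include <stdint.h>
#include <stdio.h>

static uint32_t encode_ndst(uint32_t apic_id, int x2apic)
{
    /* x2APIC: full 32-bit ID; xAPIC: 8-bit ID placed in bits 8-15. */
    return x2apic ? apic_id : ((apic_id << 8) & 0xFF00);
}

int main(void)
{
    printf("xAPIC  id 3 -> 0x%04x\n", encode_ndst(3, 0));   /* 0x0300 */
    printf("x2APIC id 3 -> 0x%08x\n", encode_ndst(3, 1));   /* 0x00000003 */
    return 0;
}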
10660 | |||
10661 | /* | ||
10662 | * vmx_update_pi_irte - set IRTE for Posted-Interrupts | ||
10663 | * | ||
10664 | * @kvm: kvm | ||
10665 | * @host_irq: host irq of the interrupt | ||
10666 | * @guest_irq: gsi of the interrupt | ||
10667 | * @set: set or unset PI | ||
10668 | * returns 0 on success, < 0 on failure | ||
10669 | */ | ||
10670 | static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, | ||
10671 | uint32_t guest_irq, bool set) | ||
10672 | { | ||
10673 | struct kvm_kernel_irq_routing_entry *e; | ||
10674 | struct kvm_irq_routing_table *irq_rt; | ||
10675 | struct kvm_lapic_irq irq; | ||
10676 | struct kvm_vcpu *vcpu; | ||
10677 | struct vcpu_data vcpu_info; | ||
10678 | int idx, ret = -EINVAL; | ||
10679 | |||
10680 | if (!kvm_arch_has_assigned_device(kvm) || | ||
10681 | !irq_remapping_cap(IRQ_POSTING_CAP)) | ||
10682 | return 0; | ||
10683 | |||
10684 | idx = srcu_read_lock(&kvm->irq_srcu); | ||
10685 | irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); | ||
10686 | BUG_ON(guest_irq >= irq_rt->nr_rt_entries); | ||
10687 | |||
10688 | hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { | ||
10689 | if (e->type != KVM_IRQ_ROUTING_MSI) | ||
10690 | continue; | ||
10691 | /* | ||
10692 | * VT-d PI cannot support posting multicast/broadcast | ||
10693 | * interrupts to a vCPU, so we still use interrupt remapping | ||
10694 | * for this kind of interrupt. | ||
10695 | * | ||
10696 | * For lowest-priority interrupts, we only support | ||
10697 | * those with a single CPU as the destination, e.g. the user | ||
10698 | * configures the interrupts via /proc/irq or uses | ||
10699 | * irqbalance to make the interrupts single-CPU. | ||
10700 | * | ||
10701 | * We will support full lowest-priority interrupt later. | ||
10702 | */ | ||
10703 | |||
10704 | kvm_set_msi_irq(e, &irq); | ||
10705 | if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) | ||
10706 | continue; | ||
10707 | |||
10708 | vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); | ||
10709 | vcpu_info.vector = irq.vector; | ||
10710 | |||
10711 | trace_kvm_pi_irte_update(vcpu->vcpu_id, e->gsi, | ||
10712 | vcpu_info.vector, vcpu_info.pi_desc_addr, set); | ||
10713 | |||
10714 | if (set) | ||
10715 | ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); | ||
10716 | else { | ||
10717 | /* suppress notification event before unposting */ | ||
10718 | pi_set_sn(vcpu_to_pi_desc(vcpu)); | ||
10719 | ret = irq_set_vcpu_affinity(host_irq, NULL); | ||
10720 | pi_clear_sn(vcpu_to_pi_desc(vcpu)); | ||
10721 | } | ||
10722 | |||
10723 | if (ret < 0) { | ||
10724 | printk(KERN_INFO "%s: failed to update PI IRTE\n", | ||
10725 | __func__); | ||
10726 | goto out; | ||
10727 | } | ||
10728 | } | ||
10729 | |||
10730 | ret = 0; | ||
10731 | out: | ||
10732 | srcu_read_unlock(&kvm->irq_srcu, idx); | ||
10733 | return ret; | ||
10734 | } | ||
10735 | |||
10281 | static struct kvm_x86_ops vmx_x86_ops = { | 10736 | static struct kvm_x86_ops vmx_x86_ops = { |
10282 | .cpu_has_kvm_support = cpu_has_kvm_support, | 10737 | .cpu_has_kvm_support = cpu_has_kvm_support, |
10283 | .disabled_by_bios = vmx_disabled_by_bios, | 10738 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -10347,7 +10802,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
10347 | .update_cr8_intercept = update_cr8_intercept, | 10802 | .update_cr8_intercept = update_cr8_intercept, |
10348 | .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, | 10803 | .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, |
10349 | .set_apic_access_page_addr = vmx_set_apic_access_page_addr, | 10804 | .set_apic_access_page_addr = vmx_set_apic_access_page_addr, |
10350 | .vm_has_apicv = vmx_vm_has_apicv, | 10805 | .cpu_uses_apicv = vmx_cpu_uses_apicv, |
10351 | .load_eoi_exitmap = vmx_load_eoi_exitmap, | 10806 | .load_eoi_exitmap = vmx_load_eoi_exitmap, |
10352 | .hwapic_irr_update = vmx_hwapic_irr_update, | 10807 | .hwapic_irr_update = vmx_hwapic_irr_update, |
10353 | .hwapic_isr_update = vmx_hwapic_isr_update, | 10808 | .hwapic_isr_update = vmx_hwapic_isr_update, |
@@ -10394,7 +10849,12 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
10394 | .flush_log_dirty = vmx_flush_log_dirty, | 10849 | .flush_log_dirty = vmx_flush_log_dirty, |
10395 | .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, | 10850 | .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, |
10396 | 10851 | ||
10852 | .pre_block = vmx_pre_block, | ||
10853 | .post_block = vmx_post_block, | ||
10854 | |||
10397 | .pmu_ops = &intel_pmu_ops, | 10855 | .pmu_ops = &intel_pmu_ops, |
10856 | |||
10857 | .update_pi_irte = vmx_update_pi_irte, | ||
10398 | }; | 10858 | }; |
10399 | 10859 | ||
10400 | static int __init vmx_init(void) | 10860 | static int __init vmx_init(void) |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bda65690788e..4a6eff166fc6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -51,6 +51,8 @@ | |||
51 | #include <linux/pci.h> | 51 | #include <linux/pci.h> |
52 | #include <linux/timekeeper_internal.h> | 52 | #include <linux/timekeeper_internal.h> |
53 | #include <linux/pvclock_gtod.h> | 53 | #include <linux/pvclock_gtod.h> |
54 | #include <linux/kvm_irqfd.h> | ||
55 | #include <linux/irqbypass.h> | ||
54 | #include <trace/events/kvm.h> | 56 | #include <trace/events/kvm.h> |
55 | 57 | ||
56 | #define CREATE_TRACE_POINTS | 58 | #define CREATE_TRACE_POINTS |
@@ -64,6 +66,7 @@ | |||
64 | #include <asm/fpu/internal.h> /* Ugh! */ | 66 | #include <asm/fpu/internal.h> /* Ugh! */ |
65 | #include <asm/pvclock.h> | 67 | #include <asm/pvclock.h> |
66 | #include <asm/div64.h> | 68 | #include <asm/div64.h> |
69 | #include <asm/irq_remapping.h> | ||
67 | 70 | ||
68 | #define MAX_IO_MSRS 256 | 71 | #define MAX_IO_MSRS 256 |
69 | #define KVM_MAX_MCE_BANKS 32 | 72 | #define KVM_MAX_MCE_BANKS 32 |
@@ -622,7 +625,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
622 | if ((cr0 ^ old_cr0) & update_bits) | 625 | if ((cr0 ^ old_cr0) & update_bits) |
623 | kvm_mmu_reset_context(vcpu); | 626 | kvm_mmu_reset_context(vcpu); |
624 | 627 | ||
625 | if ((cr0 ^ old_cr0) & X86_CR0_CD) | 628 | if (((cr0 ^ old_cr0) & X86_CR0_CD) && |
629 | kvm_arch_has_noncoherent_dma(vcpu->kvm) && | ||
630 | !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) | ||
626 | kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); | 631 | kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); |
627 | 632 | ||
628 | return 0; | 633 | return 0; |
@@ -789,7 +794,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | |||
789 | { | 794 | { |
790 | if (cr8 & CR8_RESERVED_BITS) | 795 | if (cr8 & CR8_RESERVED_BITS) |
791 | return 1; | 796 | return 1; |
792 | if (irqchip_in_kernel(vcpu->kvm)) | 797 | if (lapic_in_kernel(vcpu)) |
793 | kvm_lapic_set_tpr(vcpu, cr8); | 798 | kvm_lapic_set_tpr(vcpu, cr8); |
794 | else | 799 | else |
795 | vcpu->arch.cr8 = cr8; | 800 | vcpu->arch.cr8 = cr8; |
@@ -799,7 +804,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr8); | |||
799 | 804 | ||
800 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | 805 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) |
801 | { | 806 | { |
802 | if (irqchip_in_kernel(vcpu->kvm)) | 807 | if (lapic_in_kernel(vcpu)) |
803 | return kvm_lapic_get_cr8(vcpu); | 808 | return kvm_lapic_get_cr8(vcpu); |
804 | else | 809 | else |
805 | return vcpu->arch.cr8; | 810 | return vcpu->arch.cr8; |
@@ -953,6 +958,9 @@ static u32 emulated_msrs[] = { | |||
953 | HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, | 958 | HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, |
954 | HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, | 959 | HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, |
955 | HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, | 960 | HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, |
961 | HV_X64_MSR_RESET, | ||
962 | HV_X64_MSR_VP_INDEX, | ||
963 | HV_X64_MSR_VP_RUNTIME, | ||
956 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, | 964 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, |
957 | MSR_KVM_PV_EOI_EN, | 965 | MSR_KVM_PV_EOI_EN, |
958 | 966 | ||
@@ -1898,6 +1906,8 @@ static void accumulate_steal_time(struct kvm_vcpu *vcpu) | |||
1898 | 1906 | ||
1899 | static void record_steal_time(struct kvm_vcpu *vcpu) | 1907 | static void record_steal_time(struct kvm_vcpu *vcpu) |
1900 | { | 1908 | { |
1909 | accumulate_steal_time(vcpu); | ||
1910 | |||
1901 | if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) | 1911 | if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) |
1902 | return; | 1912 | return; |
1903 | 1913 | ||
@@ -2048,12 +2058,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2048 | if (!(data & KVM_MSR_ENABLED)) | 2058 | if (!(data & KVM_MSR_ENABLED)) |
2049 | break; | 2059 | break; |
2050 | 2060 | ||
2051 | vcpu->arch.st.last_steal = current->sched_info.run_delay; | ||
2052 | |||
2053 | preempt_disable(); | ||
2054 | accumulate_steal_time(vcpu); | ||
2055 | preempt_enable(); | ||
2056 | |||
2057 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); | 2061 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); |
2058 | 2062 | ||
2059 | break; | 2063 | break; |
@@ -2449,6 +2453,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
2449 | case KVM_CAP_ENABLE_CAP_VM: | 2453 | case KVM_CAP_ENABLE_CAP_VM: |
2450 | case KVM_CAP_DISABLE_QUIRKS: | 2454 | case KVM_CAP_DISABLE_QUIRKS: |
2451 | case KVM_CAP_SET_BOOT_CPU_ID: | 2455 | case KVM_CAP_SET_BOOT_CPU_ID: |
2456 | case KVM_CAP_SPLIT_IRQCHIP: | ||
2452 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | 2457 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
2453 | case KVM_CAP_ASSIGN_DEV_IRQ: | 2458 | case KVM_CAP_ASSIGN_DEV_IRQ: |
2454 | case KVM_CAP_PCI_2_3: | 2459 | case KVM_CAP_PCI_2_3: |
@@ -2628,7 +2633,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2628 | vcpu->cpu = cpu; | 2633 | vcpu->cpu = cpu; |
2629 | } | 2634 | } |
2630 | 2635 | ||
2631 | accumulate_steal_time(vcpu); | ||
2632 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); | 2636 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); |
2633 | } | 2637 | } |
2634 | 2638 | ||
@@ -2662,12 +2666,24 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | |||
2662 | { | 2666 | { |
2663 | if (irq->irq >= KVM_NR_INTERRUPTS) | 2667 | if (irq->irq >= KVM_NR_INTERRUPTS) |
2664 | return -EINVAL; | 2668 | return -EINVAL; |
2665 | if (irqchip_in_kernel(vcpu->kvm)) | 2669 | |
2670 | if (!irqchip_in_kernel(vcpu->kvm)) { | ||
2671 | kvm_queue_interrupt(vcpu, irq->irq, false); | ||
2672 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
2673 | return 0; | ||
2674 | } | ||
2675 | |||
2676 | /* | ||
2677 | * With in-kernel LAPIC, we only use this to inject EXTINT, so | ||
2678 | * fail for in-kernel 8259. | ||
2679 | */ | ||
2680 | if (pic_in_kernel(vcpu->kvm)) | ||
2666 | return -ENXIO; | 2681 | return -ENXIO; |
2667 | 2682 | ||
2668 | kvm_queue_interrupt(vcpu, irq->irq, false); | 2683 | if (vcpu->arch.pending_external_vector != -1) |
2669 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 2684 | return -EEXIST; |
2670 | 2685 | ||
2686 | vcpu->arch.pending_external_vector = irq->irq; | ||
2671 | return 0; | 2687 | return 0; |
2672 | } | 2688 | } |
2673 | 2689 | ||
@@ -3176,7 +3192,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3176 | struct kvm_vapic_addr va; | 3192 | struct kvm_vapic_addr va; |
3177 | 3193 | ||
3178 | r = -EINVAL; | 3194 | r = -EINVAL; |
3179 | if (!irqchip_in_kernel(vcpu->kvm)) | 3195 | if (!lapic_in_kernel(vcpu)) |
3180 | goto out; | 3196 | goto out; |
3181 | r = -EFAULT; | 3197 | r = -EFAULT; |
3182 | if (copy_from_user(&va, argp, sizeof va)) | 3198 | if (copy_from_user(&va, argp, sizeof va)) |
@@ -3425,41 +3441,35 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
3425 | 3441 | ||
3426 | static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) | 3442 | static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) |
3427 | { | 3443 | { |
3428 | int r = 0; | ||
3429 | |||
3430 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 3444 | mutex_lock(&kvm->arch.vpit->pit_state.lock); |
3431 | memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); | 3445 | memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); |
3432 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 3446 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); |
3433 | return r; | 3447 | return 0; |
3434 | } | 3448 | } |
3435 | 3449 | ||
3436 | static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) | 3450 | static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) |
3437 | { | 3451 | { |
3438 | int r = 0; | ||
3439 | |||
3440 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 3452 | mutex_lock(&kvm->arch.vpit->pit_state.lock); |
3441 | memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); | 3453 | memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); |
3442 | kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0); | 3454 | kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0); |
3443 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 3455 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); |
3444 | return r; | 3456 | return 0; |
3445 | } | 3457 | } |
3446 | 3458 | ||
3447 | static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) | 3459 | static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) |
3448 | { | 3460 | { |
3449 | int r = 0; | ||
3450 | |||
3451 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 3461 | mutex_lock(&kvm->arch.vpit->pit_state.lock); |
3452 | memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels, | 3462 | memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels, |
3453 | sizeof(ps->channels)); | 3463 | sizeof(ps->channels)); |
3454 | ps->flags = kvm->arch.vpit->pit_state.flags; | 3464 | ps->flags = kvm->arch.vpit->pit_state.flags; |
3455 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 3465 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); |
3456 | memset(&ps->reserved, 0, sizeof(ps->reserved)); | 3466 | memset(&ps->reserved, 0, sizeof(ps->reserved)); |
3457 | return r; | 3467 | return 0; |
3458 | } | 3468 | } |
3459 | 3469 | ||
3460 | static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) | 3470 | static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) |
3461 | { | 3471 | { |
3462 | int r = 0, start = 0; | 3472 | int start = 0; |
3463 | u32 prev_legacy, cur_legacy; | 3473 | u32 prev_legacy, cur_legacy; |
3464 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 3474 | mutex_lock(&kvm->arch.vpit->pit_state.lock); |
3465 | prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; | 3475 | prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; |
@@ -3471,7 +3481,7 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) | |||
3471 | kvm->arch.vpit->pit_state.flags = ps->flags; | 3481 | kvm->arch.vpit->pit_state.flags = ps->flags; |
3472 | kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start); | 3482 | kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start); |
3473 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 3483 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); |
3474 | return r; | 3484 | return 0; |
3475 | } | 3485 | } |
3476 | 3486 | ||
3477 | static int kvm_vm_ioctl_reinject(struct kvm *kvm, | 3487 | static int kvm_vm_ioctl_reinject(struct kvm *kvm, |
@@ -3556,6 +3566,28 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, | |||
3556 | kvm->arch.disabled_quirks = cap->args[0]; | 3566 | kvm->arch.disabled_quirks = cap->args[0]; |
3557 | r = 0; | 3567 | r = 0; |
3558 | break; | 3568 | break; |
3569 | case KVM_CAP_SPLIT_IRQCHIP: { | ||
3570 | mutex_lock(&kvm->lock); | ||
3571 | r = -EINVAL; | ||
3572 | if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS) | ||
3573 | goto split_irqchip_unlock; | ||
3574 | r = -EEXIST; | ||
3575 | if (irqchip_in_kernel(kvm)) | ||
3576 | goto split_irqchip_unlock; | ||
3577 | if (atomic_read(&kvm->online_vcpus)) | ||
3578 | goto split_irqchip_unlock; | ||
3579 | r = kvm_setup_empty_irq_routing(kvm); | ||
3580 | if (r) | ||
3581 | goto split_irqchip_unlock; | ||
3582 | /* Pairs with irqchip_in_kernel. */ | ||
3583 | smp_wmb(); | ||
3584 | kvm->arch.irqchip_split = true; | ||
3585 | kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; | ||
3586 | r = 0; | ||
3587 | split_irqchip_unlock: | ||
3588 | mutex_unlock(&kvm->lock); | ||
3589 | break; | ||
3590 | } | ||
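From userspace, the split irqchip must be requested on the VM fd before the first vCPU is created and while no full in-kernel irqchip exists, exactly as the checks above enforce; args[0] is the number of IOAPIC pins to reserve for routing. A hedged sketch of the enablement call (error handling trimmed, constants taken from <linux/kvm.h>):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Enable LAPIC-in-kernel + userspace IOAPIC/PIC/PIT on a freshly
 * created VM, before the first KVM_CREATE_VCPU. */
static int enable_split_irqchip(int vm_fd, unsigned int ioapic_pins)
{
    struct kvm_enable_cap cap;

    memset(&cap, 0, sizeof(cap));
    cap.cap = KVM_CAP_SPLIT_IRQCHIP;
    cap.args[0] = ioapic_pins;   /* must not exceed the kernel's reserved-pin limit */

    return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

A typical value for ioapic_pins would be 24, the usual IOAPIC pin count, though the exact number is up to the VMM.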
3559 | default: | 3591 | default: |
3560 | r = -EINVAL; | 3592 | r = -EINVAL; |
3561 | break; | 3593 | break; |
@@ -3669,7 +3701,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3669 | } | 3701 | } |
3670 | 3702 | ||
3671 | r = -ENXIO; | 3703 | r = -ENXIO; |
3672 | if (!irqchip_in_kernel(kvm)) | 3704 | if (!irqchip_in_kernel(kvm) || irqchip_split(kvm)) |
3673 | goto get_irqchip_out; | 3705 | goto get_irqchip_out; |
3674 | r = kvm_vm_ioctl_get_irqchip(kvm, chip); | 3706 | r = kvm_vm_ioctl_get_irqchip(kvm, chip); |
3675 | if (r) | 3707 | if (r) |
@@ -3693,7 +3725,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3693 | } | 3725 | } |
3694 | 3726 | ||
3695 | r = -ENXIO; | 3727 | r = -ENXIO; |
3696 | if (!irqchip_in_kernel(kvm)) | 3728 | if (!irqchip_in_kernel(kvm) || irqchip_split(kvm)) |
3697 | goto set_irqchip_out; | 3729 | goto set_irqchip_out; |
3698 | r = kvm_vm_ioctl_set_irqchip(kvm, chip); | 3730 | r = kvm_vm_ioctl_set_irqchip(kvm, chip); |
3699 | if (r) | 3731 | if (r) |
@@ -4060,6 +4092,15 @@ static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
4060 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); | 4092 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); |
4061 | } | 4093 | } |
4062 | 4094 | ||
4095 | static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt, | ||
4096 | unsigned long addr, void *val, unsigned int bytes) | ||
4097 | { | ||
4098 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4099 | int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes); | ||
4100 | |||
4101 | return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE; | ||
4102 | } | ||
4103 | |||
4063 | int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | 4104 | int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, |
4064 | gva_t addr, void *val, | 4105 | gva_t addr, void *val, |
4065 | unsigned int bytes, | 4106 | unsigned int bytes, |
@@ -4795,6 +4836,7 @@ static const struct x86_emulate_ops emulate_ops = { | |||
4795 | .write_gpr = emulator_write_gpr, | 4836 | .write_gpr = emulator_write_gpr, |
4796 | .read_std = kvm_read_guest_virt_system, | 4837 | .read_std = kvm_read_guest_virt_system, |
4797 | .write_std = kvm_write_guest_virt_system, | 4838 | .write_std = kvm_write_guest_virt_system, |
4839 | .read_phys = kvm_read_guest_phys_system, | ||
4798 | .fetch = kvm_fetch_guest_virt, | 4840 | .fetch = kvm_fetch_guest_virt, |
4799 | .read_emulated = emulator_read_emulated, | 4841 | .read_emulated = emulator_read_emulated, |
4800 | .write_emulated = emulator_write_emulated, | 4842 | .write_emulated = emulator_write_emulated, |
@@ -5667,7 +5709,7 @@ void kvm_arch_exit(void) | |||
5667 | int kvm_vcpu_halt(struct kvm_vcpu *vcpu) | 5709 | int kvm_vcpu_halt(struct kvm_vcpu *vcpu) |
5668 | { | 5710 | { |
5669 | ++vcpu->stat.halt_exits; | 5711 | ++vcpu->stat.halt_exits; |
5670 | if (irqchip_in_kernel(vcpu->kvm)) { | 5712 | if (lapic_in_kernel(vcpu)) { |
5671 | vcpu->arch.mp_state = KVM_MP_STATE_HALTED; | 5713 | vcpu->arch.mp_state = KVM_MP_STATE_HALTED; |
5672 | return 1; | 5714 | return 1; |
5673 | } else { | 5715 | } else { |
@@ -5774,9 +5816,15 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) | |||
5774 | */ | 5816 | */ |
5775 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) | 5817 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) |
5776 | { | 5818 | { |
5777 | return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && | 5819 | if (!vcpu->run->request_interrupt_window || pic_in_kernel(vcpu->kvm)) |
5778 | vcpu->run->request_interrupt_window && | 5820 | return false; |
5779 | kvm_arch_interrupt_allowed(vcpu)); | 5821 | |
5822 | if (kvm_cpu_has_interrupt(vcpu)) | ||
5823 | return false; | ||
5824 | |||
5825 | return (irqchip_split(vcpu->kvm) | ||
5826 | ? kvm_apic_accept_pic_intr(vcpu) | ||
5827 | : kvm_arch_interrupt_allowed(vcpu)); | ||
5780 | } | 5828 | } |
5781 | 5829 | ||
5782 | static void post_kvm_run_save(struct kvm_vcpu *vcpu) | 5830 | static void post_kvm_run_save(struct kvm_vcpu *vcpu) |
@@ -5787,13 +5835,17 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) | |||
5787 | kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; | 5835 | kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; |
5788 | kvm_run->cr8 = kvm_get_cr8(vcpu); | 5836 | kvm_run->cr8 = kvm_get_cr8(vcpu); |
5789 | kvm_run->apic_base = kvm_get_apic_base(vcpu); | 5837 | kvm_run->apic_base = kvm_get_apic_base(vcpu); |
5790 | if (irqchip_in_kernel(vcpu->kvm)) | 5838 | if (!irqchip_in_kernel(vcpu->kvm)) |
5791 | kvm_run->ready_for_interrupt_injection = 1; | ||
5792 | else | ||
5793 | kvm_run->ready_for_interrupt_injection = | 5839 | kvm_run->ready_for_interrupt_injection = |
5794 | kvm_arch_interrupt_allowed(vcpu) && | 5840 | kvm_arch_interrupt_allowed(vcpu) && |
5795 | !kvm_cpu_has_interrupt(vcpu) && | 5841 | !kvm_cpu_has_interrupt(vcpu) && |
5796 | !kvm_event_needs_reinjection(vcpu); | 5842 | !kvm_event_needs_reinjection(vcpu); |
5843 | else if (!pic_in_kernel(vcpu->kvm)) | ||
5844 | kvm_run->ready_for_interrupt_injection = | ||
5845 | kvm_apic_accept_pic_intr(vcpu) && | ||
5846 | !kvm_cpu_has_interrupt(vcpu); | ||
5847 | else | ||
5848 | kvm_run->ready_for_interrupt_injection = 1; | ||
5797 | } | 5849 | } |
5798 | 5850 | ||
5799 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) | 5851 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) |
@@ -6144,18 +6196,18 @@ static void process_smi(struct kvm_vcpu *vcpu) | |||
6144 | 6196 | ||
6145 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | 6197 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) |
6146 | { | 6198 | { |
6147 | u64 eoi_exit_bitmap[4]; | ||
6148 | u32 tmr[8]; | ||
6149 | |||
6150 | if (!kvm_apic_hw_enabled(vcpu->arch.apic)) | 6199 | if (!kvm_apic_hw_enabled(vcpu->arch.apic)) |
6151 | return; | 6200 | return; |
6152 | 6201 | ||
6153 | memset(eoi_exit_bitmap, 0, 32); | 6202 | memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8); |
6154 | memset(tmr, 0, 32); | ||
6155 | 6203 | ||
6156 | kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr); | 6204 | if (irqchip_split(vcpu->kvm)) |
6157 | kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); | 6205 | kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap); |
6158 | kvm_apic_update_tmr(vcpu, tmr); | 6206 | else { |
6207 | kvm_x86_ops->sync_pir_to_irr(vcpu); | ||
6208 | kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); | ||
6209 | } | ||
6210 | kvm_x86_ops->load_eoi_exitmap(vcpu); | ||
6159 | } | 6211 | } |
6160 | 6212 | ||
6161 | static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) | 6213 | static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) |
@@ -6168,7 +6220,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) | |||
6168 | { | 6220 | { |
6169 | struct page *page = NULL; | 6221 | struct page *page = NULL; |
6170 | 6222 | ||
6171 | if (!irqchip_in_kernel(vcpu->kvm)) | 6223 | if (!lapic_in_kernel(vcpu)) |
6172 | return; | 6224 | return; |
6173 | 6225 | ||
6174 | if (!kvm_x86_ops->set_apic_access_page_addr) | 6226 | if (!kvm_x86_ops->set_apic_access_page_addr) |
@@ -6206,7 +6258,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, | |||
6206 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 6258 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
6207 | { | 6259 | { |
6208 | int r; | 6260 | int r; |
6209 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 6261 | bool req_int_win = !lapic_in_kernel(vcpu) && |
6210 | vcpu->run->request_interrupt_window; | 6262 | vcpu->run->request_interrupt_window; |
6211 | bool req_immediate_exit = false; | 6263 | bool req_immediate_exit = false; |
6212 | 6264 | ||
@@ -6258,6 +6310,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6258 | kvm_pmu_handle_event(vcpu); | 6310 | kvm_pmu_handle_event(vcpu); |
6259 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) | 6311 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) |
6260 | kvm_pmu_deliver_pmi(vcpu); | 6312 | kvm_pmu_deliver_pmi(vcpu); |
6313 | if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) { | ||
6314 | BUG_ON(vcpu->arch.pending_ioapic_eoi > 255); | ||
6315 | if (test_bit(vcpu->arch.pending_ioapic_eoi, | ||
6316 | (void *) vcpu->arch.eoi_exit_bitmap)) { | ||
6317 | vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI; | ||
6318 | vcpu->run->eoi.vector = | ||
6319 | vcpu->arch.pending_ioapic_eoi; | ||
6320 | r = 0; | ||
6321 | goto out; | ||
6322 | } | ||
6323 | } | ||
6261 | if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) | 6324 | if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) |
6262 | vcpu_scan_ioapic(vcpu); | 6325 | vcpu_scan_ioapic(vcpu); |
6263 | if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) | 6326 | if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) |
@@ -6268,6 +6331,26 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6268 | r = 0; | 6331 | r = 0; |
6269 | goto out; | 6332 | goto out; |
6270 | } | 6333 | } |
6334 | if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) { | ||
6335 | vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; | ||
6336 | vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET; | ||
6337 | r = 0; | ||
6338 | goto out; | ||
6339 | } | ||
6340 | } | ||
6341 | |||
6342 | /* | ||
6343 | * KVM_REQ_EVENT is not set when posted interrupts are set by | ||
6344 | * VT-d hardware, so we have to update RVI unconditionally. | ||
6345 | */ | ||
6346 | if (kvm_lapic_enabled(vcpu)) { | ||
6347 | /* | ||
6348 | * Update architecture specific hints for APIC | ||
6349 | * virtual interrupt delivery. | ||
6350 | */ | ||
6351 | if (kvm_x86_ops->hwapic_irr_update) | ||
6352 | kvm_x86_ops->hwapic_irr_update(vcpu, | ||
6353 | kvm_lapic_find_highest_irr(vcpu)); | ||
6271 | } | 6354 | } |
6272 | 6355 | ||
6273 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 6356 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
@@ -6286,13 +6369,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6286 | kvm_x86_ops->enable_irq_window(vcpu); | 6369 | kvm_x86_ops->enable_irq_window(vcpu); |
6287 | 6370 | ||
6288 | if (kvm_lapic_enabled(vcpu)) { | 6371 | if (kvm_lapic_enabled(vcpu)) { |
6289 | /* | ||
6290 | * Update architecture specific hints for APIC | ||
6291 | * virtual interrupt delivery. | ||
6292 | */ | ||
6293 | if (kvm_x86_ops->hwapic_irr_update) | ||
6294 | kvm_x86_ops->hwapic_irr_update(vcpu, | ||
6295 | kvm_lapic_find_highest_irr(vcpu)); | ||
6296 | update_cr8_intercept(vcpu); | 6372 | update_cr8_intercept(vcpu); |
6297 | kvm_lapic_sync_to_vapic(vcpu); | 6373 | kvm_lapic_sync_to_vapic(vcpu); |
6298 | } | 6374 | } |
@@ -6428,10 +6504,15 @@ out: | |||
6428 | 6504 | ||
6429 | static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) | 6505 | static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) |
6430 | { | 6506 | { |
6431 | if (!kvm_arch_vcpu_runnable(vcpu)) { | 6507 | if (!kvm_arch_vcpu_runnable(vcpu) && |
6508 | (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) { | ||
6432 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 6509 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
6433 | kvm_vcpu_block(vcpu); | 6510 | kvm_vcpu_block(vcpu); |
6434 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 6511 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
6512 | |||
6513 | if (kvm_x86_ops->post_block) | ||
6514 | kvm_x86_ops->post_block(vcpu); | ||
6515 | |||
6435 | if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) | 6516 | if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) |
6436 | return 1; | 6517 | return 1; |
6437 | } | 6518 | } |
@@ -6468,10 +6549,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu) | |||
6468 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 6549 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
6469 | 6550 | ||
6470 | for (;;) { | 6551 | for (;;) { |
6471 | if (kvm_vcpu_running(vcpu)) | 6552 | if (kvm_vcpu_running(vcpu)) { |
6472 | r = vcpu_enter_guest(vcpu); | 6553 | r = vcpu_enter_guest(vcpu); |
6473 | else | 6554 | } else { |
6474 | r = vcpu_block(kvm, vcpu); | 6555 | r = vcpu_block(kvm, vcpu); |
6556 | } | ||
6557 | |||
6475 | if (r <= 0) | 6558 | if (r <= 0) |
6476 | break; | 6559 | break; |
6477 | 6560 | ||
@@ -6480,8 +6563,8 @@ static int vcpu_run(struct kvm_vcpu *vcpu) | |||
6480 | kvm_inject_pending_timer_irqs(vcpu); | 6563 | kvm_inject_pending_timer_irqs(vcpu); |
6481 | 6564 | ||
6482 | if (dm_request_for_irq_injection(vcpu)) { | 6565 | if (dm_request_for_irq_injection(vcpu)) { |
6483 | r = -EINTR; | 6566 | r = 0; |
6484 | vcpu->run->exit_reason = KVM_EXIT_INTR; | 6567 | vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
6485 | ++vcpu->stat.request_irq_exits; | 6568 | ++vcpu->stat.request_irq_exits; |
6486 | break; | 6569 | break; |
6487 | } | 6570 | } |
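With this change a userspace interrupt-window request no longer surfaces as -EINTR with KVM_EXIT_INTR but as a normal zero return with exit_reason set, so the VMM's run loop can treat it like any other exit. A rough sketch of the consuming side; the inject_pending_extint() stub stands in for whatever injection path the VMM actually uses:

#include <stdio.h>
#include <linux/kvm.h>

/* Stand-in for the VMM's own external-interrupt injection path. */
static void inject_pending_extint(void)
{
    puts("irq window open: inject queued external interrupt");
}

/* Hypothetical fragment of a VMM run loop, called after
 * ioctl(vcpu_fd, KVM_RUN, 0) returns 0. */
static void handle_exit(struct kvm_run *run)
{
    switch (run->exit_reason) {
    case KVM_EXIT_IRQ_WINDOW_OPEN:
        inject_pending_extint();
        run->request_interrupt_window = 0;
        break;
    default:
        break;
    }
}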
@@ -6608,7 +6691,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
6608 | } | 6691 | } |
6609 | 6692 | ||
6610 | /* re-sync apic's tpr */ | 6693 | /* re-sync apic's tpr */ |
6611 | if (!irqchip_in_kernel(vcpu->kvm)) { | 6694 | if (!lapic_in_kernel(vcpu)) { |
6612 | if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { | 6695 | if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { |
6613 | r = -EINVAL; | 6696 | r = -EINVAL; |
6614 | goto out; | 6697 | goto out; |
@@ -7308,7 +7391,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) | |||
7308 | 7391 | ||
7309 | bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) | 7392 | bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) |
7310 | { | 7393 | { |
7311 | return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); | 7394 | return irqchip_in_kernel(vcpu->kvm) == lapic_in_kernel(vcpu); |
7312 | } | 7395 | } |
7313 | 7396 | ||
7314 | struct static_key kvm_no_apic_vcpu __read_mostly; | 7397 | struct static_key kvm_no_apic_vcpu __read_mostly; |
@@ -7377,6 +7460,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
7377 | kvm_async_pf_hash_reset(vcpu); | 7460 | kvm_async_pf_hash_reset(vcpu); |
7378 | kvm_pmu_init(vcpu); | 7461 | kvm_pmu_init(vcpu); |
7379 | 7462 | ||
7463 | vcpu->arch.pending_external_vector = -1; | ||
7464 | |||
7380 | return 0; | 7465 | return 0; |
7381 | 7466 | ||
7382 | fail_free_mce_banks: | 7467 | fail_free_mce_banks: |
@@ -7402,7 +7487,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
7402 | kvm_mmu_destroy(vcpu); | 7487 | kvm_mmu_destroy(vcpu); |
7403 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | 7488 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
7404 | free_page((unsigned long)vcpu->arch.pio_data); | 7489 | free_page((unsigned long)vcpu->arch.pio_data); |
7405 | if (!irqchip_in_kernel(vcpu->kvm)) | 7490 | if (!lapic_in_kernel(vcpu)) |
7406 | static_key_slow_dec(&kvm_no_apic_vcpu); | 7491 | static_key_slow_dec(&kvm_no_apic_vcpu); |
7407 | } | 7492 | } |
7408 | 7493 | ||
@@ -8029,7 +8114,59 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) | |||
8029 | } | 8114 | } |
8030 | EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); | 8115 | EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); |
8031 | 8116 | ||
8117 | int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, | ||
8118 | struct irq_bypass_producer *prod) | ||
8119 | { | ||
8120 | struct kvm_kernel_irqfd *irqfd = | ||
8121 | container_of(cons, struct kvm_kernel_irqfd, consumer); | ||
8122 | |||
8123 | if (kvm_x86_ops->update_pi_irte) { | ||
8124 | irqfd->producer = prod; | ||
8125 | return kvm_x86_ops->update_pi_irte(irqfd->kvm, | ||
8126 | prod->irq, irqfd->gsi, 1); | ||
8127 | } | ||
8128 | |||
8129 | return -EINVAL; | ||
8130 | } | ||
8131 | |||
8132 | void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, | ||
8133 | struct irq_bypass_producer *prod) | ||
8134 | { | ||
8135 | int ret; | ||
8136 | struct kvm_kernel_irqfd *irqfd = | ||
8137 | container_of(cons, struct kvm_kernel_irqfd, consumer); | ||
8138 | |||
8139 | if (!kvm_x86_ops->update_pi_irte) { | ||
8140 | WARN_ON(irqfd->producer != NULL); | ||
8141 | return; | ||
8142 | } | ||
8143 | |||
8144 | WARN_ON(irqfd->producer != prod); | ||
8145 | irqfd->producer = NULL; | ||
8146 | |||
8147 | /* | ||
8148 | * When the producer of a consumer is unregistered, we change back to | ||
8149 | * remapped mode, so we can re-use the current implementation | ||
8150 | * when the irq is masked/disabled or the consumer side (KVM | ||
8151 | * in this case) doesn't want to receive the interrupts. | ||
8152 | */ | ||
8153 | ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0); | ||
8154 | if (ret) | ||
8155 | printk(KERN_INFO "irq bypass consumer (token %p) unregistration" | ||
8156 | " fails: %d\n", irqfd->consumer.token, ret); | ||
8157 | } | ||
8158 | |||
8159 | int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, | ||
8160 | uint32_t guest_irq, bool set) | ||
8161 | { | ||
8162 | if (!kvm_x86_ops->update_pi_irte) | ||
8163 | return -EINVAL; | ||
8164 | |||
8165 | return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set); | ||
8166 | } | ||
8167 | |||
8032 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); | 8168 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); |
8169 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); | ||
8033 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); | 8170 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); |
8034 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); | 8171 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); |
8035 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); | 8172 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); |
@@ -8044,3 +8181,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); | |||
8044 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); | 8181 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); |
8045 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); | 8182 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); |
8046 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full); | 8183 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full); |
8184 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update); | ||
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 3d70e36c918e..3782636562a1 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h | |||
@@ -63,9 +63,6 @@ enum hv_cpuid_function { | |||
63 | /* Define version of the synthetic interrupt controller. */ | 63 | /* Define version of the synthetic interrupt controller. */ |
64 | #define HV_SYNIC_VERSION (1) | 64 | #define HV_SYNIC_VERSION (1) |
65 | 65 | ||
66 | /* Define the expected SynIC version. */ | ||
67 | #define HV_SYNIC_VERSION_1 (0x1) | ||
68 | |||
69 | /* Define synthetic interrupt controller message constants. */ | 66 | /* Define synthetic interrupt controller message constants. */ |
70 | #define HV_MESSAGE_SIZE (256) | 67 | #define HV_MESSAGE_SIZE (256) |
71 | #define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) | 68 | #define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) |
@@ -105,8 +102,6 @@ enum hv_message_type { | |||
105 | HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 | 102 | HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 |
106 | }; | 103 | }; |
107 | 104 | ||
108 | /* Define the number of synthetic interrupt sources. */ | ||
109 | #define HV_SYNIC_SINT_COUNT (16) | ||
110 | #define HV_SYNIC_STIMER_COUNT (4) | 105 | #define HV_SYNIC_STIMER_COUNT (4) |
111 | 106 | ||
112 | /* Define invalid partition identifier. */ | 107 | /* Define invalid partition identifier. */ |
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 913455a5fd40..8adaaeae3268 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c | |||
@@ -22,7 +22,7 @@ int irq_remap_broken; | |||
22 | int disable_sourceid_checking; | 22 | int disable_sourceid_checking; |
23 | int no_x2apic_optout; | 23 | int no_x2apic_optout; |
24 | 24 | ||
25 | int disable_irq_post = 1; | 25 | int disable_irq_post = 0; |
26 | 26 | ||
27 | static int disable_irq_remap; | 27 | static int disable_irq_remap; |
28 | static struct irq_remap_ops *remap_ops; | 28 | static struct irq_remap_ops *remap_ops; |
@@ -58,14 +58,18 @@ static __init int setup_irqremap(char *str) | |||
58 | return -EINVAL; | 58 | return -EINVAL; |
59 | 59 | ||
60 | while (*str) { | 60 | while (*str) { |
61 | if (!strncmp(str, "on", 2)) | 61 | if (!strncmp(str, "on", 2)) { |
62 | disable_irq_remap = 0; | 62 | disable_irq_remap = 0; |
63 | else if (!strncmp(str, "off", 3)) | 63 | disable_irq_post = 0; |
64 | } else if (!strncmp(str, "off", 3)) { | ||
64 | disable_irq_remap = 1; | 65 | disable_irq_remap = 1; |
65 | else if (!strncmp(str, "nosid", 5)) | 66 | disable_irq_post = 1; |
67 | } else if (!strncmp(str, "nosid", 5)) | ||
66 | disable_sourceid_checking = 1; | 68 | disable_sourceid_checking = 1; |
67 | else if (!strncmp(str, "no_x2apic_optout", 16)) | 69 | else if (!strncmp(str, "no_x2apic_optout", 16)) |
68 | no_x2apic_optout = 1; | 70 | no_x2apic_optout = 1; |
71 | else if (!strncmp(str, "nopost", 6)) | ||
72 | disable_irq_post = 1; | ||
69 | 73 | ||
70 | str += strcspn(str, ","); | 74 | str += strcspn(str, ","); |
71 | while (*str == ',') | 75 | while (*str == ',') |
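With this parser change, posted-interrupt support now follows the remapping on/off switch and gains its own opt-out. Assuming the "nopost" spelling added to Documentation/kernel-parameters.txt in this same series, a boot line that keeps interrupt remapping but disables posting would look like:

    intremap=on,nopost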
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 454017928ed0..850d86ca685b 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig | |||
@@ -33,3 +33,4 @@ menuconfig VFIO | |||
33 | 33 | ||
34 | source "drivers/vfio/pci/Kconfig" | 34 | source "drivers/vfio/pci/Kconfig" |
35 | source "drivers/vfio/platform/Kconfig" | 35 | source "drivers/vfio/platform/Kconfig" |
36 | source "virt/lib/Kconfig" | ||
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 579d83bf5358..02912f180c6d 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig | |||
@@ -2,6 +2,7 @@ config VFIO_PCI | |||
2 | tristate "VFIO support for PCI devices" | 2 | tristate "VFIO support for PCI devices" |
3 | depends on VFIO && PCI && EVENTFD | 3 | depends on VFIO && PCI && EVENTFD |
4 | select VFIO_VIRQFD | 4 | select VFIO_VIRQFD |
5 | select IRQ_BYPASS_MANAGER | ||
5 | help | 6 | help |
6 | Support for the PCI VFIO bus driver. This is required to make | 7 | Support for the PCI VFIO bus driver. This is required to make |
7 | use of PCI drivers using the VFIO framework. | 8 | use of PCI drivers using the VFIO framework. |
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 1f577b4ac126..3b3ba15558b7 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c | |||
@@ -319,6 +319,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, | |||
319 | 319 | ||
320 | if (vdev->ctx[vector].trigger) { | 320 | if (vdev->ctx[vector].trigger) { |
321 | free_irq(irq, vdev->ctx[vector].trigger); | 321 | free_irq(irq, vdev->ctx[vector].trigger); |
322 | irq_bypass_unregister_producer(&vdev->ctx[vector].producer); | ||
322 | kfree(vdev->ctx[vector].name); | 323 | kfree(vdev->ctx[vector].name); |
323 | eventfd_ctx_put(vdev->ctx[vector].trigger); | 324 | eventfd_ctx_put(vdev->ctx[vector].trigger); |
324 | vdev->ctx[vector].trigger = NULL; | 325 | vdev->ctx[vector].trigger = NULL; |
@@ -360,6 +361,14 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, | |||
360 | return ret; | 361 | return ret; |
361 | } | 362 | } |
362 | 363 | ||
364 | vdev->ctx[vector].producer.token = trigger; | ||
365 | vdev->ctx[vector].producer.irq = irq; | ||
366 | ret = irq_bypass_register_producer(&vdev->ctx[vector].producer); | ||
367 | if (unlikely(ret)) | ||
368 | dev_info(&pdev->dev, | ||
369 | "irq bypass producer (token %p) registration fails: %d\n", | ||
370 | vdev->ctx[vector].producer.token, ret); | ||
371 | |||
363 | vdev->ctx[vector].trigger = trigger; | 372 | vdev->ctx[vector].trigger = trigger; |
364 | 373 | ||
365 | return 0; | 374 | return 0; |
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index ae0e1b4c1711..0e7394f8f69b 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h | |||
@@ -13,6 +13,7 @@ | |||
13 | 13 | ||
14 | #include <linux/mutex.h> | 14 | #include <linux/mutex.h> |
15 | #include <linux/pci.h> | 15 | #include <linux/pci.h> |
16 | #include <linux/irqbypass.h> | ||
16 | 17 | ||
17 | #ifndef VFIO_PCI_PRIVATE_H | 18 | #ifndef VFIO_PCI_PRIVATE_H |
18 | #define VFIO_PCI_PRIVATE_H | 19 | #define VFIO_PCI_PRIVATE_H |
@@ -29,6 +30,7 @@ struct vfio_pci_irq_ctx { | |||
29 | struct virqfd *mask; | 30 | struct virqfd *mask; |
30 | char *name; | 31 | char *name; |
31 | bool masked; | 32 | bool masked; |
33 | struct irq_bypass_producer producer; | ||
32 | }; | 34 | }; |
33 | 35 | ||
34 | struct vfio_pci_device { | 36 | struct vfio_pci_device { |
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index e1e4d7c38dda..1800227af9d6 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h | |||
@@ -51,7 +51,7 @@ struct arch_timer_cpu { | |||
51 | bool armed; | 51 | bool armed; |
52 | 52 | ||
53 | /* Timer IRQ */ | 53 | /* Timer IRQ */ |
54 | const struct kvm_irq_level *irq; | 54 | struct kvm_irq_level irq; |
55 | 55 | ||
56 | /* VGIC mapping */ | 56 | /* VGIC mapping */ |
57 | struct irq_phys_map *map; | 57 | struct irq_phys_map *map; |
@@ -71,5 +71,7 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); | |||
71 | int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); | 71 | int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); |
72 | 72 | ||
73 | bool kvm_timer_should_fire(struct kvm_vcpu *vcpu); | 73 | bool kvm_timer_should_fire(struct kvm_vcpu *vcpu); |
74 | void kvm_timer_schedule(struct kvm_vcpu *vcpu); | ||
75 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu); | ||
74 | 76 | ||
75 | #endif | 77 | #endif |
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 6a3538ef7275..9c747cb14ad8 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h | |||
@@ -112,7 +112,6 @@ struct vgic_vmcr { | |||
112 | struct vgic_ops { | 112 | struct vgic_ops { |
113 | struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); | 113 | struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); |
114 | void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); | 114 | void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); |
115 | void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); | ||
116 | u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); | 115 | u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); |
117 | u64 (*get_eisr)(const struct kvm_vcpu *vcpu); | 116 | u64 (*get_eisr)(const struct kvm_vcpu *vcpu); |
118 | void (*clear_eisr)(struct kvm_vcpu *vcpu); | 117 | void (*clear_eisr)(struct kvm_vcpu *vcpu); |
@@ -159,7 +158,6 @@ struct irq_phys_map { | |||
159 | u32 virt_irq; | 158 | u32 virt_irq; |
160 | u32 phys_irq; | 159 | u32 phys_irq; |
161 | u32 irq; | 160 | u32 irq; |
162 | bool active; | ||
163 | }; | 161 | }; |
164 | 162 | ||
165 | struct irq_phys_map_entry { | 163 | struct irq_phys_map_entry { |
@@ -296,22 +294,16 @@ struct vgic_v3_cpu_if { | |||
296 | }; | 294 | }; |
297 | 295 | ||
298 | struct vgic_cpu { | 296 | struct vgic_cpu { |
299 | /* per IRQ to LR mapping */ | ||
300 | u8 *vgic_irq_lr_map; | ||
301 | |||
302 | /* Pending/active/both interrupts on this VCPU */ | 297 | /* Pending/active/both interrupts on this VCPU */ |
303 | DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); | 298 | DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS); |
304 | DECLARE_BITMAP( active_percpu, VGIC_NR_PRIVATE_IRQS); | 299 | DECLARE_BITMAP(active_percpu, VGIC_NR_PRIVATE_IRQS); |
305 | DECLARE_BITMAP( pend_act_percpu, VGIC_NR_PRIVATE_IRQS); | 300 | DECLARE_BITMAP(pend_act_percpu, VGIC_NR_PRIVATE_IRQS); |
306 | 301 | ||
307 | /* Pending/active/both shared interrupts, dynamically sized */ | 302 | /* Pending/active/both shared interrupts, dynamically sized */ |
308 | unsigned long *pending_shared; | 303 | unsigned long *pending_shared; |
309 | unsigned long *active_shared; | 304 | unsigned long *active_shared; |
310 | unsigned long *pend_act_shared; | 305 | unsigned long *pend_act_shared; |
311 | 306 | ||
312 | /* Bitmap of used/free list registers */ | ||
313 | DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); | ||
314 | |||
315 | /* Number of list registers on this CPU */ | 307 | /* Number of list registers on this CPU */ |
316 | int nr_lr; | 308 | int nr_lr; |
317 | 309 | ||
@@ -354,8 +346,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); | |||
354 | struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, | 346 | struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, |
355 | int virt_irq, int irq); | 347 | int virt_irq, int irq); |
356 | int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); | 348 | int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); |
357 | bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map); | ||
358 | void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active); | ||
359 | 349 | ||
360 | #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) | 350 | #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) |
361 | #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) | 351 | #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) |
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 54733d5b503e..8fdc17b84739 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h | |||
@@ -26,6 +26,7 @@ | |||
26 | #define _HYPERV_H | 26 | #define _HYPERV_H |
27 | 27 | ||
28 | #include <uapi/linux/hyperv.h> | 28 | #include <uapi/linux/hyperv.h> |
29 | #include <uapi/asm/hyperv.h> | ||
29 | 30 | ||
30 | #include <linux/types.h> | 31 | #include <linux/types.h> |
31 | #include <linux/scatterlist.h> | 32 | #include <linux/scatterlist.h> |
diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h new file mode 100644 index 000000000000..1551b5b2f4c2 --- /dev/null +++ b/include/linux/irqbypass.h | |||
@@ -0,0 +1,90 @@ | |||
1 | /* | ||
2 | * IRQ offload/bypass manager | ||
3 | * | ||
4 | * Copyright (C) 2015 Red Hat, Inc. | ||
5 | * Copyright (c) 2015 Linaro Ltd. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | #ifndef IRQBYPASS_H | ||
12 | #define IRQBYPASS_H | ||
13 | |||
14 | #include <linux/list.h> | ||
15 | |||
16 | struct irq_bypass_consumer; | ||
17 | |||
18 | /* | ||
19 | * Theory of operation | ||
20 | * | ||
21 | * The IRQ bypass manager is a simple set of lists and callbacks that allows | ||
22 | * IRQ producers (ex. physical interrupt sources) to be matched to IRQ | ||
23 | * consumers (ex. virtualization hardware that allows IRQ bypass or offload) | ||
24 | * via a shared token (ex. eventfd_ctx). Producers and consumers register | ||
25 | * independently. When a token match is found, the optional @stop callback | ||
26 | * will be called for each participant. The pair will then be connected via | ||
27 | * the @add_* callbacks, and finally the optional @start callback will allow | ||
28 | * any final coordination. When either participant is unregistered, the | ||
29 | * process is repeated using the @del_* callbacks in place of the @add_* | ||
30 | * callbacks. Match tokens must be unique per producer/consumer; 1:N pairings | ||
31 | * are not supported. | ||
32 | */ | ||
33 | |||
34 | /** | ||
35 | * struct irq_bypass_producer - IRQ bypass producer definition | ||
36 | * @node: IRQ bypass manager private list management | ||
37 | * @token: opaque token to match between producer and consumer | ||
38 | * @irq: Linux IRQ number for the producer device | ||
39 | * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional) | ||
40 | * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional) | ||
41 | * @stop: Perform any quiesce operations necessary prior to add/del (optional) | ||
42 | * @start: Perform any startup operations necessary after add/del (optional) | ||
43 | * | ||
44 | * The IRQ bypass producer structure represents an interrupt source for | ||
45 | * participation in possible host bypass, for instance an interrupt vector | ||
46 | * for a physical device assigned to a VM. | ||
47 | */ | ||
48 | struct irq_bypass_producer { | ||
49 | struct list_head node; | ||
50 | void *token; | ||
51 | int irq; | ||
52 | int (*add_consumer)(struct irq_bypass_producer *, | ||
53 | struct irq_bypass_consumer *); | ||
54 | void (*del_consumer)(struct irq_bypass_producer *, | ||
55 | struct irq_bypass_consumer *); | ||
56 | void (*stop)(struct irq_bypass_producer *); | ||
57 | void (*start)(struct irq_bypass_producer *); | ||
58 | }; | ||
59 | |||
60 | /** | ||
61 | * struct irq_bypass_consumer - IRQ bypass consumer definition | ||
62 | * @node: IRQ bypass manager private list management | ||
63 | * @token: opaque token to match between producer and consumer | ||
64 | * @add_producer: Connect the IRQ consumer to an IRQ producer | ||
65 | * @del_producer: Disconnect the IRQ consumer from an IRQ producer | ||
66 | * @stop: Perform any quiesce operations necessary prior to add/del (optional) | ||
67 | * @start: Perform any startup operations necessary after add/del (optional) | ||
68 | * | ||
69 | * The IRQ bypass consumer structure represents an interrupt sink for | ||
70 | * participation in possible host bypass, for instance a hypervisor may | ||
71 | * support offloads to allow bypassing the host entirely or offload | ||
72 | * portions of the interrupt handling to the VM. | ||
73 | */ | ||
74 | struct irq_bypass_consumer { | ||
75 | struct list_head node; | ||
76 | void *token; | ||
77 | int (*add_producer)(struct irq_bypass_consumer *, | ||
78 | struct irq_bypass_producer *); | ||
79 | void (*del_producer)(struct irq_bypass_consumer *, | ||
80 | struct irq_bypass_producer *); | ||
81 | void (*stop)(struct irq_bypass_consumer *); | ||
82 | void (*start)(struct irq_bypass_consumer *); | ||
83 | }; | ||
84 | |||
85 | int irq_bypass_register_producer(struct irq_bypass_producer *); | ||
86 | void irq_bypass_unregister_producer(struct irq_bypass_producer *); | ||
87 | int irq_bypass_register_consumer(struct irq_bypass_consumer *); | ||
88 | void irq_bypass_unregister_consumer(struct irq_bypass_consumer *); | ||
89 | |||
90 | #endif /* IRQBYPASS_H */ | ||
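As a concrete illustration of the theory-of-operation comment above, the following is a minimal sketch of the registration flow. It uses only the API declared in this header; my_dev_producer, my_hv_consumer and their callbacks are invented names for a hypothetical device driver and hypervisor back-end, not code from this series.

/* Hypothetical producer (device side) and consumer (hypervisor side). */
static int my_hv_add_producer(struct irq_bypass_consumer *cons,
			      struct irq_bypass_producer *prod)
{
	/* e.g. program posted-interrupt hardware to deliver prod->irq directly */
	return 0;
}

static void my_hv_del_producer(struct irq_bypass_consumer *cons,
			       struct irq_bypass_producer *prod)
{
	/* undo whatever my_hv_add_producer() set up */
}

static struct irq_bypass_producer my_dev_producer;
static struct irq_bypass_consumer my_hv_consumer;

static int my_setup(void *shared_token, int hwirq)
{
	int ret;

	my_dev_producer.token = shared_token;	/* e.g. an eventfd_ctx pointer */
	my_dev_producer.irq = hwirq;
	ret = irq_bypass_register_producer(&my_dev_producer);
	if (ret)
		return ret;

	my_hv_consumer.token = shared_token;	/* same token => the manager pairs them */
	my_hv_consumer.add_producer = my_hv_add_producer;
	my_hv_consumer.del_producer = my_hv_del_producer;
	return irq_bypass_register_consumer(&my_hv_consumer);
}

In this series the VFIO MSI code above plays the producer role, using the irqfd's eventfd context as its token, and the KVM irqfd code registers the matching consumer (see the consumer member added to struct kvm_kernel_irqfd below).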
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1bef9e21e725..242a6d2b53ff 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/err.h> | 24 | #include <linux/err.h> |
25 | #include <linux/irqflags.h> | 25 | #include <linux/irqflags.h> |
26 | #include <linux/context_tracking.h> | 26 | #include <linux/context_tracking.h> |
27 | #include <linux/irqbypass.h> | ||
27 | #include <asm/signal.h> | 28 | #include <asm/signal.h> |
28 | 29 | ||
29 | #include <linux/kvm.h> | 30 | #include <linux/kvm.h> |
@@ -140,6 +141,8 @@ static inline bool is_error_page(struct page *page) | |||
140 | #define KVM_REQ_APIC_PAGE_RELOAD 25 | 141 | #define KVM_REQ_APIC_PAGE_RELOAD 25 |
141 | #define KVM_REQ_SMI 26 | 142 | #define KVM_REQ_SMI 26 |
142 | #define KVM_REQ_HV_CRASH 27 | 143 | #define KVM_REQ_HV_CRASH 27 |
144 | #define KVM_REQ_IOAPIC_EOI_EXIT 28 | ||
145 | #define KVM_REQ_HV_RESET 29 | ||
143 | 146 | ||
144 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 147 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
145 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 | 148 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 |
@@ -231,6 +234,9 @@ struct kvm_vcpu { | |||
231 | unsigned long requests; | 234 | unsigned long requests; |
232 | unsigned long guest_debug; | 235 | unsigned long guest_debug; |
233 | 236 | ||
237 | int pre_pcpu; | ||
238 | struct list_head blocked_vcpu_list; | ||
239 | |||
234 | struct mutex mutex; | 240 | struct mutex mutex; |
235 | struct kvm_run *run; | 241 | struct kvm_run *run; |
236 | 242 | ||
@@ -329,6 +335,18 @@ struct kvm_kernel_irq_routing_entry { | |||
329 | struct hlist_node link; | 335 | struct hlist_node link; |
330 | }; | 336 | }; |
331 | 337 | ||
338 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING | ||
339 | struct kvm_irq_routing_table { | ||
340 | int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; | ||
341 | u32 nr_rt_entries; | ||
342 | /* | ||
343 | * Array indexed by gsi. Each entry contains list of irq chips | ||
344 | * the gsi is connected to. | ||
345 | */ | ||
346 | struct hlist_head map[0]; | ||
347 | }; | ||
348 | #endif | ||
349 | |||
332 | #ifndef KVM_PRIVATE_MEM_SLOTS | 350 | #ifndef KVM_PRIVATE_MEM_SLOTS |
333 | #define KVM_PRIVATE_MEM_SLOTS 0 | 351 | #define KVM_PRIVATE_MEM_SLOTS 0 |
334 | #endif | 352 | #endif |
@@ -455,10 +473,14 @@ void vcpu_put(struct kvm_vcpu *vcpu); | |||
455 | 473 | ||
456 | #ifdef __KVM_HAVE_IOAPIC | 474 | #ifdef __KVM_HAVE_IOAPIC |
457 | void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); | 475 | void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); |
476 | void kvm_arch_irq_routing_update(struct kvm *kvm); | ||
458 | #else | 477 | #else |
459 | static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) | 478 | static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) |
460 | { | 479 | { |
461 | } | 480 | } |
481 | static inline void kvm_arch_irq_routing_update(struct kvm *kvm) | ||
482 | { | ||
483 | } | ||
462 | #endif | 484 | #endif |
463 | 485 | ||
464 | #ifdef CONFIG_HAVE_KVM_IRQFD | 486 | #ifdef CONFIG_HAVE_KVM_IRQFD |
@@ -625,6 +647,8 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, | |||
625 | void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); | 647 | void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); |
626 | 648 | ||
627 | void kvm_vcpu_block(struct kvm_vcpu *vcpu); | 649 | void kvm_vcpu_block(struct kvm_vcpu *vcpu); |
650 | void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); | ||
651 | void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); | ||
628 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); | 652 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); |
629 | int kvm_vcpu_yield_to(struct kvm_vcpu *target); | 653 | int kvm_vcpu_yield_to(struct kvm_vcpu *target); |
630 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); | 654 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); |
@@ -803,10 +827,13 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); | |||
803 | 827 | ||
804 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, | 828 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, |
805 | bool line_status); | 829 | bool line_status); |
806 | int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); | ||
807 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, | 830 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, |
808 | int irq_source_id, int level, bool line_status); | 831 | int irq_source_id, int level, bool line_status); |
832 | int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, | ||
833 | struct kvm *kvm, int irq_source_id, | ||
834 | int level, bool line_status); | ||
809 | bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); | 835 | bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); |
836 | void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); | ||
810 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); | 837 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); |
811 | void kvm_register_irq_ack_notifier(struct kvm *kvm, | 838 | void kvm_register_irq_ack_notifier(struct kvm *kvm, |
812 | struct kvm_irq_ack_notifier *kian); | 839 | struct kvm_irq_ack_notifier *kian); |
@@ -1002,6 +1029,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) | |||
1002 | #endif | 1029 | #endif |
1003 | 1030 | ||
1004 | int kvm_setup_default_irq_routing(struct kvm *kvm); | 1031 | int kvm_setup_default_irq_routing(struct kvm *kvm); |
1032 | int kvm_setup_empty_irq_routing(struct kvm *kvm); | ||
1005 | int kvm_set_irq_routing(struct kvm *kvm, | 1033 | int kvm_set_irq_routing(struct kvm *kvm, |
1006 | const struct kvm_irq_routing_entry *entries, | 1034 | const struct kvm_irq_routing_entry *entries, |
1007 | unsigned nr, | 1035 | unsigned nr, |
@@ -1144,5 +1172,15 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) | |||
1144 | { | 1172 | { |
1145 | } | 1173 | } |
1146 | #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ | 1174 | #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ |
1147 | #endif | ||
1148 | 1175 | ||
1176 | #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS | ||
1177 | int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, | ||
1178 | struct irq_bypass_producer *); | ||
1179 | void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, | ||
1180 | struct irq_bypass_producer *); | ||
1181 | void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); | ||
1182 | void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); | ||
1183 | int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, | ||
1184 | uint32_t guest_irq, bool set); | ||
1185 | #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ | ||
1186 | #endif | ||
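Since the kvm_arch_irq_bypass_*() hooks above have the same signatures as the irq_bypass_consumer callbacks, the generic irqfd code can point its consumer straight at them. A sketch of that assumed wiring, using the kvm_kernel_irqfd structure introduced in the new kvm_irqfd.h header below (illustrative only, not lifted from this hunk):

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
static void example_irqfd_register_bypass(struct kvm_kernel_irqfd *irqfd)
{
	/* the eventfd context doubles as the match token (the same token the
	 * VFIO producer registers in vfio_pci_intrs.c above) */
	irqfd->consumer.token = (void *)irqfd->eventfd;
	irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
	irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
	irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
	irqfd->consumer.start = kvm_arch_irq_bypass_start;

	if (irq_bypass_register_consumer(&irqfd->consumer))
		pr_info("irq bypass consumer (token %p) registration failed\n",
			irqfd->consumer.token);
}
#endif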
diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h new file mode 100644 index 000000000000..0c1de05098c8 --- /dev/null +++ b/include/linux/kvm_irqfd.h | |||
@@ -0,0 +1,71 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * irqfd: Allows an fd to be used to inject an interrupt to the guest | ||
12 | * Credit goes to Avi Kivity for the original idea. | ||
13 | */ | ||
14 | |||
15 | #ifndef __LINUX_KVM_IRQFD_H | ||
16 | #define __LINUX_KVM_IRQFD_H | ||
17 | |||
18 | #include <linux/kvm_host.h> | ||
19 | #include <linux/poll.h> | ||
20 | |||
21 | /* | ||
22 | * Resampling irqfds are a special variety of irqfds used to emulate | ||
23 | * level triggered interrupts. The interrupt is asserted on eventfd | ||
24 | * trigger. On acknowledgment through the irq ack notifier, the | ||
25 | * interrupt is de-asserted and userspace is notified through the | ||
26 | * resamplefd. All resamplers on the same gsi are de-asserted | ||
27 | * together, so we don't need to track the state of each individual | ||
28 | * user. We can also therefore share the same irq source ID. | ||
29 | */ | ||
30 | struct kvm_kernel_irqfd_resampler { | ||
31 | struct kvm *kvm; | ||
32 | /* | ||
33 | * List of resampling struct _irqfd objects sharing this gsi. | ||
34 | * RCU list modified under kvm->irqfds.resampler_lock | ||
35 | */ | ||
36 | struct list_head list; | ||
37 | struct kvm_irq_ack_notifier notifier; | ||
38 | /* | ||
39 | * Entry in list of kvm->irqfd.resampler_list. Use for sharing | ||
40 | * resamplers among irqfds on the same gsi. | ||
41 | * Accessed and modified under kvm->irqfds.resampler_lock | ||
42 | */ | ||
43 | struct list_head link; | ||
44 | }; | ||
45 | |||
46 | struct kvm_kernel_irqfd { | ||
47 | /* Used for MSI fast-path */ | ||
48 | struct kvm *kvm; | ||
49 | wait_queue_t wait; | ||
50 | /* Update side is protected by irqfds.lock */ | ||
51 | struct kvm_kernel_irq_routing_entry irq_entry; | ||
52 | seqcount_t irq_entry_sc; | ||
53 | /* Used for level IRQ fast-path */ | ||
54 | int gsi; | ||
55 | struct work_struct inject; | ||
56 | /* The resampler used by this irqfd (resampler-only) */ | ||
57 | struct kvm_kernel_irqfd_resampler *resampler; | ||
58 | /* Eventfd notified on resample (resampler-only) */ | ||
59 | struct eventfd_ctx *resamplefd; | ||
60 | /* Entry in list of irqfds for a resampler (resampler-only) */ | ||
61 | struct list_head resampler_link; | ||
62 | /* Used for setup/shutdown */ | ||
63 | struct eventfd_ctx *eventfd; | ||
64 | struct list_head list; | ||
65 | poll_table pt; | ||
66 | struct work_struct shutdown; | ||
67 | struct irq_bypass_consumer consumer; | ||
68 | struct irq_bypass_producer *producer; | ||
69 | }; | ||
70 | |||
71 | #endif /* __LINUX_KVM_IRQFD_H */ | ||
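To make the resampling description above concrete, here is a userspace sketch of the flow. It relies only on the pre-existing KVM_IRQFD uapi (struct kvm_irqfd, KVM_IRQFD_FLAG_RESAMPLE); nothing in it is added by this series, and attach_level_irqfd() is an invented helper name.

#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int attach_level_irqfd(int vm_fd, unsigned int gsi)
{
	struct kvm_irqfd irqfd;
	int trigger = eventfd(0, EFD_CLOEXEC);	/* write here to assert the line */
	int resample = eventfd(0, EFD_CLOEXEC);	/* kernel signals this on guest EOI */

	if (trigger < 0 || resample < 0)
		return -1;

	memset(&irqfd, 0, sizeof(irqfd));
	irqfd.fd = trigger;
	irqfd.gsi = gsi;
	irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
	irqfd.resamplefd = resample;

	/* After this, the interrupt stays asserted until the guest EOIs it,
	 * at which point the resample eventfd fires and the device model can
	 * re-check its level and re-trigger if still pending. */
	return ioctl(vm_fd, KVM_IRQFD, &irqfd);
}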
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a9256f0331ae..03f3618612aa 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
@@ -183,6 +183,7 @@ struct kvm_s390_skeys { | |||
183 | #define KVM_EXIT_EPR 23 | 183 | #define KVM_EXIT_EPR 23 |
184 | #define KVM_EXIT_SYSTEM_EVENT 24 | 184 | #define KVM_EXIT_SYSTEM_EVENT 24 |
185 | #define KVM_EXIT_S390_STSI 25 | 185 | #define KVM_EXIT_S390_STSI 25 |
186 | #define KVM_EXIT_IOAPIC_EOI 26 | ||
186 | 187 | ||
187 | /* For KVM_EXIT_INTERNAL_ERROR */ | 188 | /* For KVM_EXIT_INTERNAL_ERROR */ |
188 | /* Emulate instruction failed. */ | 189 | /* Emulate instruction failed. */ |
@@ -333,6 +334,10 @@ struct kvm_run { | |||
333 | __u8 sel1; | 334 | __u8 sel1; |
334 | __u16 sel2; | 335 | __u16 sel2; |
335 | } s390_stsi; | 336 | } s390_stsi; |
337 | /* KVM_EXIT_IOAPIC_EOI */ | ||
338 | struct { | ||
339 | __u8 vector; | ||
340 | } eoi; | ||
336 | /* Fix the size of the union. */ | 341 | /* Fix the size of the union. */ |
337 | char padding[256]; | 342 | char padding[256]; |
338 | }; | 343 | }; |
@@ -824,6 +829,8 @@ struct kvm_ppc_smmu_info { | |||
824 | #define KVM_CAP_MULTI_ADDRESS_SPACE 118 | 829 | #define KVM_CAP_MULTI_ADDRESS_SPACE 118 |
825 | #define KVM_CAP_GUEST_DEBUG_HW_BPS 119 | 830 | #define KVM_CAP_GUEST_DEBUG_HW_BPS 119 |
826 | #define KVM_CAP_GUEST_DEBUG_HW_WPS 120 | 831 | #define KVM_CAP_GUEST_DEBUG_HW_WPS 120 |
832 | #define KVM_CAP_SPLIT_IRQCHIP 121 | ||
833 | #define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 | ||
827 | 834 | ||
828 | #ifdef KVM_CAP_IRQ_ROUTING | 835 | #ifdef KVM_CAP_IRQ_ROUTING |
829 | 836 | ||
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 8cbc3db671df..26a54461bf59 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c | |||
@@ -444,6 +444,7 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) | |||
444 | *ut = p->utime; | 444 | *ut = p->utime; |
445 | *st = p->stime; | 445 | *st = p->stime; |
446 | } | 446 | } |
447 | EXPORT_SYMBOL_GPL(task_cputime_adjusted); | ||
447 | 448 | ||
448 | void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) | 449 | void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) |
449 | { | 450 | { |
@@ -652,6 +653,7 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) | |||
652 | task_cputime(p, &cputime.utime, &cputime.stime); | 653 | task_cputime(p, &cputime.utime, &cputime.stime); |
653 | cputime_adjust(&cputime, &p->prev_cputime, ut, st); | 654 | cputime_adjust(&cputime, &p->prev_cputime, ut, st); |
654 | } | 655 | } |
656 | EXPORT_SYMBOL_GPL(task_cputime_adjusted); | ||
655 | 657 | ||
656 | void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) | 658 | void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) |
657 | { | 659 | { |
diff --git a/virt/Makefile b/virt/Makefile new file mode 100644 index 000000000000..be783472ac81 --- /dev/null +++ b/virt/Makefile | |||
@@ -0,0 +1 @@ | |||
1 | obj-y += lib/ | ||
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index e2c876d5a03b..7a79b6853583 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig | |||
@@ -46,4 +46,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT | |||
46 | 46 | ||
47 | config KVM_COMPAT | 47 | config KVM_COMPAT |
48 | def_bool y | 48 | def_bool y |
49 | depends on COMPAT && !S390 | 49 | depends on KVM && COMPAT && !S390 |
50 | |||
51 | config HAVE_KVM_IRQ_BYPASS | ||
52 | bool | ||
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index b9d3a32cbc04..21a0ab2d8919 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c | |||
@@ -28,6 +28,8 @@ | |||
28 | #include <kvm/arm_vgic.h> | 28 | #include <kvm/arm_vgic.h> |
29 | #include <kvm/arm_arch_timer.h> | 29 | #include <kvm/arm_arch_timer.h> |
30 | 30 | ||
31 | #include "trace.h" | ||
32 | |||
31 | static struct timecounter *timecounter; | 33 | static struct timecounter *timecounter; |
32 | static struct workqueue_struct *wqueue; | 34 | static struct workqueue_struct *wqueue; |
33 | static unsigned int host_vtimer_irq; | 35 | static unsigned int host_vtimer_irq; |
@@ -59,18 +61,6 @@ static void timer_disarm(struct arch_timer_cpu *timer) | |||
59 | } | 61 | } |
60 | } | 62 | } |
61 | 63 | ||
62 | static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) | ||
63 | { | ||
64 | int ret; | ||
65 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
66 | |||
67 | kvm_vgic_set_phys_irq_active(timer->map, true); | ||
68 | ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, | ||
69 | timer->map, | ||
70 | timer->irq->level); | ||
71 | WARN_ON(ret); | ||
72 | } | ||
73 | |||
74 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) | 64 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) |
75 | { | 65 | { |
76 | struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; | 66 | struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; |
@@ -111,14 +101,20 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) | |||
111 | return HRTIMER_NORESTART; | 101 | return HRTIMER_NORESTART; |
112 | } | 102 | } |
113 | 103 | ||
104 | static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) | ||
105 | { | ||
106 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
107 | |||
108 | return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && | ||
109 | (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE); | ||
110 | } | ||
111 | |||
114 | bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) | 112 | bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) |
115 | { | 113 | { |
116 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 114 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
117 | cycle_t cval, now; | 115 | cycle_t cval, now; |
118 | 116 | ||
119 | if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || | 117 | if (!kvm_timer_irq_can_fire(vcpu)) |
120 | !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) || | ||
121 | kvm_vgic_get_phys_irq_active(timer->map)) | ||
122 | return false; | 118 | return false; |
123 | 119 | ||
124 | cval = timer->cntv_cval; | 120 | cval = timer->cntv_cval; |
@@ -127,12 +123,94 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) | |||
127 | return cval <= now; | 123 | return cval <= now; |
128 | } | 124 | } |
129 | 125 | ||
126 | static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) | ||
127 | { | ||
128 | int ret; | ||
129 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
130 | |||
131 | BUG_ON(!vgic_initialized(vcpu->kvm)); | ||
132 | |||
133 | timer->irq.level = new_level; | ||
134 | trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq, | ||
135 | timer->irq.level); | ||
136 | ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, | ||
137 | timer->map, | ||
138 | timer->irq.level); | ||
139 | WARN_ON(ret); | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * Check if there was a change in the timer state (should we raise or lower | ||
144 | * the line level to the GIC). | ||
145 | */ | ||
146 | static void kvm_timer_update_state(struct kvm_vcpu *vcpu) | ||
147 | { | ||
148 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
149 | |||
150 | /* | ||
151 | * If userspace modified the timer registers via SET_ONE_REG before | ||
152 | * the vgic was initialized, we mustn't set the timer->irq.level value | ||
153 | * because the guest would never see the interrupt. Instead wait | ||
154 | * until we call this function from kvm_timer_flush_hwstate. | ||
155 | */ | ||
156 | if (!vgic_initialized(vcpu->kvm)) | ||
157 | return; | ||
158 | |||
159 | if (kvm_timer_should_fire(vcpu) != timer->irq.level) | ||
160 | kvm_timer_update_irq(vcpu, !timer->irq.level); | ||
161 | } | ||
162 | |||
163 | /* | ||
164 | * Schedule the background timer before calling kvm_vcpu_block, so that this | ||
165 | * thread is removed from its waitqueue and made runnable when there's a timer | ||
166 | * interrupt to handle. | ||
167 | */ | ||
168 | void kvm_timer_schedule(struct kvm_vcpu *vcpu) | ||
169 | { | ||
170 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
171 | u64 ns; | ||
172 | cycle_t cval, now; | ||
173 | |||
174 | BUG_ON(timer_is_armed(timer)); | ||
175 | |||
176 | /* | ||
177 | * No need to schedule a background timer if the guest timer has | ||
178 | * already expired, because kvm_vcpu_block will return before putting | ||
179 | * the thread to sleep. | ||
180 | */ | ||
181 | if (kvm_timer_should_fire(vcpu)) | ||
182 | return; | ||
183 | |||
184 | /* | ||
185 | * If the timer is not capable of raising interrupts (disabled or | ||
186 | * masked), then there's no more work for us to do. | ||
187 | */ | ||
188 | if (!kvm_timer_irq_can_fire(vcpu)) | ||
189 | return; | ||
190 | |||
191 | /* The timer has not yet expired, schedule a background timer */ | ||
192 | cval = timer->cntv_cval; | ||
193 | now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; | ||
194 | |||
195 | ns = cyclecounter_cyc2ns(timecounter->cc, | ||
196 | cval - now, | ||
197 | timecounter->mask, | ||
198 | &timecounter->frac); | ||
199 | timer_arm(timer, ns); | ||
200 | } | ||
201 | |||
202 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu) | ||
203 | { | ||
204 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
205 | timer_disarm(timer); | ||
206 | } | ||
207 | |||
130 | /** | 208 | /** |
131 | * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu | 209 | * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu |
132 | * @vcpu: The vcpu pointer | 210 | * @vcpu: The vcpu pointer |
133 | * | 211 | * |
134 | * Disarm any pending soft timers, since the world-switch code will write the | 212 | * Check if the virtual timer has expired while we were running in the host, |
135 | * virtual timer state back to the physical CPU. | 213 | * and inject an interrupt if that was the case. |
136 | */ | 214 | */ |
137 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | 215 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) |
138 | { | 216 | { |
@@ -140,28 +218,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | |||
140 | bool phys_active; | 218 | bool phys_active; |
141 | int ret; | 219 | int ret; |
142 | 220 | ||
143 | /* | 221 | kvm_timer_update_state(vcpu); |
144 | * We're about to run this vcpu again, so there is no need to | ||
145 | * keep the background timer running, as we're about to | ||
146 | * populate the CPU timer again. | ||
147 | */ | ||
148 | timer_disarm(timer); | ||
149 | 222 | ||
150 | /* | 223 | /* |
151 | * If the timer expired while we were not scheduled, now is the time | 224 | * If we enter the guest with the virtual input level to the VGIC |
152 | * to inject it. | 225 | * asserted, then we have already told the VGIC what we need to, and |
226 | * we don't need to exit from the guest until the guest deactivates | ||
227 | * the already injected interrupt, so we should set the | ||
228 | * hardware active state to prevent unnecessary exits from the guest. | ||
229 | * | ||
230 | * Conversely, if the virtual input level is deasserted, then always | ||
231 | * clear the hardware active state to ensure that hardware interrupts | ||
232 | * from the timer triggers a guest exit. | ||
153 | */ | 233 | */ |
154 | if (kvm_timer_should_fire(vcpu)) | 234 | if (timer->irq.level) |
155 | kvm_timer_inject_irq(vcpu); | ||
156 | |||
157 | /* | ||
158 | * We keep track of whether the edge-triggered interrupt has been | ||
159 | * signalled to the vgic/guest, and if so, we mask the interrupt and | ||
160 | * the physical distributor to prevent the timer from raising a | ||
161 | * physical interrupt whenever we run a guest, preventing forward | ||
162 | * VCPU progress. | ||
163 | */ | ||
164 | if (kvm_vgic_get_phys_irq_active(timer->map)) | ||
165 | phys_active = true; | 235 | phys_active = true; |
166 | else | 236 | else |
167 | phys_active = false; | 237 | phys_active = false; |
@@ -176,32 +246,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | |||
176 | * kvm_timer_sync_hwstate - sync timer state from cpu | 246 | * kvm_timer_sync_hwstate - sync timer state from cpu |
177 | * @vcpu: The vcpu pointer | 247 | * @vcpu: The vcpu pointer |
178 | * | 248 | * |
179 | * Check if the virtual timer was armed and either schedule a corresponding | 249 | * Check if the virtual timer has expired while we were running in the guest, |
180 | * soft timer or inject directly if already expired. | 250 | * and inject an interrupt if that was the case. |
181 | */ | 251 | */ |
182 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) | 252 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) |
183 | { | 253 | { |
184 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 254 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
185 | cycle_t cval, now; | ||
186 | u64 ns; | ||
187 | 255 | ||
188 | BUG_ON(timer_is_armed(timer)); | 256 | BUG_ON(timer_is_armed(timer)); |
189 | 257 | ||
190 | if (kvm_timer_should_fire(vcpu)) { | 258 | /* |
191 | /* | 259 | * The guest could have modified the timer registers or the timer |
192 | * Timer has already expired while we were not | 260 | * could have expired, update the timer state. |
193 | * looking. Inject the interrupt and carry on. | 261 | */ |
194 | */ | 262 | kvm_timer_update_state(vcpu); |
195 | kvm_timer_inject_irq(vcpu); | ||
196 | return; | ||
197 | } | ||
198 | |||
199 | cval = timer->cntv_cval; | ||
200 | now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; | ||
201 | |||
202 | ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask, | ||
203 | &timecounter->frac); | ||
204 | timer_arm(timer, ns); | ||
205 | } | 263 | } |
206 | 264 | ||
207 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | 265 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, |
@@ -216,7 +274,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | |||
216 | * kvm_vcpu_set_target(). To handle this, we determine | 274 | * kvm_vcpu_set_target(). To handle this, we determine |
217 | * vcpu timer irq number when the vcpu is reset. | 275 | * vcpu timer irq number when the vcpu is reset. |
218 | */ | 276 | */ |
219 | timer->irq = irq; | 277 | timer->irq.irq = irq->irq; |
220 | 278 | ||
221 | /* | 279 | /* |
222 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 | 280 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 |
@@ -225,6 +283,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | |||
225 | * the ARMv7 architecture. | 283 | * the ARMv7 architecture. |
226 | */ | 284 | */ |
227 | timer->cntv_ctl = 0; | 285 | timer->cntv_ctl = 0; |
286 | kvm_timer_update_state(vcpu); | ||
228 | 287 | ||
229 | /* | 288 | /* |
230 | * Tell the VGIC that the virtual interrupt is tied to a | 289 | * Tell the VGIC that the virtual interrupt is tied to a |
@@ -269,6 +328,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) | |||
269 | default: | 328 | default: |
270 | return -1; | 329 | return -1; |
271 | } | 330 | } |
331 | |||
332 | kvm_timer_update_state(vcpu); | ||
272 | return 0; | 333 | return 0; |
273 | } | 334 | } |
274 | 335 | ||
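The comment on kvm_timer_schedule() above says it must run before kvm_vcpu_block() so that a timer expiry can wake the blocked VCPU. A sketch of the assumed arch-side wiring through the kvm_arch_vcpu_blocking()/kvm_arch_vcpu_unblocking() hooks declared in kvm_host.h earlier in this series (not shown in this hunk):

void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	/* arm a soft timer so the guest timer can still wake this VCPU */
	kvm_timer_schedule(vcpu);
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	/* back on the run path; the hardware timer state takes over again */
	kvm_timer_unschedule(vcpu);
}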
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h new file mode 100644 index 000000000000..37d8b98867d5 --- /dev/null +++ b/virt/kvm/arm/trace.h | |||
@@ -0,0 +1,63 @@ | |||
1 | #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) | ||
2 | #define _TRACE_KVM_H | ||
3 | |||
4 | #include <linux/tracepoint.h> | ||
5 | |||
6 | #undef TRACE_SYSTEM | ||
7 | #define TRACE_SYSTEM kvm | ||
8 | |||
9 | /* | ||
10 | * Tracepoints for vgic | ||
11 | */ | ||
12 | TRACE_EVENT(vgic_update_irq_pending, | ||
13 | TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level), | ||
14 | TP_ARGS(vcpu_id, irq, level), | ||
15 | |||
16 | TP_STRUCT__entry( | ||
17 | __field( unsigned long, vcpu_id ) | ||
18 | __field( __u32, irq ) | ||
19 | __field( bool, level ) | ||
20 | ), | ||
21 | |||
22 | TP_fast_assign( | ||
23 | __entry->vcpu_id = vcpu_id; | ||
24 | __entry->irq = irq; | ||
25 | __entry->level = level; | ||
26 | ), | ||
27 | |||
28 | TP_printk("VCPU: %ld, IRQ %d, level: %d", | ||
29 | __entry->vcpu_id, __entry->irq, __entry->level) | ||
30 | ); | ||
31 | |||
32 | /* | ||
33 | * Tracepoints for arch_timer | ||
34 | */ | ||
35 | TRACE_EVENT(kvm_timer_update_irq, | ||
36 | TP_PROTO(unsigned long vcpu_id, __u32 irq, int level), | ||
37 | TP_ARGS(vcpu_id, irq, level), | ||
38 | |||
39 | TP_STRUCT__entry( | ||
40 | __field( unsigned long, vcpu_id ) | ||
41 | __field( __u32, irq ) | ||
42 | __field( int, level ) | ||
43 | ), | ||
44 | |||
45 | TP_fast_assign( | ||
46 | __entry->vcpu_id = vcpu_id; | ||
47 | __entry->irq = irq; | ||
48 | __entry->level = level; | ||
49 | ), | ||
50 | |||
51 | TP_printk("VCPU: %ld, IRQ %d, level %d", | ||
52 | __entry->vcpu_id, __entry->irq, __entry->level) | ||
53 | ); | ||
54 | |||
55 | #endif /* _TRACE_KVM_H */ | ||
56 | |||
57 | #undef TRACE_INCLUDE_PATH | ||
58 | #define TRACE_INCLUDE_PATH ../../../virt/kvm/arm | ||
59 | #undef TRACE_INCLUDE_FILE | ||
60 | #define TRACE_INCLUDE_FILE trace | ||
61 | |||
62 | /* This part must be outside protection */ | ||
63 | #include <trace/define_trace.h> | ||
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index 8d7b04db8471..ff02f08df74d 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c | |||
@@ -79,11 +79,7 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, | |||
79 | lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); | 79 | lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); |
80 | 80 | ||
81 | vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; | 81 | vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; |
82 | } | ||
83 | 82 | ||
84 | static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, | ||
85 | struct vgic_lr lr_desc) | ||
86 | { | ||
87 | if (!(lr_desc.state & LR_STATE_MASK)) | 83 | if (!(lr_desc.state & LR_STATE_MASK)) |
88 | vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); | 84 | vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); |
89 | else | 85 | else |
@@ -158,6 +154,7 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu) | |||
158 | * anyway. | 154 | * anyway. |
159 | */ | 155 | */ |
160 | vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; | 156 | vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; |
157 | vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; | ||
161 | 158 | ||
162 | /* Get the show on the road... */ | 159 | /* Get the show on the road... */ |
163 | vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; | 160 | vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; |
@@ -166,7 +163,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu) | |||
166 | static const struct vgic_ops vgic_v2_ops = { | 163 | static const struct vgic_ops vgic_v2_ops = { |
167 | .get_lr = vgic_v2_get_lr, | 164 | .get_lr = vgic_v2_get_lr, |
168 | .set_lr = vgic_v2_set_lr, | 165 | .set_lr = vgic_v2_set_lr, |
169 | .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, | ||
170 | .get_elrsr = vgic_v2_get_elrsr, | 166 | .get_elrsr = vgic_v2_get_elrsr, |
171 | .get_eisr = vgic_v2_get_eisr, | 167 | .get_eisr = vgic_v2_get_eisr, |
172 | .clear_eisr = vgic_v2_clear_eisr, | 168 | .clear_eisr = vgic_v2_clear_eisr, |
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 7dd5d62f10a1..487d6357b7e7 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c | |||
@@ -112,11 +112,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, | |||
112 | } | 112 | } |
113 | 113 | ||
114 | vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; | 114 | vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; |
115 | } | ||
116 | 115 | ||
117 | static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, | ||
118 | struct vgic_lr lr_desc) | ||
119 | { | ||
120 | if (!(lr_desc.state & LR_STATE_MASK)) | 116 | if (!(lr_desc.state & LR_STATE_MASK)) |
121 | vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); | 117 | vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); |
122 | else | 118 | else |
@@ -193,6 +189,7 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu) | |||
193 | * anyway. | 189 | * anyway. |
194 | */ | 190 | */ |
195 | vgic_v3->vgic_vmcr = 0; | 191 | vgic_v3->vgic_vmcr = 0; |
192 | vgic_v3->vgic_elrsr = ~0; | ||
196 | 193 | ||
197 | /* | 194 | /* |
198 | * If we are emulating a GICv3, we do it in an non-GICv2-compatible | 195 | * If we are emulating a GICv3, we do it in an non-GICv2-compatible |
@@ -211,7 +208,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu) | |||
211 | static const struct vgic_ops vgic_v3_ops = { | 208 | static const struct vgic_ops vgic_v3_ops = { |
212 | .get_lr = vgic_v3_get_lr, | 209 | .get_lr = vgic_v3_get_lr, |
213 | .set_lr = vgic_v3_set_lr, | 210 | .set_lr = vgic_v3_set_lr, |
214 | .sync_lr_elrsr = vgic_v3_sync_lr_elrsr, | ||
215 | .get_elrsr = vgic_v3_get_elrsr, | 211 | .get_elrsr = vgic_v3_get_elrsr, |
216 | .get_eisr = vgic_v3_get_eisr, | 212 | .get_eisr = vgic_v3_get_eisr, |
217 | .clear_eisr = vgic_v3_clear_eisr, | 213 | .clear_eisr = vgic_v3_clear_eisr, |
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 30489181922d..533538385d5d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c | |||
@@ -34,6 +34,9 @@ | |||
34 | #include <asm/kvm.h> | 34 | #include <asm/kvm.h> |
35 | #include <kvm/iodev.h> | 35 | #include <kvm/iodev.h> |
36 | 36 | ||
37 | #define CREATE_TRACE_POINTS | ||
38 | #include "trace.h" | ||
39 | |||
37 | /* | 40 | /* |
38 | * How the whole thing works (courtesy of Christoffer Dall): | 41 | * How the whole thing works (courtesy of Christoffer Dall): |
39 | * | 42 | * |
@@ -102,11 +105,13 @@ | |||
102 | #include "vgic.h" | 105 | #include "vgic.h" |
103 | 106 | ||
104 | static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); | 107 | static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); |
105 | static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); | 108 | static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu); |
106 | static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); | 109 | static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); |
107 | static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); | 110 | static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); |
111 | static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu); | ||
108 | static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, | 112 | static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, |
109 | int virt_irq); | 113 | int virt_irq); |
114 | static int compute_pending_for_cpu(struct kvm_vcpu *vcpu); | ||
110 | 115 | ||
111 | static const struct vgic_ops *vgic_ops; | 116 | static const struct vgic_ops *vgic_ops; |
112 | static const struct vgic_params *vgic; | 117 | static const struct vgic_params *vgic; |
@@ -357,6 +362,11 @@ static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq) | |||
357 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 362 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
358 | 363 | ||
359 | vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); | 364 | vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); |
365 | if (!vgic_dist_irq_get_level(vcpu, irq)) { | ||
366 | vgic_dist_irq_clear_pending(vcpu, irq); | ||
367 | if (!compute_pending_for_cpu(vcpu)) | ||
368 | clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); | ||
369 | } | ||
360 | } | 370 | } |
361 | 371 | ||
362 | static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) | 372 | static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) |
@@ -531,34 +541,6 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, | |||
531 | return false; | 541 | return false; |
532 | } | 542 | } |
533 | 543 | ||
534 | /* | ||
535 | * If a mapped interrupt's state has been modified by the guest such that it | ||
536 | * is no longer active or pending, without it have gone through the sync path, | ||
537 | * then the map->active field must be cleared so the interrupt can be taken | ||
538 | * again. | ||
539 | */ | ||
540 | static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu) | ||
541 | { | ||
542 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
543 | struct list_head *root; | ||
544 | struct irq_phys_map_entry *entry; | ||
545 | struct irq_phys_map *map; | ||
546 | |||
547 | rcu_read_lock(); | ||
548 | |||
549 | /* Check for PPIs */ | ||
550 | root = &vgic_cpu->irq_phys_map_list; | ||
551 | list_for_each_entry_rcu(entry, root, entry) { | ||
552 | map = &entry->map; | ||
553 | |||
554 | if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) && | ||
555 | !vgic_irq_is_active(vcpu, map->virt_irq)) | ||
556 | map->active = false; | ||
557 | } | ||
558 | |||
559 | rcu_read_unlock(); | ||
560 | } | ||
561 | |||
562 | bool vgic_handle_clear_pending_reg(struct kvm *kvm, | 544 | bool vgic_handle_clear_pending_reg(struct kvm *kvm, |
563 | struct kvm_exit_mmio *mmio, | 545 | struct kvm_exit_mmio *mmio, |
564 | phys_addr_t offset, int vcpu_id) | 546 | phys_addr_t offset, int vcpu_id) |
@@ -589,7 +571,6 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm, | |||
589 | vcpu_id, offset); | 571 | vcpu_id, offset); |
590 | vgic_reg_access(mmio, reg, offset, mode); | 572 | vgic_reg_access(mmio, reg, offset, mode); |
591 | 573 | ||
592 | vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id)); | ||
593 | vgic_update_state(kvm); | 574 | vgic_update_state(kvm); |
594 | return true; | 575 | return true; |
595 | } | 576 | } |
@@ -627,7 +608,6 @@ bool vgic_handle_clear_active_reg(struct kvm *kvm, | |||
627 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); | 608 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); |
628 | 609 | ||
629 | if (mmio->is_write) { | 610 | if (mmio->is_write) { |
630 | vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id)); | ||
631 | vgic_update_state(kvm); | 611 | vgic_update_state(kvm); |
632 | return true; | 612 | return true; |
633 | } | 613 | } |
@@ -684,10 +664,9 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, | |||
684 | vgic_reg_access(mmio, &val, offset, | 664 | vgic_reg_access(mmio, &val, offset, |
685 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); | 665 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); |
686 | if (mmio->is_write) { | 666 | if (mmio->is_write) { |
687 | if (offset < 8) { | 667 | /* Ignore writes to read-only SGI and PPI bits */ |
688 | *reg = ~0U; /* Force PPIs/SGIs to 1 */ | 668 | if (offset < 8) |
689 | return false; | 669 | return false; |
690 | } | ||
691 | 670 | ||
692 | val = vgic_cfg_compress(val); | 671 | val = vgic_cfg_compress(val); |
693 | if (offset & 4) { | 672 | if (offset & 4) { |
@@ -713,9 +692,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, | |||
713 | void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) | 692 | void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) |
714 | { | 693 | { |
715 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | 694 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; |
695 | u64 elrsr = vgic_get_elrsr(vcpu); | ||
696 | unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); | ||
716 | int i; | 697 | int i; |
717 | 698 | ||
718 | for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { | 699 | for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) { |
719 | struct vgic_lr lr = vgic_get_lr(vcpu, i); | 700 | struct vgic_lr lr = vgic_get_lr(vcpu, i); |
720 | 701 | ||
721 | /* | 702 | /* |
@@ -736,30 +717,14 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) | |||
736 | * interrupt then move the active state to the | 717 | * interrupt then move the active state to the |
737 | * distributor tracking bit. | 718 | * distributor tracking bit. |
738 | */ | 719 | */ |
739 | if (lr.state & LR_STATE_ACTIVE) { | 720 | if (lr.state & LR_STATE_ACTIVE) |
740 | vgic_irq_set_active(vcpu, lr.irq); | 721 | vgic_irq_set_active(vcpu, lr.irq); |
741 | lr.state &= ~LR_STATE_ACTIVE; | ||
742 | } | ||
743 | 722 | ||
744 | /* | 723 | /* |
745 | * Reestablish the pending state on the distributor and the | 724 | * Reestablish the pending state on the distributor and the |
746 | * CPU interface. It may have already been pending, but that | 725 | * CPU interface and mark the LR as free for other use. |
747 | * is fine, then we are only setting a few bits that were | ||
748 | * already set. | ||
749 | */ | 726 | */ |
750 | if (lr.state & LR_STATE_PENDING) { | 727 | vgic_retire_lr(i, vcpu); |
751 | vgic_dist_irq_set_pending(vcpu, lr.irq); | ||
752 | lr.state &= ~LR_STATE_PENDING; | ||
753 | } | ||
754 | |||
755 | vgic_set_lr(vcpu, i, lr); | ||
756 | |||
757 | /* | ||
758 | * Mark the LR as free for other use. | ||
759 | */ | ||
760 | BUG_ON(lr.state & LR_STATE_MASK); | ||
761 | vgic_retire_lr(i, lr.irq, vcpu); | ||
762 | vgic_irq_clear_queued(vcpu, lr.irq); | ||
763 | 728 | ||
764 | /* Finally update the VGIC state. */ | 729 | /* Finally update the VGIC state. */ |
765 | vgic_update_state(vcpu->kvm); | 730 | vgic_update_state(vcpu->kvm); |
@@ -1067,12 +1032,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, | |||
1067 | vgic_ops->set_lr(vcpu, lr, vlr); | 1032 | vgic_ops->set_lr(vcpu, lr, vlr); |
1068 | } | 1033 | } |
1069 | 1034 | ||
1070 | static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, | ||
1071 | struct vgic_lr vlr) | ||
1072 | { | ||
1073 | vgic_ops->sync_lr_elrsr(vcpu, lr, vlr); | ||
1074 | } | ||
1075 | |||
1076 | static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) | 1035 | static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) |
1077 | { | 1036 | { |
1078 | return vgic_ops->get_elrsr(vcpu); | 1037 | return vgic_ops->get_elrsr(vcpu); |
@@ -1118,25 +1077,23 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu) | |||
1118 | vgic_ops->enable(vcpu); | 1077 | vgic_ops->enable(vcpu); |
1119 | } | 1078 | } |
1120 | 1079 | ||
1121 | static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) | 1080 | static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu) |
1122 | { | 1081 | { |
1123 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
1124 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); | 1082 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); |
1125 | 1083 | ||
1084 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
1085 | |||
1126 | /* | 1086 | /* |
1127 | * We must transfer the pending state back to the distributor before | 1087 | * We must transfer the pending state back to the distributor before |
1128 | * retiring the LR, otherwise we may lose edge-triggered interrupts. | 1088 | * retiring the LR, otherwise we may lose edge-triggered interrupts. |
1129 | */ | 1089 | */ |
1130 | if (vlr.state & LR_STATE_PENDING) { | 1090 | if (vlr.state & LR_STATE_PENDING) { |
1131 | vgic_dist_irq_set_pending(vcpu, irq); | 1091 | vgic_dist_irq_set_pending(vcpu, vlr.irq); |
1132 | vlr.hwirq = 0; | 1092 | vlr.hwirq = 0; |
1133 | } | 1093 | } |
1134 | 1094 | ||
1135 | vlr.state = 0; | 1095 | vlr.state = 0; |
1136 | vgic_set_lr(vcpu, lr_nr, vlr); | 1096 | vgic_set_lr(vcpu, lr_nr, vlr); |
1137 | clear_bit(lr_nr, vgic_cpu->lr_used); | ||
1138 | vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; | ||
1139 | vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); | ||
1140 | } | 1097 | } |
1141 | 1098 | ||
1142 | /* | 1099 | /* |
@@ -1150,17 +1107,15 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) | |||
1150 | */ | 1107 | */ |
1151 | static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) | 1108 | static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) |
1152 | { | 1109 | { |
1153 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | 1110 | u64 elrsr = vgic_get_elrsr(vcpu); |
1111 | unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); | ||
1154 | int lr; | 1112 | int lr; |
1155 | 1113 | ||
1156 | for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) { | 1114 | for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { |
1157 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); | 1115 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); |
1158 | 1116 | ||
1159 | if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { | 1117 | if (!vgic_irq_is_enabled(vcpu, vlr.irq)) |
1160 | vgic_retire_lr(lr, vlr.irq, vcpu); | 1118 | vgic_retire_lr(lr, vcpu); |
1161 | if (vgic_irq_is_queued(vcpu, vlr.irq)) | ||
1162 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
1163 | } | ||
1164 | } | 1119 | } |
1165 | } | 1120 | } |
1166 | 1121 | ||
@@ -1200,7 +1155,6 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, | |||
1200 | } | 1155 | } |
1201 | 1156 | ||
1202 | vgic_set_lr(vcpu, lr_nr, vlr); | 1157 | vgic_set_lr(vcpu, lr_nr, vlr); |
1203 | vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); | ||
1204 | } | 1158 | } |
1205 | 1159 | ||
1206 | /* | 1160 | /* |
@@ -1210,8 +1164,9 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, | |||
1210 | */ | 1164 | */ |
1211 | bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) | 1165 | bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) |
1212 | { | 1166 | { |
1213 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
1214 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1167 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
1168 | u64 elrsr = vgic_get_elrsr(vcpu); | ||
1169 | unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); | ||
1215 | struct vgic_lr vlr; | 1170 | struct vgic_lr vlr; |
1216 | int lr; | 1171 | int lr; |
1217 | 1172 | ||
@@ -1222,28 +1177,22 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) | |||
1222 | 1177 | ||
1223 | kvm_debug("Queue IRQ%d\n", irq); | 1178 | kvm_debug("Queue IRQ%d\n", irq); |
1224 | 1179 | ||
1225 | lr = vgic_cpu->vgic_irq_lr_map[irq]; | ||
1226 | |||
1227 | /* Do we have an active interrupt for the same CPUID? */ | 1180 | /* Do we have an active interrupt for the same CPUID? */ |
1228 | if (lr != LR_EMPTY) { | 1181 | for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { |
1229 | vlr = vgic_get_lr(vcpu, lr); | 1182 | vlr = vgic_get_lr(vcpu, lr); |
1230 | if (vlr.source == sgi_source_id) { | 1183 | if (vlr.irq == irq && vlr.source == sgi_source_id) { |
1231 | kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); | 1184 | kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); |
1232 | BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); | ||
1233 | vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); | 1185 | vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); |
1234 | return true; | 1186 | return true; |
1235 | } | 1187 | } |
1236 | } | 1188 | } |
1237 | 1189 | ||
1238 | /* Try to use another LR for this interrupt */ | 1190 | /* Try to use another LR for this interrupt */ |
1239 | lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, | 1191 | lr = find_first_bit(elrsr_ptr, vgic->nr_lr); |
1240 | vgic->nr_lr); | ||
1241 | if (lr >= vgic->nr_lr) | 1192 | if (lr >= vgic->nr_lr) |
1242 | return false; | 1193 | return false; |
1243 | 1194 | ||
1244 | kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); | 1195 | kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); |
1245 | vgic_cpu->vgic_irq_lr_map[irq] = lr; | ||
1246 | set_bit(lr, vgic_cpu->lr_used); | ||
1247 | 1196 | ||
1248 | vlr.irq = irq; | 1197 | vlr.irq = irq; |
1249 | vlr.source = sgi_source_id; | 1198 | vlr.source = sgi_source_id; |
@@ -1338,12 +1287,60 @@ epilog: | |||
1338 | } | 1287 | } |
1339 | } | 1288 | } |
1340 | 1289 | ||
1290 | static int process_queued_irq(struct kvm_vcpu *vcpu, | ||
1291 | int lr, struct vgic_lr vlr) | ||
1292 | { | ||
1293 | int pending = 0; | ||
1294 | |||
1295 | /* | ||
1296 | * If the IRQ was EOIed (called from vgic_process_maintenance) or it | ||
1297 | * went from active to non-active (called from vgic_sync_hwirq) it was | ||
1298 | * also ACKed and we therefore assume we can clear the soft pending | ||
1299 | * state (should it have been set) for this interrupt. | ||
1300 | * | ||
1301 | * Note: if the IRQ soft pending state was set after the IRQ was | ||
1302 | * acked, it actually shouldn't be cleared, but we have no way of | ||
1303 | * knowing that unless we start trapping ACKs when the soft-pending | ||
1304 | * state is set. | ||
1305 | */ | ||
1306 | vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); | ||
1307 | |||
1308 | /* | ||
1309 | * Tell the gic to start sampling this interrupt again. | ||
1310 | */ | ||
1311 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
1312 | |||
1313 | /* Any additional pending interrupt? */ | ||
1314 | if (vgic_irq_is_edge(vcpu, vlr.irq)) { | ||
1315 | BUG_ON(!(vlr.state & LR_HW)); | ||
1316 | pending = vgic_dist_irq_is_pending(vcpu, vlr.irq); | ||
1317 | } else { | ||
1318 | if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { | ||
1319 | vgic_cpu_irq_set(vcpu, vlr.irq); | ||
1320 | pending = 1; | ||
1321 | } else { | ||
1322 | vgic_dist_irq_clear_pending(vcpu, vlr.irq); | ||
1323 | vgic_cpu_irq_clear(vcpu, vlr.irq); | ||
1324 | } | ||
1325 | } | ||
1326 | |||
1327 | /* | ||
1328 | * Despite being EOIed, the LR may not have | ||
1329 | * been marked as empty. | ||
1330 | */ | ||
1331 | vlr.state = 0; | ||
1332 | vlr.hwirq = 0; | ||
1333 | vgic_set_lr(vcpu, lr, vlr); | ||
1334 | |||
1335 | return pending; | ||
1336 | } | ||
1337 | |||
1341 | static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) | 1338 | static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) |
1342 | { | 1339 | { |
1343 | u32 status = vgic_get_interrupt_status(vcpu); | 1340 | u32 status = vgic_get_interrupt_status(vcpu); |
1344 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1341 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
1345 | bool level_pending = false; | ||
1346 | struct kvm *kvm = vcpu->kvm; | 1342 | struct kvm *kvm = vcpu->kvm; |
1343 | int level_pending = 0; | ||
1347 | 1344 | ||
1348 | kvm_debug("STATUS = %08x\n", status); | 1345 | kvm_debug("STATUS = %08x\n", status); |
1349 | 1346 | ||
@@ -1358,54 +1355,22 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) | |||
1358 | 1355 | ||
1359 | for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { | 1356 | for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { |
1360 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); | 1357 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); |
1361 | WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); | ||
1362 | 1358 | ||
1363 | spin_lock(&dist->lock); | 1359 | WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); |
1364 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
1365 | WARN_ON(vlr.state & LR_STATE_MASK); | 1360 | WARN_ON(vlr.state & LR_STATE_MASK); |
1366 | vlr.state = 0; | ||
1367 | vgic_set_lr(vcpu, lr, vlr); | ||
1368 | 1361 | ||
1369 | /* | ||
1370 | * If the IRQ was EOIed it was also ACKed and we | ||
1371 | * therefore assume we can clear the soft pending | ||
1372 | * state (should it have been set) for this interrupt. | ||
1373 | * | ||
1374 | * Note: if the IRQ soft pending state was set after | ||
1375 | * the IRQ was acked, it actually shouldn't be | ||
1376 | * cleared, but we have no way of knowing that unless | ||
1377 | * we start trapping ACKs when the soft-pending state | ||
1378 | * is set. | ||
1379 | */ | ||
1380 | vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); | ||
1381 | 1362 | ||
1382 | /* | 1363 | /* |
1383 | * kvm_notify_acked_irq calls kvm_set_irq() | 1364 | * kvm_notify_acked_irq calls kvm_set_irq() |
1384 | * to reset the IRQ level. Need to release the | 1365 | * to reset the IRQ level, which grabs the dist->lock |
1385 | * lock for kvm_set_irq to grab it. | 1366 | * so we call this before taking the dist->lock. |
1386 | */ | 1367 | */ |
1387 | spin_unlock(&dist->lock); | ||
1388 | |||
1389 | kvm_notify_acked_irq(kvm, 0, | 1368 | kvm_notify_acked_irq(kvm, 0, |
1390 | vlr.irq - VGIC_NR_PRIVATE_IRQS); | 1369 | vlr.irq - VGIC_NR_PRIVATE_IRQS); |
1391 | spin_lock(&dist->lock); | ||
1392 | |||
1393 | /* Any additional pending interrupt? */ | ||
1394 | if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { | ||
1395 | vgic_cpu_irq_set(vcpu, vlr.irq); | ||
1396 | level_pending = true; | ||
1397 | } else { | ||
1398 | vgic_dist_irq_clear_pending(vcpu, vlr.irq); | ||
1399 | vgic_cpu_irq_clear(vcpu, vlr.irq); | ||
1400 | } | ||
1401 | 1370 | ||
1371 | spin_lock(&dist->lock); | ||
1372 | level_pending |= process_queued_irq(vcpu, lr, vlr); | ||
1402 | spin_unlock(&dist->lock); | 1373 | spin_unlock(&dist->lock); |
1403 | |||
1404 | /* | ||
1405 | * Despite being EOIed, the LR may not have | ||
1406 | * been marked as empty. | ||
1407 | */ | ||
1408 | vgic_sync_lr_elrsr(vcpu, lr, vlr); | ||
1409 | } | 1374 | } |
1410 | } | 1375 | } |
1411 | 1376 | ||
@@ -1426,35 +1391,40 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) | |||
1426 | /* | 1391 | /* |
1427 | * Save the physical active state, and reset it to inactive. | 1392 | * Save the physical active state, and reset it to inactive. |
1428 | * | 1393 | * |
1429 | * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. | 1394 | * Return true if there's a pending forwarded interrupt to queue. |
1430 | */ | 1395 | */ |
1431 | static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) | 1396 | static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) |
1432 | { | 1397 | { |
1398 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
1433 | struct irq_phys_map *map; | 1399 | struct irq_phys_map *map; |
1400 | bool phys_active; | ||
1401 | bool level_pending; | ||
1434 | int ret; | 1402 | int ret; |
1435 | 1403 | ||
1436 | if (!(vlr.state & LR_HW)) | 1404 | if (!(vlr.state & LR_HW)) |
1437 | return 0; | 1405 | return false; |
1438 | 1406 | ||
1439 | map = vgic_irq_map_search(vcpu, vlr.irq); | 1407 | map = vgic_irq_map_search(vcpu, vlr.irq); |
1440 | BUG_ON(!map); | 1408 | BUG_ON(!map); |
1441 | 1409 | ||
1442 | ret = irq_get_irqchip_state(map->irq, | 1410 | ret = irq_get_irqchip_state(map->irq, |
1443 | IRQCHIP_STATE_ACTIVE, | 1411 | IRQCHIP_STATE_ACTIVE, |
1444 | &map->active); | 1412 | &phys_active); |
1445 | 1413 | ||
1446 | WARN_ON(ret); | 1414 | WARN_ON(ret); |
1447 | 1415 | ||
1448 | if (map->active) | 1416 | if (phys_active) |
1449 | return 0; | 1417 | return 0; |
1450 | 1418 | ||
1451 | return 1; | 1419 | spin_lock(&dist->lock); |
1420 | level_pending = process_queued_irq(vcpu, lr, vlr); | ||
1421 | spin_unlock(&dist->lock); | ||
1422 | return level_pending; | ||
1452 | } | 1423 | } |
1453 | 1424 | ||
1454 | /* Sync back the VGIC state after a guest run */ | 1425 | /* Sync back the VGIC state after a guest run */ |
1455 | static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) | 1426 | static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) |
1456 | { | 1427 | { |
1457 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
1458 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1428 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
1459 | u64 elrsr; | 1429 | u64 elrsr; |
1460 | unsigned long *elrsr_ptr; | 1430 | unsigned long *elrsr_ptr; |
@@ -1462,40 +1432,18 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) | |||
1462 | bool level_pending; | 1432 | bool level_pending; |
1463 | 1433 | ||
1464 | level_pending = vgic_process_maintenance(vcpu); | 1434 | level_pending = vgic_process_maintenance(vcpu); |
1465 | elrsr = vgic_get_elrsr(vcpu); | ||
1466 | elrsr_ptr = u64_to_bitmask(&elrsr); | ||
1467 | 1435 | ||
1468 | /* Deal with HW interrupts, and clear mappings for empty LRs */ | 1436 | /* Deal with HW interrupts, and clear mappings for empty LRs */ |
1469 | for (lr = 0; lr < vgic->nr_lr; lr++) { | 1437 | for (lr = 0; lr < vgic->nr_lr; lr++) { |
1470 | struct vgic_lr vlr; | 1438 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); |
1471 | |||
1472 | if (!test_bit(lr, vgic_cpu->lr_used)) | ||
1473 | continue; | ||
1474 | |||
1475 | vlr = vgic_get_lr(vcpu, lr); | ||
1476 | if (vgic_sync_hwirq(vcpu, vlr)) { | ||
1477 | /* | ||
1478 | * So this is a HW interrupt that the guest | ||
1479 | * EOI-ed. Clean the LR state and allow the | ||
1480 | * interrupt to be sampled again. | ||
1481 | */ | ||
1482 | vlr.state = 0; | ||
1483 | vlr.hwirq = 0; | ||
1484 | vgic_set_lr(vcpu, lr, vlr); | ||
1485 | vgic_irq_clear_queued(vcpu, vlr.irq); | ||
1486 | set_bit(lr, elrsr_ptr); | ||
1487 | } | ||
1488 | |||
1489 | if (!test_bit(lr, elrsr_ptr)) | ||
1490 | continue; | ||
1491 | |||
1492 | clear_bit(lr, vgic_cpu->lr_used); | ||
1493 | 1439 | ||
1440 | level_pending |= vgic_sync_hwirq(vcpu, lr, vlr); | ||
1494 | BUG_ON(vlr.irq >= dist->nr_irqs); | 1441 | BUG_ON(vlr.irq >= dist->nr_irqs); |
1495 | vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; | ||
1496 | } | 1442 | } |
1497 | 1443 | ||
1498 | /* Check if we still have something up our sleeve... */ | 1444 | /* Check if we still have something up our sleeve... */ |
1445 | elrsr = vgic_get_elrsr(vcpu); | ||
1446 | elrsr_ptr = u64_to_bitmask(&elrsr); | ||
1499 | pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); | 1447 | pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); |
1500 | if (level_pending || pending < vgic->nr_lr) | 1448 | if (level_pending || pending < vgic->nr_lr) |
1501 | set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); | 1449 | set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); |
@@ -1585,6 +1533,8 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, | |||
1585 | int enabled; | 1533 | int enabled; |
1586 | bool ret = true, can_inject = true; | 1534 | bool ret = true, can_inject = true; |
1587 | 1535 | ||
1536 | trace_vgic_update_irq_pending(cpuid, irq_num, level); | ||
1537 | |||
1588 | if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) | 1538 | if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) |
1589 | return -EINVAL; | 1539 | return -EINVAL; |
1590 | 1540 | ||
@@ -1864,30 +1814,6 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu) | |||
1864 | } | 1814 | } |
1865 | 1815 | ||
1866 | /** | 1816 | /** |
1867 | * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ | ||
1868 | * | ||
1869 | * Return the logical active state of a mapped interrupt. This doesn't | ||
1870 | * necessarily reflects the current HW state. | ||
1871 | */ | ||
1872 | bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map) | ||
1873 | { | ||
1874 | BUG_ON(!map); | ||
1875 | return map->active; | ||
1876 | } | ||
1877 | |||
1878 | /** | ||
1879 | * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ | ||
1880 | * | ||
1881 | * Set the logical active state of a mapped interrupt. This doesn't | ||
1882 | * immediately affect the HW state. | ||
1883 | */ | ||
1884 | void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active) | ||
1885 | { | ||
1886 | BUG_ON(!map); | ||
1887 | map->active = active; | ||
1888 | } | ||
1889 | |||
1890 | /** | ||
1891 | * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping | 1817 | * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping |
1892 | * @vcpu: The VCPU pointer | 1818 | * @vcpu: The VCPU pointer |
1893 | * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq | 1819 | * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq |
@@ -1942,12 +1868,10 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
1942 | kfree(vgic_cpu->pending_shared); | 1868 | kfree(vgic_cpu->pending_shared); |
1943 | kfree(vgic_cpu->active_shared); | 1869 | kfree(vgic_cpu->active_shared); |
1944 | kfree(vgic_cpu->pend_act_shared); | 1870 | kfree(vgic_cpu->pend_act_shared); |
1945 | kfree(vgic_cpu->vgic_irq_lr_map); | ||
1946 | vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); | 1871 | vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); |
1947 | vgic_cpu->pending_shared = NULL; | 1872 | vgic_cpu->pending_shared = NULL; |
1948 | vgic_cpu->active_shared = NULL; | 1873 | vgic_cpu->active_shared = NULL; |
1949 | vgic_cpu->pend_act_shared = NULL; | 1874 | vgic_cpu->pend_act_shared = NULL; |
1950 | vgic_cpu->vgic_irq_lr_map = NULL; | ||
1951 | } | 1875 | } |
1952 | 1876 | ||
1953 | static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) | 1877 | static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) |
@@ -1958,18 +1882,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) | |||
1958 | vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); | 1882 | vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); |
1959 | vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); | 1883 | vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); |
1960 | vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); | 1884 | vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); |
1961 | vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL); | ||
1962 | 1885 | ||
1963 | if (!vgic_cpu->pending_shared | 1886 | if (!vgic_cpu->pending_shared |
1964 | || !vgic_cpu->active_shared | 1887 | || !vgic_cpu->active_shared |
1965 | || !vgic_cpu->pend_act_shared | 1888 | || !vgic_cpu->pend_act_shared) { |
1966 | || !vgic_cpu->vgic_irq_lr_map) { | ||
1967 | kvm_vgic_vcpu_destroy(vcpu); | 1889 | kvm_vgic_vcpu_destroy(vcpu); |
1968 | return -ENOMEM; | 1890 | return -ENOMEM; |
1969 | } | 1891 | } |
1970 | 1892 | ||
1971 | memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs); | ||
1972 | |||
1973 | /* | 1893 | /* |
1974 | * Store the number of LRs per vcpu, so we don't have to go | 1894 | * Store the number of LRs per vcpu, so we don't have to go |
1975 | * all the way to the distributor structure to find out. Only | 1895 | * all the way to the distributor structure to find out. Only |
@@ -2111,14 +2031,24 @@ int vgic_init(struct kvm *kvm) | |||
2111 | break; | 2031 | break; |
2112 | } | 2032 | } |
2113 | 2033 | ||
2114 | for (i = 0; i < dist->nr_irqs; i++) { | 2034 | /* |
2115 | if (i < VGIC_NR_PPIS) | 2035 | * Enable and configure all SGIs to be edge-triggered and |
2036 | * configure all PPIs as level-triggered. | ||
2037 | */ | ||
2038 | for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { | ||
2039 | if (i < VGIC_NR_SGIS) { | ||
2040 | /* SGIs */ | ||
2116 | vgic_bitmap_set_irq_val(&dist->irq_enabled, | 2041 | vgic_bitmap_set_irq_val(&dist->irq_enabled, |
2117 | vcpu->vcpu_id, i, 1); | 2042 | vcpu->vcpu_id, i, 1); |
2118 | if (i < VGIC_NR_PRIVATE_IRQS) | ||
2119 | vgic_bitmap_set_irq_val(&dist->irq_cfg, | 2043 | vgic_bitmap_set_irq_val(&dist->irq_cfg, |
2120 | vcpu->vcpu_id, i, | 2044 | vcpu->vcpu_id, i, |
2121 | VGIC_CFG_EDGE); | 2045 | VGIC_CFG_EDGE); |
2046 | } else if (i < VGIC_NR_PRIVATE_IRQS) { | ||
2047 | /* PPIs */ | ||
2048 | vgic_bitmap_set_irq_val(&dist->irq_cfg, | ||
2049 | vcpu->vcpu_id, i, | ||
2050 | VGIC_CFG_LEVEL); | ||
2051 | } | ||
2122 | } | 2052 | } |
2123 | 2053 | ||
2124 | vgic_enable(vcpu); | 2054 | vgic_enable(vcpu); |
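The vgic.c hunks above remove the shadow bookkeeping (the per-vCPU lr_used bitmap and the vgic_irq_lr_map array) and instead derive list-register occupancy from the ELRSR value whenever it is needed: a set ELRSR bit means the corresponding LR is empty, so occupied LRs are walked with for_each_clear_bit() and a free one is picked with find_first_bit(). A minimal, self-contained sketch of that convention in plain userspace C (NR_LR and the sample mask are invented for illustration; this is not kernel code):

#include <stdint.h>
#include <stdio.h>

#define NR_LR 4	/* illustrative number of list registers */

/* a set bit in the ELRSR-style mask means "this LR is empty" */
static int first_empty_lr(uint64_t elrsr)
{
	for (int lr = 0; lr < NR_LR; lr++)
		if (elrsr & (1ULL << lr))
			return lr;
	return NR_LR;	/* no free LR left */
}

int main(void)
{
	uint64_t elrsr = 0xAULL;	/* LR1 and LR3 empty, LR0 and LR2 in use */

	/* occupied LRs are the clear bits: this is the "piggyback" scan */
	for (int lr = 0; lr < NR_LR; lr++)
		if (!(elrsr & (1ULL << lr)))
			printf("LR%d is in use\n", lr);

	printf("first free LR: %d\n", first_empty_lr(elrsr));
	return 0;
}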
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 44660aee335f..77d42be6970e 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c | |||
@@ -94,6 +94,10 @@ static void async_pf_execute(struct work_struct *work) | |||
94 | 94 | ||
95 | trace_kvm_async_pf_completed(addr, gva); | 95 | trace_kvm_async_pf_completed(addr, gva); |
96 | 96 | ||
97 | /* | ||
98 | * This memory barrier pairs with prepare_to_wait's set_current_state() | ||
99 | */ | ||
100 | smp_mb(); | ||
97 | if (waitqueue_active(&vcpu->wq)) | 101 | if (waitqueue_active(&vcpu->wq)) |
98 | wake_up_interruptible(&vcpu->wq); | 102 | wake_up_interruptible(&vcpu->wq); |
99 | 103 | ||
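The new smp_mb() in async_pf_execute() orders the work item's updates against its waitqueue_active() check, pairing with the set_current_state() implied by prepare_to_wait() on the vCPU side; without it the waker could miss a sleeper that has not yet become visible on the queue. A hedged sketch of that classic pairing ("done" and "wq" are invented names, not the KVM fields):

static DECLARE_WAIT_QUEUE_HEAD(wq);
static bool done;

static void waiter_side(void)			/* e.g. the blocking vCPU */
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&wq, &wait, TASK_INTERRUPTIBLE);	/* sets task state */
	if (!done)			/* either we see the store ...           */
		schedule();		/* ... or the waker sees us on the queue */
	finish_wait(&wq, &wait);
}

static void waker_side(void)			/* e.g. async_pf_execute() */
{
	done = true;
	smp_mb();	/* order the store above against the check below */
	if (waitqueue_active(&wq))
		wake_up_interruptible(&wq);
}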
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 79db45336e3a..46dbc0a7dfc1 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
@@ -23,6 +23,7 @@ | |||
23 | 23 | ||
24 | #include <linux/kvm_host.h> | 24 | #include <linux/kvm_host.h> |
25 | #include <linux/kvm.h> | 25 | #include <linux/kvm.h> |
26 | #include <linux/kvm_irqfd.h> | ||
26 | #include <linux/workqueue.h> | 27 | #include <linux/workqueue.h> |
27 | #include <linux/syscalls.h> | 28 | #include <linux/syscalls.h> |
28 | #include <linux/wait.h> | 29 | #include <linux/wait.h> |
@@ -34,73 +35,20 @@ | |||
34 | #include <linux/srcu.h> | 35 | #include <linux/srcu.h> |
35 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
36 | #include <linux/seqlock.h> | 37 | #include <linux/seqlock.h> |
38 | #include <linux/irqbypass.h> | ||
37 | #include <trace/events/kvm.h> | 39 | #include <trace/events/kvm.h> |
38 | 40 | ||
39 | #include <kvm/iodev.h> | 41 | #include <kvm/iodev.h> |
40 | 42 | ||
41 | #ifdef CONFIG_HAVE_KVM_IRQFD | 43 | #ifdef CONFIG_HAVE_KVM_IRQFD |
42 | /* | ||
43 | * -------------------------------------------------------------------- | ||
44 | * irqfd: Allows an fd to be used to inject an interrupt to the guest | ||
45 | * | ||
46 | * Credit goes to Avi Kivity for the original idea. | ||
47 | * -------------------------------------------------------------------- | ||
48 | */ | ||
49 | |||
50 | /* | ||
51 | * Resampling irqfds are a special variety of irqfds used to emulate | ||
52 | * level triggered interrupts. The interrupt is asserted on eventfd | ||
53 | * trigger. On acknowledgement through the irq ack notifier, the | ||
54 | * interrupt is de-asserted and userspace is notified through the | ||
55 | * resamplefd. All resamplers on the same gsi are de-asserted | ||
56 | * together, so we don't need to track the state of each individual | ||
57 | * user. We can also therefore share the same irq source ID. | ||
58 | */ | ||
59 | struct _irqfd_resampler { | ||
60 | struct kvm *kvm; | ||
61 | /* | ||
62 | * List of resampling struct _irqfd objects sharing this gsi. | ||
63 | * RCU list modified under kvm->irqfds.resampler_lock | ||
64 | */ | ||
65 | struct list_head list; | ||
66 | struct kvm_irq_ack_notifier notifier; | ||
67 | /* | ||
68 | * Entry in list of kvm->irqfd.resampler_list. Use for sharing | ||
69 | * resamplers among irqfds on the same gsi. | ||
70 | * Accessed and modified under kvm->irqfds.resampler_lock | ||
71 | */ | ||
72 | struct list_head link; | ||
73 | }; | ||
74 | |||
75 | struct _irqfd { | ||
76 | /* Used for MSI fast-path */ | ||
77 | struct kvm *kvm; | ||
78 | wait_queue_t wait; | ||
79 | /* Update side is protected by irqfds.lock */ | ||
80 | struct kvm_kernel_irq_routing_entry irq_entry; | ||
81 | seqcount_t irq_entry_sc; | ||
82 | /* Used for level IRQ fast-path */ | ||
83 | int gsi; | ||
84 | struct work_struct inject; | ||
85 | /* The resampler used by this irqfd (resampler-only) */ | ||
86 | struct _irqfd_resampler *resampler; | ||
87 | /* Eventfd notified on resample (resampler-only) */ | ||
88 | struct eventfd_ctx *resamplefd; | ||
89 | /* Entry in list of irqfds for a resampler (resampler-only) */ | ||
90 | struct list_head resampler_link; | ||
91 | /* Used for setup/shutdown */ | ||
92 | struct eventfd_ctx *eventfd; | ||
93 | struct list_head list; | ||
94 | poll_table pt; | ||
95 | struct work_struct shutdown; | ||
96 | }; | ||
97 | 44 | ||
98 | static struct workqueue_struct *irqfd_cleanup_wq; | 45 | static struct workqueue_struct *irqfd_cleanup_wq; |
99 | 46 | ||
100 | static void | 47 | static void |
101 | irqfd_inject(struct work_struct *work) | 48 | irqfd_inject(struct work_struct *work) |
102 | { | 49 | { |
103 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); | 50 | struct kvm_kernel_irqfd *irqfd = |
51 | container_of(work, struct kvm_kernel_irqfd, inject); | ||
104 | struct kvm *kvm = irqfd->kvm; | 52 | struct kvm *kvm = irqfd->kvm; |
105 | 53 | ||
106 | if (!irqfd->resampler) { | 54 | if (!irqfd->resampler) { |
@@ -121,12 +69,13 @@ irqfd_inject(struct work_struct *work) | |||
121 | static void | 69 | static void |
122 | irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) | 70 | irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) |
123 | { | 71 | { |
124 | struct _irqfd_resampler *resampler; | 72 | struct kvm_kernel_irqfd_resampler *resampler; |
125 | struct kvm *kvm; | 73 | struct kvm *kvm; |
126 | struct _irqfd *irqfd; | 74 | struct kvm_kernel_irqfd *irqfd; |
127 | int idx; | 75 | int idx; |
128 | 76 | ||
129 | resampler = container_of(kian, struct _irqfd_resampler, notifier); | 77 | resampler = container_of(kian, |
78 | struct kvm_kernel_irqfd_resampler, notifier); | ||
130 | kvm = resampler->kvm; | 79 | kvm = resampler->kvm; |
131 | 80 | ||
132 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | 81 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, |
@@ -141,9 +90,9 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) | |||
141 | } | 90 | } |
142 | 91 | ||
143 | static void | 92 | static void |
144 | irqfd_resampler_shutdown(struct _irqfd *irqfd) | 93 | irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) |
145 | { | 94 | { |
146 | struct _irqfd_resampler *resampler = irqfd->resampler; | 95 | struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler; |
147 | struct kvm *kvm = resampler->kvm; | 96 | struct kvm *kvm = resampler->kvm; |
148 | 97 | ||
149 | mutex_lock(&kvm->irqfds.resampler_lock); | 98 | mutex_lock(&kvm->irqfds.resampler_lock); |
@@ -168,7 +117,8 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) | |||
168 | static void | 117 | static void |
169 | irqfd_shutdown(struct work_struct *work) | 118 | irqfd_shutdown(struct work_struct *work) |
170 | { | 119 | { |
171 | struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); | 120 | struct kvm_kernel_irqfd *irqfd = |
121 | container_of(work, struct kvm_kernel_irqfd, shutdown); | ||
172 | u64 cnt; | 122 | u64 cnt; |
173 | 123 | ||
174 | /* | 124 | /* |
@@ -191,6 +141,9 @@ irqfd_shutdown(struct work_struct *work) | |||
191 | /* | 141 | /* |
192 | * It is now safe to release the object's resources | 142 | * It is now safe to release the object's resources |
193 | */ | 143 | */ |
144 | #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS | ||
145 | irq_bypass_unregister_consumer(&irqfd->consumer); | ||
146 | #endif | ||
194 | eventfd_ctx_put(irqfd->eventfd); | 147 | eventfd_ctx_put(irqfd->eventfd); |
195 | kfree(irqfd); | 148 | kfree(irqfd); |
196 | } | 149 | } |
@@ -198,7 +151,7 @@ irqfd_shutdown(struct work_struct *work) | |||
198 | 151 | ||
199 | /* assumes kvm->irqfds.lock is held */ | 152 | /* assumes kvm->irqfds.lock is held */ |
200 | static bool | 153 | static bool |
201 | irqfd_is_active(struct _irqfd *irqfd) | 154 | irqfd_is_active(struct kvm_kernel_irqfd *irqfd) |
202 | { | 155 | { |
203 | return list_empty(&irqfd->list) ? false : true; | 156 | return list_empty(&irqfd->list) ? false : true; |
204 | } | 157 | } |
@@ -209,7 +162,7 @@ irqfd_is_active(struct _irqfd *irqfd) | |||
209 | * assumes kvm->irqfds.lock is held | 162 | * assumes kvm->irqfds.lock is held |
210 | */ | 163 | */ |
211 | static void | 164 | static void |
212 | irqfd_deactivate(struct _irqfd *irqfd) | 165 | irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) |
213 | { | 166 | { |
214 | BUG_ON(!irqfd_is_active(irqfd)); | 167 | BUG_ON(!irqfd_is_active(irqfd)); |
215 | 168 | ||
@@ -218,13 +171,23 @@ irqfd_deactivate(struct _irqfd *irqfd) | |||
218 | queue_work(irqfd_cleanup_wq, &irqfd->shutdown); | 171 | queue_work(irqfd_cleanup_wq, &irqfd->shutdown); |
219 | } | 172 | } |
220 | 173 | ||
174 | int __attribute__((weak)) kvm_arch_set_irq_inatomic( | ||
175 | struct kvm_kernel_irq_routing_entry *irq, | ||
176 | struct kvm *kvm, int irq_source_id, | ||
177 | int level, | ||
178 | bool line_status) | ||
179 | { | ||
180 | return -EWOULDBLOCK; | ||
181 | } | ||
182 | |||
221 | /* | 183 | /* |
222 | * Called with wqh->lock held and interrupts disabled | 184 | * Called with wqh->lock held and interrupts disabled |
223 | */ | 185 | */ |
224 | static int | 186 | static int |
225 | irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) | 187 | irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) |
226 | { | 188 | { |
227 | struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); | 189 | struct kvm_kernel_irqfd *irqfd = |
190 | container_of(wait, struct kvm_kernel_irqfd, wait); | ||
228 | unsigned long flags = (unsigned long)key; | 191 | unsigned long flags = (unsigned long)key; |
229 | struct kvm_kernel_irq_routing_entry irq; | 192 | struct kvm_kernel_irq_routing_entry irq; |
230 | struct kvm *kvm = irqfd->kvm; | 193 | struct kvm *kvm = irqfd->kvm; |
@@ -238,10 +201,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) | |||
238 | irq = irqfd->irq_entry; | 201 | irq = irqfd->irq_entry; |
239 | } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); | 202 | } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); |
240 | /* An event has been signaled, inject an interrupt */ | 203 | /* An event has been signaled, inject an interrupt */ |
241 | if (irq.type == KVM_IRQ_ROUTING_MSI) | 204 | if (kvm_arch_set_irq_inatomic(&irq, kvm, |
242 | kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, | 205 | KVM_USERSPACE_IRQ_SOURCE_ID, 1, |
243 | false); | 206 | false) == -EWOULDBLOCK) |
244 | else | ||
245 | schedule_work(&irqfd->inject); | 207 | schedule_work(&irqfd->inject); |
246 | srcu_read_unlock(&kvm->irq_srcu, idx); | 208 | srcu_read_unlock(&kvm->irq_srcu, idx); |
247 | } | 209 | } |
@@ -274,37 +236,54 @@ static void | |||
274 | irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, | 236 | irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, |
275 | poll_table *pt) | 237 | poll_table *pt) |
276 | { | 238 | { |
277 | struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); | 239 | struct kvm_kernel_irqfd *irqfd = |
240 | container_of(pt, struct kvm_kernel_irqfd, pt); | ||
278 | add_wait_queue(wqh, &irqfd->wait); | 241 | add_wait_queue(wqh, &irqfd->wait); |
279 | } | 242 | } |
280 | 243 | ||
281 | /* Must be called under irqfds.lock */ | 244 | /* Must be called under irqfds.lock */ |
282 | static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) | 245 | static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) |
283 | { | 246 | { |
284 | struct kvm_kernel_irq_routing_entry *e; | 247 | struct kvm_kernel_irq_routing_entry *e; |
285 | struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; | 248 | struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; |
286 | int i, n_entries; | 249 | int n_entries; |
287 | 250 | ||
288 | n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); | 251 | n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); |
289 | 252 | ||
290 | write_seqcount_begin(&irqfd->irq_entry_sc); | 253 | write_seqcount_begin(&irqfd->irq_entry_sc); |
291 | 254 | ||
292 | irqfd->irq_entry.type = 0; | ||
293 | |||
294 | e = entries; | 255 | e = entries; |
295 | for (i = 0; i < n_entries; ++i, ++e) { | 256 | if (n_entries == 1) |
296 | /* Only fast-path MSI. */ | 257 | irqfd->irq_entry = *e; |
297 | if (e->type == KVM_IRQ_ROUTING_MSI) | 258 | else |
298 | irqfd->irq_entry = *e; | 259 | irqfd->irq_entry.type = 0; |
299 | } | ||
300 | 260 | ||
301 | write_seqcount_end(&irqfd->irq_entry_sc); | 261 | write_seqcount_end(&irqfd->irq_entry_sc); |
302 | } | 262 | } |
303 | 263 | ||
264 | #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS | ||
265 | void __attribute__((weak)) kvm_arch_irq_bypass_stop( | ||
266 | struct irq_bypass_consumer *cons) | ||
267 | { | ||
268 | } | ||
269 | |||
270 | void __attribute__((weak)) kvm_arch_irq_bypass_start( | ||
271 | struct irq_bypass_consumer *cons) | ||
272 | { | ||
273 | } | ||
274 | |||
275 | int __attribute__((weak)) kvm_arch_update_irqfd_routing( | ||
276 | struct kvm *kvm, unsigned int host_irq, | ||
277 | uint32_t guest_irq, bool set) | ||
278 | { | ||
279 | return 0; | ||
280 | } | ||
281 | #endif | ||
282 | |||
304 | static int | 283 | static int |
305 | kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | 284 | kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) |
306 | { | 285 | { |
307 | struct _irqfd *irqfd, *tmp; | 286 | struct kvm_kernel_irqfd *irqfd, *tmp; |
308 | struct fd f; | 287 | struct fd f; |
309 | struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; | 288 | struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; |
310 | int ret; | 289 | int ret; |
@@ -340,7 +319,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
340 | irqfd->eventfd = eventfd; | 319 | irqfd->eventfd = eventfd; |
341 | 320 | ||
342 | if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { | 321 | if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { |
343 | struct _irqfd_resampler *resampler; | 322 | struct kvm_kernel_irqfd_resampler *resampler; |
344 | 323 | ||
345 | resamplefd = eventfd_ctx_fdget(args->resamplefd); | 324 | resamplefd = eventfd_ctx_fdget(args->resamplefd); |
346 | if (IS_ERR(resamplefd)) { | 325 | if (IS_ERR(resamplefd)) { |
@@ -428,6 +407,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
428 | * we might race against the POLLHUP | 407 | * we might race against the POLLHUP |
429 | */ | 408 | */ |
430 | fdput(f); | 409 | fdput(f); |
410 | #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS | ||
411 | irqfd->consumer.token = (void *)irqfd->eventfd; | ||
412 | irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer; | ||
413 | irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer; | ||
414 | irqfd->consumer.stop = kvm_arch_irq_bypass_stop; | ||
415 | irqfd->consumer.start = kvm_arch_irq_bypass_start; | ||
416 | ret = irq_bypass_register_consumer(&irqfd->consumer); | ||
417 | if (ret) | ||
418 | pr_info("irq bypass consumer (token %p) registration fails: %d\n", | ||
419 | irqfd->consumer.token, ret); | ||
420 | #endif | ||
431 | 421 | ||
432 | return 0; | 422 | return 0; |
433 | 423 | ||
@@ -469,9 +459,18 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) | |||
469 | } | 459 | } |
470 | EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); | 460 | EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); |
471 | 461 | ||
472 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | 462 | void kvm_notify_acked_gsi(struct kvm *kvm, int gsi) |
473 | { | 463 | { |
474 | struct kvm_irq_ack_notifier *kian; | 464 | struct kvm_irq_ack_notifier *kian; |
465 | |||
466 | hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, | ||
467 | link) | ||
468 | if (kian->gsi == gsi) | ||
469 | kian->irq_acked(kian); | ||
470 | } | ||
471 | |||
472 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | ||
473 | { | ||
475 | int gsi, idx; | 474 | int gsi, idx; |
476 | 475 | ||
477 | trace_kvm_ack_irq(irqchip, pin); | 476 | trace_kvm_ack_irq(irqchip, pin); |
@@ -479,10 +478,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | |||
479 | idx = srcu_read_lock(&kvm->irq_srcu); | 478 | idx = srcu_read_lock(&kvm->irq_srcu); |
480 | gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); | 479 | gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); |
481 | if (gsi != -1) | 480 | if (gsi != -1) |
482 | hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, | 481 | kvm_notify_acked_gsi(kvm, gsi); |
483 | link) | ||
484 | if (kian->gsi == gsi) | ||
485 | kian->irq_acked(kian); | ||
486 | srcu_read_unlock(&kvm->irq_srcu, idx); | 482 | srcu_read_unlock(&kvm->irq_srcu, idx); |
487 | } | 483 | } |
488 | 484 | ||
@@ -525,7 +521,7 @@ kvm_eventfd_init(struct kvm *kvm) | |||
525 | static int | 521 | static int |
526 | kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) | 522 | kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) |
527 | { | 523 | { |
528 | struct _irqfd *irqfd, *tmp; | 524 | struct kvm_kernel_irqfd *irqfd, *tmp; |
529 | struct eventfd_ctx *eventfd; | 525 | struct eventfd_ctx *eventfd; |
530 | 526 | ||
531 | eventfd = eventfd_ctx_fdget(args->fd); | 527 | eventfd = eventfd_ctx_fdget(args->fd); |
@@ -581,7 +577,7 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) | |||
581 | void | 577 | void |
582 | kvm_irqfd_release(struct kvm *kvm) | 578 | kvm_irqfd_release(struct kvm *kvm) |
583 | { | 579 | { |
584 | struct _irqfd *irqfd, *tmp; | 580 | struct kvm_kernel_irqfd *irqfd, *tmp; |
585 | 581 | ||
586 | spin_lock_irq(&kvm->irqfds.lock); | 582 | spin_lock_irq(&kvm->irqfds.lock); |
587 | 583 | ||
@@ -604,13 +600,23 @@ kvm_irqfd_release(struct kvm *kvm) | |||
604 | */ | 600 | */ |
605 | void kvm_irq_routing_update(struct kvm *kvm) | 601 | void kvm_irq_routing_update(struct kvm *kvm) |
606 | { | 602 | { |
607 | struct _irqfd *irqfd; | 603 | struct kvm_kernel_irqfd *irqfd; |
608 | 604 | ||
609 | spin_lock_irq(&kvm->irqfds.lock); | 605 | spin_lock_irq(&kvm->irqfds.lock); |
610 | 606 | ||
611 | list_for_each_entry(irqfd, &kvm->irqfds.items, list) | 607 | list_for_each_entry(irqfd, &kvm->irqfds.items, list) { |
612 | irqfd_update(kvm, irqfd); | 608 | irqfd_update(kvm, irqfd); |
613 | 609 | ||
610 | #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS | ||
611 | if (irqfd->producer) { | ||
612 | int ret = kvm_arch_update_irqfd_routing( | ||
613 | irqfd->kvm, irqfd->producer->irq, | ||
614 | irqfd->gsi, 1); | ||
615 | WARN_ON(ret); | ||
616 | } | ||
617 | #endif | ||
618 | } | ||
619 | |||
614 | spin_unlock_irq(&kvm->irqfds.lock); | 620 | spin_unlock_irq(&kvm->irqfds.lock); |
615 | } | 621 | } |
616 | 622 | ||
@@ -914,9 +920,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
914 | return -EINVAL; | 920 | return -EINVAL; |
915 | 921 | ||
916 | /* ioeventfd with no length can't be combined with DATAMATCH */ | 922 | /* ioeventfd with no length can't be combined with DATAMATCH */ |
917 | if (!args->len && | 923 | if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)) |
918 | args->flags & (KVM_IOEVENTFD_FLAG_PIO | | ||
919 | KVM_IOEVENTFD_FLAG_DATAMATCH)) | ||
920 | return -EINVAL; | 924 | return -EINVAL; |
921 | 925 | ||
922 | ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); | 926 | ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); |
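The irqfd_wakeup() change above replaces the MSI-only fast path with a call to kvm_arch_set_irq_inatomic(); the weak default returns -EWOULDBLOCK, which falls back to the old schedule_work() path, while an architecture can inject directly from the wait-queue callback when that is safe. A hedged sketch of such an override (hypothetical; the real x86 version lives in arch code and covers more routing types):

int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
			      struct kvm *kvm, int irq_source_id,
			      int level, bool line_status)
{
	/* MSI can be delivered without sleeping, so handle it inline */
	if (e->type == KVM_IRQ_ROUTING_MSI)
		return kvm_set_msi(e, kvm, irq_source_id, level, line_status);

	/* anything else: tell the caller to defer to the work item */
	return -EWOULDBLOCK;
}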
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index d7ea8e20dae4..f0b08a2a48ba 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c | |||
@@ -31,16 +31,6 @@ | |||
31 | #include <trace/events/kvm.h> | 31 | #include <trace/events/kvm.h> |
32 | #include "irq.h" | 32 | #include "irq.h" |
33 | 33 | ||
34 | struct kvm_irq_routing_table { | ||
35 | int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; | ||
36 | u32 nr_rt_entries; | ||
37 | /* | ||
38 | * Array indexed by gsi. Each entry contains list of irq chips | ||
39 | * the gsi is connected to. | ||
40 | */ | ||
41 | struct hlist_head map[0]; | ||
42 | }; | ||
43 | |||
44 | int kvm_irq_map_gsi(struct kvm *kvm, | 34 | int kvm_irq_map_gsi(struct kvm *kvm, |
45 | struct kvm_kernel_irq_routing_entry *entries, int gsi) | 35 | struct kvm_kernel_irq_routing_entry *entries, int gsi) |
46 | { | 36 | { |
@@ -154,11 +144,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, | |||
154 | 144 | ||
155 | /* | 145 | /* |
156 | * Do not allow GSI to be mapped to the same irqchip more than once. | 146 | * Do not allow GSI to be mapped to the same irqchip more than once. |
157 | * Allow only one to one mapping between GSI and MSI. | 147 | * Allow only one to one mapping between GSI and non-irqchip routing. |
158 | */ | 148 | */ |
159 | hlist_for_each_entry(ei, &rt->map[ue->gsi], link) | 149 | hlist_for_each_entry(ei, &rt->map[ue->gsi], link) |
160 | if (ei->type == KVM_IRQ_ROUTING_MSI || | 150 | if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || |
161 | ue->type == KVM_IRQ_ROUTING_MSI || | 151 | ue->type != KVM_IRQ_ROUTING_IRQCHIP || |
162 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) | 152 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) |
163 | return r; | 153 | return r; |
164 | 154 | ||
@@ -231,6 +221,8 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
231 | kvm_irq_routing_update(kvm); | 221 | kvm_irq_routing_update(kvm); |
232 | mutex_unlock(&kvm->irq_lock); | 222 | mutex_unlock(&kvm->irq_lock); |
233 | 223 | ||
224 | kvm_arch_irq_routing_update(kvm); | ||
225 | |||
234 | synchronize_srcu_expedited(&kvm->irq_srcu); | 226 | synchronize_srcu_expedited(&kvm->irq_srcu); |
235 | 227 | ||
236 | new = old; | 228 | new = old; |
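setup_routing_entry() now rejects a new entry whenever either the new or the already-installed route for that GSI is of a non-irqchip type, generalizing the old MSI-only restriction so that every non-irqchip route keeps a one-to-one GSI mapping while plain irqchip pins may still fan out to several chips. The condition, pulled out into a hypothetical helper for readability (not a function that exists in the kernel):

static bool example_routing_conflict(struct kvm_kernel_irq_routing_entry *ei,
				     struct kvm_irq_routing_entry *ue)
{
	/* conflict unless both routes are irqchip pins on different irqchips */
	return ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
	       ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
	       ue->u.irqchip.irqchip == ei->irqchip.irqchip;
}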
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8db1d9361993..484079efea5b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -230,6 +230,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | |||
230 | init_waitqueue_head(&vcpu->wq); | 230 | init_waitqueue_head(&vcpu->wq); |
231 | kvm_async_pf_vcpu_init(vcpu); | 231 | kvm_async_pf_vcpu_init(vcpu); |
232 | 232 | ||
233 | vcpu->pre_pcpu = -1; | ||
234 | INIT_LIST_HEAD(&vcpu->blocked_vcpu_list); | ||
235 | |||
233 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 236 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
234 | if (!page) { | 237 | if (!page) { |
235 | r = -ENOMEM; | 238 | r = -ENOMEM; |
@@ -2018,6 +2021,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
2018 | } while (single_task_running() && ktime_before(cur, stop)); | 2021 | } while (single_task_running() && ktime_before(cur, stop)); |
2019 | } | 2022 | } |
2020 | 2023 | ||
2024 | kvm_arch_vcpu_blocking(vcpu); | ||
2025 | |||
2021 | for (;;) { | 2026 | for (;;) { |
2022 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | 2027 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); |
2023 | 2028 | ||
@@ -2031,6 +2036,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
2031 | finish_wait(&vcpu->wq, &wait); | 2036 | finish_wait(&vcpu->wq, &wait); |
2032 | cur = ktime_get(); | 2037 | cur = ktime_get(); |
2033 | 2038 | ||
2039 | kvm_arch_vcpu_unblocking(vcpu); | ||
2034 | out: | 2040 | out: |
2035 | block_ns = ktime_to_ns(cur) - ktime_to_ns(start); | 2041 | block_ns = ktime_to_ns(cur) - ktime_to_ns(start); |
2036 | 2042 | ||
@@ -2718,6 +2724,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) | |||
2718 | case KVM_CAP_IRQFD: | 2724 | case KVM_CAP_IRQFD: |
2719 | case KVM_CAP_IRQFD_RESAMPLE: | 2725 | case KVM_CAP_IRQFD_RESAMPLE: |
2720 | #endif | 2726 | #endif |
2727 | case KVM_CAP_IOEVENTFD_ANY_LENGTH: | ||
2721 | case KVM_CAP_CHECK_EXTENSION_VM: | 2728 | case KVM_CAP_CHECK_EXTENSION_VM: |
2722 | return 1; | 2729 | return 1; |
2723 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING | 2730 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING |
@@ -3341,7 +3348,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
3341 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) | 3348 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) |
3342 | return -ENOSPC; | 3349 | return -ENOSPC; |
3343 | 3350 | ||
3344 | new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) * | 3351 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) * |
3345 | sizeof(struct kvm_io_range)), GFP_KERNEL); | 3352 | sizeof(struct kvm_io_range)), GFP_KERNEL); |
3346 | if (!new_bus) | 3353 | if (!new_bus) |
3347 | return -ENOMEM; | 3354 | return -ENOMEM; |
@@ -3373,7 +3380,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | |||
3373 | if (r) | 3380 | if (r) |
3374 | return r; | 3381 | return r; |
3375 | 3382 | ||
3376 | new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) * | 3383 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * |
3377 | sizeof(struct kvm_io_range)), GFP_KERNEL); | 3384 | sizeof(struct kvm_io_range)), GFP_KERNEL); |
3378 | if (!new_bus) | 3385 | if (!new_bus) |
3379 | return -ENOMEM; | 3386 | return -ENOMEM; |
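The kvm_main.c hunks add two small pieces of the posted-interrupt/IRQ-forwarding plumbing: each vCPU starts with pre_pcpu = -1 and an empty blocked_vcpu_list, and kvm_vcpu_block() now brackets its wait loop with kvm_arch_vcpu_blocking()/kvm_arch_vcpu_unblocking() so an architecture can re-route interrupts while the vCPU sleeps. Architectures that do not care can provide empty hooks; a hedged sketch of such stubs (illustrative, not the real arch code):

void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	/* e.g. ARM switches the arch timer to a software-injected path here */
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	/* and restores hardware handling here once the vCPU runs again */
}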
diff --git a/virt/lib/Kconfig b/virt/lib/Kconfig new file mode 100644 index 000000000000..89a414f815d2 --- /dev/null +++ b/virt/lib/Kconfig | |||
@@ -0,0 +1,2 @@ | |||
1 | config IRQ_BYPASS_MANAGER | ||
2 | tristate | ||
diff --git a/virt/lib/Makefile b/virt/lib/Makefile new file mode 100644 index 000000000000..901228d1ffbc --- /dev/null +++ b/virt/lib/Makefile | |||
@@ -0,0 +1 @@ | |||
1 | obj-$(CONFIG_IRQ_BYPASS_MANAGER) += irqbypass.o | ||
diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c new file mode 100644 index 000000000000..09a03b5a21ff --- /dev/null +++ b/virt/lib/irqbypass.c | |||
@@ -0,0 +1,257 @@ | |||
1 | /* | ||
2 | * IRQ offload/bypass manager | ||
3 | * | ||
4 | * Copyright (C) 2015 Red Hat, Inc. | ||
5 | * Copyright (c) 2015 Linaro Ltd. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * Various virtualization hardware acceleration techniques allow bypassing or | ||
12 | * offloading interrupts received from devices around the host kernel. Posted | ||
13 | * Interrupts on Intel VT-d systems can allow interrupts to be received | ||
14 | * directly by a virtual machine. ARM IRQ Forwarding allows forwarded physical | ||
15 | * interrupts to be directly deactivated by the guest. This manager allows | ||
16 | * interrupt producers and consumers to find each other to enable this sort of | ||
17 | * bypass. | ||
18 | */ | ||
19 | |||
20 | #include <linux/irqbypass.h> | ||
21 | #include <linux/list.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/mutex.h> | ||
24 | |||
25 | MODULE_LICENSE("GPL v2"); | ||
26 | MODULE_DESCRIPTION("IRQ bypass manager utility module"); | ||
27 | |||
28 | static LIST_HEAD(producers); | ||
29 | static LIST_HEAD(consumers); | ||
30 | static DEFINE_MUTEX(lock); | ||
31 | |||
32 | /* @lock must be held when calling connect */ | ||
33 | static int __connect(struct irq_bypass_producer *prod, | ||
34 | struct irq_bypass_consumer *cons) | ||
35 | { | ||
36 | int ret = 0; | ||
37 | |||
38 | if (prod->stop) | ||
39 | prod->stop(prod); | ||
40 | if (cons->stop) | ||
41 | cons->stop(cons); | ||
42 | |||
43 | if (prod->add_consumer) | ||
44 | ret = prod->add_consumer(prod, cons); | ||
45 | |||
46 | if (!ret) { | ||
47 | ret = cons->add_producer(cons, prod); | ||
48 | if (ret && prod->del_consumer) | ||
49 | prod->del_consumer(prod, cons); | ||
50 | } | ||
51 | |||
52 | if (cons->start) | ||
53 | cons->start(cons); | ||
54 | if (prod->start) | ||
55 | prod->start(prod); | ||
56 | |||
57 | return ret; | ||
58 | } | ||
59 | |||
60 | /* @lock must be held when calling disconnect */ | ||
61 | static void __disconnect(struct irq_bypass_producer *prod, | ||
62 | struct irq_bypass_consumer *cons) | ||
63 | { | ||
64 | if (prod->stop) | ||
65 | prod->stop(prod); | ||
66 | if (cons->stop) | ||
67 | cons->stop(cons); | ||
68 | |||
69 | cons->del_producer(cons, prod); | ||
70 | |||
71 | if (prod->del_consumer) | ||
72 | prod->del_consumer(prod, cons); | ||
73 | |||
74 | if (cons->start) | ||
75 | cons->start(cons); | ||
76 | if (prod->start) | ||
77 | prod->start(prod); | ||
78 | } | ||
79 | |||
80 | /** | ||
81 | * irq_bypass_register_producer - register IRQ bypass producer | ||
82 | * @producer: pointer to producer structure | ||
83 | * | ||
84 | * Add the provided IRQ producer to the list of producers and connect | ||
85 | * with any matching token found on the IRQ consumers list. | ||
86 | */ | ||
87 | int irq_bypass_register_producer(struct irq_bypass_producer *producer) | ||
88 | { | ||
89 | struct irq_bypass_producer *tmp; | ||
90 | struct irq_bypass_consumer *consumer; | ||
91 | |||
92 | might_sleep(); | ||
93 | |||
94 | if (!try_module_get(THIS_MODULE)) | ||
95 | return -ENODEV; | ||
96 | |||
97 | mutex_lock(&lock); | ||
98 | |||
99 | list_for_each_entry(tmp, &producers, node) { | ||
100 | if (tmp->token == producer->token) { | ||
101 | mutex_unlock(&lock); | ||
102 | module_put(THIS_MODULE); | ||
103 | return -EBUSY; | ||
104 | } | ||
105 | } | ||
106 | |||
107 | list_for_each_entry(consumer, &consumers, node) { | ||
108 | if (consumer->token == producer->token) { | ||
109 | int ret = __connect(producer, consumer); | ||
110 | if (ret) { | ||
111 | mutex_unlock(&lock); | ||
112 | module_put(THIS_MODULE); | ||
113 | return ret; | ||
114 | } | ||
115 | break; | ||
116 | } | ||
117 | } | ||
118 | |||
119 | list_add(&producer->node, &producers); | ||
120 | |||
121 | mutex_unlock(&lock); | ||
122 | |||
123 | return 0; | ||
124 | } | ||
125 | EXPORT_SYMBOL_GPL(irq_bypass_register_producer); | ||
126 | |||
127 | /** | ||
128 | * irq_bypass_unregister_producer - unregister IRQ bypass producer | ||
129 | * @producer: pointer to producer structure | ||
130 | * | ||
131 | * Remove a previously registered IRQ producer from the list of producers | ||
132 | * and disconnect it from any connected IRQ consumer. | ||
133 | */ | ||
134 | void irq_bypass_unregister_producer(struct irq_bypass_producer *producer) | ||
135 | { | ||
136 | struct irq_bypass_producer *tmp; | ||
137 | struct irq_bypass_consumer *consumer; | ||
138 | |||
139 | might_sleep(); | ||
140 | |||
141 | if (!try_module_get(THIS_MODULE)) | ||
142 | return; /* nothing in the list anyway */ | ||
143 | |||
144 | mutex_lock(&lock); | ||
145 | |||
146 | list_for_each_entry(tmp, &producers, node) { | ||
147 | if (tmp->token != producer->token) | ||
148 | continue; | ||
149 | |||
150 | list_for_each_entry(consumer, &consumers, node) { | ||
151 | if (consumer->token == producer->token) { | ||
152 | __disconnect(producer, consumer); | ||
153 | break; | ||
154 | } | ||
155 | } | ||
156 | |||
157 | list_del(&producer->node); | ||
158 | module_put(THIS_MODULE); | ||
159 | break; | ||
160 | } | ||
161 | |||
162 | mutex_unlock(&lock); | ||
163 | |||
164 | module_put(THIS_MODULE); | ||
165 | } | ||
166 | EXPORT_SYMBOL_GPL(irq_bypass_unregister_producer); | ||
167 | |||
168 | /** | ||
169 | * irq_bypass_register_consumer - register IRQ bypass consumer | ||
170 | * @consumer: pointer to consumer structure | ||
171 | * | ||
172 | * Add the provided IRQ consumer to the list of consumers and connect | ||
173 | * with any matching token found on the IRQ producer list. | ||
174 | */ | ||
175 | int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer) | ||
176 | { | ||
177 | struct irq_bypass_consumer *tmp; | ||
178 | struct irq_bypass_producer *producer; | ||
179 | |||
180 | if (!consumer->add_producer || !consumer->del_producer) | ||
181 | return -EINVAL; | ||
182 | |||
183 | might_sleep(); | ||
184 | |||
185 | if (!try_module_get(THIS_MODULE)) | ||
186 | return -ENODEV; | ||
187 | |||
188 | mutex_lock(&lock); | ||
189 | |||
190 | list_for_each_entry(tmp, &consumers, node) { | ||
191 | if (tmp->token == consumer->token) { | ||
192 | mutex_unlock(&lock); | ||
193 | module_put(THIS_MODULE); | ||
194 | return -EBUSY; | ||
195 | } | ||
196 | } | ||
197 | |||
198 | list_for_each_entry(producer, &producers, node) { | ||
199 | if (producer->token == consumer->token) { | ||
200 | int ret = __connect(producer, consumer); | ||
201 | if (ret) { | ||
202 | mutex_unlock(&lock); | ||
203 | module_put(THIS_MODULE); | ||
204 | return ret; | ||
205 | } | ||
206 | break; | ||
207 | } | ||
208 | } | ||
209 | |||
210 | list_add(&consumer->node, &consumers); | ||
211 | |||
212 | mutex_unlock(&lock); | ||
213 | |||
214 | return 0; | ||
215 | } | ||
216 | EXPORT_SYMBOL_GPL(irq_bypass_register_consumer); | ||
217 | |||
218 | /** | ||
219 | * irq_bypass_unregister_consumer - unregister IRQ bypass consumer | ||
220 | * @consumer: pointer to consumer structure | ||
221 | * | ||
222 | * Remove a previously registered IRQ consumer from the list of consumers | ||
223 | * and disconnect it from any connected IRQ producer. | ||
224 | */ | ||
225 | void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer) | ||
226 | { | ||
227 | struct irq_bypass_consumer *tmp; | ||
228 | struct irq_bypass_producer *producer; | ||
229 | |||
230 | might_sleep(); | ||
231 | |||
232 | if (!try_module_get(THIS_MODULE)) | ||
233 | return; /* nothing in the list anyway */ | ||
234 | |||
235 | mutex_lock(&lock); | ||
236 | |||
237 | list_for_each_entry(tmp, &consumers, node) { | ||
238 | if (tmp->token != consumer->token) | ||
239 | continue; | ||
240 | |||
241 | list_for_each_entry(producer, &producers, node) { | ||
242 | if (producer->token == consumer->token) { | ||
243 | __disconnect(producer, consumer); | ||
244 | break; | ||
245 | } | ||
246 | } | ||
247 | |||
248 | list_del(&consumer->node); | ||
249 | module_put(THIS_MODULE); | ||
250 | break; | ||
251 | } | ||
252 | |||
253 | mutex_unlock(&lock); | ||
254 | |||
255 | module_put(THIS_MODULE); | ||
256 | } | ||
257 | EXPORT_SYMBOL_GPL(irq_bypass_unregister_consumer); | ||
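Taken together with the eventfd.c changes, irqbypass.c gives producers (such as VFIO for an assigned device) and consumers (such as the KVM irqfd) a rendezvous keyed by an opaque token: whichever side registers second is connected through __connect(), with stop/start bracketing the handover. A hedged usage sketch; everything outside the irq_bypass_* API and the two structures shown above is invented for illustration:

static struct irq_bypass_producer my_producer;
static struct irq_bypass_consumer my_consumer;

static int my_add_producer(struct irq_bypass_consumer *cons,
			   struct irq_bypass_producer *prod)
{
	/* e.g. program a posted-interrupt entry for prod->irq */
	return 0;
}

static void my_del_producer(struct irq_bypass_consumer *cons,
			    struct irq_bypass_producer *prod)
{
	/* tear the bypass mapping down again */
}

static int example_pairing(void *shared_token, int host_irq)
{
	int ret;

	my_producer.token = shared_token;
	my_producer.irq = host_irq;

	my_consumer.token = shared_token;	/* must match the producer */
	my_consumer.add_producer = my_add_producer;
	my_consumer.del_producer = my_del_producer;

	ret = irq_bypass_register_producer(&my_producer);
	if (ret)
		return ret;

	/* registering the consumer with the same token triggers __connect() */
	return irq_bypass_register_consumer(&my_consumer);
}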