path: root/virt/kvm
author		Linus Torvalds <torvalds@linux-foundation.org>	2009-06-11 13:03:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-06-11 13:03:30 -0400
commit		6cd8e300b49332eb9eeda45816c711c198d31505 (patch)
tree		246faf935687066da6efc6506ed71a04e61a2f02 /virt/kvm
parent		ddbb868493abdb71d6c0e3ff93f735923842de38 (diff)
parent		09f8ca74ae6c2d78b2c7f6c0751ed0cbe815a3d9 (diff)
Merge branch 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (138 commits)
  KVM: Prevent overflow in largepages calculation
  KVM: Disable large pages on misaligned memory slots
  KVM: Add VT-x machine check support
  KVM: VMX: Rename rmode.active to rmode.vm86_active
  KVM: Move "exit due to NMI" handling into vmx_complete_interrupts()
  KVM: Disable CR8 intercept if tpr patching is active
  KVM: Do not migrate pending software interrupts.
  KVM: inject NMI after IRET from a previous NMI, not before.
  KVM: Always request IRQ/NMI window if an interrupt is pending
  KVM: Do not re-execute INTn instruction.
  KVM: skip_emulated_instruction() decode instruction if size is not known
  KVM: Remove irq_pending bitmap
  KVM: Do not allow interrupt injection from userspace if there is a pending event.
  KVM: Unprotect a page if #PF happens during NMI injection.
  KVM: s390: Verify memory in kvm run
  KVM: s390: Sanity check on validity intercept
  KVM: s390: Unlink vcpu on destroy - v2
  KVM: s390: optimize float int lock: spin_lock_bh --> spin_lock
  KVM: s390: use hrtimer for clock wakeup from idle - v2
  KVM: s390: Fix memory slot versus run - v3
  ...
Diffstat (limited to 'virt/kvm')
-rw-r--r--	virt/kvm/ioapic.c	153
-rw-r--r--	virt/kvm/ioapic.h	27
-rw-r--r--	virt/kvm/iommu.c	27
-rw-r--r--	virt/kvm/irq_comm.c	111
-rw-r--r--	virt/kvm/kvm_main.c	678
5 files changed, 604 insertions(+), 392 deletions(-)
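
Before the diff itself, a minimal userspace sketch of how the reworked assigned-device interrupt ioctls in the kvm_main.c changes below (KVM_ASSIGN_DEV_IRQ / KVM_DEASSIGN_DEV_IRQ with the KVM_DEV_IRQ_HOST_*/GUEST_* flags) are meant to be driven. This is not part of the commit: vm_fd, dev_id and guest_gsi are assumed to come from earlier KVM_CREATE_VM and device-assignment calls, and the flag names are those introduced by this series in linux/kvm.h.

/* Illustrative only: exercises the assign/deassign IRQ ioctls added below.
 * vm_fd and dev_id are assumed to come from earlier KVM_CREATE_VM and
 * device-assignment ioctls (not shown here).
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int assign_msi_irq(int vm_fd, __u32 dev_id, __u32 guest_gsi)
{
	struct kvm_assigned_irq irq;

	memset(&irq, 0, sizeof(irq));
	irq.assigned_dev_id = dev_id;
	irq.guest_irq = guest_gsi;
	/* At most one host type and one guest type per call; host MSI
	 * routed to a guest MSI here. */
	irq.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI;

	return ioctl(vm_fd, KVM_ASSIGN_DEV_IRQ, &irq);
}

static int deassign_msi_irq(int vm_fd, __u32 dev_id)
{
	struct kvm_assigned_irq irq;

	memset(&irq, 0, sizeof(irq));
	irq.assigned_dev_id = dev_id;
	/* The flags pick which halves to tear down, mirroring the new
	 * kvm_deassign_irq() host/guest mask split. */
	irq.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI;

	return ioctl(vm_fd, KVM_DEASSIGN_DEV_IRQ, &irq);
}

As the new kvm_vm_ioctl_assign_irq() below enforces, a single call may request at most one host interrupt type and one guest interrupt type.
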
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index c3b99def9cbc..1eddae94bab3 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -85,7 +85,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
85 85
86static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) 86static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
87{ 87{
88 union ioapic_redir_entry *pent; 88 union kvm_ioapic_redirect_entry *pent;
89 int injected = -1; 89 int injected = -1;
90 90
91 pent = &ioapic->redirtbl[idx]; 91 pent = &ioapic->redirtbl[idx];
@@ -142,149 +142,40 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
142 } 142 }
143} 143}
144 144
145static int ioapic_inj_irq(struct kvm_ioapic *ioapic,
146 struct kvm_vcpu *vcpu,
147 u8 vector, u8 trig_mode, u8 delivery_mode)
148{
149 ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode,
150 delivery_mode);
151
152 ASSERT((delivery_mode == IOAPIC_FIXED) ||
153 (delivery_mode == IOAPIC_LOWEST_PRIORITY));
154
155 return kvm_apic_set_irq(vcpu, vector, trig_mode);
156}
157
158static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
159{
160 kvm_inject_nmi(vcpu);
161 kvm_vcpu_kick(vcpu);
162}
163
164u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
165 u8 dest_mode)
166{
167 u32 mask = 0;
168 int i;
169 struct kvm *kvm = ioapic->kvm;
170 struct kvm_vcpu *vcpu;
171
172 ioapic_debug("dest %d dest_mode %d\n", dest, dest_mode);
173
174 if (dest_mode == 0) { /* Physical mode. */
175 if (dest == 0xFF) { /* Broadcast. */
176 for (i = 0; i < KVM_MAX_VCPUS; ++i)
177 if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic)
178 mask |= 1 << i;
179 return mask;
180 }
181 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
182 vcpu = kvm->vcpus[i];
183 if (!vcpu)
184 continue;
185 if (kvm_apic_match_physical_addr(vcpu->arch.apic, dest)) {
186 if (vcpu->arch.apic)
187 mask = 1 << i;
188 break;
189 }
190 }
191 } else if (dest != 0) /* Logical mode, MDA non-zero. */
192 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
193 vcpu = kvm->vcpus[i];
194 if (!vcpu)
195 continue;
196 if (vcpu->arch.apic &&
197 kvm_apic_match_logical_addr(vcpu->arch.apic, dest))
198 mask |= 1 << vcpu->vcpu_id;
199 }
200 ioapic_debug("mask %x\n", mask);
201 return mask;
202}
203
204static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) 145static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
205{ 146{
206 u8 dest = ioapic->redirtbl[irq].fields.dest_id; 147 union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
207 u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode; 148 struct kvm_lapic_irq irqe;
208 u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode;
209 u8 vector = ioapic->redirtbl[irq].fields.vector;
210 u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
211 u32 deliver_bitmask;
212 struct kvm_vcpu *vcpu;
213 int vcpu_id, r = -1;
214 149
215 ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " 150 ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
216 "vector=%x trig_mode=%x\n", 151 "vector=%x trig_mode=%x\n",
217 dest, dest_mode, delivery_mode, vector, trig_mode); 152 entry->fields.dest, entry->fields.dest_mode,
218 153 entry->fields.delivery_mode, entry->fields.vector,
219 deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest, 154 entry->fields.trig_mode);
220 dest_mode); 155
221 if (!deliver_bitmask) { 156 irqe.dest_id = entry->fields.dest_id;
222 ioapic_debug("no target on destination\n"); 157 irqe.vector = entry->fields.vector;
223 return 0; 158 irqe.dest_mode = entry->fields.dest_mode;
224 } 159 irqe.trig_mode = entry->fields.trig_mode;
160 irqe.delivery_mode = entry->fields.delivery_mode << 8;
161 irqe.level = 1;
162 irqe.shorthand = 0;
225 163
226 switch (delivery_mode) {
227 case IOAPIC_LOWEST_PRIORITY:
228 vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
229 deliver_bitmask);
230#ifdef CONFIG_X86 164#ifdef CONFIG_X86
231 if (irq == 0) 165 /* Always delivery PIT interrupt to vcpu 0 */
232 vcpu = ioapic->kvm->vcpus[0]; 166 if (irq == 0) {
233#endif 167 irqe.dest_mode = 0; /* Physical mode. */
234 if (vcpu != NULL) 168 irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id;
235 r = ioapic_inj_irq(ioapic, vcpu, vector,
236 trig_mode, delivery_mode);
237 else
238 ioapic_debug("null lowest prio vcpu: "
239 "mask=%x vector=%x delivery_mode=%x\n",
240 deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY);
241 break;
242 case IOAPIC_FIXED:
243#ifdef CONFIG_X86
244 if (irq == 0)
245 deliver_bitmask = 1;
246#endif
247 for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
248 if (!(deliver_bitmask & (1 << vcpu_id)))
249 continue;
250 deliver_bitmask &= ~(1 << vcpu_id);
251 vcpu = ioapic->kvm->vcpus[vcpu_id];
252 if (vcpu) {
253 if (r < 0)
254 r = 0;
255 r += ioapic_inj_irq(ioapic, vcpu, vector,
256 trig_mode, delivery_mode);
257 }
258 }
259 break;
260 case IOAPIC_NMI:
261 for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
262 if (!(deliver_bitmask & (1 << vcpu_id)))
263 continue;
264 deliver_bitmask &= ~(1 << vcpu_id);
265 vcpu = ioapic->kvm->vcpus[vcpu_id];
266 if (vcpu) {
267 ioapic_inj_nmi(vcpu);
268 r = 1;
269 }
270 else
271 ioapic_debug("NMI to vcpu %d failed\n",
272 vcpu->vcpu_id);
273 }
274 break;
275 default:
276 printk(KERN_WARNING "Unsupported delivery mode %d\n",
277 delivery_mode);
278 break;
279 } 169 }
280 return r; 170#endif
171 return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
281} 172}
282 173
283int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) 174int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
284{ 175{
285 u32 old_irr = ioapic->irr; 176 u32 old_irr = ioapic->irr;
286 u32 mask = 1 << irq; 177 u32 mask = 1 << irq;
287 union ioapic_redir_entry entry; 178 union kvm_ioapic_redirect_entry entry;
288 int ret = 1; 179 int ret = 1;
289 180
290 if (irq >= 0 && irq < IOAPIC_NUM_PINS) { 181 if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
@@ -305,7 +196,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
305static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, 196static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
306 int trigger_mode) 197 int trigger_mode)
307{ 198{
308 union ioapic_redir_entry *ent; 199 union kvm_ioapic_redirect_entry *ent;
309 200
310 ent = &ioapic->redirtbl[pin]; 201 ent = &ioapic->redirtbl[pin];
311 202
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index a34bd5e6436b..7080b713c160 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -40,22 +40,7 @@ struct kvm_ioapic {
40 u32 id; 40 u32 id;
41 u32 irr; 41 u32 irr;
42 u32 pad; 42 u32 pad;
43 union ioapic_redir_entry { 43 union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
44 u64 bits;
45 struct {
46 u8 vector;
47 u8 delivery_mode:3;
48 u8 dest_mode:1;
49 u8 delivery_status:1;
50 u8 polarity:1;
51 u8 remote_irr:1;
52 u8 trig_mode:1;
53 u8 mask:1;
54 u8 reserve:7;
55 u8 reserved[4];
56 u8 dest_id;
57 } fields;
58 } redirtbl[IOAPIC_NUM_PINS];
59 struct kvm_io_device dev; 44 struct kvm_io_device dev;
60 struct kvm *kvm; 45 struct kvm *kvm;
61 void (*ack_notifier)(void *opaque, int irq); 46 void (*ack_notifier)(void *opaque, int irq);
@@ -79,13 +64,13 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
79 return kvm->arch.vioapic; 64 return kvm->arch.vioapic;
80} 65}
81 66
82struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, 67int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
83 unsigned long bitmap); 68 int short_hand, int dest, int dest_mode);
69int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
84void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); 70void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
85int kvm_ioapic_init(struct kvm *kvm); 71int kvm_ioapic_init(struct kvm *kvm);
86int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); 72int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
87void kvm_ioapic_reset(struct kvm_ioapic *ioapic); 73void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
88u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, 74int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
89 u8 dest_mode); 75 struct kvm_lapic_irq *irq);
90
91#endif 76#endif
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 4c4037503600..15147583abd1 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -39,11 +39,16 @@ int kvm_iommu_map_pages(struct kvm *kvm,
39 pfn_t pfn; 39 pfn_t pfn;
40 int i, r = 0; 40 int i, r = 0;
41 struct iommu_domain *domain = kvm->arch.iommu_domain; 41 struct iommu_domain *domain = kvm->arch.iommu_domain;
42 int flags;
42 43
43 /* check if iommu exists and in use */ 44 /* check if iommu exists and in use */
44 if (!domain) 45 if (!domain)
45 return 0; 46 return 0;
46 47
48 flags = IOMMU_READ | IOMMU_WRITE;
49 if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
50 flags |= IOMMU_CACHE;
51
47 for (i = 0; i < npages; i++) { 52 for (i = 0; i < npages; i++) {
48 /* check if already mapped */ 53 /* check if already mapped */
49 if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) 54 if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
@@ -53,8 +58,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
53 r = iommu_map_range(domain, 58 r = iommu_map_range(domain,
54 gfn_to_gpa(gfn), 59 gfn_to_gpa(gfn),
55 pfn_to_hpa(pfn), 60 pfn_to_hpa(pfn),
56 PAGE_SIZE, 61 PAGE_SIZE, flags);
57 IOMMU_READ | IOMMU_WRITE);
58 if (r) { 62 if (r) {
59 printk(KERN_ERR "kvm_iommu_map_address:" 63 printk(KERN_ERR "kvm_iommu_map_address:"
60 "iommu failed to map pfn=%lx\n", pfn); 64 "iommu failed to map pfn=%lx\n", pfn);
@@ -88,7 +92,7 @@ int kvm_assign_device(struct kvm *kvm,
88{ 92{
89 struct pci_dev *pdev = NULL; 93 struct pci_dev *pdev = NULL;
90 struct iommu_domain *domain = kvm->arch.iommu_domain; 94 struct iommu_domain *domain = kvm->arch.iommu_domain;
91 int r; 95 int r, last_flags;
92 96
93 /* check if iommu exists and in use */ 97 /* check if iommu exists and in use */
94 if (!domain) 98 if (!domain)
@@ -107,12 +111,29 @@ int kvm_assign_device(struct kvm *kvm,
107 return r; 111 return r;
108 } 112 }
109 113
114 last_flags = kvm->arch.iommu_flags;
115 if (iommu_domain_has_cap(kvm->arch.iommu_domain,
116 IOMMU_CAP_CACHE_COHERENCY))
117 kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
118
119 /* Check if need to update IOMMU page table for guest memory */
120 if ((last_flags ^ kvm->arch.iommu_flags) ==
121 KVM_IOMMU_CACHE_COHERENCY) {
122 kvm_iommu_unmap_memslots(kvm);
123 r = kvm_iommu_map_memslots(kvm);
124 if (r)
125 goto out_unmap;
126 }
127
110 printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", 128 printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
111 assigned_dev->host_busnr, 129 assigned_dev->host_busnr,
112 PCI_SLOT(assigned_dev->host_devfn), 130 PCI_SLOT(assigned_dev->host_devfn),
113 PCI_FUNC(assigned_dev->host_devfn)); 131 PCI_FUNC(assigned_dev->host_devfn));
114 132
115 return 0; 133 return 0;
134out_unmap:
135 kvm_iommu_unmap_memslots(kvm);
136 return r;
116} 137}
117 138
118int kvm_deassign_device(struct kvm *kvm, 139int kvm_deassign_device(struct kvm *kvm,
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 864ac5483baa..a8bd466d00cc 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -22,6 +22,9 @@
22#include <linux/kvm_host.h> 22#include <linux/kvm_host.h>
23 23
24#include <asm/msidef.h> 24#include <asm/msidef.h>
25#ifdef CONFIG_IA64
26#include <asm/iosapic.h>
27#endif
25 28
26#include "irq.h" 29#include "irq.h"
27 30
@@ -43,57 +46,73 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
43 return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); 46 return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
44} 47}
45 48
46static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, 49inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
47 struct kvm *kvm, int level)
48{ 50{
49 int vcpu_id, r = -1; 51#ifdef CONFIG_IA64
50 struct kvm_vcpu *vcpu; 52 return irq->delivery_mode ==
51 struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); 53 (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
52 int dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK) 54#else
53 >> MSI_ADDR_DEST_ID_SHIFT; 55 return irq->delivery_mode == APIC_DM_LOWEST;
54 int vector = (e->msi.data & MSI_DATA_VECTOR_MASK) 56#endif
55 >> MSI_DATA_VECTOR_SHIFT; 57}
56 int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, 58
57 (unsigned long *)&e->msi.address_lo); 59int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
58 int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, 60 struct kvm_lapic_irq *irq)
59 (unsigned long *)&e->msi.data); 61{
60 int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT, 62 int i, r = -1;
61 (unsigned long *)&e->msi.data); 63 struct kvm_vcpu *vcpu, *lowest = NULL;
62 u32 deliver_bitmask; 64
63 65 if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
64 BUG_ON(!ioapic); 66 kvm_is_dm_lowest_prio(irq))
65 67 printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
66 deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, 68
67 dest_id, dest_mode); 69 for (i = 0; i < KVM_MAX_VCPUS; i++) {
68 /* IOAPIC delivery mode value is the same as MSI here */ 70 vcpu = kvm->vcpus[i];
69 switch (delivery_mode) { 71
70 case IOAPIC_LOWEST_PRIORITY: 72 if (!vcpu || !kvm_apic_present(vcpu))
71 vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, 73 continue;
72 deliver_bitmask); 74
73 if (vcpu != NULL) 75 if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
74 r = kvm_apic_set_irq(vcpu, vector, trig_mode); 76 irq->dest_id, irq->dest_mode))
75 else 77 continue;
76 printk(KERN_INFO "kvm: null lowest priority vcpu!\n"); 78
77 break; 79 if (!kvm_is_dm_lowest_prio(irq)) {
78 case IOAPIC_FIXED: 80 if (r < 0)
79 for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { 81 r = 0;
80 if (!(deliver_bitmask & (1 << vcpu_id))) 82 r += kvm_apic_set_irq(vcpu, irq);
81 continue; 83 } else {
82 deliver_bitmask &= ~(1 << vcpu_id); 84 if (!lowest)
83 vcpu = ioapic->kvm->vcpus[vcpu_id]; 85 lowest = vcpu;
84 if (vcpu) { 86 else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
85 if (r < 0) 87 lowest = vcpu;
86 r = 0;
87 r += kvm_apic_set_irq(vcpu, vector, trig_mode);
88 }
89 } 88 }
90 break;
91 default:
92 break;
93 } 89 }
90
91 if (lowest)
92 r = kvm_apic_set_irq(lowest, irq);
93
94 return r; 94 return r;
95} 95}
96 96
97static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
98 struct kvm *kvm, int level)
99{
100 struct kvm_lapic_irq irq;
101
102 irq.dest_id = (e->msi.address_lo &
103 MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
104 irq.vector = (e->msi.data &
105 MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
106 irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
107 irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
108 irq.delivery_mode = e->msi.data & 0x700;
109 irq.level = 1;
110 irq.shorthand = 0;
111
112 /* TODO Deal with RH bit of MSI message address */
113 return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
114}
115
97/* This should be called with the kvm->lock mutex held 116/* This should be called with the kvm->lock mutex held
98 * Return value: 117 * Return value:
99 * < 0 Interrupt was ignored (masked or not delivered for other reasons) 118 * < 0 Interrupt was ignored (masked or not delivered for other reasons)
@@ -252,7 +271,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
252 delta = 8; 271 delta = 8;
253 break; 272 break;
254 case KVM_IRQCHIP_IOAPIC: 273 case KVM_IRQCHIP_IOAPIC:
255 e->set = kvm_set_ioapic_irq; 274 e->set = kvm_set_ioapic_irq;
256 break; 275 break;
257 default: 276 default:
258 goto out; 277 goto out;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4d0dd390aa50..e21194566b71 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,8 @@
41#include <linux/pagemap.h> 41#include <linux/pagemap.h>
42#include <linux/mman.h> 42#include <linux/mman.h>
43#include <linux/swap.h> 43#include <linux/swap.h>
44#include <linux/bitops.h>
45#include <linux/spinlock.h>
44 46
45#include <asm/processor.h> 47#include <asm/processor.h>
46#include <asm/io.h> 48#include <asm/io.h>
@@ -60,9 +62,6 @@
60MODULE_AUTHOR("Qumranet"); 62MODULE_AUTHOR("Qumranet");
61MODULE_LICENSE("GPL"); 63MODULE_LICENSE("GPL");
62 64
63static int msi2intx = 1;
64module_param(msi2intx, bool, 0);
65
66DEFINE_SPINLOCK(kvm_lock); 65DEFINE_SPINLOCK(kvm_lock);
67LIST_HEAD(vm_list); 66LIST_HEAD(vm_list);
68 67
@@ -95,38 +94,96 @@ static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *h
95 return NULL; 94 return NULL;
96} 95}
97 96
97static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
98 *assigned_dev, int irq)
99{
100 int i, index;
101 struct msix_entry *host_msix_entries;
102
103 host_msix_entries = assigned_dev->host_msix_entries;
104
105 index = -1;
106 for (i = 0; i < assigned_dev->entries_nr; i++)
107 if (irq == host_msix_entries[i].vector) {
108 index = i;
109 break;
110 }
111 if (index < 0) {
112 printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
113 return 0;
114 }
115
116 return index;
117}
118
98static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) 119static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
99{ 120{
100 struct kvm_assigned_dev_kernel *assigned_dev; 121 struct kvm_assigned_dev_kernel *assigned_dev;
122 struct kvm *kvm;
123 int irq, i;
101 124
102 assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, 125 assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
103 interrupt_work); 126 interrupt_work);
127 kvm = assigned_dev->kvm;
104 128
105 /* This is taken to safely inject irq inside the guest. When 129 /* This is taken to safely inject irq inside the guest. When
106 * the interrupt injection (or the ioapic code) uses a 130 * the interrupt injection (or the ioapic code) uses a
107 * finer-grained lock, update this 131 * finer-grained lock, update this
108 */ 132 */
109 mutex_lock(&assigned_dev->kvm->lock); 133 mutex_lock(&kvm->lock);
110 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, 134 spin_lock_irq(&assigned_dev->assigned_dev_lock);
111 assigned_dev->guest_irq, 1); 135 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
112 136 struct kvm_guest_msix_entry *guest_entries =
113 if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) { 137 assigned_dev->guest_msix_entries;
114 enable_irq(assigned_dev->host_irq); 138 for (i = 0; i < assigned_dev->entries_nr; i++) {
115 assigned_dev->host_irq_disabled = false; 139 if (!(guest_entries[i].flags &
140 KVM_ASSIGNED_MSIX_PENDING))
141 continue;
142 guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
143 kvm_set_irq(assigned_dev->kvm,
144 assigned_dev->irq_source_id,
145 guest_entries[i].vector, 1);
146 irq = assigned_dev->host_msix_entries[i].vector;
147 if (irq != 0)
148 enable_irq(irq);
149 assigned_dev->host_irq_disabled = false;
150 }
151 } else {
152 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
153 assigned_dev->guest_irq, 1);
154 if (assigned_dev->irq_requested_type &
155 KVM_DEV_IRQ_GUEST_MSI) {
156 enable_irq(assigned_dev->host_irq);
157 assigned_dev->host_irq_disabled = false;
158 }
116 } 159 }
160
161 spin_unlock_irq(&assigned_dev->assigned_dev_lock);
117 mutex_unlock(&assigned_dev->kvm->lock); 162 mutex_unlock(&assigned_dev->kvm->lock);
118} 163}
119 164
120static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) 165static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
121{ 166{
167 unsigned long flags;
122 struct kvm_assigned_dev_kernel *assigned_dev = 168 struct kvm_assigned_dev_kernel *assigned_dev =
123 (struct kvm_assigned_dev_kernel *) dev_id; 169 (struct kvm_assigned_dev_kernel *) dev_id;
124 170
171 spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
172 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
173 int index = find_index_from_host_irq(assigned_dev, irq);
174 if (index < 0)
175 goto out;
176 assigned_dev->guest_msix_entries[index].flags |=
177 KVM_ASSIGNED_MSIX_PENDING;
178 }
179
125 schedule_work(&assigned_dev->interrupt_work); 180 schedule_work(&assigned_dev->interrupt_work);
126 181
127 disable_irq_nosync(irq); 182 disable_irq_nosync(irq);
128 assigned_dev->host_irq_disabled = true; 183 assigned_dev->host_irq_disabled = true;
129 184
185out:
186 spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
130 return IRQ_HANDLED; 187 return IRQ_HANDLED;
131} 188}
132 189
@@ -134,6 +191,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
134static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) 191static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
135{ 192{
136 struct kvm_assigned_dev_kernel *dev; 193 struct kvm_assigned_dev_kernel *dev;
194 unsigned long flags;
137 195
138 if (kian->gsi == -1) 196 if (kian->gsi == -1)
139 return; 197 return;
@@ -146,28 +204,30 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
146 /* The guest irq may be shared so this ack may be 204 /* The guest irq may be shared so this ack may be
147 * from another device. 205 * from another device.
148 */ 206 */
207 spin_lock_irqsave(&dev->assigned_dev_lock, flags);
149 if (dev->host_irq_disabled) { 208 if (dev->host_irq_disabled) {
150 enable_irq(dev->host_irq); 209 enable_irq(dev->host_irq);
151 dev->host_irq_disabled = false; 210 dev->host_irq_disabled = false;
152 } 211 }
212 spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
153} 213}
154 214
155/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ 215static void deassign_guest_irq(struct kvm *kvm,
156static void kvm_free_assigned_irq(struct kvm *kvm, 216 struct kvm_assigned_dev_kernel *assigned_dev)
157 struct kvm_assigned_dev_kernel *assigned_dev)
158{ 217{
159 if (!irqchip_in_kernel(kvm))
160 return;
161
162 kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); 218 kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
219 assigned_dev->ack_notifier.gsi = -1;
163 220
164 if (assigned_dev->irq_source_id != -1) 221 if (assigned_dev->irq_source_id != -1)
165 kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); 222 kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
166 assigned_dev->irq_source_id = -1; 223 assigned_dev->irq_source_id = -1;
224 assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
225}
167 226
168 if (!assigned_dev->irq_requested_type) 227/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
169 return; 228static void deassign_host_irq(struct kvm *kvm,
170 229 struct kvm_assigned_dev_kernel *assigned_dev)
230{
171 /* 231 /*
172 * In kvm_free_device_irq, cancel_work_sync return true if: 232 * In kvm_free_device_irq, cancel_work_sync return true if:
173 * 1. work is scheduled, and then cancelled. 233 * 1. work is scheduled, and then cancelled.
@@ -184,17 +244,64 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
184 * now, the kvm state is still legal for probably we also have to wait 244 * now, the kvm state is still legal for probably we also have to wait
185 * interrupt_work done. 245 * interrupt_work done.
186 */ 246 */
187 disable_irq_nosync(assigned_dev->host_irq); 247 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
188 cancel_work_sync(&assigned_dev->interrupt_work); 248 int i;
249 for (i = 0; i < assigned_dev->entries_nr; i++)
250 disable_irq_nosync(assigned_dev->
251 host_msix_entries[i].vector);
252
253 cancel_work_sync(&assigned_dev->interrupt_work);
254
255 for (i = 0; i < assigned_dev->entries_nr; i++)
256 free_irq(assigned_dev->host_msix_entries[i].vector,
257 (void *)assigned_dev);
258
259 assigned_dev->entries_nr = 0;
260 kfree(assigned_dev->host_msix_entries);
261 kfree(assigned_dev->guest_msix_entries);
262 pci_disable_msix(assigned_dev->dev);
263 } else {
264 /* Deal with MSI and INTx */
265 disable_irq_nosync(assigned_dev->host_irq);
266 cancel_work_sync(&assigned_dev->interrupt_work);
189 267
190 free_irq(assigned_dev->host_irq, (void *)assigned_dev); 268 free_irq(assigned_dev->host_irq, (void *)assigned_dev);
191 269
192 if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) 270 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
193 pci_disable_msi(assigned_dev->dev); 271 pci_disable_msi(assigned_dev->dev);
272 }
194 273
195 assigned_dev->irq_requested_type = 0; 274 assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
196} 275}
197 276
277static int kvm_deassign_irq(struct kvm *kvm,
278 struct kvm_assigned_dev_kernel *assigned_dev,
279 unsigned long irq_requested_type)
280{
281 unsigned long guest_irq_type, host_irq_type;
282
283 if (!irqchip_in_kernel(kvm))
284 return -EINVAL;
285 /* no irq assignment to deassign */
286 if (!assigned_dev->irq_requested_type)
287 return -ENXIO;
288
289 host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
290 guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
291
292 if (host_irq_type)
293 deassign_host_irq(kvm, assigned_dev);
294 if (guest_irq_type)
295 deassign_guest_irq(kvm, assigned_dev);
296
297 return 0;
298}
299
300static void kvm_free_assigned_irq(struct kvm *kvm,
301 struct kvm_assigned_dev_kernel *assigned_dev)
302{
303 kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
304}
198 305
199static void kvm_free_assigned_device(struct kvm *kvm, 306static void kvm_free_assigned_device(struct kvm *kvm,
200 struct kvm_assigned_dev_kernel 307 struct kvm_assigned_dev_kernel
@@ -226,190 +333,244 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
226 } 333 }
227} 334}
228 335
229static int assigned_device_update_intx(struct kvm *kvm, 336static int assigned_device_enable_host_intx(struct kvm *kvm,
230 struct kvm_assigned_dev_kernel *adev, 337 struct kvm_assigned_dev_kernel *dev)
231 struct kvm_assigned_irq *airq)
232{ 338{
233 adev->guest_irq = airq->guest_irq; 339 dev->host_irq = dev->dev->irq;
234 adev->ack_notifier.gsi = airq->guest_irq; 340 /* Even though this is PCI, we don't want to use shared
341 * interrupts. Sharing host devices with guest-assigned devices
342 * on the same interrupt line is not a happy situation: there
343 * are going to be long delays in accepting, acking, etc.
344 */
345 if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
346 0, "kvm_assigned_intx_device", (void *)dev))
347 return -EIO;
348 return 0;
349}
235 350
236 if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX) 351#ifdef __KVM_HAVE_MSI
237 return 0; 352static int assigned_device_enable_host_msi(struct kvm *kvm,
353 struct kvm_assigned_dev_kernel *dev)
354{
355 int r;
238 356
239 if (irqchip_in_kernel(kvm)) { 357 if (!dev->dev->msi_enabled) {
240 if (!msi2intx && 358 r = pci_enable_msi(dev->dev);
241 (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) { 359 if (r)
242 free_irq(adev->host_irq, (void *)adev); 360 return r;
243 pci_disable_msi(adev->dev); 361 }
244 }
245 362
246 if (!capable(CAP_SYS_RAWIO)) 363 dev->host_irq = dev->dev->irq;
247 return -EPERM; 364 if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
365 "kvm_assigned_msi_device", (void *)dev)) {
366 pci_disable_msi(dev->dev);
367 return -EIO;
368 }
248 369
249 if (airq->host_irq) 370 return 0;
250 adev->host_irq = airq->host_irq; 371}
251 else 372#endif
252 adev->host_irq = adev->dev->irq;
253 373
254 /* Even though this is PCI, we don't want to use shared 374#ifdef __KVM_HAVE_MSIX
255 * interrupts. Sharing host devices with guest-assigned devices 375static int assigned_device_enable_host_msix(struct kvm *kvm,
256 * on the same interrupt line is not a happy situation: there 376 struct kvm_assigned_dev_kernel *dev)
257 * are going to be long delays in accepting, acking, etc. 377{
258 */ 378 int i, r = -EINVAL;
259 if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 379
260 0, "kvm_assigned_intx_device", (void *)adev)) 380 /* host_msix_entries and guest_msix_entries should have been
261 return -EIO; 381 * initialized */
382 if (dev->entries_nr == 0)
383 return r;
384
385 r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
386 if (r)
387 return r;
388
389 for (i = 0; i < dev->entries_nr; i++) {
390 r = request_irq(dev->host_msix_entries[i].vector,
391 kvm_assigned_dev_intr, 0,
392 "kvm_assigned_msix_device",
393 (void *)dev);
394 /* FIXME: free requested_irq's on failure */
395 if (r)
396 return r;
262 } 397 }
263 398
264 adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
265 KVM_ASSIGNED_DEV_HOST_INTX;
266 return 0; 399 return 0;
267} 400}
268 401
269#ifdef CONFIG_X86 402#endif
270static int assigned_device_update_msi(struct kvm *kvm, 403
271 struct kvm_assigned_dev_kernel *adev, 404static int assigned_device_enable_guest_intx(struct kvm *kvm,
272 struct kvm_assigned_irq *airq) 405 struct kvm_assigned_dev_kernel *dev,
406 struct kvm_assigned_irq *irq)
273{ 407{
274 int r; 408 dev->guest_irq = irq->guest_irq;
409 dev->ack_notifier.gsi = irq->guest_irq;
410 return 0;
411}
275 412
276 adev->guest_irq = airq->guest_irq; 413#ifdef __KVM_HAVE_MSI
277 if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) { 414static int assigned_device_enable_guest_msi(struct kvm *kvm,
278 /* x86 don't care upper address of guest msi message addr */ 415 struct kvm_assigned_dev_kernel *dev,
279 adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI; 416 struct kvm_assigned_irq *irq)
280 adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX; 417{
281 adev->ack_notifier.gsi = -1; 418 dev->guest_irq = irq->guest_irq;
282 } else if (msi2intx) { 419 dev->ack_notifier.gsi = -1;
283 adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX; 420 return 0;
284 adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; 421}
285 adev->ack_notifier.gsi = airq->guest_irq; 422#endif
286 } else { 423#ifdef __KVM_HAVE_MSIX
287 /* 424static int assigned_device_enable_guest_msix(struct kvm *kvm,
288 * Guest require to disable device MSI, we disable MSI and 425 struct kvm_assigned_dev_kernel *dev,
289 * re-enable INTx by default again. Notice it's only for 426 struct kvm_assigned_irq *irq)
290 * non-msi2intx. 427{
291 */ 428 dev->guest_irq = irq->guest_irq;
292 assigned_device_update_intx(kvm, adev, airq); 429 dev->ack_notifier.gsi = -1;
293 return 0; 430 return 0;
431}
432#endif
433
434static int assign_host_irq(struct kvm *kvm,
435 struct kvm_assigned_dev_kernel *dev,
436 __u32 host_irq_type)
437{
438 int r = -EEXIST;
439
440 if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
441 return r;
442
443 switch (host_irq_type) {
444 case KVM_DEV_IRQ_HOST_INTX:
445 r = assigned_device_enable_host_intx(kvm, dev);
446 break;
447#ifdef __KVM_HAVE_MSI
448 case KVM_DEV_IRQ_HOST_MSI:
449 r = assigned_device_enable_host_msi(kvm, dev);
450 break;
451#endif
452#ifdef __KVM_HAVE_MSIX
453 case KVM_DEV_IRQ_HOST_MSIX:
454 r = assigned_device_enable_host_msix(kvm, dev);
455 break;
456#endif
457 default:
458 r = -EINVAL;
294 } 459 }
295 460
296 if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) 461 if (!r)
297 return 0; 462 dev->irq_requested_type |= host_irq_type;
298 463
299 if (irqchip_in_kernel(kvm)) { 464 return r;
300 if (!msi2intx) { 465}
301 if (adev->irq_requested_type &
302 KVM_ASSIGNED_DEV_HOST_INTX)
303 free_irq(adev->host_irq, (void *)adev);
304 466
305 r = pci_enable_msi(adev->dev); 467static int assign_guest_irq(struct kvm *kvm,
306 if (r) 468 struct kvm_assigned_dev_kernel *dev,
307 return r; 469 struct kvm_assigned_irq *irq,
308 } 470 unsigned long guest_irq_type)
471{
472 int id;
473 int r = -EEXIST;
474
475 if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
476 return r;
309 477
310 adev->host_irq = adev->dev->irq; 478 id = kvm_request_irq_source_id(kvm);
311 if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0, 479 if (id < 0)
312 "kvm_assigned_msi_device", (void *)adev)) 480 return id;
313 return -EIO; 481
482 dev->irq_source_id = id;
483
484 switch (guest_irq_type) {
485 case KVM_DEV_IRQ_GUEST_INTX:
486 r = assigned_device_enable_guest_intx(kvm, dev, irq);
487 break;
488#ifdef __KVM_HAVE_MSI
489 case KVM_DEV_IRQ_GUEST_MSI:
490 r = assigned_device_enable_guest_msi(kvm, dev, irq);
491 break;
492#endif
493#ifdef __KVM_HAVE_MSIX
494 case KVM_DEV_IRQ_GUEST_MSIX:
495 r = assigned_device_enable_guest_msix(kvm, dev, irq);
496 break;
497#endif
498 default:
499 r = -EINVAL;
314 } 500 }
315 501
316 if (!msi2intx) 502 if (!r) {
317 adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI; 503 dev->irq_requested_type |= guest_irq_type;
504 kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
505 } else
506 kvm_free_irq_source_id(kvm, dev->irq_source_id);
318 507
319 adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI; 508 return r;
320 return 0;
321} 509}
322#endif
323 510
511/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
324static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, 512static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
325 struct kvm_assigned_irq 513 struct kvm_assigned_irq *assigned_irq)
326 *assigned_irq)
327{ 514{
328 int r = 0; 515 int r = -EINVAL;
329 struct kvm_assigned_dev_kernel *match; 516 struct kvm_assigned_dev_kernel *match;
330 u32 current_flags = 0, changed_flags; 517 unsigned long host_irq_type, guest_irq_type;
331 518
332 mutex_lock(&kvm->lock); 519 if (!capable(CAP_SYS_RAWIO))
520 return -EPERM;
333 521
522 if (!irqchip_in_kernel(kvm))
523 return r;
524
525 mutex_lock(&kvm->lock);
526 r = -ENODEV;
334 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, 527 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
335 assigned_irq->assigned_dev_id); 528 assigned_irq->assigned_dev_id);
336 if (!match) { 529 if (!match)
337 mutex_unlock(&kvm->lock); 530 goto out;
338 return -EINVAL;
339 }
340
341 if (!match->irq_requested_type) {
342 INIT_WORK(&match->interrupt_work,
343 kvm_assigned_dev_interrupt_work_handler);
344 if (irqchip_in_kernel(kvm)) {
345 /* Register ack nofitier */
346 match->ack_notifier.gsi = -1;
347 match->ack_notifier.irq_acked =
348 kvm_assigned_dev_ack_irq;
349 kvm_register_irq_ack_notifier(kvm,
350 &match->ack_notifier);
351
352 /* Request IRQ source ID */
353 r = kvm_request_irq_source_id(kvm);
354 if (r < 0)
355 goto out_release;
356 else
357 match->irq_source_id = r;
358
359#ifdef CONFIG_X86
360 /* Determine host device irq type, we can know the
361 * result from dev->msi_enabled */
362 if (msi2intx)
363 pci_enable_msi(match->dev);
364#endif
365 }
366 }
367 531
368 if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) && 532 host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
369 (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI)) 533 guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
370 current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
371 534
372 changed_flags = assigned_irq->flags ^ current_flags; 535 r = -EINVAL;
536 /* can only assign one type at a time */
537 if (hweight_long(host_irq_type) > 1)
538 goto out;
539 if (hweight_long(guest_irq_type) > 1)
540 goto out;
541 if (host_irq_type == 0 && guest_irq_type == 0)
542 goto out;
373 543
374 if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) || 544 r = 0;
375 (msi2intx && match->dev->msi_enabled)) { 545 if (host_irq_type)
376#ifdef CONFIG_X86 546 r = assign_host_irq(kvm, match, host_irq_type);
377 r = assigned_device_update_msi(kvm, match, assigned_irq); 547 if (r)
378 if (r) { 548 goto out;
379 printk(KERN_WARNING "kvm: failed to enable "
380 "MSI device!\n");
381 goto out_release;
382 }
383#else
384 r = -ENOTTY;
385#endif
386 } else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
387 /* Host device IRQ 0 means don't support INTx */
388 if (!msi2intx) {
389 printk(KERN_WARNING
390 "kvm: wait device to enable MSI!\n");
391 r = 0;
392 } else {
393 printk(KERN_WARNING
394 "kvm: failed to enable MSI device!\n");
395 r = -ENOTTY;
396 goto out_release;
397 }
398 } else {
399 /* Non-sharing INTx mode */
400 r = assigned_device_update_intx(kvm, match, assigned_irq);
401 if (r) {
402 printk(KERN_WARNING "kvm: failed to enable "
403 "INTx device!\n");
404 goto out_release;
405 }
406 }
407 549
550 if (guest_irq_type)
551 r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
552out:
408 mutex_unlock(&kvm->lock); 553 mutex_unlock(&kvm->lock);
409 return r; 554 return r;
410out_release: 555}
556
557static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
558 struct kvm_assigned_irq
559 *assigned_irq)
560{
561 int r = -ENODEV;
562 struct kvm_assigned_dev_kernel *match;
563
564 mutex_lock(&kvm->lock);
565
566 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
567 assigned_irq->assigned_dev_id);
568 if (!match)
569 goto out;
570
571 r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
572out:
411 mutex_unlock(&kvm->lock); 573 mutex_unlock(&kvm->lock);
412 kvm_free_assigned_device(kvm, match);
413 return r; 574 return r;
414} 575}
415 576
@@ -427,7 +588,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
427 assigned_dev->assigned_dev_id); 588 assigned_dev->assigned_dev_id);
428 if (match) { 589 if (match) {
429 /* device already assigned */ 590 /* device already assigned */
430 r = -EINVAL; 591 r = -EEXIST;
431 goto out; 592 goto out;
432 } 593 }
433 594
@@ -464,8 +625,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
464 match->host_devfn = assigned_dev->devfn; 625 match->host_devfn = assigned_dev->devfn;
465 match->flags = assigned_dev->flags; 626 match->flags = assigned_dev->flags;
466 match->dev = dev; 627 match->dev = dev;
628 spin_lock_init(&match->assigned_dev_lock);
467 match->irq_source_id = -1; 629 match->irq_source_id = -1;
468 match->kvm = kvm; 630 match->kvm = kvm;
631 match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
632 INIT_WORK(&match->interrupt_work,
633 kvm_assigned_dev_interrupt_work_handler);
469 634
470 list_add(&match->list, &kvm->arch.assigned_dev_head); 635 list_add(&match->list, &kvm->arch.assigned_dev_head);
471 636
@@ -878,6 +1043,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
878#endif 1043#endif
879#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 1044#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
880 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); 1045 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
1046#else
1047 kvm_arch_flush_shadow(kvm);
881#endif 1048#endif
882 kvm_arch_destroy_vm(kvm); 1049 kvm_arch_destroy_vm(kvm);
883 mmdrop(mm); 1050 mmdrop(mm);
@@ -919,9 +1086,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
919{ 1086{
920 int r; 1087 int r;
921 gfn_t base_gfn; 1088 gfn_t base_gfn;
922 unsigned long npages; 1089 unsigned long npages, ugfn;
923 int largepages; 1090 unsigned long largepages, i;
924 unsigned long i;
925 struct kvm_memory_slot *memslot; 1091 struct kvm_memory_slot *memslot;
926 struct kvm_memory_slot old, new; 1092 struct kvm_memory_slot old, new;
927 1093
@@ -1010,6 +1176,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
1010 new.lpage_info[0].write_count = 1; 1176 new.lpage_info[0].write_count = 1;
1011 if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE) 1177 if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
1012 new.lpage_info[largepages-1].write_count = 1; 1178 new.lpage_info[largepages-1].write_count = 1;
1179 ugfn = new.userspace_addr >> PAGE_SHIFT;
1180 /*
1181 * If the gfn and userspace address are not aligned wrt each
1182 * other, disable large page support for this slot
1183 */
1184 if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1))
1185 for (i = 0; i < largepages; ++i)
1186 new.lpage_info[i].write_count = 1;
1013 } 1187 }
1014 1188
1015 /* Allocate page dirty bitmap if needed */ 1189 /* Allocate page dirty bitmap if needed */
@@ -1043,8 +1217,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
1043 1217
1044 kvm_free_physmem_slot(&old, npages ? &new : NULL); 1218 kvm_free_physmem_slot(&old, npages ? &new : NULL);
1045 /* Slot deletion case: we have to update the current slot */ 1219 /* Slot deletion case: we have to update the current slot */
1220 spin_lock(&kvm->mmu_lock);
1046 if (!npages) 1221 if (!npages)
1047 *memslot = old; 1222 *memslot = old;
1223 spin_unlock(&kvm->mmu_lock);
1048#ifdef CONFIG_DMAR 1224#ifdef CONFIG_DMAR
1049 /* map the pages in iommu page table */ 1225 /* map the pages in iommu page table */
1050 r = kvm_iommu_map_pages(kvm, base_gfn, npages); 1226 r = kvm_iommu_map_pages(kvm, base_gfn, npages);
@@ -1454,12 +1630,14 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
1454 for (;;) { 1630 for (;;) {
1455 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 1631 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
1456 1632
1457 if (kvm_cpu_has_interrupt(vcpu) || 1633 if ((kvm_arch_interrupt_allowed(vcpu) &&
1458 kvm_cpu_has_pending_timer(vcpu) || 1634 kvm_cpu_has_interrupt(vcpu)) ||
1459 kvm_arch_vcpu_runnable(vcpu)) { 1635 kvm_arch_vcpu_runnable(vcpu)) {
1460 set_bit(KVM_REQ_UNHALT, &vcpu->requests); 1636 set_bit(KVM_REQ_UNHALT, &vcpu->requests);
1461 break; 1637 break;
1462 } 1638 }
1639 if (kvm_cpu_has_pending_timer(vcpu))
1640 break;
1463 if (signal_pending(current)) 1641 if (signal_pending(current))
1464 break; 1642 break;
1465 1643
@@ -1593,6 +1771,88 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
1593 return 0; 1771 return 0;
1594} 1772}
1595 1773
1774#ifdef __KVM_HAVE_MSIX
1775static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
1776 struct kvm_assigned_msix_nr *entry_nr)
1777{
1778 int r = 0;
1779 struct kvm_assigned_dev_kernel *adev;
1780
1781 mutex_lock(&kvm->lock);
1782
1783 adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
1784 entry_nr->assigned_dev_id);
1785 if (!adev) {
1786 r = -EINVAL;
1787 goto msix_nr_out;
1788 }
1789
1790 if (adev->entries_nr == 0) {
1791 adev->entries_nr = entry_nr->entry_nr;
1792 if (adev->entries_nr == 0 ||
1793 adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
1794 r = -EINVAL;
1795 goto msix_nr_out;
1796 }
1797
1798 adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
1799 entry_nr->entry_nr,
1800 GFP_KERNEL);
1801 if (!adev->host_msix_entries) {
1802 r = -ENOMEM;
1803 goto msix_nr_out;
1804 }
1805 adev->guest_msix_entries = kzalloc(
1806 sizeof(struct kvm_guest_msix_entry) *
1807 entry_nr->entry_nr, GFP_KERNEL);
1808 if (!adev->guest_msix_entries) {
1809 kfree(adev->host_msix_entries);
1810 r = -ENOMEM;
1811 goto msix_nr_out;
1812 }
1813 } else /* Not allowed set MSI-X number twice */
1814 r = -EINVAL;
1815msix_nr_out:
1816 mutex_unlock(&kvm->lock);
1817 return r;
1818}
1819
1820static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
1821 struct kvm_assigned_msix_entry *entry)
1822{
1823 int r = 0, i;
1824 struct kvm_assigned_dev_kernel *adev;
1825
1826 mutex_lock(&kvm->lock);
1827
1828 adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
1829 entry->assigned_dev_id);
1830
1831 if (!adev) {
1832 r = -EINVAL;
1833 goto msix_entry_out;
1834 }
1835
1836 for (i = 0; i < adev->entries_nr; i++)
1837 if (adev->guest_msix_entries[i].vector == 0 ||
1838 adev->guest_msix_entries[i].entry == entry->entry) {
1839 adev->guest_msix_entries[i].entry = entry->entry;
1840 adev->guest_msix_entries[i].vector = entry->gsi;
1841 adev->host_msix_entries[i].entry = entry->entry;
1842 break;
1843 }
1844 if (i == adev->entries_nr) {
1845 r = -ENOSPC;
1846 goto msix_entry_out;
1847 }
1848
1849msix_entry_out:
1850 mutex_unlock(&kvm->lock);
1851
1852 return r;
1853}
1854#endif
1855
1596static long kvm_vcpu_ioctl(struct file *filp, 1856static long kvm_vcpu_ioctl(struct file *filp,
1597 unsigned int ioctl, unsigned long arg) 1857 unsigned int ioctl, unsigned long arg)
1598{ 1858{
@@ -1864,6 +2124,11 @@ static long kvm_vm_ioctl(struct file *filp,
1864 break; 2124 break;
1865 } 2125 }
1866 case KVM_ASSIGN_IRQ: { 2126 case KVM_ASSIGN_IRQ: {
2127 r = -EOPNOTSUPP;
2128 break;
2129 }
2130#ifdef KVM_CAP_ASSIGN_DEV_IRQ
2131 case KVM_ASSIGN_DEV_IRQ: {
1867 struct kvm_assigned_irq assigned_irq; 2132 struct kvm_assigned_irq assigned_irq;
1868 2133
1869 r = -EFAULT; 2134 r = -EFAULT;
@@ -1874,6 +2139,18 @@ static long kvm_vm_ioctl(struct file *filp,
1874 goto out; 2139 goto out;
1875 break; 2140 break;
1876 } 2141 }
2142 case KVM_DEASSIGN_DEV_IRQ: {
2143 struct kvm_assigned_irq assigned_irq;
2144
2145 r = -EFAULT;
2146 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
2147 goto out;
2148 r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
2149 if (r)
2150 goto out;
2151 break;
2152 }
2153#endif
1877#endif 2154#endif
1878#ifdef KVM_CAP_DEVICE_DEASSIGNMENT 2155#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
1879 case KVM_DEASSIGN_PCI_DEVICE: { 2156 case KVM_DEASSIGN_PCI_DEVICE: {
@@ -1917,7 +2194,29 @@ static long kvm_vm_ioctl(struct file *filp,
1917 vfree(entries); 2194 vfree(entries);
1918 break; 2195 break;
1919 } 2196 }
2197#ifdef __KVM_HAVE_MSIX
2198 case KVM_ASSIGN_SET_MSIX_NR: {
2199 struct kvm_assigned_msix_nr entry_nr;
2200 r = -EFAULT;
2201 if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
2202 goto out;
2203 r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
2204 if (r)
2205 goto out;
2206 break;
2207 }
2208 case KVM_ASSIGN_SET_MSIX_ENTRY: {
2209 struct kvm_assigned_msix_entry entry;
2210 r = -EFAULT;
2211 if (copy_from_user(&entry, argp, sizeof entry))
2212 goto out;
2213 r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
2214 if (r)
2215 goto out;
2216 break;
2217 }
1920#endif 2218#endif
2219#endif /* KVM_CAP_IRQ_ROUTING */
1921 default: 2220 default:
1922 r = kvm_arch_vm_ioctl(filp, ioctl, arg); 2221 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
1923 } 2222 }
@@ -2112,15 +2411,15 @@ EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);
2112static int kvm_reboot(struct notifier_block *notifier, unsigned long val, 2411static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
2113 void *v) 2412 void *v)
2114{ 2413{
2115 if (val == SYS_RESTART) { 2414 /*
2116 /* 2415 * Some (well, at least mine) BIOSes hang on reboot if
2117 * Some (well, at least mine) BIOSes hang on reboot if 2416 * in vmx root mode.
2118 * in vmx root mode. 2417 *
2119 */ 2418 * And Intel TXT required VMX off for all cpu when system shutdown.
2120 printk(KERN_INFO "kvm: exiting hardware virtualization\n"); 2419 */
2121 kvm_rebooting = true; 2420 printk(KERN_INFO "kvm: exiting hardware virtualization\n");
2122 on_each_cpu(hardware_disable, NULL, 1); 2421 kvm_rebooting = true;
2123 } 2422 on_each_cpu(hardware_disable, NULL, 1);
2124 return NOTIFY_OK; 2423 return NOTIFY_OK;
2125} 2424}
2126 2425
@@ -2354,9 +2653,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
2354 2653
2355 kvm_preempt_ops.sched_in = kvm_sched_in; 2654 kvm_preempt_ops.sched_in = kvm_sched_in;
2356 kvm_preempt_ops.sched_out = kvm_sched_out; 2655 kvm_preempt_ops.sched_out = kvm_sched_out;
2357#ifndef CONFIG_X86
2358 msi2intx = 0;
2359#endif
2360 2656
2361 return 0; 2657 return 0;
2362 2658