Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/Kconfig            3
-rw-r--r--  virt/kvm/assigned-dev.c    12
-rw-r--r--  virt/kvm/coalesced_mmio.c  43
-rw-r--r--  virt/kvm/coalesced_mmio.h  15
-rw-r--r--  virt/kvm/eventfd.c         21
-rw-r--r--  virt/kvm/ioapic.c          38
-rw-r--r--  virt/kvm/ioapic.h           2
-rw-r--r--  virt/kvm/iommu.c           36
-rw-r--r--  virt/kvm/kvm_main.c       392
9 files changed, 384 insertions, 178 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index daece36c0a57..7f1178f6b839 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -12,3 +12,6 @@ config HAVE_KVM_EVENTFD
 
 config KVM_APIC_ARCHITECTURE
 	bool
+
+config KVM_MMIO
+	bool
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index f73de631e3ee..057e2cca6af5 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -504,12 +504,12 @@ out:
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      struct kvm_assigned_pci_dev *assigned_dev)
 {
-	int r = 0;
+	int r = 0, idx;
 	struct kvm_assigned_dev_kernel *match;
 	struct pci_dev *dev;
 
 	mutex_lock(&kvm->lock);
-	down_read(&kvm->slots_lock);
+	idx = srcu_read_lock(&kvm->srcu);
 
 	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
 				      assigned_dev->assigned_dev_id);
@@ -526,7 +526,8 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 		r = -ENOMEM;
 		goto out;
 	}
-	dev = pci_get_bus_and_slot(assigned_dev->busnr,
+	dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
+				   assigned_dev->busnr,
 				   assigned_dev->devfn);
 	if (!dev) {
 		printk(KERN_INFO "%s: host device not found\n", __func__);
@@ -548,6 +549,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	pci_reset_function(dev);
 
 	match->assigned_dev_id = assigned_dev->assigned_dev_id;
+	match->host_segnr = assigned_dev->segnr;
 	match->host_busnr = assigned_dev->busnr;
 	match->host_devfn = assigned_dev->devfn;
 	match->flags = assigned_dev->flags;
@@ -573,7 +575,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	}
 
 out:
-	up_read(&kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 	mutex_unlock(&kvm->lock);
 	return r;
 out_list_del:
@@ -585,7 +587,7 @@ out_put:
 	pci_dev_put(dev);
 out_free:
 	kfree(match);
-	up_read(&kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 	mutex_unlock(&kvm->lock);
 	return r;
 }
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 04d69cd7049b..5169736377a3 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -92,41 +92,64 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = {
 int kvm_coalesced_mmio_init(struct kvm *kvm)
 {
 	struct kvm_coalesced_mmio_dev *dev;
+	struct page *page;
 	int ret;
 
+	ret = -ENOMEM;
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
+		goto out_err;
+	kvm->coalesced_mmio_ring = page_address(page);
+
+	ret = -ENOMEM;
 	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
 	if (!dev)
-		return -ENOMEM;
+		goto out_free_page;
 	spin_lock_init(&dev->lock);
 	kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
 	dev->kvm = kvm;
 	kvm->coalesced_mmio_dev = dev;
 
-	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev);
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
+	mutex_unlock(&kvm->slots_lock);
 	if (ret < 0)
-		kfree(dev);
+		goto out_free_dev;
+
+	return ret;
 
+out_free_dev:
+	kfree(dev);
+out_free_page:
+	__free_page(page);
+out_err:
 	return ret;
 }
 
+void kvm_coalesced_mmio_free(struct kvm *kvm)
+{
+	if (kvm->coalesced_mmio_ring)
+		free_page((unsigned long)kvm->coalesced_mmio_ring);
+}
+
 int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
 					 struct kvm_coalesced_mmio_zone *zone)
 {
 	struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
 
 	if (dev == NULL)
 		return -EINVAL;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
-		up_write(&kvm->slots_lock);
+		mutex_unlock(&kvm->slots_lock);
 		return -ENOBUFS;
 	}
 
 	dev->zone[dev->nb_zones] = *zone;
 	dev->nb_zones++;
 
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return 0;
 }
 
@@ -140,10 +163,10 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
 	if (dev == NULL)
 		return -EINVAL;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 
 	i = dev->nb_zones;
-	while(i) {
+	while (i) {
 		z = &dev->zone[i - 1];
 
 		/* unregister all zones
@@ -158,7 +181,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
 		i--;
 	}
 
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 
 	return 0;
 }
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
index 4b49f27fa31e..8a5959e3535f 100644
--- a/virt/kvm/coalesced_mmio.h
+++ b/virt/kvm/coalesced_mmio.h
@@ -1,3 +1,6 @@
+#ifndef __KVM_COALESCED_MMIO_H__
+#define __KVM_COALESCED_MMIO_H__
+
 /*
  * KVM coalesced MMIO
  *
@@ -7,6 +10,8 @@
  *
  */
 
+#ifdef CONFIG_KVM_MMIO
+
 #define KVM_COALESCED_MMIO_ZONE_MAX 100
 
 struct kvm_coalesced_mmio_dev {
@@ -18,7 +23,17 @@ struct kvm_coalesced_mmio_dev {
 };
 
 int kvm_coalesced_mmio_init(struct kvm *kvm);
+void kvm_coalesced_mmio_free(struct kvm *kvm);
 int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
 					struct kvm_coalesced_mmio_zone *zone);
 int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
 					  struct kvm_coalesced_mmio_zone *zone);
+
+#else
+
+static inline int kvm_coalesced_mmio_init(struct kvm *kvm) { return 0; }
+static inline void kvm_coalesced_mmio_free(struct kvm *kvm) { }
+
+#endif
+
+#endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a9d3fc6c681c..7016319b1ec0 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -47,7 +47,6 @@ struct _irqfd {
 	int gsi;
 	struct list_head list;
 	poll_table pt;
-	wait_queue_head_t *wqh;
 	wait_queue_t wait;
 	struct work_struct inject;
 	struct work_struct shutdown;
@@ -159,8 +158,6 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 			poll_table *pt)
 {
 	struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
-
-	irqfd->wqh = wqh;
 	add_wait_queue(wqh, &irqfd->wait);
 }
 
@@ -463,7 +460,7 @@ static int
 kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
-	struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+	enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
 	struct _ioeventfd *p;
 	struct eventfd_ctx *eventfd;
 	int ret;
@@ -508,7 +505,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	else
 		p->wildcard = true;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 
 	/* Verify that there isn't a match already */
 	if (ioeventfd_check_collision(kvm, p)) {
@@ -518,18 +515,18 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	kvm_iodevice_init(&p->dev, &ioeventfd_ops);
 
-	ret = __kvm_io_bus_register_dev(bus, &p->dev);
+	ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
 	if (ret < 0)
 		goto unlock_fail;
 
 	list_add_tail(&p->list, &kvm->ioeventfds);
 
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 
 	return 0;
 
 unlock_fail:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 
 fail:
 	kfree(p);
@@ -542,7 +539,7 @@ static int
 kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
-	struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+	enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
 	struct _ioeventfd *p, *tmp;
 	struct eventfd_ctx *eventfd;
 	int ret = -ENOENT;
@@ -551,7 +548,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	if (IS_ERR(eventfd))
 		return PTR_ERR(eventfd);
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 
 	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
 		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
@@ -565,13 +562,13 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 		if (!p->wildcard && p->datamatch != args->datamatch)
 			continue;
 
-		__kvm_io_bus_unregister_dev(bus, &p->dev);
+		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
 		ioeventfd_release(p);
 		ret = 0;
 		break;
 	}
 
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 
 	eventfd_ctx_put(eventfd);
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 38a2d20b89de..3db15a807f80 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -100,6 +100,19 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 	return injected;
 }
 
+static void update_handled_vectors(struct kvm_ioapic *ioapic)
+{
+	DECLARE_BITMAP(handled_vectors, 256);
+	int i;
+
+	memset(handled_vectors, 0, sizeof(handled_vectors));
+	for (i = 0; i < IOAPIC_NUM_PINS; ++i)
+		__set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
+	memcpy(ioapic->handled_vectors, handled_vectors,
+	       sizeof(handled_vectors));
+	smp_wmb();
+}
+
 static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
 	unsigned index;
@@ -134,6 +147,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 		e->bits |= (u32) val;
 		e->fields.remote_irr = 0;
 	}
+	update_handled_vectors(ioapic);
 	mask_after = e->fields.mask;
 	if (mask_before != mask_after)
 		kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
@@ -241,6 +255,9 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
 {
 	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
 
+	smp_rmb();
+	if (!test_bit(vector, ioapic->handled_vectors))
+		return;
 	mutex_lock(&ioapic->lock);
 	__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
 	mutex_unlock(&ioapic->lock);
@@ -352,6 +369,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
 	ioapic->ioregsel = 0;
 	ioapic->irr = 0;
 	ioapic->id = 0;
+	update_handled_vectors(ioapic);
 }
 
 static const struct kvm_io_device_ops ioapic_mmio_ops = {
@@ -372,13 +390,28 @@ int kvm_ioapic_init(struct kvm *kvm)
 	kvm_ioapic_reset(ioapic);
 	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
 	ioapic->kvm = kvm;
-	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev);
-	if (ret < 0)
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+	mutex_unlock(&kvm->slots_lock);
+	if (ret < 0) {
+		kvm->arch.vioapic = NULL;
 		kfree(ioapic);
+	}
 
 	return ret;
 }
 
+void kvm_ioapic_destroy(struct kvm *kvm)
+{
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+	if (ioapic) {
+		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+		kvm->arch.vioapic = NULL;
+		kfree(ioapic);
+	}
+}
+
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 {
 	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
@@ -399,6 +432,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 
 	mutex_lock(&ioapic->lock);
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+	update_handled_vectors(ioapic);
 	mutex_unlock(&ioapic->lock);
 	return 0;
 }
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 419c43b667ab..8a751b78a430 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -46,6 +46,7 @@ struct kvm_ioapic {
 	struct kvm *kvm;
 	void (*ack_notifier)(void *opaque, int irq);
 	struct mutex lock;
+	DECLARE_BITMAP(handled_vectors, 256);
 };
 
 #ifdef DEBUG
@@ -71,6 +72,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
+void kvm_ioapic_destroy(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 15147583abd1..80fd3ad3b2de 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -32,10 +32,10 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
 static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages);
 
-int kvm_iommu_map_pages(struct kvm *kvm,
-			gfn_t base_gfn, unsigned long npages)
+int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
-	gfn_t gfn = base_gfn;
+	gfn_t gfn = slot->base_gfn;
+	unsigned long npages = slot->npages;
 	pfn_t pfn;
 	int i, r = 0;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
@@ -54,7 +54,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
 			continue;
 
-		pfn = gfn_to_pfn(kvm, gfn);
+		pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
 		r = iommu_map_range(domain,
 				    gfn_to_gpa(gfn),
 				    pfn_to_hpa(pfn),
@@ -69,17 +69,19 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 	return 0;
 
 unmap_pages:
-	kvm_iommu_put_pages(kvm, base_gfn, i);
+	kvm_iommu_put_pages(kvm, slot->base_gfn, i);
 	return r;
 }
 
 static int kvm_iommu_map_memslots(struct kvm *kvm)
 {
 	int i, r = 0;
+	struct kvm_memslots *slots;
+
+	slots = rcu_dereference(kvm->memslots);
 
-	for (i = 0; i < kvm->nmemslots; i++) {
-		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
-					kvm->memslots[i].npages);
+	for (i = 0; i < slots->nmemslots; i++) {
+		r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
 		if (r)
 			break;
 	}
@@ -104,7 +106,8 @@ int kvm_assign_device(struct kvm *kvm,
 
 	r = iommu_attach_device(domain, &pdev->dev);
 	if (r) {
-		printk(KERN_ERR "assign device %x:%x.%x failed",
+		printk(KERN_ERR "assign device %x:%x:%x.%x failed",
+			pci_domain_nr(pdev->bus),
 			pdev->bus->number,
 			PCI_SLOT(pdev->devfn),
 			PCI_FUNC(pdev->devfn));
@@ -125,7 +128,8 @@ int kvm_assign_device(struct kvm *kvm,
 		goto out_unmap;
 	}
 
-	printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
+	printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
+		assigned_dev->host_segnr,
 		assigned_dev->host_busnr,
 		PCI_SLOT(assigned_dev->host_devfn),
 		PCI_FUNC(assigned_dev->host_devfn));
@@ -152,7 +156,8 @@ int kvm_deassign_device(struct kvm *kvm,
 
 	iommu_detach_device(domain, &pdev->dev);
 
-	printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
+	printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
+		assigned_dev->host_segnr,
 		assigned_dev->host_busnr,
 		PCI_SLOT(assigned_dev->host_devfn),
 		PCI_FUNC(assigned_dev->host_devfn));
@@ -210,10 +215,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 {
 	int i;
+	struct kvm_memslots *slots;
+
+	slots = rcu_dereference(kvm->memslots);
 
-	for (i = 0; i < kvm->nmemslots; i++) {
-		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
-				    kvm->memslots[i].npages);
+	for (i = 0; i < slots->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
+				    slots->memslots[i].npages);
 	}
 
 	return 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a944be392d6e..548f9253c195 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -44,6 +44,8 @@
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
 #include <linux/compat.h>
+#include <linux/srcu.h>
+#include <linux/hugetlb.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -51,9 +53,7 @@
 #include <asm/pgtable.h>
 #include <asm-generic/bitops/le.h>
 
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 #include "coalesced_mmio.h"
-#endif
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
@@ -86,6 +86,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);
 
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+
 static bool kvm_rebooting;
 
 static bool largepages_enabled = true;
@@ -136,7 +138,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 	zalloc_cpumask_var(&cpus, GFP_ATOMIC);
 
-	spin_lock(&kvm->requests_lock);
+	raw_spin_lock(&kvm->requests_lock);
 	me = smp_processor_id();
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (test_and_set_bit(req, &vcpu->requests))
@@ -151,7 +153,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 		smp_call_function_many(cpus, ack_flush, NULL, 1);
 	else
 		called = false;
-	spin_unlock(&kvm->requests_lock);
+	raw_spin_unlock(&kvm->requests_lock);
 	free_cpumask_var(cpus);
 	return called;
 }
@@ -215,7 +217,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 					     unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush;
+	int need_tlb_flush, idx;
 
 	/*
 	 * When ->invalidate_page runs, the linux pte has been zapped
@@ -235,10 +237,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 	 * pte after kvm_unmap_hva returned, without noticing the page
 	 * is going to be freed.
 	 */
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
 	need_tlb_flush = kvm_unmap_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -252,11 +256,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
 					pte_t pte)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
 	kvm_set_spte_hva(kvm, address, pte);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 }
 
 static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
@@ -265,8 +272,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 					    unsigned long end)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush = 0;
+	int need_tlb_flush = 0, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	/*
 	 * The count increase must become visible at unlock time as no
@@ -277,6 +285,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	for (; start < end; start += PAGE_SIZE)
 		need_tlb_flush |= kvm_unmap_hva(kvm, start);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -314,11 +323,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 					      unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int young;
+	int young, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	young = kvm_age_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	if (young)
 		kvm_flush_remote_tlbs(kvm);
@@ -341,15 +352,26 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
 	.change_pte		= kvm_mmu_notifier_change_pte,
 	.release		= kvm_mmu_notifier_release,
 };
+
+static int kvm_init_mmu_notifier(struct kvm *kvm)
+{
+	kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
+	return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+}
+
+#else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
+
+static int kvm_init_mmu_notifier(struct kvm *kvm)
+{
+	return 0;
+}
+
 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
 
 static struct kvm *kvm_create_vm(void)
 {
-	int r = 0;
+	int r = 0, i;
 	struct kvm *kvm = kvm_arch_create_vm();
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-	struct page *page;
-#endif
 
 	if (IS_ERR(kvm))
 		goto out;
@@ -363,39 +385,35 @@ static struct kvm *kvm_create_vm(void)
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
 
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!page) {
-		r = -ENOMEM;
+	r = -ENOMEM;
+	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!kvm->memslots)
 		goto out_err;
-	}
-	kvm->coalesced_mmio_ring =
-			(struct kvm_coalesced_mmio_ring *)page_address(page);
-#endif
-
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
-	{
-		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
-		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
-		if (r) {
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-			put_page(page);
-#endif
+	if (init_srcu_struct(&kvm->srcu))
+		goto out_err;
+	for (i = 0; i < KVM_NR_BUSES; i++) {
+		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
+					GFP_KERNEL);
+		if (!kvm->buses[i]) {
+			cleanup_srcu_struct(&kvm->srcu);
 			goto out_err;
 		}
 	}
-#endif
+
+	r = kvm_init_mmu_notifier(kvm);
+	if (r) {
+		cleanup_srcu_struct(&kvm->srcu);
+		goto out_err;
+	}
 
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
 	spin_lock_init(&kvm->mmu_lock);
-	spin_lock_init(&kvm->requests_lock);
-	kvm_io_bus_init(&kvm->pio_bus);
+	raw_spin_lock_init(&kvm->requests_lock);
 	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
-	kvm_io_bus_init(&kvm->mmio_bus);
-	init_rwsem(&kvm->slots_lock);
+	mutex_init(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
@@ -406,12 +424,12 @@ static struct kvm *kvm_create_vm(void)
 out:
 	return kvm;
 
-#if defined(KVM_COALESCED_MMIO_PAGE_OFFSET) || \
-    (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER))
 out_err:
 	hardware_disable_all();
-#endif
 out_err_nodisable:
+	for (i = 0; i < KVM_NR_BUSES; i++)
+		kfree(kvm->buses[i]);
+	kfree(kvm->memslots);
 	kfree(kvm);
 	return ERR_PTR(r);
 }
@@ -446,13 +464,17 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 void kvm_free_physmem(struct kvm *kvm)
 {
 	int i;
+	struct kvm_memslots *slots = kvm->memslots;
+
+	for (i = 0; i < slots->nmemslots; ++i)
+		kvm_free_physmem_slot(&slots->memslots[i], NULL);
 
-	for (i = 0; i < kvm->nmemslots; ++i)
-		kvm_free_physmem_slot(&kvm->memslots[i], NULL);
+	kfree(kvm->memslots);
 }
 
 static void kvm_destroy_vm(struct kvm *kvm)
 {
+	int i;
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
@@ -460,12 +482,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	list_del(&kvm->vm_list);
 	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
-	kvm_io_bus_destroy(&kvm->pio_bus);
-	kvm_io_bus_destroy(&kvm->mmio_bus);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-	if (kvm->coalesced_mmio_ring != NULL)
-		free_page((unsigned long)kvm->coalesced_mmio_ring);
-#endif
+	for (i = 0; i < KVM_NR_BUSES; i++)
+		kvm_io_bus_destroy(kvm->buses[i]);
+	kvm_coalesced_mmio_free(kvm);
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 #else
@@ -512,12 +531,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem,
 			    int user_alloc)
 {
-	int r;
+	int r, flush_shadow = 0;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
+	struct kvm_memslots *slots, *old_memslots;
 
 	r = -EINVAL;
 	/* General sanity checks */
@@ -532,7 +552,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
 		goto out;
 
-	memslot = &kvm->memslots[mem->slot];
+	memslot = &kvm->memslots->memslots[mem->slot];
 	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
 	npages = mem->memory_size >> PAGE_SHIFT;
 
@@ -553,7 +573,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	/* Check for overlaps */
 	r = -EEXIST;
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *s = &kvm->memslots[i];
+		struct kvm_memory_slot *s = &kvm->memslots->memslots[i];
 
 		if (s == memslot || !s->npages)
 			continue;
@@ -579,15 +599,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
 		new.user_alloc = user_alloc;
-		/*
-		 * hva_to_rmmap() serialzies with the mmu_lock and to be
-		 * safe it has to ignore memslots with !user_alloc &&
-		 * !userspace_addr.
-		 */
-		if (user_alloc)
-			new.userspace_addr = mem->userspace_addr;
-		else
-			new.userspace_addr = 0;
+		new.userspace_addr = mem->userspace_addr;
 	}
 	if (!npages)
 		goto skip_lpage;
@@ -642,8 +654,9 @@ skip_lpage:
 		if (!new.dirty_bitmap)
 			goto out_free;
 		memset(new.dirty_bitmap, 0, dirty_bytes);
+		/* destroy any largepage mappings for dirty tracking */
 		if (old.npages)
-			kvm_arch_flush_shadow(kvm);
+			flush_shadow = 1;
 	}
 #else  /* not defined CONFIG_S390 */
 	new.user_alloc = user_alloc;
@@ -651,36 +664,72 @@ skip_lpage:
 	new.userspace_addr = mem->userspace_addr;
 #endif /* not defined CONFIG_S390 */
 
-	if (!npages)
+	if (!npages) {
+		r = -ENOMEM;
+		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+		if (!slots)
+			goto out_free;
+		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+		if (mem->slot >= slots->nmemslots)
+			slots->nmemslots = mem->slot + 1;
+		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+
+		old_memslots = kvm->memslots;
+		rcu_assign_pointer(kvm->memslots, slots);
+		synchronize_srcu_expedited(&kvm->srcu);
+		/* From this point no new shadow pages pointing to a deleted
+		 * memslot will be created.
+		 *
+		 * validation of sp->gfn happens in:
+		 * 	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+		 * 	- kvm_is_visible_gfn (mmu_check_roots)
+		 */
 		kvm_arch_flush_shadow(kvm);
+		kfree(old_memslots);
+	}
 
-	spin_lock(&kvm->mmu_lock);
-	if (mem->slot >= kvm->nmemslots)
-		kvm->nmemslots = mem->slot + 1;
-
-	*memslot = new;
-	spin_unlock(&kvm->mmu_lock);
-
-	r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
-	if (r) {
-		spin_lock(&kvm->mmu_lock);
-		*memslot = old;
-		spin_unlock(&kvm->mmu_lock);
+	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
+	if (r)
 		goto out_free;
-	}
 
-	kvm_free_physmem_slot(&old, npages ? &new : NULL);
-	/* Slot deletion case: we have to update the current slot */
-	spin_lock(&kvm->mmu_lock);
-	if (!npages)
-		*memslot = old;
-	spin_unlock(&kvm->mmu_lock);
 #ifdef CONFIG_DMAR
 	/* map the pages in iommu page table */
-	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
-	if (r)
-		goto out;
+	if (npages) {
+		r = kvm_iommu_map_pages(kvm, &new);
+		if (r)
+			goto out_free;
+	}
 #endif
+
+	r = -ENOMEM;
+	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!slots)
+		goto out_free;
+	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+	if (mem->slot >= slots->nmemslots)
+		slots->nmemslots = mem->slot + 1;
+
+	/* actual memory is freed via old in kvm_free_physmem_slot below */
+	if (!npages) {
+		new.rmap = NULL;
+		new.dirty_bitmap = NULL;
+		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
+			new.lpage_info[i] = NULL;
+	}
+
+	slots->memslots[mem->slot] = new;
+	old_memslots = kvm->memslots;
+	rcu_assign_pointer(kvm->memslots, slots);
+	synchronize_srcu_expedited(&kvm->srcu);
+
+	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
+
+	kvm_free_physmem_slot(&old, &new);
+	kfree(old_memslots);
+
+	if (flush_shadow)
+		kvm_arch_flush_shadow(kvm);
+
 	return 0;
 
 out_free:
@@ -697,9 +746,9 @@ int kvm_set_memory_region(struct kvm *kvm,
 {
 	int r;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	r = __kvm_set_memory_region(kvm, mem, user_alloc);
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
@@ -726,7 +775,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
-	memslot = &kvm->memslots[log->slot];
+	memslot = &kvm->memslots->memslots[log->slot];
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
@@ -780,9 +829,10 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
-	for (i = 0; i < kvm->nmemslots; ++i) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+	for (i = 0; i < slots->nmemslots; ++i) {
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
@@ -801,10 +851,14 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn_instantiation(kvm, gfn);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
+
+		if (memslot->flags & KVM_MEMSLOT_INVALID)
+			continue;
 
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
@@ -814,33 +868,68 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
+unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
+{
+	struct vm_area_struct *vma;
+	unsigned long addr, size;
+
+	size = PAGE_SIZE;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr))
+		return PAGE_SIZE;
+
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma(current->mm, addr);
+	if (!vma)
+		goto out;
+
+	size = vma_kernel_pagesize(vma);
+
+out:
+	up_read(&current->mm->mmap_sem);
+
+	return size;
+}
+
+int memslot_id(struct kvm *kvm, gfn_t gfn)
+{
+	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+	struct kvm_memory_slot *memslot = NULL;
+
+	gfn = unalias_gfn(kvm, gfn);
+	for (i = 0; i < slots->nmemslots; ++i) {
+		memslot = &slots->memslots[i];
+
+		if (gfn >= memslot->base_gfn
+		    && gfn < memslot->base_gfn + memslot->npages)
+			break;
+	}
+
+	return memslot - slots->memslots;
+}
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn_instantiation(kvm, gfn);
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
-	if (!slot)
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
 {
 	struct page *page[1];
-	unsigned long addr;
 	int npages;
 	pfn_t pfn;
 
 	might_sleep();
 
-	addr = gfn_to_hva(kvm, gfn);
-	if (kvm_is_error_hva(addr)) {
-		get_page(bad_page);
-		return page_to_pfn(bad_page);
-	}
-
 	npages = get_user_pages_fast(addr, 1, 1, page);
 
 	if (unlikely(npages != 1)) {
@@ -865,8 +954,32 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 	return pfn;
 }
 
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+{
+	unsigned long addr;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr)) {
+		get_page(bad_page);
+		return page_to_pfn(bad_page);
+	}
+
+	return hva_to_pfn(kvm, addr);
+}
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
+static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
+}
+
+pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
+			 struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
+	return hva_to_pfn(kvm, addr);
+}
+
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
 	pfn_t pfn;
@@ -1854,12 +1967,7 @@ static struct notifier_block kvm_reboot_notifier = {
 	.priority = 0,
 };
 
-void kvm_io_bus_init(struct kvm_io_bus *bus)
-{
-	memset(bus, 0, sizeof(*bus));
-}
-
-void kvm_io_bus_destroy(struct kvm_io_bus *bus)
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 {
 	int i;
 
@@ -1868,13 +1976,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 
 		kvm_iodevice_destructor(pos);
 	}
+	kfree(bus);
 }
 
 /* kvm_io_bus_write - called under kvm->slots_lock */
-int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
+int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		     int len, const void *val)
 {
 	int i;
+	struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
 	for (i = 0; i < bus->dev_count; i++)
 		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
 			return 0;
@@ -1882,59 +1992,71 @@ int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
 }
 
 /* kvm_io_bus_read - called under kvm->slots_lock */
-int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val)
+int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+		    int len, void *val)
 {
 	int i;
+	struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
+
 	for (i = 0; i < bus->dev_count; i++)
 		if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
 			return 0;
 	return -EOPNOTSUPP;
 }
 
-int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
-			    struct kvm_io_device *dev)
+/* Caller must hold slots_lock. */
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			    struct kvm_io_device *dev)
 {
-	int ret;
-
-	down_write(&kvm->slots_lock);
-	ret = __kvm_io_bus_register_dev(bus, dev);
-	up_write(&kvm->slots_lock);
+	struct kvm_io_bus *new_bus, *bus;
 
-	return ret;
-}
-
-/* An unlocked version. Caller must have write lock on slots_lock. */
-int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
-			      struct kvm_io_device *dev)
-{
+	bus = kvm->buses[bus_idx];
 	if (bus->dev_count > NR_IOBUS_DEVS-1)
 		return -ENOSPC;
 
-	bus->devs[bus->dev_count++] = dev;
+	new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+	if (!new_bus)
+		return -ENOMEM;
+	memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+	new_bus->devs[new_bus->dev_count++] = dev;
+	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+	synchronize_srcu_expedited(&kvm->srcu);
+	kfree(bus);
 
 	return 0;
 }
 
-void kvm_io_bus_unregister_dev(struct kvm *kvm,
-			       struct kvm_io_bus *bus,
-			       struct kvm_io_device *dev)
+/* Caller must hold slots_lock. */
+int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			      struct kvm_io_device *dev)
 {
-	down_write(&kvm->slots_lock);
-	__kvm_io_bus_unregister_dev(bus, dev);
-	up_write(&kvm->slots_lock);
-}
+	int i, r;
+	struct kvm_io_bus *new_bus, *bus;
 
-/* An unlocked version. Caller must have write lock on slots_lock. */
-void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
-				 struct kvm_io_device *dev)
-{
-	int i;
+	new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+	if (!new_bus)
+		return -ENOMEM;
 
-	for (i = 0; i < bus->dev_count; i++)
-		if (bus->devs[i] == dev) {
-			bus->devs[i] = bus->devs[--bus->dev_count];
+	bus = kvm->buses[bus_idx];
+	memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+
+	r = -ENOENT;
+	for (i = 0; i < new_bus->dev_count; i++)
+		if (new_bus->devs[i] == dev) {
+			r = 0;
+			new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
 			break;
 		}
+
+	if (r) {
+		kfree(new_bus);
+		return r;
+	}
+
+	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+	synchronize_srcu_expedited(&kvm->srcu);
+	kfree(bus);
+	return r;
 }
 
 static struct notifier_block kvm_cpu_notifier = {