author    Anton Vorontsov <cbouatmailru@gmail.com>  2008-07-29 18:05:23 -0400
committer Anton Vorontsov <cbouatmailru@gmail.com>  2008-07-29 18:05:23 -0400
commit    9fec6060d9e48ed7db0dac0e16d0f0f0e615b7f6 (patch)
tree      74b41f31a08f6500ff3dfcf64ba21e2d9a8e87e5 /virt
parent    fece418418f51e92dd7e67e17c5e3fe5a28d3279 (diff)
parent    6e86841d05f371b5b9b86ce76c02aaee83352298 (diff)
Merge branch 'master' of /home/cbou/linux-2.6
Conflicts:
	drivers/power/Kconfig
	drivers/power/Makefile
Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/coalesced_mmio.c | 156
-rw-r--r--  virt/kvm/coalesced_mmio.h |  23
-rw-r--r--  virt/kvm/ioapic.c         |  77
-rw-r--r--  virt/kvm/iodev.h          |   8
-rw-r--r--  virt/kvm/kvm_main.c       | 197
-rw-r--r--  virt/kvm/kvm_trace.c      |  18
6 files changed, 392 insertions, 87 deletions
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
new file mode 100644
index 000000000000..5ae620d32fac
--- /dev/null
+++ b/virt/kvm/coalesced_mmio.c
@@ -0,0 +1,156 @@
+/*
+ * KVM coalesced MMIO
+ *
+ * Copyright (c) 2008 Bull S.A.S.
+ *
+ * Author: Laurent Vivier <Laurent.Vivier@bull.net>
+ *
+ */
+
+#include "iodev.h"
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+
+#include "coalesced_mmio.h"
+
+static int coalesced_mmio_in_range(struct kvm_io_device *this,
+				   gpa_t addr, int len, int is_write)
+{
+	struct kvm_coalesced_mmio_dev *dev =
+				(struct kvm_coalesced_mmio_dev*)this->private;
+	struct kvm_coalesced_mmio_zone *zone;
+	int next;
+	int i;
+
+	if (!is_write)
+		return 0;
+
+	/* kvm->lock is taken by the caller and must be not released before
+	 * dev.read/write
+	 */
+
+	/* Are we able to batch it ? */
+
+	/* last is the first free entry
+	 * check if we don't meet the first used entry
+	 * there is always one unused entry in the buffer
+	 */
+
+	next = (dev->kvm->coalesced_mmio_ring->last + 1) %
+							KVM_COALESCED_MMIO_MAX;
+	if (next == dev->kvm->coalesced_mmio_ring->first) {
+		/* full */
+		return 0;
+	}
+
+	/* is it in a batchable area ? */
+
+	for (i = 0; i < dev->nb_zones; i++) {
+		zone = &dev->zone[i];
+
+		/* (addr,len) is fully included in
+		 * (zone->addr, zone->size)
+		 */
+
+		if (zone->addr <= addr &&
+		    addr + len <= zone->addr + zone->size)
+			return 1;
+	}
+	return 0;
+}
+
+static void coalesced_mmio_write(struct kvm_io_device *this,
+				 gpa_t addr, int len, const void *val)
+{
+	struct kvm_coalesced_mmio_dev *dev =
+				(struct kvm_coalesced_mmio_dev*)this->private;
+	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
+
+	/* kvm->lock must be taken by caller before call to in_range()*/
+
+	/* copy data in first free entry of the ring */
+
+	ring->coalesced_mmio[ring->last].phys_addr = addr;
+	ring->coalesced_mmio[ring->last].len = len;
+	memcpy(ring->coalesced_mmio[ring->last].data, val, len);
+	smp_wmb();
+	ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
+}
+
+static void coalesced_mmio_destructor(struct kvm_io_device *this)
+{
+	kfree(this);
+}
+
+int kvm_coalesced_mmio_init(struct kvm *kvm)
+{
+	struct kvm_coalesced_mmio_dev *dev;
+
+	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+	dev->dev.write = coalesced_mmio_write;
+	dev->dev.in_range = coalesced_mmio_in_range;
+	dev->dev.destructor = coalesced_mmio_destructor;
+	dev->dev.private = dev;
+	dev->kvm = kvm;
+	kvm->coalesced_mmio_dev = dev;
+	kvm_io_bus_register_dev(&kvm->mmio_bus, &dev->dev);
+
+	return 0;
+}
+
+int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
+					 struct kvm_coalesced_mmio_zone *zone)
+{
+	struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+	if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
+		mutex_unlock(&kvm->lock);
+		return -ENOBUFS;
+	}
+
+	dev->zone[dev->nb_zones] = *zone;
+	dev->nb_zones++;
+
+	mutex_unlock(&kvm->lock);
+	return 0;
+}
+
+int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
+					   struct kvm_coalesced_mmio_zone *zone)
+{
+	int i;
+	struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
+	struct kvm_coalesced_mmio_zone *z;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+
+	i = dev->nb_zones;
+	while(i) {
+		z = &dev->zone[i - 1];
+
+		/* unregister all zones
+		 * included in (zone->addr, zone->size)
+		 */
+
+		if (zone->addr <= z->addr &&
+		    z->addr + z->size <= zone->addr + zone->size) {
+			dev->nb_zones--;
+			*z = dev->zone[dev->nb_zones];
+		}
+		i--;
+	}
+
+	mutex_unlock(&kvm->lock);
+
+	return 0;
+}
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
new file mode 100644
index 000000000000..5ac0ec628461
--- /dev/null
+++ b/virt/kvm/coalesced_mmio.h
@@ -0,0 +1,23 @@
+/*
+ * KVM coalesced MMIO
+ *
+ * Copyright (c) 2008 Bull S.A.S.
+ *
+ * Author: Laurent Vivier <Laurent.Vivier@bull.net>
+ *
+ */
+
+#define KVM_COALESCED_MMIO_ZONE_MAX 100
+
+struct kvm_coalesced_mmio_dev {
+	struct kvm_io_device dev;
+	struct kvm *kvm;
+	int nb_zones;
+	struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
+};
+
+int kvm_coalesced_mmio_init(struct kvm *kvm);
+int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
+					 struct kvm_coalesced_mmio_zone *zone);
+int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
+					   struct kvm_coalesced_mmio_zone *zone);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 4232fd75dd20..c0d22870ee9c 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -45,7 +45,7 @@
 #else
 #define ioapic_debug(fmt, arg...)
 #endif
-static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
+static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
 
 static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
 					  unsigned long addr,
@@ -89,8 +89,8 @@ static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 	pent = &ioapic->redirtbl[idx];
 
 	if (!pent->fields.mask) {
-		ioapic_deliver(ioapic, idx);
-		if (pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
+		int injected = ioapic_deliver(ioapic, idx);
+		if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
 			pent->fields.remote_irr = 1;
 	}
 	if (!pent->fields.trig_mode)
@@ -133,7 +133,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 	}
 }
 
-static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
+static int ioapic_inj_irq(struct kvm_ioapic *ioapic,
 			   struct kvm_vcpu *vcpu,
 			   u8 vector, u8 trig_mode, u8 delivery_mode)
 {
@@ -143,7 +143,12 @@ static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
 	ASSERT((delivery_mode == IOAPIC_FIXED) ||
 	       (delivery_mode == IOAPIC_LOWEST_PRIORITY));
 
-	kvm_apic_set_irq(vcpu, vector, trig_mode);
+	return kvm_apic_set_irq(vcpu, vector, trig_mode);
+}
+
+static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
+{
+	kvm_inject_nmi(vcpu);
 }
 
 static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
@@ -186,7 +191,7 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
 	return mask;
 }
 
-static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
+static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 {
 	u8 dest = ioapic->redirtbl[irq].fields.dest_id;
 	u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode;
@@ -195,7 +200,7 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 	u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
 	u32 deliver_bitmask;
 	struct kvm_vcpu *vcpu;
-	int vcpu_id;
+	int vcpu_id, r = 0;
 
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
 		     "vector=%x trig_mode=%x\n",
@@ -204,7 +209,7 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 	deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
 	if (!deliver_bitmask) {
 		ioapic_debug("no target on destination\n");
-		return;
+		return 0;
 	}
 
 	switch (delivery_mode) {
@@ -216,7 +221,7 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 		vcpu = ioapic->kvm->vcpus[0];
 #endif
 		if (vcpu != NULL)
-			ioapic_inj_irq(ioapic, vcpu, vector,
+			r = ioapic_inj_irq(ioapic, vcpu, vector,
 				       trig_mode, delivery_mode);
 		else
 			ioapic_debug("null lowest prio vcpu: "
@@ -234,18 +239,30 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 			deliver_bitmask &= ~(1 << vcpu_id);
 			vcpu = ioapic->kvm->vcpus[vcpu_id];
 			if (vcpu) {
-				ioapic_inj_irq(ioapic, vcpu, vector,
+				r = ioapic_inj_irq(ioapic, vcpu, vector,
 					       trig_mode, delivery_mode);
 			}
 		}
 		break;
-
-		/* TODO: NMI */
+	case IOAPIC_NMI:
+		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+			if (!(deliver_bitmask & (1 << vcpu_id)))
+				continue;
+			deliver_bitmask &= ~(1 << vcpu_id);
+			vcpu = ioapic->kvm->vcpus[vcpu_id];
+			if (vcpu)
+				ioapic_inj_nmi(vcpu);
+			else
+				ioapic_debug("NMI to vcpu %d failed\n",
+					     vcpu->vcpu_id);
+		}
+		break;
 	default:
 		printk(KERN_WARNING "Unsupported delivery mode %d\n",
 		       delivery_mode);
 		break;
 	}
+	return r;
 }
 
 void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
@@ -268,38 +285,30 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 	}
 }
 
-static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
 {
-	int i;
-
-	for (i = 0; i < IOAPIC_NUM_PINS; i++)
-		if (ioapic->redirtbl[i].fields.vector == vector)
-			return i;
-	return -1;
-}
-
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
-{
-	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
 	union ioapic_redir_entry *ent;
-	int gsi;
-
-	gsi = get_eoi_gsi(ioapic, vector);
-	if (gsi == -1) {
-		printk(KERN_WARNING "Can't find redir item for %d EOI\n",
-		       vector);
-		return;
-	}
 
 	ent = &ioapic->redirtbl[gsi];
 	ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
 
 	ent->fields.remote_irr = 0;
 	if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
-		ioapic_deliver(ioapic, gsi);
+		ioapic_service(ioapic, gsi);
+}
+
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+{
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+	int i;
+
+	for (i = 0; i < IOAPIC_NUM_PINS; i++)
+		if (ioapic->redirtbl[i].fields.vector == vector)
+			__kvm_ioapic_update_eoi(ioapic, i);
 }
 
-static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
+static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr,
+			   int len, int is_write)
 {
 	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
 
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
index c14e642027b2..55e8846ac3a6 100644
--- a/virt/kvm/iodev.h
+++ b/virt/kvm/iodev.h
@@ -27,7 +27,8 @@ struct kvm_io_device {
 		      gpa_t addr,
 		      int len,
 		      const void *val);
-	int (*in_range)(struct kvm_io_device *this, gpa_t addr);
+	int (*in_range)(struct kvm_io_device *this, gpa_t addr, int len,
+			int is_write);
 	void (*destructor)(struct kvm_io_device *this);
 
 	void *private;
@@ -49,9 +50,10 @@ static inline void kvm_iodevice_write(struct kvm_io_device *dev,
 	dev->write(dev, addr, len, val);
 }
 
-static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr)
+static inline int kvm_iodevice_inrange(struct kvm_io_device *dev,
+				       gpa_t addr, int len, int is_write)
 {
-	return dev->in_range(dev, addr);
+	return dev->in_range(dev, addr, len, is_write);
 }
 
 static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e89338e2b043..a845890b6800 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,6 +47,10 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#include "coalesced_mmio.h"
+#endif
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -65,6 +69,8 @@ struct dentry *kvm_debugfs_dir;
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 			   unsigned long arg);
 
+bool kvm_rebooting;
+
 static inline int valid_vcpu(int n)
 {
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
@@ -99,10 +105,11 @@ static void ack_flush(void *_completed)
 
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-	int i, cpu;
+	int i, cpu, me;
 	cpumask_t cpus;
 	struct kvm_vcpu *vcpu;
 
+	me = get_cpu();
 	cpus_clear(cpus);
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		vcpu = kvm->vcpus[i];
@@ -111,21 +118,24 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 		if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
 			continue;
 		cpu = vcpu->cpu;
-		if (cpu != -1 && cpu != raw_smp_processor_id())
+		if (cpu != -1 && cpu != me)
 			cpu_set(cpu, cpus);
 	}
 	if (cpus_empty(cpus))
-		return;
+		goto out;
 	++kvm->stat.remote_tlb_flush;
 	smp_call_function_mask(cpus, ack_flush, NULL, 1);
+out:
+	put_cpu();
 }
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
-	int i, cpu;
+	int i, cpu, me;
 	cpumask_t cpus;
 	struct kvm_vcpu *vcpu;
 
+	me = get_cpu();
 	cpus_clear(cpus);
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		vcpu = kvm->vcpus[i];
@@ -134,12 +144,14 @@ void kvm_reload_remote_mmus(struct kvm *kvm)
 		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
 			continue;
 		cpu = vcpu->cpu;
-		if (cpu != -1 && cpu != raw_smp_processor_id())
+		if (cpu != -1 && cpu != me)
 			cpu_set(cpu, cpus);
 	}
 	if (cpus_empty(cpus))
-		return;
+		goto out;
 	smp_call_function_mask(cpus, ack_flush, NULL, 1);
+out:
+	put_cpu();
 }
 
 
@@ -183,10 +195,23 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
 static struct kvm *kvm_create_vm(void)
 {
 	struct kvm *kvm = kvm_arch_create_vm();
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	struct page *page;
+#endif
 
 	if (IS_ERR(kvm))
 		goto out;
 
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page) {
+		kfree(kvm);
+		return ERR_PTR(-ENOMEM);
+	}
+	kvm->coalesced_mmio_ring =
+			(struct kvm_coalesced_mmio_ring *)page_address(page);
+#endif
+
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
 	spin_lock_init(&kvm->mmu_lock);
@@ -198,6 +223,9 @@ static struct kvm *kvm_create_vm(void)
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	spin_unlock(&kvm_lock);
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	kvm_coalesced_mmio_init(kvm);
+#endif
 out:
 	return kvm;
 }
@@ -240,6 +268,10 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	spin_unlock(&kvm_lock);
 	kvm_io_bus_destroy(&kvm->pio_bus);
 	kvm_io_bus_destroy(&kvm->mmio_bus);
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	if (kvm->coalesced_mmio_ring != NULL)
+		free_page((unsigned long)kvm->coalesced_mmio_ring);
+#endif
 	kvm_arch_destroy_vm(kvm);
 	mmdrop(mm);
 }
@@ -333,6 +365,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	r = -ENOMEM;
 
 	/* Allocate if a slot is being created */
+#ifndef CONFIG_S390
 	if (npages && !new.rmap) {
 		new.rmap = vmalloc(npages * sizeof(struct page *));
 
@@ -373,10 +406,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			goto out_free;
 		memset(new.dirty_bitmap, 0, dirty_bytes);
 	}
+#endif /* not defined CONFIG_S390 */
 
 	if (mem->slot >= kvm->nmemslots)
 		kvm->nmemslots = mem->slot + 1;
 
+	if (!npages)
+		kvm_arch_flush_shadow(kvm);
+
 	*memslot = new;
 
 	r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
@@ -522,6 +559,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
+EXPORT_SYMBOL_GPL(gfn_to_hva);
 
 /*
  * Requires current->mm->mmap_sem to be held
@@ -531,6 +569,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 	struct page *page[1];
 	unsigned long addr;
 	int npages;
+	pfn_t pfn;
 
 	might_sleep();
 
@@ -543,19 +582,38 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 	npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
 				NULL);
 
-	if (npages != 1) {
-		get_page(bad_page);
-		return page_to_pfn(bad_page);
-	}
+	if (unlikely(npages != 1)) {
+		struct vm_area_struct *vma;
+
+		vma = find_vma(current->mm, addr);
+		if (vma == NULL || addr < vma->vm_start ||
+		    !(vma->vm_flags & VM_PFNMAP)) {
+			get_page(bad_page);
+			return page_to_pfn(bad_page);
+		}
+
+		pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+		BUG_ON(pfn_valid(pfn));
+	} else
+		pfn = page_to_pfn(page[0]);
 
-	return page_to_pfn(page[0]);
+	return pfn;
 }
 
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
-	return pfn_to_page(gfn_to_pfn(kvm, gfn));
+	pfn_t pfn;
+
+	pfn = gfn_to_pfn(kvm, gfn);
+	if (pfn_valid(pfn))
+		return pfn_to_page(pfn);
+
+	WARN_ON(!pfn_valid(pfn));
+
+	get_page(bad_page);
+	return bad_page;
 }
 
 EXPORT_SYMBOL_GPL(gfn_to_page);
@@ -568,7 +626,8 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
 void kvm_release_pfn_clean(pfn_t pfn)
 {
-	put_page(pfn_to_page(pfn));
+	if (pfn_valid(pfn))
+		put_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
 
@@ -593,21 +652,25 @@ EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
 
 void kvm_set_pfn_dirty(pfn_t pfn)
 {
-	struct page *page = pfn_to_page(pfn);
-	if (!PageReserved(page))
-		SetPageDirty(page);
+	if (pfn_valid(pfn)) {
+		struct page *page = pfn_to_page(pfn);
+		if (!PageReserved(page))
+			SetPageDirty(page);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
 
 void kvm_set_pfn_accessed(pfn_t pfn)
 {
-	mark_page_accessed(pfn_to_page(pfn));
+	if (pfn_valid(pfn))
+		mark_page_accessed(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
 
 void kvm_get_pfn(pfn_t pfn)
 {
-	get_page(pfn_to_page(pfn));
+	if (pfn_valid(pfn))
+		get_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_get_pfn);
 
@@ -757,25 +820,26 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
  */
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
-	DECLARE_WAITQUEUE(wait, current);
+	DEFINE_WAIT(wait);
+
+	for (;;) {
+		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-	add_wait_queue(&vcpu->wq, &wait);
+		if (kvm_cpu_has_interrupt(vcpu))
+			break;
+		if (kvm_cpu_has_pending_timer(vcpu))
+			break;
+		if (kvm_arch_vcpu_runnable(vcpu))
+			break;
+		if (signal_pending(current))
+			break;
 
-	/*
-	 * We will block until either an interrupt or a signal wakes us up
-	 */
-	while (!kvm_cpu_has_interrupt(vcpu)
-	       && !kvm_cpu_has_pending_timer(vcpu)
-	       && !signal_pending(current)
-	       && !kvm_arch_vcpu_runnable(vcpu)) {
-		set_current_state(TASK_INTERRUPTIBLE);
 		vcpu_put(vcpu);
 		schedule();
 		vcpu_load(vcpu);
 	}
 
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&vcpu->wq, &wait);
+	finish_wait(&vcpu->wq, &wait);
 }
 
 void kvm_resched(struct kvm_vcpu *vcpu)
@@ -797,6 +861,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
 		page = virt_to_page(vcpu->arch.pio_data);
 #endif
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
+		page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
+#endif
 	else
 		return VM_FAULT_SIGBUS;
 	get_page(page);
@@ -834,7 +902,7 @@ static const struct file_operations kvm_vcpu_fops = {
  */
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 {
-	int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu);
+	int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0);
 	if (fd < 0)
 		kvm_put_kvm(vcpu->kvm);
 	return fd;
@@ -1119,6 +1187,32 @@ static long kvm_vm_ioctl(struct file *filp,
 			goto out;
 		break;
 	}
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	case KVM_REGISTER_COALESCED_MMIO: {
+		struct kvm_coalesced_mmio_zone zone;
+		r = -EFAULT;
+		if (copy_from_user(&zone, argp, sizeof zone))
+			goto out;
+		r = -ENXIO;
+		r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_UNREGISTER_COALESCED_MMIO: {
+		struct kvm_coalesced_mmio_zone zone;
+		r = -EFAULT;
+		if (copy_from_user(&zone, argp, sizeof zone))
+			goto out;
+		r = -ENXIO;
+		r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
+#endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
@@ -1167,7 +1261,7 @@ static int kvm_dev_ioctl_create_vm(void)
 	kvm = kvm_create_vm();
 	if (IS_ERR(kvm))
 		return PTR_ERR(kvm);
-	fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm);
+	fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, 0);
 	if (fd < 0)
 		kvm_put_kvm(kvm);
 
@@ -1177,7 +1271,6 @@
 static long kvm_dev_ioctl(struct file *filp,
 			  unsigned int ioctl, unsigned long arg)
 {
-	void __user *argp = (void __user *)arg;
 	long r = -EINVAL;
 
 	switch (ioctl) {
@@ -1194,7 +1287,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = kvm_dev_ioctl_create_vm();
 		break;
 	case KVM_CHECK_EXTENSION:
-		r = kvm_dev_ioctl_check_extension((long)argp);
+		r = kvm_dev_ioctl_check_extension(arg);
 		break;
 	case KVM_GET_VCPU_MMAP_SIZE:
 		r = -EINVAL;
@@ -1204,6 +1297,9 @@ static long kvm_dev_ioctl(struct file *filp,
 #ifdef CONFIG_X86
 		r += PAGE_SIZE;    /* pio data page */
 #endif
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+		r += PAGE_SIZE;    /* coalesced mmio ring page */
+#endif
 		break;
 	case KVM_TRACE_ENABLE:
 	case KVM_TRACE_PAUSE:
@@ -1245,7 +1341,6 @@ static void hardware_disable(void *junk)
 	if (!cpu_isset(cpu, cpus_hardware_enabled))
 		return;
 	cpu_clear(cpu, cpus_hardware_enabled);
-	decache_vcpus_on_cpu(cpu);
 	kvm_arch_hardware_disable(NULL);
 }
 
@@ -1264,17 +1359,29 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 	case CPU_UP_CANCELED:
 		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
 		       cpu);
-		smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
+		smp_call_function_single(cpu, hardware_disable, NULL, 1);
 		break;
 	case CPU_ONLINE:
 		printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
 		       cpu);
-		smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
+		smp_call_function_single(cpu, hardware_enable, NULL, 1);
 		break;
 	}
 	return NOTIFY_OK;
 }
 
+
+asmlinkage void kvm_handle_fault_on_reboot(void)
+{
+	if (kvm_rebooting)
+		/* spin while reset goes on */
+		while (true)
+			;
+	/* Fault while not rebooting. We want the trace. */
+	BUG();
+}
+EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);
+
 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
 		      void *v)
 {
@@ -1284,7 +1391,8 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
 		 * in vmx root mode.
 		 */
 		printk(KERN_INFO "kvm: exiting hardware virtualization\n");
-		on_each_cpu(hardware_disable, NULL, 0, 1);
+		kvm_rebooting = true;
+		on_each_cpu(hardware_disable, NULL, 1);
 	}
 	return NOTIFY_OK;
 }
@@ -1310,14 +1418,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 	}
 }
 
-struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
+					  gpa_t addr, int len, int is_write)
 {
 	int i;
 
 	for (i = 0; i < bus->dev_count; i++) {
 		struct kvm_io_device *pos = bus->devs[i];
 
-		if (pos->in_range(pos, addr))
+		if (pos->in_range(pos, addr, len, is_write))
 			return pos;
 	}
 
@@ -1472,12 +1581,12 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 	for_each_online_cpu(cpu) {
 		smp_call_function_single(cpu,
 				kvm_arch_check_processor_compat,
-				&r, 0, 1);
+				&r, 1);
 		if (r < 0)
 			goto out_free_1;
 	}
 
-	on_each_cpu(hardware_enable, NULL, 0, 1);
+	on_each_cpu(hardware_enable, NULL, 1);
 	r = register_cpu_notifier(&kvm_cpu_notifier);
 	if (r)
 		goto out_free_2;
@@ -1523,7 +1632,7 @@ out_free_3:
 	unregister_reboot_notifier(&kvm_reboot_notifier);
 	unregister_cpu_notifier(&kvm_cpu_notifier);
 out_free_2:
-	on_each_cpu(hardware_disable, NULL, 0, 1);
+	on_each_cpu(hardware_disable, NULL, 1);
 out_free_1:
 	kvm_arch_hardware_unsetup();
 out_free_0:
@@ -1545,7 +1654,7 @@ void kvm_exit(void)
 	sysdev_class_unregister(&kvm_sysdev_class);
 	unregister_reboot_notifier(&kvm_reboot_notifier);
 	unregister_cpu_notifier(&kvm_cpu_notifier);
-	on_each_cpu(hardware_disable, NULL, 0, 1);
+	on_each_cpu(hardware_disable, NULL, 1);
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
 	kvm_exit_debug();
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
index 0e495470788d..58141f31ea8f 100644
--- a/virt/kvm/kvm_trace.c
+++ b/virt/kvm/kvm_trace.c
@@ -72,11 +72,7 @@ static void kvm_add_trace(void *probe_private, void *call_data,
 	rec.cycle_in = p->cycle_in;
 
 	if (rec.cycle_in) {
-		u64 cycle = 0;
-
-		cycle = get_cycles();
-		rec.u.cycle.cycle_lo = (u32)cycle;
-		rec.u.cycle.cycle_hi = (u32)(cycle >> 32);
+		rec.u.cycle.cycle_u64 = get_cycles();
 
 		for (i = 0; i < rec.extra_u32; i++)
 			rec.u.cycle.extra_u32[i] = va_arg(*args, u32);
@@ -114,8 +110,18 @@ static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
 {
 	struct kvm_trace *kt;
 
-	if (!relay_buf_full(buf))
+	if (!relay_buf_full(buf)) {
+		if (!prev_subbuf) {
+			/*
+			 * executed only once when the channel is opened
+			 * save metadata as first record
+			 */
+			subbuf_start_reserve(buf, sizeof(u32));
+			*(u32 *)subbuf = 0x12345678;
+		}
+
 		return 1;
+	}
 
 	kt = buf->chan->private_data;
 	atomic_inc(&kt->lost_records);