Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/Kconfig          |  14
-rw-r--r--  virt/kvm/coalesced_mmio.c |  74
-rw-r--r--  virt/kvm/coalesced_mmio.h |   1
-rw-r--r--  virt/kvm/eventfd.c        | 578
-rw-r--r--  virt/kvm/ioapic.c         |  78
-rw-r--r--  virt/kvm/iodev.h          |  55
-rw-r--r--  virt/kvm/irq_comm.c       |  51
-rw-r--r--  virt/kvm/kvm_main.c       | 298
-rw-r--r--  virt/kvm/kvm_trace.c      | 285
9 files changed, 963 insertions, 471 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
new file mode 100644
index 000000000000..daece36c0a57
--- /dev/null
+++ b/virt/kvm/Kconfig
@@ -0,0 +1,14 @@
+# KVM common configuration items and defaults
+
+config HAVE_KVM
+	bool
+
+config HAVE_KVM_IRQCHIP
+	bool
+
+config HAVE_KVM_EVENTFD
+	bool
+	select EVENTFD
+
+config KVM_APIC_ARCHITECTURE
+	bool
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 5ae620d32fac..04d69cd7049b 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -14,32 +14,28 @@
14 | 14 | ||
15 | #include "coalesced_mmio.h" | 15 | #include "coalesced_mmio.h" |
16 | 16 | ||
17 | static int coalesced_mmio_in_range(struct kvm_io_device *this, | 17 | static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev) |
18 | gpa_t addr, int len, int is_write) | 18 | { |
19 | return container_of(dev, struct kvm_coalesced_mmio_dev, dev); | ||
20 | } | ||
21 | |||
22 | static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, | ||
23 | gpa_t addr, int len) | ||
19 | { | 24 | { |
20 | struct kvm_coalesced_mmio_dev *dev = | ||
21 | (struct kvm_coalesced_mmio_dev*)this->private; | ||
22 | struct kvm_coalesced_mmio_zone *zone; | 25 | struct kvm_coalesced_mmio_zone *zone; |
23 | int next; | 26 | struct kvm_coalesced_mmio_ring *ring; |
27 | unsigned avail; | ||
24 | int i; | 28 | int i; |
25 | 29 | ||
26 | if (!is_write) | ||
27 | return 0; | ||
28 | |||
29 | /* kvm->lock is taken by the caller and must be not released before | ||
30 | * dev.read/write | ||
31 | */ | ||
32 | |||
33 | /* Are we able to batch it ? */ | 30 | /* Are we able to batch it ? */ |
34 | 31 | ||
35 | /* last is the first free entry | 32 | /* last is the first free entry |
36 | * check if we don't meet the first used entry | 33 | * check if we don't meet the first used entry |
37 | * there is always one unused entry in the buffer | 34 | * there is always one unused entry in the buffer |
38 | */ | 35 | */ |
39 | 36 | ring = dev->kvm->coalesced_mmio_ring; | |
40 | next = (dev->kvm->coalesced_mmio_ring->last + 1) % | 37 | avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX; |
41 | KVM_COALESCED_MMIO_MAX; | 38 | if (avail < KVM_MAX_VCPUS) { |
42 | if (next == dev->kvm->coalesced_mmio_ring->first) { | ||
43 | /* full */ | 39 | /* full */ |
44 | return 0; | 40 | return 0; |
45 | } | 41 | } |
@@ -60,14 +56,15 @@ static int coalesced_mmio_in_range(struct kvm_io_device *this, | |||
60 | return 0; | 56 | return 0; |
61 | } | 57 | } |
62 | 58 | ||
63 | static void coalesced_mmio_write(struct kvm_io_device *this, | 59 | static int coalesced_mmio_write(struct kvm_io_device *this, |
64 | gpa_t addr, int len, const void *val) | 60 | gpa_t addr, int len, const void *val) |
65 | { | 61 | { |
66 | struct kvm_coalesced_mmio_dev *dev = | 62 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); |
67 | (struct kvm_coalesced_mmio_dev*)this->private; | ||
68 | struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; | 63 | struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; |
64 | if (!coalesced_mmio_in_range(dev, addr, len)) | ||
65 | return -EOPNOTSUPP; | ||
69 | 66 | ||
70 | /* kvm->lock must be taken by caller before call to in_range()*/ | 67 | spin_lock(&dev->lock); |
71 | 68 | ||
72 | /* copy data in first free entry of the ring */ | 69 | /* copy data in first free entry of the ring */ |
73 | 70 | ||
@@ -76,29 +73,40 @@ static void coalesced_mmio_write(struct kvm_io_device *this, | |||
76 | memcpy(ring->coalesced_mmio[ring->last].data, val, len); | 73 | memcpy(ring->coalesced_mmio[ring->last].data, val, len); |
77 | smp_wmb(); | 74 | smp_wmb(); |
78 | ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; | 75 | ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; |
76 | spin_unlock(&dev->lock); | ||
77 | return 0; | ||
79 | } | 78 | } |
80 | 79 | ||
81 | static void coalesced_mmio_destructor(struct kvm_io_device *this) | 80 | static void coalesced_mmio_destructor(struct kvm_io_device *this) |
82 | { | 81 | { |
83 | kfree(this); | 82 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); |
83 | |||
84 | kfree(dev); | ||
84 | } | 85 | } |
85 | 86 | ||
87 | static const struct kvm_io_device_ops coalesced_mmio_ops = { | ||
88 | .write = coalesced_mmio_write, | ||
89 | .destructor = coalesced_mmio_destructor, | ||
90 | }; | ||
91 | |||
86 | int kvm_coalesced_mmio_init(struct kvm *kvm) | 92 | int kvm_coalesced_mmio_init(struct kvm *kvm) |
87 | { | 93 | { |
88 | struct kvm_coalesced_mmio_dev *dev; | 94 | struct kvm_coalesced_mmio_dev *dev; |
95 | int ret; | ||
89 | 96 | ||
90 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); | 97 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); |
91 | if (!dev) | 98 | if (!dev) |
92 | return -ENOMEM; | 99 | return -ENOMEM; |
93 | dev->dev.write = coalesced_mmio_write; | 100 | spin_lock_init(&dev->lock); |
94 | dev->dev.in_range = coalesced_mmio_in_range; | 101 | kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); |
95 | dev->dev.destructor = coalesced_mmio_destructor; | ||
96 | dev->dev.private = dev; | ||
97 | dev->kvm = kvm; | 102 | dev->kvm = kvm; |
98 | kvm->coalesced_mmio_dev = dev; | 103 | kvm->coalesced_mmio_dev = dev; |
99 | kvm_io_bus_register_dev(&kvm->mmio_bus, &dev->dev); | ||
100 | 104 | ||
101 | return 0; | 105 | ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev); |
106 | if (ret < 0) | ||
107 | kfree(dev); | ||
108 | |||
109 | return ret; | ||
102 | } | 110 | } |
103 | 111 | ||
104 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | 112 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, |
@@ -109,16 +117,16 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | |||
109 | if (dev == NULL) | 117 | if (dev == NULL) |
110 | return -EINVAL; | 118 | return -EINVAL; |
111 | 119 | ||
112 | mutex_lock(&kvm->lock); | 120 | down_write(&kvm->slots_lock); |
113 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { | 121 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { |
114 | mutex_unlock(&kvm->lock); | 122 | up_write(&kvm->slots_lock); |
115 | return -ENOBUFS; | 123 | return -ENOBUFS; |
116 | } | 124 | } |
117 | 125 | ||
118 | dev->zone[dev->nb_zones] = *zone; | 126 | dev->zone[dev->nb_zones] = *zone; |
119 | dev->nb_zones++; | 127 | dev->nb_zones++; |
120 | 128 | ||
121 | mutex_unlock(&kvm->lock); | 129 | up_write(&kvm->slots_lock); |
122 | return 0; | 130 | return 0; |
123 | } | 131 | } |
124 | 132 | ||
@@ -132,7 +140,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | |||
132 | if (dev == NULL) | 140 | if (dev == NULL) |
133 | return -EINVAL; | 141 | return -EINVAL; |
134 | 142 | ||
135 | mutex_lock(&kvm->lock); | 143 | down_write(&kvm->slots_lock); |
136 | 144 | ||
137 | i = dev->nb_zones; | 145 | i = dev->nb_zones; |
138 | while(i) { | 146 | while(i) { |
@@ -150,7 +158,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | |||
150 | i--; | 158 | i--; |
151 | } | 159 | } |
152 | 160 | ||
153 | mutex_unlock(&kvm->lock); | 161 | up_write(&kvm->slots_lock); |
154 | 162 | ||
155 | return 0; | 163 | return 0; |
156 | } | 164 | } |
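
The rewritten coalesced_mmio_write() above now claims or declines a transaction by its return value (-EOPNOTSUPP when the address misses every registered zone or the ring is full) and serializes ring updates with the new per-device spinlock instead of kvm->lock. The ring it appends to is shared with user space, which drains it after returning from KVM_RUN. Below is a minimal sketch of that consumer side, assuming a 4096-byte ring page mapped at KVM_COALESCED_MMIO_PAGE_OFFSET and a hypothetical handle_mmio() dispatch helper (neither that helper nor the local COALESCED_MMIO_MAX macro is part of this patch):

    #include <linux/kvm.h>	/* struct kvm_coalesced_mmio_ring, struct kvm_coalesced_mmio */

    /* Assumes a 4096-byte ring page; the kernel sizes its ring the same way. */
    #define COALESCED_MMIO_MAX \
    	((4096 - sizeof(struct kvm_coalesced_mmio_ring)) / \
    	 sizeof(struct kvm_coalesced_mmio))

    void handle_mmio(__u64 addr, const void *data, __u32 len);	/* hypothetical VMM helper */

    void drain_coalesced_ring(struct kvm_coalesced_mmio_ring *ring)
    {
    	/* Entries between 'first' and 'last' were batched by coalesced_mmio_write(). */
    	while (ring->first != ring->last) {
    		struct kvm_coalesced_mmio *m = &ring->coalesced_mmio[ring->first];

    		handle_mmio(m->phys_addr, m->data, m->len);
    		__sync_synchronize();	/* finish reading the entry before freeing its slot */
    		ring->first = (ring->first + 1) % COALESCED_MMIO_MAX;
    	}
    }
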
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
index 5ac0ec628461..4b49f27fa31e 100644
--- a/virt/kvm/coalesced_mmio.h
+++ b/virt/kvm/coalesced_mmio.h
@@ -12,6 +12,7 @@
 struct kvm_coalesced_mmio_dev {
 	struct kvm_io_device dev;
 	struct kvm *kvm;
+	spinlock_t lock;
 	int nb_zones;
 	struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
 };
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
new file mode 100644
index 000000000000..bb4ebd89b9ff
--- /dev/null
+++ b/virt/kvm/eventfd.c
@@ -0,0 +1,578 @@
1 | /* | ||
2 | * kvm eventfd support - use eventfd objects to signal various KVM events | ||
3 | * | ||
4 | * Copyright 2009 Novell. All Rights Reserved. | ||
5 | * | ||
6 | * Author: | ||
7 | * Gregory Haskins <ghaskins@novell.com> | ||
8 | * | ||
9 | * This file is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of version 2 of the GNU General Public License | ||
11 | * as published by the Free Software Foundation. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software Foundation, | ||
20 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | ||
21 | */ | ||
22 | |||
23 | #include <linux/kvm_host.h> | ||
24 | #include <linux/kvm.h> | ||
25 | #include <linux/workqueue.h> | ||
26 | #include <linux/syscalls.h> | ||
27 | #include <linux/wait.h> | ||
28 | #include <linux/poll.h> | ||
29 | #include <linux/file.h> | ||
30 | #include <linux/list.h> | ||
31 | #include <linux/eventfd.h> | ||
32 | #include <linux/kernel.h> | ||
33 | |||
34 | #include "iodev.h" | ||
35 | |||
36 | /* | ||
37 | * -------------------------------------------------------------------- | ||
38 | * irqfd: Allows an fd to be used to inject an interrupt to the guest | ||
39 | * | ||
40 | * Credit goes to Avi Kivity for the original idea. | ||
41 | * -------------------------------------------------------------------- | ||
42 | */ | ||
43 | |||
44 | struct _irqfd { | ||
45 | struct kvm *kvm; | ||
46 | struct eventfd_ctx *eventfd; | ||
47 | int gsi; | ||
48 | struct list_head list; | ||
49 | poll_table pt; | ||
50 | wait_queue_head_t *wqh; | ||
51 | wait_queue_t wait; | ||
52 | struct work_struct inject; | ||
53 | struct work_struct shutdown; | ||
54 | }; | ||
55 | |||
56 | static struct workqueue_struct *irqfd_cleanup_wq; | ||
57 | |||
58 | static void | ||
59 | irqfd_inject(struct work_struct *work) | ||
60 | { | ||
61 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); | ||
62 | struct kvm *kvm = irqfd->kvm; | ||
63 | |||
64 | mutex_lock(&kvm->irq_lock); | ||
65 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); | ||
66 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); | ||
67 | mutex_unlock(&kvm->irq_lock); | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * Race-free decouple logic (ordering is critical) | ||
72 | */ | ||
73 | static void | ||
74 | irqfd_shutdown(struct work_struct *work) | ||
75 | { | ||
76 | struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); | ||
77 | |||
78 | /* | ||
79 | * Synchronize with the wait-queue and unhook ourselves to prevent | ||
80 | * further events. | ||
81 | */ | ||
82 | remove_wait_queue(irqfd->wqh, &irqfd->wait); | ||
83 | |||
84 | /* | ||
85 | * We know no new events will be scheduled at this point, so block | ||
86 | * until all previously outstanding events have completed | ||
87 | */ | ||
88 | flush_work(&irqfd->inject); | ||
89 | |||
90 | /* | ||
91 | * It is now safe to release the object's resources | ||
92 | */ | ||
93 | eventfd_ctx_put(irqfd->eventfd); | ||
94 | kfree(irqfd); | ||
95 | } | ||
96 | |||
97 | |||
98 | /* assumes kvm->irqfds.lock is held */ | ||
99 | static bool | ||
100 | irqfd_is_active(struct _irqfd *irqfd) | ||
101 | { | ||
102 | return list_empty(&irqfd->list) ? false : true; | ||
103 | } | ||
104 | |||
105 | /* | ||
106 | * Mark the irqfd as inactive and schedule it for removal | ||
107 | * | ||
108 | * assumes kvm->irqfds.lock is held | ||
109 | */ | ||
110 | static void | ||
111 | irqfd_deactivate(struct _irqfd *irqfd) | ||
112 | { | ||
113 | BUG_ON(!irqfd_is_active(irqfd)); | ||
114 | |||
115 | list_del_init(&irqfd->list); | ||
116 | |||
117 | queue_work(irqfd_cleanup_wq, &irqfd->shutdown); | ||
118 | } | ||
119 | |||
120 | /* | ||
121 | * Called with wqh->lock held and interrupts disabled | ||
122 | */ | ||
123 | static int | ||
124 | irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) | ||
125 | { | ||
126 | struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); | ||
127 | unsigned long flags = (unsigned long)key; | ||
128 | |||
129 | if (flags & POLLIN) | ||
130 | /* An event has been signaled, inject an interrupt */ | ||
131 | schedule_work(&irqfd->inject); | ||
132 | |||
133 | if (flags & POLLHUP) { | ||
134 | /* The eventfd is closing, detach from KVM */ | ||
135 | struct kvm *kvm = irqfd->kvm; | ||
136 | unsigned long flags; | ||
137 | |||
138 | spin_lock_irqsave(&kvm->irqfds.lock, flags); | ||
139 | |||
140 | /* | ||
141 | * We must check if someone deactivated the irqfd before | ||
142 | * we could acquire the irqfds.lock since the item is | ||
143 | * deactivated from the KVM side before it is unhooked from | ||
144 | * the wait-queue. If it is already deactivated, we can | ||
145 | * simply return knowing the other side will cleanup for us. | ||
146 | * We cannot race against the irqfd going away since the | ||
147 | * other side is required to acquire wqh->lock, which we hold | ||
148 | */ | ||
149 | if (irqfd_is_active(irqfd)) | ||
150 | irqfd_deactivate(irqfd); | ||
151 | |||
152 | spin_unlock_irqrestore(&kvm->irqfds.lock, flags); | ||
153 | } | ||
154 | |||
155 | return 0; | ||
156 | } | ||
157 | |||
158 | static void | ||
159 | irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, | ||
160 | poll_table *pt) | ||
161 | { | ||
162 | struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); | ||
163 | |||
164 | irqfd->wqh = wqh; | ||
165 | add_wait_queue(wqh, &irqfd->wait); | ||
166 | } | ||
167 | |||
168 | static int | ||
169 | kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) | ||
170 | { | ||
171 | struct _irqfd *irqfd; | ||
172 | struct file *file = NULL; | ||
173 | struct eventfd_ctx *eventfd = NULL; | ||
174 | int ret; | ||
175 | unsigned int events; | ||
176 | |||
177 | irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); | ||
178 | if (!irqfd) | ||
179 | return -ENOMEM; | ||
180 | |||
181 | irqfd->kvm = kvm; | ||
182 | irqfd->gsi = gsi; | ||
183 | INIT_LIST_HEAD(&irqfd->list); | ||
184 | INIT_WORK(&irqfd->inject, irqfd_inject); | ||
185 | INIT_WORK(&irqfd->shutdown, irqfd_shutdown); | ||
186 | |||
187 | file = eventfd_fget(fd); | ||
188 | if (IS_ERR(file)) { | ||
189 | ret = PTR_ERR(file); | ||
190 | goto fail; | ||
191 | } | ||
192 | |||
193 | eventfd = eventfd_ctx_fileget(file); | ||
194 | if (IS_ERR(eventfd)) { | ||
195 | ret = PTR_ERR(eventfd); | ||
196 | goto fail; | ||
197 | } | ||
198 | |||
199 | irqfd->eventfd = eventfd; | ||
200 | |||
201 | /* | ||
202 | * Install our own custom wake-up handling so we are notified via | ||
203 | * a callback whenever someone signals the underlying eventfd | ||
204 | */ | ||
205 | init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup); | ||
206 | init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc); | ||
207 | |||
208 | events = file->f_op->poll(file, &irqfd->pt); | ||
209 | |||
210 | spin_lock_irq(&kvm->irqfds.lock); | ||
211 | list_add_tail(&irqfd->list, &kvm->irqfds.items); | ||
212 | spin_unlock_irq(&kvm->irqfds.lock); | ||
213 | |||
214 | /* | ||
215 | * Check if there was an event already pending on the eventfd | ||
216 | * before we registered, and trigger it as if we didn't miss it. | ||
217 | */ | ||
218 | if (events & POLLIN) | ||
219 | schedule_work(&irqfd->inject); | ||
220 | |||
221 | /* | ||
222 | * do not drop the file until the irqfd is fully initialized, otherwise | ||
223 | * we might race against the POLLHUP | ||
224 | */ | ||
225 | fput(file); | ||
226 | |||
227 | return 0; | ||
228 | |||
229 | fail: | ||
230 | if (eventfd && !IS_ERR(eventfd)) | ||
231 | eventfd_ctx_put(eventfd); | ||
232 | |||
233 | if (!IS_ERR(file)) | ||
234 | fput(file); | ||
235 | |||
236 | kfree(irqfd); | ||
237 | return ret; | ||
238 | } | ||
239 | |||
240 | void | ||
241 | kvm_eventfd_init(struct kvm *kvm) | ||
242 | { | ||
243 | spin_lock_init(&kvm->irqfds.lock); | ||
244 | INIT_LIST_HEAD(&kvm->irqfds.items); | ||
245 | INIT_LIST_HEAD(&kvm->ioeventfds); | ||
246 | } | ||
247 | |||
248 | /* | ||
249 | * shutdown any irqfd's that match fd+gsi | ||
250 | */ | ||
251 | static int | ||
252 | kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) | ||
253 | { | ||
254 | struct _irqfd *irqfd, *tmp; | ||
255 | struct eventfd_ctx *eventfd; | ||
256 | |||
257 | eventfd = eventfd_ctx_fdget(fd); | ||
258 | if (IS_ERR(eventfd)) | ||
259 | return PTR_ERR(eventfd); | ||
260 | |||
261 | spin_lock_irq(&kvm->irqfds.lock); | ||
262 | |||
263 | list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { | ||
264 | if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) | ||
265 | irqfd_deactivate(irqfd); | ||
266 | } | ||
267 | |||
268 | spin_unlock_irq(&kvm->irqfds.lock); | ||
269 | eventfd_ctx_put(eventfd); | ||
270 | |||
271 | /* | ||
272 | * Block until we know all outstanding shutdown jobs have completed | ||
273 | * so that we guarantee there will not be any more interrupts on this | ||
274 | * gsi once this deassign function returns. | ||
275 | */ | ||
276 | flush_workqueue(irqfd_cleanup_wq); | ||
277 | |||
278 | return 0; | ||
279 | } | ||
280 | |||
281 | int | ||
282 | kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) | ||
283 | { | ||
284 | if (flags & KVM_IRQFD_FLAG_DEASSIGN) | ||
285 | return kvm_irqfd_deassign(kvm, fd, gsi); | ||
286 | |||
287 | return kvm_irqfd_assign(kvm, fd, gsi); | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * This function is called as the kvm VM fd is being released. Shutdown all | ||
292 | * irqfds that still remain open | ||
293 | */ | ||
294 | void | ||
295 | kvm_irqfd_release(struct kvm *kvm) | ||
296 | { | ||
297 | struct _irqfd *irqfd, *tmp; | ||
298 | |||
299 | spin_lock_irq(&kvm->irqfds.lock); | ||
300 | |||
301 | list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) | ||
302 | irqfd_deactivate(irqfd); | ||
303 | |||
304 | spin_unlock_irq(&kvm->irqfds.lock); | ||
305 | |||
306 | /* | ||
307 | * Block until we know all outstanding shutdown jobs have completed | ||
308 | * since we do not take a kvm* reference. | ||
309 | */ | ||
310 | flush_workqueue(irqfd_cleanup_wq); | ||
311 | |||
312 | } | ||
313 | |||
314 | /* | ||
315 | * create a host-wide workqueue for issuing deferred shutdown requests | ||
316 | * aggregated from all vm* instances. We need our own isolated single-thread | ||
317 | * queue to prevent deadlock against flushing the normal work-queue. | ||
318 | */ | ||
319 | static int __init irqfd_module_init(void) | ||
320 | { | ||
321 | irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup"); | ||
322 | if (!irqfd_cleanup_wq) | ||
323 | return -ENOMEM; | ||
324 | |||
325 | return 0; | ||
326 | } | ||
327 | |||
328 | static void __exit irqfd_module_exit(void) | ||
329 | { | ||
330 | destroy_workqueue(irqfd_cleanup_wq); | ||
331 | } | ||
332 | |||
333 | module_init(irqfd_module_init); | ||
334 | module_exit(irqfd_module_exit); | ||
335 | |||
336 | /* | ||
337 | * -------------------------------------------------------------------- | ||
338 | * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal. | ||
339 | * | ||
340 | * userspace can register a PIO/MMIO address with an eventfd for receiving | ||
341 | * notification when the memory has been touched. | ||
342 | * -------------------------------------------------------------------- | ||
343 | */ | ||
344 | |||
345 | struct _ioeventfd { | ||
346 | struct list_head list; | ||
347 | u64 addr; | ||
348 | int length; | ||
349 | struct eventfd_ctx *eventfd; | ||
350 | u64 datamatch; | ||
351 | struct kvm_io_device dev; | ||
352 | bool wildcard; | ||
353 | }; | ||
354 | |||
355 | static inline struct _ioeventfd * | ||
356 | to_ioeventfd(struct kvm_io_device *dev) | ||
357 | { | ||
358 | return container_of(dev, struct _ioeventfd, dev); | ||
359 | } | ||
360 | |||
361 | static void | ||
362 | ioeventfd_release(struct _ioeventfd *p) | ||
363 | { | ||
364 | eventfd_ctx_put(p->eventfd); | ||
365 | list_del(&p->list); | ||
366 | kfree(p); | ||
367 | } | ||
368 | |||
369 | static bool | ||
370 | ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val) | ||
371 | { | ||
372 | u64 _val; | ||
373 | |||
374 | if (!(addr == p->addr && len == p->length)) | ||
375 | /* address-range must be precise for a hit */ | ||
376 | return false; | ||
377 | |||
378 | if (p->wildcard) | ||
379 | /* all else equal, wildcard is always a hit */ | ||
380 | return true; | ||
381 | |||
382 | /* otherwise, we have to actually compare the data */ | ||
383 | |||
384 | BUG_ON(!IS_ALIGNED((unsigned long)val, len)); | ||
385 | |||
386 | switch (len) { | ||
387 | case 1: | ||
388 | _val = *(u8 *)val; | ||
389 | break; | ||
390 | case 2: | ||
391 | _val = *(u16 *)val; | ||
392 | break; | ||
393 | case 4: | ||
394 | _val = *(u32 *)val; | ||
395 | break; | ||
396 | case 8: | ||
397 | _val = *(u64 *)val; | ||
398 | break; | ||
399 | default: | ||
400 | return false; | ||
401 | } | ||
402 | |||
403 | return _val == p->datamatch ? true : false; | ||
404 | } | ||
405 | |||
406 | /* MMIO/PIO writes trigger an event if the addr/val match */ | ||
407 | static int | ||
408 | ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len, | ||
409 | const void *val) | ||
410 | { | ||
411 | struct _ioeventfd *p = to_ioeventfd(this); | ||
412 | |||
413 | if (!ioeventfd_in_range(p, addr, len, val)) | ||
414 | return -EOPNOTSUPP; | ||
415 | |||
416 | eventfd_signal(p->eventfd, 1); | ||
417 | return 0; | ||
418 | } | ||
419 | |||
420 | /* | ||
421 | * This function is called as KVM is completely shutting down. We do not | ||
422 | * need to worry about locking just nuke anything we have as quickly as possible | ||
423 | */ | ||
424 | static void | ||
425 | ioeventfd_destructor(struct kvm_io_device *this) | ||
426 | { | ||
427 | struct _ioeventfd *p = to_ioeventfd(this); | ||
428 | |||
429 | ioeventfd_release(p); | ||
430 | } | ||
431 | |||
432 | static const struct kvm_io_device_ops ioeventfd_ops = { | ||
433 | .write = ioeventfd_write, | ||
434 | .destructor = ioeventfd_destructor, | ||
435 | }; | ||
436 | |||
437 | /* assumes kvm->slots_lock held */ | ||
438 | static bool | ||
439 | ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) | ||
440 | { | ||
441 | struct _ioeventfd *_p; | ||
442 | |||
443 | list_for_each_entry(_p, &kvm->ioeventfds, list) | ||
444 | if (_p->addr == p->addr && _p->length == p->length && | ||
445 | (_p->wildcard || p->wildcard || | ||
446 | _p->datamatch == p->datamatch)) | ||
447 | return true; | ||
448 | |||
449 | return false; | ||
450 | } | ||
451 | |||
452 | static int | ||
453 | kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | ||
454 | { | ||
455 | int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; | ||
456 | struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; | ||
457 | struct _ioeventfd *p; | ||
458 | struct eventfd_ctx *eventfd; | ||
459 | int ret; | ||
460 | |||
461 | /* must be natural-word sized */ | ||
462 | switch (args->len) { | ||
463 | case 1: | ||
464 | case 2: | ||
465 | case 4: | ||
466 | case 8: | ||
467 | break; | ||
468 | default: | ||
469 | return -EINVAL; | ||
470 | } | ||
471 | |||
472 | /* check for range overflow */ | ||
473 | if (args->addr + args->len < args->addr) | ||
474 | return -EINVAL; | ||
475 | |||
476 | /* check for extra flags that we don't understand */ | ||
477 | if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) | ||
478 | return -EINVAL; | ||
479 | |||
480 | eventfd = eventfd_ctx_fdget(args->fd); | ||
481 | if (IS_ERR(eventfd)) | ||
482 | return PTR_ERR(eventfd); | ||
483 | |||
484 | p = kzalloc(sizeof(*p), GFP_KERNEL); | ||
485 | if (!p) { | ||
486 | ret = -ENOMEM; | ||
487 | goto fail; | ||
488 | } | ||
489 | |||
490 | INIT_LIST_HEAD(&p->list); | ||
491 | p->addr = args->addr; | ||
492 | p->length = args->len; | ||
493 | p->eventfd = eventfd; | ||
494 | |||
495 | /* The datamatch feature is optional, otherwise this is a wildcard */ | ||
496 | if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH) | ||
497 | p->datamatch = args->datamatch; | ||
498 | else | ||
499 | p->wildcard = true; | ||
500 | |||
501 | down_write(&kvm->slots_lock); | ||
502 | |||
503 | /* Verify that there isnt a match already */ | ||
504 | if (ioeventfd_check_collision(kvm, p)) { | ||
505 | ret = -EEXIST; | ||
506 | goto unlock_fail; | ||
507 | } | ||
508 | |||
509 | kvm_iodevice_init(&p->dev, &ioeventfd_ops); | ||
510 | |||
511 | ret = __kvm_io_bus_register_dev(bus, &p->dev); | ||
512 | if (ret < 0) | ||
513 | goto unlock_fail; | ||
514 | |||
515 | list_add_tail(&p->list, &kvm->ioeventfds); | ||
516 | |||
517 | up_write(&kvm->slots_lock); | ||
518 | |||
519 | return 0; | ||
520 | |||
521 | unlock_fail: | ||
522 | up_write(&kvm->slots_lock); | ||
523 | |||
524 | fail: | ||
525 | kfree(p); | ||
526 | eventfd_ctx_put(eventfd); | ||
527 | |||
528 | return ret; | ||
529 | } | ||
530 | |||
531 | static int | ||
532 | kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | ||
533 | { | ||
534 | int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; | ||
535 | struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; | ||
536 | struct _ioeventfd *p, *tmp; | ||
537 | struct eventfd_ctx *eventfd; | ||
538 | int ret = -ENOENT; | ||
539 | |||
540 | eventfd = eventfd_ctx_fdget(args->fd); | ||
541 | if (IS_ERR(eventfd)) | ||
542 | return PTR_ERR(eventfd); | ||
543 | |||
544 | down_write(&kvm->slots_lock); | ||
545 | |||
546 | list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { | ||
547 | bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); | ||
548 | |||
549 | if (p->eventfd != eventfd || | ||
550 | p->addr != args->addr || | ||
551 | p->length != args->len || | ||
552 | p->wildcard != wildcard) | ||
553 | continue; | ||
554 | |||
555 | if (!p->wildcard && p->datamatch != args->datamatch) | ||
556 | continue; | ||
557 | |||
558 | __kvm_io_bus_unregister_dev(bus, &p->dev); | ||
559 | ioeventfd_release(p); | ||
560 | ret = 0; | ||
561 | break; | ||
562 | } | ||
563 | |||
564 | up_write(&kvm->slots_lock); | ||
565 | |||
566 | eventfd_ctx_put(eventfd); | ||
567 | |||
568 | return ret; | ||
569 | } | ||
570 | |||
571 | int | ||
572 | kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | ||
573 | { | ||
574 | if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN) | ||
575 | return kvm_deassign_ioeventfd(kvm, args); | ||
576 | |||
577 | return kvm_assign_ioeventfd(kvm, args); | ||
578 | } | ||
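
Taken together, the two halves of this new file give a VM two kernel-bypass paths: an irqfd injects an edge (assert then deassert) on its GSI whenever the eventfd is signalled, and an ioeventfd signals its eventfd when the guest writes the registered PIO/MMIO address (optionally only on a data match). A minimal user-space sketch of wiring both up, assuming vm_fd is an open VM descriptor, GSI 5 and port 0x2000 are arbitrary, and error handling is elided:

    #include <sys/eventfd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int attach_eventfds(int vm_fd)
    {
    	int irq_efd = eventfd(0, 0);
    	int io_efd  = eventfd(0, 0);

    	/* Any write to irq_efd now pulses GSI 5 in the guest. */
    	struct kvm_irqfd irqfd = { .fd = irq_efd, .gsi = 5 };
    	ioctl(vm_fd, KVM_IRQFD, &irqfd);

    	/* Any 2-byte guest write to port 0x2000 signals io_efd (wildcard match). */
    	struct kvm_ioeventfd ioefd = {
    		.addr  = 0x2000,
    		.len   = 2,
    		.fd    = io_efd,
    		.flags = KVM_IOEVENTFD_FLAG_PIO,
    	};
    	ioctl(vm_fd, KVM_IOEVENTFD, &ioefd);

    	return 0;
    }
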
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 1150c6d5c7b8..9fe140bb38ec 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -36,6 +36,7 @@
36 | #include <asm/processor.h> | 36 | #include <asm/processor.h> |
37 | #include <asm/page.h> | 37 | #include <asm/page.h> |
38 | #include <asm/current.h> | 38 | #include <asm/current.h> |
39 | #include <trace/events/kvm.h> | ||
39 | 40 | ||
40 | #include "ioapic.h" | 41 | #include "ioapic.h" |
41 | #include "lapic.h" | 42 | #include "lapic.h" |
@@ -103,6 +104,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | |||
103 | { | 104 | { |
104 | unsigned index; | 105 | unsigned index; |
105 | bool mask_before, mask_after; | 106 | bool mask_before, mask_after; |
107 | union kvm_ioapic_redirect_entry *e; | ||
106 | 108 | ||
107 | switch (ioapic->ioregsel) { | 109 | switch (ioapic->ioregsel) { |
108 | case IOAPIC_REG_VERSION: | 110 | case IOAPIC_REG_VERSION: |
@@ -122,19 +124,20 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | |||
122 | ioapic_debug("change redir index %x val %x\n", index, val); | 124 | ioapic_debug("change redir index %x val %x\n", index, val); |
123 | if (index >= IOAPIC_NUM_PINS) | 125 | if (index >= IOAPIC_NUM_PINS) |
124 | return; | 126 | return; |
125 | mask_before = ioapic->redirtbl[index].fields.mask; | 127 | e = &ioapic->redirtbl[index]; |
128 | mask_before = e->fields.mask; | ||
126 | if (ioapic->ioregsel & 1) { | 129 | if (ioapic->ioregsel & 1) { |
127 | ioapic->redirtbl[index].bits &= 0xffffffff; | 130 | e->bits &= 0xffffffff; |
128 | ioapic->redirtbl[index].bits |= (u64) val << 32; | 131 | e->bits |= (u64) val << 32; |
129 | } else { | 132 | } else { |
130 | ioapic->redirtbl[index].bits &= ~0xffffffffULL; | 133 | e->bits &= ~0xffffffffULL; |
131 | ioapic->redirtbl[index].bits |= (u32) val; | 134 | e->bits |= (u32) val; |
132 | ioapic->redirtbl[index].fields.remote_irr = 0; | 135 | e->fields.remote_irr = 0; |
133 | } | 136 | } |
134 | mask_after = ioapic->redirtbl[index].fields.mask; | 137 | mask_after = e->fields.mask; |
135 | if (mask_before != mask_after) | 138 | if (mask_before != mask_after) |
136 | kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); | 139 | kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); |
137 | if (ioapic->redirtbl[index].fields.trig_mode == IOAPIC_LEVEL_TRIG | 140 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG |
138 | && ioapic->irr & (1 << index)) | 141 | && ioapic->irr & (1 << index)) |
139 | ioapic_service(ioapic, index); | 142 | ioapic_service(ioapic, index); |
140 | break; | 143 | break; |
@@ -164,7 +167,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) | |||
164 | /* Always delivery PIT interrupt to vcpu 0 */ | 167 | /* Always delivery PIT interrupt to vcpu 0 */ |
165 | if (irq == 0) { | 168 | if (irq == 0) { |
166 | irqe.dest_mode = 0; /* Physical mode. */ | 169 | irqe.dest_mode = 0; /* Physical mode. */ |
167 | irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id; | 170 | /* need to read apic_id from apic regiest since |
171 | * it can be rewritten */ | ||
172 | irqe.dest_id = ioapic->kvm->bsp_vcpu->vcpu_id; | ||
168 | } | 173 | } |
169 | #endif | 174 | #endif |
170 | return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); | 175 | return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); |
@@ -188,7 +193,10 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) | |||
188 | if ((edge && old_irr != ioapic->irr) || | 193 | if ((edge && old_irr != ioapic->irr) || |
189 | (!edge && !entry.fields.remote_irr)) | 194 | (!edge && !entry.fields.remote_irr)) |
190 | ret = ioapic_service(ioapic, irq); | 195 | ret = ioapic_service(ioapic, irq); |
196 | else | ||
197 | ret = 0; /* report coalesced interrupt */ | ||
191 | } | 198 | } |
199 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); | ||
192 | } | 200 | } |
193 | return ret; | 201 | return ret; |
194 | } | 202 | } |
@@ -220,24 +228,29 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) | |||
220 | __kvm_ioapic_update_eoi(ioapic, i, trigger_mode); | 228 | __kvm_ioapic_update_eoi(ioapic, i, trigger_mode); |
221 | } | 229 | } |
222 | 230 | ||
223 | static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr, | 231 | static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) |
224 | int len, int is_write) | ||
225 | { | 232 | { |
226 | struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; | 233 | return container_of(dev, struct kvm_ioapic, dev); |
234 | } | ||
227 | 235 | ||
236 | static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr) | ||
237 | { | ||
228 | return ((addr >= ioapic->base_address && | 238 | return ((addr >= ioapic->base_address && |
229 | (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); | 239 | (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); |
230 | } | 240 | } |
231 | 241 | ||
232 | static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | 242 | static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, |
233 | void *val) | 243 | void *val) |
234 | { | 244 | { |
235 | struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; | 245 | struct kvm_ioapic *ioapic = to_ioapic(this); |
236 | u32 result; | 246 | u32 result; |
247 | if (!ioapic_in_range(ioapic, addr)) | ||
248 | return -EOPNOTSUPP; | ||
237 | 249 | ||
238 | ioapic_debug("addr %lx\n", (unsigned long)addr); | 250 | ioapic_debug("addr %lx\n", (unsigned long)addr); |
239 | ASSERT(!(addr & 0xf)); /* check alignment */ | 251 | ASSERT(!(addr & 0xf)); /* check alignment */ |
240 | 252 | ||
253 | mutex_lock(&ioapic->kvm->irq_lock); | ||
241 | addr &= 0xff; | 254 | addr &= 0xff; |
242 | switch (addr) { | 255 | switch (addr) { |
243 | case IOAPIC_REG_SELECT: | 256 | case IOAPIC_REG_SELECT: |
@@ -264,22 +277,28 @@ static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | |||
264 | default: | 277 | default: |
265 | printk(KERN_WARNING "ioapic: wrong length %d\n", len); | 278 | printk(KERN_WARNING "ioapic: wrong length %d\n", len); |
266 | } | 279 | } |
280 | mutex_unlock(&ioapic->kvm->irq_lock); | ||
281 | return 0; | ||
267 | } | 282 | } |
268 | 283 | ||
269 | static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | 284 | static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, |
270 | const void *val) | 285 | const void *val) |
271 | { | 286 | { |
272 | struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; | 287 | struct kvm_ioapic *ioapic = to_ioapic(this); |
273 | u32 data; | 288 | u32 data; |
289 | if (!ioapic_in_range(ioapic, addr)) | ||
290 | return -EOPNOTSUPP; | ||
274 | 291 | ||
275 | ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n", | 292 | ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n", |
276 | (void*)addr, len, val); | 293 | (void*)addr, len, val); |
277 | ASSERT(!(addr & 0xf)); /* check alignment */ | 294 | ASSERT(!(addr & 0xf)); /* check alignment */ |
295 | |||
296 | mutex_lock(&ioapic->kvm->irq_lock); | ||
278 | if (len == 4 || len == 8) | 297 | if (len == 4 || len == 8) |
279 | data = *(u32 *) val; | 298 | data = *(u32 *) val; |
280 | else { | 299 | else { |
281 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); | 300 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); |
282 | return; | 301 | goto unlock; |
283 | } | 302 | } |
284 | 303 | ||
285 | addr &= 0xff; | 304 | addr &= 0xff; |
@@ -300,6 +319,9 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | |||
300 | default: | 319 | default: |
301 | break; | 320 | break; |
302 | } | 321 | } |
322 | unlock: | ||
323 | mutex_unlock(&ioapic->kvm->irq_lock); | ||
324 | return 0; | ||
303 | } | 325 | } |
304 | 326 | ||
305 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic) | 327 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic) |
@@ -314,21 +336,27 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic) | |||
314 | ioapic->id = 0; | 336 | ioapic->id = 0; |
315 | } | 337 | } |
316 | 338 | ||
339 | static const struct kvm_io_device_ops ioapic_mmio_ops = { | ||
340 | .read = ioapic_mmio_read, | ||
341 | .write = ioapic_mmio_write, | ||
342 | }; | ||
343 | |||
317 | int kvm_ioapic_init(struct kvm *kvm) | 344 | int kvm_ioapic_init(struct kvm *kvm) |
318 | { | 345 | { |
319 | struct kvm_ioapic *ioapic; | 346 | struct kvm_ioapic *ioapic; |
347 | int ret; | ||
320 | 348 | ||
321 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); | 349 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); |
322 | if (!ioapic) | 350 | if (!ioapic) |
323 | return -ENOMEM; | 351 | return -ENOMEM; |
324 | kvm->arch.vioapic = ioapic; | 352 | kvm->arch.vioapic = ioapic; |
325 | kvm_ioapic_reset(ioapic); | 353 | kvm_ioapic_reset(ioapic); |
326 | ioapic->dev.read = ioapic_mmio_read; | 354 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); |
327 | ioapic->dev.write = ioapic_mmio_write; | ||
328 | ioapic->dev.in_range = ioapic_in_range; | ||
329 | ioapic->dev.private = ioapic; | ||
330 | ioapic->kvm = kvm; | 355 | ioapic->kvm = kvm; |
331 | kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev); | 356 | ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev); |
332 | return 0; | 357 | if (ret < 0) |
358 | kfree(ioapic); | ||
359 | |||
360 | return ret; | ||
333 | } | 361 | } |
334 | 362 | ||
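
ioapic_write_indirect() above now goes through a cached pointer to the selected redirection entry; the guest still programs each 64-bit entry as two 32-bit IOWIN writes selected by IOREGSEL. A condensed, illustrative restatement of that update path (the helper name is invented for clarity):

    static void redirtbl_write_half(union kvm_ioapic_redirect_entry *e,
    				int high_half, u32 val)
    {
    	if (high_half) {			/* odd IOREGSEL: bits 63:32 */
    		e->bits &= 0xffffffff;
    		e->bits |= (u64)val << 32;
    	} else {				/* even IOREGSEL: bits 31:0 */
    		e->bits &= ~0xffffffffULL;
    		e->bits |= (u32)val;
    		e->fields.remote_irr = 0;	/* a low-half write clears remote IRR */
    	}
    }
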
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
index 55e8846ac3a6..12fd3caffd2b 100644
--- a/virt/kvm/iodev.h
+++ b/virt/kvm/iodev.h
@@ -17,49 +17,54 @@
17 | #define __KVM_IODEV_H__ | 17 | #define __KVM_IODEV_H__ |
18 | 18 | ||
19 | #include <linux/kvm_types.h> | 19 | #include <linux/kvm_types.h> |
20 | #include <asm/errno.h> | ||
20 | 21 | ||
21 | struct kvm_io_device { | 22 | struct kvm_io_device; |
22 | void (*read)(struct kvm_io_device *this, | 23 | |
24 | /** | ||
25 | * kvm_io_device_ops are called under kvm slots_lock. | ||
26 | * read and write handlers return 0 if the transaction has been handled, | ||
27 | * or non-zero to have it passed to the next device. | ||
28 | **/ | ||
29 | struct kvm_io_device_ops { | ||
30 | int (*read)(struct kvm_io_device *this, | ||
31 | gpa_t addr, | ||
32 | int len, | ||
33 | void *val); | ||
34 | int (*write)(struct kvm_io_device *this, | ||
23 | gpa_t addr, | 35 | gpa_t addr, |
24 | int len, | 36 | int len, |
25 | void *val); | 37 | const void *val); |
26 | void (*write)(struct kvm_io_device *this, | ||
27 | gpa_t addr, | ||
28 | int len, | ||
29 | const void *val); | ||
30 | int (*in_range)(struct kvm_io_device *this, gpa_t addr, int len, | ||
31 | int is_write); | ||
32 | void (*destructor)(struct kvm_io_device *this); | 38 | void (*destructor)(struct kvm_io_device *this); |
39 | }; | ||
33 | 40 | ||
34 | void *private; | 41 | |
42 | struct kvm_io_device { | ||
43 | const struct kvm_io_device_ops *ops; | ||
35 | }; | 44 | }; |
36 | 45 | ||
37 | static inline void kvm_iodevice_read(struct kvm_io_device *dev, | 46 | static inline void kvm_iodevice_init(struct kvm_io_device *dev, |
38 | gpa_t addr, | 47 | const struct kvm_io_device_ops *ops) |
39 | int len, | ||
40 | void *val) | ||
41 | { | 48 | { |
42 | dev->read(dev, addr, len, val); | 49 | dev->ops = ops; |
43 | } | 50 | } |
44 | 51 | ||
45 | static inline void kvm_iodevice_write(struct kvm_io_device *dev, | 52 | static inline int kvm_iodevice_read(struct kvm_io_device *dev, |
46 | gpa_t addr, | 53 | gpa_t addr, int l, void *v) |
47 | int len, | ||
48 | const void *val) | ||
49 | { | 54 | { |
50 | dev->write(dev, addr, len, val); | 55 | return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP; |
51 | } | 56 | } |
52 | 57 | ||
53 | static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, | 58 | static inline int kvm_iodevice_write(struct kvm_io_device *dev, |
54 | gpa_t addr, int len, int is_write) | 59 | gpa_t addr, int l, const void *v) |
55 | { | 60 | { |
56 | return dev->in_range(dev, addr, len, is_write); | 61 | return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP; |
57 | } | 62 | } |
58 | 63 | ||
59 | static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) | 64 | static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) |
60 | { | 65 | { |
61 | if (dev->destructor) | 66 | if (dev->ops->destructor) |
62 | dev->destructor(dev); | 67 | dev->ops->destructor(dev); |
63 | } | 68 | } |
64 | 69 | ||
65 | #endif /* __KVM_IODEV_H__ */ | 70 | #endif /* __KVM_IODEV_H__ */ |
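
The reworked header replaces per-device function pointers and the in_range() callback with a shared, const ops table: a device now embeds struct kvm_io_device, recovers itself with container_of(), and either handles a transaction (return 0) or declines it (non-zero) so the bus can offer it to the next device. A minimal sketch of a device written against this contract (struct foo_dev and its single 32-bit register are illustrative only):

    #include <linux/kernel.h>
    #include "iodev.h"

    struct foo_dev {
    	struct kvm_io_device dev;
    	gpa_t base;
    	u32 reg;
    };

    static int foo_write(struct kvm_io_device *this, gpa_t addr, int len,
    		     const void *val)
    {
    	struct foo_dev *foo = container_of(this, struct foo_dev, dev);

    	if (addr != foo->base || len != 4)
    		return -EOPNOTSUPP;	/* not ours: let the bus try the next device */

    	foo->reg = *(const u32 *)val;
    	return 0;			/* handled */
    }

    static const struct kvm_io_device_ops foo_ops = {
    	.write = foo_write,		/* no .read: reads fall through as -EOPNOTSUPP */
    };

    /* Registration: kvm_iodevice_init(&foo->dev, &foo_ops); then add it to a bus. */
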
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index ddc17f0e2f35..001663ff401a 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -20,6 +20,7 @@
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
23 | #include <trace/events/kvm.h> | ||
23 | 24 | ||
24 | #include <asm/msidef.h> | 25 | #include <asm/msidef.h> |
25 | #ifdef CONFIG_IA64 | 26 | #ifdef CONFIG_IA64 |
@@ -62,14 +63,14 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
62 | int i, r = -1; | 63 | int i, r = -1; |
63 | struct kvm_vcpu *vcpu, *lowest = NULL; | 64 | struct kvm_vcpu *vcpu, *lowest = NULL; |
64 | 65 | ||
66 | WARN_ON(!mutex_is_locked(&kvm->irq_lock)); | ||
67 | |||
65 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && | 68 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && |
66 | kvm_is_dm_lowest_prio(irq)) | 69 | kvm_is_dm_lowest_prio(irq)) |
67 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); | 70 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); |
68 | 71 | ||
69 | for (i = 0; i < KVM_MAX_VCPUS; i++) { | 72 | kvm_for_each_vcpu(i, vcpu, kvm) { |
70 | vcpu = kvm->vcpus[i]; | 73 | if (!kvm_apic_present(vcpu)) |
71 | |||
72 | if (!vcpu || !kvm_apic_present(vcpu)) | ||
73 | continue; | 74 | continue; |
74 | 75 | ||
75 | if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, | 76 | if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, |
@@ -99,6 +100,8 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | |||
99 | { | 100 | { |
100 | struct kvm_lapic_irq irq; | 101 | struct kvm_lapic_irq irq; |
101 | 102 | ||
103 | trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); | ||
104 | |||
102 | irq.dest_id = (e->msi.address_lo & | 105 | irq.dest_id = (e->msi.address_lo & |
103 | MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; | 106 | MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; |
104 | irq.vector = (e->msi.data & | 107 | irq.vector = (e->msi.data & |
@@ -113,7 +116,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | |||
113 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq); | 116 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq); |
114 | } | 117 | } |
115 | 118 | ||
116 | /* This should be called with the kvm->lock mutex held | 119 | /* This should be called with the kvm->irq_lock mutex held |
117 | * Return value: | 120 | * Return value: |
118 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) | 121 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) |
119 | * = 0 Interrupt was coalesced (previous irq is still pending) | 122 | * = 0 Interrupt was coalesced (previous irq is still pending) |
@@ -125,6 +128,10 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) | |||
125 | unsigned long *irq_state, sig_level; | 128 | unsigned long *irq_state, sig_level; |
126 | int ret = -1; | 129 | int ret = -1; |
127 | 130 | ||
131 | trace_kvm_set_irq(irq, level, irq_source_id); | ||
132 | |||
133 | WARN_ON(!mutex_is_locked(&kvm->irq_lock)); | ||
134 | |||
128 | if (irq < KVM_IOAPIC_NUM_PINS) { | 135 | if (irq < KVM_IOAPIC_NUM_PINS) { |
129 | irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; | 136 | irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; |
130 | 137 | ||
@@ -134,7 +141,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) | |||
134 | else | 141 | else |
135 | clear_bit(irq_source_id, irq_state); | 142 | clear_bit(irq_source_id, irq_state); |
136 | sig_level = !!(*irq_state); | 143 | sig_level = !!(*irq_state); |
137 | } else /* Deal with MSI/MSI-X */ | 144 | } else if (!level) |
145 | return ret; | ||
146 | else /* Deal with MSI/MSI-X */ | ||
138 | sig_level = 1; | 147 | sig_level = 1; |
139 | 148 | ||
140 | /* Not possible to detect if the guest uses the PIC or the | 149 | /* Not possible to detect if the guest uses the PIC or the |
@@ -159,6 +168,8 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | |||
159 | struct hlist_node *n; | 168 | struct hlist_node *n; |
160 | unsigned gsi = pin; | 169 | unsigned gsi = pin; |
161 | 170 | ||
171 | trace_kvm_ack_irq(irqchip, pin); | ||
172 | |||
162 | list_for_each_entry(e, &kvm->irq_routing, link) | 173 | list_for_each_entry(e, &kvm->irq_routing, link) |
163 | if (e->type == KVM_IRQ_ROUTING_IRQCHIP && | 174 | if (e->type == KVM_IRQ_ROUTING_IRQCHIP && |
164 | e->irqchip.irqchip == irqchip && | 175 | e->irqchip.irqchip == irqchip && |
@@ -175,19 +186,26 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | |||
175 | void kvm_register_irq_ack_notifier(struct kvm *kvm, | 186 | void kvm_register_irq_ack_notifier(struct kvm *kvm, |
176 | struct kvm_irq_ack_notifier *kian) | 187 | struct kvm_irq_ack_notifier *kian) |
177 | { | 188 | { |
189 | mutex_lock(&kvm->irq_lock); | ||
178 | hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); | 190 | hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); |
191 | mutex_unlock(&kvm->irq_lock); | ||
179 | } | 192 | } |
180 | 193 | ||
181 | void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian) | 194 | void kvm_unregister_irq_ack_notifier(struct kvm *kvm, |
195 | struct kvm_irq_ack_notifier *kian) | ||
182 | { | 196 | { |
197 | mutex_lock(&kvm->irq_lock); | ||
183 | hlist_del_init(&kian->link); | 198 | hlist_del_init(&kian->link); |
199 | mutex_unlock(&kvm->irq_lock); | ||
184 | } | 200 | } |
185 | 201 | ||
186 | /* The caller must hold kvm->lock mutex */ | ||
187 | int kvm_request_irq_source_id(struct kvm *kvm) | 202 | int kvm_request_irq_source_id(struct kvm *kvm) |
188 | { | 203 | { |
189 | unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; | 204 | unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; |
190 | int irq_source_id = find_first_zero_bit(bitmap, | 205 | int irq_source_id; |
206 | |||
207 | mutex_lock(&kvm->irq_lock); | ||
208 | irq_source_id = find_first_zero_bit(bitmap, | ||
191 | sizeof(kvm->arch.irq_sources_bitmap)); | 209 | sizeof(kvm->arch.irq_sources_bitmap)); |
192 | 210 | ||
193 | if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { | 211 | if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { |
@@ -197,6 +215,7 @@ int kvm_request_irq_source_id(struct kvm *kvm) | |||
197 | 215 | ||
198 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 216 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
199 | set_bit(irq_source_id, bitmap); | 217 | set_bit(irq_source_id, bitmap); |
218 | mutex_unlock(&kvm->irq_lock); | ||
200 | 219 | ||
201 | return irq_source_id; | 220 | return irq_source_id; |
202 | } | 221 | } |
@@ -207,6 +226,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) | |||
207 | 226 | ||
208 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 227 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
209 | 228 | ||
229 | mutex_lock(&kvm->irq_lock); | ||
210 | if (irq_source_id < 0 || | 230 | if (irq_source_id < 0 || |
211 | irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { | 231 | irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { |
212 | printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); | 232 | printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); |
@@ -215,19 +235,24 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) | |||
215 | for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) | 235 | for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) |
216 | clear_bit(irq_source_id, &kvm->arch.irq_states[i]); | 236 | clear_bit(irq_source_id, &kvm->arch.irq_states[i]); |
217 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); | 237 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); |
238 | mutex_unlock(&kvm->irq_lock); | ||
218 | } | 239 | } |
219 | 240 | ||
220 | void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, | 241 | void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, |
221 | struct kvm_irq_mask_notifier *kimn) | 242 | struct kvm_irq_mask_notifier *kimn) |
222 | { | 243 | { |
244 | mutex_lock(&kvm->irq_lock); | ||
223 | kimn->irq = irq; | 245 | kimn->irq = irq; |
224 | hlist_add_head(&kimn->link, &kvm->mask_notifier_list); | 246 | hlist_add_head(&kimn->link, &kvm->mask_notifier_list); |
247 | mutex_unlock(&kvm->irq_lock); | ||
225 | } | 248 | } |
226 | 249 | ||
227 | void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, | 250 | void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, |
228 | struct kvm_irq_mask_notifier *kimn) | 251 | struct kvm_irq_mask_notifier *kimn) |
229 | { | 252 | { |
253 | mutex_lock(&kvm->irq_lock); | ||
230 | hlist_del(&kimn->link); | 254 | hlist_del(&kimn->link); |
255 | mutex_unlock(&kvm->irq_lock); | ||
231 | } | 256 | } |
232 | 257 | ||
233 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) | 258 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) |
@@ -235,6 +260,8 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) | |||
235 | struct kvm_irq_mask_notifier *kimn; | 260 | struct kvm_irq_mask_notifier *kimn; |
236 | struct hlist_node *n; | 261 | struct hlist_node *n; |
237 | 262 | ||
263 | WARN_ON(!mutex_is_locked(&kvm->irq_lock)); | ||
264 | |||
238 | hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link) | 265 | hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link) |
239 | if (kimn->irq == irq) | 266 | if (kimn->irq == irq) |
240 | kimn->func(kimn, mask); | 267 | kimn->func(kimn, mask); |
@@ -250,7 +277,9 @@ static void __kvm_free_irq_routing(struct list_head *irq_routing) | |||
250 | 277 | ||
251 | void kvm_free_irq_routing(struct kvm *kvm) | 278 | void kvm_free_irq_routing(struct kvm *kvm) |
252 | { | 279 | { |
280 | mutex_lock(&kvm->irq_lock); | ||
253 | __kvm_free_irq_routing(&kvm->irq_routing); | 281 | __kvm_free_irq_routing(&kvm->irq_routing); |
282 | mutex_unlock(&kvm->irq_lock); | ||
254 | } | 283 | } |
255 | 284 | ||
256 | static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | 285 | static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, |
@@ -325,13 +354,13 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
325 | e = NULL; | 354 | e = NULL; |
326 | } | 355 | } |
327 | 356 | ||
328 | mutex_lock(&kvm->lock); | 357 | mutex_lock(&kvm->irq_lock); |
329 | list_splice(&kvm->irq_routing, &tmp); | 358 | list_splice(&kvm->irq_routing, &tmp); |
330 | INIT_LIST_HEAD(&kvm->irq_routing); | 359 | INIT_LIST_HEAD(&kvm->irq_routing); |
331 | list_splice(&irq_list, &kvm->irq_routing); | 360 | list_splice(&irq_list, &kvm->irq_routing); |
332 | INIT_LIST_HEAD(&irq_list); | 361 | INIT_LIST_HEAD(&irq_list); |
333 | list_splice(&tmp, &irq_list); | 362 | list_splice(&tmp, &irq_list); |
334 | mutex_unlock(&kvm->lock); | 363 | mutex_unlock(&kvm->irq_lock); |
335 | 364 | ||
336 | r = 0; | 365 | r = 0; |
337 | 366 | ||
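
Most of this hunk converts the interrupt-routing paths from kvm->lock to the finer-grained kvm->irq_lock, and the new WARN_ON()s make the rule explicit: kvm_set_irq() and kvm_irq_delivery_to_apic() expect irq_lock to be held by the caller. A small sketch of the resulting caller pattern, mirroring irqfd_inject() in eventfd.c above (the helper name is illustrative):

    static void example_pulse_gsi(struct kvm *kvm, int irq_source_id, int gsi)
    {
    	mutex_lock(&kvm->irq_lock);
    	kvm_set_irq(kvm, irq_source_id, gsi, 1);	/* assert   */
    	kvm_set_irq(kvm, irq_source_id, gsi, 0);	/* deassert */
    	mutex_unlock(&kvm->irq_lock);
    }
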
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2884baf1d5f9..897bff3b7df9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -59,9 +59,18 @@
59 | #include "irq.h" | 59 | #include "irq.h" |
60 | #endif | 60 | #endif |
61 | 61 | ||
62 | #define CREATE_TRACE_POINTS | ||
63 | #include <trace/events/kvm.h> | ||
64 | |||
62 | MODULE_AUTHOR("Qumranet"); | 65 | MODULE_AUTHOR("Qumranet"); |
63 | MODULE_LICENSE("GPL"); | 66 | MODULE_LICENSE("GPL"); |
64 | 67 | ||
68 | /* | ||
69 | * Ordering of locks: | ||
70 | * | ||
71 | * kvm->slots_lock --> kvm->lock --> kvm->irq_lock | ||
72 | */ | ||
73 | |||
65 | DEFINE_SPINLOCK(kvm_lock); | 74 | DEFINE_SPINLOCK(kvm_lock); |
66 | LIST_HEAD(vm_list); | 75 | LIST_HEAD(vm_list); |
67 | 76 | ||
@@ -79,6 +88,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | |||
79 | 88 | ||
80 | static bool kvm_rebooting; | 89 | static bool kvm_rebooting; |
81 | 90 | ||
91 | static bool largepages_enabled = true; | ||
92 | |||
82 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | 93 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT |
83 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | 94 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, |
84 | int assigned_dev_id) | 95 | int assigned_dev_id) |
@@ -120,17 +131,13 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | |||
120 | { | 131 | { |
121 | struct kvm_assigned_dev_kernel *assigned_dev; | 132 | struct kvm_assigned_dev_kernel *assigned_dev; |
122 | struct kvm *kvm; | 133 | struct kvm *kvm; |
123 | int irq, i; | 134 | int i; |
124 | 135 | ||
125 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, | 136 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, |
126 | interrupt_work); | 137 | interrupt_work); |
127 | kvm = assigned_dev->kvm; | 138 | kvm = assigned_dev->kvm; |
128 | 139 | ||
129 | /* This is taken to safely inject irq inside the guest. When | 140 | mutex_lock(&kvm->irq_lock); |
130 | * the interrupt injection (or the ioapic code) uses a | ||
131 | * finer-grained lock, update this | ||
132 | */ | ||
133 | mutex_lock(&kvm->lock); | ||
134 | spin_lock_irq(&assigned_dev->assigned_dev_lock); | 141 | spin_lock_irq(&assigned_dev->assigned_dev_lock); |
135 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | 142 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { |
136 | struct kvm_guest_msix_entry *guest_entries = | 143 | struct kvm_guest_msix_entry *guest_entries = |
@@ -143,23 +150,13 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | |||
143 | kvm_set_irq(assigned_dev->kvm, | 150 | kvm_set_irq(assigned_dev->kvm, |
144 | assigned_dev->irq_source_id, | 151 | assigned_dev->irq_source_id, |
145 | guest_entries[i].vector, 1); | 152 | guest_entries[i].vector, 1); |
146 | irq = assigned_dev->host_msix_entries[i].vector; | ||
147 | if (irq != 0) | ||
148 | enable_irq(irq); | ||
149 | assigned_dev->host_irq_disabled = false; | ||
150 | } | 153 | } |
151 | } else { | 154 | } else |
152 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | 155 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
153 | assigned_dev->guest_irq, 1); | 156 | assigned_dev->guest_irq, 1); |
154 | if (assigned_dev->irq_requested_type & | ||
155 | KVM_DEV_IRQ_GUEST_MSI) { | ||
156 | enable_irq(assigned_dev->host_irq); | ||
157 | assigned_dev->host_irq_disabled = false; | ||
158 | } | ||
159 | } | ||
160 | 157 | ||
161 | spin_unlock_irq(&assigned_dev->assigned_dev_lock); | 158 | spin_unlock_irq(&assigned_dev->assigned_dev_lock); |
162 | mutex_unlock(&assigned_dev->kvm->lock); | 159 | mutex_unlock(&assigned_dev->kvm->irq_lock); |
163 | } | 160 | } |
164 | 161 | ||
165 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | 162 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) |
@@ -179,8 +176,10 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | |||
179 | 176 | ||
180 | schedule_work(&assigned_dev->interrupt_work); | 177 | schedule_work(&assigned_dev->interrupt_work); |
181 | 178 | ||
182 | disable_irq_nosync(irq); | 179 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { |
183 | assigned_dev->host_irq_disabled = true; | 180 | disable_irq_nosync(irq); |
181 | assigned_dev->host_irq_disabled = true; | ||
182 | } | ||
184 | 183 | ||
185 | out: | 184 | out: |
186 | spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); | 185 | spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); |
@@ -215,7 +214,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
215 | static void deassign_guest_irq(struct kvm *kvm, | 214 | static void deassign_guest_irq(struct kvm *kvm, |
216 | struct kvm_assigned_dev_kernel *assigned_dev) | 215 | struct kvm_assigned_dev_kernel *assigned_dev) |
217 | { | 216 | { |
218 | kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); | 217 | kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); |
219 | assigned_dev->ack_notifier.gsi = -1; | 218 | assigned_dev->ack_notifier.gsi = -1; |
220 | 219 | ||
221 | if (assigned_dev->irq_source_id != -1) | 220 | if (assigned_dev->irq_source_id != -1) |
@@ -417,6 +416,7 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm, | |||
417 | { | 416 | { |
418 | dev->guest_irq = irq->guest_irq; | 417 | dev->guest_irq = irq->guest_irq; |
419 | dev->ack_notifier.gsi = -1; | 418 | dev->ack_notifier.gsi = -1; |
419 | dev->host_irq_disabled = false; | ||
420 | return 0; | 420 | return 0; |
421 | } | 421 | } |
422 | #endif | 422 | #endif |
@@ -427,6 +427,7 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm, | |||
427 | { | 427 | { |
428 | dev->guest_irq = irq->guest_irq; | 428 | dev->guest_irq = irq->guest_irq; |
429 | dev->ack_notifier.gsi = -1; | 429 | dev->ack_notifier.gsi = -1; |
430 | dev->host_irq_disabled = false; | ||
430 | return 0; | 431 | return 0; |
431 | } | 432 | } |
432 | #endif | 433 | #endif |
@@ -693,11 +694,6 @@ out: | |||
693 | } | 694 | } |
694 | #endif | 695 | #endif |
695 | 696 | ||
696 | static inline int valid_vcpu(int n) | ||
697 | { | ||
698 | return likely(n >= 0 && n < KVM_MAX_VCPUS); | ||
699 | } | ||
700 | |||
701 | inline int kvm_is_mmio_pfn(pfn_t pfn) | 697 | inline int kvm_is_mmio_pfn(pfn_t pfn) |
702 | { | 698 | { |
703 | if (pfn_valid(pfn)) { | 699 | if (pfn_valid(pfn)) { |
@@ -745,12 +741,9 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
745 | if (alloc_cpumask_var(&cpus, GFP_ATOMIC)) | 741 | if (alloc_cpumask_var(&cpus, GFP_ATOMIC)) |
746 | cpumask_clear(cpus); | 742 | cpumask_clear(cpus); |
747 | 743 | ||
748 | me = get_cpu(); | ||
749 | spin_lock(&kvm->requests_lock); | 744 | spin_lock(&kvm->requests_lock); |
750 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | 745 | me = smp_processor_id(); |
751 | vcpu = kvm->vcpus[i]; | 746 | kvm_for_each_vcpu(i, vcpu, kvm) { |
752 | if (!vcpu) | ||
753 | continue; | ||
754 | if (test_and_set_bit(req, &vcpu->requests)) | 747 | if (test_and_set_bit(req, &vcpu->requests)) |
755 | continue; | 748 | continue; |
756 | cpu = vcpu->cpu; | 749 | cpu = vcpu->cpu; |
@@ -764,7 +757,6 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
764 | else | 757 | else |
765 | called = false; | 758 | called = false; |
766 | spin_unlock(&kvm->requests_lock); | 759 | spin_unlock(&kvm->requests_lock); |
767 | put_cpu(); | ||
768 | free_cpumask_var(cpus); | 760 | free_cpumask_var(cpus); |
769 | return called; | 761 | return called; |
770 | } | 762 | } |
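The kvm_for_each_vcpu() iterator used above is presumably defined in include/linux/kvm_host.h in this series (the header is not part of this hunk). Roughly, it walks vcpus[] up to online_vcpus and stops at the first NULL slot, which is why the explicit "if (!vcpu) continue;" could be dropped. A sketch of the intended semantics:

/* Approximation of the iterator this hunk starts relying on. */
#define kvm_for_each_vcpu(idx, vcpup, kvm) \
	for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \
	     idx < atomic_read(&(kvm)->online_vcpus) && vcpup; \
	     vcpup = kvm_get_vcpu(kvm, ++idx))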
@@ -986,7 +978,9 @@ static struct kvm *kvm_create_vm(void) | |||
986 | spin_lock_init(&kvm->mmu_lock); | 978 | spin_lock_init(&kvm->mmu_lock); |
987 | spin_lock_init(&kvm->requests_lock); | 979 | spin_lock_init(&kvm->requests_lock); |
988 | kvm_io_bus_init(&kvm->pio_bus); | 980 | kvm_io_bus_init(&kvm->pio_bus); |
981 | kvm_eventfd_init(kvm); | ||
989 | mutex_init(&kvm->lock); | 982 | mutex_init(&kvm->lock); |
983 | mutex_init(&kvm->irq_lock); | ||
990 | kvm_io_bus_init(&kvm->mmio_bus); | 984 | kvm_io_bus_init(&kvm->mmio_bus); |
991 | init_rwsem(&kvm->slots_lock); | 985 | init_rwsem(&kvm->slots_lock); |
992 | atomic_set(&kvm->users_count, 1); | 986 | atomic_set(&kvm->users_count, 1); |
@@ -1006,19 +1000,25 @@ out: | |||
1006 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | 1000 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, |
1007 | struct kvm_memory_slot *dont) | 1001 | struct kvm_memory_slot *dont) |
1008 | { | 1002 | { |
1003 | int i; | ||
1004 | |||
1009 | if (!dont || free->rmap != dont->rmap) | 1005 | if (!dont || free->rmap != dont->rmap) |
1010 | vfree(free->rmap); | 1006 | vfree(free->rmap); |
1011 | 1007 | ||
1012 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) | 1008 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) |
1013 | vfree(free->dirty_bitmap); | 1009 | vfree(free->dirty_bitmap); |
1014 | 1010 | ||
1015 | if (!dont || free->lpage_info != dont->lpage_info) | 1011 | |
1016 | vfree(free->lpage_info); | 1012 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { |
1013 | if (!dont || free->lpage_info[i] != dont->lpage_info[i]) { | ||
1014 | vfree(free->lpage_info[i]); | ||
1015 | free->lpage_info[i] = NULL; | ||
1016 | } | ||
1017 | } | ||
1017 | 1018 | ||
1018 | free->npages = 0; | 1019 | free->npages = 0; |
1019 | free->dirty_bitmap = NULL; | 1020 | free->dirty_bitmap = NULL; |
1020 | free->rmap = NULL; | 1021 | free->rmap = NULL; |
1021 | free->lpage_info = NULL; | ||
1022 | } | 1022 | } |
1023 | 1023 | ||
1024 | void kvm_free_physmem(struct kvm *kvm) | 1024 | void kvm_free_physmem(struct kvm *kvm) |
@@ -1071,6 +1071,8 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) | |||
1071 | { | 1071 | { |
1072 | struct kvm *kvm = filp->private_data; | 1072 | struct kvm *kvm = filp->private_data; |
1073 | 1073 | ||
1074 | kvm_irqfd_release(kvm); | ||
1075 | |||
1074 | kvm_put_kvm(kvm); | 1076 | kvm_put_kvm(kvm); |
1075 | return 0; | 1077 | return 0; |
1076 | } | 1078 | } |
@@ -1089,8 +1091,8 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1089 | { | 1091 | { |
1090 | int r; | 1092 | int r; |
1091 | gfn_t base_gfn; | 1093 | gfn_t base_gfn; |
1092 | unsigned long npages, ugfn; | 1094 | unsigned long npages; |
1093 | unsigned long largepages, i; | 1095 | unsigned long i; |
1094 | struct kvm_memory_slot *memslot; | 1096 | struct kvm_memory_slot *memslot; |
1095 | struct kvm_memory_slot old, new; | 1097 | struct kvm_memory_slot old, new; |
1096 | 1098 | ||
@@ -1164,31 +1166,51 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1164 | else | 1166 | else |
1165 | new.userspace_addr = 0; | 1167 | new.userspace_addr = 0; |
1166 | } | 1168 | } |
1167 | if (npages && !new.lpage_info) { | 1169 | if (!npages) |
1168 | largepages = 1 + (base_gfn + npages - 1) / KVM_PAGES_PER_HPAGE; | 1170 | goto skip_lpage; |
1169 | largepages -= base_gfn / KVM_PAGES_PER_HPAGE; | ||
1170 | 1171 | ||
1171 | new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info)); | 1172 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { |
1173 | unsigned long ugfn; | ||
1174 | unsigned long j; | ||
1175 | int lpages; | ||
1176 | int level = i + 2; | ||
1172 | 1177 | ||
1173 | if (!new.lpage_info) | 1178 | /* Avoid unused variable warning if no large pages */ |
1179 | (void)level; | ||
1180 | |||
1181 | if (new.lpage_info[i]) | ||
1182 | continue; | ||
1183 | |||
1184 | lpages = 1 + (base_gfn + npages - 1) / | ||
1185 | KVM_PAGES_PER_HPAGE(level); | ||
1186 | lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level); | ||
1187 | |||
1188 | new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i])); | ||
1189 | |||
1190 | if (!new.lpage_info[i]) | ||
1174 | goto out_free; | 1191 | goto out_free; |
1175 | 1192 | ||
1176 | memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info)); | 1193 | memset(new.lpage_info[i], 0, |
1194 | lpages * sizeof(*new.lpage_info[i])); | ||
1177 | 1195 | ||
1178 | if (base_gfn % KVM_PAGES_PER_HPAGE) | 1196 | if (base_gfn % KVM_PAGES_PER_HPAGE(level)) |
1179 | new.lpage_info[0].write_count = 1; | 1197 | new.lpage_info[i][0].write_count = 1; |
1180 | if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE) | 1198 | if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level)) |
1181 | new.lpage_info[largepages-1].write_count = 1; | 1199 | new.lpage_info[i][lpages - 1].write_count = 1; |
1182 | ugfn = new.userspace_addr >> PAGE_SHIFT; | 1200 | ugfn = new.userspace_addr >> PAGE_SHIFT; |
1183 | /* | 1201 | /* |
1184 | * If the gfn and userspace address are not aligned wrt each | 1202 | * If the gfn and userspace address are not aligned wrt each |
1185 | * other, disable large page support for this slot | 1203 | * other, or if explicitly asked to, disable large page |
1204 | * support for this slot | ||
1186 | */ | 1205 | */ |
1187 | if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1)) | 1206 | if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || |
1188 | for (i = 0; i < largepages; ++i) | 1207 | !largepages_enabled) |
1189 | new.lpage_info[i].write_count = 1; | 1208 | for (j = 0; j < lpages; ++j) |
1209 | new.lpage_info[i][j].write_count = 1; | ||
1190 | } | 1210 | } |
1191 | 1211 | ||
1212 | skip_lpage: | ||
1213 | |||
1192 | /* Allocate page dirty bitmap if needed */ | 1214 | /* Allocate page dirty bitmap if needed */ |
1193 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { | 1215 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { |
1194 | unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8; | 1216 | unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8; |
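With lpage_info now an array of per-level arrays indexed by (level - 2), lookups on the arch side index first by level and then by the large-frame offset within the slot, mirroring the allocation loop above. A hedged sketch of such a lookup (the helper name and its placement are illustrative; the real code lives in arch MMU code, not in this file):

/* Illustrative only: maps a gfn to its per-level write_count counter. */
static int *lpage_write_count_sketch(struct kvm_memory_slot *slot,
				     gfn_t gfn, int level)
{
	unsigned long idx;

	idx = (gfn / KVM_PAGES_PER_HPAGE(level)) -
	      (slot->base_gfn / KVM_PAGES_PER_HPAGE(level));
	return &slot->lpage_info[level - 2][idx].write_count;
}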
@@ -1200,6 +1222,10 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1200 | if (old.npages) | 1222 | if (old.npages) |
1201 | kvm_arch_flush_shadow(kvm); | 1223 | kvm_arch_flush_shadow(kvm); |
1202 | } | 1224 | } |
1225 | #else /* not defined CONFIG_S390 */ | ||
1226 | new.user_alloc = user_alloc; | ||
1227 | if (user_alloc) | ||
1228 | new.userspace_addr = mem->userspace_addr; | ||
1203 | #endif /* not defined CONFIG_S390 */ | 1229 | #endif /* not defined CONFIG_S390 */ |
1204 | 1230 | ||
1205 | if (!npages) | 1231 | if (!npages) |
@@ -1299,6 +1325,12 @@ out: | |||
1299 | return r; | 1325 | return r; |
1300 | } | 1326 | } |
1301 | 1327 | ||
1328 | void kvm_disable_largepages(void) | ||
1329 | { | ||
1330 | largepages_enabled = false; | ||
1331 | } | ||
1332 | EXPORT_SYMBOL_GPL(kvm_disable_largepages); | ||
1333 | |||
1302 | int is_error_page(struct page *page) | 1334 | int is_error_page(struct page *page) |
1303 | { | 1335 | { |
1304 | return page == bad_page; | 1336 | return page == bad_page; |
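kvm_disable_largepages() simply latches largepages_enabled to false, so any slot created afterwards gets write_count = 1 in every lpage_info element and large mappings are never installed. A hypothetical caller might be an arch backend that discovers at setup time that its MMU cannot map huge guest pages; the predicate below is made up purely for illustration:

/* Hypothetical caller; the predicate is an assumed helper. */
static int hardware_setup_sketch(void)
{
	if (!arch_supports_huge_guest_pages())	/* assumed helper */
		kvm_disable_largepages();
	return 0;
}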
@@ -1635,9 +1667,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
1635 | for (;;) { | 1667 | for (;;) { |
1636 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | 1668 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); |
1637 | 1669 | ||
1638 | if ((kvm_arch_interrupt_allowed(vcpu) && | 1670 | if (kvm_arch_vcpu_runnable(vcpu)) { |
1639 | kvm_cpu_has_interrupt(vcpu)) || | ||
1640 | kvm_arch_vcpu_runnable(vcpu)) { | ||
1641 | set_bit(KVM_REQ_UNHALT, &vcpu->requests); | 1671 | set_bit(KVM_REQ_UNHALT, &vcpu->requests); |
1642 | break; | 1672 | break; |
1643 | } | 1673 | } |
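Since the generic halt loop no longer checks kvm_arch_interrupt_allowed()/kvm_cpu_has_interrupt() itself, each arch's kvm_arch_vcpu_runnable() has to fold that case in. A sketch of what an x86-style implementation would then look like (the real body is arch code outside this diff, so the mp_state details are an assumption):

/* Sketch: runnability now includes the deliverable-interrupt case. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE ||
	       vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
	       (kvm_arch_interrupt_allowed(vcpu) &&
		kvm_cpu_has_interrupt(vcpu));
}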
@@ -1714,24 +1744,18 @@ static struct file_operations kvm_vcpu_fops = { | |||
1714 | */ | 1744 | */ |
1715 | static int create_vcpu_fd(struct kvm_vcpu *vcpu) | 1745 | static int create_vcpu_fd(struct kvm_vcpu *vcpu) |
1716 | { | 1746 | { |
1717 | int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0); | 1747 | return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0); |
1718 | if (fd < 0) | ||
1719 | kvm_put_kvm(vcpu->kvm); | ||
1720 | return fd; | ||
1721 | } | 1748 | } |
1722 | 1749 | ||
1723 | /* | 1750 | /* |
1724 | * Creates some virtual cpus. Good luck creating more than one. | 1751 | * Creates some virtual cpus. Good luck creating more than one. |
1725 | */ | 1752 | */ |
1726 | static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | 1753 | static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) |
1727 | { | 1754 | { |
1728 | int r; | 1755 | int r; |
1729 | struct kvm_vcpu *vcpu; | 1756 | struct kvm_vcpu *vcpu, *v; |
1730 | |||
1731 | if (!valid_vcpu(n)) | ||
1732 | return -EINVAL; | ||
1733 | 1757 | ||
1734 | vcpu = kvm_arch_vcpu_create(kvm, n); | 1758 | vcpu = kvm_arch_vcpu_create(kvm, id); |
1735 | if (IS_ERR(vcpu)) | 1759 | if (IS_ERR(vcpu)) |
1736 | return PTR_ERR(vcpu); | 1760 | return PTR_ERR(vcpu); |
1737 | 1761 | ||
@@ -1742,23 +1766,38 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
1742 | return r; | 1766 | return r; |
1743 | 1767 | ||
1744 | mutex_lock(&kvm->lock); | 1768 | mutex_lock(&kvm->lock); |
1745 | if (kvm->vcpus[n]) { | 1769 | if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { |
1746 | r = -EEXIST; | 1770 | r = -EINVAL; |
1747 | goto vcpu_destroy; | 1771 | goto vcpu_destroy; |
1748 | } | 1772 | } |
1749 | kvm->vcpus[n] = vcpu; | 1773 | |
1750 | mutex_unlock(&kvm->lock); | 1774 | kvm_for_each_vcpu(r, v, kvm) |
1775 | if (v->vcpu_id == id) { | ||
1776 | r = -EEXIST; | ||
1777 | goto vcpu_destroy; | ||
1778 | } | ||
1779 | |||
1780 | BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); | ||
1751 | 1781 | ||
1752 | /* Now it's all set up, let userspace reach it */ | 1782 | /* Now it's all set up, let userspace reach it */ |
1753 | kvm_get_kvm(kvm); | 1783 | kvm_get_kvm(kvm); |
1754 | r = create_vcpu_fd(vcpu); | 1784 | r = create_vcpu_fd(vcpu); |
1755 | if (r < 0) | 1785 | if (r < 0) { |
1756 | goto unlink; | 1786 | kvm_put_kvm(kvm); |
1787 | goto vcpu_destroy; | ||
1788 | } | ||
1789 | |||
1790 | kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; | ||
1791 | smp_wmb(); | ||
1792 | atomic_inc(&kvm->online_vcpus); | ||
1793 | |||
1794 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | ||
1795 | if (kvm->bsp_vcpu_id == id) | ||
1796 | kvm->bsp_vcpu = vcpu; | ||
1797 | #endif | ||
1798 | mutex_unlock(&kvm->lock); | ||
1757 | return r; | 1799 | return r; |
1758 | 1800 | ||
1759 | unlink: | ||
1760 | mutex_lock(&kvm->lock); | ||
1761 | kvm->vcpus[n] = NULL; | ||
1762 | vcpu_destroy: | 1801 | vcpu_destroy: |
1763 | mutex_unlock(&kvm->lock); | 1802 | mutex_unlock(&kvm->lock); |
1764 | kvm_arch_vcpu_destroy(vcpu); | 1803 | kvm_arch_vcpu_destroy(vcpu); |
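The smp_wmb() between storing the vcpus[] slot and bumping online_vcpus publishes the new vcpu: any reader that observes the incremented count must also see the pointer. The matching read barrier sits on the lookup side; a sketch of kvm_get_vcpu() as this series is expected to define it (assumption, the header is not part of this hunk):

/* Matching read side for the publish above (sketch). */
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
	smp_rmb();
	return kvm->vcpus[i];
}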
@@ -2199,6 +2238,7 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2199 | vfree(entries); | 2238 | vfree(entries); |
2200 | break; | 2239 | break; |
2201 | } | 2240 | } |
2241 | #endif /* KVM_CAP_IRQ_ROUTING */ | ||
2202 | #ifdef __KVM_HAVE_MSIX | 2242 | #ifdef __KVM_HAVE_MSIX |
2203 | case KVM_ASSIGN_SET_MSIX_NR: { | 2243 | case KVM_ASSIGN_SET_MSIX_NR: { |
2204 | struct kvm_assigned_msix_nr entry_nr; | 2244 | struct kvm_assigned_msix_nr entry_nr; |
@@ -2221,7 +2261,35 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2221 | break; | 2261 | break; |
2222 | } | 2262 | } |
2223 | #endif | 2263 | #endif |
2224 | #endif /* KVM_CAP_IRQ_ROUTING */ | 2264 | case KVM_IRQFD: { |
2265 | struct kvm_irqfd data; | ||
2266 | |||
2267 | r = -EFAULT; | ||
2268 | if (copy_from_user(&data, argp, sizeof data)) | ||
2269 | goto out; | ||
2270 | r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags); | ||
2271 | break; | ||
2272 | } | ||
2273 | case KVM_IOEVENTFD: { | ||
2274 | struct kvm_ioeventfd data; | ||
2275 | |||
2276 | r = -EFAULT; | ||
2277 | if (copy_from_user(&data, argp, sizeof data)) | ||
2278 | goto out; | ||
2279 | r = kvm_ioeventfd(kvm, &data); | ||
2280 | break; | ||
2281 | } | ||
2282 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | ||
2283 | case KVM_SET_BOOT_CPU_ID: | ||
2284 | r = 0; | ||
2285 | mutex_lock(&kvm->lock); | ||
2286 | if (atomic_read(&kvm->online_vcpus) != 0) | ||
2287 | r = -EBUSY; | ||
2288 | else | ||
2289 | kvm->bsp_vcpu_id = arg; | ||
2290 | mutex_unlock(&kvm->lock); | ||
2291 | break; | ||
2292 | #endif | ||
2225 | default: | 2293 | default: |
2226 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); | 2294 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); |
2227 | } | 2295 | } |
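From userspace, the two new eventfd ioctls take small descriptor structures on the VM fd, and KVM_SET_BOOT_CPU_ID passes the id directly as the argument and must run before the first vCPU is created. A minimal usage sketch, assuming the uapi layout of struct kvm_irqfd implied by the handler above (fd, gsi, flags):

#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* vm_fd is an already created VM file descriptor. */
static int wire_up_irqfd_sketch(int vm_fd, unsigned int gsi)
{
	struct kvm_irqfd irqfd;
	int efd = eventfd(0, 0);

	if (efd < 0)
		return -1;

	memset(&irqfd, 0, sizeof(irqfd));
	irqfd.fd = efd;
	irqfd.gsi = gsi;	/* a write to efd now injects this GSI */

	if (ioctl(vm_fd, KVM_IRQFD, &irqfd) < 0)
		return -1;

	/* Optional: pick the BSP before any KVM_CREATE_VCPU call. */
	if (ioctl(vm_fd, KVM_SET_BOOT_CPU_ID, 0UL) < 0)
		return -1;

	return efd;
}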
@@ -2288,6 +2356,9 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) | |||
2288 | case KVM_CAP_USER_MEMORY: | 2356 | case KVM_CAP_USER_MEMORY: |
2289 | case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: | 2357 | case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: |
2290 | case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: | 2358 | case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: |
2359 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | ||
2360 | case KVM_CAP_SET_BOOT_CPU_ID: | ||
2361 | #endif | ||
2291 | return 1; | 2362 | return 1; |
2292 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 2363 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
2293 | case KVM_CAP_IRQ_ROUTING: | 2364 | case KVM_CAP_IRQ_ROUTING: |
@@ -2335,7 +2406,7 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2335 | case KVM_TRACE_ENABLE: | 2406 | case KVM_TRACE_ENABLE: |
2336 | case KVM_TRACE_PAUSE: | 2407 | case KVM_TRACE_PAUSE: |
2337 | case KVM_TRACE_DISABLE: | 2408 | case KVM_TRACE_DISABLE: |
2338 | r = kvm_trace_ioctl(ioctl, arg); | 2409 | r = -EOPNOTSUPP; |
2339 | break; | 2410 | break; |
2340 | default: | 2411 | default: |
2341 | return kvm_arch_dev_ioctl(filp, ioctl, arg); | 2412 | return kvm_arch_dev_ioctl(filp, ioctl, arg); |
@@ -2449,26 +2520,71 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus) | |||
2449 | } | 2520 | } |
2450 | } | 2521 | } |
2451 | 2522 | ||
2452 | struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, | 2523 | /* kvm_io_bus_write - called under kvm->slots_lock */ |
2453 | gpa_t addr, int len, int is_write) | 2524 | int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, |
2525 | int len, const void *val) | ||
2454 | { | 2526 | { |
2455 | int i; | 2527 | int i; |
2528 | for (i = 0; i < bus->dev_count; i++) | ||
2529 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) | ||
2530 | return 0; | ||
2531 | return -EOPNOTSUPP; | ||
2532 | } | ||
2456 | 2533 | ||
2457 | for (i = 0; i < bus->dev_count; i++) { | 2534 | /* kvm_io_bus_read - called under kvm->slots_lock */ |
2458 | struct kvm_io_device *pos = bus->devs[i]; | 2535 | int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val) |
2536 | { | ||
2537 | int i; | ||
2538 | for (i = 0; i < bus->dev_count; i++) | ||
2539 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) | ||
2540 | return 0; | ||
2541 | return -EOPNOTSUPP; | ||
2542 | } | ||
2459 | 2543 | ||
2460 | if (pos->in_range(pos, addr, len, is_write)) | 2544 | int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, |
2461 | return pos; | 2545 | struct kvm_io_device *dev) |
2462 | } | 2546 | { |
2547 | int ret; | ||
2463 | 2548 | ||
2464 | return NULL; | 2549 | down_write(&kvm->slots_lock); |
2550 | ret = __kvm_io_bus_register_dev(bus, dev); | ||
2551 | up_write(&kvm->slots_lock); | ||
2552 | |||
2553 | return ret; | ||
2465 | } | 2554 | } |
2466 | 2555 | ||
2467 | void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev) | 2556 | /* An unlocked version. Caller must have write lock on slots_lock. */ |
2557 | int __kvm_io_bus_register_dev(struct kvm_io_bus *bus, | ||
2558 | struct kvm_io_device *dev) | ||
2468 | { | 2559 | { |
2469 | BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1)); | 2560 | if (bus->dev_count > NR_IOBUS_DEVS-1) |
2561 | return -ENOSPC; | ||
2470 | 2562 | ||
2471 | bus->devs[bus->dev_count++] = dev; | 2563 | bus->devs[bus->dev_count++] = dev; |
2564 | |||
2565 | return 0; | ||
2566 | } | ||
2567 | |||
2568 | void kvm_io_bus_unregister_dev(struct kvm *kvm, | ||
2569 | struct kvm_io_bus *bus, | ||
2570 | struct kvm_io_device *dev) | ||
2571 | { | ||
2572 | down_write(&kvm->slots_lock); | ||
2573 | __kvm_io_bus_unregister_dev(bus, dev); | ||
2574 | up_write(&kvm->slots_lock); | ||
2575 | } | ||
2576 | |||
2577 | /* An unlocked version. Caller must have write lock on slots_lock. */ | ||
2578 | void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus, | ||
2579 | struct kvm_io_device *dev) | ||
2580 | { | ||
2581 | int i; | ||
2582 | |||
2583 | for (i = 0; i < bus->dev_count; i++) | ||
2584 | if (bus->devs[i] == dev) { | ||
2585 | bus->devs[i] = bus->devs[--bus->dev_count]; | ||
2586 | break; | ||
2587 | } | ||
2472 | } | 2588 | } |
2473 | 2589 | ||
2474 | static struct notifier_block kvm_cpu_notifier = { | 2590 | static struct notifier_block kvm_cpu_notifier = { |
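The bus no longer asks each device in_range() before dispatching; instead a device's read/write callback claims an access by returning 0 or declines with -EOPNOTSUPP, and kvm_io_bus_write()/kvm_io_bus_read() walk the list until someone accepts. A sketch of a device written against that contract; the ops structure and kvm_iodevice_init() follow virt/kvm/iodev.h in this series, so treat the exact signatures as an assumption:

#include <linux/kvm_host.h>
#include "iodev.h"		/* assumed to declare kvm_io_device_ops etc. */

struct dummy_mmio_dev {
	struct kvm_io_device dev;
	gpa_t base;
	u32 value;
};

static int dummy_write(struct kvm_io_device *this, gpa_t addr, int len,
		       const void *val)
{
	struct dummy_mmio_dev *d = container_of(this, struct dummy_mmio_dev, dev);

	if (addr != d->base || len != 4)
		return -EOPNOTSUPP;	/* not ours: the bus keeps looking */
	d->value = *(const u32 *)val;
	return 0;			/* claimed */
}

static int dummy_read(struct kvm_io_device *this, gpa_t addr, int len, void *val)
{
	struct dummy_mmio_dev *d = container_of(this, struct dummy_mmio_dev, dev);

	if (addr != d->base || len != 4)
		return -EOPNOTSUPP;
	*(u32 *)val = d->value;
	return 0;
}

static const struct kvm_io_device_ops dummy_ops = {
	.read	= dummy_read,
	.write	= dummy_write,
};

/*
 * Registration would then go through the new locked helper, e.g.:
 *	kvm_iodevice_init(&d->dev, &dummy_ops);
 *	kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &d->dev);
 */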
@@ -2501,11 +2617,9 @@ static int vcpu_stat_get(void *_offset, u64 *val) | |||
2501 | *val = 0; | 2617 | *val = 0; |
2502 | spin_lock(&kvm_lock); | 2618 | spin_lock(&kvm_lock); |
2503 | list_for_each_entry(kvm, &vm_list, vm_list) | 2619 | list_for_each_entry(kvm, &vm_list, vm_list) |
2504 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | 2620 | kvm_for_each_vcpu(i, vcpu, kvm) |
2505 | vcpu = kvm->vcpus[i]; | 2621 | *val += *(u32 *)((void *)vcpu + offset); |
2506 | if (vcpu) | 2622 | |
2507 | *val += *(u32 *)((void *)vcpu + offset); | ||
2508 | } | ||
2509 | spin_unlock(&kvm_lock); | 2623 | spin_unlock(&kvm_lock); |
2510 | return 0; | 2624 | return 0; |
2511 | } | 2625 | } |
@@ -2679,15 +2793,15 @@ out_free_0: | |||
2679 | __free_page(bad_page); | 2793 | __free_page(bad_page); |
2680 | out: | 2794 | out: |
2681 | kvm_arch_exit(); | 2795 | kvm_arch_exit(); |
2682 | kvm_exit_debug(); | ||
2683 | out_fail: | 2796 | out_fail: |
2797 | kvm_exit_debug(); | ||
2684 | return r; | 2798 | return r; |
2685 | } | 2799 | } |
2686 | EXPORT_SYMBOL_GPL(kvm_init); | 2800 | EXPORT_SYMBOL_GPL(kvm_init); |
2687 | 2801 | ||
2688 | void kvm_exit(void) | 2802 | void kvm_exit(void) |
2689 | { | 2803 | { |
2690 | kvm_trace_cleanup(); | 2804 | tracepoint_synchronize_unregister(); |
2691 | misc_deregister(&kvm_dev); | 2805 | misc_deregister(&kvm_dev); |
2692 | kmem_cache_destroy(kvm_vcpu_cache); | 2806 | kmem_cache_destroy(kvm_vcpu_cache); |
2693 | sysdev_unregister(&kvm_sysdev); | 2807 | sysdev_unregister(&kvm_sysdev); |
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c deleted file mode 100644 index f59874446440..000000000000 --- a/virt/kvm/kvm_trace.c +++ /dev/null | |||
@@ -1,285 +0,0 @@ | |||
1 | /* | ||
2 | * kvm trace | ||
3 | * | ||
4 | * It is designed to allow debugging traces of kvm to be generated | ||
5 | * on UP / SMP machines. Each trace entry can be timestamped so that | ||
6 | * it's possible to reconstruct a chronological record of trace events. | ||
7 | * The implementation refers to blktrace kernel support. | ||
8 | * | ||
9 | * Copyright (c) 2008 Intel Corporation | ||
10 | * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> | ||
11 | * | ||
12 | * Authors: Feng(Eric) Liu, eric.e.liu@intel.com | ||
13 | * | ||
14 | * Date: Feb 2008 | ||
15 | */ | ||
16 | |||
17 | #include <linux/module.h> | ||
18 | #include <linux/relay.h> | ||
19 | #include <linux/debugfs.h> | ||
20 | #include <linux/ktime.h> | ||
21 | |||
22 | #include <linux/kvm_host.h> | ||
23 | |||
24 | #define KVM_TRACE_STATE_RUNNING (1 << 0) | ||
25 | #define KVM_TRACE_STATE_PAUSE (1 << 1) | ||
26 | #define KVM_TRACE_STATE_CLEARUP (1 << 2) | ||
27 | |||
28 | struct kvm_trace { | ||
29 | int trace_state; | ||
30 | struct rchan *rchan; | ||
31 | struct dentry *lost_file; | ||
32 | atomic_t lost_records; | ||
33 | }; | ||
34 | static struct kvm_trace *kvm_trace; | ||
35 | |||
36 | struct kvm_trace_probe { | ||
37 | const char *name; | ||
38 | const char *format; | ||
39 | u32 timestamp_in; | ||
40 | marker_probe_func *probe_func; | ||
41 | }; | ||
42 | |||
43 | static inline int calc_rec_size(int timestamp, int extra) | ||
44 | { | ||
45 | int rec_size = KVM_TRC_HEAD_SIZE; | ||
46 | |||
47 | rec_size += extra; | ||
48 | return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; | ||
49 | } | ||
50 | |||
51 | static void kvm_add_trace(void *probe_private, void *call_data, | ||
52 | const char *format, va_list *args) | ||
53 | { | ||
54 | struct kvm_trace_probe *p = probe_private; | ||
55 | struct kvm_trace *kt = kvm_trace; | ||
56 | struct kvm_trace_rec rec; | ||
57 | struct kvm_vcpu *vcpu; | ||
58 | int i, size; | ||
59 | u32 extra; | ||
60 | |||
61 | if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) | ||
62 | return; | ||
63 | |||
64 | rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32)); | ||
65 | vcpu = va_arg(*args, struct kvm_vcpu *); | ||
66 | rec.pid = current->tgid; | ||
67 | rec.vcpu_id = vcpu->vcpu_id; | ||
68 | |||
69 | extra = va_arg(*args, u32); | ||
70 | WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); | ||
71 | extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); | ||
72 | |||
73 | rec.rec_val |= TRACE_REC_TCS(p->timestamp_in) | ||
74 | | TRACE_REC_NUM_DATA_ARGS(extra); | ||
75 | |||
76 | if (p->timestamp_in) { | ||
77 | rec.u.timestamp.timestamp = ktime_to_ns(ktime_get()); | ||
78 | |||
79 | for (i = 0; i < extra; i++) | ||
80 | rec.u.timestamp.extra_u32[i] = va_arg(*args, u32); | ||
81 | } else { | ||
82 | for (i = 0; i < extra; i++) | ||
83 | rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32); | ||
84 | } | ||
85 | |||
86 | size = calc_rec_size(p->timestamp_in, extra * sizeof(u32)); | ||
87 | relay_write(kt->rchan, &rec, size); | ||
88 | } | ||
89 | |||
90 | static struct kvm_trace_probe kvm_trace_probes[] = { | ||
91 | { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace }, | ||
92 | { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace }, | ||
93 | }; | ||
94 | |||
95 | static int lost_records_get(void *data, u64 *val) | ||
96 | { | ||
97 | struct kvm_trace *kt = data; | ||
98 | |||
99 | *val = atomic_read(&kt->lost_records); | ||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n"); | ||
104 | |||
105 | /* | ||
106 | * The relay channel is used in "no-overwrite" mode, it keeps trace of how | ||
107 | * many times we encountered a full subbuffer, to tell user space app the | ||
108 | * lost records there were. | ||
109 | */ | ||
110 | static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, | ||
111 | void *prev_subbuf, size_t prev_padding) | ||
112 | { | ||
113 | struct kvm_trace *kt; | ||
114 | |||
115 | if (!relay_buf_full(buf)) { | ||
116 | if (!prev_subbuf) { | ||
117 | /* | ||
118 | * executed only once when the channel is opened | ||
119 | * save metadata as first record | ||
120 | */ | ||
121 | subbuf_start_reserve(buf, sizeof(u32)); | ||
122 | *(u32 *)subbuf = 0x12345678; | ||
123 | } | ||
124 | |||
125 | return 1; | ||
126 | } | ||
127 | |||
128 | kt = buf->chan->private_data; | ||
129 | atomic_inc(&kt->lost_records); | ||
130 | |||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | static struct dentry *kvm_create_buf_file_callack(const char *filename, | ||
135 | struct dentry *parent, | ||
136 | int mode, | ||
137 | struct rchan_buf *buf, | ||
138 | int *is_global) | ||
139 | { | ||
140 | return debugfs_create_file(filename, mode, parent, buf, | ||
141 | &relay_file_operations); | ||
142 | } | ||
143 | |||
144 | static int kvm_remove_buf_file_callback(struct dentry *dentry) | ||
145 | { | ||
146 | debugfs_remove(dentry); | ||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | static struct rchan_callbacks kvm_relay_callbacks = { | ||
151 | .subbuf_start = kvm_subbuf_start_callback, | ||
152 | .create_buf_file = kvm_create_buf_file_callack, | ||
153 | .remove_buf_file = kvm_remove_buf_file_callback, | ||
154 | }; | ||
155 | |||
156 | static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts) | ||
157 | { | ||
158 | struct kvm_trace *kt; | ||
159 | int i, r = -ENOMEM; | ||
160 | |||
161 | if (!kuts->buf_size || !kuts->buf_nr) | ||
162 | return -EINVAL; | ||
163 | |||
164 | kt = kzalloc(sizeof(*kt), GFP_KERNEL); | ||
165 | if (!kt) | ||
166 | goto err; | ||
167 | |||
168 | r = -EIO; | ||
169 | atomic_set(&kt->lost_records, 0); | ||
170 | kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir, | ||
171 | kt, &kvm_trace_lost_ops); | ||
172 | if (!kt->lost_file) | ||
173 | goto err; | ||
174 | |||
175 | kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size, | ||
176 | kuts->buf_nr, &kvm_relay_callbacks, kt); | ||
177 | if (!kt->rchan) | ||
178 | goto err; | ||
179 | |||
180 | kvm_trace = kt; | ||
181 | |||
182 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | ||
183 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; | ||
184 | |||
185 | r = marker_probe_register(p->name, p->format, p->probe_func, p); | ||
186 | if (r) | ||
187 | printk(KERN_INFO "Unable to register probe %s\n", | ||
188 | p->name); | ||
189 | } | ||
190 | |||
191 | kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING; | ||
192 | |||
193 | return 0; | ||
194 | err: | ||
195 | if (kt) { | ||
196 | if (kt->lost_file) | ||
197 | debugfs_remove(kt->lost_file); | ||
198 | if (kt->rchan) | ||
199 | relay_close(kt->rchan); | ||
200 | kfree(kt); | ||
201 | } | ||
202 | return r; | ||
203 | } | ||
204 | |||
205 | static int kvm_trace_enable(char __user *arg) | ||
206 | { | ||
207 | struct kvm_user_trace_setup kuts; | ||
208 | int ret; | ||
209 | |||
210 | ret = copy_from_user(&kuts, arg, sizeof(kuts)); | ||
211 | if (ret) | ||
212 | return -EFAULT; | ||
213 | |||
214 | ret = do_kvm_trace_enable(&kuts); | ||
215 | if (ret) | ||
216 | return ret; | ||
217 | |||
218 | return 0; | ||
219 | } | ||
220 | |||
221 | static int kvm_trace_pause(void) | ||
222 | { | ||
223 | struct kvm_trace *kt = kvm_trace; | ||
224 | int r = -EINVAL; | ||
225 | |||
226 | if (kt == NULL) | ||
227 | return r; | ||
228 | |||
229 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING) { | ||
230 | kt->trace_state = KVM_TRACE_STATE_PAUSE; | ||
231 | relay_flush(kt->rchan); | ||
232 | r = 0; | ||
233 | } | ||
234 | |||
235 | return r; | ||
236 | } | ||
237 | |||
238 | void kvm_trace_cleanup(void) | ||
239 | { | ||
240 | struct kvm_trace *kt = kvm_trace; | ||
241 | int i; | ||
242 | |||
243 | if (kt == NULL) | ||
244 | return; | ||
245 | |||
246 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING || | ||
247 | kt->trace_state == KVM_TRACE_STATE_PAUSE) { | ||
248 | |||
249 | kt->trace_state = KVM_TRACE_STATE_CLEARUP; | ||
250 | |||
251 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | ||
252 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; | ||
253 | marker_probe_unregister(p->name, p->probe_func, p); | ||
254 | } | ||
255 | marker_synchronize_unregister(); | ||
256 | |||
257 | relay_close(kt->rchan); | ||
258 | debugfs_remove(kt->lost_file); | ||
259 | kfree(kt); | ||
260 | } | ||
261 | } | ||
262 | |||
263 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) | ||
264 | { | ||
265 | void __user *argp = (void __user *)arg; | ||
266 | long r = -EINVAL; | ||
267 | |||
268 | if (!capable(CAP_SYS_ADMIN)) | ||
269 | return -EPERM; | ||
270 | |||
271 | switch (ioctl) { | ||
272 | case KVM_TRACE_ENABLE: | ||
273 | r = kvm_trace_enable(argp); | ||
274 | break; | ||
275 | case KVM_TRACE_PAUSE: | ||
276 | r = kvm_trace_pause(); | ||
277 | break; | ||
278 | case KVM_TRACE_DISABLE: | ||
279 | r = 0; | ||
280 | kvm_trace_cleanup(); | ||
281 | break; | ||
282 | } | ||
283 | |||
284 | return r; | ||
285 | } | ||
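The relay/markers tracer deleted above is superseded by regular tracepoints, which is why kvm_exit() now calls tracepoint_synchronize_unregister() and the KVM_TRACE_* ioctls return -EOPNOTSUPP. Event definitions move into trace headers in arch code rather than this directory; the sketch below shows the general TRACE_EVENT shape with an entirely hypothetical event name and fields, omitting the CREATE_TRACE_POINTS/define_trace.h boilerplate a real trace header needs:

/* Hypothetical event; real KVM events live in arch trace headers. */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm

#include <linux/tracepoint.h>

TRACE_EVENT(kvm_sketch_exit,
	TP_PROTO(unsigned int exit_reason, unsigned long guest_rip),
	TP_ARGS(exit_reason, guest_rip),

	TP_STRUCT__entry(
		__field(unsigned int,  exit_reason)
		__field(unsigned long, guest_rip)
	),

	TP_fast_assign(
		__entry->exit_reason = exit_reason;
		__entry->guest_rip   = guest_rip;
	),

	TP_printk("reason %u rip 0x%lx",
		  __entry->exit_reason, __entry->guest_rip)
);

/*
 * The .c side then calls trace_kvm_sketch_exit(reason, rip) where the old
 * KVM_TRC_* markers used to fire, and userspace consumes the events through
 * debugfs/ftrace instead of the relay channel removed above.
 */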