aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnthony Liguori <aliguori@us.ibm.com>2007-11-12 22:30:26 -0500
committerRusty Russell <rusty@rustcorp.com.au>2008-02-04 07:50:11 -0500
commit3343660d8c62c6b00b2f15324ef3fcb6be207bfa (patch)
tree06d86446da65fc1814edad944e43aeb62f092422
parentd50ed907dc3db5bf2dd0a05b4e199a65793a3788 (diff)
virtio: PCI device
This is a PCI device that implements a transport for virtio. It allows virtio devices to be used by QEMU based VMMs like KVM or Xen. Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
-rw-r--r--drivers/virtio/Kconfig17
-rw-r--r--drivers/virtio/Makefile1
-rw-r--r--drivers/virtio/virtio_pci.c440
-rw-r--r--include/linux/virtio_pci.h55
4 files changed, 513 insertions, 0 deletions
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index de0c8c2654ef..833db2f36e9b 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -6,3 +6,20 @@ config VIRTIO
6config VIRTIO_RING 6config VIRTIO_RING
7 tristate 7 tristate
8 depends on VIRTIO 8 depends on VIRTIO
9
10config VIRTIO_PCI
11 tristate "PCI driver for virtio devices (EXPERIMENTAL)"
12 depends on PCI && EXPERIMENTAL
13 select VIRTIO
14 select VIRTIO_RING
15 ---help---
16 This drivers provides support for virtio based paravirtual device
17 drivers over PCI. This requires that your VMM has appropriate PCI
18 virtio backends. Most QEMU based VMMs should support these devices
19 (like KVM or Xen).
20
21 Currently, the ABI is not considered stable so there is no guarantee
22 that this version of the driver will work with your VMM.
23
24 If unsure, say M.
25
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index f70e40971dd9..cc84999f3057 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,2 +1,3 @@
1obj-$(CONFIG_VIRTIO) += virtio.o 1obj-$(CONFIG_VIRTIO) += virtio.o
2obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o 2obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
3obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
new file mode 100644
index 000000000000..192687e3a56a
--- /dev/null
+++ b/drivers/virtio/virtio_pci.c
@@ -0,0 +1,440 @@
1/*
2 * Virtio PCI driver
3 *
4 * This module allows virtio devices to be used over a virtual PCI device.
5 * This can be used with QEMU based VMMs like KVM or Xen.
6 *
7 * Copyright IBM Corp. 2007
8 *
9 * Authors:
10 * Anthony Liguori <aliguori@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17#include <linux/module.h>
18#include <linux/list.h>
19#include <linux/pci.h>
20#include <linux/interrupt.h>
21#include <linux/virtio.h>
22#include <linux/virtio_config.h>
23#include <linux/virtio_ring.h>
24#include <linux/virtio_pci.h>
25#include <linux/highmem.h>
26#include <linux/spinlock.h>
27
28MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
29MODULE_DESCRIPTION("virtio-pci");
30MODULE_LICENSE("GPL");
31MODULE_VERSION("1");
32
33/* Our device structure */
34struct virtio_pci_device
35{
36 struct virtio_device vdev;
37 struct pci_dev *pci_dev;
38
39 /* the IO mapping for the PCI config space */
40 void *ioaddr;
41
42 /* a list of queues so we can dispatch IRQs */
43 spinlock_t lock;
44 struct list_head virtqueues;
45};
46
47struct virtio_pci_vq_info
48{
49 /* the actual virtqueue */
50 struct virtqueue *vq;
51
52 /* the number of entries in the queue */
53 int num;
54
55 /* the index of the queue */
56 int queue_index;
57
58 /* the virtual address of the ring queue */
59 void *queue;
60
61 /* the list node for the virtqueues list */
62 struct list_head node;
63};
64
65/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
66static struct pci_device_id virtio_pci_id_table[] = {
67 { 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
68 { 0 },
69};
70
71MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
72
73/* A PCI device has it's own struct device and so does a virtio device so
74 * we create a place for the virtio devices to show up in sysfs. I think it
75 * would make more sense for virtio to not insist on having it's own device. */
76static struct device virtio_pci_root = {
77 .parent = NULL,
78 .bus_id = "virtio-pci",
79};
80
81/* Unique numbering for devices under the kvm root */
82static unsigned int dev_index;
83
84/* Convert a generic virtio device to our structure */
85static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
86{
87 return container_of(vdev, struct virtio_pci_device, vdev);
88}
89
90/* virtio config->feature() implementation */
91static bool vp_feature(struct virtio_device *vdev, unsigned bit)
92{
93 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
94 u32 mask;
95
96 /* Since this function is supposed to have the side effect of
97 * enabling a queried feature, we simulate that by doing a read
98 * from the host feature bitmask and then writing to the guest
99 * feature bitmask */
100 mask = ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
101 if (mask & (1 << bit)) {
102 mask |= (1 << bit);
103 iowrite32(mask, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
104 }
105
106 return !!(mask & (1 << bit));
107}
108
109/* virtio config->get() implementation */
110static void vp_get(struct virtio_device *vdev, unsigned offset,
111 void *buf, unsigned len)
112{
113 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
114 void *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset;
115 u8 *ptr = buf;
116 int i;
117
118 for (i = 0; i < len; i++)
119 ptr[i] = ioread8(ioaddr + i);
120}
121
122/* the config->set() implementation. it's symmetric to the config->get()
123 * implementation */
124static void vp_set(struct virtio_device *vdev, unsigned offset,
125 const void *buf, unsigned len)
126{
127 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
128 void *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset;
129 const u8 *ptr = buf;
130 int i;
131
132 for (i = 0; i < len; i++)
133 iowrite8(ptr[i], ioaddr + i);
134}
135
136/* config->{get,set}_status() implementations */
137static u8 vp_get_status(struct virtio_device *vdev)
138{
139 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
140 return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
141}
142
143static void vp_set_status(struct virtio_device *vdev, u8 status)
144{
145 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
146 /* We should never be setting status to 0. */
147 BUG_ON(status == 0);
148 return iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
149}
150
151static void vp_reset(struct virtio_device *vdev)
152{
153 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
154 /* 0 status means a reset. */
155 return iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
156}
157
158/* the notify function used when creating a virt queue */
159static void vp_notify(struct virtqueue *vq)
160{
161 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
162 struct virtio_pci_vq_info *info = vq->priv;
163
164 /* we write the queue's selector into the notification register to
165 * signal the other end */
166 iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
167}
168
169/* A small wrapper to also acknowledge the interrupt when it's handled.
170 * I really need an EIO hook for the vring so I can ack the interrupt once we
171 * know that we'll be handling the IRQ but before we invoke the callback since
172 * the callback may notify the host which results in the host attempting to
173 * raise an interrupt that we would then mask once we acknowledged the
174 * interrupt. */
175static irqreturn_t vp_interrupt(int irq, void *opaque)
176{
177 struct virtio_pci_device *vp_dev = opaque;
178 struct virtio_pci_vq_info *info;
179 irqreturn_t ret = IRQ_NONE;
180 u8 isr;
181
182 /* reading the ISR has the effect of also clearing it so it's very
183 * important to save off the value. */
184 isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
185
186 /* It's definitely not us if the ISR was not high */
187 if (!isr)
188 return IRQ_NONE;
189
190 /* Configuration change? Tell driver if it wants to know. */
191 if (isr & VIRTIO_PCI_ISR_CONFIG) {
192 struct virtio_driver *drv;
193 drv = container_of(vp_dev->vdev.dev.driver,
194 struct virtio_driver, driver);
195
196 if (drv->config_changed)
197 drv->config_changed(&vp_dev->vdev);
198 }
199
200 spin_lock(&vp_dev->lock);
201 list_for_each_entry(info, &vp_dev->virtqueues, node) {
202 if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
203 ret = IRQ_HANDLED;
204 }
205 spin_unlock(&vp_dev->lock);
206
207 return ret;
208}
209
210/* the config->find_vq() implementation */
211static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
212 void (*callback)(struct virtqueue *vq))
213{
214 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
215 struct virtio_pci_vq_info *info;
216 struct virtqueue *vq;
217 u16 num;
218 int err;
219
220 /* Select the queue we're interested in */
221 iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
222
223 /* Check if queue is either not available or already active. */
224 num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
225 if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
226 return ERR_PTR(-ENOENT);
227
228 /* allocate and fill out our structure the represents an active
229 * queue */
230 info = kmalloc(sizeof(struct virtio_pci_vq_info), GFP_KERNEL);
231 if (!info)
232 return ERR_PTR(-ENOMEM);
233
234 info->queue_index = index;
235 info->num = num;
236
237 info->queue = kzalloc(PAGE_ALIGN(vring_size(num,PAGE_SIZE)), GFP_KERNEL);
238 if (info->queue == NULL) {
239 err = -ENOMEM;
240 goto out_info;
241 }
242
243 /* activate the queue */
244 iowrite32(virt_to_phys(info->queue) >> PAGE_SHIFT,
245 vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
246
247 /* create the vring */
248 vq = vring_new_virtqueue(info->num, vdev, info->queue,
249 vp_notify, callback);
250 if (!vq) {
251 err = -ENOMEM;
252 goto out_activate_queue;
253 }
254
255 vq->priv = info;
256 info->vq = vq;
257
258 spin_lock(&vp_dev->lock);
259 list_add(&info->node, &vp_dev->virtqueues);
260 spin_unlock(&vp_dev->lock);
261
262 return vq;
263
264out_activate_queue:
265 iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
266 kfree(info->queue);
267out_info:
268 kfree(info);
269 return ERR_PTR(err);
270}
271
272/* the config->del_vq() implementation */
273static void vp_del_vq(struct virtqueue *vq)
274{
275 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
276 struct virtio_pci_vq_info *info = vq->priv;
277
278 spin_lock(&vp_dev->lock);
279 list_del(&info->node);
280 spin_unlock(&vp_dev->lock);
281
282 vring_del_virtqueue(vq);
283
284 /* Select and deactivate the queue */
285 iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
286 iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
287
288 kfree(info->queue);
289 kfree(info);
290}
291
292static struct virtio_config_ops virtio_pci_config_ops = {
293 .feature = vp_feature,
294 .get = vp_get,
295 .set = vp_set,
296 .get_status = vp_get_status,
297 .set_status = vp_set_status,
298 .reset = vp_reset,
299 .find_vq = vp_find_vq,
300 .del_vq = vp_del_vq,
301};
302
303/* the PCI probing function */
304static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,
305 const struct pci_device_id *id)
306{
307 struct virtio_pci_device *vp_dev;
308 int err;
309
310 /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
311 if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
312 return -ENODEV;
313
314 /* allocate our structure and fill it out */
315 vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
316 if (vp_dev == NULL)
317 return -ENOMEM;
318
319 snprintf(vp_dev->vdev.dev.bus_id, BUS_ID_SIZE, "virtio%d", dev_index);
320 vp_dev->vdev.index = dev_index;
321 dev_index++;
322
323 vp_dev->vdev.dev.parent = &virtio_pci_root;
324 vp_dev->vdev.config = &virtio_pci_config_ops;
325 vp_dev->pci_dev = pci_dev;
326 INIT_LIST_HEAD(&vp_dev->virtqueues);
327 spin_lock_init(&vp_dev->lock);
328
329 /* enable the device */
330 err = pci_enable_device(pci_dev);
331 if (err)
332 goto out;
333
334 err = pci_request_regions(pci_dev, "virtio-pci");
335 if (err)
336 goto out_enable_device;
337
338 vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
339 if (vp_dev->ioaddr == NULL)
340 goto out_req_regions;
341
342 pci_set_drvdata(pci_dev, vp_dev);
343
344 /* we use the subsystem vendor/device id as the virtio vendor/device
345 * id. this allows us to use the same PCI vendor/device id for all
346 * virtio devices and to identify the particular virtio driver by
347 * the subsytem ids */
348 vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
349 vp_dev->vdev.id.device = pci_dev->subsystem_device;
350
351 /* register a handler for the queue with the PCI device's interrupt */
352 err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
353 vp_dev->vdev.dev.bus_id, vp_dev);
354 if (err)
355 goto out_set_drvdata;
356
357 /* finally register the virtio device */
358 err = register_virtio_device(&vp_dev->vdev);
359 if (err)
360 goto out_req_irq;
361
362 return 0;
363
364out_req_irq:
365 free_irq(pci_dev->irq, vp_dev);
366out_set_drvdata:
367 pci_set_drvdata(pci_dev, NULL);
368 pci_iounmap(pci_dev, vp_dev->ioaddr);
369out_req_regions:
370 pci_release_regions(pci_dev);
371out_enable_device:
372 pci_disable_device(pci_dev);
373out:
374 kfree(vp_dev);
375 return err;
376}
377
378static void __devexit virtio_pci_remove(struct pci_dev *pci_dev)
379{
380 struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
381
382 free_irq(pci_dev->irq, vp_dev);
383 pci_set_drvdata(pci_dev, NULL);
384 pci_iounmap(pci_dev, vp_dev->ioaddr);
385 pci_release_regions(pci_dev);
386 pci_disable_device(pci_dev);
387 kfree(vp_dev);
388}
389
390#ifdef CONFIG_PM
391static int virtio_pci_suspend(struct pci_dev *pci_dev, pm_message_t state)
392{
393 pci_save_state(pci_dev);
394 pci_set_power_state(pci_dev, PCI_D3hot);
395 return 0;
396}
397
398static int virtio_pci_resume(struct pci_dev *pci_dev)
399{
400 pci_restore_state(pci_dev);
401 pci_set_power_state(pci_dev, PCI_D0);
402 return 0;
403}
404#endif
405
406static struct pci_driver virtio_pci_driver = {
407 .name = "virtio-pci",
408 .id_table = virtio_pci_id_table,
409 .probe = virtio_pci_probe,
410 .remove = virtio_pci_remove,
411#ifdef CONFIG_PM
412 .suspend = virtio_pci_suspend,
413 .resume = virtio_pci_resume,
414#endif
415};
416
417static int __init virtio_pci_init(void)
418{
419 int err;
420
421 err = device_register(&virtio_pci_root);
422 if (err)
423 return err;
424
425 err = pci_register_driver(&virtio_pci_driver);
426 if (err)
427 device_unregister(&virtio_pci_root);
428
429 return err;
430}
431
432module_init(virtio_pci_init);
433
434static void __exit virtio_pci_exit(void)
435{
436 device_unregister(&virtio_pci_root);
437 pci_unregister_driver(&virtio_pci_driver);
438}
439
440module_exit(virtio_pci_exit);
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
new file mode 100644
index 000000000000..860eb37bfa07
--- /dev/null
+++ b/include/linux/virtio_pci.h
@@ -0,0 +1,55 @@
1/*
2 * Virtio PCI driver
3 *
4 * This module allows virtio devices to be used over a virtual PCI device.
5 * This can be used with QEMU based VMMs like KVM or Xen.
6 *
7 * Copyright IBM Corp. 2007
8 *
9 * Authors:
10 * Anthony Liguori <aliguori@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17#ifndef _LINUX_VIRTIO_PCI_H
18#define _LINUX_VIRTIO_PCI_H
19
20#include <linux/virtio_config.h>
21
22/* A 32-bit r/o bitmask of the features supported by the host */
23#define VIRTIO_PCI_HOST_FEATURES 0
24
25/* A 32-bit r/w bitmask of features activated by the guest */
26#define VIRTIO_PCI_GUEST_FEATURES 4
27
28/* A 32-bit r/w PFN for the currently selected queue */
29#define VIRTIO_PCI_QUEUE_PFN 8
30
31/* A 16-bit r/o queue size for the currently selected queue */
32#define VIRTIO_PCI_QUEUE_NUM 12
33
34/* A 16-bit r/w queue selector */
35#define VIRTIO_PCI_QUEUE_SEL 14
36
37/* A 16-bit r/w queue notifier */
38#define VIRTIO_PCI_QUEUE_NOTIFY 16
39
40/* An 8-bit device status register. */
41#define VIRTIO_PCI_STATUS 18
42
43/* An 8-bit r/o interrupt status register. Reading the value will return the
44 * current contents of the ISR and will also clear it. This is effectively
45 * a read-and-acknowledge. */
46#define VIRTIO_PCI_ISR 19
47
48/* The bit of the ISR which indicates a device configuration change. */
49#define VIRTIO_PCI_ISR_CONFIG 0x2
50
51/* The remaining space is defined by each driver as the per-driver
52 * configuration space */
53#define VIRTIO_PCI_CONFIG 20
54
55#endif