aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-02-04 11:00:54 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-02-04 11:00:54 -0500
commit 93890b71a34f9490673a6edd56b61c2124215e46 (patch)
tree c5d82620f2cb69f0bf43639e63f54b0c0e2eb744 /drivers
parent f5bb3a5e9dcdb8435471562b6cada89525cf4df1 (diff)
parent 6b35e40767c6c1ac783330109ae8e0c09ea6bc82 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus
* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus: (25 commits)
  virtio: balloon driver
  virtio: Use PCI revision field to indicate virtio PCI ABI version
  virtio: PCI device
  virtio_blk: implement naming for vda-vdz,vdaa-vdzz,vdaaa-vdzzz
  virtio_blk: Dont waste major numbers
  virtio_blk: provide getgeo
  virtio_net: parametrize the napi_weight for virtio receive queue.
  virtio: free transmit skbs when notified, not on next xmit.
  virtio: flush buffers on open
  virtnet: remove double ether_setup
  virtio: Allow virtio to be modular and used by modules
  virtio: Use the sg_phys convenience function.
  virtio: Put the virtio under the virtualization menu
  virtio: handle interrupts after callbacks turned off
  virtio: reset function
  virtio: populate network rings in the probe routine, not open
  virtio: Tweak virtio_net defines
  virtio: Net header needs hdr_len
  virtio: remove unused id field from struct virtio_blk_outhdr
  virtio: clarify NO_NOTIFY flag usage
  ...
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/Kconfig 2
-rw-r--r-- drivers/block/Kconfig 3
-rw-r--r-- drivers/block/virtio_blk.c 106
-rw-r--r-- drivers/char/virtio_console.c 4
-rw-r--r-- drivers/lguest/lguest_device.c 146
-rw-r--r-- drivers/net/Kconfig 3
-rw-r--r-- drivers/net/virtio_net.c 155
-rw-r--r-- drivers/virtio/Kconfig 31
-rw-r--r-- drivers/virtio/Makefile 2
-rw-r--r-- drivers/virtio/virtio.c 65
-rw-r--r-- drivers/virtio/virtio_balloon.c 284
-rw-r--r-- drivers/virtio/virtio_pci.c 446
-rw-r--r-- drivers/virtio/virtio_ring.c 51
13 files changed, 1055 insertions, 243 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 08d4ae201597..3f8a231fe754 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -91,6 +91,4 @@ source "drivers/dca/Kconfig"
91source "drivers/auxdisplay/Kconfig" 91source "drivers/auxdisplay/Kconfig"
92 92
93source "drivers/uio/Kconfig" 93source "drivers/uio/Kconfig"
94
95source "drivers/virtio/Kconfig"
96endmenu 94endmenu
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index f2122855d4ec..64e5148d82bc 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -440,6 +440,7 @@ config VIRTIO_BLK
440 tristate "Virtio block driver (EXPERIMENTAL)" 440 tristate "Virtio block driver (EXPERIMENTAL)"
441 depends on EXPERIMENTAL && VIRTIO 441 depends on EXPERIMENTAL && VIRTIO
442 ---help--- 442 ---help---
443 This is the virtual block driver for lguest. Say Y or M. 443 This is the virtual block driver for virtio. It can be used with
444 lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
444 445
445endif # BLK_DEV 446endif # BLK_DEV
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 924ddd8bccd2..3b1a68d6eddb 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -7,8 +7,10 @@
7#include <linux/scatterlist.h> 7#include <linux/scatterlist.h>
8 8
9#define VIRTIO_MAX_SG (3+MAX_PHYS_SEGMENTS) 9#define VIRTIO_MAX_SG (3+MAX_PHYS_SEGMENTS)
10#define PART_BITS 4
11
12static int major, index;
10 13
11static unsigned char virtblk_index = 'a';
12struct virtio_blk 14struct virtio_blk
13{ 15{
14 spinlock_t lock; 16 spinlock_t lock;
@@ -36,7 +38,7 @@ struct virtblk_req
36 struct virtio_blk_inhdr in_hdr; 38 struct virtio_blk_inhdr in_hdr;
37}; 39};
38 40
39static bool blk_done(struct virtqueue *vq) 41static void blk_done(struct virtqueue *vq)
40{ 42{
41 struct virtio_blk *vblk = vq->vdev->priv; 43 struct virtio_blk *vblk = vq->vdev->priv;
42 struct virtblk_req *vbr; 44 struct virtblk_req *vbr;
@@ -65,7 +67,6 @@ static bool blk_done(struct virtqueue *vq)
65 /* In case queue is stopped waiting for more buffers. */ 67 /* In case queue is stopped waiting for more buffers. */
66 blk_start_queue(vblk->disk->queue); 68 blk_start_queue(vblk->disk->queue);
67 spin_unlock_irqrestore(&vblk->lock, flags); 69 spin_unlock_irqrestore(&vblk->lock, flags);
68 return true;
69} 70}
70 71
71static bool do_req(struct request_queue *q, struct virtio_blk *vblk, 72static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
@@ -153,20 +154,37 @@ static int virtblk_ioctl(struct inode *inode, struct file *filp,
153 (void __user *)data); 154 (void __user *)data);
154} 155}
155 156
157/* We provide getgeo only to please some old bootloader/partitioning tools */
158static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
159{
160 /* some standard values, similar to sd */
161 geo->heads = 1 << 6;
162 geo->sectors = 1 << 5;
163 geo->cylinders = get_capacity(bd->bd_disk) >> 11;
164 return 0;
165}
166
156static struct block_device_operations virtblk_fops = { 167static struct block_device_operations virtblk_fops = {
157 .ioctl = virtblk_ioctl, 168 .ioctl = virtblk_ioctl,
158 .owner = THIS_MODULE, 169 .owner = THIS_MODULE,
170 .getgeo = virtblk_getgeo,
159}; 171};
160 172
173static int index_to_minor(int index)
174{
175 return index << PART_BITS;
176}
177
161static int virtblk_probe(struct virtio_device *vdev) 178static int virtblk_probe(struct virtio_device *vdev)
162{ 179{
163 struct virtio_blk *vblk; 180 struct virtio_blk *vblk;
164 int err, major; 181 int err;
165 void *token;
166 unsigned int len;
167 u64 cap; 182 u64 cap;
168 u32 v; 183 u32 v;
169 184
185 if (index_to_minor(index) >= 1 << MINORBITS)
186 return -ENOSPC;
187
170 vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); 188 vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
171 if (!vblk) { 189 if (!vblk) {
172 err = -ENOMEM; 190 err = -ENOMEM;
@@ -178,7 +196,7 @@ static int virtblk_probe(struct virtio_device *vdev)
178 vblk->vdev = vdev; 196 vblk->vdev = vdev;
179 197
180 /* We expect one virtqueue, for output. */ 198 /* We expect one virtqueue, for output. */
181 vblk->vq = vdev->config->find_vq(vdev, blk_done); 199 vblk->vq = vdev->config->find_vq(vdev, 0, blk_done);
182 if (IS_ERR(vblk->vq)) { 200 if (IS_ERR(vblk->vq)) {
183 err = PTR_ERR(vblk->vq); 201 err = PTR_ERR(vblk->vq);
184 goto out_free_vblk; 202 goto out_free_vblk;
@@ -190,17 +208,11 @@ static int virtblk_probe(struct virtio_device *vdev)
190 goto out_free_vq; 208 goto out_free_vq;
191 } 209 }
192 210
193 major = register_blkdev(0, "virtblk");
194 if (major < 0) {
195 err = major;
196 goto out_mempool;
197 }
198
199 /* FIXME: How many partitions? How long is a piece of string? */ 211 /* FIXME: How many partitions? How long is a piece of string? */
200 vblk->disk = alloc_disk(1 << 4); 212 vblk->disk = alloc_disk(1 << PART_BITS);
201 if (!vblk->disk) { 213 if (!vblk->disk) {
202 err = -ENOMEM; 214 err = -ENOMEM;
203 goto out_unregister_blkdev; 215 goto out_mempool;
204 } 216 }
205 217
206 vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock); 218 vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
@@ -209,22 +221,32 @@ static int virtblk_probe(struct virtio_device *vdev)
209 goto out_put_disk; 221 goto out_put_disk;
210 } 222 }
211 223
212 sprintf(vblk->disk->disk_name, "vd%c", virtblk_index++); 224 if (index < 26) {
225 sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
226 } else if (index < (26 + 1) * 26) {
227 sprintf(vblk->disk->disk_name, "vd%c%c",
228 'a' + index / 26 - 1, 'a' + index % 26);
229 } else {
230 const unsigned int m1 = (index / 26 - 1) / 26 - 1;
231 const unsigned int m2 = (index / 26 - 1) % 26;
232 const unsigned int m3 = index % 26;
233 sprintf(vblk->disk->disk_name, "vd%c%c%c",
234 'a' + m1, 'a' + m2, 'a' + m3);
235 }
236
213 vblk->disk->major = major; 237 vblk->disk->major = major;
214 vblk->disk->first_minor = 0; 238 vblk->disk->first_minor = index_to_minor(index);
215 vblk->disk->private_data = vblk; 239 vblk->disk->private_data = vblk;
216 vblk->disk->fops = &virtblk_fops; 240 vblk->disk->fops = &virtblk_fops;
241 index++;
217 242
218 /* If barriers are supported, tell block layer that queue is ordered */ 243 /* If barriers are supported, tell block layer that queue is ordered */
219 token = vdev->config->find(vdev, VIRTIO_CONFIG_BLK_F, &len); 244 if (vdev->config->feature(vdev, VIRTIO_BLK_F_BARRIER))
220 if (virtio_use_bit(vdev, token, len, VIRTIO_BLK_F_BARRIER))
221 blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL); 245 blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);
222 246
223 err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_CAPACITY, &cap); 247 /* Host must always specify the capacity. */
224 if (err) { 248 __virtio_config_val(vdev, offsetof(struct virtio_blk_config, capacity),
225 dev_err(&vdev->dev, "Bad/missing capacity in config\n"); 249 &cap);
226 goto out_cleanup_queue;
227 }
228 250
229 /* If capacity is too big, truncate with warning. */ 251 /* If capacity is too big, truncate with warning. */
230 if ((sector_t)cap != cap) { 252 if ((sector_t)cap != cap) {
@@ -234,31 +256,25 @@ static int virtblk_probe(struct virtio_device *vdev)
234 } 256 }
235 set_capacity(vblk->disk, cap); 257 set_capacity(vblk->disk, cap);
236 258
237 err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SIZE_MAX, &v); 259 /* Host can optionally specify maximum segment size and number of
260 * segments. */
261 err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
262 offsetof(struct virtio_blk_config, size_max),
263 &v);
238 if (!err) 264 if (!err)
239 blk_queue_max_segment_size(vblk->disk->queue, v); 265 blk_queue_max_segment_size(vblk->disk->queue, v);
240 else if (err != -ENOENT) {
241 dev_err(&vdev->dev, "Bad SIZE_MAX in config\n");
242 goto out_cleanup_queue;
243 }
244 266
245 err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SEG_MAX, &v); 267 err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
268 offsetof(struct virtio_blk_config, seg_max),
269 &v);
246 if (!err) 270 if (!err)
247 blk_queue_max_hw_segments(vblk->disk->queue, v); 271 blk_queue_max_hw_segments(vblk->disk->queue, v);
248 else if (err != -ENOENT) {
249 dev_err(&vdev->dev, "Bad SEG_MAX in config\n");
250 goto out_cleanup_queue;
251 }
252 272
253 add_disk(vblk->disk); 273 add_disk(vblk->disk);
254 return 0; 274 return 0;
255 275
256out_cleanup_queue:
257 blk_cleanup_queue(vblk->disk->queue);
258out_put_disk: 276out_put_disk:
259 put_disk(vblk->disk); 277 put_disk(vblk->disk);
260out_unregister_blkdev:
261 unregister_blkdev(major, "virtblk");
262out_mempool: 278out_mempool:
263 mempool_destroy(vblk->pool); 279 mempool_destroy(vblk->pool);
264out_free_vq: 280out_free_vq:
@@ -274,12 +290,16 @@ static void virtblk_remove(struct virtio_device *vdev)
274 struct virtio_blk *vblk = vdev->priv; 290 struct virtio_blk *vblk = vdev->priv;
275 int major = vblk->disk->major; 291 int major = vblk->disk->major;
276 292
293 /* Nothing should be pending. */
277 BUG_ON(!list_empty(&vblk->reqs)); 294 BUG_ON(!list_empty(&vblk->reqs));
295
296 /* Stop all the virtqueues. */
297 vdev->config->reset(vdev);
298
278 blk_cleanup_queue(vblk->disk->queue); 299 blk_cleanup_queue(vblk->disk->queue);
279 put_disk(vblk->disk); 300 put_disk(vblk->disk);
280 unregister_blkdev(major, "virtblk"); 301 unregister_blkdev(major, "virtblk");
281 mempool_destroy(vblk->pool); 302 mempool_destroy(vblk->pool);
282 /* There should be nothing in the queue now, so no need to shutdown */
283 vdev->config->del_vq(vblk->vq); 303 vdev->config->del_vq(vblk->vq);
284 kfree(vblk); 304 kfree(vblk);
285} 305}
@@ -299,11 +319,15 @@ static struct virtio_driver virtio_blk = {
299 319
300static int __init init(void) 320static int __init init(void)
301{ 321{
322 major = register_blkdev(0, "virtblk");
323 if (major < 0)
324 return major;
302 return register_virtio_driver(&virtio_blk); 325 return register_virtio_driver(&virtio_blk);
303} 326}
304 327
305static void __exit fini(void) 328static void __exit fini(void)
306{ 329{
330 unregister_blkdev(major, "virtblk");
307 unregister_virtio_driver(&virtio_blk); 331 unregister_virtio_driver(&virtio_blk);
308} 332}
309module_init(init); 333module_init(init);
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index e34da5c97196..dc17fe3a88bc 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -158,13 +158,13 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
158 /* Find the input queue. */ 158 /* Find the input queue. */
159 /* FIXME: This is why we want to wean off hvc: we do nothing 159 /* FIXME: This is why we want to wean off hvc: we do nothing
160 * when input comes in. */ 160 * when input comes in. */
161 in_vq = vdev->config->find_vq(vdev, NULL); 161 in_vq = vdev->config->find_vq(vdev, 0, NULL);
162 if (IS_ERR(in_vq)) { 162 if (IS_ERR(in_vq)) {
163 err = PTR_ERR(in_vq); 163 err = PTR_ERR(in_vq);
164 goto free; 164 goto free;
165 } 165 }
166 166
167 out_vq = vdev->config->find_vq(vdev, NULL); 167 out_vq = vdev->config->find_vq(vdev, 1, NULL);
168 if (IS_ERR(out_vq)) { 168 if (IS_ERR(out_vq)) {
169 err = PTR_ERR(out_vq); 169 err = PTR_ERR(out_vq);
170 goto free_in_vq; 170 goto free_in_vq;
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index e2eec38c83c2..84f85e23cca7 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -52,57 +52,82 @@ struct lguest_device {
52/*D:130 52/*D:130
53 * Device configurations 53 * Device configurations
54 * 54 *
55 * The configuration information for a device consists of a series of fields. 55 * The configuration information for a device consists of one or more
56 * We don't really care what they are: the Launcher set them up, and the driver 56 * virtqueues, a feature bitmaks, and some configuration bytes. The
57 * will look at them during setup. 57 * configuration bytes don't really matter to us: the Launcher sets them up, and
58 * the driver will look at them during setup.
58 * 59 *
59 * For us these fields come immediately after that device's descriptor in the 60 * A convenient routine to return the device's virtqueue config array:
60 * lguest_devices page. 61 * immediately after the descriptor. */
61 * 62static struct lguest_vqconfig *lg_vq(const struct lguest_device_desc *desc)
62 * Each field starts with a "type" byte, a "length" byte, then that number of 63{
63 * bytes of configuration information. The device descriptor tells us the 64 return (void *)(desc + 1);
64 * total configuration length so we know when we've reached the last field. */ 65}
65 66
66/* type + length bytes */ 67/* The features come immediately after the virtqueues. */
67#define FHDR_LEN 2 68static u8 *lg_features(const struct lguest_device_desc *desc)
69{
70 return (void *)(lg_vq(desc) + desc->num_vq);
71}
68 72
69/* This finds the first field of a given type for a device's configuration. */ 73/* The config space comes after the two feature bitmasks. */
70static void *lg_find(struct virtio_device *vdev, u8 type, unsigned int *len) 74static u8 *lg_config(const struct lguest_device_desc *desc)
71{ 75{
72 struct lguest_device_desc *desc = to_lgdev(vdev)->desc; 76 return lg_features(desc) + desc->feature_len * 2;
73 int i; 77}
74
75 for (i = 0; i < desc->config_len; i += FHDR_LEN + desc->config[i+1]) {
76 if (desc->config[i] == type) {
77 /* Mark it used, so Host can know we looked at it, and
78 * also so we won't find the same one twice. */
79 desc->config[i] |= 0x80;
80 /* Remember, the second byte is the length. */
81 *len = desc->config[i+1];
82 /* We return a pointer to the field header. */
83 return desc->config + i;
84 }
85 }
86 78
87 /* Not found: return NULL for failure. */ 79/* The total size of the config page used by this device (incl. desc) */
88 return NULL; 80static unsigned desc_size(const struct lguest_device_desc *desc)
81{
82 return sizeof(*desc)
83 + desc->num_vq * sizeof(struct lguest_vqconfig)
84 + desc->feature_len * 2
85 + desc->config_len;
86}
87
88/* This tests (and acknowleges) a feature bit. */
89static bool lg_feature(struct virtio_device *vdev, unsigned fbit)
90{
91 struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
92 u8 *features;
93
94 /* Obviously if they ask for a feature off the end of our feature
95 * bitmap, it's not set. */
96 if (fbit / 8 > desc->feature_len)
97 return false;
98
99 /* The feature bitmap comes after the virtqueues. */
100 features = lg_features(desc);
101 if (!(features[fbit / 8] & (1 << (fbit % 8))))
102 return false;
103
104 /* We set the matching bit in the other half of the bitmap to tell the
105 * Host we want to use this feature. We don't use this yet, but we
106 * could in future. */
107 features[desc->feature_len + fbit / 8] |= (1 << (fbit % 8));
108 return true;
89} 109}
90 110
91/* Once they've found a field, getting a copy of it is easy. */ 111/* Once they've found a field, getting a copy of it is easy. */
92static void lg_get(struct virtio_device *vdev, void *token, 112static void lg_get(struct virtio_device *vdev, unsigned int offset,
93 void *buf, unsigned len) 113 void *buf, unsigned len)
94{ 114{
95 /* Check they didn't ask for more than the length of the field! */ 115 struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
96 BUG_ON(len > ((u8 *)token)[1]); 116
97 memcpy(buf, token + FHDR_LEN, len); 117 /* Check they didn't ask for more than the length of the config! */
118 BUG_ON(offset + len > desc->config_len);
119 memcpy(buf, lg_config(desc) + offset, len);
98} 120}
99 121
100/* Setting the contents is also trivial. */ 122/* Setting the contents is also trivial. */
101static void lg_set(struct virtio_device *vdev, void *token, 123static void lg_set(struct virtio_device *vdev, unsigned int offset,
102 const void *buf, unsigned len) 124 const void *buf, unsigned len)
103{ 125{
104 BUG_ON(len > ((u8 *)token)[1]); 126 struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
105 memcpy(token + FHDR_LEN, buf, len); 127
128 /* Check they didn't ask for more than the length of the config! */
129 BUG_ON(offset + len > desc->config_len);
130 memcpy(lg_config(desc) + offset, buf, len);
106} 131}
107 132
108/* The operations to get and set the status word just access the status field 133/* The operations to get and set the status word just access the status field
@@ -114,9 +139,20 @@ static u8 lg_get_status(struct virtio_device *vdev)
114 139
115static void lg_set_status(struct virtio_device *vdev, u8 status) 140static void lg_set_status(struct virtio_device *vdev, u8 status)
116{ 141{
142 BUG_ON(!status);
117 to_lgdev(vdev)->desc->status = status; 143 to_lgdev(vdev)->desc->status = status;
118} 144}
119 145
146/* To reset the device, we (ab)use the NOTIFY hypercall, with the descriptor
147 * address of the device. The Host will zero the status and all the
148 * features. */
149static void lg_reset(struct virtio_device *vdev)
150{
151 unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices;
152
153 hcall(LHCALL_NOTIFY, (max_pfn<<PAGE_SHIFT) + offset, 0, 0);
154}
155
120/* 156/*
121 * Virtqueues 157 * Virtqueues
122 * 158 *
@@ -165,39 +201,29 @@ static void lg_notify(struct virtqueue *vq)
165 * 201 *
166 * So we provide devices with a "find virtqueue and set it up" function. */ 202 * So we provide devices with a "find virtqueue and set it up" function. */
167static struct virtqueue *lg_find_vq(struct virtio_device *vdev, 203static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
168 bool (*callback)(struct virtqueue *vq)) 204 unsigned index,
205 void (*callback)(struct virtqueue *vq))
169{ 206{
207 struct lguest_device *ldev = to_lgdev(vdev);
170 struct lguest_vq_info *lvq; 208 struct lguest_vq_info *lvq;
171 struct virtqueue *vq; 209 struct virtqueue *vq;
172 unsigned int len;
173 void *token;
174 int err; 210 int err;
175 211
176 /* Look for a field of the correct type to mark a virtqueue. Note that 212 /* We must have this many virtqueues. */
177 * if this succeeds, then the type will be changed so it won't be found 213 if (index >= ldev->desc->num_vq)
178 * again, and future lg_find_vq() calls will find the next
179 * virtqueue (if any). */
180 token = vdev->config->find(vdev, VIRTIO_CONFIG_F_VIRTQUEUE, &len);
181 if (!token)
182 return ERR_PTR(-ENOENT); 214 return ERR_PTR(-ENOENT);
183 215
184 lvq = kmalloc(sizeof(*lvq), GFP_KERNEL); 216 lvq = kmalloc(sizeof(*lvq), GFP_KERNEL);
185 if (!lvq) 217 if (!lvq)
186 return ERR_PTR(-ENOMEM); 218 return ERR_PTR(-ENOMEM);
187 219
188 /* Note: we could use a configuration space inside here, just like we 220 /* Make a copy of the "struct lguest_vqconfig" entry, which sits after
189 * do for the device. This would allow expansion in future, because 221 * the descriptor. We need a copy because the config space might not
190 * our configuration system is designed to be expansible. But this is 222 * be aligned correctly. */
191 * way easier. */ 223 memcpy(&lvq->config, lg_vq(ldev->desc)+index, sizeof(lvq->config));
192 if (len != sizeof(lvq->config)) {
193 dev_err(&vdev->dev, "Unexpected virtio config len %u\n", len);
194 err = -EIO;
195 goto free_lvq;
196 }
197 /* Make a copy of the "struct lguest_vqconfig" field. We need a copy
198 * because the config space might not be aligned correctly. */
199 vdev->config->get(vdev, token, &lvq->config, sizeof(lvq->config));
200 224
225 printk("Mapping virtqueue %i addr %lx\n", index,
226 (unsigned long)lvq->config.pfn << PAGE_SHIFT);
201 /* Figure out how many pages the ring will take, and map that memory */ 227 /* Figure out how many pages the ring will take, and map that memory */
202 lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT, 228 lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT,
203 DIV_ROUND_UP(vring_size(lvq->config.num, 229 DIV_ROUND_UP(vring_size(lvq->config.num,
@@ -259,11 +285,12 @@ static void lg_del_vq(struct virtqueue *vq)
259 285
260/* The ops structure which hooks everything together. */ 286/* The ops structure which hooks everything together. */
261static struct virtio_config_ops lguest_config_ops = { 287static struct virtio_config_ops lguest_config_ops = {
262 .find = lg_find, 288 .feature = lg_feature,
263 .get = lg_get, 289 .get = lg_get,
264 .set = lg_set, 290 .set = lg_set,
265 .get_status = lg_get_status, 291 .get_status = lg_get_status,
266 .set_status = lg_set_status, 292 .set_status = lg_set_status,
293 .reset = lg_reset,
267 .find_vq = lg_find_vq, 294 .find_vq = lg_find_vq,
268 .del_vq = lg_del_vq, 295 .del_vq = lg_del_vq,
269}; 296};
@@ -329,13 +356,14 @@ static void scan_devices(void)
329 struct lguest_device_desc *d; 356 struct lguest_device_desc *d;
330 357
331 /* We start at the page beginning, and skip over each entry. */ 358 /* We start at the page beginning, and skip over each entry. */
332 for (i = 0; i < PAGE_SIZE; i += sizeof(*d) + d->config_len) { 359 for (i = 0; i < PAGE_SIZE; i += desc_size(d)) {
333 d = lguest_devices + i; 360 d = lguest_devices + i;
334 361
335 /* Once we hit a zero, stop. */ 362 /* Once we hit a zero, stop. */
336 if (d->type == 0) 363 if (d->type == 0)
337 break; 364 break;
338 365
366 printk("Device at %i has size %u\n", i, desc_size(d));
339 add_lguest_device(d); 367 add_lguest_device(d);
340 } 368 }
341} 369}
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 55d224c8a0b9..f234ba3f0404 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -3114,6 +3114,7 @@ config VIRTIO_NET
3114 tristate "Virtio network driver (EXPERIMENTAL)" 3114 tristate "Virtio network driver (EXPERIMENTAL)"
3115 depends on EXPERIMENTAL && VIRTIO 3115 depends on EXPERIMENTAL && VIRTIO
3116 ---help--- 3116 ---help---
3117 This is the virtual network driver for lguest. Say Y or M. 3117 This is the virtual network driver for virtio. It can be used with
3118 lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
3118 3119
3119endif # NETDEVICES 3120endif # NETDEVICES
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 5413dbf3d4ac..e66de0c12fc1 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -24,6 +24,13 @@
24#include <linux/virtio_net.h> 24#include <linux/virtio_net.h>
25#include <linux/scatterlist.h> 25#include <linux/scatterlist.h>
26 26
27static int napi_weight = 128;
28module_param(napi_weight, int, 0444);
29
30static int csum = 1, gso = 1;
31module_param(csum, bool, 0444);
32module_param(gso, bool, 0444);
33
27/* FIXME: MTU in config. */ 34/* FIXME: MTU in config. */
28#define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN) 35#define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN)
29 36
@@ -52,13 +59,14 @@ static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb)
52 sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr)); 59 sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
53} 60}
54 61
55static bool skb_xmit_done(struct virtqueue *rvq) 62static void skb_xmit_done(struct virtqueue *svq)
56{ 63{
57 struct virtnet_info *vi = rvq->vdev->priv; 64 struct virtnet_info *vi = svq->vdev->priv;
58 65
59 /* In case we were waiting for output buffers. */ 66 /* Suppress further interrupts. */
67 svq->vq_ops->disable_cb(svq);
68 /* We were waiting for more output buffers. */
60 netif_wake_queue(vi->dev); 69 netif_wake_queue(vi->dev);
61 return true;
62} 70}
63 71
64static void receive_skb(struct net_device *dev, struct sk_buff *skb, 72static void receive_skb(struct net_device *dev, struct sk_buff *skb,
@@ -83,28 +91,16 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb,
83 91
84 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 92 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
85 pr_debug("Needs csum!\n"); 93 pr_debug("Needs csum!\n");
86 skb->ip_summed = CHECKSUM_PARTIAL; 94 if (!skb_partial_csum_set(skb,hdr->csum_start,hdr->csum_offset))
87 skb->csum_start = hdr->csum_start;
88 skb->csum_offset = hdr->csum_offset;
89 if (skb->csum_start > skb->len - 2
90 || skb->csum_offset > skb->len - 2) {
91 if (net_ratelimit())
92 printk(KERN_WARNING "%s: csum=%u/%u len=%u\n",
93 dev->name, skb->csum_start,
94 skb->csum_offset, skb->len);
95 goto frame_err; 95 goto frame_err;
96 }
97 } 96 }
98 97
99 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 98 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
100 pr_debug("GSO!\n"); 99 pr_debug("GSO!\n");
101 switch (hdr->gso_type) { 100 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
102 case VIRTIO_NET_HDR_GSO_TCPV4: 101 case VIRTIO_NET_HDR_GSO_TCPV4:
103 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 102 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
104 break; 103 break;
105 case VIRTIO_NET_HDR_GSO_TCPV4_ECN:
106 skb_shinfo(skb)->gso_type = SKB_GSO_TCP_ECN;
107 break;
108 case VIRTIO_NET_HDR_GSO_UDP: 104 case VIRTIO_NET_HDR_GSO_UDP:
109 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 105 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
110 break; 106 break;
@@ -118,6 +114,9 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb,
118 goto frame_err; 114 goto frame_err;
119 } 115 }
120 116
117 if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
118 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
119
121 skb_shinfo(skb)->gso_size = hdr->gso_size; 120 skb_shinfo(skb)->gso_size = hdr->gso_size;
122 if (skb_shinfo(skb)->gso_size == 0) { 121 if (skb_shinfo(skb)->gso_size == 0) {
123 if (net_ratelimit()) 122 if (net_ratelimit())
@@ -170,12 +169,14 @@ static void try_fill_recv(struct virtnet_info *vi)
170 vi->rvq->vq_ops->kick(vi->rvq); 169 vi->rvq->vq_ops->kick(vi->rvq);
171} 170}
172 171
173static bool skb_recv_done(struct virtqueue *rvq) 172static void skb_recv_done(struct virtqueue *rvq)
174{ 173{
175 struct virtnet_info *vi = rvq->vdev->priv; 174 struct virtnet_info *vi = rvq->vdev->priv;
176 netif_rx_schedule(vi->dev, &vi->napi); 175 /* Schedule NAPI, Suppress further interrupts if successful. */
177 /* Suppress further interrupts. */ 176 if (netif_rx_schedule_prep(vi->dev, &vi->napi)) {
178 return false; 177 rvq->vq_ops->disable_cb(rvq);
178 __netif_rx_schedule(vi->dev, &vi->napi);
179 }
179} 180}
180 181
181static int virtnet_poll(struct napi_struct *napi, int budget) 182static int virtnet_poll(struct napi_struct *napi, int budget)
@@ -201,7 +202,7 @@ again:
201 /* Out of packets? */ 202 /* Out of packets? */
202 if (received < budget) { 203 if (received < budget) {
203 netif_rx_complete(vi->dev, napi); 204 netif_rx_complete(vi->dev, napi);
204 if (unlikely(!vi->rvq->vq_ops->restart(vi->rvq)) 205 if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
205 && netif_rx_reschedule(vi->dev, napi)) 206 && netif_rx_reschedule(vi->dev, napi))
206 goto again; 207 goto again;
207 } 208 }
@@ -236,8 +237,6 @@ static int start_xmit(struct sk_buff *skb, struct net_device *dev)
236 237
237 pr_debug("%s: xmit %p %s\n", dev->name, skb, print_mac(mac, dest)); 238 pr_debug("%s: xmit %p %s\n", dev->name, skb, print_mac(mac, dest));
238 239
239 free_old_xmit_skbs(vi);
240
241 /* Encode metadata header at front. */ 240 /* Encode metadata header at front. */
242 hdr = skb_vnet_hdr(skb); 241 hdr = skb_vnet_hdr(skb);
243 if (skb->ip_summed == CHECKSUM_PARTIAL) { 242 if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -250,10 +249,9 @@ static int start_xmit(struct sk_buff *skb, struct net_device *dev)
250 } 249 }
251 250
252 if (skb_is_gso(skb)) { 251 if (skb_is_gso(skb)) {
252 hdr->hdr_len = skb_transport_header(skb) - skb->data;
253 hdr->gso_size = skb_shinfo(skb)->gso_size; 253 hdr->gso_size = skb_shinfo(skb)->gso_size;
254 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN) 254 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
255 hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4_ECN;
256 else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
257 hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 255 hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
258 else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) 256 else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
259 hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 257 hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
@@ -261,19 +259,34 @@ static int start_xmit(struct sk_buff *skb, struct net_device *dev)
261 hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; 259 hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
262 else 260 else
263 BUG(); 261 BUG();
262 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
263 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
264 } else { 264 } else {
265 hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; 265 hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
266 hdr->gso_size = 0; 266 hdr->gso_size = hdr->hdr_len = 0;
267 } 267 }
268 268
269 vnet_hdr_to_sg(sg, skb); 269 vnet_hdr_to_sg(sg, skb);
270 num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; 270 num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
271 __skb_queue_head(&vi->send, skb); 271 __skb_queue_head(&vi->send, skb);
272
273again:
274 /* Free up any pending old buffers before queueing new ones. */
275 free_old_xmit_skbs(vi);
272 err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); 276 err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
273 if (err) { 277 if (err) {
274 pr_debug("%s: virtio not prepared to send\n", dev->name); 278 pr_debug("%s: virtio not prepared to send\n", dev->name);
275 skb_unlink(skb, &vi->send);
276 netif_stop_queue(dev); 279 netif_stop_queue(dev);
280
281 /* Activate callback for using skbs: if this fails it
282 * means some were used in the meantime. */
283 if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
284 printk("Unlikely: restart svq failed\n");
285 netif_start_queue(dev);
286 goto again;
287 }
288 __skb_unlink(skb, &vi->send);
289
277 return NETDEV_TX_BUSY; 290 return NETDEV_TX_BUSY;
278 } 291 }
279 vi->svq->vq_ops->kick(vi->svq); 292 vi->svq->vq_ops->kick(vi->svq);
@@ -285,45 +298,31 @@ static int virtnet_open(struct net_device *dev)
285{ 298{
286 struct virtnet_info *vi = netdev_priv(dev); 299 struct virtnet_info *vi = netdev_priv(dev);
287 300
288 try_fill_recv(vi); 301 napi_enable(&vi->napi);
289 302
290 /* If we didn't even get one input buffer, we're useless. */ 303 /* If all buffers were filled by other side before we napi_enabled, we
291 if (vi->num == 0) 304 * won't get another interrupt, so process any outstanding packets
292 return -ENOMEM; 305 * now. virtnet_poll wants re-enable the queue, so we disable here. */
306 vi->rvq->vq_ops->disable_cb(vi->rvq);
307 netif_rx_schedule(vi->dev, &vi->napi);
293 308
294 napi_enable(&vi->napi);
295 return 0; 309 return 0;
296} 310}
297 311
298static int virtnet_close(struct net_device *dev) 312static int virtnet_close(struct net_device *dev)
299{ 313{
300 struct virtnet_info *vi = netdev_priv(dev); 314 struct virtnet_info *vi = netdev_priv(dev);
301 struct sk_buff *skb;
302 315
303 napi_disable(&vi->napi); 316 napi_disable(&vi->napi);
304 317
305 /* networking core has neutered skb_xmit_done/skb_recv_done, so don't
306 * worry about races vs. get(). */
307 vi->rvq->vq_ops->shutdown(vi->rvq);
308 while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
309 kfree_skb(skb);
310 vi->num--;
311 }
312 vi->svq->vq_ops->shutdown(vi->svq);
313 while ((skb = __skb_dequeue(&vi->send)) != NULL)
314 kfree_skb(skb);
315
316 BUG_ON(vi->num != 0);
317 return 0; 318 return 0;
318} 319}
319 320
320static int virtnet_probe(struct virtio_device *vdev) 321static int virtnet_probe(struct virtio_device *vdev)
321{ 322{
322 int err; 323 int err;
323 unsigned int len;
324 struct net_device *dev; 324 struct net_device *dev;
325 struct virtnet_info *vi; 325 struct virtnet_info *vi;
326 void *token;
327 326
328 /* Allocate ourselves a network device with room for our info */ 327 /* Allocate ourselves a network device with room for our info */
329 dev = alloc_etherdev(sizeof(struct virtnet_info)); 328 dev = alloc_etherdev(sizeof(struct virtnet_info));
@@ -331,7 +330,6 @@ static int virtnet_probe(struct virtio_device *vdev)
331 return -ENOMEM; 330 return -ENOMEM;
332 331
333 /* Set up network device as normal. */ 332 /* Set up network device as normal. */
334 ether_setup(dev);
335 dev->open = virtnet_open; 333 dev->open = virtnet_open;
336 dev->stop = virtnet_close; 334 dev->stop = virtnet_close;
337 dev->hard_start_xmit = start_xmit; 335 dev->hard_start_xmit = start_xmit;
@@ -339,42 +337,37 @@ static int virtnet_probe(struct virtio_device *vdev)
339 SET_NETDEV_DEV(dev, &vdev->dev); 337 SET_NETDEV_DEV(dev, &vdev->dev);
340 338
341 /* Do we support "hardware" checksums? */ 339 /* Do we support "hardware" checksums? */
342 token = vdev->config->find(vdev, VIRTIO_CONFIG_NET_F, &len); 340 if (csum && vdev->config->feature(vdev, VIRTIO_NET_F_CSUM)) {
343 if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_NO_CSUM)) {
344 /* This opens up the world of extra features. */ 341 /* This opens up the world of extra features. */
345 dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; 342 dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
346 if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO4)) 343 if (gso && vdev->config->feature(vdev, VIRTIO_NET_F_GSO)) {
347 dev->features |= NETIF_F_TSO; 344 dev->features |= NETIF_F_TSO | NETIF_F_UFO
348 if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_UFO)) 345 | NETIF_F_TSO_ECN | NETIF_F_TSO6;
349 dev->features |= NETIF_F_UFO; 346 }
350 if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO4_ECN))
351 dev->features |= NETIF_F_TSO_ECN;
352 if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO6))
353 dev->features |= NETIF_F_TSO6;
354 } 347 }
355 348
356 /* Configuration may specify what MAC to use. Otherwise random. */ 349 /* Configuration may specify what MAC to use. Otherwise random. */
357 token = vdev->config->find(vdev, VIRTIO_CONFIG_NET_MAC_F, &len); 350 if (vdev->config->feature(vdev, VIRTIO_NET_F_MAC)) {
358 if (token) { 351 vdev->config->get(vdev,
359 dev->addr_len = len; 352 offsetof(struct virtio_net_config, mac),
360 vdev->config->get(vdev, token, dev->dev_addr, len); 353 dev->dev_addr, dev->addr_len);
361 } else 354 } else
362 random_ether_addr(dev->dev_addr); 355 random_ether_addr(dev->dev_addr);
363 356
364 /* Set up our device-specific information */ 357 /* Set up our device-specific information */
365 vi = netdev_priv(dev); 358 vi = netdev_priv(dev);
366 netif_napi_add(dev, &vi->napi, virtnet_poll, 16); 359 netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
367 vi->dev = dev; 360 vi->dev = dev;
368 vi->vdev = vdev; 361 vi->vdev = vdev;
369 362
370 /* We expect two virtqueues, receive then send. */ 363 /* We expect two virtqueues, receive then send. */
371 vi->rvq = vdev->config->find_vq(vdev, skb_recv_done); 364 vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
372 if (IS_ERR(vi->rvq)) { 365 if (IS_ERR(vi->rvq)) {
373 err = PTR_ERR(vi->rvq); 366 err = PTR_ERR(vi->rvq);
374 goto free; 367 goto free;
375 } 368 }
376 369
377 vi->svq = vdev->config->find_vq(vdev, skb_xmit_done); 370 vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done);
378 if (IS_ERR(vi->svq)) { 371 if (IS_ERR(vi->svq)) {
379 err = PTR_ERR(vi->svq); 372 err = PTR_ERR(vi->svq);
380 goto free_recv; 373 goto free_recv;
@@ -389,10 +382,22 @@ static int virtnet_probe(struct virtio_device *vdev)
389 pr_debug("virtio_net: registering device failed\n"); 382 pr_debug("virtio_net: registering device failed\n");
390 goto free_send; 383 goto free_send;
391 } 384 }
385
386 /* Last of all, set up some receive buffers. */
387 try_fill_recv(vi);
388
389 /* If we didn't even get one input buffer, we're useless. */
390 if (vi->num == 0) {
391 err = -ENOMEM;
392 goto unregister;
393 }
394
392 pr_debug("virtnet: registered device %s\n", dev->name); 395 pr_debug("virtnet: registered device %s\n", dev->name);
393 vdev->priv = vi; 396 vdev->priv = vi;
394 return 0; 397 return 0;
395 398
399unregister:
400 unregister_netdev(dev);
396free_send: 401free_send:
397 vdev->config->del_vq(vi->svq); 402 vdev->config->del_vq(vi->svq);
398free_recv: 403free_recv:
@@ -405,6 +410,20 @@ free:
405static void virtnet_remove(struct virtio_device *vdev) 410static void virtnet_remove(struct virtio_device *vdev)
406{ 411{
407 struct virtnet_info *vi = vdev->priv; 412 struct virtnet_info *vi = vdev->priv;
413 struct sk_buff *skb;
414
415 /* Stop all the virtqueues. */
416 vdev->config->reset(vdev);
417
418 /* Free our skbs in send and recv queues, if any. */
419 while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
420 kfree_skb(skb);
421 vi->num--;
422 }
423 while ((skb = __skb_dequeue(&vi->send)) != NULL)
424 kfree_skb(skb);
425
426 BUG_ON(vi->num != 0);
408 427
409 vdev->config->del_vq(vi->svq); 428 vdev->config->del_vq(vi->svq);
410 vdev->config->del_vq(vi->rvq); 429 vdev->config->del_vq(vi->rvq);
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 9e33fc4da875..3dd6294d10b6 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -1,8 +1,35 @@
1# Virtio always gets selected by whoever wants it. 1# Virtio always gets selected by whoever wants it.
2config VIRTIO 2config VIRTIO
3 bool 3 tristate
4 4
5# Similarly the virtio ring implementation. 5# Similarly the virtio ring implementation.
6config VIRTIO_RING 6config VIRTIO_RING
7 bool 7 tristate
8 depends on VIRTIO 8 depends on VIRTIO
9
10config VIRTIO_PCI
11 tristate "PCI driver for virtio devices (EXPERIMENTAL)"
12 depends on PCI && EXPERIMENTAL
13 select VIRTIO
14 select VIRTIO_RING
15 ---help---
16 This driver provides support for virtio based paravirtual device
17 drivers over PCI. This requires that your VMM has appropriate PCI
18 virtio backends. Most QEMU based VMMs should support these devices
19 (like KVM or Xen).
20
21 Currently, the ABI is not considered stable so there is no guarantee
22 that this version of the driver will work with your VMM.
23
24 If unsure, say M.
25
26config VIRTIO_BALLOON
27 tristate "Virtio balloon driver (EXPERIMENTAL)"
28 select VIRTIO
29 select VIRTIO_RING
30 ---help---
31 This driver supports increasing and decreasing the amount
32 of memory within a KVM guest.
33
34 If unsure, say M.
35
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index f70e40971dd9..6738c446c199 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,2 +1,4 @@
1obj-$(CONFIG_VIRTIO) += virtio.o 1obj-$(CONFIG_VIRTIO) += virtio.o
2obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o 2obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
3obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
4obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 69d7ea02cd48..b535483bc556 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -102,9 +102,13 @@ static int virtio_dev_remove(struct device *_d)
102 struct virtio_driver *drv = container_of(dev->dev.driver, 102 struct virtio_driver *drv = container_of(dev->dev.driver,
103 struct virtio_driver, driver); 103 struct virtio_driver, driver);
104 104
105 dev->config->set_status(dev, dev->config->get_status(dev)
106 & ~VIRTIO_CONFIG_S_DRIVER);
107 drv->remove(dev); 105 drv->remove(dev);
106
107 /* Driver should have reset device. */
108 BUG_ON(dev->config->get_status(dev));
109
110 /* Acknowledge the device's existence again. */
111 add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
108 return 0; 112 return 0;
109} 113}
110 114
@@ -130,6 +134,10 @@ int register_virtio_device(struct virtio_device *dev)
130 dev->dev.bus = &virtio_bus; 134 dev->dev.bus = &virtio_bus;
131 sprintf(dev->dev.bus_id, "%u", dev->index); 135 sprintf(dev->dev.bus_id, "%u", dev->index);
132 136
137 /* We always start by resetting the device, in case a previous
138 * driver messed it up. This also tests that code path a little. */
139 dev->config->reset(dev);
140
133 /* Acknowledge that we've seen the device. */ 141 /* Acknowledge that we've seen the device. */
134 add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); 142 add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
135 143
@@ -148,55 +156,18 @@ void unregister_virtio_device(struct virtio_device *dev)
148} 156}
149EXPORT_SYMBOL_GPL(unregister_virtio_device); 157EXPORT_SYMBOL_GPL(unregister_virtio_device);
150 158
151int __virtio_config_val(struct virtio_device *vdev,
152 u8 type, void *val, size_t size)
153{
154 void *token;
155 unsigned int len;
156
157 token = vdev->config->find(vdev, type, &len);
158 if (!token)
159 return -ENOENT;
160
161 if (len != size)
162 return -EIO;
163
164 vdev->config->get(vdev, token, val, size);
165 return 0;
166}
167EXPORT_SYMBOL_GPL(__virtio_config_val);
168
169int virtio_use_bit(struct virtio_device *vdev,
170 void *token, unsigned int len, unsigned int bitnum)
171{
172 unsigned long bits[16];
173
174 /* This makes it convenient to pass-through find() results. */
175 if (!token)
176 return 0;
177
178 /* bit not in range of this bitfield? */
179 if (bitnum * 8 >= len / 2)
180 return 0;
181
182 /* Giant feature bitfields are silly. */
183 BUG_ON(len > sizeof(bits));
184 vdev->config->get(vdev, token, bits, len);
185
186 if (!test_bit(bitnum, bits))
187 return 0;
188
189 /* Set acknowledge bit, and write it back. */
190 set_bit(bitnum + len * 8 / 2, bits);
191 vdev->config->set(vdev, token, bits, len);
192 return 1;
193}
194EXPORT_SYMBOL_GPL(virtio_use_bit);
195
196static int virtio_init(void) 159static int virtio_init(void)
197{ 160{
198 if (bus_register(&virtio_bus) != 0) 161 if (bus_register(&virtio_bus) != 0)
199 panic("virtio bus registration failed"); 162 panic("virtio bus registration failed");
200 return 0; 163 return 0;
201} 164}
165
166static void __exit virtio_exit(void)
167{
168 bus_unregister(&virtio_bus);
169}
202core_initcall(virtio_init); 170core_initcall(virtio_init);
171module_exit(virtio_exit);
172
173MODULE_LICENSE("GPL");
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
new file mode 100644
index 000000000000..622aece1acce
--- /dev/null
+++ b/drivers/virtio/virtio_balloon.c
@@ -0,0 +1,284 @@
1/* Virtio balloon implementation, inspired by Dor Loar and Marcelo
2 * Tosatti's implementations.
3 *
4 * Copyright 2008 Rusty Russell IBM Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20//#define DEBUG
21#include <linux/virtio.h>
22#include <linux/virtio_balloon.h>
23#include <linux/swap.h>
24#include <linux/kthread.h>
25#include <linux/freezer.h>
26
27struct virtio_balloon
28{
29 struct virtio_device *vdev;
30 struct virtqueue *inflate_vq, *deflate_vq;
31
32 /* Where the ballooning thread waits for config to change. */
33 wait_queue_head_t config_change;
34
35 /* The thread servicing the balloon. */
36 struct task_struct *thread;
37
38 /* Waiting for host to ack the pages we released. */
39 struct completion acked;
40
41 /* Do we have to tell Host *before* we reuse pages? */
42 bool tell_host_first;
43
44 /* The pages we've told the Host we're not using. */
45 unsigned int num_pages;
46 struct list_head pages;
47
48 /* The array of pfns we tell the Host about. */
49 unsigned int num_pfns;
50 u32 pfns[256];
51};
52
53static struct virtio_device_id id_table[] = {
54 { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID },
55 { 0 },
56};
57
58static void balloon_ack(struct virtqueue *vq)
59{
60 struct virtio_balloon *vb;
61 unsigned int len;
62
63 vb = vq->vq_ops->get_buf(vq, &len);
64 if (vb)
65 complete(&vb->acked);
66}
67
68static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
69{
70 struct scatterlist sg;
71
72 sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
73
74 init_completion(&vb->acked);
75
76 /* We should always be able to add one buffer to an empty queue. */
77 if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) != 0)
78 BUG();
79 vq->vq_ops->kick(vq);
80
81 /* When host has read buffer, this completes via balloon_ack */
82 wait_for_completion(&vb->acked);
83}
84
85static void fill_balloon(struct virtio_balloon *vb, size_t num)
86{
87 /* We can only do one array worth at a time. */
88 num = min(num, ARRAY_SIZE(vb->pfns));
89
90 for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) {
91 struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY);
92 if (!page) {
93 if (printk_ratelimit())
94 dev_printk(KERN_INFO, &vb->vdev->dev,
95 "Out of puff! Can't get %zu pages\n",
96 num);
97 /* Sleep for at least 1/5 of a second before retry. */
98 msleep(200);
99 break;
100 }
101 vb->pfns[vb->num_pfns] = page_to_pfn(page);
102 totalram_pages--;
103 vb->num_pages++;
104 list_add(&page->lru, &vb->pages);
105 }
106
107 /* Didn't get any? Oh well. */
108 if (vb->num_pfns == 0)
109 return;
110
111 tell_host(vb, vb->inflate_vq);
112}
113
114static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
115{
116 unsigned int i;
117
118 for (i = 0; i < num; i++) {
119 __free_page(pfn_to_page(pfns[i]));
120 totalram_pages++;
121 }
122}
123
124static void leak_balloon(struct virtio_balloon *vb, size_t num)
125{
126 struct page *page;
127
128 /* We can only do one array worth at a time. */
129 num = min(num, ARRAY_SIZE(vb->pfns));
130
131 for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) {
132 page = list_first_entry(&vb->pages, struct page, lru);
133 list_del(&page->lru);
134 vb->pfns[vb->num_pfns] = page_to_pfn(page);
135 vb->num_pages--;
136 }
137
138 if (vb->tell_host_first) {
139 tell_host(vb, vb->deflate_vq);
140 release_pages_by_pfn(vb->pfns, vb->num_pfns);
141 } else {
142 release_pages_by_pfn(vb->pfns, vb->num_pfns);
143 tell_host(vb, vb->deflate_vq);
144 }
145}
146
147static void virtballoon_changed(struct virtio_device *vdev)
148{
149 struct virtio_balloon *vb = vdev->priv;
150
151 wake_up(&vb->config_change);
152}
153
154static inline int towards_target(struct virtio_balloon *vb)
155{
156 u32 v;
157 __virtio_config_val(vb->vdev,
158 offsetof(struct virtio_balloon_config, num_pages),
159 &v);
160 return v - vb->num_pages;
161}
162
163static void update_balloon_size(struct virtio_balloon *vb)
164{
165 __le32 actual = cpu_to_le32(vb->num_pages);
166
167 vb->vdev->config->set(vb->vdev,
168 offsetof(struct virtio_balloon_config, actual),
169 &actual, sizeof(actual));
170}
171
172static int balloon(void *_vballoon)
173{
174 struct virtio_balloon *vb = _vballoon;
175
176 set_freezable();
177 while (!kthread_should_stop()) {
178 int diff;
179
180 try_to_freeze();
181 wait_event_interruptible(vb->config_change,
182 (diff = towards_target(vb)) != 0
183 || kthread_should_stop());
184 if (diff > 0)
185 fill_balloon(vb, diff);
186 else if (diff < 0)
187 leak_balloon(vb, -diff);
188 update_balloon_size(vb);
189 }
190 return 0;
191}
192
193static int virtballoon_probe(struct virtio_device *vdev)
194{
195 struct virtio_balloon *vb;
196 int err;
197
198 vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
199 if (!vb) {
200 err = -ENOMEM;
201 goto out;
202 }
203
204 INIT_LIST_HEAD(&vb->pages);
205 vb->num_pages = 0;
206 init_waitqueue_head(&vb->config_change);
207 vb->vdev = vdev;
208
209 /* We expect two virtqueues. */
210 vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack);
211 if (IS_ERR(vb->inflate_vq)) {
212 err = PTR_ERR(vb->inflate_vq);
213 goto out_free_vb;
214 }
215
216 vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack);
217 if (IS_ERR(vb->deflate_vq)) {
218 err = PTR_ERR(vb->deflate_vq);
219 goto out_del_inflate_vq;
220 }
221
222 vb->thread = kthread_run(balloon, vb, "vballoon");
223 if (IS_ERR(vb->thread)) {
224 err = PTR_ERR(vb->thread);
225 goto out_del_deflate_vq;
226 }
227
228 vb->tell_host_first
229 = vdev->config->feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
230
231 return 0;
232
233out_del_deflate_vq:
234 vdev->config->del_vq(vb->deflate_vq);
235out_del_inflate_vq:
236 vdev->config->del_vq(vb->inflate_vq);
237out_free_vb:
238 kfree(vb);
239out:
240 return err;
241}
242
243static void virtballoon_remove(struct virtio_device *vdev)
244{
245 struct virtio_balloon *vb = vdev->priv;
246
247 kthread_stop(vb->thread);
248
249 /* There might be pages left in the balloon: free them. */
250 while (vb->num_pages)
251 leak_balloon(vb, vb->num_pages);
252
253 /* Now we reset the device so we can clean up the queues. */
254 vdev->config->reset(vdev);
255
256 vdev->config->del_vq(vb->deflate_vq);
257 vdev->config->del_vq(vb->inflate_vq);
258 kfree(vb);
259}
260
261static struct virtio_driver virtio_balloon = {
262 .driver.name = KBUILD_MODNAME,
263 .driver.owner = THIS_MODULE,
264 .id_table = id_table,
265 .probe = virtballoon_probe,
266 .remove = __devexit_p(virtballoon_remove),
267 .config_changed = virtballoon_changed,
268};
269
270static int __init init(void)
271{
272 return register_virtio_driver(&virtio_balloon);
273}
274
275static void __exit fini(void)
276{
277 unregister_virtio_driver(&virtio_balloon);
278}
279module_init(init);
280module_exit(fini);
281
282MODULE_DEVICE_TABLE(virtio, id_table);
283MODULE_DESCRIPTION("Virtio balloon driver");
284MODULE_LICENSE("GPL");
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
new file mode 100644
index 000000000000..26f787ddd5ff
--- /dev/null
+++ b/drivers/virtio/virtio_pci.c
@@ -0,0 +1,446 @@
1/*
2 * Virtio PCI driver
3 *
4 * This module allows virtio devices to be used over a virtual PCI device.
5 * This can be used with QEMU based VMMs like KVM or Xen.
6 *
7 * Copyright IBM Corp. 2007
8 *
9 * Authors:
10 * Anthony Liguori <aliguori@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17#include <linux/module.h>
18#include <linux/list.h>
19#include <linux/pci.h>
20#include <linux/interrupt.h>
21#include <linux/virtio.h>
22#include <linux/virtio_config.h>
23#include <linux/virtio_ring.h>
24#include <linux/virtio_pci.h>
25#include <linux/highmem.h>
26#include <linux/spinlock.h>
27
28MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
29MODULE_DESCRIPTION("virtio-pci");
30MODULE_LICENSE("GPL");
31MODULE_VERSION("1");
32
33/* Our device structure */
34struct virtio_pci_device
35{
36 struct virtio_device vdev;
37 struct pci_dev *pci_dev;
38
39 /* the IO mapping for the PCI config space */
40 void *ioaddr;
41
42 /* a list of queues so we can dispatch IRQs */
43 spinlock_t lock;
44 struct list_head virtqueues;
45};
46
47struct virtio_pci_vq_info
48{
49 /* the actual virtqueue */
50 struct virtqueue *vq;
51
52 /* the number of entries in the queue */
53 int num;
54
55 /* the index of the queue */
56 int queue_index;
57
58 /* the virtual address of the ring queue */
59 void *queue;
60
61 /* the list node for the virtqueues list */
62 struct list_head node;
63};
64
65/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
66static struct pci_device_id virtio_pci_id_table[] = {
67 { 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
68 { 0 },
69};
70
71MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
72
73/* A PCI device has it's own struct device and so does a virtio device so
74 * we create a place for the virtio devices to show up in sysfs. I think it
75 * would make more sense for virtio to not insist on having it's own device. */
76static struct device virtio_pci_root = {
77 .parent = NULL,
78 .bus_id = "virtio-pci",
79};
80
81/* Unique numbering for devices under the kvm root */
82static unsigned int dev_index;
83
84/* Convert a generic virtio device to our structure */
85static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
86{
87 return container_of(vdev, struct virtio_pci_device, vdev);
88}
89
90/* virtio config->feature() implementation */
91static bool vp_feature(struct virtio_device *vdev, unsigned bit)
92{
93 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
94 u32 mask;
95
96 /* Since this function is supposed to have the side effect of
97 * enabling a queried feature, we simulate that by doing a read
98 * from the host feature bitmask and then writing to the guest
99 * feature bitmask */
100 mask = ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
101 if (mask & (1 << bit)) {
102 mask |= (1 << bit);
103 iowrite32(mask, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
104 }
105
106 return !!(mask & (1 << bit));
107}
108
109/* virtio config->get() implementation */
110static void vp_get(struct virtio_device *vdev, unsigned offset,
111 void *buf, unsigned len)
112{
113 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
114 void *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset;
115 u8 *ptr = buf;
116 int i;
117
118 for (i = 0; i < len; i++)
119 ptr[i] = ioread8(ioaddr + i);
120}
121
122/* the config->set() implementation. it's symmetric to the config->get()
123 * implementation */
124static void vp_set(struct virtio_device *vdev, unsigned offset,
125 const void *buf, unsigned len)
126{
127 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
128 void *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset;
129 const u8 *ptr = buf;
130 int i;
131
132 for (i = 0; i < len; i++)
133 iowrite8(ptr[i], ioaddr + i);
134}
135
136/* config->{get,set}_status() implementations */
137static u8 vp_get_status(struct virtio_device *vdev)
138{
139 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
140 return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
141}
142
143static void vp_set_status(struct virtio_device *vdev, u8 status)
144{
145 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
146 /* We should never be setting status to 0. */
147 BUG_ON(status == 0);
148 return iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
149}
150
151static void vp_reset(struct virtio_device *vdev)
152{
153 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
154 /* 0 status means a reset. */
155 return iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
156}
157
158/* the notify function used when creating a virt queue */
159static void vp_notify(struct virtqueue *vq)
160{
161 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
162 struct virtio_pci_vq_info *info = vq->priv;
163
164 /* we write the queue's selector into the notification register to
165 * signal the other end */
166 iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
167}
168
169/* A small wrapper to also acknowledge the interrupt when it's handled.
170 * I really need an EIO hook for the vring so I can ack the interrupt once we
171 * know that we'll be handling the IRQ but before we invoke the callback since
172 * the callback may notify the host which results in the host attempting to
173 * raise an interrupt that we would then mask once we acknowledged the
174 * interrupt. */
175static irqreturn_t vp_interrupt(int irq, void *opaque)
176{
177 struct virtio_pci_device *vp_dev = opaque;
178 struct virtio_pci_vq_info *info;
179 irqreturn_t ret = IRQ_NONE;
180 u8 isr;
181
182 /* reading the ISR has the effect of also clearing it so it's very
183 * important to save off the value. */
184 isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
185
186 /* It's definitely not us if the ISR was not high */
187 if (!isr)
188 return IRQ_NONE;
189
190 /* Configuration change? Tell driver if it wants to know. */
191 if (isr & VIRTIO_PCI_ISR_CONFIG) {
192 struct virtio_driver *drv;
193 drv = container_of(vp_dev->vdev.dev.driver,
194 struct virtio_driver, driver);
195
196 if (drv->config_changed)
197 drv->config_changed(&vp_dev->vdev);
198 }
199
200 spin_lock(&vp_dev->lock);
201 list_for_each_entry(info, &vp_dev->virtqueues, node) {
202 if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
203 ret = IRQ_HANDLED;
204 }
205 spin_unlock(&vp_dev->lock);
206
207 return ret;
208}
209
210/* the config->find_vq() implementation */
211static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
212 void (*callback)(struct virtqueue *vq))
213{
214 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
215 struct virtio_pci_vq_info *info;
216 struct virtqueue *vq;
217 u16 num;
218 int err;
219
220 /* Select the queue we're interested in */
221 iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
222
223 /* Check if queue is either not available or already active. */
224 num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
225 if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
226 return ERR_PTR(-ENOENT);
227
228 /* allocate and fill out our structure the represents an active
229 * queue */
230 info = kmalloc(sizeof(struct virtio_pci_vq_info), GFP_KERNEL);
231 if (!info)
232 return ERR_PTR(-ENOMEM);
233
234 info->queue_index = index;
235 info->num = num;
236
237 info->queue = kzalloc(PAGE_ALIGN(vring_size(num,PAGE_SIZE)), GFP_KERNEL);
238 if (info->queue == NULL) {
239 err = -ENOMEM;
240 goto out_info;
241 }
242
243 /* activate the queue */
244 iowrite32(virt_to_phys(info->queue) >> PAGE_SHIFT,
245 vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
246
247 /* create the vring */
248 vq = vring_new_virtqueue(info->num, vdev, info->queue,
249 vp_notify, callback);
250 if (!vq) {
251 err = -ENOMEM;
252 goto out_activate_queue;
253 }
254
255 vq->priv = info;
256 info->vq = vq;
257
258 spin_lock(&vp_dev->lock);
259 list_add(&info->node, &vp_dev->virtqueues);
260 spin_unlock(&vp_dev->lock);
261
262 return vq;
263
264out_activate_queue:
265 iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
266 kfree(info->queue);
267out_info:
268 kfree(info);
269 return ERR_PTR(err);
270}
271
272/* the config->del_vq() implementation */
273static void vp_del_vq(struct virtqueue *vq)
274{
275 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
276 struct virtio_pci_vq_info *info = vq->priv;
277
278 spin_lock(&vp_dev->lock);
279 list_del(&info->node);
280 spin_unlock(&vp_dev->lock);
281
282 vring_del_virtqueue(vq);
283
284 /* Select and deactivate the queue */
285 iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
286 iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
287
288 kfree(info->queue);
289 kfree(info);
290}
291
292static struct virtio_config_ops virtio_pci_config_ops = {
293 .feature = vp_feature,
294 .get = vp_get,
295 .set = vp_set,
296 .get_status = vp_get_status,
297 .set_status = vp_set_status,
298 .reset = vp_reset,
299 .find_vq = vp_find_vq,
300 .del_vq = vp_del_vq,
301};
302
303/* the PCI probing function */
304static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,
305 const struct pci_device_id *id)
306{
307 struct virtio_pci_device *vp_dev;
308 int err;
309
310 /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
311 if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
312 return -ENODEV;
313
314 if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) {
315 printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n",
316 VIRTIO_PCI_ABI_VERSION, pci_dev->revision);
317 return -ENODEV;
318 }
319
320 /* allocate our structure and fill it out */
321 vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
322 if (vp_dev == NULL)
323 return -ENOMEM;
324
325 snprintf(vp_dev->vdev.dev.bus_id, BUS_ID_SIZE, "virtio%d", dev_index);
326 vp_dev->vdev.index = dev_index;
327 dev_index++;
328
329 vp_dev->vdev.dev.parent = &virtio_pci_root;
330 vp_dev->vdev.config = &virtio_pci_config_ops;
331 vp_dev->pci_dev = pci_dev;
332 INIT_LIST_HEAD(&vp_dev->virtqueues);
333 spin_lock_init(&vp_dev->lock);
334
335 /* enable the device */
336 err = pci_enable_device(pci_dev);
337 if (err)
338 goto out;
339
340 err = pci_request_regions(pci_dev, "virtio-pci");
341 if (err)
342 goto out_enable_device;
343
344 vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
345 if (vp_dev->ioaddr == NULL)
346 goto out_req_regions;
347
348 pci_set_drvdata(pci_dev, vp_dev);
349
350 /* we use the subsystem vendor/device id as the virtio vendor/device
351 * id. this allows us to use the same PCI vendor/device id for all
352 * virtio devices and to identify the particular virtio driver by
353 * the subsytem ids */
354 vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
355 vp_dev->vdev.id.device = pci_dev->subsystem_device;
356
357 /* register a handler for the queue with the PCI device's interrupt */
358 err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
359 vp_dev->vdev.dev.bus_id, vp_dev);
360 if (err)
361 goto out_set_drvdata;
362
363 /* finally register the virtio device */
364 err = register_virtio_device(&vp_dev->vdev);
365 if (err)
366 goto out_req_irq;
367
368 return 0;
369
370out_req_irq:
371 free_irq(pci_dev->irq, vp_dev);
372out_set_drvdata:
373 pci_set_drvdata(pci_dev, NULL);
374 pci_iounmap(pci_dev, vp_dev->ioaddr);
375out_req_regions:
376 pci_release_regions(pci_dev);
377out_enable_device:
378 pci_disable_device(pci_dev);
379out:
380 kfree(vp_dev);
381 return err;
382}
383
384static void __devexit virtio_pci_remove(struct pci_dev *pci_dev)
385{
386 struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
387
388 free_irq(pci_dev->irq, vp_dev);
389 pci_set_drvdata(pci_dev, NULL);
390 pci_iounmap(pci_dev, vp_dev->ioaddr);
391 pci_release_regions(pci_dev);
392 pci_disable_device(pci_dev);
393 kfree(vp_dev);
394}
395
#ifdef CONFIG_PM
/* Suspend hook: save the PCI state, then put the device into D3hot.
 * Always reports success. */
static int virtio_pci_suspend(struct pci_dev *pci_dev, pm_message_t state)
{
	pci_save_state(pci_dev);
	pci_set_power_state(pci_dev, PCI_D3hot);

	return 0;
}

/* Resume hook: restore the saved PCI state, then put the device back
 * into D0.  Always reports success. */
static int virtio_pci_resume(struct pci_dev *pci_dev)
{
	pci_restore_state(pci_dev);
	pci_set_power_state(pci_dev, PCI_D0);

	return 0;
}
#endif /* CONFIG_PM */
411
412static struct pci_driver virtio_pci_driver = {
413 .name = "virtio-pci",
414 .id_table = virtio_pci_id_table,
415 .probe = virtio_pci_probe,
416 .remove = virtio_pci_remove,
417#ifdef CONFIG_PM
418 .suspend = virtio_pci_suspend,
419 .resume = virtio_pci_resume,
420#endif
421};
422
423static int __init virtio_pci_init(void)
424{
425 int err;
426
427 err = device_register(&virtio_pci_root);
428 if (err)
429 return err;
430
431 err = pci_register_driver(&virtio_pci_driver);
432 if (err)
433 device_unregister(&virtio_pci_root);
434
435 return err;
436}
437
438module_init(virtio_pci_init);
439
440static void __exit virtio_pci_exit(void)
441{
442 device_unregister(&virtio_pci_root);
443 pci_unregister_driver(&virtio_pci_driver);
444}
445
446module_exit(virtio_pci_exit);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 1dc04b6684e6..3a28c1382131 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -87,6 +87,8 @@ static int vring_add_buf(struct virtqueue *_vq,
87 if (vq->num_free < out + in) { 87 if (vq->num_free < out + in) {
88 pr_debug("Can't add buf len %i - avail = %i\n", 88 pr_debug("Can't add buf len %i - avail = %i\n",
89 out + in, vq->num_free); 89 out + in, vq->num_free);
90 /* We notify *even if* VRING_USED_F_NO_NOTIFY is set here. */
91 vq->notify(&vq->vq);
90 END_USE(vq); 92 END_USE(vq);
91 return -ENOSPC; 93 return -ENOSPC;
92 } 94 }
@@ -97,16 +99,14 @@ static int vring_add_buf(struct virtqueue *_vq,
97 head = vq->free_head; 99 head = vq->free_head;
98 for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { 100 for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
99 vq->vring.desc[i].flags = VRING_DESC_F_NEXT; 101 vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
100 vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<<PAGE_SHIFT) 102 vq->vring.desc[i].addr = sg_phys(sg);
101 + sg->offset;
102 vq->vring.desc[i].len = sg->length; 103 vq->vring.desc[i].len = sg->length;
103 prev = i; 104 prev = i;
104 sg++; 105 sg++;
105 } 106 }
106 for (; in; i = vq->vring.desc[i].next, in--) { 107 for (; in; i = vq->vring.desc[i].next, in--) {
107 vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; 108 vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
108 vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<<PAGE_SHIFT) 109 vq->vring.desc[i].addr = sg_phys(sg);
109 + sg->offset;
110 vq->vring.desc[i].len = sg->length; 110 vq->vring.desc[i].len = sg->length;
111 prev = i; 111 prev = i;
112 sg++; 112 sg++;
@@ -171,16 +171,6 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
171 vq->num_free++; 171 vq->num_free++;
172} 172}
173 173
174/* FIXME: We need to tell other side about removal, to synchronize. */
175static void vring_shutdown(struct virtqueue *_vq)
176{
177 struct vring_virtqueue *vq = to_vvq(_vq);
178 unsigned int i;
179
180 for (i = 0; i < vq->vring.num; i++)
181 detach_buf(vq, i);
182}
183
184static inline bool more_used(const struct vring_virtqueue *vq) 174static inline bool more_used(const struct vring_virtqueue *vq)
185{ 175{
186 return vq->last_used_idx != vq->vring.used->idx; 176 return vq->last_used_idx != vq->vring.used->idx;
@@ -220,7 +210,17 @@ static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len)
220 return ret; 210 return ret;
221} 211}
222 212
223static bool vring_restart(struct virtqueue *_vq) 213static void vring_disable_cb(struct virtqueue *_vq)
214{
215 struct vring_virtqueue *vq = to_vvq(_vq);
216
217 START_USE(vq);
218 BUG_ON(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
219 vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
220 END_USE(vq);
221}
222
223static bool vring_enable_cb(struct virtqueue *_vq)
224{ 224{
225 struct vring_virtqueue *vq = to_vvq(_vq); 225 struct vring_virtqueue *vq = to_vvq(_vq);
226 226
@@ -253,26 +253,34 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
253 if (unlikely(vq->broken)) 253 if (unlikely(vq->broken))
254 return IRQ_HANDLED; 254 return IRQ_HANDLED;
255 255
256 /* Other side may have missed us turning off the interrupt,
257 * but we should preserve disable semantic for virtio users. */
258 if (unlikely(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
259 pr_debug("virtqueue interrupt after disable for %p\n", vq);
260 return IRQ_HANDLED;
261 }
262
256 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 263 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
257 if (vq->vq.callback && !vq->vq.callback(&vq->vq)) 264 if (vq->vq.callback)
258 vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; 265 vq->vq.callback(&vq->vq);
259 266
260 return IRQ_HANDLED; 267 return IRQ_HANDLED;
261} 268}
269EXPORT_SYMBOL_GPL(vring_interrupt);
262 270
263static struct virtqueue_ops vring_vq_ops = { 271static struct virtqueue_ops vring_vq_ops = {
264 .add_buf = vring_add_buf, 272 .add_buf = vring_add_buf,
265 .get_buf = vring_get_buf, 273 .get_buf = vring_get_buf,
266 .kick = vring_kick, 274 .kick = vring_kick,
267 .restart = vring_restart, 275 .disable_cb = vring_disable_cb,
268 .shutdown = vring_shutdown, 276 .enable_cb = vring_enable_cb,
269}; 277};
270 278
271struct virtqueue *vring_new_virtqueue(unsigned int num, 279struct virtqueue *vring_new_virtqueue(unsigned int num,
272 struct virtio_device *vdev, 280 struct virtio_device *vdev,
273 void *pages, 281 void *pages,
274 void (*notify)(struct virtqueue *), 282 void (*notify)(struct virtqueue *),
275 bool (*callback)(struct virtqueue *)) 283 void (*callback)(struct virtqueue *))
276{ 284{
277 struct vring_virtqueue *vq; 285 struct vring_virtqueue *vq;
278 unsigned int i; 286 unsigned int i;
@@ -311,9 +319,12 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
311 319
312 return &vq->vq; 320 return &vq->vq;
313} 321}
322EXPORT_SYMBOL_GPL(vring_new_virtqueue);
314 323
315void vring_del_virtqueue(struct virtqueue *vq) 324void vring_del_virtqueue(struct virtqueue *vq)
316{ 325{
317 kfree(to_vvq(vq)); 326 kfree(to_vvq(vq));
318} 327}
328EXPORT_SYMBOL_GPL(vring_del_virtqueue);
319 329
330MODULE_LICENSE("GPL");