diff options
| author | Rusty Russell <rusty@rustcorp.com.au> | 2007-10-21 21:03:40 -0400 |
|---|---|---|
| committer | Rusty Russell <rusty@rustcorp.com.au> | 2007-10-23 01:49:55 -0400 |
| commit | 0a8a69dd77ddbd4513b21363021ecde7e1025502 (patch) | |
| tree | ed6d8f0756835390b4c0d9a172422f2e42a65523 /drivers/virtio | |
| parent | b01d9f2863349b0e041b90c3c86a998ee0fed2b0 (diff) | |
Virtio helper routines for a descriptor ringbuffer implementation
These helper routines supply most of the virtqueue_ops for hypervisors
which want to use a ring for virtio. Unlike the previous lguest
implementation:
1) The rings are variable sized (2^n-1 elements).
2) They have an unfortunate limit of 65535 bytes per sg element.
3) The page numbers are always 64 bit (PAE anyone?)
4) They no longer place used[] on a separate page, just a separate
cacheline.
5) We do a modulo on a variable. We could be tricky if we cared.
6) Interrupts and notifies are suppressed using flags within the rings.
Users need only get the ring pages and provide a notify hook (KVM
wants the guest to allocate the rings, lguest does it sanely).
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Dor Laor <dor.laor@qumranet.com>
Diffstat (limited to 'drivers/virtio')
| -rw-r--r-- | drivers/virtio/Kconfig | 5 | ||||
| -rw-r--r-- | drivers/virtio/Makefile | 1 | ||||
| -rw-r--r-- | drivers/virtio/virtio_ring.c | 313 |
3 files changed, 319 insertions, 0 deletions
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index bce84b56a6..9e33fc4da8 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig | |||
| @@ -1,3 +1,8 @@ | |||
| 1 | # Virtio always gets selected by whoever wants it. | 1 | # Virtio always gets selected by whoever wants it. |
| 2 | config VIRTIO | 2 | config VIRTIO |
| 3 | bool | 3 | bool |
| 4 | |||
| 5 | # Similarly the virtio ring implementation. | ||
| 6 | config VIRTIO_RING | ||
| 7 | bool | ||
| 8 | depends on VIRTIO | ||
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index af0d57dad3..f70e40971d 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile | |||
| @@ -1 +1,2 @@ | |||
| 1 | obj-$(CONFIG_VIRTIO) += virtio.o | 1 | obj-$(CONFIG_VIRTIO) += virtio.o |
| 2 | obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o | ||
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c new file mode 100644 index 0000000000..0e4baca21b --- /dev/null +++ b/drivers/virtio/virtio_ring.c | |||
| @@ -0,0 +1,313 @@ | |||
| 1 | /* Virtio ring implementation. | ||
| 2 | * | ||
| 3 | * Copyright 2007 Rusty Russell IBM Corporation | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify | ||
| 6 | * it under the terms of the GNU General Public License as published by | ||
| 7 | * the Free Software Foundation; either version 2 of the License, or | ||
| 8 | * (at your option) any later version. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, | ||
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 13 | * GNU General Public License for more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program; if not, write to the Free Software | ||
| 17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | */ | ||
| 19 | #include <linux/virtio.h> | ||
| 20 | #include <linux/virtio_ring.h> | ||
| 21 | #include <linux/device.h> | ||
| 22 | |||
#ifdef DEBUG
/*
 * Development build: a corrupted ring is fatal, and START_USE/END_USE
 * detect a second entry into a queue operation while one is in progress.
 */

/* Log the problem against the queue's device, then crash. */
#define BAD_RING(vq, fmt...)					\
	do {							\
		dev_err(&(vq)->vq.vdev->dev, fmt);		\
		BUG();						\
	} while (0)

/* Mark the queue busy (recording the source line); panic if it already is. */
#define START_USE(vq)						\
	do {							\
		if ((vq)->in_use)				\
			panic("in_use = %i\n", (vq)->in_use);	\
		(vq)->in_use = __LINE__;			\
		mb();						\
	} while (0)

/* Mark the queue idle again; it must currently be marked busy. */
#define END_USE(vq)						\
	do {							\
		BUG_ON(!(vq)->in_use);				\
		(vq)->in_use = 0;				\
		mb();						\
	} while (0)
#else
/* Production build: log the problem and flag the queue as broken. */
#define BAD_RING(vq, fmt...)					\
	do {							\
		dev_err(&(vq)->vq.vdev->dev, fmt);		\
		(vq)->broken = true;				\
	} while (0)

/* Use tracking compiles away entirely outside DEBUG builds. */
#define START_USE(vq)
#define END_USE(vq)
#endif
| 37 | |||
| 38 | struct vring_virtqueue | ||
| 39 | { | ||
| 40 | struct virtqueue vq; | ||
| 41 | |||
| 42 | /* Actual memory layout for this queue */ | ||
| 43 | struct vring vring; | ||
| 44 | |||
| 45 | /* Other side has made a mess, don't try any more. */ | ||
| 46 | bool broken; | ||
| 47 | |||
| 48 | /* Number of free buffers */ | ||
| 49 | unsigned int num_free; | ||
| 50 | /* Head of free buffer list. */ | ||
| 51 | unsigned int free_head; | ||
| 52 | /* Number we've added since last sync. */ | ||
| 53 | unsigned int num_added; | ||
| 54 | |||
| 55 | /* Last used index we've seen. */ | ||
| 56 | unsigned int last_used_idx; | ||
| 57 | |||
| 58 | /* How to notify other side. FIXME: commonalize hcalls! */ | ||
| 59 | void (*notify)(struct virtqueue *vq); | ||
| 60 | |||
| 61 | #ifdef DEBUG | ||
| 62 | /* They're supposed to lock for us. */ | ||
| 63 | unsigned int in_use; | ||
| 64 | #endif | ||
| 65 | |||
| 66 | /* Tokens for callbacks. */ | ||
| 67 | void *data[]; | ||
| 68 | }; | ||
| 69 | |||
| 70 | #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) | ||
| 71 | |||
| 72 | static int vring_add_buf(struct virtqueue *_vq, | ||
| 73 | struct scatterlist sg[], | ||
| 74 | unsigned int out, | ||
| 75 | unsigned int in, | ||
| 76 | void *data) | ||
| 77 | { | ||
| 78 | struct vring_virtqueue *vq = to_vvq(_vq); | ||
| 79 | unsigned int i, avail, head, uninitialized_var(prev); | ||
| 80 | |||
| 81 | BUG_ON(data == NULL); | ||
| 82 | BUG_ON(out + in > vq->vring.num); | ||
| 83 | BUG_ON(out + in == 0); | ||
| 84 | |||
| 85 | START_USE(vq); | ||
| 86 | |||
| 87 | if (vq->num_free < out + in) { | ||
| 88 | pr_debug("Can't add buf len %i - avail = %i\n", | ||
| 89 | out + in, vq->num_free); | ||
| 90 | END_USE(vq); | ||
| 91 | return -ENOSPC; | ||
| 92 | } | ||
| 93 | |||
| 94 | /* We're about to use some buffers from the free list. */ | ||
| 95 | vq->num_free -= out + in; | ||
| 96 | |||
| 97 | head = vq->free_head; | ||
| 98 | for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { | ||
| 99 | vq->vring.desc[i].flags = VRING_DESC_F_NEXT; | ||
| 100 | vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<<PAGE_SHIFT) | ||
| 101 | + sg->offset; | ||
| 102 | vq->vring.desc[i].len = sg->length; | ||
| 103 | prev = i; | ||
| 104 | sg++; | ||
| 105 | } | ||
| 106 | for (; in; i = vq->vring.desc[i].next, in--) { | ||
| 107 | vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; | ||
| 108 | vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<<PAGE_SHIFT) | ||
| 109 | + sg->offset; | ||
| 110 | vq->vring.desc[i].len = sg->length; | ||
| 111 | prev = i; | ||
| 112 | sg++; | ||
| 113 | } | ||
| 114 | /* Last one doesn't continue. */ | ||
| 115 | vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT; | ||
| 116 | |||
| 117 | /* Update free pointer */ | ||
| 118 | vq->free_head = i; | ||
| 119 | |||
| 120 | /* Set token. */ | ||
| 121 | vq->data[head] = data; | ||
| 122 | |||
| 123 | /* Put entry in available array (but don't update avail->idx until they | ||
| 124 | * do sync). FIXME: avoid modulus here? */ | ||
| 125 | avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num; | ||
| 126 | vq->vring.avail->ring[avail] = head; | ||
| 127 | |||
| 128 | pr_debug("Added buffer head %i to %p\n", head, vq); | ||
| 129 | END_USE(vq); | ||
| 130 | return 0; | ||
| 131 | } | ||
| 132 | |||
| 133 | static void vring_kick(struct virtqueue *_vq) | ||
| 134 | { | ||
| 135 | struct vring_virtqueue *vq = to_vvq(_vq); | ||
| 136 | START_USE(vq); | ||
| 137 | /* Descriptors and available array need to be set before we expose the | ||
| 138 | * new available array entries. */ | ||
| 139 | wmb(); | ||
| 140 | |||
| 141 | vq->vring.avail->idx += vq->num_added; | ||
| 142 | vq->num_added = 0; | ||
| 143 | |||
| 144 | /* Need to update avail index before checking if we should notify */ | ||
| 145 | mb(); | ||
| 146 | |||
| 147 | if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) | ||
| 148 | /* Prod other side to tell it about changes. */ | ||
| 149 | vq->notify(&vq->vq); | ||
| 150 | |||
| 151 | END_USE(vq); | ||
| 152 | } | ||
| 153 | |||
| 154 | static void detach_buf(struct vring_virtqueue *vq, unsigned int head) | ||
| 155 | { | ||
| 156 | unsigned int i; | ||
| 157 | |||
| 158 | /* Clear data ptr. */ | ||
| 159 | vq->data[head] = NULL; | ||
| 160 | |||
| 161 | /* Put back on free list: find end */ | ||
| 162 | i = head; | ||
| 163 | while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { | ||
| 164 | i = vq->vring.desc[i].next; | ||
| 165 | vq->num_free++; | ||
| 166 | } | ||
| 167 | |||
| 168 | vq->vring.desc[i].next = vq->free_head; | ||
| 169 | vq->free_head = head; | ||
| 170 | /* Plus final descriptor */ | ||
| 171 | vq->num_free++; | ||
| 172 | } | ||
| 173 | |||
| 174 | /* FIXME: We need to tell other side about removal, to synchronize. */ | ||
| 175 | static void vring_shutdown(struct virtqueue *_vq) | ||
| 176 | { | ||
| 177 | struct vring_virtqueue *vq = to_vvq(_vq); | ||
| 178 | unsigned int i; | ||
| 179 | |||
| 180 | for (i = 0; i < vq->vring.num; i++) | ||
| 181 | detach_buf(vq, i); | ||
| 182 | } | ||
| 183 | |||
| 184 | static inline bool more_used(const struct vring_virtqueue *vq) | ||
| 185 | { | ||
| 186 | return vq->last_used_idx != vq->vring.used->idx; | ||
| 187 | } | ||
| 188 | |||
| 189 | static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len) | ||
| 190 | { | ||
| 191 | struct vring_virtqueue *vq = to_vvq(_vq); | ||
| 192 | void *ret; | ||
| 193 | unsigned int i; | ||
| 194 | |||
| 195 | START_USE(vq); | ||
| 196 | |||
| 197 | if (!more_used(vq)) { | ||
| 198 | pr_debug("No more buffers in queue\n"); | ||
| 199 | END_USE(vq); | ||
| 200 | return NULL; | ||
| 201 | } | ||
| 202 | |||
| 203 | i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; | ||
| 204 | *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; | ||
| 205 | |||
| 206 | if (unlikely(i >= vq->vring.num)) { | ||
| 207 | BAD_RING(vq, "id %u out of range\n", i); | ||
| 208 | return NULL; | ||
| 209 | } | ||
| 210 | if (unlikely(!vq->data[i])) { | ||
| 211 | BAD_RING(vq, "id %u is not a head!\n", i); | ||
| 212 | return NULL; | ||
| 213 | } | ||
| 214 | |||
| 215 | /* detach_buf clears data, so grab it now. */ | ||
| 216 | ret = vq->data[i]; | ||
| 217 | detach_buf(vq, i); | ||
| 218 | vq->last_used_idx++; | ||
| 219 | END_USE(vq); | ||
| 220 | return ret; | ||
| 221 | } | ||
| 222 | |||
| 223 | static bool vring_restart(struct virtqueue *_vq) | ||
| 224 | { | ||
| 225 | struct vring_virtqueue *vq = to_vvq(_vq); | ||
| 226 | |||
| 227 | START_USE(vq); | ||
| 228 | BUG_ON(!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)); | ||
| 229 | |||
| 230 | /* We optimistically turn back on interrupts, then check if there was | ||
| 231 | * more to do. */ | ||
| 232 | vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; | ||
| 233 | mb(); | ||
| 234 | if (unlikely(more_used(vq))) { | ||
| 235 | vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; | ||
| 236 | END_USE(vq); | ||
| 237 | return false; | ||
| 238 | } | ||
| 239 | |||
| 240 | END_USE(vq); | ||
| 241 | return true; | ||
| 242 | } | ||
| 243 | |||
| 244 | irqreturn_t vring_interrupt(int irq, void *_vq) | ||
| 245 | { | ||
| 246 | struct vring_virtqueue *vq = to_vvq(_vq); | ||
| 247 | |||
| 248 | if (!more_used(vq)) { | ||
| 249 | pr_debug("virtqueue interrupt with no work for %p\n", vq); | ||
| 250 | return IRQ_NONE; | ||
| 251 | } | ||
| 252 | |||
| 253 | if (unlikely(vq->broken)) | ||
| 254 | return IRQ_HANDLED; | ||
| 255 | |||
| 256 | pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); | ||
| 257 | if (vq->vq.callback && !vq->vq.callback(&vq->vq)) | ||
| 258 | vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; | ||
| 259 | |||
| 260 | return IRQ_HANDLED; | ||
| 261 | } | ||
| 262 | |||
| 263 | static struct virtqueue_ops vring_vq_ops = { | ||
| 264 | .add_buf = vring_add_buf, | ||
| 265 | .get_buf = vring_get_buf, | ||
| 266 | .kick = vring_kick, | ||
| 267 | .restart = vring_restart, | ||
| 268 | .shutdown = vring_shutdown, | ||
| 269 | }; | ||
| 270 | |||
| 271 | struct virtqueue *vring_new_virtqueue(unsigned int num, | ||
| 272 | struct virtio_device *vdev, | ||
| 273 | void *pages, | ||
| 274 | void (*notify)(struct virtqueue *), | ||
| 275 | bool (*callback)(struct virtqueue *)) | ||
| 276 | { | ||
| 277 | struct vring_virtqueue *vq; | ||
| 278 | unsigned int i; | ||
| 279 | |||
| 280 | vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL); | ||
| 281 | if (!vq) | ||
| 282 | return NULL; | ||
| 283 | |||
| 284 | vring_init(&vq->vring, num, pages); | ||
| 285 | vq->vq.callback = callback; | ||
| 286 | vq->vq.vdev = vdev; | ||
| 287 | vq->vq.vq_ops = &vring_vq_ops; | ||
| 288 | vq->notify = notify; | ||
| 289 | vq->broken = false; | ||
| 290 | vq->last_used_idx = 0; | ||
| 291 | vq->num_added = 0; | ||
| 292 | #ifdef DEBUG | ||
| 293 | vq->in_use = false; | ||
| 294 | #endif | ||
| 295 | |||
| 296 | /* No callback? Tell other side not to bother us. */ | ||
| 297 | if (!callback) | ||
| 298 | vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; | ||
| 299 | |||
| 300 | /* Put everything in free lists. */ | ||
| 301 | vq->num_free = num; | ||
| 302 | vq->free_head = 0; | ||
| 303 | for (i = 0; i < num-1; i++) | ||
| 304 | vq->vring.desc[i].next = i+1; | ||
| 305 | |||
| 306 | return &vq->vq; | ||
| 307 | } | ||
| 308 | |||
/*
 * vring_del_virtqueue - free a virtqueue made by vring_new_virtqueue()
 *
 * Only the bookkeeping structure is released here.
 */
void vring_del_virtqueue(struct virtqueue *vq)
{
	struct vring_virtqueue *vvq = to_vvq(vq);

	kfree(vvq);
}
| 313 | |||
