diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2007-10-21 21:03:40 -0400 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2007-10-23 01:49:55 -0400 |
commit | 0a8a69dd77ddbd4513b21363021ecde7e1025502 (patch) | |
tree | ed6d8f0756835390b4c0d9a172422f2e42a65523 /drivers | |
parent | b01d9f2863349b0e041b90c3c86a998ee0fed2b0 (diff) |
Virtio helper routines for a descriptor ringbuffer implementation
These helper routines supply most of the virtqueue_ops for hypervisors
which want to use a ring for virtio. Unlike the previous lguest
implementation:
1) The rings are variable sized (2^n-1 elements).
2) They have an unfortunate limit of 65535 bytes per sg element.
3) The page numbers are always 64 bit (PAE anyone?)
4) They no longer place used[] on a separate page, just a separate
cacheline.
5) We do a modulo on a variable. We could be tricky if we cared.
6) Interrupts and notifies are suppressed using flags within the rings.
Users need only get the ring pages and provide a notify hook (KVM
wants the guest to allocate the rings, lguest does it sanely).
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Dor Laor <dor.laor@qumranet.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/virtio/Kconfig | 5 | ||||
-rw-r--r-- | drivers/virtio/Makefile | 1 | ||||
-rw-r--r-- | drivers/virtio/virtio_ring.c | 313 |
3 files changed, 319 insertions, 0 deletions
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index bce84b56a659..9e33fc4da875 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig | |||
@@ -1,3 +1,8 @@ | |||
1 | # Virtio always gets selected by whoever wants it. | 1 | # Virtio always gets selected by whoever wants it. |
2 | config VIRTIO | 2 | config VIRTIO |
3 | bool | 3 | bool |
4 | |||
5 | # Similarly the virtio ring implementation. | ||
6 | config VIRTIO_RING | ||
7 | bool | ||
8 | depends on VIRTIO | ||
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index af0d57dad3eb..f70e40971dd9 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile | |||
@@ -1 +1,2 @@ | |||
1 | obj-$(CONFIG_VIRTIO) += virtio.o | 1 | obj-$(CONFIG_VIRTIO) += virtio.o |
2 | obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o | ||
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c new file mode 100644 index 000000000000..0e4baca21b8f --- /dev/null +++ b/drivers/virtio/virtio_ring.c | |||
@@ -0,0 +1,313 @@ | |||
1 | /* Virtio ring implementation. | ||
2 | * | ||
3 | * Copyright 2007 Rusty Russell IBM Corporation | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #include <linux/virtio.h> | ||
20 | #include <linux/virtio_ring.h> | ||
21 | #include <linux/device.h> | ||
22 | |||
23 | #ifdef DEBUG | ||
24 | /* For development, we want to crash whenever the ring is screwed. */ | ||
25 | #define BAD_RING(vq, fmt...) \ | ||
26 | do { dev_err(&vq->vq.vdev->dev, fmt); BUG(); } while(0) | ||
27 | #define START_USE(vq) \ | ||
28 | do { if ((vq)->in_use) panic("in_use = %i\n", (vq)->in_use); (vq)->in_use = __LINE__; mb(); } while(0) | ||
29 | #define END_USE(vq) \ | ||
30 | do { BUG_ON(!(vq)->in_use); (vq)->in_use = 0; mb(); } while(0) | ||
31 | #else | ||
32 | #define BAD_RING(vq, fmt...) \ | ||
33 | do { dev_err(&vq->vq.vdev->dev, fmt); (vq)->broken = true; } while(0) | ||
34 | #define START_USE(vq) | ||
35 | #define END_USE(vq) | ||
36 | #endif | ||
37 | |||
38 | struct vring_virtqueue | ||
39 | { | ||
40 | struct virtqueue vq; | ||
41 | |||
42 | /* Actual memory layout for this queue */ | ||
43 | struct vring vring; | ||
44 | |||
45 | /* Other side has made a mess, don't try any more. */ | ||
46 | bool broken; | ||
47 | |||
48 | /* Number of free buffers */ | ||
49 | unsigned int num_free; | ||
50 | /* Head of free buffer list. */ | ||
51 | unsigned int free_head; | ||
52 | /* Number we've added since last sync. */ | ||
53 | unsigned int num_added; | ||
54 | |||
55 | /* Last used index we've seen. */ | ||
56 | unsigned int last_used_idx; | ||
57 | |||
58 | /* How to notify other side. FIXME: commonalize hcalls! */ | ||
59 | void (*notify)(struct virtqueue *vq); | ||
60 | |||
61 | #ifdef DEBUG | ||
62 | /* They're supposed to lock for us. */ | ||
63 | unsigned int in_use; | ||
64 | #endif | ||
65 | |||
66 | /* Tokens for callbacks. */ | ||
67 | void *data[]; | ||
68 | }; | ||
69 | |||
70 | #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) | ||
71 | |||
72 | static int vring_add_buf(struct virtqueue *_vq, | ||
73 | struct scatterlist sg[], | ||
74 | unsigned int out, | ||
75 | unsigned int in, | ||
76 | void *data) | ||
77 | { | ||
78 | struct vring_virtqueue *vq = to_vvq(_vq); | ||
79 | unsigned int i, avail, head, uninitialized_var(prev); | ||
80 | |||
81 | BUG_ON(data == NULL); | ||
82 | BUG_ON(out + in > vq->vring.num); | ||
83 | BUG_ON(out + in == 0); | ||
84 | |||
85 | START_USE(vq); | ||
86 | |||
87 | if (vq->num_free < out + in) { | ||
88 | pr_debug("Can't add buf len %i - avail = %i\n", | ||
89 | out + in, vq->num_free); | ||
90 | END_USE(vq); | ||
91 | return -ENOSPC; | ||
92 | } | ||
93 | |||
94 | /* We're about to use some buffers from the free list. */ | ||
95 | vq->num_free -= out + in; | ||
96 | |||
97 | head = vq->free_head; | ||
98 | for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { | ||
99 | vq->vring.desc[i].flags = VRING_DESC_F_NEXT; | ||
100 | vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<<PAGE_SHIFT) | ||
101 | + sg->offset; | ||
102 | vq->vring.desc[i].len = sg->length; | ||
103 | prev = i; | ||
104 | sg++; | ||
105 | } | ||
106 | for (; in; i = vq->vring.desc[i].next, in--) { | ||
107 | vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; | ||
108 | vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<<PAGE_SHIFT) | ||
109 | + sg->offset; | ||
110 | vq->vring.desc[i].len = sg->length; | ||
111 | prev = i; | ||
112 | sg++; | ||
113 | } | ||
114 | /* Last one doesn't continue. */ | ||
115 | vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT; | ||
116 | |||
117 | /* Update free pointer */ | ||
118 | vq->free_head = i; | ||
119 | |||
120 | /* Set token. */ | ||
121 | vq->data[head] = data; | ||
122 | |||
123 | /* Put entry in available array (but don't update avail->idx until they | ||
124 | * do sync). FIXME: avoid modulus here? */ | ||
125 | avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num; | ||
126 | vq->vring.avail->ring[avail] = head; | ||
127 | |||
128 | pr_debug("Added buffer head %i to %p\n", head, vq); | ||
129 | END_USE(vq); | ||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | static void vring_kick(struct virtqueue *_vq) | ||
134 | { | ||
135 | struct vring_virtqueue *vq = to_vvq(_vq); | ||
136 | START_USE(vq); | ||
137 | /* Descriptors and available array need to be set before we expose the | ||
138 | * new available array entries. */ | ||
139 | wmb(); | ||
140 | |||
141 | vq->vring.avail->idx += vq->num_added; | ||
142 | vq->num_added = 0; | ||
143 | |||
144 | /* Need to update avail index before checking if we should notify */ | ||
145 | mb(); | ||
146 | |||
147 | if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) | ||
148 | /* Prod other side to tell it about changes. */ | ||
149 | vq->notify(&vq->vq); | ||
150 | |||
151 | END_USE(vq); | ||
152 | } | ||
153 | |||
154 | static void detach_buf(struct vring_virtqueue *vq, unsigned int head) | ||
155 | { | ||
156 | unsigned int i; | ||
157 | |||
158 | /* Clear data ptr. */ | ||
159 | vq->data[head] = NULL; | ||
160 | |||
161 | /* Put back on free list: find end */ | ||
162 | i = head; | ||
163 | while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { | ||
164 | i = vq->vring.desc[i].next; | ||
165 | vq->num_free++; | ||
166 | } | ||
167 | |||
168 | vq->vring.desc[i].next = vq->free_head; | ||
169 | vq->free_head = head; | ||
170 | /* Plus final descriptor */ | ||
171 | vq->num_free++; | ||
172 | } | ||
173 | |||
174 | /* FIXME: We need to tell other side about removal, to synchronize. */ | ||
175 | static void vring_shutdown(struct virtqueue *_vq) | ||
176 | { | ||
177 | struct vring_virtqueue *vq = to_vvq(_vq); | ||
178 | unsigned int i; | ||
179 | |||
180 | for (i = 0; i < vq->vring.num; i++) | ||
181 | detach_buf(vq, i); | ||
182 | } | ||
183 | |||
184 | static inline bool more_used(const struct vring_virtqueue *vq) | ||
185 | { | ||
186 | return vq->last_used_idx != vq->vring.used->idx; | ||
187 | } | ||
188 | |||
189 | static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len) | ||
190 | { | ||
191 | struct vring_virtqueue *vq = to_vvq(_vq); | ||
192 | void *ret; | ||
193 | unsigned int i; | ||
194 | |||
195 | START_USE(vq); | ||
196 | |||
197 | if (!more_used(vq)) { | ||
198 | pr_debug("No more buffers in queue\n"); | ||
199 | END_USE(vq); | ||
200 | return NULL; | ||
201 | } | ||
202 | |||
203 | i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; | ||
204 | *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; | ||
205 | |||
206 | if (unlikely(i >= vq->vring.num)) { | ||
207 | BAD_RING(vq, "id %u out of range\n", i); | ||
208 | return NULL; | ||
209 | } | ||
210 | if (unlikely(!vq->data[i])) { | ||
211 | BAD_RING(vq, "id %u is not a head!\n", i); | ||
212 | return NULL; | ||
213 | } | ||
214 | |||
215 | /* detach_buf clears data, so grab it now. */ | ||
216 | ret = vq->data[i]; | ||
217 | detach_buf(vq, i); | ||
218 | vq->last_used_idx++; | ||
219 | END_USE(vq); | ||
220 | return ret; | ||
221 | } | ||
222 | |||
223 | static bool vring_restart(struct virtqueue *_vq) | ||
224 | { | ||
225 | struct vring_virtqueue *vq = to_vvq(_vq); | ||
226 | |||
227 | START_USE(vq); | ||
228 | BUG_ON(!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)); | ||
229 | |||
230 | /* We optimistically turn back on interrupts, then check if there was | ||
231 | * more to do. */ | ||
232 | vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; | ||
233 | mb(); | ||
234 | if (unlikely(more_used(vq))) { | ||
235 | vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; | ||
236 | END_USE(vq); | ||
237 | return false; | ||
238 | } | ||
239 | |||
240 | END_USE(vq); | ||
241 | return true; | ||
242 | } | ||
243 | |||
244 | irqreturn_t vring_interrupt(int irq, void *_vq) | ||
245 | { | ||
246 | struct vring_virtqueue *vq = to_vvq(_vq); | ||
247 | |||
248 | if (!more_used(vq)) { | ||
249 | pr_debug("virtqueue interrupt with no work for %p\n", vq); | ||
250 | return IRQ_NONE; | ||
251 | } | ||
252 | |||
253 | if (unlikely(vq->broken)) | ||
254 | return IRQ_HANDLED; | ||
255 | |||
256 | pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); | ||
257 | if (vq->vq.callback && !vq->vq.callback(&vq->vq)) | ||
258 | vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; | ||
259 | |||
260 | return IRQ_HANDLED; | ||
261 | } | ||
262 | |||
263 | static struct virtqueue_ops vring_vq_ops = { | ||
264 | .add_buf = vring_add_buf, | ||
265 | .get_buf = vring_get_buf, | ||
266 | .kick = vring_kick, | ||
267 | .restart = vring_restart, | ||
268 | .shutdown = vring_shutdown, | ||
269 | }; | ||
270 | |||
271 | struct virtqueue *vring_new_virtqueue(unsigned int num, | ||
272 | struct virtio_device *vdev, | ||
273 | void *pages, | ||
274 | void (*notify)(struct virtqueue *), | ||
275 | bool (*callback)(struct virtqueue *)) | ||
276 | { | ||
277 | struct vring_virtqueue *vq; | ||
278 | unsigned int i; | ||
279 | |||
280 | vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL); | ||
281 | if (!vq) | ||
282 | return NULL; | ||
283 | |||
284 | vring_init(&vq->vring, num, pages); | ||
285 | vq->vq.callback = callback; | ||
286 | vq->vq.vdev = vdev; | ||
287 | vq->vq.vq_ops = &vring_vq_ops; | ||
288 | vq->notify = notify; | ||
289 | vq->broken = false; | ||
290 | vq->last_used_idx = 0; | ||
291 | vq->num_added = 0; | ||
292 | #ifdef DEBUG | ||
293 | vq->in_use = false; | ||
294 | #endif | ||
295 | |||
296 | /* No callback? Tell other side not to bother us. */ | ||
297 | if (!callback) | ||
298 | vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; | ||
299 | |||
300 | /* Put everything in free lists. */ | ||
301 | vq->num_free = num; | ||
302 | vq->free_head = 0; | ||
303 | for (i = 0; i < num-1; i++) | ||
304 | vq->vring.desc[i].next = i+1; | ||
305 | |||
306 | return &vq->vq; | ||
307 | } | ||
308 | |||
309 | void vring_del_virtqueue(struct virtqueue *vq) | ||
310 | { | ||
311 | kfree(to_vvq(vq)); | ||
312 | } | ||
313 | |||