Diffstat (limited to 'drivers/virtio/virtio_ring.c')
-rw-r--r--  drivers/virtio/virtio_ring.c | 297
 1 file changed, 207 insertions(+), 90 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index ffd7e7da5d3b..5217baf5528c 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -24,27 +24,6 @@
 #include <linux/module.h>
 #include <linux/hrtimer.h>
 
-/* virtio guest is communicating with a virtual "device" that actually runs on
- * a host processor.  Memory barriers are used to control SMP effects. */
-#ifdef CONFIG_SMP
-/* Where possible, use SMP barriers which are more lightweight than mandatory
- * barriers, because mandatory barriers control MMIO effects on accesses
- * through relaxed memory I/O windows (which virtio-pci does not use). */
-#define virtio_mb(vq) \
-	do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0)
-#define virtio_rmb(vq) \
-	do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0)
-#define virtio_wmb(vq) \
-	do { if ((vq)->weak_barriers) smp_wmb(); else wmb(); } while(0)
-#else
-/* We must force memory ordering even if guest is UP since host could be
- * running on another CPU, but SMP barriers are defined to barrier() in that
- * configuration.  So fall back to mandatory barriers instead. */
-#define virtio_mb(vq) mb()
-#define virtio_rmb(vq) rmb()
-#define virtio_wmb(vq) wmb()
-#endif
-
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
 #define BAD_RING(_vq, fmt, args...)				\
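
The barrier macros removed above do not disappear from the kernel: every call site later in this patch passes vq->weak_barriers directly, so the helpers are assumed to move into a shared virtio header as functions of a bool. A minimal sketch of that assumed replacement:

/* Sketch (assumption, not shown by this patch): the replacement
 * helpers take the weak_barriers flag itself.  Weak SMP barriers
 * suffice when guest and host only need ordering between CPUs;
 * mandatory barriers remain for transports that order real MMIO
 * through relaxed I/O windows. */
static inline void virtio_mb(bool weak_barriers)
{
	if (weak_barriers)
		smp_mb();
	else
		mb();
}

static inline void virtio_rmb(bool weak_barriers)
{
	if (weak_barriers)
		smp_rmb();
	else
		rmb();
}

static inline void virtio_wmb(bool weak_barriers)
{
	if (weak_barriers)
		smp_wmb();
	else
		wmb();
}
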
@@ -119,16 +98,36 @@ struct vring_virtqueue
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
+static inline struct scatterlist *sg_next_chained(struct scatterlist *sg,
+						  unsigned int *count)
+{
+	return sg_next(sg);
+}
+
+static inline struct scatterlist *sg_next_arr(struct scatterlist *sg,
+					      unsigned int *count)
+{
+	if (--(*count) == 0)
+		return NULL;
+	return sg + 1;
+}
+
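
These two helpers share one signature so the common code below can walk either input style through a function pointer: sg_next_chained() follows a properly terminated scatterlist via sg_next() and ignores the running count, while sg_next_arr() steps through a bare array and stops after *count entries. A hypothetical illustration of the array convention (demo_walk is invented for this sketch):

/* Hypothetical sketch: walking a bare, possibly unterminated array
 * with the sg_next_arr() convention -- the element count, not an
 * end marker, terminates the loop. */
static void demo_walk(struct scatterlist sg[], unsigned int num)
{
	unsigned int count = num;
	struct scatterlist *cur;

	for (cur = sg; cur; cur = sg_next_arr(cur, &count))
		pr_debug("entry of %u bytes\n", cur->length);
}
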
 /* Set up an indirect table of descriptors and add it to the queue. */
-static int vring_add_indirect(struct vring_virtqueue *vq,
-			      struct scatterlist sg[],
-			      unsigned int out,
-			      unsigned int in,
-			      gfp_t gfp)
+static inline int vring_add_indirect(struct vring_virtqueue *vq,
+				     struct scatterlist *sgs[],
+				     struct scatterlist *(*next)
+				       (struct scatterlist *, unsigned int *),
+				     unsigned int total_sg,
+				     unsigned int total_out,
+				     unsigned int total_in,
+				     unsigned int out_sgs,
+				     unsigned int in_sgs,
+				     gfp_t gfp)
 {
 	struct vring_desc *desc;
 	unsigned head;
-	int i;
+	struct scatterlist *sg;
+	int i, n;
 
 	/*
 	 * We require lowmem mappings for the descriptors because
@@ -137,25 +136,31 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 	 */
 	gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
 
-	desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
+	desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
 	if (!desc)
 		return -ENOMEM;
 
-	/* Transfer entries from the sg list into the indirect page */
-	for (i = 0; i < out; i++) {
-		desc[i].flags = VRING_DESC_F_NEXT;
-		desc[i].addr = sg_phys(sg);
-		desc[i].len = sg->length;
-		desc[i].next = i+1;
-		sg++;
+	/* Transfer entries from the sg lists into the indirect page */
+	i = 0;
+	for (n = 0; n < out_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
+			desc[i].flags = VRING_DESC_F_NEXT;
+			desc[i].addr = sg_phys(sg);
+			desc[i].len = sg->length;
+			desc[i].next = i+1;
+			i++;
+		}
 	}
-	for (; i < (out + in); i++) {
-		desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
-		desc[i].addr = sg_phys(sg);
-		desc[i].len = sg->length;
-		desc[i].next = i+1;
-		sg++;
+	for (; n < (out_sgs + in_sgs); n++) {
+		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
+			desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+			desc[i].addr = sg_phys(sg);
+			desc[i].len = sg->length;
+			desc[i].next = i+1;
+			i++;
+		}
 	}
+	BUG_ON(i != total_sg);
 
 	/* Last one doesn't continue. */
 	desc[i-1].flags &= ~VRING_DESC_F_NEXT;
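
For context on why going indirect is worthwhile: the table built above consumes a single slot in the ring proper (flagged VRING_DESC_F_INDIRECT and pointing at the kmalloc'd memory) instead of total_sg slots, which matters on small rings. Each table entry uses the standard 16-byte descriptor from the virtio ring header:

/* Standard vring descriptor; the same layout is used both in the
 * ring itself and inside indirect tables. */
struct vring_desc {
	__u64 addr;	/* buffer address (guest-physical) */
	__u32 len;	/* buffer length in bytes */
	__u16 flags;	/* NEXT, WRITE, INDIRECT */
	__u16 next;	/* index of the chained descriptor */
};
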
@@ -176,29 +181,20 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 	return head;
 }
 
-/**
- * virtqueue_add_buf - expose buffer to other end
- * @vq: the struct virtqueue we're talking about.
- * @sg: the description of the buffer(s).
- * @out_num: the number of sg readable by other side
- * @in_num: the number of sg which are writable (after readable ones)
- * @data: the token identifying the buffer.
- * @gfp: how to do memory allocations (if necessary).
- *
- * Caller must ensure we don't call this with other virtqueue operations
- * at the same time (except where noted).
- *
- * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
- */
-int virtqueue_add_buf(struct virtqueue *_vq,
-		      struct scatterlist sg[],
-		      unsigned int out,
-		      unsigned int in,
-		      void *data,
-		      gfp_t gfp)
+static inline int virtqueue_add(struct virtqueue *_vq,
+				struct scatterlist *sgs[],
+				struct scatterlist *(*next)
+				  (struct scatterlist *, unsigned int *),
+				unsigned int total_out,
+				unsigned int total_in,
+				unsigned int out_sgs,
+				unsigned int in_sgs,
+				void *data,
+				gfp_t gfp)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	unsigned int i, avail, uninitialized_var(prev);
+	struct scatterlist *sg;
+	unsigned int i, n, avail, uninitialized_var(prev), total_sg;
 	int head;
 
 	START_USE(vq);
@@ -218,46 +214,54 @@ int virtqueue_add_buf(struct virtqueue *_vq,
 	}
 #endif
 
+	total_sg = total_in + total_out;
+
 	/* If the host supports indirect descriptor tables, and we have multiple
 	 * buffers, then go indirect. FIXME: tune this threshold */
-	if (vq->indirect && (out + in) > 1 && vq->vq.num_free) {
-		head = vring_add_indirect(vq, sg, out, in, gfp);
+	if (vq->indirect && total_sg > 1 && vq->vq.num_free) {
+		head = vring_add_indirect(vq, sgs, next, total_sg, total_out,
+					  total_in,
+					  out_sgs, in_sgs, gfp);
 		if (likely(head >= 0))
 			goto add_head;
 	}
 
-	BUG_ON(out + in > vq->vring.num);
-	BUG_ON(out + in == 0);
+	BUG_ON(total_sg > vq->vring.num);
+	BUG_ON(total_sg == 0);
 
-	if (vq->vq.num_free < out + in) {
+	if (vq->vq.num_free < total_sg) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
-			 out + in, vq->vq.num_free);
+			 total_sg, vq->vq.num_free);
 		/* FIXME: for historical reasons, we force a notify here if
 		 * there are outgoing parts to the buffer.  Presumably the
 		 * host should service the ring ASAP. */
-		if (out)
+		if (out_sgs)
 			vq->notify(&vq->vq);
 		END_USE(vq);
 		return -ENOSPC;
 	}
 
 	/* We're about to use some buffers from the free list. */
-	vq->vq.num_free -= out + in;
-
-	head = vq->free_head;
-	for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
-		vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
-		vq->vring.desc[i].addr = sg_phys(sg);
-		vq->vring.desc[i].len = sg->length;
-		prev = i;
-		sg++;
+	vq->vq.num_free -= total_sg;
+
+	head = i = vq->free_head;
+	for (n = 0; n < out_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
+			vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
+			vq->vring.desc[i].addr = sg_phys(sg);
+			vq->vring.desc[i].len = sg->length;
+			prev = i;
+			i = vq->vring.desc[i].next;
+		}
 	}
-	for (; in; i = vq->vring.desc[i].next, in--) {
-		vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
-		vq->vring.desc[i].addr = sg_phys(sg);
-		vq->vring.desc[i].len = sg->length;
-		prev = i;
-		sg++;
+	for (; n < (out_sgs + in_sgs); n++) {
+		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
+			vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+			vq->vring.desc[i].addr = sg_phys(sg);
+			vq->vring.desc[i].len = sg->length;
+			prev = i;
+			i = vq->vring.desc[i].next;
+		}
 	}
 	/* Last one doesn't continue. */
 	vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
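
The rewritten loops consume ring descriptors by following the free list threaded through the same next fields, rather than by pointer arithmetic over one flat sg array. A sketch of the assumed setup that makes this work, done once when the virtqueue is created:

/* Sketch of the assumed free-list initialization: each unused
 * descriptor names its successor, so taking total_sg descriptors is
 * just total_sg hops along desc[i].next starting at free_head. */
vq->free_head = 0;
for (i = 0; i < vq->vring.num - 1; i++)
	vq->vring.desc[i].next = i + 1;
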
@@ -276,7 +280,7 @@ add_head:
 
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
-	virtio_wmb(vq);
+	virtio_wmb(vq->weak_barriers);
 	vq->vring.avail->idx++;
 	vq->num_added++;
 
@@ -290,9 +294,122 @@ add_head:
 
 	return 0;
 }
+
+/**
+ * virtqueue_add_buf - expose buffer to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: the description of the buffer(s).
+ * @out_num: the number of sg readable by other side
+ * @in_num: the number of sg which are writable (after readable ones)
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
+ */
+int virtqueue_add_buf(struct virtqueue *_vq,
+		      struct scatterlist sg[],
+		      unsigned int out,
+		      unsigned int in,
+		      void *data,
+		      gfp_t gfp)
+{
+	struct scatterlist *sgs[2];
+
+	sgs[0] = sg;
+	sgs[1] = sg + out;
+
+	return virtqueue_add(_vq, sgs, sg_next_arr,
+			     out, in, out ? 1 : 0, in ? 1 : 0, data, gfp);
+}
 EXPORT_SYMBOL_GPL(virtqueue_add_buf);
 
 /**
+ * virtqueue_add_sgs - expose buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sgs: array of terminated scatterlists.
+ * @out_num: the number of scatterlists readable by other side
+ * @in_num: the number of scatterlists which are writable (after readable ones)
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
+ */
+int virtqueue_add_sgs(struct virtqueue *_vq,
+		      struct scatterlist *sgs[],
+		      unsigned int out_sgs,
+		      unsigned int in_sgs,
+		      void *data,
+		      gfp_t gfp)
+{
+	unsigned int i, total_out, total_in;
+
+	/* Count them first. */
+	for (i = total_out = total_in = 0; i < out_sgs; i++) {
+		struct scatterlist *sg;
+		for (sg = sgs[i]; sg; sg = sg_next(sg))
+			total_out++;
+	}
+	for (; i < out_sgs + in_sgs; i++) {
+		struct scatterlist *sg;
+		for (sg = sgs[i]; sg; sg = sg_next(sg))
+			total_in++;
+	}
+	return virtqueue_add(_vq, sgs, sg_next_chained,
+			     total_out, total_in, out_sgs, in_sgs, data, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
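
The point of the multi-scatterlist entry point is callers that mix directions in one request. A hypothetical block-style caller (req_hdr, req_status, data_sg, is_write and req are invented names, not part of this patch):

/* Hypothetical caller: one device-readable header, a data payload in
 * either direction, and one device-writable status byte, queued as a
 * single buffer.  Out scatterlists must precede in scatterlists. */
struct scatterlist hdr, status, *sgs[3];
unsigned int out_sgs = 0, in_sgs = 0;

sg_init_one(&hdr, &req_hdr, sizeof(req_hdr));
sgs[out_sgs++] = &hdr;				/* device reads this */

if (is_write)
	sgs[out_sgs++] = data_sg;		/* device reads payload */
else
	sgs[out_sgs + in_sgs++] = data_sg;	/* device writes payload */

sg_init_one(&status, &req_status, sizeof(req_status));
sgs[out_sgs + in_sgs++] = &status;		/* device writes this */

err = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
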
+
+/**
+ * virtqueue_add_outbuf - expose output buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sgs: array of scatterlists (need not be terminated!)
+ * @num: the number of scatterlists readable by other side
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
+ */
+int virtqueue_add_outbuf(struct virtqueue *vq,
+			 struct scatterlist sg[], unsigned int num,
+			 void *data,
+			 gfp_t gfp)
+{
+	return virtqueue_add(vq, &sg, sg_next_arr, num, 0, 1, 0, data, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
+
+/**
+ * virtqueue_add_inbuf - expose input buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sgs: array of scatterlists (need not be terminated!)
+ * @num: the number of scatterlists writable by other side
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
+ */
+int virtqueue_add_inbuf(struct virtqueue *vq,
+			struct scatterlist sg[], unsigned int num,
+			void *data,
+			gfp_t gfp)
+{
+	return virtqueue_add(vq, &sg, sg_next_arr, 0, num, 0, 1, data, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
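
For single-direction queues the two wrappers above drop both the direction bookkeeping and, because they go through sg_next_arr(), the requirement that the array be sg_next()-terminated. A hypothetical receive-side refill loop (buf_len is an invented name; the buffer pointer doubles as the token):

/* Hypothetical refill loop: hand the device empty, device-writable
 * buffers until the ring fills; -ENOSPC is the expected way out. */
struct scatterlist sg;
void *buf;

for (;;) {
	buf = kmalloc(buf_len, GFP_KERNEL);
	if (!buf)
		break;
	sg_init_one(&sg, buf, buf_len);
	if (virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_KERNEL) < 0) {
		kfree(buf);	/* typically -ENOSPC: ring is full */
		break;
	}
}
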
+
+/**
  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  * @vq: the struct virtqueue
  *
@@ -312,7 +429,7 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq)
 	START_USE(vq);
 	/* We need to expose available array entries before checking avail
 	 * event. */
-	virtio_mb(vq);
+	virtio_mb(vq->weak_barriers);
 
 	old = vq->vring.avail->idx - vq->num_added;
 	new = vq->vring.avail->idx;
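
The barrier here is what makes the two-stage kick safe: the avail->idx store must be visible to the host before the event index is read. The split exists so a driver can decide under its lock whether a notification is due and do the expensive host exit outside it; a sketch of the pattern (vq_lock and token are invented names; virtqueue_kick() itself is just these two calls fused):

/* Sketch of the split-kick pattern: check under the lock, notify
 * outside it. */
bool kick;

spin_lock_irqsave(&vq_lock, flags);
err = virtqueue_add_outbuf(vq, sg, num, token, GFP_ATOMIC);
kick = virtqueue_kick_prepare(vq);
spin_unlock_irqrestore(&vq_lock, flags);

if (kick)
	virtqueue_notify(vq);
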
@@ -436,7 +553,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 	}
 
 	/* Only get used array entries after they have been exposed by host. */
-	virtio_rmb(vq);
+	virtio_rmb(vq->weak_barriers);
 
 	last_used = (vq->last_used_idx & (vq->vring.num - 1));
 	i = vq->vring.used->ring[last_used].id;
@@ -460,7 +577,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 	 * the read in the next get_buf call. */
 	if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
 		vring_used_event(&vq->vring) = vq->last_used_idx;
-		virtio_mb(vq);
+		virtio_mb(vq->weak_barriers);
 	}
 
 #ifdef DEBUG
@@ -513,7 +630,7 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
 	 * entry. Always do both to keep code simple. */
 	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
 	vring_used_event(&vq->vring) = vq->last_used_idx;
-	virtio_mb(vq);
+	virtio_mb(vq->weak_barriers);
 	if (unlikely(more_used(vq))) {
 		END_USE(vq);
 		return false;
@@ -553,7 +670,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 	/* TODO: tune this threshold */
 	bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
 	vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
-	virtio_mb(vq);
+	virtio_mb(vq->weak_barriers);
 	if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
 		END_USE(vq);
 		return false;
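
A worked instance of the threshold in this last hunk, since the u16 casts are easy to misread:

/* Worked example of the 3/4 heuristic (values invented):
 * avail->idx = 108, last_used_idx = 100  ->  8 buffers in flight,
 * bufs = 8 * 3 / 4 = 6, used event = 106, so the host is asked to
 * interrupt only once used->idx passes 106.  The u16 casts keep the
 * subtractions correct after the 16-bit indices wrap. */
u16 bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
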