author     Andy Lutomirski <luto@kernel.org>      2016-02-03 00:46:36 -0500
committer  Michael S. Tsirkin <mst@redhat.com>    2016-03-02 10:01:57 -0500
commit     780bc7903a32edb63be138487fd981694d993610 (patch)
tree       b1893866fc5223937f84afe43b03eb1af1964fd6
parent     d26c96c8102549f91eb0bea6196d54711ab52176 (diff)
virtio_ring: Support DMA APIs
virtio_ring currently sends the device (usually a hypervisor)
physical addresses of its I/O buffers.  This is okay when DMA
addresses and physical addresses are the same thing, but this isn't
always the case.  For example, this never works on Xen guests, and
it is likely to fail if a physical "virtio" device ever ends up
behind an IOMMU or swiotlb.

The immediate use case for me is to enable virtio on Xen guests.
For that to work, we need vring to support DMA address translation
as well as a corresponding change to virtio_pci or to another driver.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 drivers/virtio/Kconfig           |   2 +-
 drivers/virtio/virtio_ring.c     | 200 ++++++++++++++++++++++++++++---------
 tools/virtio/linux/dma-mapping.h |  17 ++++
 3 files changed, 183 insertions(+), 36 deletions(-)
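The heart of the change is visible in the new mapping helpers below: when vring_use_dma_api() says the transport needs no translation, the ring keeps handing the device raw physical addresses, otherwise every buffer is mapped through the parent device's DMA ops so that an IOMMU, swiotlb, or Xen's translation layer is honoured. A minimal, self-contained sketch of that pattern follows; example_map_buf(), together with its dev and use_dma_api parameters, is an illustrative placeholder and not part of the patch:

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

/*
 * Sketch only: mirrors the idea behind vring_map_one_sg() in the patch.
 * "dev" stands in for the vring's parent device and "use_dma_api" for
 * the vring_use_dma_api() decision.
 */
static dma_addr_t example_map_buf(struct device *dev, bool use_dma_api,
				  struct scatterlist *sg)
{
	if (!use_dma_api)
		/* Legacy behaviour: bus address == physical address. */
		return (dma_addr_t)sg_phys(sg);

	/* Let the DMA API translate (IOMMU, swiotlb, Xen, ...). */
	return dma_map_page(dev, sg_page(sg), sg->offset, sg->length,
			    DMA_TO_DEVICE);
}

The completion path makes the symmetric decision: if the DMA API was used for mapping, the descriptor must be unmapped with dma_unmap_page()/dma_unmap_single() when it is recycled, which is what the new vring_unmap_one() helper in the patch handles.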
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index cab9f3f63a38..77590320d44c 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -60,7 +60,7 @@ config VIRTIO_INPUT
 
 config VIRTIO_MMIO
 	tristate "Platform bus driver for memory mapped virtio devices"
-	depends on HAS_IOMEM
+	depends on HAS_IOMEM && HAS_DMA
 	select VIRTIO
 	---help---
 	 This drivers provides support for memory mapped virtio
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index ab0be6c084f6..9abc008ff7ea 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/hrtimer.h>
 #include <linux/kmemleak.h>
+#include <linux/dma-mapping.h>
 
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
@@ -54,6 +55,11 @@
 #define END_USE(vq)
 #endif
 
+struct vring_desc_state {
+	void *data;			/* Data for callback. */
+	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
+};
+
 struct vring_virtqueue {
 	struct virtqueue vq;
 
@@ -98,8 +104,8 @@ struct vring_virtqueue {
 	ktime_t last_add_time;
 #endif
 
-	/* Tokens for callbacks. */
-	void *data[];
+	/* Per-descriptor state. */
+	struct vring_desc_state desc_state[];
 };
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
@@ -128,6 +134,79 @@ static bool vring_use_dma_api(struct virtio_device *vdev)
 	return false;
 }
 
+/*
+ * The DMA ops on various arches are rather gnarly right now, and
+ * making all of the arch DMA ops work on the vring device itself
+ * is a mess.  For now, we use the parent device for DMA ops.
+ */
+struct device *vring_dma_dev(const struct vring_virtqueue *vq)
+{
+	return vq->vq.vdev->dev.parent;
+}
+
+/* Map one sg entry. */
+static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
+				   struct scatterlist *sg,
+				   enum dma_data_direction direction)
+{
+	if (!vring_use_dma_api(vq->vq.vdev))
+		return (dma_addr_t)sg_phys(sg);
+
+	/*
+	 * We can't use dma_map_sg, because we don't use scatterlists in
+	 * the way it expects (we don't guarantee that the scatterlist
+	 * will exist for the lifetime of the mapping).
+	 */
+	return dma_map_page(vring_dma_dev(vq),
+			    sg_page(sg), sg->offset, sg->length,
+			    direction);
+}
+
+static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
+				   void *cpu_addr, size_t size,
+				   enum dma_data_direction direction)
+{
+	if (!vring_use_dma_api(vq->vq.vdev))
+		return (dma_addr_t)virt_to_phys(cpu_addr);
+
+	return dma_map_single(vring_dma_dev(vq),
+			      cpu_addr, size, direction);
+}
+
+static void vring_unmap_one(const struct vring_virtqueue *vq,
+			    struct vring_desc *desc)
+{
+	u16 flags;
+
+	if (!vring_use_dma_api(vq->vq.vdev))
+		return;
+
+	flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
+
+	if (flags & VRING_DESC_F_INDIRECT) {
+		dma_unmap_single(vring_dma_dev(vq),
+				 virtio64_to_cpu(vq->vq.vdev, desc->addr),
+				 virtio32_to_cpu(vq->vq.vdev, desc->len),
+				 (flags & VRING_DESC_F_WRITE) ?
+				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	} else {
+		dma_unmap_page(vring_dma_dev(vq),
+			       virtio64_to_cpu(vq->vq.vdev, desc->addr),
+			       virtio32_to_cpu(vq->vq.vdev, desc->len),
+			       (flags & VRING_DESC_F_WRITE) ?
+			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	}
+}
+
+static int vring_mapping_error(const struct vring_virtqueue *vq,
+			       dma_addr_t addr)
+{
+	if (!vring_use_dma_api(vq->vq.vdev))
+		return 0;
+
+	return dma_mapping_error(vring_dma_dev(vq), addr);
+}
+
 static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
 					 unsigned int total_sg, gfp_t gfp)
 {
@@ -161,7 +240,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
 	struct vring_desc *desc;
-	unsigned int i, n, avail, descs_used, uninitialized_var(prev);
+	unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx;
 	int head;
 	bool indirect;
 
@@ -201,21 +280,15 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 
 	if (desc) {
 		/* Use a single buffer which doesn't continue */
-		vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
-		vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, virt_to_phys(desc));
-		/* avoid kmemleak false positive (hidden by virt_to_phys) */
-		kmemleak_ignore(desc);
-		vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
-
+		indirect = true;
 		/* Set up rest to use this indirect table. */
 		i = 0;
 		descs_used = 1;
-		indirect = true;
 	} else {
+		indirect = false;
 		desc = vq->vring.desc;
 		i = head;
 		descs_used = total_sg;
-		indirect = false;
 	}
 
 	if (vq->vq.num_free < descs_used) {
@@ -230,13 +303,14 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 		return -ENOSPC;
 	}
 
-	/* We're about to use some buffers from the free list. */
-	vq->vq.num_free -= descs_used;
-
 	for (n = 0; n < out_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
+			if (vring_mapping_error(vq, addr))
+				goto unmap_release;
+
 			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
-			desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg));
+			desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
 			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
 			prev = i;
 			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
@@ -244,8 +318,12 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
+			if (vring_mapping_error(vq, addr))
+				goto unmap_release;
+
 			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
-			desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg));
+			desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
 			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
 			prev = i;
 			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
@@ -254,14 +332,33 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	/* Last one doesn't continue. */
 	desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
 
+	if (indirect) {
+		/* Now that the indirect table is filled in, map it. */
+		dma_addr_t addr = vring_map_single(
+			vq, desc, total_sg * sizeof(struct vring_desc),
+			DMA_TO_DEVICE);
+		if (vring_mapping_error(vq, addr))
+			goto unmap_release;
+
+		vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
+		vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr);
+
+		vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
+	}
+
+	/* We're using some buffers from the free list. */
+	vq->vq.num_free -= descs_used;
+
 	/* Update free pointer */
 	if (indirect)
 		vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next);
 	else
 		vq->free_head = i;
 
-	/* Set token. */
-	vq->data[head] = data;
+	/* Store token and indirect buffer state. */
+	vq->desc_state[head].data = data;
+	if (indirect)
+		vq->desc_state[head].indir_desc = desc;
 
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
@@ -284,6 +381,24 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	virtqueue_kick(_vq);
 
 	return 0;
+
+unmap_release:
+	err_idx = i;
+	i = head;
+
+	for (n = 0; n < total_sg; n++) {
+		if (i == err_idx)
+			break;
+		vring_unmap_one(vq, &desc[i]);
+		i = vq->vring.desc[i].next;
+	}
+
+	vq->vq.num_free += total_sg;
+
+	if (indirect)
+		kfree(desc);
+
+	return -EIO;
 }
 
 /**
@@ -454,27 +569,43 @@ EXPORT_SYMBOL_GPL(virtqueue_kick);
 
 static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 {
-	unsigned int i;
+	unsigned int i, j;
+	u16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
 
 	/* Clear data ptr. */
-	vq->data[head] = NULL;
+	vq->desc_state[head].data = NULL;
 
-	/* Put back on free list: find end */
+	/* Put back on free list: unmap first-level descriptors and find end */
 	i = head;
 
-	/* Free the indirect table */
-	if (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))
-		kfree(phys_to_virt(virtio64_to_cpu(vq->vq.vdev, vq->vring.desc[i].addr)));
-
-	while (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT)) {
+	while (vq->vring.desc[i].flags & nextflag) {
+		vring_unmap_one(vq, &vq->vring.desc[i]);
 		i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
 		vq->vq.num_free++;
 	}
 
+	vring_unmap_one(vq, &vq->vring.desc[i]);
 	vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
 	vq->free_head = head;
+
 	/* Plus final descriptor */
 	vq->vq.num_free++;
+
+	/* Free the indirect table, if any, now that it's unmapped. */
+	if (vq->desc_state[head].indir_desc) {
+		struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
+		u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
+
+		BUG_ON(!(vq->vring.desc[head].flags &
+			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
+		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
+
+		for (j = 0; j < len / sizeof(struct vring_desc); j++)
+			vring_unmap_one(vq, &indir_desc[j]);
+
+		kfree(vq->desc_state[head].indir_desc);
+		vq->desc_state[head].indir_desc = NULL;
+	}
 }
 
 static inline bool more_used(const struct vring_virtqueue *vq)
@@ -529,13 +660,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 		BAD_RING(vq, "id %u out of range\n", i);
 		return NULL;
 	}
-	if (unlikely(!vq->data[i])) {
+	if (unlikely(!vq->desc_state[i].data)) {
 		BAD_RING(vq, "id %u is not a head!\n", i);
 		return NULL;
 	}
 
 	/* detach_buf clears data, so grab it now. */
-	ret = vq->data[i];
+	ret = vq->desc_state[i].data;
 	detach_buf(vq, i);
 	vq->last_used_idx++;
 	/* If we expect an interrupt for the next entry, tell host
@@ -709,10 +840,10 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 	START_USE(vq);
 
 	for (i = 0; i < vq->vring.num; i++) {
-		if (!vq->data[i])
+		if (!vq->desc_state[i].data)
 			continue;
 		/* detach_buf clears data, so grab it now. */
-		buf = vq->data[i];
+		buf = vq->desc_state[i].data;
 		detach_buf(vq, i);
 		vq->avail_idx_shadow--;
 		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
@@ -766,7 +897,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 		return NULL;
 	}
 
-	vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
+	vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state),
+		     GFP_KERNEL);
 	if (!vq)
 		return NULL;
 
@@ -800,11 +932,9 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 
 	/* Put everything in free lists. */
 	vq->free_head = 0;
-	for (i = 0; i < num-1; i++) {
+	for (i = 0; i < num-1; i++)
 		vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
-		vq->data[i] = NULL;
-	}
-	vq->data[i] = NULL;
+	memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state));
 
 	return &vq->vq;
 }
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h
new file mode 100644
index 000000000000..4f93af89ae16
--- /dev/null
+++ b/tools/virtio/linux/dma-mapping.h
@@ -0,0 +1,17 @@
+#ifndef _LINUX_DMA_MAPPING_H
+#define _LINUX_DMA_MAPPING_H
+
+#ifdef CONFIG_HAS_DMA
+# error Virtio userspace code does not support CONFIG_HAS_DMA
+#endif
+
+#define PCI_DMA_BUS_IS_PHYS 1
+
+enum dma_data_direction {
+	DMA_BIDIRECTIONAL = 0,
+	DMA_TO_DEVICE = 1,
+	DMA_FROM_DEVICE = 2,
+	DMA_NONE = 3,
+};
+
+#endif