author	Mark McLoughlin <markmc@redhat.com>	2009-05-11 13:11:45 -0400
committer	Rusty Russell <rusty@rustcorp.com.au>	2009-06-12 08:46:39 -0400
commit	9fa29b9df32ba4db055f3977933cd0c1b8fe67cd (patch)
tree	c44177d2d352f74300fe9124decc6462c8804bbe
parent	ee006b353f1ca8c9a8470b72b462beb011d62e32 (diff)
virtio: indirect ring entries (VIRTIO_RING_F_INDIRECT_DESC)
Add a new feature flag for indirect ring entries. These are ring entries
which point to a table of buffer descriptors.

The idea here is to increase the ring capacity by allowing a larger
effective ring size whereby the ring size dictates the number of requests
that may be outstanding, rather than the size of those requests.

This should be most effective in the case of block I/O where we can
potentially benefit by concurrently dispatching a large number of large
requests. Even in the simple case of single segment block requests, this
results in a threefold increase in ring capacity.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
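To make the capacity arithmetic concrete, here is a small standalone
illustration. It is not part of the patch: the 128-entry ring and the
three-descriptor single-segment block request layout (out header, data
segment, in status byte) are assumptions chosen for the sake of the numbers.

/* Illustration only, not from this patch.  Assumes a 128-entry ring and
 * the usual single-segment virtio-blk request: one out header
 * descriptor, one data descriptor, one in status descriptor. */
#include <stdio.h>

int main(void)
{
	unsigned int ring_size = 128;	/* assumed vring size */
	unsigned int descs_per_req = 3;	/* header + data + status */

	/* Direct chaining: every descriptor comes out of the ring. */
	printf("direct:   %u requests outstanding\n",
	       ring_size / descs_per_req);		/* 42 */

	/* Indirect: the 3-descriptor chain lives in a separately
	 * allocated table, so each request consumes exactly one ring
	 * entry -- the threefold increase mentioned above. */
	printf("indirect: %u requests outstanding\n", ring_size);	/* 128 */
	return 0;
}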
 drivers/virtio/virtio_ring.c | 75 ++++++++++++++++++++++++++++++++++++++++--
 include/linux/virtio_ring.h  |  5 +++++
 2 files changed, 78 insertions(+), 2 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 579fa693d5d0..a882f2606515 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -61,6 +61,9 @@ struct vring_virtqueue
 	/* Other side has made a mess, don't try any more. */
 	bool broken;
 
+	/* Host supports indirect buffers */
+	bool indirect;
+
 	/* Number of free buffers */
 	unsigned int num_free;
 	/* Head of free buffer list. */
@@ -85,6 +88,55 @@ struct vring_virtqueue
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
+/* Set up an indirect table of descriptors and add it to the queue. */
+static int vring_add_indirect(struct vring_virtqueue *vq,
+			      struct scatterlist sg[],
+			      unsigned int out,
+			      unsigned int in)
+{
+	struct vring_desc *desc;
+	unsigned head;
+	int i;
+
+	desc = kmalloc((out + in) * sizeof(struct vring_desc), GFP_ATOMIC);
+	if (!desc)
+		return vq->vring.num;
+
+	/* Transfer entries from the sg list into the indirect page */
+	for (i = 0; i < out; i++) {
+		desc[i].flags = VRING_DESC_F_NEXT;
+		desc[i].addr = sg_phys(sg);
+		desc[i].len = sg->length;
+		desc[i].next = i+1;
+		sg++;
+	}
+	for (; i < (out + in); i++) {
+		desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+		desc[i].addr = sg_phys(sg);
+		desc[i].len = sg->length;
+		desc[i].next = i+1;
+		sg++;
+	}
+
+	/* Last one doesn't continue. */
+	desc[i-1].flags &= ~VRING_DESC_F_NEXT;
+	desc[i-1].next = 0;
+
+	/* We're about to use a buffer */
+	vq->num_free--;
+
+	/* Use a single buffer which doesn't continue */
+	head = vq->free_head;
+	vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
+	vq->vring.desc[head].addr = virt_to_phys(desc);
+	vq->vring.desc[head].len = i * sizeof(struct vring_desc);
+
+	/* Update free pointer */
+	vq->free_head = vq->vring.desc[head].next;
+
+	return head;
+}
+
 static int vring_add_buf(struct virtqueue *_vq,
 			 struct scatterlist sg[],
 			 unsigned int out,
@@ -94,12 +146,21 @@ static int vring_add_buf(struct virtqueue *_vq,
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	unsigned int i, avail, head, uninitialized_var(prev);
 
+	START_USE(vq);
+
 	BUG_ON(data == NULL);
+
+	/* If the host supports indirect descriptor tables, and we have multiple
+	 * buffers, then go indirect. FIXME: tune this threshold */
+	if (vq->indirect && (out + in) > 1 && vq->num_free) {
+		head = vring_add_indirect(vq, sg, out, in);
+		if (head != vq->vring.num)
+			goto add_head;
+	}
+
 	BUG_ON(out + in > vq->vring.num);
 	BUG_ON(out + in == 0);
 
-	START_USE(vq);
-
 	if (vq->num_free < out + in) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
 			 out + in, vq->num_free);
@@ -136,6 +197,7 @@ static int vring_add_buf(struct virtqueue *_vq,
 	/* Update free pointer */
 	vq->free_head = i;
 
+add_head:
 	/* Set token. */
 	vq->data[head] = data;
 
@@ -179,6 +241,11 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 
 	/* Put back on free list: find end */
 	i = head;
+
+	/* Free the indirect table */
+	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
+		kfree(phys_to_virt(vq->vring.desc[i].addr));
+
 	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
 		i = vq->vring.desc[i].next;
 		vq->num_free++;
@@ -323,6 +390,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 	vq->in_use = false;
 #endif
 
+	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
+
 	/* No callback? Tell other side not to bother us. */
 	if (!callback)
 		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
@@ -351,6 +420,8 @@ void vring_transport_features(struct virtio_device *vdev)
 
 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
 		switch (i) {
+		case VIRTIO_RING_F_INDIRECT_DESC:
+			break;
 		default:
 			/* We don't understand this bit. */
 			clear_bit(i, vdev->features);
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 166c519689de..693e0ec5afa6 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -14,6 +14,8 @@
 #define VRING_DESC_F_NEXT	1
 /* This marks a buffer as write-only (otherwise read-only). */
 #define VRING_DESC_F_WRITE	2
+/* This means the buffer contains a list of buffer descriptors. */
+#define VRING_DESC_F_INDIRECT	4
 
 /* The Host uses this in used->flags to advise the Guest: don't kick me when
  * you add a buffer. It's unreliable, so it's simply an optimization. Guest
@@ -24,6 +26,9 @@
  * optimization. */
 #define VRING_AVAIL_F_NO_INTERRUPT	1
 
+/* We support indirect buffer descriptors */
+#define VIRTIO_RING_F_INDIRECT_DESC	28
+
 /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */
 struct vring_desc
 {
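For reference, a standalone sketch of what the single ring-level descriptor
for an indirect table looks like under the definitions added above. This is
a hypothetical userspace mirror of the vring_desc layout, illustration only;
in the driver the address is a guest-physical one obtained via
virt_to_phys(), as in vring_add_indirect().

/* Hypothetical userspace mirror of the vring_desc layout; illustration
 * only.  In the kernel, .addr holds a guest-physical address. */
#include <stdint.h>
#include <stdio.h>

#define VRING_DESC_F_INDIRECT	4	/* as added above */

struct vring_desc {
	uint64_t addr;	/* address (guest-physical in the driver) */
	uint32_t len;	/* length in bytes */
	uint16_t flags;
	uint16_t next;
};

int main(void)
{
	/* A 3-entry indirect table, e.g. header + data + status. */
	struct vring_desc table[3];

	/* The one ring slot that publishes the whole table: flagged
	 * INDIRECT, not NEXT, so it consumes a single ring entry. */
	struct vring_desc slot = {
		.addr  = (uint64_t)(uintptr_t)table,
		.len   = sizeof(table),	/* i * sizeof(struct vring_desc) above */
		.flags = VRING_DESC_F_INDIRECT,
		.next  = 0,
	};

	printf("one ring slot covers %u descriptors\n",
	       (unsigned int)(slot.len / sizeof(struct vring_desc)));
	return 0;
}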