diff options
| author | Rusty Russell <rusty@rustcorp.com.au> | 2014-09-10 20:47:38 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2014-09-13 12:52:35 -0400 |
| commit | b25bd2515ea32cf5ddd5fd5a2a93b8c9dd875e4f (patch) | |
| tree | fbbe65f8f3e0d92f8883192a95b0cd1b6d8263e4 | |
| parent | eeebf9b1fc0862466c5661d63fbaf66ab4a50210 (diff) | |
virtio_ring: unify direct/indirect code paths.
virtqueue_add() populates the virtqueue descriptor table from the sgs
given. If it uses an indirect descriptor table, then it puts a single
descriptor in the descriptor table pointing to the kmalloc'ed indirect
table where the sg is populated.
Previously vring_add_indirect() did the allocation and the simple
linear layout. We replace that with alloc_indirect() which allocates
the indirect table then chains it like the normal descriptor table so
we can reuse the core logic.
This slows down pktgen by less than 1/2 a percent (which uses direct
descriptors), as well as vring_bench, but it's far neater.
vring_bench before:
1061485790-1104800648(1.08254e+09+/-6.6e+06)ns
vring_bench after:
1125610268-1183528965(1.14172e+09+/-8e+06)ns
pktgen before:
787781-796334(793165+/-2.4e+03)pps 365-369(367.5+/-1.2)Mb/sec (365530384-369498976(3.68028e+08+/-1.1e+06)bps) errors: 0
pktgen after:
779988-790404(786391+/-2.5e+03)pps 361-366(364.35+/-1.3)Mb/sec (361914432-366747456(3.64885e+08+/-1.2e+06)bps) errors: 0
Now, if we make force indirect descriptors by turning off any_header_sg
in virtio_net.c:
pktgen before:
713773-721062(718374+/-2.1e+03)pps 331-334(332.95+/-0.92)Mb/sec (331190672-334572768(3.33325e+08+/-9.6e+05)bps) errors: 0
pktgen after:
710542-719195(714898+/-2.4e+03)pps 329-333(331.15+/-1.1)Mb/sec (329691488-333706480(3.31713e+08+/-1.1e+06)bps) errors: 0
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
| -rw-r--r-- | drivers/virtio/virtio_ring.c | 128 |
1 files changed, 52 insertions, 76 deletions
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 10a7c0205440..3b1f89b6e743 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c | |||
| @@ -99,18 +99,10 @@ struct vring_virtqueue | |||
| 99 | 99 | ||
| 100 | #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) | 100 | #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) |
| 101 | 101 | ||
| 102 | /* Set up an indirect table of descriptors and add it to the queue. */ | 102 | static struct vring_desc *alloc_indirect(unsigned int total_sg, gfp_t gfp) |
| 103 | static inline int vring_add_indirect(struct vring_virtqueue *vq, | ||
| 104 | struct scatterlist *sgs[], | ||
| 105 | unsigned int total_sg, | ||
| 106 | unsigned int out_sgs, | ||
| 107 | unsigned int in_sgs, | ||
| 108 | gfp_t gfp) | ||
| 109 | { | 103 | { |
| 110 | struct vring_desc *desc; | 104 | struct vring_desc *desc; |
| 111 | unsigned head; | 105 | unsigned int i; |
| 112 | struct scatterlist *sg; | ||
| 113 | int i, n; | ||
| 114 | 106 | ||
| 115 | /* | 107 | /* |
| 116 | * We require lowmem mappings for the descriptors because | 108 | * We require lowmem mappings for the descriptors because |
| @@ -121,49 +113,11 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq, | |||
| 121 | 113 | ||
| 122 | desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp); | 114 | desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp); |
| 123 | if (!desc) | 115 | if (!desc) |
| 124 | return -ENOMEM; | 116 | return NULL; |
| 125 | |||
| 126 | /* Transfer entries from the sg lists into the indirect page */ | ||
| 127 | i = 0; | ||
| 128 | for (n = 0; n < out_sgs; n++) { | ||
| 129 | for (sg = sgs[n]; sg; sg = sg_next(sg)) { | ||
| 130 | desc[i].flags = VRING_DESC_F_NEXT; | ||
| 131 | desc[i].addr = sg_phys(sg); | ||
| 132 | desc[i].len = sg->length; | ||
| 133 | desc[i].next = i+1; | ||
| 134 | i++; | ||
| 135 | } | ||
| 136 | } | ||
| 137 | for (; n < (out_sgs + in_sgs); n++) { | ||
| 138 | for (sg = sgs[n]; sg; sg = sg_next(sg)) { | ||
| 139 | desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; | ||
| 140 | desc[i].addr = sg_phys(sg); | ||
| 141 | desc[i].len = sg->length; | ||
| 142 | desc[i].next = i+1; | ||
| 143 | i++; | ||
| 144 | } | ||
| 145 | } | ||
| 146 | BUG_ON(i != total_sg); | ||
| 147 | |||
| 148 | /* Last one doesn't continue. */ | ||
| 149 | desc[i-1].flags &= ~VRING_DESC_F_NEXT; | ||
| 150 | desc[i-1].next = 0; | ||
| 151 | |||
| 152 | /* We're about to use a buffer */ | ||
| 153 | vq->vq.num_free--; | ||
| 154 | |||
| 155 | /* Use a single buffer which doesn't continue */ | ||
| 156 | head = vq->free_head; | ||
| 157 | vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT; | ||
| 158 | vq->vring.desc[head].addr = virt_to_phys(desc); | ||
| 159 | /* kmemleak gives a false positive, as it's hidden by virt_to_phys */ | ||
| 160 | kmemleak_ignore(desc); | ||
| 161 | vq->vring.desc[head].len = i * sizeof(struct vring_desc); | ||
| 162 | |||
| 163 | /* Update free pointer */ | ||
| 164 | vq->free_head = vq->vring.desc[head].next; | ||
| 165 | 117 | ||
| 166 | return head; | 118 | for (i = 0; i < total_sg; i++) |
| 119 | desc[i].next = i+1; | ||
| 120 | return desc; | ||
| 167 | } | 121 | } |
| 168 | 122 | ||
| 169 | static inline int virtqueue_add(struct virtqueue *_vq, | 123 | static inline int virtqueue_add(struct virtqueue *_vq, |
| @@ -176,8 +130,10 @@ static inline int virtqueue_add(struct virtqueue *_vq, | |||
| 176 | { | 130 | { |
| 177 | struct vring_virtqueue *vq = to_vvq(_vq); | 131 | struct vring_virtqueue *vq = to_vvq(_vq); |
| 178 | struct scatterlist *sg; | 132 | struct scatterlist *sg; |
| 179 | unsigned int i, n, avail, uninitialized_var(prev); | 133 | struct vring_desc *desc; |
| 134 | unsigned int i, n, avail, descs_used, uninitialized_var(prev); | ||
| 180 | int head; | 135 | int head; |
| 136 | bool indirect; | ||
| 181 | 137 | ||
| 182 | START_USE(vq); | 138 | START_USE(vq); |
| 183 | 139 | ||
| @@ -201,21 +157,40 @@ static inline int virtqueue_add(struct virtqueue *_vq, | |||
| 201 | } | 157 | } |
| 202 | #endif | 158 | #endif |
| 203 | 159 | ||
| 160 | BUG_ON(total_sg > vq->vring.num); | ||
| 161 | BUG_ON(total_sg == 0); | ||
| 162 | |||
| 163 | head = vq->free_head; | ||
| 164 | |||
| 204 | /* If the host supports indirect descriptor tables, and we have multiple | 165 | /* If the host supports indirect descriptor tables, and we have multiple |
| 205 | * buffers, then go indirect. FIXME: tune this threshold */ | 166 | * buffers, then go indirect. FIXME: tune this threshold */ |
| 206 | if (vq->indirect && total_sg > 1 && vq->vq.num_free) { | 167 | if (vq->indirect && total_sg > 1 && vq->vq.num_free) |
| 207 | head = vring_add_indirect(vq, sgs, total_sg, | 168 | desc = alloc_indirect(total_sg, gfp); |
| 208 | out_sgs, in_sgs, gfp); | 169 | else |
| 209 | if (likely(head >= 0)) | 170 | desc = NULL; |
| 210 | goto add_head; | 171 | |
| 172 | if (desc) { | ||
| 173 | /* Use a single buffer which doesn't continue */ | ||
| 174 | vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT; | ||
| 175 | vq->vring.desc[head].addr = virt_to_phys(desc); | ||
| 176 | /* avoid kmemleak false positive (hidden by virt_to_phys) */ | ||
| 177 | kmemleak_ignore(desc); | ||
| 178 | vq->vring.desc[head].len = total_sg * sizeof(struct vring_desc); | ||
| 179 | |||
| 180 | /* Set up rest to use this indirect table. */ | ||
| 181 | i = 0; | ||
| 182 | descs_used = 1; | ||
| 183 | indirect = true; | ||
| 184 | } else { | ||
| 185 | desc = vq->vring.desc; | ||
| 186 | i = head; | ||
| 187 | descs_used = total_sg; | ||
| 188 | indirect = false; | ||
| 211 | } | 189 | } |
| 212 | 190 | ||
| 213 | BUG_ON(total_sg > vq->vring.num); | 191 | if (vq->vq.num_free < descs_used) { |
| 214 | BUG_ON(total_sg == 0); | ||
| 215 | |||
| 216 | if (vq->vq.num_free < total_sg) { | ||
| 217 | pr_debug("Can't add buf len %i - avail = %i\n", | 192 | pr_debug("Can't add buf len %i - avail = %i\n", |
| 218 | total_sg, vq->vq.num_free); | 193 | descs_used, vq->vq.num_free); |
| 219 | /* FIXME: for historical reasons, we force a notify here if | 194 | /* FIXME: for historical reasons, we force a notify here if |
| 220 | * there are outgoing parts to the buffer. Presumably the | 195 | * there are outgoing parts to the buffer. Presumably the |
| 221 | * host should service the ring ASAP. */ | 196 | * host should service the ring ASAP. */ |
| @@ -226,34 +201,35 @@ static inline int virtqueue_add(struct virtqueue *_vq, | |||
| 226 | } | 201 | } |
| 227 | 202 | ||
| 228 | /* We're about to use some buffers from the free list. */ | 203 | /* We're about to use some buffers from the free list. */ |
| 229 | vq->vq.num_free -= total_sg; | 204 | vq->vq.num_free -= descs_used; |
| 230 | 205 | ||
| 231 | head = i = vq->free_head; | ||
| 232 | for (n = 0; n < out_sgs; n++) { | 206 | for (n = 0; n < out_sgs; n++) { |
| 233 | for (sg = sgs[n]; sg; sg = sg_next(sg)) { | 207 | for (sg = sgs[n]; sg; sg = sg_next(sg)) { |
| 234 | vq->vring.desc[i].flags = VRING_DESC_F_NEXT; | 208 | desc[i].flags = VRING_DESC_F_NEXT; |
| 235 | vq->vring.desc[i].addr = sg_phys(sg); | 209 | desc[i].addr = sg_phys(sg); |
| 236 | vq->vring.desc[i].len = sg->length; | 210 | desc[i].len = sg->length; |
| 237 | prev = i; | 211 | prev = i; |
| 238 | i = vq->vring.desc[i].next; | 212 | i = desc[i].next; |
| 239 | } | 213 | } |
| 240 | } | 214 | } |
| 241 | for (; n < (out_sgs + in_sgs); n++) { | 215 | for (; n < (out_sgs + in_sgs); n++) { |
| 242 | for (sg = sgs[n]; sg; sg = sg_next(sg)) { | 216 | for (sg = sgs[n]; sg; sg = sg_next(sg)) { |
| 243 | vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; | 217 | desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; |
| 244 | vq->vring.desc[i].addr = sg_phys(sg); | 218 | desc[i].addr = sg_phys(sg); |
| 245 | vq->vring.desc[i].len = sg->length; | 219 | desc[i].len = sg->length; |
| 246 | prev = i; | 220 | prev = i; |
| 247 | i = vq->vring.desc[i].next; | 221 | i = desc[i].next; |
| 248 | } | 222 | } |
| 249 | } | 223 | } |
| 250 | /* Last one doesn't continue. */ | 224 | /* Last one doesn't continue. */ |
| 251 | vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT; | 225 | desc[prev].flags &= ~VRING_DESC_F_NEXT; |
| 252 | 226 | ||
| 253 | /* Update free pointer */ | 227 | /* Update free pointer */ |
| 254 | vq->free_head = i; | 228 | if (indirect) |
| 229 | vq->free_head = vq->vring.desc[head].next; | ||
| 230 | else | ||
| 231 | vq->free_head = i; | ||
| 255 | 232 | ||
| 256 | add_head: | ||
| 257 | /* Set token. */ | 233 | /* Set token. */ |
| 258 | vq->data[head] = data; | 234 | vq->data[head] = data; |
| 259 | 235 | ||
