author     Rusty Russell <rusty@rustcorp.com.au>    2014-09-10 20:47:37 -0400
committer  David S. Miller <davem@davemloft.net>    2014-09-13 12:50:46 -0400
commit     eeebf9b1fc0862466c5661d63fbaf66ab4a50210
tree       a0b217e5f0c70869c73b270ee8faa75c250a2b2f /drivers/virtio
parent     a58354409a00f2d8f7882c6a91fde1df5d0a4bb8
virtio_ring: assume sgs are always well-formed.
We used to have several callers which just used arrays. They're
gone, so we can use sg_next() everywhere, simplifying the code.
On my laptop, this slowed down vring_bench by 15%:
vring_bench before:
936153354-967745359(9.44739e+08+/-6.1e+06)ns
vring_bench after:
1061485790-1104800648(1.08254e+09+/-6.6e+06)ns
However, a more realistic test using pktgen on an AMD FX(tm)-8320 saw
a few percent improvement:
pktgen before:
767390-792966(785159+/-6.5e+03)pps 356-367(363.75+/-2.9)Mb/sec (356068960-367936224(3.64314e+08+/-3e+06)bps) errors: 0
pktgen after:
787781-796334(793165+/-2.4e+03)pps 365-369(367.5+/-1.2)Mb/sec (365530384-369498976(3.68028e+08+/-1.1e+06)bps) errors: 0
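
The "well-formed" assumption in the title means every scatterlist handed to
the virtqueue_add_*() entry points must be properly initialized and
terminated, since sg_next() now finds the end of the list on its own. A
minimal sketch of a conforming caller (the hdr and payload buffers are
illustrative, not from this patch):

    struct scatterlist sg[2];

    /* sg_init_table() clears the array and sets the end marker on the
     * last entry, so sg_next() returns NULL after sg[1] instead of
     * walking off the end of the array. */
    sg_init_table(sg, 2);
    sg_set_buf(&sg[0], hdr, sizeof(*hdr));
    sg_set_buf(&sg[1], payload, payload_len);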
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/virtio')

-rw-r--r--    drivers/virtio/virtio_ring.c    68

1 file changed, 19 insertions(+), 49 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 4d08f45a9c29..10a7c0205440 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -99,28 +99,10 @@ struct vring_virtqueue
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
-static inline struct scatterlist *sg_next_chained(struct scatterlist *sg,
-                                                  unsigned int *count)
-{
-        return sg_next(sg);
-}
-
-static inline struct scatterlist *sg_next_arr(struct scatterlist *sg,
-                                              unsigned int *count)
-{
-        if (--(*count) == 0)
-                return NULL;
-        return sg + 1;
-}
-
 /* Set up an indirect table of descriptors and add it to the queue. */
 static inline int vring_add_indirect(struct vring_virtqueue *vq,
                                      struct scatterlist *sgs[],
-                                     struct scatterlist *(*next)
-                                       (struct scatterlist *, unsigned int *),
                                      unsigned int total_sg,
-                                     unsigned int total_out,
-                                     unsigned int total_in,
                                      unsigned int out_sgs,
                                      unsigned int in_sgs,
                                      gfp_t gfp)
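
The two helpers removed above existed only to bridge the gap between
terminated lists and bare arrays: sg_next_arr() needed an external element
count because a bare array carries no end marker, while sg_next_chained()
ignored the count entirely. Once every caller terminates its lists, plain
sg_next() suffices, because termination is encoded in the entries
themselves; roughly, paraphrasing its logic from lib/scatterlist.c:

    struct scatterlist *sg_next(struct scatterlist *sg)
    {
            if (sg_is_last(sg))             /* end bit set by sg_mark_end() */
                    return NULL;

            sg++;
            if (unlikely(sg_is_chain(sg)))  /* follow a chained sg page */
                    sg = sg_chain_ptr(sg);

            return sg;
    }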
@@ -144,7 +126,7 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq,
         /* Transfer entries from the sg lists into the indirect page */
         i = 0;
         for (n = 0; n < out_sgs; n++) {
-                for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
+                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
                         desc[i].flags = VRING_DESC_F_NEXT;
                         desc[i].addr = sg_phys(sg);
                         desc[i].len = sg->length;
@@ -153,7 +135,7 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq,
                 }
         }
         for (; n < (out_sgs + in_sgs); n++) {
-                for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
+                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
                         desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
                         desc[i].addr = sg_phys(sg);
                         desc[i].len = sg->length;
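
For context on the fields being filled in these two loops: desc points at
the indirect table, an array of the 16-byte descriptor format shared with
the host, as defined in include/uapi/linux/virtio_ring.h of this era:

    /* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
    struct vring_desc {
            __u64 addr;     /* Address (guest-physical). */
            __u32 len;      /* Length. */
            __u16 flags;    /* VRING_DESC_F_NEXT, VRING_DESC_F_WRITE, ... */
            __u16 next;     /* Index of the next descriptor in the chain. */
    };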
@@ -186,10 +168,7 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq,
 
 static inline int virtqueue_add(struct virtqueue *_vq,
                                 struct scatterlist *sgs[],
-                                struct scatterlist *(*next)
-                                  (struct scatterlist *, unsigned int *),
-                                unsigned int total_out,
-                                unsigned int total_in,
+                                unsigned int total_sg,
                                 unsigned int out_sgs,
                                 unsigned int in_sgs,
                                 void *data,
@@ -197,7 +176,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 {
         struct vring_virtqueue *vq = to_vvq(_vq);
         struct scatterlist *sg;
-        unsigned int i, n, avail, uninitialized_var(prev), total_sg;
+        unsigned int i, n, avail, uninitialized_var(prev);
         int head;
 
         START_USE(vq);
@@ -222,13 +201,10 @@ static inline int virtqueue_add(struct virtqueue *_vq,
         }
 #endif
 
-        total_sg = total_in + total_out;
-
         /* If the host supports indirect descriptor tables, and we have multiple
          * buffers, then go indirect. FIXME: tune this threshold */
         if (vq->indirect && total_sg > 1 && vq->vq.num_free) {
-                head = vring_add_indirect(vq, sgs, next, total_sg, total_out,
-                                          total_in,
+                head = vring_add_indirect(vq, sgs, total_sg,
                                           out_sgs, in_sgs, gfp);
                 if (likely(head >= 0))
                         goto add_head;
@@ -254,7 +230,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 
         head = i = vq->free_head;
         for (n = 0; n < out_sgs; n++) {
-                for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
+                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
                         vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
                         vq->vring.desc[i].addr = sg_phys(sg);
                         vq->vring.desc[i].len = sg->length;
@@ -263,7 +239,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
                 }
         }
         for (; n < (out_sgs + in_sgs); n++) {
-                for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
+                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
                         vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
                         vq->vring.desc[i].addr = sg_phys(sg);
                         vq->vring.desc[i].len = sg->length;
@@ -324,29 +300,23 @@ int virtqueue_add_sgs(struct virtqueue *_vq,
                       void *data,
                       gfp_t gfp)
 {
-        unsigned int i, total_out, total_in;
+        unsigned int i, total_sg = 0;
 
         /* Count them first. */
-        for (i = total_out = total_in = 0; i < out_sgs; i++) {
-                struct scatterlist *sg;
-                for (sg = sgs[i]; sg; sg = sg_next(sg))
-                        total_out++;
-        }
-        for (; i < out_sgs + in_sgs; i++) {
+        for (i = 0; i < out_sgs + in_sgs; i++) {
                 struct scatterlist *sg;
                 for (sg = sgs[i]; sg; sg = sg_next(sg))
-                        total_in++;
+                        total_sg++;
         }
-        return virtqueue_add(_vq, sgs, sg_next_chained,
-                             total_out, total_in, out_sgs, in_sgs, data, gfp);
+        return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
 
 /**
  * virtqueue_add_outbuf - expose output buffers to other end
  * @vq: the struct virtqueue we're talking about.
- * @sgs: array of scatterlists (need not be terminated!)
- * @num: the number of scatterlists readable by other side
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg readable by other side
  * @data: the token identifying the buffer.
  * @gfp: how to do memory allocations (if necessary).
  *
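
With a single total_sg, the counting loop above no longer cares where the
out/in boundary falls; out_sgs and in_sgs alone determine which descriptors
get VRING_DESC_F_WRITE. A usage sketch in the style of a virtio-blk request
(the names req_hdr, data_buf, data_len, and status are illustrative, not
from this patch):

    struct scatterlist hdr_sg, data_sg, status_sg;
    struct scatterlist *sgs[3];
    int err;

    /* Each sg_init_one() builds a one-entry, properly terminated list. */
    sg_init_one(&hdr_sg, &req_hdr, sizeof(req_hdr));        /* device-readable */
    sg_init_one(&data_sg, data_buf, data_len);              /* device-readable */
    sg_init_one(&status_sg, &status, sizeof(status));       /* device-writable */

    sgs[0] = &hdr_sg;
    sgs[1] = &data_sg;
    sgs[2] = &status_sg;

    /* Two readable lists followed by one writable list. */
    err = virtqueue_add_sgs(vq, sgs, 2, 1, &req_hdr, GFP_ATOMIC);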
@@ -356,19 +326,19 @@ EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
  */
 int virtqueue_add_outbuf(struct virtqueue *vq,
-                         struct scatterlist sg[], unsigned int num,
+                         struct scatterlist *sg, unsigned int num,
                          void *data,
                          gfp_t gfp)
 {
-        return virtqueue_add(vq, &sg, sg_next_arr, num, 0, 1, 0, data, gfp);
+        return virtqueue_add(vq, &sg, num, 1, 0, data, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
 
 /**
  * virtqueue_add_inbuf - expose input buffers to other end
  * @vq: the struct virtqueue we're talking about.
- * @sgs: array of scatterlists (need not be terminated!)
- * @num: the number of scatterlists writable by other side
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg writable by other side
  * @data: the token identifying the buffer.
  * @gfp: how to do memory allocations (if necessary).
  *
@@ -378,11 +348,11 @@ EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
  */
 int virtqueue_add_inbuf(struct virtqueue *vq,
-                        struct scatterlist sg[], unsigned int num,
+                        struct scatterlist *sg, unsigned int num,
                         void *data,
                         gfp_t gfp)
 {
-        return virtqueue_add(vq, &sg, sg_next_arr, 0, num, 0, 1, data, gfp);
+        return virtqueue_add(vq, &sg, num, 0, 1, data, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
 
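
The convenience wrappers now pass the caller's @num straight through as
total_sg, so it must match the number of entries in a terminated list. A
minimal sketch of a receive-side caller in the style of virtio_net's
receive path (buf and buf_len are illustrative; free_buf() is a
hypothetical cleanup helper):

    struct scatterlist sg;
    int err;

    sg_init_one(&sg, buf, buf_len); /* one entry, end marker already set */
    err = virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_ATOMIC);
    if (err < 0)
            /* ring full (-ENOSPC) or allocation failure (-ENOMEM) */
            free_buf(buf);

A real caller would follow a successful add with virtqueue_kick() to
notify the host that new buffers are available.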