author     Rusty Russell <rusty@rustcorp.com.au>    2013-03-20 01:07:09 -0400
committer  Rusty Russell <rusty@rustcorp.com.au>    2013-03-20 01:13:29 -0400
commit     13816c768d46586e925b22736992258d6105ad2c
tree       fffb6c71e49b040f532885e6311c5d4886c5d219 /drivers/virtio
parent     c8164d8931fdee9ac5314708c4071adf1d997425
virtio_ring: virtqueue_add_sgs, to add multiple sgs.
virtio_scsi can really use this, to avoid the current hack of copying
the whole sg array. Some other things get slightly neater, too.
This causes a slowdown in virtqueue_add_buf(), which is implemented as
a wrapper. This is addressed in the next patches.
for i in `seq 50`; do /usr/bin/time -f 'Wall time:%e' ./vringh_test --indirect --eventidx --parallel --fast-vringh; done 2>&1 | stats --trim-outliers:
Before:
	Using CPUS 0 and 3
	Guest: notified 0, pinged 39009-39063(39062)
	Host: notified 39009-39063(39062), pinged 0
	Wall time:1.700000-1.950000(1.723542)

After:
	Using CPUS 0 and 3
	Guest: notified 0, pinged 39062-39063(39063)
	Host: notified 39062-39063(39063), pinged 0
	Wall time:1.760000-2.220000(1.789167)
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Reviewed-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Reviewed-by: Asias He <asias@redhat.com>
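
A quick sketch of how a driver might call the new entry point (illustrative only, not part of this patch: the my_req layout and my_queue_req() helper are hypothetical, while virtqueue_add_sgs(), sg_init_one() and struct scatterlist are real kernel interfaces). A virtio_scsi-style request keeps its readable header, readable payload and writable response as three separate scatterlists, instead of first copying everything into one flat sg array:

#include <linux/types.h>
#include <linux/gfp.h>
#include <linux/scatterlist.h>
#include <linux/virtio.h>

/* Hypothetical request layout, for illustration only. */
struct my_req {
	u8 cmd[32];			/* readable by the device */
	u8 resp[16];			/* written by the device */
	struct scatterlist *data_sg;	/* terminated sg chain for the payload */
};

static int my_queue_req(struct virtqueue *vq, struct my_req *req)
{
	struct scatterlist hdr, resp;
	struct scatterlist *sgs[3];

	sg_init_one(&hdr, req->cmd, sizeof(req->cmd));
	sg_init_one(&resp, req->resp, sizeof(req->resp));

	sgs[0] = &hdr;		/* out: command header */
	sgs[1] = req->data_sg;	/* out: payload, kept as its own list */
	sgs[2] = &resp;		/* in: device writes the response here */

	/* Two readable sg lists, then one writable one. */
	return virtqueue_add_sgs(vq, sgs, 2, 1, req, GFP_ATOMIC);
}

On failure the usual virtqueue_add_buf() error conventions apply, e.g. -ENOSPC when the ring is full.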
Diffstat (limited to 'drivers/virtio')
 -rw-r--r--  drivers/virtio/virtio_ring.c | 220
 1 file changed, 157 insertions, 63 deletions
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 245177c286ae..a78ad459cc85 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -98,16 +98,36 @@ struct vring_virtqueue
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
+static inline struct scatterlist *sg_next_chained(struct scatterlist *sg,
+						  unsigned int *count)
+{
+	return sg_next(sg);
+}
+
+static inline struct scatterlist *sg_next_arr(struct scatterlist *sg,
+					      unsigned int *count)
+{
+	if (--(*count) == 0)
+		return NULL;
+	return sg + 1;
+}
+
 /* Set up an indirect table of descriptors and add it to the queue. */
-static int vring_add_indirect(struct vring_virtqueue *vq,
-			      struct scatterlist sg[],
-			      unsigned int out,
-			      unsigned int in,
-			      gfp_t gfp)
+static inline int vring_add_indirect(struct vring_virtqueue *vq,
+				     struct scatterlist *sgs[],
+				     struct scatterlist *(*next)
+				       (struct scatterlist *, unsigned int *),
+				     unsigned int total_sg,
+				     unsigned int total_out,
+				     unsigned int total_in,
+				     unsigned int out_sgs,
+				     unsigned int in_sgs,
+				     gfp_t gfp)
 {
 	struct vring_desc *desc;
 	unsigned head;
-	int i;
+	struct scatterlist *sg;
+	int i, n;
 
 	/*
 	 * We require lowmem mappings for the descriptors because
@@ -116,25 +136,31 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 	 */
 	gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
 
-	desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
+	desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
 	if (!desc)
 		return -ENOMEM;
 
-	/* Transfer entries from the sg list into the indirect page */
-	for (i = 0; i < out; i++) {
-		desc[i].flags = VRING_DESC_F_NEXT;
-		desc[i].addr = sg_phys(sg);
-		desc[i].len = sg->length;
-		desc[i].next = i+1;
-		sg++;
+	/* Transfer entries from the sg lists into the indirect page */
+	i = 0;
+	for (n = 0; n < out_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
+			desc[i].flags = VRING_DESC_F_NEXT;
+			desc[i].addr = sg_phys(sg);
+			desc[i].len = sg->length;
+			desc[i].next = i+1;
+			i++;
+		}
 	}
-	for (; i < (out + in); i++) {
-		desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
-		desc[i].addr = sg_phys(sg);
-		desc[i].len = sg->length;
-		desc[i].next = i+1;
-		sg++;
+	for (; n < (out_sgs + in_sgs); n++) {
+		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
+			desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+			desc[i].addr = sg_phys(sg);
+			desc[i].len = sg->length;
+			desc[i].next = i+1;
+			i++;
+		}
 	}
+	BUG_ON(i != total_sg);
 
 	/* Last one doesn't continue. */
 	desc[i-1].flags &= ~VRING_DESC_F_NEXT;
@@ -155,29 +181,20 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 	return head;
 }
 
-/**
- * virtqueue_add_buf - expose buffer to other end
- * @vq: the struct virtqueue we're talking about.
- * @sg: the description of the buffer(s).
- * @out_num: the number of sg readable by other side
- * @in_num: the number of sg which are writable (after readable ones)
- * @data: the token identifying the buffer.
- * @gfp: how to do memory allocations (if necessary).
- *
- * Caller must ensure we don't call this with other virtqueue operations
- * at the same time (except where noted).
- *
- * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
- */
-int virtqueue_add_buf(struct virtqueue *_vq,
-		      struct scatterlist sg[],
-		      unsigned int out,
-		      unsigned int in,
-		      void *data,
-		      gfp_t gfp)
+static inline int virtqueue_add(struct virtqueue *_vq,
+				struct scatterlist *sgs[],
+				struct scatterlist *(*next)
+				  (struct scatterlist *, unsigned int *),
+				unsigned int total_out,
+				unsigned int total_in,
+				unsigned int out_sgs,
+				unsigned int in_sgs,
+				void *data,
+				gfp_t gfp)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	unsigned int i, avail, uninitialized_var(prev);
+	struct scatterlist *sg;
+	unsigned int i, n, avail, uninitialized_var(prev), total_sg;
 	int head;
 
 	START_USE(vq);
@@ -197,46 +214,54 @@ int virtqueue_add_buf(struct virtqueue *_vq,
 	}
 #endif
 
+	total_sg = total_in + total_out;
+
 	/* If the host supports indirect descriptor tables, and we have multiple
 	 * buffers, then go indirect. FIXME: tune this threshold */
-	if (vq->indirect && (out + in) > 1 && vq->vq.num_free) {
-		head = vring_add_indirect(vq, sg, out, in, gfp);
+	if (vq->indirect && total_sg > 1 && vq->vq.num_free) {
+		head = vring_add_indirect(vq, sgs, next, total_sg, total_out,
+					  total_in,
+					  out_sgs, in_sgs, gfp);
 		if (likely(head >= 0))
 			goto add_head;
 	}
 
-	BUG_ON(out + in > vq->vring.num);
-	BUG_ON(out + in == 0);
+	BUG_ON(total_sg > vq->vring.num);
+	BUG_ON(total_sg == 0);
 
-	if (vq->vq.num_free < out + in) {
+	if (vq->vq.num_free < total_sg) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
-			 out + in, vq->vq.num_free);
+			 total_sg, vq->vq.num_free);
 		/* FIXME: for historical reasons, we force a notify here if
 		 * there are outgoing parts to the buffer.  Presumably the
 		 * host should service the ring ASAP. */
-		if (out)
+		if (out_sgs)
 			vq->notify(&vq->vq);
 		END_USE(vq);
 		return -ENOSPC;
 	}
 
 	/* We're about to use some buffers from the free list. */
-	vq->vq.num_free -= out + in;
-
-	head = vq->free_head;
-	for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
-		vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
-		vq->vring.desc[i].addr = sg_phys(sg);
-		vq->vring.desc[i].len = sg->length;
-		prev = i;
-		sg++;
+	vq->vq.num_free -= total_sg;
+
+	head = i = vq->free_head;
+	for (n = 0; n < out_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
+			vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
+			vq->vring.desc[i].addr = sg_phys(sg);
+			vq->vring.desc[i].len = sg->length;
+			prev = i;
+			i = vq->vring.desc[i].next;
+		}
 	}
-	for (; in; i = vq->vring.desc[i].next, in--) {
-		vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
-		vq->vring.desc[i].addr = sg_phys(sg);
-		vq->vring.desc[i].len = sg->length;
-		prev = i;
-		sg++;
+	for (; n < (out_sgs + in_sgs); n++) {
+		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
+			vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+			vq->vring.desc[i].addr = sg_phys(sg);
+			vq->vring.desc[i].len = sg->length;
+			prev = i;
+			i = vq->vring.desc[i].next;
+		}
 	}
 	/* Last one doesn't continue. */
 	vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
@@ -269,9 +294,78 @@ add_head:
 
 	return 0;
 }
+
+/**
+ * virtqueue_add_buf - expose buffer to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: the description of the buffer(s).
+ * @out_num: the number of sg readable by other side
+ * @in_num: the number of sg which are writable (after readable ones)
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
+ */
+int virtqueue_add_buf(struct virtqueue *_vq,
+		      struct scatterlist sg[],
+		      unsigned int out,
+		      unsigned int in,
+		      void *data,
+		      gfp_t gfp)
+{
+	struct scatterlist *sgs[2];
+
+	sgs[0] = sg;
+	sgs[1] = sg + out;
+
+	return virtqueue_add(_vq, sgs, sg_next_arr,
+			     out, in, out ? 1 : 0, in ? 1 : 0, data, gfp);
+}
 EXPORT_SYMBOL_GPL(virtqueue_add_buf);
 
 /**
+ * virtqueue_add_sgs - expose buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sgs: array of terminated scatterlists.
+ * @out_num: the number of scatterlists readable by other side
+ * @in_num: the number of scatterlists which are writable (after readable ones)
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
+ */
+int virtqueue_add_sgs(struct virtqueue *_vq,
+		      struct scatterlist *sgs[],
+		      unsigned int out_sgs,
+		      unsigned int in_sgs,
+		      void *data,
+		      gfp_t gfp)
+{
+	unsigned int i, total_out, total_in;
+
+	/* Count them first. */
+	for (i = total_out = total_in = 0; i < out_sgs; i++) {
+		struct scatterlist *sg;
+		for (sg = sgs[i]; sg; sg = sg_next(sg))
+			total_out++;
+	}
+	for (; i < out_sgs + in_sgs; i++) {
+		struct scatterlist *sg;
+		for (sg = sgs[i]; sg; sg = sg_next(sg))
+			total_in++;
+	}
+	return virtqueue_add(_vq, sgs, sg_next_chained,
+			     total_out, total_in, out_sgs, in_sgs, data, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
+
+/**
  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  * @vq: the struct virtqueue
  *
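
The next() callback is the trick that lets a single virtqueue_add() body serve both entry points: sg_next_chained() terminates on the chain's own end marker and ignores the count, while sg_next_arr() terminates on the count alone, so a plain array needs no terminator. A standalone user-space sketch of the same pattern (hypothetical struct entry type; not kernel code):

#include <stdio.h>

struct entry {
	int val;
	struct entry *chain;	/* stand-in for sg chaining; NULL ends the list */
};

/* Chain walker: the count is unused, the chain terminates itself. */
static struct entry *next_chained(struct entry *e, unsigned int *count)
{
	return e->chain;
}

/* Array walker: the count terminates the walk, so no end marker is needed. */
static struct entry *next_arr(struct entry *e, unsigned int *count)
{
	if (--(*count) == 0)
		return NULL;
	return e + 1;
}

/* One walker body serves both layouts, just like virtqueue_add(). */
static void walk(struct entry *e, unsigned int count,
		 struct entry *(*next)(struct entry *, unsigned int *))
{
	for (; e; e = next(e, &count))
		printf("%d\n", e->val);
}

int main(void)
{
	struct entry arr[3] = { { 1 }, { 2 }, { 3 } };	/* .chain stays NULL */
	struct entry c = { 30 }, b = { 20, &c }, a = { 10, &b };

	walk(arr, 3, next_arr);		/* counted array: prints 1 2 3 */
	walk(&a, 0, next_chained);	/* chained list: prints 10 20 30 */
	return 0;
}

virtqueue_add_sgs() pre-counts the totals and passes sg_next_chained; virtqueue_add_buf() passes sg_next_arr with out/in as the counts, which is what lets it remain a thin wrapper.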
