author     Rusty Russell <rusty@rustcorp.com.au>   2013-03-20 01:07:09 -0400
committer  Rusty Russell <rusty@rustcorp.com.au>   2013-03-20 01:13:29 -0400
commit     13816c768d46586e925b22736992258d6105ad2c
tree       fffb6c71e49b040f532885e6311c5d4886c5d219 /drivers/virtio
parent     c8164d8931fdee9ac5314708c4071adf1d997425
virtio_ring: virtqueue_add_sgs, to add multiple sgs.
virtio_scsi can really use this, to avoid the current hack of copying
the whole sg array. Some other things get slightly neater, too.
This causes a slowdown in virtqueue_add_buf(), which is now implemented
as a wrapper around the more general virtqueue_add().  The next patches
address this.
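
To illustrate the new call pattern (a hypothetical caller, not part of
this patch): a driver with a readable request and a writable response
can hand over two separately terminated sg lists instead of flattening
them into one array.  The my_cmd/my_resp types and queue_cmd() below
are invented for illustration, and virtqueue_add_sgs() is assumed to be
declared in linux/virtio.h as done later in this series:

#include <linux/gfp.h>
#include <linux/scatterlist.h>
#include <linux/types.h>
#include <linux/virtio.h>

/* Invented example types; a real driver has its own. */
struct my_cmd { __le32 type; __le32 arg; };
struct my_resp { u8 status; };

/* Hypothetical driver fragment using the new multi-sg entry point. */
static int queue_cmd(struct virtqueue *vq, struct my_cmd *cmd,
		     struct my_resp *resp, void *token)
{
	struct scatterlist hdr, status;
	struct scatterlist *sgs[2];

	sg_init_one(&hdr, cmd, sizeof(*cmd));		/* read by the host */
	sg_init_one(&status, resp, sizeof(*resp));	/* written by the host */
	sgs[0] = &hdr;
	sgs[1] = &status;

	/* One readable sg list, followed by one writable sg list. */
	return virtqueue_add_sgs(vq, sgs, 1, 1, token, GFP_ATOMIC);
}
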
for i in `seq 50`; do /usr/bin/time -f 'Wall time:%e' ./vringh_test --indirect --eventidx --parallel --fast-vringh; done 2>&1 | stats --trim-outliers:
Before:
Using CPUS 0 and 3
Guest: notified 0, pinged 39009-39063(39062)
Host: notified 39009-39063(39062), pinged 0
Wall time:1.700000-1.950000(1.723542)
After:
Using CPUS 0 and 3
Guest: notified 0, pinged 39062-39063(39063)
Host: notified 39062-39063(39063), pinged 0
Wall time:1.760000-2.220000(1.789167)
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Reviewed-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Reviewed-by: Asias He <asias@redhat.com>
Diffstat (limited to 'drivers/virtio')
 drivers/virtio/virtio_ring.c | 220
 1 file changed, 157 insertions(+), 63 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 245177c286ae..a78ad459cc85 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -98,16 +98,36 @@ struct vring_virtqueue
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
+static inline struct scatterlist *sg_next_chained(struct scatterlist *sg,
+                                                  unsigned int *count)
+{
+        return sg_next(sg);
+}
+
+static inline struct scatterlist *sg_next_arr(struct scatterlist *sg,
+                                              unsigned int *count)
+{
+        if (--(*count) == 0)
+                return NULL;
+        return sg + 1;
+}
+
 /* Set up an indirect table of descriptors and add it to the queue. */
-static int vring_add_indirect(struct vring_virtqueue *vq,
-                              struct scatterlist sg[],
-                              unsigned int out,
-                              unsigned int in,
-                              gfp_t gfp)
+static inline int vring_add_indirect(struct vring_virtqueue *vq,
+                                     struct scatterlist *sgs[],
+                                     struct scatterlist *(*next)
+                                       (struct scatterlist *, unsigned int *),
+                                     unsigned int total_sg,
+                                     unsigned int total_out,
+                                     unsigned int total_in,
+                                     unsigned int out_sgs,
+                                     unsigned int in_sgs,
+                                     gfp_t gfp)
 {
         struct vring_desc *desc;
         unsigned head;
-        int i;
+        struct scatterlist *sg;
+        int i, n;
 
         /*
          * We require lowmem mappings for the descriptors because
@@ -116,25 +136,31 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
          */
         gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
 
-        desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
+        desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
         if (!desc)
                 return -ENOMEM;
 
-        /* Transfer entries from the sg list into the indirect page */
-        for (i = 0; i < out; i++) {
-                desc[i].flags = VRING_DESC_F_NEXT;
-                desc[i].addr = sg_phys(sg);
-                desc[i].len = sg->length;
-                desc[i].next = i+1;
-                sg++;
+        /* Transfer entries from the sg lists into the indirect page */
+        i = 0;
+        for (n = 0; n < out_sgs; n++) {
+                for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
+                        desc[i].flags = VRING_DESC_F_NEXT;
+                        desc[i].addr = sg_phys(sg);
+                        desc[i].len = sg->length;
+                        desc[i].next = i+1;
+                        i++;
+                }
         }
-        for (; i < (out + in); i++) {
-                desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
-                desc[i].addr = sg_phys(sg);
-                desc[i].len = sg->length;
-                desc[i].next = i+1;
-                sg++;
+        for (; n < (out_sgs + in_sgs); n++) {
+                for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
+                        desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+                        desc[i].addr = sg_phys(sg);
+                        desc[i].len = sg->length;
+                        desc[i].next = i+1;
+                        i++;
+                }
         }
+        BUG_ON(i != total_sg);
 
         /* Last one doesn't continue. */
         desc[i-1].flags &= ~VRING_DESC_F_NEXT;
@@ -155,29 +181,20 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
         return head;
 }
 
-/**
- * virtqueue_add_buf - expose buffer to other end
- * @vq: the struct virtqueue we're talking about.
- * @sg: the description of the buffer(s).
- * @out_num: the number of sg readable by other side
- * @in_num: the number of sg which are writable (after readable ones)
- * @data: the token identifying the buffer.
- * @gfp: how to do memory allocations (if necessary).
- *
- * Caller must ensure we don't call this with other virtqueue operations
- * at the same time (except where noted).
- *
- * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
- */
-int virtqueue_add_buf(struct virtqueue *_vq,
-                      struct scatterlist sg[],
-                      unsigned int out,
-                      unsigned int in,
-                      void *data,
-                      gfp_t gfp)
+static inline int virtqueue_add(struct virtqueue *_vq,
+                                struct scatterlist *sgs[],
+                                struct scatterlist *(*next)
+                                  (struct scatterlist *, unsigned int *),
+                                unsigned int total_out,
+                                unsigned int total_in,
+                                unsigned int out_sgs,
+                                unsigned int in_sgs,
+                                void *data,
+                                gfp_t gfp)
 {
         struct vring_virtqueue *vq = to_vvq(_vq);
-        unsigned int i, avail, uninitialized_var(prev);
+        struct scatterlist *sg;
+        unsigned int i, n, avail, uninitialized_var(prev), total_sg;
         int head;
 
         START_USE(vq);
@@ -197,46 +214,54 @@ int virtqueue_add_buf(struct virtqueue *_vq,
         }
 #endif
 
+        total_sg = total_in + total_out;
+
         /* If the host supports indirect descriptor tables, and we have multiple
          * buffers, then go indirect. FIXME: tune this threshold */
-        if (vq->indirect && (out + in) > 1 && vq->vq.num_free) {
-                head = vring_add_indirect(vq, sg, out, in, gfp);
+        if (vq->indirect && total_sg > 1 && vq->vq.num_free) {
+                head = vring_add_indirect(vq, sgs, next, total_sg, total_out,
+                                          total_in,
+                                          out_sgs, in_sgs, gfp);
                 if (likely(head >= 0))
                         goto add_head;
         }
 
-        BUG_ON(out + in > vq->vring.num);
-        BUG_ON(out + in == 0);
+        BUG_ON(total_sg > vq->vring.num);
+        BUG_ON(total_sg == 0);
 
-        if (vq->vq.num_free < out + in) {
+        if (vq->vq.num_free < total_sg) {
                 pr_debug("Can't add buf len %i - avail = %i\n",
-                         out + in, vq->vq.num_free);
+                         total_sg, vq->vq.num_free);
                 /* FIXME: for historical reasons, we force a notify here if
                  * there are outgoing parts to the buffer.  Presumably the
                  * host should service the ring ASAP. */
-                if (out)
+                if (out_sgs)
                         vq->notify(&vq->vq);
                 END_USE(vq);
                 return -ENOSPC;
         }
 
         /* We're about to use some buffers from the free list. */
-        vq->vq.num_free -= out + in;
+        vq->vq.num_free -= total_sg;
 
-        head = vq->free_head;
-        for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
-                vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
-                vq->vring.desc[i].addr = sg_phys(sg);
-                vq->vring.desc[i].len = sg->length;
-                prev = i;
-                sg++;
+        head = i = vq->free_head;
+        for (n = 0; n < out_sgs; n++) {
+                for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
+                        vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
+                        vq->vring.desc[i].addr = sg_phys(sg);
+                        vq->vring.desc[i].len = sg->length;
+                        prev = i;
+                        i = vq->vring.desc[i].next;
+                }
         }
-        for (; in; i = vq->vring.desc[i].next, in--) {
-                vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
-                vq->vring.desc[i].addr = sg_phys(sg);
-                vq->vring.desc[i].len = sg->length;
-                prev = i;
-                sg++;
+        for (; n < (out_sgs + in_sgs); n++) {
+                for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
+                        vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+                        vq->vring.desc[i].addr = sg_phys(sg);
+                        vq->vring.desc[i].len = sg->length;
+                        prev = i;
+                        i = vq->vring.desc[i].next;
+                }
         }
         /* Last one doesn't continue. */
         vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
@@ -269,9 +294,78 @@ add_head:
 
         return 0;
 }
+
+/**
+ * virtqueue_add_buf - expose buffer to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: the description of the buffer(s).
+ * @out_num: the number of sg readable by other side
+ * @in_num: the number of sg which are writable (after readable ones)
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
+ */
+int virtqueue_add_buf(struct virtqueue *_vq,
+                      struct scatterlist sg[],
+                      unsigned int out,
+                      unsigned int in,
+                      void *data,
+                      gfp_t gfp)
+{
+        struct scatterlist *sgs[2];
+
+        sgs[0] = sg;
+        sgs[1] = sg + out;
+
+        return virtqueue_add(_vq, sgs, sg_next_arr,
+                             out, in, out ? 1 : 0, in ? 1 : 0, data, gfp);
+}
 EXPORT_SYMBOL_GPL(virtqueue_add_buf);
 
 /**
+ * virtqueue_add_sgs - expose buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sgs: array of terminated scatterlists.
+ * @out_num: the number of scatterlists readable by other side
+ * @in_num: the number of scatterlists which are writable (after readable ones)
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
+ */
+int virtqueue_add_sgs(struct virtqueue *_vq,
+                      struct scatterlist *sgs[],
+                      unsigned int out_sgs,
+                      unsigned int in_sgs,
+                      void *data,
+                      gfp_t gfp)
+{
+        unsigned int i, total_out, total_in;
+
+        /* Count them first. */
+        for (i = total_out = total_in = 0; i < out_sgs; i++) {
+                struct scatterlist *sg;
+                for (sg = sgs[i]; sg; sg = sg_next(sg))
+                        total_out++;
+        }
+        for (; i < out_sgs + in_sgs; i++) {
+                struct scatterlist *sg;
+                for (sg = sgs[i]; sg; sg = sg_next(sg))
+                        total_in++;
+        }
+        return virtqueue_add(_vq, sgs, sg_next_chained,
+                             total_out, total_in, out_sgs, in_sgs, data, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
+
+/**
  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  * @vq: the struct virtqueue
  *
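
A note on the next callback contract used above: sg_next_arr() bounds
the walk by decrementing the caller's element count, while
sg_next_chained() ignores the count and relies on the chained list's
own NULL terminator.  The same two contracts in a standalone user-space
sketch (plain C, with an invented struct entry standing in for struct
scatterlist):

#include <stdio.h>

struct entry {
	int len;
	struct entry *chain;	/* NULL-terminated chain, like sg_next() */
};

/* Array variant: stop after a caller-supplied count (cf. sg_next_arr). */
static struct entry *next_arr(struct entry *e, unsigned int *count)
{
	if (--(*count) == 0)
		return NULL;
	return e + 1;
}

/* Chained variant: stop at the chain's NULL (cf. sg_next_chained). */
static struct entry *next_chained(struct entry *e, unsigned int *count)
{
	return e->chain;	/* *count is deliberately untouched */
}

int main(void)
{
	struct entry arr[3] = { { 10, NULL }, { 20, NULL }, { 30, NULL } };
	struct entry *e;
	unsigned int count;

	count = 3;	/* entries remaining, including the current one */
	for (e = arr; e; e = next_arr(e, &count))
		printf("arr: len=%d\n", e->len);

	arr[0].chain = &arr[1];
	arr[1].chain = &arr[2];	/* arr[2].chain stays NULL */
	count = 0;	/* unused by the chained variant */
	for (e = arr; e; e = next_chained(e, &count))
		printf("chained: len=%d\n", e->len);
	return 0;
}

This difference is why virtqueue_add_sgs() has to count the chained
entries up front, while virtqueue_add_buf() can pass its out/in totals
straight through as the array bounds.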