author      Rusty Russell <rusty@rustcorp.com.au>    2014-09-10 20:47:38 -0400
committer   David S. Miller <davem@davemloft.net>    2014-09-13 12:52:35 -0400
commit      b25bd2515ea32cf5ddd5fd5a2a93b8c9dd875e4f
tree        fbbe65f8f3e0d92f8883192a95b0cd1b6d8263e4 /drivers/virtio
parent      eeebf9b1fc0862466c5661d63fbaf66ab4a50210
virtio_ring: unify direct/indirect code paths.
virtqueue_add() populates the virtqueue descriptor table from the sgs
given. If it uses an indirect descriptor table, then it puts a single
descriptor in the descriptor table pointing to the kmalloc'ed indirect
table where the sg is populated.
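For orientation, each entry in either table is a struct vring_desc. The sketch below shows the layout as it stood in this era's include/uapi/linux/virtio_ring.h (pre-virtio-1.0 types), reproduced here only to make the flags in the patch easier to follow:

    /* Sketch for orientation: the descriptor layout and the flag values
     * the patch relies on (include/uapi/linux/virtio_ring.h of the time). */
    struct vring_desc {
            __u64 addr;   /* guest-physical address of the buffer */
            __u32 len;    /* buffer length in bytes */
            __u16 flags;  /* VRING_DESC_F_NEXT (1), _WRITE (2), _INDIRECT (4) */
            __u16 next;   /* index of the chained descriptor, if _NEXT is set */
    };

An indirect descriptor is one ring entry with VRING_DESC_F_INDIRECT set whose addr points at a separate kmalloc'ed array of these structures.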
Previously vring_add_indirect() did the allocation and the simple
linear layout. We replace that with alloc_indirect() which allocates
the indirect table then chains it like the normal descriptor table so
we can reuse the core logic.
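Condensed, the dispatch that makes this reuse possible looks like the sketch below; this is a restatement of what the patch does in virtqueue_add(), not a verbatim excerpt. After this point a single fill loop serves both paths, because desc and i name either table:

    /* Restatement of the patch's dispatch, not a verbatim excerpt. */
    if (desc) {                        /* alloc_indirect() succeeded */
            i = 0;                     /* fill the linearly chained table */
            descs_used = 1;            /* ring spends one descriptor: the head */
    } else {
            desc = vq->vring.desc;     /* fill the ring itself */
            i = head;                  /* starting at the free head */
            descs_used = total_sg;     /* one ring descriptor per sg entry */
    }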
This slows down pktgen (which uses direct descriptors) by less than
half a percent, as well as vring_bench, but it's far neater.
vring_bench before:
1061485790-1104800648(1.08254e+09+/-6.6e+06)ns
vring_bench after:
1125610268-1183528965(1.14172e+09+/-8e+06)ns
pktgen before:
787781-796334(793165+/-2.4e+03)pps 365-369(367.5+/-1.2)Mb/sec (365530384-369498976(3.68028e+08+/-1.1e+06)bps) errors: 0
pktgen after:
779988-790404(786391+/-2.5e+03)pps 361-366(364.35+/-1.3)Mb/sec (361914432-366747456(3.64885e+08+/-1.2e+06)bps) errors: 0
Now, if we force indirect descriptors by turning off any_header_sg
in virtio_net.c:
pktgen before:
713773-721062(718374+/-2.1e+03)pps 331-334(332.95+/-0.92)Mb/sec (331190672-334572768(3.33325e+08+/-9.6e+05)bps) errors: 0
pktgen after:
710542-719195(714898+/-2.4e+03)pps 329-333(331.15+/-1.1)Mb/sec (329691488-333706480(3.31713e+08+/-1.1e+06)bps) errors: 0
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/virtio')
-rw-r--r--  drivers/virtio/virtio_ring.c | 128
1 file changed, 52 insertions(+), 76 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 10a7c0205440..3b1f89b6e743 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -99,18 +99,10 @@ struct vring_virtqueue
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
-/* Set up an indirect table of descriptors and add it to the queue. */
-static inline int vring_add_indirect(struct vring_virtqueue *vq,
-                                     struct scatterlist *sgs[],
-                                     unsigned int total_sg,
-                                     unsigned int out_sgs,
-                                     unsigned int in_sgs,
-                                     gfp_t gfp)
+static struct vring_desc *alloc_indirect(unsigned int total_sg, gfp_t gfp)
 {
         struct vring_desc *desc;
-        unsigned head;
-        struct scatterlist *sg;
-        int i, n;
+        unsigned int i;
 
         /*
          * We require lowmem mappings for the descriptors because
@@ -121,49 +113,11 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq,
 
         desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
         if (!desc)
-                return -ENOMEM;
-
-        /* Transfer entries from the sg lists into the indirect page */
-        i = 0;
-        for (n = 0; n < out_sgs; n++) {
-                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-                        desc[i].flags = VRING_DESC_F_NEXT;
-                        desc[i].addr = sg_phys(sg);
-                        desc[i].len = sg->length;
-                        desc[i].next = i+1;
-                        i++;
-                }
-        }
-        for (; n < (out_sgs + in_sgs); n++) {
-                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-                        desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
-                        desc[i].addr = sg_phys(sg);
-                        desc[i].len = sg->length;
-                        desc[i].next = i+1;
-                        i++;
-                }
-        }
-        BUG_ON(i != total_sg);
-
-        /* Last one doesn't continue. */
-        desc[i-1].flags &= ~VRING_DESC_F_NEXT;
-        desc[i-1].next = 0;
-
-        /* We're about to use a buffer */
-        vq->vq.num_free--;
-
-        /* Use a single buffer which doesn't continue */
-        head = vq->free_head;
-        vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
-        vq->vring.desc[head].addr = virt_to_phys(desc);
-        /* kmemleak gives a false positive, as it's hidden by virt_to_phys */
-        kmemleak_ignore(desc);
-        vq->vring.desc[head].len = i * sizeof(struct vring_desc);
-
-        /* Update free pointer */
-        vq->free_head = vq->vring.desc[head].next;
+                return NULL;
 
-        return head;
+        for (i = 0; i < total_sg; i++)
+                desc[i].next = i+1;
+        return desc;
 }
 
 static inline int virtqueue_add(struct virtqueue *_vq,
@@ -176,8 +130,10 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 {
         struct vring_virtqueue *vq = to_vvq(_vq);
         struct scatterlist *sg;
-        unsigned int i, n, avail, uninitialized_var(prev);
+        struct vring_desc *desc;
+        unsigned int i, n, avail, descs_used, uninitialized_var(prev);
         int head;
+        bool indirect;
 
         START_USE(vq);
 
@@ -201,21 +157,40 @@ static inline int virtqueue_add(struct virtqueue *_vq,
         }
 #endif
 
+        BUG_ON(total_sg > vq->vring.num);
+        BUG_ON(total_sg == 0);
+
+        head = vq->free_head;
+
         /* If the host supports indirect descriptor tables, and we have multiple
          * buffers, then go indirect. FIXME: tune this threshold */
-        if (vq->indirect && total_sg > 1 && vq->vq.num_free) {
-                head = vring_add_indirect(vq, sgs, total_sg,
-                                          out_sgs, in_sgs, gfp);
-                if (likely(head >= 0))
-                        goto add_head;
+        if (vq->indirect && total_sg > 1 && vq->vq.num_free)
+                desc = alloc_indirect(total_sg, gfp);
+        else
+                desc = NULL;
+
+        if (desc) {
+                /* Use a single buffer which doesn't continue */
+                vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
+                vq->vring.desc[head].addr = virt_to_phys(desc);
+                /* avoid kmemleak false positive (hidden by virt_to_phys) */
+                kmemleak_ignore(desc);
+                vq->vring.desc[head].len = total_sg * sizeof(struct vring_desc);
+
+                /* Set up rest to use this indirect table. */
+                i = 0;
+                descs_used = 1;
+                indirect = true;
+        } else {
+                desc = vq->vring.desc;
+                i = head;
+                descs_used = total_sg;
+                indirect = false;
         }
 
-        BUG_ON(total_sg > vq->vring.num);
-        BUG_ON(total_sg == 0);
-
-        if (vq->vq.num_free < total_sg) {
+        if (vq->vq.num_free < descs_used) {
                 pr_debug("Can't add buf len %i - avail = %i\n",
-                         total_sg, vq->vq.num_free);
+                         descs_used, vq->vq.num_free);
                 /* FIXME: for historical reasons, we force a notify here if
                  * there are outgoing parts to the buffer.  Presumably the
                  * host should service the ring ASAP. */
@@ -226,34 +201,35 @@ static inline int virtqueue_add(struct virtqueue *_vq,
         }
 
         /* We're about to use some buffers from the free list. */
-        vq->vq.num_free -= total_sg;
+        vq->vq.num_free -= descs_used;
 
-        head = i = vq->free_head;
         for (n = 0; n < out_sgs; n++) {
                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-                        vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
-                        vq->vring.desc[i].addr = sg_phys(sg);
-                        vq->vring.desc[i].len = sg->length;
+                        desc[i].flags = VRING_DESC_F_NEXT;
+                        desc[i].addr = sg_phys(sg);
+                        desc[i].len = sg->length;
                         prev = i;
-                        i = vq->vring.desc[i].next;
+                        i = desc[i].next;
                 }
         }
         for (; n < (out_sgs + in_sgs); n++) {
                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-                        vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
-                        vq->vring.desc[i].addr = sg_phys(sg);
-                        vq->vring.desc[i].len = sg->length;
+                        desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+                        desc[i].addr = sg_phys(sg);
+                        desc[i].len = sg->length;
                         prev = i;
-                        i = vq->vring.desc[i].next;
+                        i = desc[i].next;
                 }
         }
         /* Last one doesn't continue. */
-        vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
+        desc[prev].flags &= ~VRING_DESC_F_NEXT;
 
         /* Update free pointer */
-        vq->free_head = i;
+        if (indirect)
+                vq->free_head = vq->vring.desc[head].next;
+        else
+                vq->free_head = i;
 
-add_head:
         /* Set token. */
         vq->data[head] = data;
 
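For context, drivers do not call virtqueue_add() directly; they reach it through public wrappers such as virtqueue_add_outbuf(). A minimal usage sketch, assuming a driver that already holds a virtqueue vq and a DMA-able buffer buf of len bytes (vq, buf and len are placeholders for this illustration):

    struct scatterlist sg;

    sg_init_one(&sg, buf, len);
    if (virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC) < 0)
            return -ENOSPC;   /* ring full; retry after the host drains it */
    virtqueue_kick(vq);       /* notify the host if it wants a notification */

Note that a single-entry sg like this exercises the direct path (the patch only goes indirect for total_sg > 1); multi-entry sg tables are what trigger the indirect branch unified above.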