author		Wei Liu <wei.liu2@citrix.com>		2013-09-22 14:03:44 -0400
committer	David S. Miller <davem@davemloft.net>	2013-09-30 19:14:11 -0400
commit		4f0581d25827d5e864bcf07b05d73d0d12a20a5c (patch)
tree		3fc6e6c5430c1e7cb1309e3250df727ca210e70c /drivers/net/xen-netback/netback.c
parent		6459082a3cfb2e79a93ffcbfae3b5924ec9cac77 (diff)
xen-netback: improve ring efficiency for guest RX
There was a bug where the netback routines netbk/xenvif_count_skb_slots
and netbk/xenvif_gop_frag_copy disagreed with each other, which caused
netback to push the wrong number of responses to netfront and eventually
crashed netfront. The bug was fixed in 6e43fc04a ("xen-netback: count
number required slots for an skb more carefully").
Commit 6e43fc04a focused on backportability, so it kept the existing
packing scheme. The drawback of that scheme is that the ring is not
used efficiently, as stated in 6e43fc04a.
An skb->data area laid out like:
| 1111|222222222222|3333 |
is currently arranged in the ring as:
|1111 |222222222222|3333 |
If we can pack it like this instead:
|111122222222|22223333 |
we save one ring slot, which improves ring efficiency.
This patch effectively reverts 6e43fc04a. That patch made count_slots
agree with gop_frag_copy, while this patch goes the other way around --
it makes gop_frag_copy agree with count_slots. The end result is that
they still agree with each other, and the ring is now arranged like:
|111122222222|22223333 |
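As an aside, the saving can be sanity-checked with a small standalone
userspace sketch (illustration only, not netback code; the 12-byte slot
size and the { 4, 12, 4 } chunk sizes are made up to match the diagram
above). It counts the slots needed when every chunk starts a fresh slot
versus when chunks are packed back to back:

#include <stdio.h>

#define SLOT_SIZE 12	/* stands in for MAX_BUFFER_OFFSET in the diagram */

/* Count slots needed for a sequence of data chunks.  With packed == 0
 * every chunk opens a new slot (the old arrangement); with packed == 1
 * a chunk keeps filling the slot left partially used by the previous
 * one (the arrangement this patch switches to).
 */
static unsigned int count_slots(const unsigned int *chunks, int n, int packed)
{
	unsigned int slots = 0, copy_off = 0;
	int i;

	for (i = 0; i < n; i++) {
		unsigned int left = chunks[i];

		if (!packed || slots == 0) {
			slots++;
			copy_off = 0;
		}

		while (left > 0) {
			unsigned int bytes = SLOT_SIZE - copy_off;

			if (bytes == 0) {	/* slot full, open a new one */
				slots++;
				copy_off = 0;
				continue;
			}
			if (bytes > left)
				bytes = left;
			copy_off += bytes;
			left -= bytes;
		}
	}
	return slots;
}

int main(void)
{
	unsigned int chunks[] = { 4, 12, 4 };	/* |1111|222222222222|3333| */

	printf("old packing: %u slots\n", count_slots(chunks, 3, 0));	/* 3 */
	printf("new packing: %u slots\n", count_slots(chunks, 3, 1));	/* 2 */
	return 0;
}

For the example layout it prints 3 slots for the old arrangement and 2
for the packed one, matching the diagrams.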
The patch that improves the packing was first posted by Xi Xiong and
Matt Wilson. I only rebased it on top of net-next and rewrote the commit
message, so I retain all their SoBs. For more information about the
original bug please refer to the email listed below and the commit
message of 6e43fc04a.
Original patch:
http://lists.xen.org/archives/html/xen-devel/2013-07/msg00760.html
Signed-off-by: Xi Xiong <xixiong@amazon.com>
Reviewed-by: Matt Wilson <msw@amazon.com>
[ msw: minor code cleanups, rewrote commit message, adjusted code
to count RX slots instead of meta structures ]
Signed-off-by: Matt Wilson <msw@amazon.com>
Cc: Annie Li <annie.li@oracle.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <Ian.Campbell@citrix.com>
[ liuw: rebased on top of net-next tree, rewrote commit message, coding
style cleanup. ]
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Acked-by: Ian Campbell <Ian.Campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/xen-netback/netback.c')
-rw-r--r--	drivers/net/xen-netback/netback.c	144
1 file changed, 61 insertions(+), 83 deletions(-)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index f3e591c611de..d0b0feb035fb 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -47,6 +47,14 @@
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
 
+/* SKB control block overlay is used to store useful information when
+ * doing guest RX.
+ */
+struct skb_cb_overlay {
+	int meta_slots_used;
+	int peek_slots_count;
+};
+
 /* Provide an option to disable split event channels at load time as
  * event channels are limited resource. Split event channels are
  * enabled by default.
@@ -212,49 +220,6 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
 	return false;
 }
 
-struct xenvif_count_slot_state {
-	unsigned long copy_off;
-	bool head;
-};
-
-unsigned int xenvif_count_frag_slots(struct xenvif *vif,
-				     unsigned long offset, unsigned long size,
-				     struct xenvif_count_slot_state *state)
-{
-	unsigned count = 0;
-
-	offset &= ~PAGE_MASK;
-
-	while (size > 0) {
-		unsigned long bytes;
-
-		bytes = PAGE_SIZE - offset;
-
-		if (bytes > size)
-			bytes = size;
-
-		if (start_new_rx_buffer(state->copy_off, bytes, state->head)) {
-			count++;
-			state->copy_off = 0;
-		}
-
-		if (state->copy_off + bytes > MAX_BUFFER_OFFSET)
-			bytes = MAX_BUFFER_OFFSET - state->copy_off;
-
-		state->copy_off += bytes;
-
-		offset += bytes;
-		size -= bytes;
-
-		if (offset == PAGE_SIZE)
-			offset = 0;
-
-		state->head = false;
-	}
-
-	return count;
-}
-
 /*
  * Figure out how many ring slots we're going to need to send @skb to
  * the guest. This function is essentially a dry run of
@@ -262,40 +227,53 @@ unsigned int xenvif_count_frag_slots(struct xenvif *vif,
  */
 unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
 {
-	struct xenvif_count_slot_state state;
 	unsigned int count;
-	unsigned char *data;
-	unsigned i;
+	int i, copy_off;
+	struct skb_cb_overlay *sco;
 
-	state.head = true;
-	state.copy_off = 0;
+	count = DIV_ROUND_UP(skb_headlen(skb), PAGE_SIZE);
 
-	/* Slot for the first (partial) page of data. */
-	count = 1;
+	copy_off = skb_headlen(skb) % PAGE_SIZE;
 
-	/* Need a slot for the GSO prefix for GSO extra data? */
 	if (skb_shinfo(skb)->gso_size)
 		count++;
 
-	data = skb->data;
-	while (data < skb_tail_pointer(skb)) {
-		unsigned long offset = offset_in_page(data);
-		unsigned long size = PAGE_SIZE - offset;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+		unsigned long offset = skb_shinfo(skb)->frags[i].page_offset;
+		unsigned long bytes;
 
-		if (data + size > skb_tail_pointer(skb))
-			size = skb_tail_pointer(skb) - data;
+		offset &= ~PAGE_MASK;
 
-		count += xenvif_count_frag_slots(vif, offset, size, &state);
+		while (size > 0) {
+			BUG_ON(offset >= PAGE_SIZE);
+			BUG_ON(copy_off > MAX_BUFFER_OFFSET);
 
-		data += size;
-	}
+			bytes = PAGE_SIZE - offset;
 
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
-		unsigned long offset = skb_shinfo(skb)->frags[i].page_offset;
+			if (bytes > size)
+				bytes = size;
+
+			if (start_new_rx_buffer(copy_off, bytes, 0)) {
+				count++;
+				copy_off = 0;
+			}
 
-		count += xenvif_count_frag_slots(vif, offset, size, &state);
+			if (copy_off + bytes > MAX_BUFFER_OFFSET)
+				bytes = MAX_BUFFER_OFFSET - copy_off;
+
+			copy_off += bytes;
+
+			offset += bytes;
+			size -= bytes;
+
+			if (offset == PAGE_SIZE)
+				offset = 0;
+		}
 	}
+
+	sco = (struct skb_cb_overlay *)skb->cb;
+	sco->peek_slots_count = count;
 	return count;
 }
 
@@ -327,14 +305,11 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
 	return meta;
 }
 
-/*
- * Set up the grant operations for this fragment. If it's a flipping
- * interface, we also set up the unmap request from here.
- */
+/* Set up the grant operations for this fragment. */
 static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 				 struct netrx_pending_operations *npo,
 				 struct page *page, unsigned long size,
-				 unsigned long offset, int *head)
+				 unsigned long offset, int head, int *first)
 {
 	struct gnttab_copy *copy_gop;
 	struct xenvif_rx_meta *meta;
@@ -358,12 +333,12 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 		if (bytes > size)
 			bytes = size;
 
-		if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
+		if (start_new_rx_buffer(npo->copy_off, bytes, head)) {
 			/*
 			 * Netfront requires there to be some data in the head
 			 * buffer.
 			 */
-			BUG_ON(*head);
+			BUG_ON(*first);
 
 			meta = get_next_rx_buffer(vif, npo);
 		}
@@ -397,10 +372,10 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 		}
 
 		/* Leave a gap for the GSO descriptor. */
-		if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
+		if (*first && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
 			vif->rx.req_cons++;
 
-		*head = 0; /* There must be something in this buffer now. */
+		*first = 0; /* There must be something in this buffer now. */
 
 	}
 }
@@ -426,7 +401,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 	struct xen_netif_rx_request *req;
 	struct xenvif_rx_meta *meta;
 	unsigned char *data;
-	int head = 1;
+	int first = 1;
 	int old_meta_prod;
 
 	old_meta_prod = npo->meta_prod;
@@ -462,7 +437,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 		len = skb_tail_pointer(skb) - data;
 
 		xenvif_gop_frag_copy(vif, skb, npo,
-				     virt_to_page(data), len, offset, &head);
+				     virt_to_page(data), len, offset, 1, &first);
 		data += len;
 	}
 
@@ -471,7 +446,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
 				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
 				     skb_shinfo(skb)->frags[i].page_offset,
-				     &head);
+				     0, &first);
 	}
 
 	return npo->meta_prod - old_meta_prod;
@@ -529,10 +504,6 @@ static void xenvif_add_frag_responses(struct xenvif *vif, int status,
 	}
 }
 
-struct skb_cb_overlay {
-	int meta_slots_used;
-};
-
 static void xenvif_kick_thread(struct xenvif *vif)
 {
 	wake_up(&vif->wq);
@@ -563,19 +534,26 @@ void xenvif_rx_action(struct xenvif *vif)
 	count = 0;
 
 	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
+		RING_IDX old_rx_req_cons;
+
 		vif = netdev_priv(skb->dev);
 		nr_frags = skb_shinfo(skb)->nr_frags;
 
+		old_rx_req_cons = vif->rx.req_cons;
 		sco = (struct skb_cb_overlay *)skb->cb;
 		sco->meta_slots_used = xenvif_gop_skb(skb, &npo);
 
-		count += nr_frags + 1;
+		count += vif->rx.req_cons - old_rx_req_cons;
 
 		__skb_queue_tail(&rxq, skb);
 
+		skb = skb_peek(&vif->rx_queue);
+		if (skb == NULL)
+			break;
+		sco = (struct skb_cb_overlay *)skb->cb;
+
 		/* Filled the batch queue? */
-		/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
-		if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
+		if (count + sco->peek_slots_count >= XEN_NETIF_RX_RING_SIZE)
 			break;
 	}
 