author		Wei Liu <wei.liu2@citrix.com>		2013-09-22 14:03:44 -0400
committer	David S. Miller <davem@davemloft.net>	2013-09-30 19:14:11 -0400
commit		4f0581d25827d5e864bcf07b05d73d0d12a20a5c (patch)
tree		3fc6e6c5430c1e7cb1309e3250df727ca210e70c /drivers/net/xen-netback/netback.c
parent		6459082a3cfb2e79a93ffcbfae3b5924ec9cac77 (diff)
xen-netback: improve ring efficiency for guest RX
There was a bug where the netback routines netbk/xenvif_count_skb_slots
and netbk/xenvif_gop_frag_copy disagreed with each other, which caused
netback to push the wrong number of responses to netfront and eventually
crashed netfront. The bug was fixed in 6e43fc04a ("xen-netback: count
number required slots for an skb more carefully").
Commit 6e43fc04a focused on backportability, so it kept the existing
packing scheme. The drawback of that scheme is that the ring is not
used efficiently, as stated in 6e43fc04a.
An skb->data area laid out like:
| 1111|222222222222|3333 |
is currently arranged in the ring as:
|1111 |222222222222|3333 |
If we can pack it like this instead:
|111122222222|22223333 |
we save one ring slot, which improves ring efficiency.
This patch effectively reverts 6e43fc04a. That patch made count_slots
agree with gop_frag_copy, while this patch goes the other way around --
it makes gop_frag_copy agree with count_slots. The end result is that
they still agree with each other, and the ring is now arranged like:
|111122222222|22223333 |
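As an aside, the saving can be sanity-checked with a small standalone
userspace sketch (illustration only, not netback code; the 12-byte slot
size and the { 4, 12, 4 } chunk sizes are made up to match the diagram
above). It counts the slots needed when every chunk starts a fresh slot
versus when chunks are packed back to back:

#include <stdio.h>

#define SLOT_SIZE 12	/* stands in for MAX_BUFFER_OFFSET in the diagram */

/* Count slots needed for a sequence of data chunks.  With packed == 0
 * every chunk opens a new slot (the old arrangement); with packed == 1
 * a chunk keeps filling the slot left partially used by the previous
 * one (the arrangement this patch switches to).
 */
static unsigned int count_slots(const unsigned int *chunks, int n, int packed)
{
	unsigned int slots = 0, copy_off = 0;
	int i;

	for (i = 0; i < n; i++) {
		unsigned int left = chunks[i];

		if (!packed || slots == 0) {
			slots++;
			copy_off = 0;
		}

		while (left > 0) {
			unsigned int bytes = SLOT_SIZE - copy_off;

			if (bytes == 0) {	/* slot full, open a new one */
				slots++;
				copy_off = 0;
				continue;
			}
			if (bytes > left)
				bytes = left;
			copy_off += bytes;
			left -= bytes;
		}
	}
	return slots;
}

int main(void)
{
	unsigned int chunks[] = { 4, 12, 4 };	/* |1111|222222222222|3333| */

	printf("old packing: %u slots\n", count_slots(chunks, 3, 0));	/* 3 */
	printf("new packing: %u slots\n", count_slots(chunks, 3, 1));	/* 2 */
	return 0;
}

For the example layout it prints 3 slots for the old arrangement and 2
for the packed one, matching the diagrams.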
The patch that improves the packing was first posted by Xi Xiong and
Matt Wilson. I only rebased it on top of net-next and rewrote the commit
message, so I retain all their SoBs. For more information about the
original bug please refer to the email listed below and the commit
message of 6e43fc04a.
Original patch:
http://lists.xen.org/archives/html/xen-devel/2013-07/msg00760.html
Signed-off-by: Xi Xiong <xixiong@amazon.com>
Reviewed-by: Matt Wilson <msw@amazon.com>
[ msw: minor code cleanups, rewrote commit message, adjusted code
to count RX slots instead of meta structures ]
Signed-off-by: Matt Wilson <msw@amazon.com>
Cc: Annie Li <annie.li@oracle.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <Ian.Campbell@citrix.com>
[ liuw: rebased on top of net-next tree, rewrote commit message, coding
style cleanup. ]
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Acked-by: Ian Campbell <Ian.Campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/xen-netback/netback.c')
-rw-r--r--	drivers/net/xen-netback/netback.c	144
1 file changed, 61 insertions(+), 83 deletions(-)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index f3e591c611de..d0b0feb035fb 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -47,6 +47,14 @@
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
 
+/* SKB control block overlay is used to store useful information when
+ * doing guest RX.
+ */
+struct skb_cb_overlay {
+	int meta_slots_used;
+	int peek_slots_count;
+};
+
 /* Provide an option to disable split event channels at load time as
  * event channels are limited resource. Split event channels are
  * enabled by default.
@@ -212,49 +220,6 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
 	return false;
 }
 
-struct xenvif_count_slot_state {
-	unsigned long copy_off;
-	bool head;
-};
-
-unsigned int xenvif_count_frag_slots(struct xenvif *vif,
-				     unsigned long offset, unsigned long size,
-				     struct xenvif_count_slot_state *state)
-{
-	unsigned count = 0;
-
-	offset &= ~PAGE_MASK;
-
-	while (size > 0) {
-		unsigned long bytes;
-
-		bytes = PAGE_SIZE - offset;
-
-		if (bytes > size)
-			bytes = size;
-
-		if (start_new_rx_buffer(state->copy_off, bytes, state->head)) {
-			count++;
-			state->copy_off = 0;
-		}
-
-		if (state->copy_off + bytes > MAX_BUFFER_OFFSET)
-			bytes = MAX_BUFFER_OFFSET - state->copy_off;
-
-		state->copy_off += bytes;
-
-		offset += bytes;
-		size -= bytes;
-
-		if (offset == PAGE_SIZE)
-			offset = 0;
-
-		state->head = false;
-	}
-
-	return count;
-}
-
 /*
  * Figure out how many ring slots we're going to need to send @skb to
  * the guest. This function is essentially a dry run of
@@ -262,40 +227,53 @@ unsigned int xenvif_count_frag_slots(struct xenvif *vif,
  */
 unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
 {
-	struct xenvif_count_slot_state state;
 	unsigned int count;
-	unsigned char *data;
-	unsigned i;
+	int i, copy_off;
+	struct skb_cb_overlay *sco;
 
-	state.head = true;
-	state.copy_off = 0;
+	count = DIV_ROUND_UP(skb_headlen(skb), PAGE_SIZE);
 
-	/* Slot for the first (partial) page of data. */
-	count = 1;
+	copy_off = skb_headlen(skb) % PAGE_SIZE;
 
-	/* Need a slot for the GSO prefix for GSO extra data? */
 	if (skb_shinfo(skb)->gso_size)
 		count++;
 
-	data = skb->data;
-	while (data < skb_tail_pointer(skb)) {
-		unsigned long offset = offset_in_page(data);
-		unsigned long size = PAGE_SIZE - offset;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+		unsigned long offset = skb_shinfo(skb)->frags[i].page_offset;
+		unsigned long bytes;
 
-		if (data + size > skb_tail_pointer(skb))
-			size = skb_tail_pointer(skb) - data;
+		offset &= ~PAGE_MASK;
 
-		count += xenvif_count_frag_slots(vif, offset, size, &state);
+		while (size > 0) {
+			BUG_ON(offset >= PAGE_SIZE);
+			BUG_ON(copy_off > MAX_BUFFER_OFFSET);
 
-		data += size;
-	}
+			bytes = PAGE_SIZE - offset;
 
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
-		unsigned long offset = skb_shinfo(skb)->frags[i].page_offset;
+			if (bytes > size)
+				bytes = size;
+
+			if (start_new_rx_buffer(copy_off, bytes, 0)) {
+				count++;
+				copy_off = 0;
+			}
 
-		count += xenvif_count_frag_slots(vif, offset, size, &state);
+			if (copy_off + bytes > MAX_BUFFER_OFFSET)
+				bytes = MAX_BUFFER_OFFSET - copy_off;
+
+			copy_off += bytes;
+
+			offset += bytes;
+			size -= bytes;
+
+			if (offset == PAGE_SIZE)
+				offset = 0;
+		}
 	}
+
+	sco = (struct skb_cb_overlay *)skb->cb;
+	sco->peek_slots_count = count;
 	return count;
 }
 
@@ -327,14 +305,11 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
 	return meta;
 }
 
-/*
- * Set up the grant operations for this fragment. If it's a flipping
- * interface, we also set up the unmap request from here.
- */
+/* Set up the grant operations for this fragment. */
 static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 				 struct netrx_pending_operations *npo,
 				 struct page *page, unsigned long size,
-				 unsigned long offset, int *head)
+				 unsigned long offset, int head, int *first)
 {
 	struct gnttab_copy *copy_gop;
 	struct xenvif_rx_meta *meta;
@@ -358,12 +333,12 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 		if (bytes > size)
 			bytes = size;
 
-		if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
+		if (start_new_rx_buffer(npo->copy_off, bytes, head)) {
 			/*
 			 * Netfront requires there to be some data in the head
 			 * buffer.
 			 */
-			BUG_ON(*head);
+			BUG_ON(*first);
 
 			meta = get_next_rx_buffer(vif, npo);
 		}
@@ -397,10 +372,10 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 		}
 
 		/* Leave a gap for the GSO descriptor. */
-		if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
+		if (*first && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
 			vif->rx.req_cons++;
 
-		*head = 0; /* There must be something in this buffer now. */
+		*first = 0; /* There must be something in this buffer now. */
 
 	}
 }
@@ -426,7 +401,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 	struct xen_netif_rx_request *req;
 	struct xenvif_rx_meta *meta;
 	unsigned char *data;
-	int head = 1;
+	int first = 1;
 	int old_meta_prod;
 
 	old_meta_prod = npo->meta_prod;
@@ -462,7 +437,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 		len = skb_tail_pointer(skb) - data;
 
 		xenvif_gop_frag_copy(vif, skb, npo,
-				     virt_to_page(data), len, offset, &head);
+				     virt_to_page(data), len, offset, 1, &first);
 		data += len;
 	}
 
@@ -471,7 +446,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
 				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
 				     skb_shinfo(skb)->frags[i].page_offset,
-				     &head);
+				     0, &first);
 	}
 
 	return npo->meta_prod - old_meta_prod;
@@ -529,10 +504,6 @@ static void xenvif_add_frag_responses(struct xenvif *vif, int status,
 	}
 }
 
-struct skb_cb_overlay {
-	int meta_slots_used;
-};
-
 static void xenvif_kick_thread(struct xenvif *vif)
 {
 	wake_up(&vif->wq);
@@ -563,19 +534,26 @@ void xenvif_rx_action(struct xenvif *vif)
 	count = 0;
 
 	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
+		RING_IDX old_rx_req_cons;
+
 		vif = netdev_priv(skb->dev);
 		nr_frags = skb_shinfo(skb)->nr_frags;
 
+		old_rx_req_cons = vif->rx.req_cons;
 		sco = (struct skb_cb_overlay *)skb->cb;
 		sco->meta_slots_used = xenvif_gop_skb(skb, &npo);
 
-		count += nr_frags + 1;
+		count += vif->rx.req_cons - old_rx_req_cons;
 
 		__skb_queue_tail(&rxq, skb);
 
+		skb = skb_peek(&vif->rx_queue);
+		if (skb == NULL)
+			break;
+		sco = (struct skb_cb_overlay *)skb->cb;
+
 		/* Filled the batch queue? */
-		/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
-		if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
+		if (count + sco->peek_slots_count >= XEN_NETIF_RX_RING_SIZE)
 			break;
 	}
 