author    David Vrabel <david.vrabel@citrix.com>    2015-01-20 09:49:52 -0500
committer David S. Miller <davem@davemloft.net>     2015-01-23 21:01:58 -0500
commit    1650d5455bd2dc6b5ee134bd6fc1a3236c266b5b (patch)
tree      ade96080aac11eaf88a50f000a60c46ef633fb39
parent    f4ac8292b09350868418983fc1b85a6c6e48a177 (diff)
xen-netback: always fully coalesce guest Rx packets
Always fully coalesce guest Rx packets into the minimum number of ring
slots. Reducing the number of slots per packet has significant
performance benefits when receiving off-host traffic.

Results from XenServer's performance benchmarks:

                          Baseline   Full coalesce
Interhost VM receive      7.2 Gb/s   11 Gb/s
Interhost aggregate        24 Gb/s   24 Gb/s
Intrahost single stream    14 Gb/s   14 Gb/s
Intrahost aggregate        34 Gb/s   34 Gb/s

However, this can increase the number of grant ops per packet which
decreases performance of backend (dom0) to VM traffic (by ~10%)
/unless/ grant copy has been optimized for adjacent ops with the same
source or destination (see "grant-table: defer releasing pages
acquired in a grant copy"[1] expected in Xen 4.6).

[1] http://lists.xen.org/archives/html/xen-devel/2015-01/msg01118.html

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  drivers/net/xen-netback/common.h  |   1
-rw-r--r--  drivers/net/xen-netback/netback.c | 107
2 files changed, 3 insertions(+), 105 deletions(-)
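
As a back-of-the-envelope illustration of why coalescing reduces slot usage, the following standalone sketch (not netback code; the packet layout, SLOT_SIZE, and helper are assumptions chosen for the example) compares a per-fragment worst-case slot estimate with the number of slots the same payload needs when fully coalesced into page-sized buffers.

/*
 * Illustrative sketch only -- not kernel code.  It contrasts a per-fragment
 * worst-case ring-slot estimate (each fragment's page offset can push it
 * across a page boundary) with the slot count when the whole payload is
 * fully coalesced into page-sized buffers.  The packet layout is made up.
 */
#include <stdio.h>

#define SLOT_SIZE 4096u		/* assume one ring slot holds one 4 KiB page */

static unsigned int div_round_up(unsigned int n, unsigned int d)
{
	return (n + d - 1) / d;
}

int main(void)
{
	/* hypothetical packet: 1 KiB linear head plus three 2 KiB fragments,
	 * each starting 3 KiB into its page */
	unsigned int head_len = 1024;
	unsigned int nr_frags = 3, frag_size = 2048, frag_offset = 3072;
	unsigned int total_len = head_len + nr_frags * frag_size;

	/* worst case without coalescing: every fragment may straddle a page */
	unsigned int worst_case = div_round_up(head_len, SLOT_SIZE);
	for (unsigned int i = 0; i < nr_frags; i++)
		worst_case += div_round_up(frag_offset + frag_size, SLOT_SIZE);

	/* fully coalesced: slots depend only on the total payload length */
	unsigned int coalesced = div_round_up(total_len, SLOT_SIZE);

	printf("payload %u bytes: worst case %u slots, fully coalesced %u slots\n",
	       total_len, worst_case, coalesced);	/* 7168 bytes: 7 vs 2 */
	return 0;
}

Under this made-up layout the same 7 KiB packet would consume 7 ring slots in the per-fragment worst case but only 2 when fully coalesced, which is the effect behind the receive-throughput improvement quoted above.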
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 5f1fda44882b..589fa256256b 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -251,7 +251,6 @@ struct xenvif {
 struct xenvif_rx_cb {
 	unsigned long expires;
 	int meta_slots_used;
-	bool full_coalesce;
 };
 
 #define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 908e65e9b821..49322b6c32df 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -233,51 +233,6 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
 	}
 }
 
-/*
- * Returns true if we should start a new receive buffer instead of
- * adding 'size' bytes to a buffer which currently contains 'offset'
- * bytes.
- */
-static bool start_new_rx_buffer(int offset, unsigned long size, int head,
-				bool full_coalesce)
-{
-	/* simple case: we have completely filled the current buffer. */
-	if (offset == MAX_BUFFER_OFFSET)
-		return true;
-
-	/*
-	 * complex case: start a fresh buffer if the current frag
-	 * would overflow the current buffer but only if:
-	 *     (i)   this frag would fit completely in the next buffer
-	 * and (ii)  there is already some data in the current buffer
-	 * and (iii) this is not the head buffer.
-	 * and (iv)  there is no need to fully utilize the buffers
-	 *
-	 * Where:
-	 * - (i) stops us splitting a frag into two copies
-	 *   unless the frag is too large for a single buffer.
-	 * - (ii) stops us from leaving a buffer pointlessly empty.
-	 * - (iii) stops us leaving the first buffer
-	 *   empty. Strictly speaking this is already covered
-	 *   by (ii) but is explicitly checked because
-	 *   netfront relies on the first buffer being
-	 *   non-empty and can crash otherwise.
-	 * - (iv) is needed for skbs which can use up more than MAX_SKB_FRAGS
-	 *   slot
-	 *
-	 * This means we will effectively linearise small
-	 * frags but do not needlessly split large buffers
-	 * into multiple copies tend to give large frags their
-	 * own buffers as before.
-	 */
-	BUG_ON(size > MAX_BUFFER_OFFSET);
-	if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head &&
-	    !full_coalesce)
-		return true;
-
-	return false;
-}
-
 struct netrx_pending_operations {
 	unsigned copy_prod, copy_cons;
 	unsigned meta_prod, meta_cons;
@@ -336,24 +291,13 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb
 		BUG_ON(offset >= PAGE_SIZE);
 		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
 
-		bytes = PAGE_SIZE - offset;
+		if (npo->copy_off == MAX_BUFFER_OFFSET)
+			meta = get_next_rx_buffer(queue, npo);
 
+		bytes = PAGE_SIZE - offset;
 		if (bytes > size)
 			bytes = size;
 
-		if (start_new_rx_buffer(npo->copy_off,
-					bytes,
-					*head,
-					XENVIF_RX_CB(skb)->full_coalesce)) {
-			/*
-			 * Netfront requires there to be some data in the head
-			 * buffer.
-			 */
-			BUG_ON(*head);
-
-			meta = get_next_rx_buffer(queue, npo);
-		}
-
 		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
 			bytes = MAX_BUFFER_OFFSET - npo->copy_off;
 
@@ -652,60 +596,15 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
 
 	while (xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)
 	       && (skb = xenvif_rx_dequeue(queue)) != NULL) {
-		RING_IDX max_slots_needed;
 		RING_IDX old_req_cons;
 		RING_IDX ring_slots_used;
-		int i;
 
 		queue->last_rx_time = jiffies;
 
-		/* We need a cheap worse case estimate for the number of
-		 * slots we'll use.
-		 */
-
-		max_slots_needed = DIV_ROUND_UP(offset_in_page(skb->data) +
-						skb_headlen(skb),
-						PAGE_SIZE);
-		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-			unsigned int size;
-			unsigned int offset;
-
-			size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
-			offset = skb_shinfo(skb)->frags[i].page_offset;
-
-			/* For a worse-case estimate we need to factor in
-			 * the fragment page offset as this will affect the
-			 * number of times xenvif_gop_frag_copy() will
-			 * call start_new_rx_buffer().
-			 */
-			max_slots_needed += DIV_ROUND_UP(offset + size,
-							 PAGE_SIZE);
-		}
-
-		/* To avoid the estimate becoming too pessimal for some
-		 * frontends that limit posted rx requests, cap the estimate
-		 * at MAX_SKB_FRAGS. In this case netback will fully coalesce
-		 * the skb into the provided slots.
-		 */
-		if (max_slots_needed > MAX_SKB_FRAGS) {
-			max_slots_needed = MAX_SKB_FRAGS;
-			XENVIF_RX_CB(skb)->full_coalesce = true;
-		} else {
-			XENVIF_RX_CB(skb)->full_coalesce = false;
-		}
-
-		/* We may need one more slot for GSO metadata */
-		if (skb_is_gso(skb) &&
-		    (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
-		     skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
-			max_slots_needed++;
-
 		old_req_cons = queue->rx.req_cons;
 		XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue);
 		ring_slots_used = queue->rx.req_cons - old_req_cons;
 
-		BUG_ON(ring_slots_used > max_slots_needed);
-
 		__skb_queue_tail(&rxq, skb);
 	}
 
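
To make the simplified copy path easier to follow, below is a minimal standalone simulation (not the netback implementation; PAGE_SIZE, MAX_BUFFER_OFFSET, and the fragment layout are assumed values) of the clamping logic in the new xenvif_gop_frag_copy() hunk above: advance to the next ring buffer only when the current one is completely full, then clamp each copy to both the source page boundary and the space remaining in the destination buffer.

/*
 * Standalone simulation only -- not the netback implementation.  It mirrors
 * the clamping behaviour of the patched copy loop: a new ring buffer is
 * started only once the current one is full, and each copy is limited by
 * the source page boundary and the destination buffer's remaining space.
 */
#include <stdio.h>

#define PAGE_SIZE	  4096u
#define MAX_BUFFER_OFFSET 4096u		/* capacity of one receive buffer */

int main(void)
{
	/* hypothetical fragment: 6000 bytes starting 1000 bytes into a page */
	unsigned int offset = 1000, size = 6000;
	unsigned int copy_off = 0;	/* fill level of the current buffer */
	unsigned int buffers = 1;

	while (size > 0) {
		unsigned int bytes;

		/* current buffer is full: start the next one */
		if (copy_off == MAX_BUFFER_OFFSET) {
			copy_off = 0;
			buffers++;
		}

		/* never copy past the end of the source page ... */
		bytes = PAGE_SIZE - offset;
		if (bytes > size)
			bytes = size;
		/* ... or past the end of the destination buffer */
		if (copy_off + bytes > MAX_BUFFER_OFFSET)
			bytes = MAX_BUFFER_OFFSET - copy_off;

		copy_off += bytes;
		offset = (offset + bytes) % PAGE_SIZE;	/* next source position */
		size -= bytes;

		printf("copied %4u bytes into buffer %u (fill %u/%u)\n",
		       bytes, buffers, copy_off, MAX_BUFFER_OFFSET);
	}
	return 0;
}

Because each destination buffer is filled to MAX_BUFFER_OFFSET before a new one is started, a packet ends up in roughly DIV_ROUND_UP(len, MAX_BUFFER_OFFSET) slots regardless of how its fragments are laid out, which is why the worst-case slot estimate and the full_coalesce flag removed above are no longer needed.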