path: root/drivers/net/xen-netback/common.h
author	Paul Durrant <Paul.Durrant@citrix.com>	2013-12-06 11:36:07 -0500
committer	David S. Miller <davem@davemloft.net>	2013-12-09 20:33:12 -0500
commit	ca2f09f2b2c6c25047cfc545d057c4edfcfe561c (patch)
tree	08e9cc996597b15106ca9cc21128ca51ba51b3aa /drivers/net/xen-netback/common.h
parent	512137eeff00f73a8a62e481a6575f1556cf962c (diff)
xen-netback: improve guest-receive-side flow control
The way that flow control works without this patch is that, in start_xmit() the code uses xenvif_count_skb_slots() to predict how many slots xenvif_gop_skb() will consume and then adds this to a 'req_cons_peek' counter which it then uses to determine if the shared ring has that amount of space available by checking whether 'req_prod' has passed that value. If the ring doesn't have space the tx queue is stopped. xenvif_gop_skb() will then consume slots and update 'req_cons' and issue responses, updating 'rsp_prod' as it goes. The frontend will consume those responses and post new requests by updating req_prod. So, req_prod chases req_cons which chases rsp_prod, and can never exceed that value. Thus if xenvif_count_skb_slots() ever returns a number of slots greater than xenvif_gop_skb() uses, req_cons_peek will get to a value that req_prod cannot possibly achieve (since it's limited by the 'real' req_cons) and, if this happens enough times, req_cons_peek gets more than a ring size ahead of req_cons and the tx queue then remains stopped forever, waiting for an unachievable amount of space to become available in the ring.

Having two routines trying to calculate the same value is always going to be fragile, so this patch does away with that. All we essentially need to do is make sure that we have 'enough stuff' on our internal queue without letting it build up uncontrollably. So start_xmit() makes a cheap optimistic check of how much space is needed for an skb and only turns the queue off if that is unachievable. net_rx_action() is the place where we could do with an accurate prediction but, since that has proven tricky to calculate, a cheap worst-case (but not too bad) estimate is all we really need, since the only thing we *must* prevent is xenvif_gop_skb() consuming more slots than are available.

Without this patch I can trivially stall netback permanently by just doing a large guest-to-guest file copy between two Windows Server 2008R2 VMs on a single host.

Patch tested with frontends in:
- Windows Server 2008R2
- CentOS 6.0
- Debian Squeeze
- Debian Wheezy
- SLES11

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Annie Li <annie.li@oracle.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
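[Editor's note] Conceptually, the "cheap optimistic check" in start_xmit() described above amounts to something like the sketch below. The helpers xenvif_rx_ring_slots_available(), xenvif_stop_queue() and xenvif_kick_thread() are the ones declared by this patch in common.h; the slot estimate and surrounding control flow are illustrative, not a copy of the patch body in interface.c.

/* Illustrative sketch only: estimate the minimum slots an skb could
 * need, stop the queue if even that cannot fit, then hand the skb to
 * the worker thread.
 */
static netdev_tx_t xenvif_start_xmit_sketch(struct sk_buff *skb,
					    struct net_device *dev)
{
	struct xenvif *vif = netdev_priv(dev);
	int min_slots_needed;

	/* At best one slot for the header plus one per fragment... */
	min_slots_needed = 1 + skb_shinfo(skb)->nr_frags;

	/* ...and one more if GSO metadata has to be sent as well. */
	if (skb_is_gso(skb))
		min_slots_needed++;

	/* If even this minimal estimate cannot fit in the remaining
	 * slots, turn the queue off and let the ring drain; the worker
	 * thread wakes the queue again once space appears.
	 */
	if (!xenvif_rx_ring_slots_available(vif, min_slots_needed))
		xenvif_stop_queue(vif);

	/* Queue the skb on the internal rx_queue and kick the thread. */
	skb_queue_tail(&vif->rx_queue, skb);
	xenvif_kick_thread(vif);

	return NETDEV_TX_OK;
}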
Diffstat (limited to 'drivers/net/xen-netback/common.h')
-rw-r--r--	drivers/net/xen-netback/common.h | 27
1 file changed, 11 insertions(+), 16 deletions(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 08ae01b41c83..ba30a6d9fefa 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -136,12 +136,10 @@ struct xenvif {
 	char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */
 	struct xen_netif_rx_back_ring rx;
 	struct sk_buff_head rx_queue;
-
-	/* Allow xenvif_start_xmit() to peek ahead in the rx request
-	 * ring. This is a prediction of what rx_req_cons will be
-	 * once all queued skbs are put on the ring.
+	/* Set when the RX interrupt is triggered by the frontend.
+	 * The worker thread may need to wake the queue.
 	 */
-	RING_IDX rx_req_cons_peek;
+	bool rx_event;
 
 	/* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
 	 * head/fragment page uses 2 copy operations because it
@@ -198,8 +196,6 @@ void xenvif_xenbus_fini(void);
 
 int xenvif_schedulable(struct xenvif *vif);
 
-int xenvif_rx_ring_full(struct xenvif *vif);
-
 int xenvif_must_stop_queue(struct xenvif *vif);
 
 /* (Un)Map communication rings. */
@@ -211,21 +207,20 @@ int xenvif_map_frontend_rings(struct xenvif *vif,
 /* Check for SKBs from frontend and schedule backend processing */
 void xenvif_check_rx_xenvif(struct xenvif *vif);
 
-/* Queue an SKB for transmission to the frontend */
-void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb);
-/* Notify xenvif that ring now has space to send an skb to the frontend */
-void xenvif_notify_tx_completion(struct xenvif *vif);
-
 /* Prevent the device from generating any further traffic. */
 void xenvif_carrier_off(struct xenvif *vif);
 
-/* Returns number of ring slots required to send an skb to the frontend */
-unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb);
-
 int xenvif_tx_action(struct xenvif *vif, int budget);
-void xenvif_rx_action(struct xenvif *vif);
 
 int xenvif_kthread(void *data);
+void xenvif_kick_thread(struct xenvif *vif);
+
+/* Determine whether the needed number of slots (req) are available,
+ * and set req_event if not.
+ */
+bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed);
+
+void xenvif_stop_queue(struct xenvif *vif);
 
 extern bool separate_tx_rx_irq;
 
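[Editor's note] The header above only declares the new slot check. A minimal sketch of how such a check could be written against the standard Xen shared-ring indices (sring->req_prod, req_cons, sring->req_event of the xen_netif_rx_back_ring shown in the struct) follows the comment in common.h, but it is an illustration, not necessarily the exact body added elsewhere in this patch:

/* Sketch: return true if at least 'needed' unconsumed request slots are
 * available; otherwise arm req_event so the frontend notifies us when it
 * posts more requests, re-checking in case a new request raced with us.
 */
bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed)
{
	RING_IDX prod, cons;

	do {
		prod = vif->rx.sring->req_prod;
		cons = vif->rx.req_cons;

		/* Enough outstanding requests to cover the estimate? */
		if (prod - cons >= needed)
			return true;

		/* Not enough: ask to be notified on the next request. */
		vif->rx.sring->req_event = prod + 1;

		/* Make sure the event write is visible before we look at
		 * req_prod again.
		 */
		mb();
	} while (vif->rx.sring->req_prod != prod);

	return false;
}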