path: root/drivers/net/xen-netback/interface.c
author	Paul Durrant <Paul.Durrant@citrix.com>	2013-12-06 11:36:07 -0500
committer	David S. Miller <davem@davemloft.net>	2013-12-09 20:33:12 -0500
commit	ca2f09f2b2c6c25047cfc545d057c4edfcfe561c (patch)
tree	08e9cc996597b15106ca9cc21128ca51ba51b3aa /drivers/net/xen-netback/interface.c
parent	512137eeff00f73a8a62e481a6575f1556cf962c (diff)
xen-netback: improve guest-receive-side flow control
The way that flow control works without this patch is that, in start_xmit(), the code uses xenvif_count_skb_slots() to predict how many slots xenvif_gop_skb() will consume and then adds this to a 'req_cons_peek' counter, which it then uses to determine if the shared ring has that amount of space available by checking whether 'req_prod' has passed that value. If the ring doesn't have space the tx queue is stopped. xenvif_gop_skb() will then consume slots and update 'req_cons' and issue responses, updating 'rsp_prod' as it goes. The frontend will consume those responses and post new requests by updating req_prod. So, req_prod chases req_cons which chases rsp_prod, and can never exceed that value. Thus if xenvif_count_skb_slots() ever returns a number of slots greater than xenvif_gop_skb() uses, req_cons_peek will get to a value that req_prod cannot possibly achieve (since it's limited by the 'real' req_cons) and, if this happens enough times, req_cons_peek gets more than a ring size ahead of req_cons and the tx queue then remains stopped forever, waiting for an unachievable amount of space to become available in the ring.

Having two routines trying to calculate the same value is always going to be fragile, so this patch does away with that. All we essentially need to do is make sure that we have 'enough stuff' on our internal queue without letting it build up uncontrollably. So start_xmit() makes a cheap optimistic check of how much space is needed for an skb and only turns the queue off if that is unachievable. net_rx_action() is the place where we could do with an accurate prediction but, since that has proven tricky to calculate, a cheap worst-case (but not too bad) estimate is all we really need, since the only thing we *must* prevent is xenvif_gop_skb() consuming more slots than are available.

Without this patch I can trivially stall netback permanently by just doing a large guest-to-guest file copy between two Windows Server 2008R2 VMs on a single host.

Patch tested with frontends in:
- Windows Server 2008R2
- CentOS 6.0
- Debian Squeeze
- Debian Wheezy
- SLES11

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Annie Li <annie.li@oracle.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
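For reference, here is a minimal C sketch of the estimate-and-check scheme the message describes. The slot estimate mirrors the new code in xenvif_start_xmit() below; the availability check is only an assumption about what xenvif_rx_ring_slots_available() (which lives in netback.c and is not part of this file's diff) boils down to, and it omits the re-arming of the ring's event index that a real implementation would need to avoid missed wakeups:

/* Cheap, optimistic estimate of the ring slots an skb will need:
 * one for the header, one per frag, plus one for GSO metadata.
 */
static int xenvif_min_slots_needed(const struct sk_buff *skb)
{
	int min_slots_needed = 1 + skb_shinfo(skb)->nr_frags;

	if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))
		min_slots_needed++;

	return min_slots_needed;
}

/* Illustrative only: do at least 'needed' unconsumed requests remain
 * on the shared rx ring? (prod/cons are free-running RING_IDX values,
 * so unsigned subtraction handles wrap-around.)
 */
static bool xenvif_rx_ring_slots_available_sketch(struct xenvif *vif,
						  int needed)
{
	RING_IDX prod = vif->rx.sring->req_prod;
	RING_IDX cons = vif->rx.req_cons;

	return prod - cons >= needed;
}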
Diffstat (limited to 'drivers/net/xen-netback/interface.c')
-rw-r--r--	drivers/net/xen-netback/interface.c	47
1 file changed, 24 insertions(+), 23 deletions(-)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 870f1fa58370..1dcb9606e6e0 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -46,11 +46,6 @@ int xenvif_schedulable(struct xenvif *vif)
 	return netif_running(vif->dev) && netif_carrier_ok(vif->dev);
 }
 
-static int xenvif_rx_schedulable(struct xenvif *vif)
-{
-	return xenvif_schedulable(vif) && !xenvif_rx_ring_full(vif);
-}
-
 static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
 {
 	struct xenvif *vif = dev_id;
@@ -104,8 +99,8 @@ static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
 {
 	struct xenvif *vif = dev_id;
 
-	if (xenvif_rx_schedulable(vif))
-		netif_wake_queue(vif->dev);
+	vif->rx_event = true;
+	xenvif_kick_thread(vif);
 
 	return IRQ_HANDLED;
 }
@@ -121,24 +116,35 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
 static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
+	int min_slots_needed;
 
 	BUG_ON(skb->dev != dev);
 
 	/* Drop the packet if vif is not ready */
-	if (vif->task == NULL)
+	if (vif->task == NULL || !xenvif_schedulable(vif))
 		goto drop;
 
-	/* Drop the packet if the target domain has no receive buffers. */
-	if (!xenvif_rx_schedulable(vif))
-		goto drop;
+	/* At best we'll need one slot for the header and one for each
+	 * frag.
+	 */
+	min_slots_needed = 1 + skb_shinfo(skb)->nr_frags;
 
-	/* Reserve ring slots for the worst-case number of fragments. */
-	vif->rx_req_cons_peek += xenvif_count_skb_slots(vif, skb);
+	/* If the skb is GSO then we'll also need an extra slot for the
+	 * metadata.
+	 */
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
+	    skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+		min_slots_needed++;
 
-	if (vif->can_queue && xenvif_must_stop_queue(vif))
-		netif_stop_queue(dev);
+	/* If the skb can't possibly fit in the remaining slots
+	 * then turn off the queue to give the ring a chance to
+	 * drain.
+	 */
+	if (!xenvif_rx_ring_slots_available(vif, min_slots_needed))
+		xenvif_stop_queue(vif);
 
-	xenvif_queue_tx_skb(vif, skb);
+	skb_queue_tail(&vif->rx_queue, skb);
+	xenvif_kick_thread(vif);
 
 	return NETDEV_TX_OK;
 
@@ -148,12 +154,6 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-void xenvif_notify_tx_completion(struct xenvif *vif)
-{
-	if (netif_queue_stopped(vif->dev) && xenvif_rx_schedulable(vif))
-		netif_wake_queue(vif->dev);
-}
-
 static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
@@ -378,6 +378,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 	if (err < 0)
 		goto err;
 
+	init_waitqueue_head(&vif->wq);
+
 	if (tx_evtchn == rx_evtchn) {
 		/* feature-split-event-channels == 0 */
 		err = bind_interdomain_evtchn_to_irqhandler(
@@ -410,7 +412,6 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 		disable_irq(vif->rx_irq);
 	}
 
-	init_waitqueue_head(&vif->wq);
 	task = kthread_create(xenvif_kthread,
 			      (void *)vif, "%s", vif->dev->name);
 	if (IS_ERR(task)) {
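Note on the flow above: the removed xenvif_rx_schedulable()/xenvif_notify_tx_completion() pair is replaced by deferring all receive work to the per-vif kthread, which both the rx interrupt and start_xmit() now poke via xenvif_kick_thread(). That is also why init_waitqueue_head(&vif->wq) moves up before the event channels are bound: the rx interrupt may wake vif->wq as soon as the handler is installed. A rough, hedged sketch of the consumer side (the real loop lives in netback.c and is not part of this diff; xenvif_rx_action() and xenvif_start_queue() are assumed counterparts of the calls visible above):

int xenvif_kthread(void *data)
{
	struct xenvif *vif = data;

	while (!kthread_should_stop()) {
		/* Sleep until xenvif_kick_thread() signals new work. */
		wait_event_interruptible(vif->wq,
					 !skb_queue_empty(&vif->rx_queue) ||
					 vif->rx_event ||
					 kthread_should_stop());
		if (kthread_should_stop())
			break;

		/* Push queued skbs into the shared rx ring. */
		if (!skb_queue_empty(&vif->rx_queue))
			xenvif_rx_action(vif);

		vif->rx_event = false;

		/* Backlog drained: let start_xmit() queue packets again. */
		if (skb_queue_empty(&vif->rx_queue) &&
		    netif_queue_stopped(vif->dev))
			xenvif_start_queue(vif);

		cond_resched();
	}

	return 0;
}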