aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Vrabel <david.vrabel@citrix.com>2014-10-22 09:08:55 -0400
committerDavid S. Miller <davem@davemloft.net>2014-10-25 14:15:20 -0400
commitecf08d2dbb96d5a4b4bcc53a39e8d29cc8fef02e (patch)
tree081341a32e4d7954ea805b145c18176876905ca7
parentf48da8b14d04ca87ffcffe68829afd45f926ec6a (diff)
xen-netback: reintroduce guest Rx stall detection
If a frontend is not receiving packets, it is useful to detect this and turn off the carrier so packets are dropped early instead of being queued and drained when they expire. A to-guest queue is stalled if it doesn't have enough free slots for an extended period of time (default 60 s). If at least one queue is stalled, the carrier is turned off (in the expectation that the other queues will soon stall as well). The carrier is only turned on once all queues are ready. When the frontend connects, all the queues start in the stalled state and only become ready once the frontend queues enough Rx requests. Signed-off-by: David Vrabel <david.vrabel@citrix.com> Reviewed-by: Wei Liu <wei.liu2@citrix.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/xen-netback/common.h5
-rw-r--r--drivers/net/xen-netback/interface.c5
-rw-r--r--drivers/net/xen-netback/netback.c76
-rw-r--r--drivers/net/xen-netback/xenbus.c1
4 files changed, 86 insertions, 1 deletions
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index c2642402b7a1..083ecc93fe5e 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -179,6 +179,8 @@ struct xenvif_queue { /* Per-queue data for xenvif */
179 179
180 unsigned int rx_queue_max; 180 unsigned int rx_queue_max;
181 unsigned int rx_queue_len; 181 unsigned int rx_queue_len;
182 unsigned long last_rx_time;
183 bool stalled;
182 184
183 struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS]; 185 struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];
184 186
@@ -232,6 +234,9 @@ struct xenvif {
232 /* Queues */ 234 /* Queues */
233 struct xenvif_queue *queues; 235 struct xenvif_queue *queues;
234 unsigned int num_queues; /* active queues, resource allocated */ 236 unsigned int num_queues; /* active queues, resource allocated */
237 unsigned int stalled_queues;
238
239 spinlock_t lock;
235 240
236#ifdef CONFIG_DEBUG_FS 241#ifdef CONFIG_DEBUG_FS
237 struct dentry *xenvif_dbg_root; 242 struct dentry *xenvif_dbg_root;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index a134d52f55b4..895fe84011e7 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -419,6 +419,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
419 vif->queues = NULL; 419 vif->queues = NULL;
420 vif->num_queues = 0; 420 vif->num_queues = 0;
421 421
422 spin_lock_init(&vif->lock);
423
422 dev->netdev_ops = &xenvif_netdev_ops; 424 dev->netdev_ops = &xenvif_netdev_ops;
423 dev->hw_features = NETIF_F_SG | 425 dev->hw_features = NETIF_F_SG |
424 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 426 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -505,7 +507,6 @@ void xenvif_carrier_on(struct xenvif *vif)
505 dev_set_mtu(vif->dev, ETH_DATA_LEN); 507 dev_set_mtu(vif->dev, ETH_DATA_LEN);
506 netdev_update_features(vif->dev); 508 netdev_update_features(vif->dev);
507 set_bit(VIF_STATUS_CONNECTED, &vif->status); 509 set_bit(VIF_STATUS_CONNECTED, &vif->status);
508 netif_carrier_on(vif->dev);
509 if (netif_running(vif->dev)) 510 if (netif_running(vif->dev))
510 xenvif_up(vif); 511 xenvif_up(vif);
511 rtnl_unlock(); 512 rtnl_unlock();
@@ -565,6 +566,8 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
565 disable_irq(queue->rx_irq); 566 disable_irq(queue->rx_irq);
566 } 567 }
567 568
569 queue->stalled = true;
570
568 task = kthread_create(xenvif_kthread_guest_rx, 571 task = kthread_create(xenvif_kthread_guest_rx,
569 (void *)queue, "%s-guest-rx", queue->name); 572 (void *)queue, "%s-guest-rx", queue->name);
570 if (IS_ERR(task)) { 573 if (IS_ERR(task)) {
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 57aa3b507d32..6563f0713fc0 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -62,6 +62,13 @@ unsigned int rx_drain_timeout_msecs = 10000;
62module_param(rx_drain_timeout_msecs, uint, 0444); 62module_param(rx_drain_timeout_msecs, uint, 0444);
63unsigned int rx_drain_timeout_jiffies; 63unsigned int rx_drain_timeout_jiffies;
64 64
65/* The length of time before the frontend is considered unresponsive
66 * because it isn't providing Rx slots.
67 */
68static unsigned int rx_stall_timeout_msecs = 60000;
69module_param(rx_stall_timeout_msecs, uint, 0444);
70static unsigned int rx_stall_timeout_jiffies;
71
65unsigned int xenvif_max_queues; 72unsigned int xenvif_max_queues;
66module_param_named(max_queues, xenvif_max_queues, uint, 0644); 73module_param_named(max_queues, xenvif_max_queues, uint, 0644);
67MODULE_PARM_DESC(max_queues, 74MODULE_PARM_DESC(max_queues,
@@ -649,6 +656,8 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
649 RING_IDX ring_slots_used; 656 RING_IDX ring_slots_used;
650 int i; 657 int i;
651 658
659 queue->last_rx_time = jiffies;
660
652 /* We need a cheap worse case estimate for the number of 661 /* We need a cheap worse case estimate for the number of
653 * slots we'll use. 662 * slots we'll use.
654 */ 663 */
@@ -1972,10 +1981,67 @@ err:
1972 return err; 1981 return err;
1973} 1982}
1974 1983
1984static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
1985{
1986 struct xenvif *vif = queue->vif;
1987
1988 queue->stalled = true;
1989
1990 /* At least one queue has stalled? Disable the carrier. */
1991 spin_lock(&vif->lock);
1992 if (vif->stalled_queues++ == 0) {
1993 netdev_info(vif->dev, "Guest Rx stalled");
1994 netif_carrier_off(vif->dev);
1995 }
1996 spin_unlock(&vif->lock);
1997}
1998
1999static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
2000{
2001 struct xenvif *vif = queue->vif;
2002
2003 queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
2004 queue->stalled = false;
2005
2006 /* All queues are ready? Enable the carrier. */
2007 spin_lock(&vif->lock);
2008 if (--vif->stalled_queues == 0) {
2009 netdev_info(vif->dev, "Guest Rx ready");
2010 netif_carrier_on(vif->dev);
2011 }
2012 spin_unlock(&vif->lock);
2013}
2014
2015static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
2016{
2017 RING_IDX prod, cons;
2018
2019 prod = queue->rx.sring->req_prod;
2020 cons = queue->rx.req_cons;
2021
2022 return !queue->stalled
2023 && prod - cons < XEN_NETBK_RX_SLOTS_MAX
2024 && time_after(jiffies,
2025 queue->last_rx_time + rx_stall_timeout_jiffies);
2026}
2027
2028static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
2029{
2030 RING_IDX prod, cons;
2031
2032 prod = queue->rx.sring->req_prod;
2033 cons = queue->rx.req_cons;
2034
2035 return queue->stalled
2036 && prod - cons >= XEN_NETBK_RX_SLOTS_MAX;
2037}
2038
1975static bool xenvif_have_rx_work(struct xenvif_queue *queue) 2039static bool xenvif_have_rx_work(struct xenvif_queue *queue)
1976{ 2040{
1977 return (!skb_queue_empty(&queue->rx_queue) 2041 return (!skb_queue_empty(&queue->rx_queue)
1978 && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)) 2042 && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
2043 || xenvif_rx_queue_stalled(queue)
2044 || xenvif_rx_queue_ready(queue)
1979 || kthread_should_stop() 2045 || kthread_should_stop()
1980 || queue->vif->disabled; 2046 || queue->vif->disabled;
1981} 2047}
@@ -2050,6 +2116,15 @@ int xenvif_kthread_guest_rx(void *data)
2050 if (!skb_queue_empty(&queue->rx_queue)) 2116 if (!skb_queue_empty(&queue->rx_queue))
2051 xenvif_rx_action(queue); 2117 xenvif_rx_action(queue);
2052 2118
2119 /* If the guest hasn't provided any Rx slots for a
2120 * while it's probably not responsive, drop the
2121 * carrier so packets are dropped earlier.
2122 */
2123 if (xenvif_rx_queue_stalled(queue))
2124 xenvif_queue_carrier_off(queue);
2125 else if (xenvif_rx_queue_ready(queue))
2126 xenvif_queue_carrier_on(queue);
2127
2053 /* Queued packets may have foreign pages from other 2128 /* Queued packets may have foreign pages from other
2054 * domains. These cannot be queued indefinitely as 2129 * domains. These cannot be queued indefinitely as
2055 * this would starve guests of grant refs and transmit 2130 * this would starve guests of grant refs and transmit
@@ -2120,6 +2195,7 @@ static int __init netback_init(void)
2120 goto failed_init; 2195 goto failed_init;
2121 2196
2122 rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs); 2197 rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
2198 rx_stall_timeout_jiffies = msecs_to_jiffies(rx_stall_timeout_msecs);
2123 2199
2124#ifdef CONFIG_DEBUG_FS 2200#ifdef CONFIG_DEBUG_FS
2125 xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL); 2201 xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 96a754d8e517..4e56a27f9689 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -711,6 +711,7 @@ static void connect(struct backend_info *be)
711 be->vif->queues = vzalloc(requested_num_queues * 711 be->vif->queues = vzalloc(requested_num_queues *
712 sizeof(struct xenvif_queue)); 712 sizeof(struct xenvif_queue));
713 be->vif->num_queues = requested_num_queues; 713 be->vif->num_queues = requested_num_queues;
714 be->vif->stalled_queues = requested_num_queues;
714 715
715 for (queue_index = 0; queue_index < requested_num_queues; ++queue_index) { 716 for (queue_index = 0; queue_index < requested_num_queues; ++queue_index) {
716 queue = &be->vif->queues[queue_index]; 717 queue = &be->vif->queues[queue_index];