path: root/drivers/net/xen-netback
author    Zoltan Kiss <zoltan.kiss@citrix.com>      2014-08-04 11:20:58 -0400
committer David S. Miller <davem@davemloft.net>    2014-08-05 19:04:46 -0400
commit    f34a4cf9c9b4fd35ba7f9a596cedb011879a1a4d (patch)
tree      00be3a0233299ae75e9ab75ca4940cdea15fb6d9 /drivers/net/xen-netback
parent    3d1af1df9762e56e563e8fd088a1b4ce2bcfaf8b (diff)
xen-netback: Turn off the carrier if the guest is not able to receive
Currently, when the guest is not able to receive more packets, the qdisc layer starts a timer, and when it fires, the qdisc is started again to try delivering a packet. This is a very slow way to drain the queues; it consumes unnecessary resources and slows down the shutdown of other guests.

This patch changes the behaviour by turning the carrier off when that timer fires, so all the packets stuck waiting for that vif are freed up. Instead of the rx_queue_purge bool, it uses the QUEUE_STATUS_RX_PURGE_EVENT bit to signal the thread that either the timeout happened or an RX interrupt arrived, so the thread can check what it should do. It also disables NAPI, so the guest can't transmit, but it leaves the interrupts on, so the interface can come back up. Only the queues which brought the interface down can enable it again; the QUEUE_STATUS_RX_STALLED bit makes sure of that.

Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: xen-devel@lists.xenproject.org
Signed-off-by: David S. Miller <davem@davemloft.net>
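The commit message describes a three-way handshake between the stall timer, the RX interrupt handler, and the per-queue kernel thread, coordinated through two bits in queue->status. As a rough illustration only, here is a minimal userspace model of that handshake, assuming C11 atomics and pthreads rather than the kernel's bitops and wait queues; the names (rx_stalled_timer, rx_interrupt, guest_rx_thread, ring_has_slots) are invented for this sketch and merely mirror their xenvif_* counterparts:

/* Hypothetical userspace model (NOT kernel code): the "timer" and the
 * "interrupt" both raise PURGE_EVENT; the worker consumes it with a
 * test-and-clear, deciding between stalling (carrier off) and recovery.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

enum { PURGE_EVENT = 1 << 0, RX_STALLED = 1 << 1 };

static atomic_uint status;                     /* models queue->status */
static atomic_bool ring_has_slots;             /* models the guest RX ring */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wq = PTHREAD_COND_INITIALIZER;

static void kick_thread(void)                  /* xenvif_kick_thread() analogue */
{
        pthread_mutex_lock(&lock);
        pthread_cond_signal(&wq);
        pthread_mutex_unlock(&lock);
}

static void rx_stalled_timer(void)             /* xenvif_rx_stalled() analogue */
{
        atomic_fetch_or(&status, PURGE_EVENT);
        kick_thread();
}

static void rx_interrupt(void)                 /* xenvif_rx_interrupt() analogue */
{
        /* only a queue that stalled the carrier re-signals the thread */
        if (atomic_load(&status) & RX_STALLED)
                atomic_fetch_or(&status, PURGE_EVENT);
        kick_thread();
}

static void *guest_rx_thread(void *arg)        /* xenvif_kthread_guest_rx() analogue */
{
        (void)arg;
        for (int i = 0; i < 2; i++) {
                /* wait_event_interruptible() analogue */
                pthread_mutex_lock(&lock);
                while (!(atomic_load(&status) & PURGE_EVENT))
                        pthread_cond_wait(&wq, &lock);
                pthread_mutex_unlock(&lock);

                /* test_and_clear_bit() analogue */
                if (atomic_fetch_and(&status, ~PURGE_EVENT) & PURGE_EVENT) {
                        if (!atomic_load(&ring_has_slots)) {
                                atomic_fetch_or(&status, RX_STALLED);
                                puts("carrier off: guest posted no RX slots");
                        } else {
                                atomic_fetch_and(&status, ~RX_STALLED);
                                puts("carrier on: guest posted slots again");
                        }
                }
        }
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, guest_rx_thread, NULL);
        rx_stalled_timer();                    /* drain timeout fires: stall */
        sleep(1);
        atomic_store(&ring_has_slots, true);
        rx_interrupt();                        /* guest posts slots: recover */
        pthread_join(t, NULL);
        return 0;
}

In the real driver the same pattern runs over set_bit()/test_and_clear_bit() on queue->status and wait_event_interruptible() in xenvif_kthread_guest_rx(), as the netback.c hunks below show.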
Diffstat (limited to 'drivers/net/xen-netback')
-rw-r--r--  drivers/net/xen-netback/common.h    | 15
-rw-r--r--  drivers/net/xen-netback/interface.c | 49
-rw-r--r--  drivers/net/xen-netback/netback.c   | 97
3 files changed, 123 insertions(+), 38 deletions(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 4a92fc19f410..ef3026f46a37 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -176,9 +176,9 @@ struct xenvif_queue { /* Per-queue data for xenvif */
 	struct xen_netif_rx_back_ring rx;
 	struct sk_buff_head rx_queue;
 	RING_IDX rx_last_skb_slots;
-	bool rx_queue_purge;
+	unsigned long status;
 
-	struct timer_list wake_queue;
+	struct timer_list rx_stalled;
 
 	struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];
 
@@ -200,7 +200,16 @@ struct xenvif_queue { /* Per-queue data for xenvif */
 
 enum state_bit_shift {
 	/* This bit marks that the vif is connected */
-	VIF_STATUS_CONNECTED
+	VIF_STATUS_CONNECTED,
+	/* This bit signals the RX thread that queuing was stopped (in
+	 * start_xmit), and either the timer fired or an RX interrupt came
+	 */
+	QUEUE_STATUS_RX_PURGE_EVENT,
+	/* This bit tells the interrupt handler that this queue was the reason
+	 * for the carrier off, so it should kick the thread. Only queues which
+	 * brought it down can turn on the carrier.
+	 */
+	QUEUE_STATUS_RX_STALLED
 };
 
 struct xenvif {
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index fbdadb3d8220..48a55cda979b 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -78,8 +78,12 @@ int xenvif_poll(struct napi_struct *napi, int budget)
 	/* This vif is rogue, we pretend we've there is nothing to do
 	 * for this vif to deschedule it from NAPI. But this interface
 	 * will be turned off in thread context later.
+	 * Also, if a guest doesn't post enough slots to receive data on one of
+	 * its queues, the carrier goes down and NAPI is descheduled here so
+	 * the guest can't send more packets until it's ready to receive.
 	 */
-	if (unlikely(queue->vif->disabled)) {
+	if (unlikely(queue->vif->disabled ||
+		     !netif_carrier_ok(queue->vif->dev))) {
 		napi_complete(napi);
 		return 0;
 	}
@@ -97,7 +101,16 @@ int xenvif_poll(struct napi_struct *napi, int budget)
 static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
 {
 	struct xenvif_queue *queue = dev_id;
+	struct netdev_queue *net_queue =
+		netdev_get_tx_queue(queue->vif->dev, queue->id);
 
+	/* QUEUE_STATUS_RX_PURGE_EVENT is only set if either QDisc was off OR
+	 * the carrier went down and this queue was previously blocked
+	 */
+	if (unlikely(netif_tx_queue_stopped(net_queue) ||
+		     (!netif_carrier_ok(queue->vif->dev) &&
+		      test_bit(QUEUE_STATUS_RX_STALLED, &queue->status))))
+		set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
 	xenvif_kick_thread(queue);
 
 	return IRQ_HANDLED;
@@ -125,16 +138,14 @@ void xenvif_wake_queue(struct xenvif_queue *queue)
 	netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
 }
 
-/* Callback to wake the queue and drain it on timeout */
-static void xenvif_wake_queue_callback(unsigned long data)
+/* Callback to wake the queue's thread and turn the carrier off on timeout */
+static void xenvif_rx_stalled(unsigned long data)
 {
 	struct xenvif_queue *queue = (struct xenvif_queue *)data;
 
 	if (xenvif_queue_stopped(queue)) {
-		netdev_err(queue->vif->dev, "draining TX queue\n");
-		queue->rx_queue_purge = true;
+		set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
 		xenvif_kick_thread(queue);
-		xenvif_wake_queue(queue);
 	}
 }
 
@@ -183,11 +194,11 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * drain.
 	 */
 	if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) {
-		queue->wake_queue.function = xenvif_wake_queue_callback;
-		queue->wake_queue.data = (unsigned long)queue;
+		queue->rx_stalled.function = xenvif_rx_stalled;
+		queue->rx_stalled.data = (unsigned long)queue;
 		xenvif_stop_queue(queue);
-		mod_timer(&queue->wake_queue,
+		mod_timer(&queue->rx_stalled,
 			  jiffies + rx_drain_timeout_jiffies);
 	}
 
 	skb_queue_tail(&queue->rx_queue, skb);
@@ -515,7 +526,7 @@ int xenvif_init_queue(struct xenvif_queue *queue)
 		queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
 	}
 
-	init_timer(&queue->wake_queue);
+	init_timer(&queue->rx_stalled);
 
 	netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
 		       XENVIF_NAPI_WEIGHT);
@@ -666,7 +677,7 @@ void xenvif_disconnect(struct xenvif *vif)
 		queue = &vif->queues[queue_index];
 
 		if (queue->task) {
-			del_timer_sync(&queue->wake_queue);
+			del_timer_sync(&queue->rx_stalled);
 			kthread_stop(queue->task);
 			queue->task = NULL;
 		}
@@ -708,16 +719,12 @@ void xenvif_free(struct xenvif *vif)
 	/* Here we want to avoid timeout messages if an skb can be legitimately
 	 * stuck somewhere else. Realistically this could be an another vif's
 	 * internal or QDisc queue. That another vif also has this
-	 * rx_drain_timeout_msecs timeout, but the timer only ditches the
-	 * internal queue. After that, the QDisc queue can put in worst case
-	 * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that another vif's
-	 * internal queue, so we need several rounds of such timeouts until we
-	 * can be sure that no another vif should have skb's from us. We are
-	 * not sending more skb's, so newly stuck packets are not interesting
-	 * for us here.
+	 * rx_drain_timeout_msecs timeout, so give it time to drain out.
+	 * Although if that other guest wakes up just before its timeout happens
+	 * and takes only one skb from QDisc, it can hold onto other skbs for a
+	 * longer period.
 	 */
-	unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) *
-		DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
+	unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000);
 
 	unregister_netdev(vif->dev);
 
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 6c4cc0f44da5..aa2093325be1 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1869,8 +1869,7 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
 static inline int rx_work_todo(struct xenvif_queue *queue)
 {
 	return (!skb_queue_empty(&queue->rx_queue) &&
-		xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots)) ||
-		queue->rx_queue_purge;
+		xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots));
 }
 
 static inline int tx_work_todo(struct xenvif_queue *queue)
@@ -1935,6 +1934,75 @@ static void xenvif_start_queue(struct xenvif_queue *queue)
 	xenvif_wake_queue(queue);
 }
 
+/* Only called from the queue's thread, it handles the situation when the guest
+ * doesn't post enough requests on the receiving ring.
+ * First xenvif_start_xmit disables QDisc and start a timer, and then either the
+ * timer fires, or the guest send an interrupt after posting new request. If it
+ * is the timer, the carrier is turned off here.
+ * */
+static void xenvif_rx_purge_event(struct xenvif_queue *queue)
+{
+	/* Either the last unsuccesful skb or at least 1 slot should fit */
+	int needed = queue->rx_last_skb_slots ?
+		     queue->rx_last_skb_slots : 1;
+
+	/* It is assumed that if the guest post new slots after this, the RX
+	 * interrupt will set the QUEUE_STATUS_RX_PURGE_EVENT bit and wake up
+	 * the thread again
+	 */
+	set_bit(QUEUE_STATUS_RX_STALLED, &queue->status);
+	if (!xenvif_rx_ring_slots_available(queue, needed)) {
+		rtnl_lock();
+		if (netif_carrier_ok(queue->vif->dev)) {
+			/* Timer fired and there are still no slots. Turn off
+			 * everything except the interrupts
+			 */
+			netif_carrier_off(queue->vif->dev);
+			skb_queue_purge(&queue->rx_queue);
+			queue->rx_last_skb_slots = 0;
+			if (net_ratelimit())
+				netdev_err(queue->vif->dev, "Carrier off due to lack of guest response on queue %d\n", queue->id);
+		} else {
+			/* Probably an another queue already turned the carrier
+			 * off, make sure nothing is stucked in the internal
+			 * queue of this queue
+			 */
+			skb_queue_purge(&queue->rx_queue);
+			queue->rx_last_skb_slots = 0;
+		}
+		rtnl_unlock();
+	} else if (!netif_carrier_ok(queue->vif->dev)) {
+		unsigned int num_queues = queue->vif->num_queues;
+		unsigned int i;
+		/* The carrier was down, but an interrupt kicked
+		 * the thread again after new requests were
+		 * posted
+		 */
+		clear_bit(QUEUE_STATUS_RX_STALLED,
+			  &queue->status);
+		rtnl_lock();
+		netif_carrier_on(queue->vif->dev);
+		netif_tx_wake_all_queues(queue->vif->dev);
+		rtnl_unlock();
+
+		for (i = 0; i < num_queues; i++) {
+			struct xenvif_queue *temp = &queue->vif->queues[i];
+
+			xenvif_napi_schedule_or_enable_events(temp);
+		}
+		if (net_ratelimit())
+			netdev_err(queue->vif->dev, "Carrier on again\n");
+	} else {
+		/* Queuing were stopped, but the guest posted
+		 * new requests and sent an interrupt
+		 */
+		clear_bit(QUEUE_STATUS_RX_STALLED,
+			  &queue->status);
+		del_timer_sync(&queue->rx_stalled);
+		xenvif_start_queue(queue);
+	}
+}
+
 int xenvif_kthread_guest_rx(void *data)
 {
 	struct xenvif_queue *queue = data;
@@ -1944,8 +2012,12 @@ int xenvif_kthread_guest_rx(void *data)
 		wait_event_interruptible(queue->wq,
 					 rx_work_todo(queue) ||
 					 queue->vif->disabled ||
+					 test_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status) ||
 					 kthread_should_stop());
 
+		if (kthread_should_stop())
+			break;
+
 		/* This frontend is found to be rogue, disable it in
 		 * kthread context. Currently this is only set when
 		 * netback finds out frontend sends malformed packet,
@@ -1955,24 +2027,21 @@ int xenvif_kthread_guest_rx(void *data)
 		 */
 		if (unlikely(queue->vif->disabled && queue->id == 0))
 			xenvif_carrier_off(queue->vif);
-
-		if (kthread_should_stop())
-			break;
-
-		if (queue->rx_queue_purge) {
+		else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT,
+						     &queue->status))) {
+			xenvif_rx_purge_event(queue);
+		} else if (!netif_carrier_ok(queue->vif->dev)) {
+			/* Another queue stalled and turned the carrier off, so
+			 * purge the internal queue of queues which were not
+			 * blocked
+			 */
 			skb_queue_purge(&queue->rx_queue);
-			queue->rx_queue_purge = false;
+			queue->rx_last_skb_slots = 0;
 		}
 
 		if (!skb_queue_empty(&queue->rx_queue))
 			xenvif_rx_action(queue);
 
-		if (skb_queue_empty(&queue->rx_queue) &&
-		    xenvif_queue_stopped(queue)) {
-			del_timer_sync(&queue->wake_queue);
-			xenvif_start_queue(queue);
-		}
-
 		cond_resched();
 	}
 