Diffstat (limited to 'drivers/net/xen-netback/interface.c')
-rw-r--r--  drivers/net/xen-netback/interface.c  171
1 file changed, 138 insertions(+), 33 deletions(-)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 301cc037fda8..20e9defa1060 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -38,6 +38,7 @@
 
 #include <xen/events.h>
 #include <asm/xen/hypercall.h>
+#include <xen/balloon.h>
 
 #define XENVIF_QUEUE_LENGTH 32
 #define XENVIF_NAPI_WEIGHT  64
@@ -62,35 +63,20 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
         struct xenvif *vif = container_of(napi, struct xenvif, napi);
         int work_done;
 
+        /* This vif is rogue, we pretend there is nothing to do
+         * for this vif to deschedule it from NAPI. But this interface
+         * will be turned off in thread context later.
+         */
+        if (unlikely(vif->disabled)) {
+                napi_complete(napi);
+                return 0;
+        }
+
         work_done = xenvif_tx_action(vif, budget);
 
         if (work_done < budget) {
-                int more_to_do = 0;
-                unsigned long flags;
-
-                /* It is necessary to disable IRQ before calling
-                 * RING_HAS_UNCONSUMED_REQUESTS. Otherwise we might
-                 * lose event from the frontend.
-                 *
-                 * Consider:
-                 *   RING_HAS_UNCONSUMED_REQUESTS
-                 *   <frontend generates event to trigger napi_schedule>
-                 *   __napi_complete
-                 *
-                 * This handler is still in scheduled state so the
-                 * event has no effect at all. After __napi_complete
-                 * this handler is descheduled and cannot get
-                 * scheduled again. We lose event in this case and the ring
-                 * will be completely stalled.
-                 */
-
-                local_irq_save(flags);
-
-                RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
-                if (!more_to_do)
-                        __napi_complete(napi);
-
-                local_irq_restore(flags);
+                napi_complete(napi);
+                xenvif_napi_schedule_or_enable_events(vif);
         }
 
         return work_done;
@@ -113,6 +99,18 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
         return IRQ_HANDLED;
 }
 
+static void xenvif_wake_queue(unsigned long data)
+{
+        struct xenvif *vif = (struct xenvif *)data;
+
+        if (netif_queue_stopped(vif->dev)) {
+                netdev_err(vif->dev, "draining TX queue\n");
+                vif->rx_queue_purge = true;
+                xenvif_kick_thread(vif);
+                netif_wake_queue(vif->dev);
+        }
+}
+
 static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
         struct xenvif *vif = netdev_priv(dev);
@@ -121,7 +119,9 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
         BUG_ON(skb->dev != dev);
 
         /* Drop the packet if vif is not ready */
-        if (vif->task == NULL || !xenvif_schedulable(vif))
+        if (vif->task == NULL ||
+            vif->dealloc_task == NULL ||
+            !xenvif_schedulable(vif))
                 goto drop;
 
         /* At best we'll need one slot for the header and one for each
@@ -139,8 +139,13 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
          * then turn off the queue to give the ring a chance to
          * drain.
          */
-        if (!xenvif_rx_ring_slots_available(vif, min_slots_needed))
+        if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) {
+                vif->wake_queue.function = xenvif_wake_queue;
+                vif->wake_queue.data = (unsigned long)vif;
                 xenvif_stop_queue(vif);
+                mod_timer(&vif->wake_queue,
+                          jiffies + rx_drain_timeout_jiffies);
+        }
 
         skb_queue_tail(&vif->rx_queue, skb);
         xenvif_kick_thread(vif);
@@ -165,7 +170,7 @@ static void xenvif_up(struct xenvif *vif)
         enable_irq(vif->tx_irq);
         if (vif->tx_irq != vif->rx_irq)
                 enable_irq(vif->rx_irq);
-        xenvif_check_rx_xenvif(vif);
+        xenvif_napi_schedule_or_enable_events(vif);
 }
 
 static void xenvif_down(struct xenvif *vif)
@@ -233,6 +238,28 @@ static const struct xenvif_stat {
233 "rx_gso_checksum_fixup", 238 "rx_gso_checksum_fixup",
234 offsetof(struct xenvif, rx_gso_checksum_fixup) 239 offsetof(struct xenvif, rx_gso_checksum_fixup)
235 }, 240 },
241 /* If (sent != success + fail), there are probably packets never
242 * freed up properly!
243 */
244 {
245 "tx_zerocopy_sent",
246 offsetof(struct xenvif, tx_zerocopy_sent),
247 },
248 {
249 "tx_zerocopy_success",
250 offsetof(struct xenvif, tx_zerocopy_success),
251 },
252 {
253 "tx_zerocopy_fail",
254 offsetof(struct xenvif, tx_zerocopy_fail)
255 },
256 /* Number of packets exceeding MAX_SKB_FRAG slots. You should use
257 * a guest with the same MAX_SKB_FRAG
258 */
259 {
260 "tx_frag_overflow",
261 offsetof(struct xenvif, tx_frag_overflow)
262 },
236}; 263};
237 264
238static int xenvif_get_sset_count(struct net_device *dev, int string_set) 265static int xenvif_get_sset_count(struct net_device *dev, int string_set)
@@ -321,11 +348,15 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
         vif->ip_csum = 1;
         vif->dev = dev;
 
+        vif->disabled = false;
+
         vif->credit_bytes = vif->remaining_credit = ~0UL;
         vif->credit_usec = 0UL;
         init_timer(&vif->credit_timeout);
         vif->credit_window_start = get_jiffies_64();
 
+        init_timer(&vif->wake_queue);
+
         dev->netdev_ops = &xenvif_netdev_ops;
         dev->hw_features = NETIF_F_SG |
                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -342,8 +373,26 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
         vif->pending_prod = MAX_PENDING_REQS;
         for (i = 0; i < MAX_PENDING_REQS; i++)
                 vif->pending_ring[i] = i;
-        for (i = 0; i < MAX_PENDING_REQS; i++)
-                vif->mmap_pages[i] = NULL;
+        spin_lock_init(&vif->callback_lock);
+        spin_lock_init(&vif->response_lock);
+        /* If ballooning is disabled, this will consume real memory, so you
+         * better enable it. The long term solution would be to use just a
+         * bunch of valid page descriptors, without dependency on ballooning
+         */
+        err = alloc_xenballooned_pages(MAX_PENDING_REQS,
+                                       vif->mmap_pages,
+                                       false);
+        if (err) {
+                netdev_err(dev, "Could not reserve mmap_pages\n");
+                return ERR_PTR(-ENOMEM);
+        }
+        for (i = 0; i < MAX_PENDING_REQS; i++) {
+                vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
+                        { .callback = xenvif_zerocopy_callback,
+                          .ctx = NULL,
+                          .desc = i };
+                vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
+        }
 
         /*
          * Initialise a dummy MAC address. We choose the numerically
@@ -381,12 +430,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
         BUG_ON(vif->tx_irq);
         BUG_ON(vif->task);
+        BUG_ON(vif->dealloc_task);
 
         err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
         if (err < 0)
                 goto err;
 
         init_waitqueue_head(&vif->wq);
+        init_waitqueue_head(&vif->dealloc_wq);
 
         if (tx_evtchn == rx_evtchn) {
                 /* feature-split-event-channels == 0 */
@@ -420,8 +471,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
                 disable_irq(vif->rx_irq);
         }
 
-        task = kthread_create(xenvif_kthread,
-                              (void *)vif, "%s", vif->dev->name);
+        task = kthread_create(xenvif_kthread_guest_rx,
+                              (void *)vif, "%s-guest-rx", vif->dev->name);
         if (IS_ERR(task)) {
                 pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
                 err = PTR_ERR(task);
@@ -430,6 +481,16 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
         vif->task = task;
 
+        task = kthread_create(xenvif_dealloc_kthread,
+                              (void *)vif, "%s-dealloc", vif->dev->name);
+        if (IS_ERR(task)) {
+                pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
+                err = PTR_ERR(task);
+                goto err_rx_unbind;
+        }
+
+        vif->dealloc_task = task;
+
         rtnl_lock();
         if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
                 dev_set_mtu(vif->dev, ETH_DATA_LEN);
@@ -440,6 +501,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
         rtnl_unlock();
 
         wake_up_process(vif->task);
+        wake_up_process(vif->dealloc_task);
 
         return 0;
 
@@ -473,10 +535,16 @@ void xenvif_disconnect(struct xenvif *vif)
         xenvif_carrier_off(vif);
 
         if (vif->task) {
+                del_timer_sync(&vif->wake_queue);
                 kthread_stop(vif->task);
                 vif->task = NULL;
         }
 
+        if (vif->dealloc_task) {
+                kthread_stop(vif->dealloc_task);
+                vif->dealloc_task = NULL;
+        }
+
         if (vif->tx_irq) {
                 if (vif->tx_irq == vif->rx_irq)
                         unbind_from_irqhandler(vif->tx_irq, vif);
@@ -492,6 +560,43 @@ void xenvif_disconnect(struct xenvif *vif)
 
 void xenvif_free(struct xenvif *vif)
 {
+        int i, unmap_timeout = 0;
+        /* Here we want to avoid timeout messages if an skb can be legitimately
+         * stuck somewhere else. Realistically this could be another vif's
+         * internal or QDisc queue. That other vif also has this
+         * rx_drain_timeout_msecs timeout, but the timer only ditches the
+         * internal queue. After that, the QDisc queue can put in worst case
+         * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that other vif's
+         * internal queue, so we need several rounds of such timeouts until we
+         * can be sure that no other vif should have skbs from us. We are
+         * not sending more skbs, so newly stuck packets are not interesting
+         * for us here.
+         */
+        unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs / 1000) *
+                DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
+
+        for (i = 0; i < MAX_PENDING_REQS; ++i) {
+                if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
+                        unmap_timeout++;
+                        schedule_timeout(msecs_to_jiffies(1000));
+                        if (unmap_timeout > worst_case_skb_lifetime &&
+                            net_ratelimit())
+                                netdev_err(vif->dev,
+                                           "Page still granted! Index: %x\n",
+                                           i);
+                        /* If there are still unmapped pages, reset the loop to
+                         * start checking again. We shouldn't exit here until
+                         * dealloc thread and NAPI instance release all the
+                         * pages. If a kernel bug causes the skbs to stall
+                         * somewhere, the interface cannot be brought down
+                         * properly.
+                         */
+                        i = -1;
+                }
+        }
+
+        free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
+
         netif_napi_del(&vif->napi);
 
         unregister_netdev(vif->dev);
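
As a rough illustration of the worst_case_skb_lifetime arithmetic in the xenvif_free() hunk above, the user-space sketch below plugs in the module's default rx_drain_timeout_msecs of 10000 and the usual 4 KiB-page values XEN_NETIF_RX_RING_SIZE = 256 and MAX_SKB_FRAGS = 17; those constants are assumptions for the example, not values taken from this diff.

/* Worked example (assumed constants, see note above): 256 / 17 = 15
 * worst-case skbs fit another vif's internal queue per drain round, so
 * DIV_ROUND_UP(32, 15) = 3 rounds, i.e. 10 s * 3 = 30 s.
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))
#define XENVIF_QUEUE_LENGTH     32      /* from this file */
#define XEN_NETIF_RX_RING_SIZE  256     /* assumed: 4 KiB pages */
#define MAX_SKB_FRAGS           17      /* assumed: 4 KiB pages */

int main(void)
{
        unsigned int rx_drain_timeout_msecs = 10000;    /* assumed default */
        unsigned int worst_case_skb_lifetime =
                (rx_drain_timeout_msecs / 1000) *
                DIV_ROUND_UP(XENVIF_QUEUE_LENGTH,
                             XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS);

        printf("worst case skb lifetime: %u s\n", worst_case_skb_lifetime);
        return 0;
}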