Diffstat (limited to 'drivers/net/xen-netback/interface.c')
 -rw-r--r--  drivers/net/xen-netback/interface.c  141
 1 files changed, 135 insertions, 6 deletions
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 301cc037fda8..ef05c5c49d41 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -38,6 +38,7 @@
 
 #include <xen/events.h>
 #include <asm/xen/hypercall.h>
+#include <xen/balloon.h>
 
 #define XENVIF_QUEUE_LENGTH 32
 #define XENVIF_NAPI_WEIGHT  64
@@ -62,6 +63,15 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
         struct xenvif *vif = container_of(napi, struct xenvif, napi);
         int work_done;
 
+        /* This vif is rogue, we pretend there is nothing to do
+         * for this vif to deschedule it from NAPI. But this interface
+         * will be turned off in thread context later.
+         */
+        if (unlikely(vif->disabled)) {
+                napi_complete(napi);
+                return 0;
+        }
+
         work_done = xenvif_tx_action(vif, budget);
 
         if (work_done < budget) {
@@ -113,6 +123,18 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
         return IRQ_HANDLED;
 }
 
+static void xenvif_wake_queue(unsigned long data)
+{
+        struct xenvif *vif = (struct xenvif *)data;
+
+        if (netif_queue_stopped(vif->dev)) {
+                netdev_err(vif->dev, "draining TX queue\n");
+                vif->rx_queue_purge = true;
+                xenvif_kick_thread(vif);
+                netif_wake_queue(vif->dev);
+        }
+}
+
 static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
         struct xenvif *vif = netdev_priv(dev);
@@ -121,7 +143,9 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
         BUG_ON(skb->dev != dev);
 
         /* Drop the packet if vif is not ready */
-        if (vif->task == NULL || !xenvif_schedulable(vif))
+        if (vif->task == NULL ||
+            vif->dealloc_task == NULL ||
+            !xenvif_schedulable(vif))
                 goto drop;
 
         /* At best we'll need one slot for the header and one for each
@@ -139,8 +163,13 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
          * then turn off the queue to give the ring a chance to
          * drain.
          */
-        if (!xenvif_rx_ring_slots_available(vif, min_slots_needed))
+        if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) {
+                vif->wake_queue.function = xenvif_wake_queue;
+                vif->wake_queue.data = (unsigned long)vif;
                 xenvif_stop_queue(vif);
+                mod_timer(&vif->wake_queue,
+                          jiffies + rx_drain_timeout_jiffies);
+        }
 
         skb_queue_tail(&vif->rx_queue, skb);
         xenvif_kick_thread(vif);
@@ -233,6 +262,28 @@ static const struct xenvif_stat {
                 "rx_gso_checksum_fixup",
                 offsetof(struct xenvif, rx_gso_checksum_fixup)
         },
+        /* If (sent != success + fail), there are probably packets never
+         * freed up properly!
+         */
+        {
+                "tx_zerocopy_sent",
+                offsetof(struct xenvif, tx_zerocopy_sent),
+        },
+        {
+                "tx_zerocopy_success",
+                offsetof(struct xenvif, tx_zerocopy_success),
+        },
+        {
+                "tx_zerocopy_fail",
+                offsetof(struct xenvif, tx_zerocopy_fail)
+        },
+        /* Number of packets exceeding MAX_SKB_FRAGS slots. You should use
+         * a guest with the same MAX_SKB_FRAGS.
+         */
+        {
+                "tx_frag_overflow",
+                offsetof(struct xenvif, tx_frag_overflow)
+        },
 };
 
 static int xenvif_get_sset_count(struct net_device *dev, int string_set)
@@ -321,11 +372,15 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
         vif->ip_csum = 1;
         vif->dev = dev;
 
+        vif->disabled = false;
+
         vif->credit_bytes = vif->remaining_credit = ~0UL;
         vif->credit_usec = 0UL;
         init_timer(&vif->credit_timeout);
         vif->credit_window_start = get_jiffies_64();
 
+        init_timer(&vif->wake_queue);
+
         dev->netdev_ops = &xenvif_netdev_ops;
         dev->hw_features = NETIF_F_SG |
                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -342,8 +397,26 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
         vif->pending_prod = MAX_PENDING_REQS;
         for (i = 0; i < MAX_PENDING_REQS; i++)
                 vif->pending_ring[i] = i;
-        for (i = 0; i < MAX_PENDING_REQS; i++)
-                vif->mmap_pages[i] = NULL;
+        spin_lock_init(&vif->callback_lock);
+        spin_lock_init(&vif->response_lock);
+        /* If ballooning is disabled, this will consume real memory, so you
+         * better enable it. The long term solution would be to use just a
+         * bunch of valid page descriptors, without dependency on ballooning
+         */
+        err = alloc_xenballooned_pages(MAX_PENDING_REQS,
+                                       vif->mmap_pages,
+                                       false);
+        if (err) {
+                netdev_err(dev, "Could not reserve mmap_pages\n");
+                return ERR_PTR(-ENOMEM);
+        }
+        for (i = 0; i < MAX_PENDING_REQS; i++) {
+                vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
+                        { .callback = xenvif_zerocopy_callback,
+                          .ctx = NULL,
+                          .desc = i };
+                vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
+        }
 
         /*
          * Initialise a dummy MAC address. We choose the numerically
@@ -381,12 +454,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
         BUG_ON(vif->tx_irq);
         BUG_ON(vif->task);
+        BUG_ON(vif->dealloc_task);
 
         err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
         if (err < 0)
                 goto err;
 
         init_waitqueue_head(&vif->wq);
+        init_waitqueue_head(&vif->dealloc_wq);
 
         if (tx_evtchn == rx_evtchn) {
                 /* feature-split-event-channels == 0 */
@@ -420,8 +495,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
                 disable_irq(vif->rx_irq);
         }
 
-        task = kthread_create(xenvif_kthread,
-                              (void *)vif, "%s", vif->dev->name);
+        task = kthread_create(xenvif_kthread_guest_rx,
+                              (void *)vif, "%s-guest-rx", vif->dev->name);
         if (IS_ERR(task)) {
                 pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
                 err = PTR_ERR(task);
@@ -430,6 +505,16 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
         vif->task = task;
 
+        task = kthread_create(xenvif_dealloc_kthread,
+                              (void *)vif, "%s-dealloc", vif->dev->name);
+        if (IS_ERR(task)) {
+                pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
+                err = PTR_ERR(task);
+                goto err_rx_unbind;
+        }
+
+        vif->dealloc_task = task;
+
         rtnl_lock();
         if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
                 dev_set_mtu(vif->dev, ETH_DATA_LEN);
@@ -440,6 +525,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
         rtnl_unlock();
 
         wake_up_process(vif->task);
+        wake_up_process(vif->dealloc_task);
 
         return 0;
 
@@ -473,10 +559,16 @@ void xenvif_disconnect(struct xenvif *vif)
         xenvif_carrier_off(vif);
 
         if (vif->task) {
+                del_timer_sync(&vif->wake_queue);
                 kthread_stop(vif->task);
                 vif->task = NULL;
         }
 
+        if (vif->dealloc_task) {
+                kthread_stop(vif->dealloc_task);
+                vif->dealloc_task = NULL;
+        }
+
         if (vif->tx_irq) {
                 if (vif->tx_irq == vif->rx_irq)
                         unbind_from_irqhandler(vif->tx_irq, vif);
@@ -492,6 +584,43 @@ void xenvif_disconnect(struct xenvif *vif)
 
 void xenvif_free(struct xenvif *vif)
 {
+        int i, unmap_timeout = 0;
+        /* Here we want to avoid timeout messages if an skb can be legitimately
+         * stuck somewhere else. Realistically this could be another vif's
+         * internal or QDisc queue. That other vif also has this
+         * rx_drain_timeout_msecs timeout, but the timer only ditches the
+         * internal queue. After that, the QDisc queue can put in worst case
+         * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that other vif's
+         * internal queue, so we need several rounds of such timeouts until we
+         * can be sure that no other vif should have skbs from us. We are
+         * not sending more skbs, so newly stuck packets are not interesting
+         * for us here.
+         */
+        unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) *
+                DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
+
+        for (i = 0; i < MAX_PENDING_REQS; ++i) {
+                if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
+                        unmap_timeout++;
+                        schedule_timeout(msecs_to_jiffies(1000));
+                        if (unmap_timeout > worst_case_skb_lifetime &&
+                            net_ratelimit())
+                                netdev_err(vif->dev,
+                                           "Page still granted! Index: %x\n",
+                                           i);
+                        /* If there are still unmapped pages, reset the loop to
+                         * start checking again. We shouldn't exit here until
+                         * the dealloc thread and NAPI instance release all
+                         * the pages. If a kernel bug causes the skbs to stall
+                         * somewhere, the interface cannot be brought down
+                         * properly.
+                         */
+                        i = -1;
+                }
+        }
+
+        free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
+
         netif_napi_del(&vif->napi);
 
         unregister_netdev(vif->dev);