Diffstat (limited to 'drivers/net/xen-netback/interface.c')
-rw-r--r--	drivers/net/xen-netback/interface.c	171
1 file changed, 138 insertions(+), 33 deletions(-)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 301cc037fda8..20e9defa1060 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -38,6 +38,7 @@
 
 #include <xen/events.h>
 #include <asm/xen/hypercall.h>
+#include <xen/balloon.h>
 
 #define XENVIF_QUEUE_LENGTH 32
 #define XENVIF_NAPI_WEIGHT  64
@@ -62,35 +63,20 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
 	struct xenvif *vif = container_of(napi, struct xenvif, napi);
 	int work_done;
 
+	/* This vif is rogue, we pretend there is nothing to do
+	 * for this vif to deschedule it from NAPI. But this interface
+	 * will be turned off in thread context later.
+	 */
+	if (unlikely(vif->disabled)) {
+		napi_complete(napi);
+		return 0;
+	}
+
 	work_done = xenvif_tx_action(vif, budget);
 
 	if (work_done < budget) {
-		int more_to_do = 0;
-		unsigned long flags;
-
-		/* It is necessary to disable IRQ before calling
-		 * RING_HAS_UNCONSUMED_REQUESTS. Otherwise we might
-		 * lose event from the frontend.
-		 *
-		 * Consider:
-		 *   RING_HAS_UNCONSUMED_REQUESTS
-		 *   <frontend generates event to trigger napi_schedule>
-		 *   __napi_complete
-		 *
-		 * This handler is still in scheduled state so the
-		 * event has no effect at all. After __napi_complete
-		 * this handler is descheduled and cannot get
-		 * scheduled again. We lose event in this case and the ring
-		 * will be completely stalled.
-		 */
-
-		local_irq_save(flags);
-
-		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
-		if (!more_to_do)
-			__napi_complete(napi);
-
-		local_irq_restore(flags);
+		napi_complete(napi);
+		xenvif_napi_schedule_or_enable_events(vif);
 	}
 
 	return work_done;
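The open-coded final ring check is replaced by a call to xenvif_napi_schedule_or_enable_events(), whose body lives in netback.c and is not part of this file's diff. A minimal sketch of what such a helper is expected to do, assuming it keeps the semantics of the removed code (its exact form is inferred from the rename of xenvif_check_rx_xenvif() further down, not shown here):

	void xenvif_napi_schedule_or_enable_events(struct xenvif *vif)
	{
		int more_to_do;

		/* Final check on the TX ring: if requests arrived while the
		 * handler was still scheduled, reschedule NAPI instead of
		 * letting the event be lost; otherwise leave notifications
		 * enabled for the frontend.
		 */
		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);

		if (more_to_do)
			napi_schedule(&vif->napi);
	}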
@@ -113,6 +99,18 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static void xenvif_wake_queue(unsigned long data)
+{
+	struct xenvif *vif = (struct xenvif *)data;
+
+	if (netif_queue_stopped(vif->dev)) {
+		netdev_err(vif->dev, "draining TX queue\n");
+		vif->rx_queue_purge = true;
+		xenvif_kick_thread(vif);
+		netif_wake_queue(vif->dev);
+	}
+}
+
 static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
@@ -121,7 +119,9 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	BUG_ON(skb->dev != dev);
 
 	/* Drop the packet if vif is not ready */
-	if (vif->task == NULL || !xenvif_schedulable(vif))
+	if (vif->task == NULL ||
+	    vif->dealloc_task == NULL ||
+	    !xenvif_schedulable(vif))
 		goto drop;
 
 	/* At best we'll need one slot for the header and one for each
@@ -139,8 +139,13 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * then turn off the queue to give the ring a chance to
 	 * drain.
 	 */
-	if (!xenvif_rx_ring_slots_available(vif, min_slots_needed))
+	if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) {
+		vif->wake_queue.function = xenvif_wake_queue;
+		vif->wake_queue.data = (unsigned long)vif;
 		xenvif_stop_queue(vif);
+		mod_timer(&vif->wake_queue,
+			  jiffies + rx_drain_timeout_jiffies);
+	}
 
 	skb_queue_tail(&vif->rx_queue, skb);
 	xenvif_kick_thread(vif);
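Stopping the queue is now bounded: xenvif_wake_queue() (added above) fires once rx_drain_timeout_jiffies elapse, purges the internal rx_queue and restarts the netdev queue, so a frontend that stops consuming cannot stall the backend forever. A minimal, self-contained sketch of the same stop-queue-plus-timeout pattern with the pre-4.15 timer API; the xen-netback names above are from the diff, while everything prefixed example_ is illustrative:

	#include <linux/netdevice.h>
	#include <linux/timer.h>
	#include <linux/jiffies.h>

	static void example_wake_queue(unsigned long data)
	{
		struct net_device *dev = (struct net_device *)data;

		/* Give up waiting for ring space and let traffic flow again. */
		if (netif_queue_stopped(dev))
			netif_wake_queue(dev);
	}

	/* The timer must already have been set up with init_timer(). */
	static void example_stop_queue_with_timeout(struct net_device *dev,
						    struct timer_list *timer,
						    unsigned long timeout)
	{
		/* Old-style timer: a callback plus an unsigned long cookie. */
		timer->function = example_wake_queue;
		timer->data = (unsigned long)dev;
		netif_stop_queue(dev);
		mod_timer(timer, jiffies + timeout);
	}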
@@ -165,7 +170,7 @@ static void xenvif_up(struct xenvif *vif)
 	enable_irq(vif->tx_irq);
 	if (vif->tx_irq != vif->rx_irq)
 		enable_irq(vif->rx_irq);
-	xenvif_check_rx_xenvif(vif);
+	xenvif_napi_schedule_or_enable_events(vif);
 }
 
 static void xenvif_down(struct xenvif *vif)
@@ -233,6 +238,28 @@ static const struct xenvif_stat {
 		"rx_gso_checksum_fixup",
 		offsetof(struct xenvif, rx_gso_checksum_fixup)
 	},
+	/* If (sent != success + fail), there are probably packets never
+	 * freed up properly!
+	 */
+	{
+		"tx_zerocopy_sent",
+		offsetof(struct xenvif, tx_zerocopy_sent),
+	},
+	{
+		"tx_zerocopy_success",
+		offsetof(struct xenvif, tx_zerocopy_success),
+	},
+	{
+		"tx_zerocopy_fail",
+		offsetof(struct xenvif, tx_zerocopy_fail)
+	},
+	/* Number of packets exceeding MAX_SKB_FRAGS slots. You should use
+	 * a guest with the same MAX_SKB_FRAGS setting.
+	 */
+	{
+		"tx_frag_overflow",
+		offsetof(struct xenvif, tx_frag_overflow)
+	},
 };
 
 static int xenvif_get_sset_count(struct net_device *dev, int string_set)
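The new counters appear in `ethtool -S <vif>` output next to rx_gso_checksum_fixup; no new ethtool plumbing is needed because the existing stats callback in this file only walks the table and dereferences each offset. A sketch of that pattern, assuming the xenvif_stat entries carry a name and an offset field as the initialisers above suggest (example_ name is illustrative):

	static void example_get_ethtool_stats(struct net_device *dev,
					      struct ethtool_stats *stats,
					      u64 *data)
	{
		void *vif = netdev_priv(dev);
		int i;

		/* Each counter is read by adding its offsetof() value to the
		 * vif pointer, so a new statistic only needs a struct field
		 * and a table entry like the ones added above.
		 */
		for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
			data[i] = *(unsigned long *)(vif + xenvif_stats[i].offset);
	}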
@@ -321,11 +348,15 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	vif->ip_csum = 1;
 	vif->dev = dev;
 
+	vif->disabled = false;
+
 	vif->credit_bytes = vif->remaining_credit = ~0UL;
 	vif->credit_usec  = 0UL;
 	init_timer(&vif->credit_timeout);
 	vif->credit_window_start = get_jiffies_64();
 
+	init_timer(&vif->wake_queue);
+
 	dev->netdev_ops	= &xenvif_netdev_ops;
 	dev->hw_features = NETIF_F_SG |
 			   NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -342,8 +373,26 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	vif->pending_prod = MAX_PENDING_REQS;
 	for (i = 0; i < MAX_PENDING_REQS; i++)
 		vif->pending_ring[i] = i;
-	for (i = 0; i < MAX_PENDING_REQS; i++)
-		vif->mmap_pages[i] = NULL;
+	spin_lock_init(&vif->callback_lock);
+	spin_lock_init(&vif->response_lock);
+	/* If ballooning is disabled, this will consume real memory, so you
+	 * had better enable it. The long-term solution would be to use just
+	 * a bunch of valid page descriptors, without depending on ballooning.
+	 */
+	err = alloc_xenballooned_pages(MAX_PENDING_REQS,
+				       vif->mmap_pages,
+				       false);
+	if (err) {
+		netdev_err(dev, "Could not reserve mmap_pages\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	for (i = 0; i < MAX_PENDING_REQS; i++) {
+		vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
+			{ .callback = xenvif_zerocopy_callback,
+			  .ctx = NULL,
+			  .desc = i };
+		vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
+	}
 
 	/*
 	 * Initialise a dummy MAC address. We choose the numerically
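Each pending TX slot now carries a pre-initialised struct ubuf_info so that grant-mapped pages can be handed to the stack as zerocopy frags. The actual skb wiring is done in netback.c and is not part of this diff; a sketch of how such a ubuf_info is typically attached in that kernel generation, using the generic zerocopy fields and flag rather than anything defined here (example_ name is illustrative):

	#include <linux/skbuff.h>

	static void example_attach_zerocopy(struct sk_buff *skb,
					    struct ubuf_info *uarg)
	{
		/* When the last reference to the frags is dropped, the stack
		 * invokes uarg->callback() (xenvif_zerocopy_callback above),
		 * which lets the dealloc thread unmap the grants.
		 */
		skb_shinfo(skb)->destructor_arg = uarg;
		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
	}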
@@ -381,12 +430,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
 	BUG_ON(vif->tx_irq);
 	BUG_ON(vif->task);
+	BUG_ON(vif->dealloc_task);
 
 	err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
 	if (err < 0)
 		goto err;
 
 	init_waitqueue_head(&vif->wq);
+	init_waitqueue_head(&vif->dealloc_wq);
 
 	if (tx_evtchn == rx_evtchn) {
 		/* feature-split-event-channels == 0 */
@@ -420,8 +471,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 		disable_irq(vif->rx_irq);
 	}
 
-	task = kthread_create(xenvif_kthread,
-			      (void *)vif, "%s", vif->dev->name);
+	task = kthread_create(xenvif_kthread_guest_rx,
+			      (void *)vif, "%s-guest-rx", vif->dev->name);
 	if (IS_ERR(task)) {
 		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
 		err = PTR_ERR(task);
@@ -430,6 +481,16 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
 	vif->task = task;
 
+	task = kthread_create(xenvif_dealloc_kthread,
+			      (void *)vif, "%s-dealloc", vif->dev->name);
+	if (IS_ERR(task)) {
+		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
+		err = PTR_ERR(task);
+		goto err_rx_unbind;
+	}
+
+	vif->dealloc_task = task;
+
 	rtnl_lock();
 	if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
 		dev_set_mtu(vif->dev, ETH_DATA_LEN);
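The second kthread exists so that grant unmapping does not have to run in NAPI or guest-RX context. Its body, xenvif_dealloc_kthread(), is in netback.c and not shown in this diff; the shape is presumably the usual wait-queue driven loop, sketched here with work_todo()/do_unmap() as placeholders for the driver's real pending-work check and unmap pass:

	static int example_dealloc_kthread(void *data)
	{
		struct xenvif *vif = data;

		while (!kthread_should_stop()) {
			/* Sleep until the zerocopy callback queues released
			 * slots or xenvif_disconnect() asks us to stop.
			 */
			wait_event_interruptible(vif->dealloc_wq,
						 kthread_should_stop() ||
						 work_todo(vif));
			do_unmap(vif);
		}

		return 0;
	}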
@@ -440,6 +501,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 	rtnl_unlock();
 
 	wake_up_process(vif->task);
+	wake_up_process(vif->dealloc_task);
 
 	return 0;
 
@@ -473,10 +535,16 @@ void xenvif_disconnect(struct xenvif *vif)
 	xenvif_carrier_off(vif);
 
 	if (vif->task) {
+		del_timer_sync(&vif->wake_queue);
 		kthread_stop(vif->task);
 		vif->task = NULL;
 	}
 
+	if (vif->dealloc_task) {
+		kthread_stop(vif->dealloc_task);
+		vif->dealloc_task = NULL;
+	}
+
 	if (vif->tx_irq) {
 		if (vif->tx_irq == vif->rx_irq)
 			unbind_from_irqhandler(vif->tx_irq, vif);
@@ -492,6 +560,43 @@ void xenvif_disconnect(struct xenvif *vif)
 
 void xenvif_free(struct xenvif *vif)
 {
+	int i, unmap_timeout = 0;
+	/* Here we want to avoid timeout messages if an skb can be legitimately
+	 * stuck somewhere else. Realistically this could be another vif's
+	 * internal or QDisc queue. That other vif also has this
+	 * rx_drain_timeout_msecs timeout, but the timer only ditches the
+	 * internal queue. After that, the QDisc queue can put in the worst
+	 * case XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that other
+	 * vif's internal queue, so we need several rounds of such timeouts
+	 * until we can be sure that no other vif still holds skbs from us.
+	 * We are not sending more skbs, so newly stuck packets are not
+	 * interesting to us here.
+	 */
+	unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) *
+		DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
+
+	for (i = 0; i < MAX_PENDING_REQS; ++i) {
+		if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
+			unmap_timeout++;
+			schedule_timeout(msecs_to_jiffies(1000));
+			if (unmap_timeout > worst_case_skb_lifetime &&
+			    net_ratelimit())
+				netdev_err(vif->dev,
+					   "Page still granted! Index: %x\n",
+					   i);
+			/* If there are still unmapped pages, reset the loop
+			 * to start checking again. We shouldn't exit here
+			 * until the dealloc thread and the NAPI instance
+			 * have released all the pages. If a kernel bug
+			 * causes the skbs to stall somewhere, the interface
+			 * cannot be brought down properly.
+			 */
+			i = -1;
+		}
+	}
+
+	free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
+
 	netif_napi_del(&vif->napi);
 
 	unregister_netdev(vif->dev);
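For a sense of scale of the worst_case_skb_lifetime bound: assuming the module default of rx_drain_timeout_msecs = 10000, XENVIF_QUEUE_LENGTH = 32 (defined at the top of this file), and typical 4 KiB-page values of XEN_NETIF_RX_RING_SIZE = 256 and MAX_SKB_FRAGS = 17 (the timeout default, ring size and frag count are assumptions, not part of this diff), the expression evaluates to (10000/1000) * DIV_ROUND_UP(32, 256/17) = 10 * DIV_ROUND_UP(32, 15) = 10 * 3 = 30 seconds, so the "Page still granted!" warning only starts after roughly half a minute of waiting for stuck grants.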
