Diffstat (limited to 'drivers/net/xen-netback/interface.c')
-rw-r--r--  drivers/net/xen-netback/interface.c  141
1 file changed, 135 insertions(+), 6 deletions(-)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 301cc037fda8..ef05c5c49d41 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -38,6 +38,7 @@
 
 #include <xen/events.h>
 #include <asm/xen/hypercall.h>
+#include <xen/balloon.h>
 
 #define XENVIF_QUEUE_LENGTH 32
 #define XENVIF_NAPI_WEIGHT  64
@@ -62,6 +63,15 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
         struct xenvif *vif = container_of(napi, struct xenvif, napi);
         int work_done;
 
+        /* This vif is rogue, so we pretend there is nothing to do
+         * for this vif in order to deschedule it from NAPI. The interface
+         * will be turned off in thread context later.
+         */
+        if (unlikely(vif->disabled)) {
+                napi_complete(napi);
+                return 0;
+        }
+
         work_done = xenvif_tx_action(vif, budget);
 
         if (work_done < budget) {
@@ -113,6 +123,18 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
         return IRQ_HANDLED;
 }
 
+static void xenvif_wake_queue(unsigned long data)
+{
+        struct xenvif *vif = (struct xenvif *)data;
+
+        if (netif_queue_stopped(vif->dev)) {
+                netdev_err(vif->dev, "draining TX queue\n");
+                vif->rx_queue_purge = true;
+                xenvif_kick_thread(vif);
+                netif_wake_queue(vif->dev);
+        }
+}
+
 static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
         struct xenvif *vif = netdev_priv(dev);
@@ -121,7 +143,9 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
         BUG_ON(skb->dev != dev);
 
         /* Drop the packet if vif is not ready */
-        if (vif->task == NULL || !xenvif_schedulable(vif))
+        if (vif->task == NULL ||
+            vif->dealloc_task == NULL ||
+            !xenvif_schedulable(vif))
                 goto drop;
 
         /* At best we'll need one slot for the header and one for each
@@ -139,8 +163,13 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
          * then turn off the queue to give the ring a chance to
          * drain.
          */
-        if (!xenvif_rx_ring_slots_available(vif, min_slots_needed))
+        if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) {
+                vif->wake_queue.function = xenvif_wake_queue;
+                vif->wake_queue.data = (unsigned long)vif;
                 xenvif_stop_queue(vif);
+                mod_timer(&vif->wake_queue,
+                          jiffies + rx_drain_timeout_jiffies);
+        }
 
         skb_queue_tail(&vif->rx_queue, skb);
         xenvif_kick_thread(vif);
@@ -233,6 +262,28 @@ static const struct xenvif_stat {
                 "rx_gso_checksum_fixup",
                 offsetof(struct xenvif, rx_gso_checksum_fixup)
         },
+        /* If (sent != success + fail), there are probably packets that were
+         * never freed up properly!
+         */
+        {
+                "tx_zerocopy_sent",
+                offsetof(struct xenvif, tx_zerocopy_sent),
+        },
+        {
+                "tx_zerocopy_success",
+                offsetof(struct xenvif, tx_zerocopy_success),
+        },
+        {
+                "tx_zerocopy_fail",
+                offsetof(struct xenvif, tx_zerocopy_fail)
+        },
+        /* Number of packets exceeding MAX_SKB_FRAGS slots. You should use
+         * a guest with the same MAX_SKB_FRAGS value.
+         */
+        {
+                "tx_frag_overflow",
+                offsetof(struct xenvif, tx_frag_overflow)
+        },
 };
 
 static int xenvif_get_sset_count(struct net_device *dev, int string_set)
@@ -321,11 +372,15 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
         vif->ip_csum = 1;
         vif->dev = dev;
 
+        vif->disabled = false;
+
         vif->credit_bytes = vif->remaining_credit = ~0UL;
         vif->credit_usec = 0UL;
         init_timer(&vif->credit_timeout);
         vif->credit_window_start = get_jiffies_64();
 
+        init_timer(&vif->wake_queue);
+
         dev->netdev_ops = &xenvif_netdev_ops;
         dev->hw_features = NETIF_F_SG |
                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -342,8 +397,26 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
         vif->pending_prod = MAX_PENDING_REQS;
         for (i = 0; i < MAX_PENDING_REQS; i++)
                 vif->pending_ring[i] = i;
-        for (i = 0; i < MAX_PENDING_REQS; i++)
-                vif->mmap_pages[i] = NULL;
+        spin_lock_init(&vif->callback_lock);
+        spin_lock_init(&vif->response_lock);
+        /* If ballooning is disabled, this will consume real memory, so you
+         * had better enable it. The long-term solution would be to use just
+         * a bunch of valid page descriptors, without dependency on ballooning.
+         */
+        err = alloc_xenballooned_pages(MAX_PENDING_REQS,
+                                       vif->mmap_pages,
+                                       false);
+        if (err) {
+                netdev_err(dev, "Could not reserve mmap_pages\n");
+                return ERR_PTR(-ENOMEM);
+        }
+        for (i = 0; i < MAX_PENDING_REQS; i++) {
+                vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
+                        { .callback = xenvif_zerocopy_callback,
+                          .ctx = NULL,
+                          .desc = i };
+                vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
+        }
 
         /*
          * Initialise a dummy MAC address. We choose the numerically
@@ -381,12 +454,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
         BUG_ON(vif->tx_irq);
         BUG_ON(vif->task);
+        BUG_ON(vif->dealloc_task);
 
         err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
         if (err < 0)
                 goto err;
 
         init_waitqueue_head(&vif->wq);
+        init_waitqueue_head(&vif->dealloc_wq);
 
         if (tx_evtchn == rx_evtchn) {
                 /* feature-split-event-channels == 0 */
@@ -420,8 +495,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
                 disable_irq(vif->rx_irq);
         }
 
-        task = kthread_create(xenvif_kthread,
-                              (void *)vif, "%s", vif->dev->name);
+        task = kthread_create(xenvif_kthread_guest_rx,
+                              (void *)vif, "%s-guest-rx", vif->dev->name);
         if (IS_ERR(task)) {
                 pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
                 err = PTR_ERR(task);
@@ -430,6 +505,16 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
         vif->task = task;
 
+        task = kthread_create(xenvif_dealloc_kthread,
+                              (void *)vif, "%s-dealloc", vif->dev->name);
+        if (IS_ERR(task)) {
+                pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
+                err = PTR_ERR(task);
+                goto err_rx_unbind;
+        }
+
+        vif->dealloc_task = task;
+
         rtnl_lock();
         if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
                 dev_set_mtu(vif->dev, ETH_DATA_LEN);
@@ -440,6 +525,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
         rtnl_unlock();
 
         wake_up_process(vif->task);
+        wake_up_process(vif->dealloc_task);
 
         return 0;
 
@@ -473,10 +559,16 @@ void xenvif_disconnect(struct xenvif *vif)
         xenvif_carrier_off(vif);
 
         if (vif->task) {
+                del_timer_sync(&vif->wake_queue);
                 kthread_stop(vif->task);
                 vif->task = NULL;
         }
 
+        if (vif->dealloc_task) {
+                kthread_stop(vif->dealloc_task);
+                vif->dealloc_task = NULL;
+        }
+
         if (vif->tx_irq) {
                 if (vif->tx_irq == vif->rx_irq)
                         unbind_from_irqhandler(vif->tx_irq, vif);
@@ -492,6 +584,43 @@ void xenvif_disconnect(struct xenvif *vif)
 
 void xenvif_free(struct xenvif *vif)
 {
+        int i, unmap_timeout = 0;
+        /* Here we want to avoid timeout messages if an skb can be
+         * legitimately stuck somewhere else. Realistically this could be
+         * another vif's internal or QDisc queue. That other vif also has
+         * this rx_drain_timeout_msecs timeout, but the timer only ditches
+         * the internal queue. After that, the QDisc queue can put in worst
+         * case XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that other
+         * vif's internal queue, so we need several rounds of such timeouts
+         * until we can be sure that no other vif should still have skbs
+         * from us. We are not sending more skbs, so newly stuck packets are
+         * not interesting to us here.
+         */
+        unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) *
+                DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
+
+        for (i = 0; i < MAX_PENDING_REQS; ++i) {
+                if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
+                        unmap_timeout++;
+                        schedule_timeout(msecs_to_jiffies(1000));
+                        if (unmap_timeout > worst_case_skb_lifetime &&
+                            net_ratelimit())
+                                netdev_err(vif->dev,
+                                           "Page still granted! Index: %x\n",
+                                           i);
+                        /* If there are still unmapped pages, reset the loop
+                         * to start checking again. We shouldn't exit here
+                         * until the dealloc thread and NAPI instance have
+                         * released all the pages. If a kernel bug causes the
+                         * skbs to stall somewhere, the interface cannot be
+                         * brought down properly.
+                         */
+                        i = -1;
+                }
+        }
+
+        free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
+
         netif_napi_del(&vif->napi);
 
         unregister_netdev(vif->dev);
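
As a rough sanity check of the worst_case_skb_lifetime bound added to xenvif_free() above, the sketch below recomputes it as a standalone C snippet. The constant values are assumptions for illustration only: a 10 s default for the rx_drain_timeout_msecs module parameter, a 256-entry RX ring and 17 for MAX_SKB_FRAGS are not taken from this diff; only XENVIF_QUEUE_LENGTH (32) appears in this file.

```c
#include <stdio.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

/* Assumed values, for illustration only. */
#define RX_DRAIN_TIMEOUT_MSECS  10000   /* assumed rx_drain_timeout_msecs default */
#define XENVIF_QUEUE_LENGTH     32      /* from interface.c */
#define XEN_NETIF_RX_RING_SIZE  256     /* assumed ring size */
#define MAX_SKB_FRAGS           17      /* assumed frag count of that era */

int main(void)
{
        /* Mirrors the expression in xenvif_free(): each drain round lasts
         * rx_drain_timeout_msecs, and a neighbouring vif's internal queue can
         * hold XENVIF_QUEUE_LENGTH skbs that drain at most
         * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs per round.
         */
        unsigned int rounds = DIV_ROUND_UP(XENVIF_QUEUE_LENGTH,
                                           XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS);
        unsigned int worst_case_skb_lifetime =
                (RX_DRAIN_TIMEOUT_MSECS / 1000) * rounds;

        /* 256 / 17 = 15, DIV_ROUND_UP(32, 15) = 3, so 10 s * 3 = 30 s. */
        printf("worst case skb lifetime: %u s\n", worst_case_skb_lifetime);
        return 0;
}
```

Under those assumed values, the wait loop in xenvif_free(), which sleeps roughly one second per iteration, would start emitting the "Page still granted!" warning after about 30 iterations.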
