 drivers/net/virtio_net.c  | 197
 include/linux/netdevice.h |  35
 lib/average.c             |   6
 net/core/dev.c            |  12
 net/core/net-sysfs.c      |  50
 net/core/sock.c           |   4
 6 files changed, 214 insertions(+), 90 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9bd70aa87bf7..d75f8edf4fb3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -26,6 +26,7 @@
 #include <linux/if_vlan.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <linux/average.h>
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -36,11 +37,18 @@ module_param(gso, bool, 0444);
 
 /* FIXME: MTU in config. */
 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
-#define MERGE_BUFFER_LEN (ALIGN(GOOD_PACKET_LEN + \
-                                sizeof(struct virtio_net_hdr_mrg_rxbuf), \
-                                L1_CACHE_BYTES))
 #define GOOD_COPY_LEN	128
 
+/* Weight used for the RX packet size EWMA. The average packet size is used to
+ * determine the packet buffer size when refilling RX rings. As the entire RX
+ * ring may be refilled at once, the weight is chosen so that the EWMA will be
+ * insensitive to short-term, transient changes in packet size.
+ */
+#define RECEIVE_AVG_WEIGHT 64
+
+/* Minimum alignment for mergeable packet buffers. */
+#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, 256)
+
 #define VIRTNET_DRIVER_VERSION "1.0.0"
 
 struct virtnet_stats {
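An illustrative aside on the weight chosen above (not part of the patch): lib/average stores the weight as a power of two, so ewma_init(&avg, 1, 64) gives a factor shift of 0 and a weight shift of 6, and each ewma_add() call moves the average only 1/64 of the way toward the new sample. A standalone sketch of that arithmetic, with an assumed traffic mix:

#include <stdio.h>

/* Mirrors lib/average.c with factor = 1 (shift 0) and weight = 64
 * (shift 6), the parameters virtio_net uses for mrg_avg_pkt_len.
 */
static unsigned long ewma64(unsigned long avg, unsigned long val)
{
	return avg ? ((avg << 6) - avg + val) >> 6 : val;
}

int main(void)
{
	unsigned long avg = 0;
	int i;

	for (i = 0; i < 256; i++)	/* sustained 1500-byte packets */
		avg = ewma64(avg, 1500);
	printf("after bulk traffic: %lu\n", avg);	/* 1500 */

	for (i = 0; i < 4; i++)		/* short burst of 64-byte ACKs */
		avg = ewma64(avg, 64);
	printf("after a small burst: %lu\n", avg);	/* ~1410 */
	return 0;
}

A few small packets barely perturb the estimate, so a transient burst does not shrink the buffers used when an entire ring is refilled at once.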
@@ -75,6 +83,12 @@ struct receive_queue {
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
+	/* Average packet length for mergeable receive buffers. */
+	struct ewma mrg_avg_pkt_len;
+
+	/* Page frag for packet buffer allocation. */
+	struct page_frag alloc_frag;
+
 	/* RX: fragments + linear part + virtio header */
 	struct scatterlist sg[MAX_SKB_FRAGS + 2];
 
@@ -123,11 +137,6 @@ struct virtnet_info {
 	/* Lock for config space updates */
 	struct mutex config_lock;
 
-	/* Page_frag for GFP_KERNEL packet buffer allocation when we run
-	 * low on memory.
-	 */
-	struct page_frag alloc_frag;
-
 	/* Does the affinity hint is set for virtqueues? */
 	bool affinity_hint_set;
 
@@ -218,6 +227,24 @@ static void skb_xmit_done(struct virtqueue *vq)
 		netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
+static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
+{
+	unsigned int truesize = mrg_ctx & (MERGEABLE_BUFFER_ALIGN - 1);
+	return (truesize + 1) * MERGEABLE_BUFFER_ALIGN;
+}
+
+static void *mergeable_ctx_to_buf_address(unsigned long mrg_ctx)
+{
+	return (void *)(mrg_ctx & -MERGEABLE_BUFFER_ALIGN);
+
+}
+
+static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize)
+{
+	unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN;
+	return (unsigned long)buf | (size - 1);
+}
+
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct receive_queue *rq,
 				   struct page *page, unsigned int offset,
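The three helpers above multiplex a buffer's truesize into the low bits of the token handed to the virtqueue: buffers are carved at MERGEABLE_BUFFER_ALIGN boundaries and sized in multiples of that alignment, so the address's low bits are free to hold truesize / MERGEABLE_BUFFER_ALIGN - 1. A standalone worked example (the 256-byte alignment and the address are assumed values for illustration):

#include <assert.h>
#include <stdint.h>

#define CTX_ALIGN 256	/* stand-in for MERGEABLE_BUFFER_ALIGN */

int main(void)
{
	uintptr_t buf = 0x7f2a00abf000UL;	/* 256-byte aligned */
	unsigned int truesize = 1536;		/* 6 * CTX_ALIGN */

	/* Pack, as mergeable_buf_to_ctx(): low 8 bits hold 6 - 1 = 5. */
	uintptr_t ctx = buf | (truesize / CTX_ALIGN - 1);

	/* Unpack: mergeable_ctx_to_buf_address() clears the low bits... */
	assert((ctx & -(uintptr_t)CTX_ALIGN) == buf);

	/* ...and mergeable_ctx_to_buf_truesize() rebuilds the size. */
	assert(((ctx & (CTX_ALIGN - 1)) + 1) * CTX_ALIGN == truesize);
	return 0;
}

Eight low bits cover truesizes up to 256 * 256 = 64KB, comfortably above the PAGE_SIZE cap that get_mergeable_buf_len() enforces later in this patch.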
@@ -326,36 +353,33 @@ err:
 
 static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 struct receive_queue *rq,
-					 void *buf,
+					 unsigned long ctx,
 					 unsigned int len)
 {
+	void *buf = mergeable_ctx_to_buf_address(ctx);
 	struct skb_vnet_hdr *hdr = buf;
 	int num_buf = hdr->mhdr.num_buffers;
 	struct page *page = virt_to_head_page(buf);
 	int offset = buf - page_address(page);
-	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len,
-					       MERGE_BUFFER_LEN);
+	unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+
+	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, truesize);
 	struct sk_buff *curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
 		goto err_skb;
-
 	while (--num_buf) {
 		int num_skb_frags;
 
-		buf = virtqueue_get_buf(rq->vq, &len);
-		if (unlikely(!buf)) {
+		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+		if (unlikely(!ctx)) {
 			pr_debug("%s: rx error: %d buffers out of %d missing\n",
 				 dev->name, num_buf, hdr->mhdr.num_buffers);
 			dev->stats.rx_length_errors++;
 			goto err_buf;
 		}
-		if (unlikely(len > MERGE_BUFFER_LEN)) {
-			pr_debug("%s: rx error: merge buffer too long\n",
-				 dev->name);
-			len = MERGE_BUFFER_LEN;
-		}
 
+		buf = mergeable_ctx_to_buf_address(ctx);
 		page = virt_to_head_page(buf);
 
 		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
@@ -372,35 +396,37 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			head_skb->truesize += nskb->truesize;
 			num_skb_frags = 0;
 		}
+		truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
 		if (curr_skb != head_skb) {
 			head_skb->data_len += len;
 			head_skb->len += len;
-			head_skb->truesize += MERGE_BUFFER_LEN;
+			head_skb->truesize += truesize;
 		}
 		offset = buf - page_address(page);
 		if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
 			put_page(page);
 			skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
-					     len, MERGE_BUFFER_LEN);
+					     len, truesize);
 		} else {
 			skb_add_rx_frag(curr_skb, num_skb_frags, page,
-					offset, len, MERGE_BUFFER_LEN);
+					offset, len, truesize);
 		}
 	}
 
+	ewma_add(&rq->mrg_avg_pkt_len, head_skb->len);
 	return head_skb;
 
 err_skb:
 	put_page(page);
 	while (--num_buf) {
-		buf = virtqueue_get_buf(rq->vq, &len);
-		if (unlikely(!buf)) {
+		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+		if (unlikely(!ctx)) {
 			pr_debug("%s: rx error: %d buffers missing\n",
 				 dev->name, num_buf);
 			dev->stats.rx_length_errors++;
 			break;
 		}
-		page = virt_to_head_page(buf);
+		page = virt_to_head_page(mergeable_ctx_to_buf_address(ctx));
 		put_page(page);
 	}
 err_buf:
@@ -420,17 +446,20 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
 		pr_debug("%s: short packet %i\n", dev->name, len);
 		dev->stats.rx_length_errors++;
-		if (vi->mergeable_rx_bufs)
-			put_page(virt_to_head_page(buf));
-		else if (vi->big_packets)
+		if (vi->mergeable_rx_bufs) {
+			unsigned long ctx = (unsigned long)buf;
+			void *base = mergeable_ctx_to_buf_address(ctx);
+			put_page(virt_to_head_page(base));
+		} else if (vi->big_packets) {
 			give_pages(rq, buf);
-		else
+		} else {
 			dev_kfree_skb(buf);
+		}
 		return;
 	}
 
 	if (vi->mergeable_rx_bufs)
-		skb = receive_mergeable(dev, rq, buf, len);
+		skb = receive_mergeable(dev, rq, (unsigned long)buf, len);
 	else if (vi->big_packets)
 		skb = receive_big(dev, rq, buf, len);
 	else
@@ -571,28 +600,45 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 	return err;
 }
 
+static unsigned int get_mergeable_buf_len(struct ewma *avg_pkt_len)
+{
+	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	unsigned int len;
+
+	len = hdr_len + clamp_t(unsigned int, ewma_read(avg_pkt_len),
+				GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
+	return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
+}
+
 static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 {
-	struct virtnet_info *vi = rq->vq->vdev->priv;
-	char *buf = NULL;
+	struct page_frag *alloc_frag = &rq->alloc_frag;
+	char *buf;
+	unsigned long ctx;
 	int err;
+	unsigned int len, hole;
 
-	if (gfp & __GFP_WAIT) {
-		if (skb_page_frag_refill(MERGE_BUFFER_LEN, &vi->alloc_frag,
-					 gfp)) {
-			buf = (char *)page_address(vi->alloc_frag.page) +
-			      vi->alloc_frag.offset;
-			get_page(vi->alloc_frag.page);
-			vi->alloc_frag.offset += MERGE_BUFFER_LEN;
-		}
-	} else {
-		buf = netdev_alloc_frag(MERGE_BUFFER_LEN);
-	}
-	if (!buf)
+	len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
+	if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
 		return -ENOMEM;
 
-	sg_init_one(rq->sg, buf, MERGE_BUFFER_LEN);
-	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
+	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+	ctx = mergeable_buf_to_ctx(buf, len);
+	get_page(alloc_frag->page);
+	alloc_frag->offset += len;
+	hole = alloc_frag->size - alloc_frag->offset;
+	if (hole < len) {
+		/* To avoid internal fragmentation, if there is very likely not
+		 * enough space for another buffer, add the remaining space to
+		 * the current buffer. This extra space is not included in
+		 * the truesize stored in ctx.
+		 */
+		len += hole;
+		alloc_frag->offset += hole;
+	}
+
+	sg_init_one(rq->sg, buf, len);
+	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp);
 	if (err < 0)
 		put_page(virt_to_head_page(buf));
 
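Two pieces of arithmetic above are worth making concrete (an illustrative aside assuming 4KB pages and the 32KB page frags that SKB_FRAG_PAGE_ORDER == 3 yields). First, the buffer size: with an EWMA average of 1500 bytes, get_mergeable_buf_len() returns 12 + clamp(1500, 1518, 4084) = 1530, aligned up to 1536, since the merge header is 12 bytes and GOOD_PACKET_LEN is 1518. Second, the hole logic, sketched below: the last buffer carved from each frag absorbs whatever tail is too small to hold another buffer.

#include <stdio.h>

int main(void)
{
	unsigned int size = 32768;	/* page frag, PAGE_SIZE << 3 */
	unsigned int len = 1536;	/* get_mergeable_buf_len() result */
	unsigned int offset = 0, hole, n = 0;

	while (size - offset >= len) {
		offset += len;
		n++;
		hole = size - offset;
		if (hole < len) {
			/* Tail too small for another buffer: the last
			 * buffer absorbs it, as in the patch above.
			 */
			printf("buffer %u grows to %u bytes\n", n, len + hole);
			offset += hole;
		}
	}
	printf("%u buffers per 32KB frag, no space wasted\n", n);
	return 0;
}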
@@ -612,6 +658,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
 	int err;
 	bool oom;
 
+	gfp |= __GFP_COLD;
 	do {
 		if (vi->mergeable_rx_bufs)
 			err = add_recvbuf_mergeable(rq, gfp);
@@ -1368,6 +1415,14 @@ static void free_receive_bufs(struct virtnet_info *vi)
 	}
 }
 
+static void free_receive_page_frags(struct virtnet_info *vi)
+{
+	int i;
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		if (vi->rq[i].alloc_frag.page)
+			put_page(vi->rq[i].alloc_frag.page);
+}
+
 static void free_unused_bufs(struct virtnet_info *vi)
 {
 	void *buf;
@@ -1383,12 +1438,15 @@ static void free_unused_bufs(struct virtnet_info *vi)
 		struct virtqueue *vq = vi->rq[i].vq;
 
 		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-			if (vi->mergeable_rx_bufs)
-				put_page(virt_to_head_page(buf));
-			else if (vi->big_packets)
+			if (vi->mergeable_rx_bufs) {
+				unsigned long ctx = (unsigned long)buf;
+				void *base = mergeable_ctx_to_buf_address(ctx);
+				put_page(virt_to_head_page(base));
+			} else if (vi->big_packets) {
 				give_pages(&vi->rq[i], buf);
-			else
+			} else {
 				dev_kfree_skb(buf);
+			}
 		}
 	}
 }
@@ -1496,6 +1554,7 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
 			       napi_weight);
 
 		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
+		ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT);
 		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
 	}
 
@@ -1532,6 +1591,33 @@ err:
 	return ret;
 }
 
+#ifdef CONFIG_SYSFS
+static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
+		struct rx_queue_attribute *attribute, char *buf)
+{
+	struct virtnet_info *vi = netdev_priv(queue->dev);
+	unsigned int queue_index = get_netdev_rx_queue_index(queue);
+	struct ewma *avg;
+
+	BUG_ON(queue_index >= vi->max_queue_pairs);
+	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
+	return sprintf(buf, "%u\n", get_mergeable_buf_len(avg));
+}
+
+static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
+	__ATTR_RO(mergeable_rx_buffer_size);
+
+static struct attribute *virtio_net_mrg_rx_attrs[] = {
+	&mergeable_rx_buffer_size_attribute.attr,
+	NULL
+};
+
+static const struct attribute_group virtio_net_mrg_rx_group = {
+	.name = "virtio_net",
+	.attrs = virtio_net_mrg_rx_attrs
+};
+#endif
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int i, err;
@@ -1646,6 +1732,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (err)
 		goto free_stats;
 
+#ifdef CONFIG_SYSFS
+	if (vi->mergeable_rx_bufs)
+		dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
+#endif
 	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
 	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
 
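With the group registered at probe time, each receive queue directory gains a virtio_net/mergeable_rx_buffer_size file. A minimal userspace reader (a sketch; the interface name eth0 is an assumed example):

#include <stdio.h>

int main(void)
{
	char line[32];
	FILE *f = fopen("/sys/class/net/eth0/queues/rx-0/"
			"virtio_net/mergeable_rx_buffer_size", "r");

	if (f && fgets(line, sizeof(line), f))
		printf("rx-0 buffer size: %s", line);	/* e.g. "1536" */
	if (f)
		fclose(f);
	return 0;
}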
@@ -1695,9 +1785,8 @@ free_recv_bufs:
 	unregister_netdev(dev);
 free_vqs:
 	cancel_delayed_work_sync(&vi->refill);
+	free_receive_page_frags(vi);
 	virtnet_del_vqs(vi);
-	if (vi->alloc_frag.page)
-		put_page(vi->alloc_frag.page);
 free_stats:
 	free_percpu(vi->stats);
 free:
@@ -1714,6 +1803,8 @@ static void remove_vq_common(struct virtnet_info *vi)
 
 	free_receive_bufs(vi);
 
+	free_receive_page_frags(vi);
+
 	virtnet_del_vqs(vi);
 }
 
@@ -1731,8 +1822,6 @@ static void virtnet_remove(struct virtio_device *vdev)
 	unregister_netdev(vi->dev);
 
 	remove_vq_common(vi);
-	if (vi->alloc_frag.page)
-		put_page(vi->alloc_frag.page);
 
 	flush_work(&vi->config_work);
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d7668b881d08..e985231fe04b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -668,15 +668,28 @@ extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
			 u16 filter_id);
 #endif
+#endif /* CONFIG_RPS */
 
 /* This structure contains an instance of an RX queue. */
 struct netdev_rx_queue {
+#ifdef CONFIG_RPS
 	struct rps_map __rcu *rps_map;
 	struct rps_dev_flow_table __rcu *rps_flow_table;
+#endif
 	struct kobject kobj;
 	struct net_device *dev;
 } ____cacheline_aligned_in_smp;
-#endif /* CONFIG_RPS */
+
+/*
+ * RX queue sysfs structures and functions.
+ */
+struct rx_queue_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct netdev_rx_queue *queue,
+	    struct rx_queue_attribute *attr, char *buf);
+	ssize_t (*store)(struct netdev_rx_queue *queue,
+	    struct rx_queue_attribute *attr, const char *buf, size_t len);
+};
 
 #ifdef CONFIG_XPS
 /*
@@ -1313,7 +1326,7 @@ struct net_device {
 						unicast) */
 
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 	struct netdev_rx_queue *_rx;
 
 	/* Number of RX queues allocated at register_netdev() time */
@@ -1424,6 +1437,8 @@ struct net_device {
 	struct device dev;
 	/* space for optional device, statistics, and wireless sysfs groups */
 	const struct attribute_group *sysfs_groups[4];
+	/* space for optional per-rx queue attributes */
+	const struct attribute_group *sysfs_rx_queue_group;
 
 	/* rtnetlink link ops */
 	const struct rtnl_link_ops *rtnl_link_ops;
@@ -2375,7 +2390,7 @@ static inline bool netif_is_multiqueue(const struct net_device *dev)
 
 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq);
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq);
 #else
 static inline int netif_set_real_num_rx_queues(struct net_device *dev,
@@ -2394,7 +2409,7 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev,
 					    from_dev->real_num_tx_queues);
 	if (err)
 		return err;
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 	return netif_set_real_num_rx_queues(to_dev,
 					    from_dev->real_num_rx_queues);
 #else
@@ -2402,6 +2417,18 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev,
 #endif
 }
 
+#ifdef CONFIG_SYSFS
+static inline unsigned int get_netdev_rx_queue_index(
+		struct netdev_rx_queue *queue)
+{
+	struct net_device *dev = queue->dev;
+	int index = queue - dev->_rx;
+
+	BUG_ON(index >= dev->num_rx_queues);
+	return index;
+}
+#endif
+
 #define DEFAULT_MAX_NUM_RSS_QUEUES (8)
 int netif_get_num_default_rss_queues(void);
 
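The inline helper above recovers a queue's index by pointer arithmetic on the contiguous _rx array; the virtio_net show function earlier in this diff is its first caller. A hypothetical second consumer (my_priv, rx_ring_size, and my_ring_size_show are invented names) would follow the same pattern:

struct my_priv {
	unsigned int rx_ring_size[16];
};

static ssize_t my_ring_size_show(struct netdev_rx_queue *queue,
				 struct rx_queue_attribute *attr, char *buf)
{
	struct my_priv *priv = netdev_priv(queue->dev);
	unsigned int i = get_netdev_rx_queue_index(queue);

	return sprintf(buf, "%u\n", priv->rx_ring_size[i]);
}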
diff --git a/lib/average.c b/lib/average.c
index 99a67e662b3c..114d1beae0c7 100644
--- a/lib/average.c
+++ b/lib/average.c
@@ -53,8 +53,10 @@ EXPORT_SYMBOL(ewma_init);
  */
 struct ewma *ewma_add(struct ewma *avg, unsigned long val)
 {
-	avg->internal = avg->internal ?
-		(((avg->internal << avg->weight) - avg->internal) +
+	unsigned long internal = ACCESS_ONCE(avg->internal);
+
+	ACCESS_ONCE(avg->internal) = internal ?
+		(((internal << avg->weight) - internal) +
 			(val << avg->factor)) >> avg->weight :
 		(val << avg->factor);
 	return avg;
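This matters because virtio_net now calls ewma_add() from the receive path while the sysfs handler added in this series calls ewma_read() with no lock held. The old body read avg->internal three times; a sketch of the hazard (illustrative only, with volatile standing in for a compiler that reloads the field between uses):

/* BAD: the field is read for the test and again for both uses, so a
 * concurrent update can mix two different snapshots into one result.
 * The patched ewma_add() snapshots the value into a local first and
 * stores it back exactly once.
 */
unsigned long ewma_add_racy(volatile unsigned long *internal,
			    unsigned long val, int weight, int factor)
{
	return *internal ?
		(((*internal << weight) - *internal) +
			(val << factor)) >> weight :
		(val << factor);
}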
diff --git a/net/core/dev.c b/net/core/dev.c
index f87bedd51eed..288df6232006 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2083,7 +2083,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 }
 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 /**
  * netif_set_real_num_rx_queues - set actual number of RX queues used
  * @dev: Network device
@@ -5764,7 +5764,7 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 }
 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 static int netif_alloc_rx_queues(struct net_device *dev)
 {
 	unsigned int i, count = dev->num_rx_queues;
@@ -6309,7 +6309,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 		return NULL;
 	}
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 	if (rxqs < 1) {
 		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
 		return NULL;
@@ -6365,7 +6365,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	if (netif_alloc_netdev_queues(dev))
 		goto free_all;
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 	dev->num_rx_queues = rxqs;
 	dev->real_num_rx_queues = rxqs;
 	if (netif_alloc_rx_queues(dev))
@@ -6385,7 +6385,7 @@ free_all:
 free_pcpu:
 	free_percpu(dev->pcpu_refcnt);
 	netif_free_tx_queues(dev);
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 	kfree(dev->_rx);
 #endif
 
@@ -6410,7 +6410,7 @@ void free_netdev(struct net_device *dev)
 	release_net(dev_net(dev));
 
 	netif_free_tx_queues(dev);
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 	kfree(dev->_rx);
 #endif
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 49843bf7e43e..7eeadeecc5a2 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -498,17 +498,7 @@ static struct attribute_group wireless_group = {
 #define net_class_groups	NULL
 #endif /* CONFIG_SYSFS */
 
-#ifdef CONFIG_RPS
-/*
- * RX queue sysfs structures and functions.
- */
-struct rx_queue_attribute {
-	struct attribute attr;
-	ssize_t (*show)(struct netdev_rx_queue *queue,
-	    struct rx_queue_attribute *attr, char *buf);
-	ssize_t (*store)(struct netdev_rx_queue *queue,
-	    struct rx_queue_attribute *attr, const char *buf, size_t len);
-};
+#ifdef CONFIG_SYSFS
 #define to_rx_queue_attr(_attr) container_of(_attr,		\
     struct rx_queue_attribute, attr)
 
@@ -543,6 +533,7 @@ static const struct sysfs_ops rx_queue_sysfs_ops = {
 	.store = rx_queue_attr_store,
 };
 
+#ifdef CONFIG_RPS
 static ssize_t show_rps_map(struct netdev_rx_queue *queue,
 			    struct rx_queue_attribute *attribute, char *buf)
 {
@@ -718,16 +709,20 @@ static struct rx_queue_attribute rps_cpus_attribute =
 static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
 	__ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
 	    show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
+#endif /* CONFIG_RPS */
 
 static struct attribute *rx_queue_default_attrs[] = {
+#ifdef CONFIG_RPS
 	&rps_cpus_attribute.attr,
 	&rps_dev_flow_table_cnt_attribute.attr,
+#endif
 	NULL
 };
 
 static void rx_queue_release(struct kobject *kobj)
 {
 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+#ifdef CONFIG_RPS
 	struct rps_map *map;
 	struct rps_dev_flow_table *flow_table;
 
@@ -743,6 +738,7 @@ static void rx_queue_release(struct kobject *kobj)
 		RCU_INIT_POINTER(queue->rps_flow_table, NULL);
 		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
 	}
+#endif
 
 	memset(kobj, 0, sizeof(*kobj));
 	dev_put(queue->dev);
@@ -763,25 +759,36 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
 	kobj->kset = net->queues_kset;
 	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
 	    "rx-%u", index);
-	if (error) {
-		kobject_put(kobj);
-		return error;
+	if (error)
+		goto exit;
+
+	if (net->sysfs_rx_queue_group) {
+		error = sysfs_create_group(kobj, net->sysfs_rx_queue_group);
+		if (error)
+			goto exit;
 	}
 
 	kobject_uevent(kobj, KOBJ_ADD);
 	dev_hold(queue->dev);
 
 	return error;
+exit:
+	kobject_put(kobj);
+	return error;
 }
-#endif /* CONFIG_RPS */
+#endif /* CONFIG_SYSFS */
 
 int
 net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 {
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 	int i;
 	int error = 0;
 
+#ifndef CONFIG_RPS
+	if (!net->sysfs_rx_queue_group)
+		return 0;
+#endif
 	for (i = old_num; i < new_num; i++) {
 		error = rx_queue_add_kobject(net, i);
 		if (error) {
@@ -790,8 +797,12 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 		}
 	}
 
-	while (--i >= new_num)
+	while (--i >= new_num) {
+		if (net->sysfs_rx_queue_group)
+			sysfs_remove_group(&net->_rx[i].kobj,
+					   net->sysfs_rx_queue_group);
 		kobject_put(&net->_rx[i].kobj);
+	}
 
 	return error;
 #else
@@ -1155,9 +1166,6 @@ static int register_queue_kobjects(struct net_device *net)
 	    NULL, &net->dev.kobj);
 	if (!net->queues_kset)
 		return -ENOMEM;
-#endif
-
-#ifdef CONFIG_RPS
 	real_rx = net->real_num_rx_queues;
 #endif
 	real_tx = net->real_num_tx_queues;
@@ -1184,7 +1192,7 @@ static void remove_queue_kobjects(struct net_device *net)
 {
 	int real_rx = 0, real_tx = 0;
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 	real_rx = net->real_num_rx_queues;
 #endif
 	real_tx = net->real_num_tx_queues;
diff --git a/net/core/sock.c b/net/core/sock.c
index 85ad6f0d3898..b3f7ee3008a0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1836,9 +1836,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
 		put_page(pfrag->page);
 	}
 
-	/* We restrict high order allocations to users that can afford to wait */
-	order = (prio & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
-
+	order = SKB_FRAG_PAGE_ORDER;
 	do {
 		gfp_t gfp = prio;
 
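Dropping the __GFP_WAIT test is safe because skb_page_frag_refill() already steps down the allocation order on failure, so atomic callers (such as the refill path above) now merely prefer 32KB frags and degrade to order-0 pages under memory pressure. A simplified sketch of that surrounding loop (reconstructed from context, not patch text):

static bool frag_refill_sketch(unsigned int sz, struct page_frag *pfrag,
			       gfp_t prio)
{
	int order = SKB_FRAG_PAGE_ORDER;	/* try a 32KB frag first */

	do {
		gfp_t gfp = prio;

		if (order)
			gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
		pfrag->page = alloc_pages(gfp, order);
		if (pfrag->page) {
			pfrag->offset = 0;
			pfrag->size = PAGE_SIZE << order;
			return true;
		}
	} while (--order >= 0);		/* fall back toward order-0 */

	return false;
}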