 drivers/net/virtio_net.c  | 197
 include/linux/netdevice.h |  35
 lib/average.c             |   6
 net/core/dev.c            |  12
 net/core/net-sysfs.c      |  50
 net/core/sock.c           |   4
 6 files changed, 214 insertions(+), 90 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9bd70aa87bf7..d75f8edf4fb3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -26,6 +26,7 @@
 #include <linux/if_vlan.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <linux/average.h>
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -36,11 +37,18 @@ module_param(gso, bool, 0444);
 
 /* FIXME: MTU in config. */
 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
-#define MERGE_BUFFER_LEN (ALIGN(GOOD_PACKET_LEN + \
-                                sizeof(struct virtio_net_hdr_mrg_rxbuf), \
-                                L1_CACHE_BYTES))
 #define GOOD_COPY_LEN 128
 
+/* Weight used for the RX packet size EWMA. The average packet size is used to
+ * determine the packet buffer size when refilling RX rings. As the entire RX
+ * ring may be refilled at once, the weight is chosen so that the EWMA will be
+ * insensitive to short-term, transient changes in packet size.
+ */
+#define RECEIVE_AVG_WEIGHT 64
+
+/* Minimum alignment for mergeable packet buffers. */
+#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, 256)
+
 #define VIRTNET_DRIVER_VERSION "1.0.0"
 
 struct virtnet_stats {
@@ -75,6 +83,12 @@ struct receive_queue {
         /* Chain pages by the private ptr. */
         struct page *pages;
 
+        /* Average packet length for mergeable receive buffers. */
+        struct ewma mrg_avg_pkt_len;
+
+        /* Page frag for packet buffer allocation. */
+        struct page_frag alloc_frag;
+
         /* RX: fragments + linear part + virtio header */
         struct scatterlist sg[MAX_SKB_FRAGS + 2];
 
@@ -123,11 +137,6 @@ struct virtnet_info {
         /* Lock for config space updates */
         struct mutex config_lock;
 
-        /* Page_frag for GFP_KERNEL packet buffer allocation when we run
-         * low on memory.
-         */
-        struct page_frag alloc_frag;
-
         /* Does the affinity hint is set for virtqueues? */
         bool affinity_hint_set;
 
@@ -218,6 +227,24 @@ static void skb_xmit_done(struct virtqueue *vq)
         netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
+static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
+{
+        unsigned int truesize = mrg_ctx & (MERGEABLE_BUFFER_ALIGN - 1);
+        return (truesize + 1) * MERGEABLE_BUFFER_ALIGN;
+}
+
+static void *mergeable_ctx_to_buf_address(unsigned long mrg_ctx)
+{
+        return (void *)(mrg_ctx & -MERGEABLE_BUFFER_ALIGN);
+
+}
+
+static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize)
+{
+        unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN;
+        return (unsigned long)buf | (size - 1);
+}
+
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct receive_queue *rq,
                                    struct page *page, unsigned int offset,
@@ -326,36 +353,33 @@ err:
 
 static struct sk_buff *receive_mergeable(struct net_device *dev,
                                          struct receive_queue *rq,
-                                         void *buf,
+                                         unsigned long ctx,
                                          unsigned int len)
 {
+        void *buf = mergeable_ctx_to_buf_address(ctx);
         struct skb_vnet_hdr *hdr = buf;
         int num_buf = hdr->mhdr.num_buffers;
         struct page *page = virt_to_head_page(buf);
         int offset = buf - page_address(page);
-        struct sk_buff *head_skb = page_to_skb(rq, page, offset, len,
-                                               MERGE_BUFFER_LEN);
+        unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+
+        struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, truesize);
         struct sk_buff *curr_skb = head_skb;
 
         if (unlikely(!curr_skb))
                 goto err_skb;
-
         while (--num_buf) {
                 int num_skb_frags;
 
-                buf = virtqueue_get_buf(rq->vq, &len);
-                if (unlikely(!buf)) {
+                ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+                if (unlikely(!ctx)) {
                         pr_debug("%s: rx error: %d buffers out of %d missing\n",
                                  dev->name, num_buf, hdr->mhdr.num_buffers);
                         dev->stats.rx_length_errors++;
                         goto err_buf;
                 }
-                if (unlikely(len > MERGE_BUFFER_LEN)) {
-                        pr_debug("%s: rx error: merge buffer too long\n",
-                                 dev->name);
-                        len = MERGE_BUFFER_LEN;
-                }
 
+                buf = mergeable_ctx_to_buf_address(ctx);
                 page = virt_to_head_page(buf);
 
                 num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
@@ -372,35 +396,37 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                         head_skb->truesize += nskb->truesize;
                         num_skb_frags = 0;
                 }
+                truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
                 if (curr_skb != head_skb) {
                         head_skb->data_len += len;
                         head_skb->len += len;
-                        head_skb->truesize += MERGE_BUFFER_LEN;
+                        head_skb->truesize += truesize;
                 }
                 offset = buf - page_address(page);
                 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
                         put_page(page);
                         skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
-                                             len, MERGE_BUFFER_LEN);
+                                             len, truesize);
                 } else {
                         skb_add_rx_frag(curr_skb, num_skb_frags, page,
-                                        offset, len, MERGE_BUFFER_LEN);
+                                        offset, len, truesize);
                 }
         }
 
+        ewma_add(&rq->mrg_avg_pkt_len, head_skb->len);
         return head_skb;
 
 err_skb:
         put_page(page);
         while (--num_buf) {
-                buf = virtqueue_get_buf(rq->vq, &len);
-                if (unlikely(!buf)) {
+                ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+                if (unlikely(!ctx)) {
                         pr_debug("%s: rx error: %d buffers missing\n",
                                  dev->name, num_buf);
                         dev->stats.rx_length_errors++;
                         break;
                 }
-                page = virt_to_head_page(buf);
+                page = virt_to_head_page(mergeable_ctx_to_buf_address(ctx));
                 put_page(page);
         }
 err_buf:
@@ -420,17 +446,20 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
         if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
                 pr_debug("%s: short packet %i\n", dev->name, len);
                 dev->stats.rx_length_errors++;
-                if (vi->mergeable_rx_bufs)
-                        put_page(virt_to_head_page(buf));
-                else if (vi->big_packets)
+                if (vi->mergeable_rx_bufs) {
+                        unsigned long ctx = (unsigned long)buf;
+                        void *base = mergeable_ctx_to_buf_address(ctx);
+                        put_page(virt_to_head_page(base));
+                } else if (vi->big_packets) {
                         give_pages(rq, buf);
-                else
+                } else {
                         dev_kfree_skb(buf);
+                }
                 return;
         }
 
         if (vi->mergeable_rx_bufs)
-                skb = receive_mergeable(dev, rq, buf, len);
+                skb = receive_mergeable(dev, rq, (unsigned long)buf, len);
         else if (vi->big_packets)
                 skb = receive_big(dev, rq, buf, len);
         else
@@ -571,28 +600,45 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
         return err;
 }
 
+static unsigned int get_mergeable_buf_len(struct ewma *avg_pkt_len)
+{
+        const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+        unsigned int len;
+
+        len = hdr_len + clamp_t(unsigned int, ewma_read(avg_pkt_len),
+                                GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
+        return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
+}
+
 static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 {
-        struct virtnet_info *vi = rq->vq->vdev->priv;
-        char *buf = NULL;
+        struct page_frag *alloc_frag = &rq->alloc_frag;
+        char *buf;
+        unsigned long ctx;
         int err;
+        unsigned int len, hole;
 
-        if (gfp & __GFP_WAIT) {
-                if (skb_page_frag_refill(MERGE_BUFFER_LEN, &vi->alloc_frag,
-                                         gfp)) {
-                        buf = (char *)page_address(vi->alloc_frag.page) +
-                              vi->alloc_frag.offset;
-                        get_page(vi->alloc_frag.page);
-                        vi->alloc_frag.offset += MERGE_BUFFER_LEN;
-                }
-        } else {
-                buf = netdev_alloc_frag(MERGE_BUFFER_LEN);
-        }
-        if (!buf)
+        len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
+        if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
                 return -ENOMEM;
 
-        sg_init_one(rq->sg, buf, MERGE_BUFFER_LEN);
-        err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
+        buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+        ctx = mergeable_buf_to_ctx(buf, len);
+        get_page(alloc_frag->page);
+        alloc_frag->offset += len;
+        hole = alloc_frag->size - alloc_frag->offset;
+        if (hole < len) {
+                /* To avoid internal fragmentation, if there is very likely not
+                 * enough space for another buffer, add the remaining space to
+                 * the current buffer. This extra space is not included in
+                 * the truesize stored in ctx.
+                 */
+                len += hole;
+                alloc_frag->offset += hole;
+        }
+
+        sg_init_one(rq->sg, buf, len);
+        err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp);
         if (err < 0)
                 put_page(virt_to_head_page(buf));
 
@@ -612,6 +658,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
         int err;
         bool oom;
 
+        gfp |= __GFP_COLD;
         do {
                 if (vi->mergeable_rx_bufs)
                         err = add_recvbuf_mergeable(rq, gfp);
@@ -1368,6 +1415,14 @@ static void free_receive_bufs(struct virtnet_info *vi)
         }
 }
 
+static void free_receive_page_frags(struct virtnet_info *vi)
+{
+        int i;
+        for (i = 0; i < vi->max_queue_pairs; i++)
+                if (vi->rq[i].alloc_frag.page)
+                        put_page(vi->rq[i].alloc_frag.page);
+}
+
 static void free_unused_bufs(struct virtnet_info *vi)
 {
         void *buf;
@@ -1383,12 +1438,15 @@ static void free_unused_bufs(struct virtnet_info *vi)
                 struct virtqueue *vq = vi->rq[i].vq;
 
                 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-                        if (vi->mergeable_rx_bufs)
-                                put_page(virt_to_head_page(buf));
-                        else if (vi->big_packets)
+                        if (vi->mergeable_rx_bufs) {
+                                unsigned long ctx = (unsigned long)buf;
+                                void *base = mergeable_ctx_to_buf_address(ctx);
+                                put_page(virt_to_head_page(base));
+                        } else if (vi->big_packets) {
                                 give_pages(&vi->rq[i], buf);
-                        else
+                        } else {
                                 dev_kfree_skb(buf);
+                        }
                 }
         }
 }
@@ -1496,6 +1554,7 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
                                napi_weight);
 
                 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
+                ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT);
                 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
         }
 
@@ -1532,6 +1591,33 @@ err:
         return ret;
 }
 
+#ifdef CONFIG_SYSFS
+static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
+                struct rx_queue_attribute *attribute, char *buf)
+{
+        struct virtnet_info *vi = netdev_priv(queue->dev);
+        unsigned int queue_index = get_netdev_rx_queue_index(queue);
+        struct ewma *avg;
+
+        BUG_ON(queue_index >= vi->max_queue_pairs);
+        avg = &vi->rq[queue_index].mrg_avg_pkt_len;
+        return sprintf(buf, "%u\n", get_mergeable_buf_len(avg));
+}
+
+static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
+        __ATTR_RO(mergeable_rx_buffer_size);
+
+static struct attribute *virtio_net_mrg_rx_attrs[] = {
+        &mergeable_rx_buffer_size_attribute.attr,
+        NULL
+};
+
+static const struct attribute_group virtio_net_mrg_rx_group = {
+        .name = "virtio_net",
+        .attrs = virtio_net_mrg_rx_attrs
+};
+#endif
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
         int i, err;
@@ -1646,6 +1732,10 @@ static int virtnet_probe(struct virtio_device *vdev)
         if (err)
                 goto free_stats;
 
+#ifdef CONFIG_SYSFS
+        if (vi->mergeable_rx_bufs)
+                dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
+#endif
         netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
         netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
 
@@ -1695,9 +1785,8 @@ free_recv_bufs:
         unregister_netdev(dev);
 free_vqs:
         cancel_delayed_work_sync(&vi->refill);
+        free_receive_page_frags(vi);
         virtnet_del_vqs(vi);
-        if (vi->alloc_frag.page)
-                put_page(vi->alloc_frag.page);
 free_stats:
         free_percpu(vi->stats);
 free:
@@ -1714,6 +1803,8 @@ static void remove_vq_common(struct virtnet_info *vi)
 
         free_receive_bufs(vi);
 
+        free_receive_page_frags(vi);
+
         virtnet_del_vqs(vi);
 }
 
@@ -1731,8 +1822,6 @@ static void virtnet_remove(struct virtio_device *vdev)
         unregister_netdev(vi->dev);
 
         remove_vq_common(vi);
-        if (vi->alloc_frag.page)
-                put_page(vi->alloc_frag.page);
 
         flush_work(&vi->config_work);
 
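The three mergeable_* helpers added above pack a buffer's address and its truesize into a single unsigned long that is handed to the virtqueue as the buffer token: the MERGEABLE_BUFFER_ALIGN-aligned address keeps the high bits, and truesize / MERGEABLE_BUFFER_ALIGN - 1 keeps the low bits. A minimal, standalone userspace sketch of the same arithmetic (illustrative names only, assuming a 256-byte alignment and an aligned allocation; not part of the patch):

/* Sketch of the mergeable-buffer context packing: address in the high
 * bits, (truesize / alignment) - 1 in the low bits.
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define ALIGN_SZ 256UL  /* stand-in for MERGEABLE_BUFFER_ALIGN */

static unsigned long buf_to_ctx(void *buf, unsigned int truesize)
{
        return (unsigned long)buf | (truesize / ALIGN_SZ - 1);
}

static void *ctx_to_addr(unsigned long ctx)
{
        return (void *)(ctx & ~(ALIGN_SZ - 1));
}

static unsigned int ctx_to_truesize(unsigned long ctx)
{
        return ((ctx & (ALIGN_SZ - 1)) + 1) * ALIGN_SZ;
}

int main(void)
{
        /* aligned_alloc guarantees the low 8 bits of the address are zero */
        void *buf = aligned_alloc(ALIGN_SZ, 4096);
        unsigned long ctx;

        if (!buf)
                return 1;
        /* 1536 stands in for an EWMA-derived length rounded up to 256 */
        ctx = buf_to_ctx(buf, 1536);
        assert(ctx_to_addr(ctx) == buf);
        assert(ctx_to_truesize(ctx) == 1536);
        printf("ctx=%#lx addr=%p truesize=%u\n", ctx, ctx_to_addr(ctx),
               ctx_to_truesize(ctx));
        free(buf);
        return 0;
}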
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d7668b881d08..e985231fe04b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -668,15 +668,28 @@ extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
                          u16 filter_id);
 #endif
+#endif /* CONFIG_RPS */
 
 /* This structure contains an instance of an RX queue. */
 struct netdev_rx_queue {
+#ifdef CONFIG_RPS
         struct rps_map __rcu *rps_map;
         struct rps_dev_flow_table __rcu *rps_flow_table;
+#endif
         struct kobject kobj;
         struct net_device *dev;
 } ____cacheline_aligned_in_smp;
-#endif /* CONFIG_RPS */
+
+/*
+ * RX queue sysfs structures and functions.
+ */
+struct rx_queue_attribute {
+        struct attribute attr;
+        ssize_t (*show)(struct netdev_rx_queue *queue,
+            struct rx_queue_attribute *attr, char *buf);
+        ssize_t (*store)(struct netdev_rx_queue *queue,
+            struct rx_queue_attribute *attr, const char *buf, size_t len);
+};
 
 #ifdef CONFIG_XPS
 /*
@@ -1313,7 +1326,7 @@ struct net_device {
                                                         unicast) */
 
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
         struct netdev_rx_queue *_rx;
 
         /* Number of RX queues allocated at register_netdev() time */
@@ -1424,6 +1437,8 @@ struct net_device {
         struct device dev;
         /* space for optional device, statistics, and wireless sysfs groups */
         const struct attribute_group *sysfs_groups[4];
+        /* space for optional per-rx queue attributes */
+        const struct attribute_group *sysfs_rx_queue_group;
 
         /* rtnetlink link ops */
         const struct rtnl_link_ops *rtnl_link_ops;
@@ -2375,7 +2390,7 @@ static inline bool netif_is_multiqueue(const struct net_device *dev)
 
 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq);
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq);
 #else
 static inline int netif_set_real_num_rx_queues(struct net_device *dev,
@@ -2394,7 +2409,7 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev,
                                             from_dev->real_num_tx_queues);
         if (err)
                 return err;
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
         return netif_set_real_num_rx_queues(to_dev,
                                             from_dev->real_num_rx_queues);
 #else
@@ -2402,6 +2417,18 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev,
 #endif
 }
 
+#ifdef CONFIG_SYSFS
+static inline unsigned int get_netdev_rx_queue_index(
+                struct netdev_rx_queue *queue)
+{
+        struct net_device *dev = queue->dev;
+        int index = queue - dev->_rx;
+
+        BUG_ON(index >= dev->num_rx_queues);
+        return index;
+}
+#endif
+
 #define DEFAULT_MAX_NUM_RSS_QUEUES (8)
 int netif_get_num_default_rss_queues(void);
 
diff --git a/lib/average.c b/lib/average.c
index 99a67e662b3c..114d1beae0c7 100644
--- a/lib/average.c
+++ b/lib/average.c
@@ -53,8 +53,10 @@ EXPORT_SYMBOL(ewma_init);
  */
 struct ewma *ewma_add(struct ewma *avg, unsigned long val)
 {
-        avg->internal = avg->internal ?
-                (((avg->internal << avg->weight) - avg->internal) +
+        unsigned long internal = ACCESS_ONCE(avg->internal);
+
+        ACCESS_ONCE(avg->internal) = internal ?
+                (((internal << avg->weight) - internal) +
                         (val << avg->factor)) >> avg->weight :
                 (val << avg->factor);
         return avg;
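The ewma_add() rewrite above loads avg->internal once and stores it once via ACCESS_ONCE, so a lockless reader (such as the new mergeable_rx_buffer_size attribute) sees either the old or the new average, never an intermediate value. Below is a small userspace sketch of the same update rule with the factor and weight that virtio_net passes to ewma_init(); the names and the sample sizes are illustrative, not kernel code.

/* EWMA update as in lib/average.c, factor = 1 (shift 0) and weight = 64
 * (stored as its log2, so shift 6): avg moves by (val - avg)/64 per
 * sample, which is why the RECEIVE_AVG_WEIGHT comment calls it
 * insensitive to short-term changes in packet size.
 */
#include <stdio.h>

int main(void)
{
        unsigned long avg = 0;
        const unsigned int weight_log2 = 6; /* log2(64) */
        int i;

        for (i = 0; i < 256; i++) {
                /* 128 small packets, then 128 MTU-sized ones */
                unsigned long val = (i < 128) ? 100 : 1500;

                avg = avg ? (((avg << weight_log2) - avg) + val) >> weight_log2
                          : val;
                if ((i & 63) == 63)
                        printf("sample %3d: avg ~= %lu\n", i, avg);
        }
        return 0;
}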
diff --git a/net/core/dev.c b/net/core/dev.c
index f87bedd51eed..288df6232006 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2083,7 +2083,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 }
 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 /**
  * netif_set_real_num_rx_queues - set actual number of RX queues used
  * @dev: Network device
@@ -5764,7 +5764,7 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 }
 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 static int netif_alloc_rx_queues(struct net_device *dev)
 {
         unsigned int i, count = dev->num_rx_queues;
@@ -6309,7 +6309,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
                 return NULL;
         }
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
         if (rxqs < 1) {
                 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
                 return NULL;
@@ -6365,7 +6365,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
         if (netif_alloc_netdev_queues(dev))
                 goto free_all;
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
         dev->num_rx_queues = rxqs;
         dev->real_num_rx_queues = rxqs;
         if (netif_alloc_rx_queues(dev))
@@ -6385,7 +6385,7 @@ free_all:
 free_pcpu:
         free_percpu(dev->pcpu_refcnt);
         netif_free_tx_queues(dev);
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
         kfree(dev->_rx);
 #endif
 
@@ -6410,7 +6410,7 @@ void free_netdev(struct net_device *dev)
         release_net(dev_net(dev));
 
         netif_free_tx_queues(dev);
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
         kfree(dev->_rx);
 #endif
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 49843bf7e43e..7eeadeecc5a2 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -498,17 +498,7 @@ static struct attribute_group wireless_group = {
 #define net_class_groups NULL
 #endif /* CONFIG_SYSFS */
 
-#ifdef CONFIG_RPS
-/*
- * RX queue sysfs structures and functions.
- */
-struct rx_queue_attribute {
-        struct attribute attr;
-        ssize_t (*show)(struct netdev_rx_queue *queue,
-            struct rx_queue_attribute *attr, char *buf);
-        ssize_t (*store)(struct netdev_rx_queue *queue,
-            struct rx_queue_attribute *attr, const char *buf, size_t len);
-};
+#ifdef CONFIG_SYSFS
 #define to_rx_queue_attr(_attr) container_of(_attr, \
     struct rx_queue_attribute, attr)
 
@@ -543,6 +533,7 @@ static const struct sysfs_ops rx_queue_sysfs_ops = {
         .store = rx_queue_attr_store,
 };
 
+#ifdef CONFIG_RPS
 static ssize_t show_rps_map(struct netdev_rx_queue *queue,
                             struct rx_queue_attribute *attribute, char *buf)
 {
@@ -718,16 +709,20 @@ static struct rx_queue_attribute rps_cpus_attribute =
 static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
         __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
             show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
+#endif /* CONFIG_RPS */
 
 static struct attribute *rx_queue_default_attrs[] = {
+#ifdef CONFIG_RPS
         &rps_cpus_attribute.attr,
         &rps_dev_flow_table_cnt_attribute.attr,
+#endif
         NULL
 };
 
 static void rx_queue_release(struct kobject *kobj)
 {
         struct netdev_rx_queue *queue = to_rx_queue(kobj);
+#ifdef CONFIG_RPS
         struct rps_map *map;
         struct rps_dev_flow_table *flow_table;
 
@@ -743,6 +738,7 @@ static void rx_queue_release(struct kobject *kobj)
                 RCU_INIT_POINTER(queue->rps_flow_table, NULL);
                 call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
         }
+#endif
 
         memset(kobj, 0, sizeof(*kobj));
         dev_put(queue->dev);
@@ -763,25 +759,36 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
         kobj->kset = net->queues_kset;
         error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
             "rx-%u", index);
-        if (error) {
-                kobject_put(kobj);
-                return error;
+        if (error)
+                goto exit;
+
+        if (net->sysfs_rx_queue_group) {
+                error = sysfs_create_group(kobj, net->sysfs_rx_queue_group);
+                if (error)
+                        goto exit;
         }
 
         kobject_uevent(kobj, KOBJ_ADD);
         dev_hold(queue->dev);
 
         return error;
+exit:
+        kobject_put(kobj);
+        return error;
 }
-#endif /* CONFIG_RPS */
+#endif /* CONFIG_SYFS */
 
 int
 net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 {
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
         int i;
         int error = 0;
 
+#ifndef CONFIG_RPS
+        if (!net->sysfs_rx_queue_group)
+                return 0;
+#endif
         for (i = old_num; i < new_num; i++) {
                 error = rx_queue_add_kobject(net, i);
                 if (error) {
@@ -790,8 +797,12 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
                 }
         }
 
-        while (--i >= new_num)
+        while (--i >= new_num) {
+                if (net->sysfs_rx_queue_group)
+                        sysfs_remove_group(&net->_rx[i].kobj,
+                                           net->sysfs_rx_queue_group);
                 kobject_put(&net->_rx[i].kobj);
+        }
 
         return error;
 #else
@@ -1155,9 +1166,6 @@ static int register_queue_kobjects(struct net_device *net)
             NULL, &net->dev.kobj);
         if (!net->queues_kset)
                 return -ENOMEM;
-#endif
-
-#ifdef CONFIG_RPS
         real_rx = net->real_num_rx_queues;
 #endif
         real_tx = net->real_num_tx_queues;
@@ -1184,7 +1192,7 @@ static void remove_queue_kobjects(struct net_device *net)
 {
         int real_rx = 0, real_tx = 0;
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
         real_rx = net->real_num_rx_queues;
 #endif
         real_tx = net->real_num_tx_queues;
diff --git a/net/core/sock.c b/net/core/sock.c
index 85ad6f0d3898..b3f7ee3008a0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1836,9 +1836,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
                         put_page(pfrag->page);
         }
 
-        /* We restrict high order allocations to users that can afford to wait */
-        order = (prio & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
-
+        order = SKB_FRAG_PAGE_ORDER;
         do {
                 gfp_t gfp = prio;
 