Diffstat (limited to 'net/core')
-rw-r--r--  net/core/datagram.c          2
-rw-r--r--  net/core/dev.c             573
-rw-r--r--  net/core/dev_addr_lists.c    4
-rw-r--r--  net/core/drop_monitor.c     16
-rw-r--r--  net/core/ethtool.c           3
-rw-r--r--  net/core/fib_rules.c         3
-rw-r--r--  net/core/filter.c           30
-rw-r--r--  net/core/flow_dissector.c   91
-rw-r--r--  net/core/iovec.c             5
-rw-r--r--  net/core/neighbour.c         5
-rw-r--r--  net/core/net-sysfs.c        16
-rw-r--r--  net/core/netpoll.c          44
-rw-r--r--  net/core/netprio_cgroup.c    3
-rw-r--r--  net/core/pktgen.c            7
-rw-r--r--  net/core/rtnetlink.c        12
-rw-r--r--  net/core/secure_seq.c       16
-rw-r--r--  net/core/skbuff.c          220
-rw-r--r--  net/core/sock.c             47
-rw-r--r--  net/core/utils.c            49
19 files changed, 762 insertions, 384 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index af814e764206..a16ed7bbe376 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -577,7 +577,7 @@ EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
577/** 577/**
578 * zerocopy_sg_from_iovec - Build a zerocopy datagram from an iovec 578 * zerocopy_sg_from_iovec - Build a zerocopy datagram from an iovec
579 * @skb: buffer to copy 579 * @skb: buffer to copy
580 * @from: io vector to copy to 580 * @from: io vector to copy from
581 * @offset: offset in the io vector to start copying from 581 * @offset: offset in the io vector to start copying from
582 * @count: amount of vectors to copy to buffer from 582 * @count: amount of vectors to copy to buffer from
583 * 583 *
diff --git a/net/core/dev.c b/net/core/dev.c
index 3430b1ed12e5..0ce469e5ec80 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -131,6 +131,7 @@
131#include <linux/static_key.h> 131#include <linux/static_key.h>
132#include <linux/hashtable.h> 132#include <linux/hashtable.h>
133#include <linux/vmalloc.h> 133#include <linux/vmalloc.h>
134#include <linux/if_macvlan.h>
134 135
135#include "net-sysfs.h" 136#include "net-sysfs.h"
136 137
@@ -1203,7 +1204,7 @@ void netdev_state_change(struct net_device *dev)
1203{ 1204{
1204 if (dev->flags & IFF_UP) { 1205 if (dev->flags & IFF_UP) {
1205 call_netdevice_notifiers(NETDEV_CHANGE, dev); 1206 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1206 rtmsg_ifinfo(RTM_NEWLINK, dev, 0); 1207 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
1207 } 1208 }
1208} 1209}
1209EXPORT_SYMBOL(netdev_state_change); 1210EXPORT_SYMBOL(netdev_state_change);
@@ -1293,7 +1294,7 @@ int dev_open(struct net_device *dev)
1293 if (ret < 0) 1294 if (ret < 0)
1294 return ret; 1295 return ret;
1295 1296
1296 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); 1297 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1297 call_netdevice_notifiers(NETDEV_UP, dev); 1298 call_netdevice_notifiers(NETDEV_UP, dev);
1298 1299
1299 return ret; 1300 return ret;
@@ -1307,7 +1308,7 @@ static int __dev_close_many(struct list_head *head)
1307 ASSERT_RTNL(); 1308 ASSERT_RTNL();
1308 might_sleep(); 1309 might_sleep();
1309 1310
1310 list_for_each_entry(dev, head, unreg_list) { 1311 list_for_each_entry(dev, head, close_list) {
1311 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); 1312 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1312 1313
1313 clear_bit(__LINK_STATE_START, &dev->state); 1314 clear_bit(__LINK_STATE_START, &dev->state);
@@ -1323,7 +1324,7 @@ static int __dev_close_many(struct list_head *head)
1323 1324
1324 dev_deactivate_many(head); 1325 dev_deactivate_many(head);
1325 1326
1326 list_for_each_entry(dev, head, unreg_list) { 1327 list_for_each_entry(dev, head, close_list) {
1327 const struct net_device_ops *ops = dev->netdev_ops; 1328 const struct net_device_ops *ops = dev->netdev_ops;
1328 1329
1329 /* 1330 /*
@@ -1351,7 +1352,7 @@ static int __dev_close(struct net_device *dev)
1351 /* Temporarily disable netpoll until the interface is down */ 1352 /* Temporarily disable netpoll until the interface is down */
1352 netpoll_rx_disable(dev); 1353 netpoll_rx_disable(dev);
1353 1354
1354 list_add(&dev->unreg_list, &single); 1355 list_add(&dev->close_list, &single);
1355 retval = __dev_close_many(&single); 1356 retval = __dev_close_many(&single);
1356 list_del(&single); 1357 list_del(&single);
1357 1358
@@ -1362,21 +1363,20 @@ static int __dev_close(struct net_device *dev)
1362static int dev_close_many(struct list_head *head) 1363static int dev_close_many(struct list_head *head)
1363{ 1364{
1364 struct net_device *dev, *tmp; 1365 struct net_device *dev, *tmp;
1365 LIST_HEAD(tmp_list);
1366 1366
1367 list_for_each_entry_safe(dev, tmp, head, unreg_list) 1367 /* Remove the devices that don't need to be closed */
1368 list_for_each_entry_safe(dev, tmp, head, close_list)
1368 if (!(dev->flags & IFF_UP)) 1369 if (!(dev->flags & IFF_UP))
1369 list_move(&dev->unreg_list, &tmp_list); 1370 list_del_init(&dev->close_list);
1370 1371
1371 __dev_close_many(head); 1372 __dev_close_many(head);
1372 1373
1373 list_for_each_entry(dev, head, unreg_list) { 1374 list_for_each_entry_safe(dev, tmp, head, close_list) {
1374 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); 1375 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1375 call_netdevice_notifiers(NETDEV_DOWN, dev); 1376 call_netdevice_notifiers(NETDEV_DOWN, dev);
1377 list_del_init(&dev->close_list);
1376 } 1378 }
1377 1379
1378 /* rollback_registered_many needs the complete original list */
1379 list_splice(&tmp_list, head);
1380 return 0; 1380 return 0;
1381} 1381}
1382 1382
@@ -1397,7 +1397,7 @@ int dev_close(struct net_device *dev)
1397 /* Block netpoll rx while the interface is going down */ 1397 /* Block netpoll rx while the interface is going down */
1398 netpoll_rx_disable(dev); 1398 netpoll_rx_disable(dev);
1399 1399
1400 list_add(&dev->unreg_list, &single); 1400 list_add(&dev->close_list, &single);
1401 dev_close_many(&single); 1401 dev_close_many(&single);
1402 list_del(&single); 1402 list_del(&single);
1403 1403
@@ -1425,6 +1425,10 @@ void dev_disable_lro(struct net_device *dev)
1425 if (is_vlan_dev(dev)) 1425 if (is_vlan_dev(dev))
1426 dev = vlan_dev_real_dev(dev); 1426 dev = vlan_dev_real_dev(dev);
1427 1427
1428 /* the same for macvlan devices */
1429 if (netif_is_macvlan(dev))
1430 dev = macvlan_dev_real_dev(dev);
1431
1428 dev->wanted_features &= ~NETIF_F_LRO; 1432 dev->wanted_features &= ~NETIF_F_LRO;
1429 netdev_update_features(dev); 1433 netdev_update_features(dev);
1430 1434
@@ -1691,13 +1695,9 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1691 kfree_skb(skb); 1695 kfree_skb(skb);
1692 return NET_RX_DROP; 1696 return NET_RX_DROP;
1693 } 1697 }
1694 skb->protocol = eth_type_trans(skb, dev);
1695 1698
1696 /* eth_type_trans() can set pkt_type.
1697 * call skb_scrub_packet() after it to clear pkt_type _after_ calling
1698 * eth_type_trans().
1699 */
1700 skb_scrub_packet(skb, true); 1699 skb_scrub_packet(skb, true);
1700 skb->protocol = eth_type_trans(skb, dev);
1701 1701
1702 return netif_rx(skb); 1702 return netif_rx(skb);
1703} 1703}
@@ -2378,6 +2378,8 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
2378 } 2378 }
2379 2379
2380 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); 2380 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
2381 SKB_GSO_CB(skb)->encap_level = 0;
2382
2381 skb_reset_mac_header(skb); 2383 skb_reset_mac_header(skb);
2382 skb_reset_mac_len(skb); 2384 skb_reset_mac_len(skb);
2383 2385
@@ -2603,7 +2605,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2603 dev_queue_xmit_nit(skb, dev); 2605 dev_queue_xmit_nit(skb, dev);
2604 2606
2605 skb_len = skb->len; 2607 skb_len = skb->len;
2606 rc = ops->ndo_start_xmit(skb, dev); 2608 rc = ops->ndo_start_xmit(skb, dev);
2609
2607 trace_net_dev_xmit(skb, rc, dev, skb_len); 2610 trace_net_dev_xmit(skb, rc, dev, skb_len);
2608 if (rc == NETDEV_TX_OK) 2611 if (rc == NETDEV_TX_OK)
2609 txq_trans_update(txq); 2612 txq_trans_update(txq);
@@ -2646,6 +2649,7 @@ out_kfree_skb:
2646out: 2649out:
2647 return rc; 2650 return rc;
2648} 2651}
2652EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
2649 2653
2650static void qdisc_pkt_len_init(struct sk_buff *skb) 2654static void qdisc_pkt_len_init(struct sk_buff *skb)
2651{ 2655{
@@ -2801,7 +2805,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
2801 * the BH enable code must have IRQs enabled so that it will not deadlock. 2805 * the BH enable code must have IRQs enabled so that it will not deadlock.
2802 * --BLG 2806 * --BLG
2803 */ 2807 */
2804int dev_queue_xmit(struct sk_buff *skb) 2808int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
2805{ 2809{
2806 struct net_device *dev = skb->dev; 2810 struct net_device *dev = skb->dev;
2807 struct netdev_queue *txq; 2811 struct netdev_queue *txq;
@@ -2817,7 +2821,7 @@ int dev_queue_xmit(struct sk_buff *skb)
2817 2821
2818 skb_update_prio(skb); 2822 skb_update_prio(skb);
2819 2823
2820 txq = netdev_pick_tx(dev, skb); 2824 txq = netdev_pick_tx(dev, skb, accel_priv);
2821 q = rcu_dereference_bh(txq->qdisc); 2825 q = rcu_dereference_bh(txq->qdisc);
2822 2826
2823#ifdef CONFIG_NET_CLS_ACT 2827#ifdef CONFIG_NET_CLS_ACT
@@ -2882,8 +2886,19 @@ out:
2882 rcu_read_unlock_bh(); 2886 rcu_read_unlock_bh();
2883 return rc; 2887 return rc;
2884} 2888}
2889
2890int dev_queue_xmit(struct sk_buff *skb)
2891{
2892 return __dev_queue_xmit(skb, NULL);
2893}
2885EXPORT_SYMBOL(dev_queue_xmit); 2894EXPORT_SYMBOL(dev_queue_xmit);
2886 2895
2896int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
2897{
2898 return __dev_queue_xmit(skb, accel_priv);
2899}
2900EXPORT_SYMBOL(dev_queue_xmit_accel);
2901
2887 2902
2888/*======================================================================= 2903/*=======================================================================
2889 Receiver routines 2904 Receiver routines
@@ -4374,42 +4389,40 @@ struct netdev_adjacent {
4374 /* upper master flag, there can only be one master device per list */ 4389 /* upper master flag, there can only be one master device per list */
4375 bool master; 4390 bool master;
4376 4391
4377 /* indicates that this dev is our first-level lower/upper device */
4378 bool neighbour;
4379
4380 /* counter for the number of times this device was added to us */ 4392 /* counter for the number of times this device was added to us */
4381 u16 ref_nr; 4393 u16 ref_nr;
4382 4394
4395 /* private field for the users */
4396 void *private;
4397
4383 struct list_head list; 4398 struct list_head list;
4384 struct rcu_head rcu; 4399 struct rcu_head rcu;
4385}; 4400};
4386 4401
4387static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, 4402static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev,
4388 struct net_device *adj_dev, 4403 struct net_device *adj_dev,
4389 bool upper) 4404 struct list_head *adj_list)
4390{ 4405{
4391 struct netdev_adjacent *adj; 4406 struct netdev_adjacent *adj;
4392 struct list_head *dev_list;
4393 4407
4394 dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list; 4408 list_for_each_entry_rcu(adj, adj_list, list) {
4395
4396 list_for_each_entry(adj, dev_list, list) {
4397 if (adj->dev == adj_dev) 4409 if (adj->dev == adj_dev)
4398 return adj; 4410 return adj;
4399 } 4411 }
4400 return NULL; 4412 return NULL;
4401} 4413}
4402 4414
4403static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev, 4415static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
4404 struct net_device *udev) 4416 struct net_device *adj_dev,
4417 struct list_head *adj_list)
4405{ 4418{
4406 return __netdev_find_adj(dev, udev, true); 4419 struct netdev_adjacent *adj;
4407}
4408 4420
4409static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev, 4421 list_for_each_entry(adj, adj_list, list) {
4410 struct net_device *ldev) 4422 if (adj->dev == adj_dev)
4411{ 4423 return adj;
4412 return __netdev_find_adj(dev, ldev, false); 4424 }
4425 return NULL;
4413} 4426}
4414 4427
4415/** 4428/**
@@ -4426,7 +4439,7 @@ bool netdev_has_upper_dev(struct net_device *dev,
4426{ 4439{
4427 ASSERT_RTNL(); 4440 ASSERT_RTNL();
4428 4441
4429 return __netdev_find_upper(dev, upper_dev); 4442 return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
4430} 4443}
4431EXPORT_SYMBOL(netdev_has_upper_dev); 4444EXPORT_SYMBOL(netdev_has_upper_dev);
4432 4445
@@ -4441,7 +4454,7 @@ bool netdev_has_any_upper_dev(struct net_device *dev)
4441{ 4454{
4442 ASSERT_RTNL(); 4455 ASSERT_RTNL();
4443 4456
4444 return !list_empty(&dev->upper_dev_list); 4457 return !list_empty(&dev->all_adj_list.upper);
4445} 4458}
4446EXPORT_SYMBOL(netdev_has_any_upper_dev); 4459EXPORT_SYMBOL(netdev_has_any_upper_dev);
4447 4460
@@ -4458,10 +4471,10 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4458 4471
4459 ASSERT_RTNL(); 4472 ASSERT_RTNL();
4460 4473
4461 if (list_empty(&dev->upper_dev_list)) 4474 if (list_empty(&dev->adj_list.upper))
4462 return NULL; 4475 return NULL;
4463 4476
4464 upper = list_first_entry(&dev->upper_dev_list, 4477 upper = list_first_entry(&dev->adj_list.upper,
4465 struct netdev_adjacent, list); 4478 struct netdev_adjacent, list);
4466 if (likely(upper->master)) 4479 if (likely(upper->master))
4467 return upper->dev; 4480 return upper->dev;
@@ -4469,30 +4482,98 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4469} 4482}
4470EXPORT_SYMBOL(netdev_master_upper_dev_get); 4483EXPORT_SYMBOL(netdev_master_upper_dev_get);
4471 4484
4472/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list 4485void *netdev_adjacent_get_private(struct list_head *adj_list)
4486{
4487 struct netdev_adjacent *adj;
4488
4489 adj = list_entry(adj_list, struct netdev_adjacent, list);
4490
4491 return adj->private;
4492}
4493EXPORT_SYMBOL(netdev_adjacent_get_private);
4494
4495/**
4496 * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
4473 * @dev: device 4497 * @dev: device
4474 * @iter: list_head ** of the current position 4498 * @iter: list_head ** of the current position
4475 * 4499 *
4476 * Gets the next device from the dev's upper list, starting from iter 4500 * Gets the next device from the dev's upper list, starting from iter
4477 * position. The caller must hold RCU read lock. 4501 * position. The caller must hold RCU read lock.
4478 */ 4502 */
4479struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, 4503struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
4480 struct list_head **iter) 4504 struct list_head **iter)
4481{ 4505{
4482 struct netdev_adjacent *upper; 4506 struct netdev_adjacent *upper;
4483 4507
4484 WARN_ON_ONCE(!rcu_read_lock_held()); 4508 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
4485 4509
4486 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); 4510 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4487 4511
4488 if (&upper->list == &dev->upper_dev_list) 4512 if (&upper->list == &dev->all_adj_list.upper)
4489 return NULL; 4513 return NULL;
4490 4514
4491 *iter = &upper->list; 4515 *iter = &upper->list;
4492 4516
4493 return upper->dev; 4517 return upper->dev;
4494} 4518}
4495EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); 4519EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
4520
4521/**
4522 * netdev_lower_get_next_private - Get the next ->private from the
4523 * lower neighbour list
4524 * @dev: device
4525 * @iter: list_head ** of the current position
4526 *
4527 * Gets the next netdev_adjacent->private from the dev's lower neighbour
4528 * list, starting from iter position. The caller must hold either hold the
4529 * RTNL lock or its own locking that guarantees that the neighbour lower
4530 * list will remain unchainged.
4531 */
4532void *netdev_lower_get_next_private(struct net_device *dev,
4533 struct list_head **iter)
4534{
4535 struct netdev_adjacent *lower;
4536
4537 lower = list_entry(*iter, struct netdev_adjacent, list);
4538
4539 if (&lower->list == &dev->adj_list.lower)
4540 return NULL;
4541
4542 if (iter)
4543 *iter = lower->list.next;
4544
4545 return lower->private;
4546}
4547EXPORT_SYMBOL(netdev_lower_get_next_private);
4548
4549/**
4550 * netdev_lower_get_next_private_rcu - Get the next ->private from the
4551 * lower neighbour list, RCU
4552 * variant
4553 * @dev: device
4554 * @iter: list_head ** of the current position
4555 *
4556 * Gets the next netdev_adjacent->private from the dev's lower neighbour
4557 * list, starting from iter position. The caller must hold RCU read lock.
4558 */
4559void *netdev_lower_get_next_private_rcu(struct net_device *dev,
4560 struct list_head **iter)
4561{
4562 struct netdev_adjacent *lower;
4563
4564 WARN_ON_ONCE(!rcu_read_lock_held());
4565
4566 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4567
4568 if (&lower->list == &dev->adj_list.lower)
4569 return NULL;
4570
4571 if (iter)
4572 *iter = &lower->list;
4573
4574 return lower->private;
4575}
4576EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
4496 4577
4497/** 4578/**
4498 * netdev_master_upper_dev_get_rcu - Get master upper device 4579 * netdev_master_upper_dev_get_rcu - Get master upper device
@@ -4505,7 +4586,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4505{ 4586{
4506 struct netdev_adjacent *upper; 4587 struct netdev_adjacent *upper;
4507 4588
4508 upper = list_first_or_null_rcu(&dev->upper_dev_list, 4589 upper = list_first_or_null_rcu(&dev->adj_list.upper,
4509 struct netdev_adjacent, list); 4590 struct netdev_adjacent, list);
4510 if (upper && likely(upper->master)) 4591 if (upper && likely(upper->master))
4511 return upper->dev; 4592 return upper->dev;
@@ -4515,15 +4596,16 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4515 4596
4516static int __netdev_adjacent_dev_insert(struct net_device *dev, 4597static int __netdev_adjacent_dev_insert(struct net_device *dev,
4517 struct net_device *adj_dev, 4598 struct net_device *adj_dev,
4518 bool neighbour, bool master, 4599 struct list_head *dev_list,
4519 bool upper) 4600 void *private, bool master)
4520{ 4601{
4521 struct netdev_adjacent *adj; 4602 struct netdev_adjacent *adj;
4603 char linkname[IFNAMSIZ+7];
4604 int ret;
4522 4605
4523 adj = __netdev_find_adj(dev, adj_dev, upper); 4606 adj = __netdev_find_adj(dev, adj_dev, dev_list);
4524 4607
4525 if (adj) { 4608 if (adj) {
4526 BUG_ON(neighbour);
4527 adj->ref_nr++; 4609 adj->ref_nr++;
4528 return 0; 4610 return 0;
4529 } 4611 }
@@ -4534,124 +4616,179 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
4534 4616
4535 adj->dev = adj_dev; 4617 adj->dev = adj_dev;
4536 adj->master = master; 4618 adj->master = master;
4537 adj->neighbour = neighbour;
4538 adj->ref_nr = 1; 4619 adj->ref_nr = 1;
4539 4620 adj->private = private;
4540 dev_hold(adj_dev); 4621 dev_hold(adj_dev);
4541 pr_debug("dev_hold for %s, because of %s link added from %s to %s\n",
4542 adj_dev->name, upper ? "upper" : "lower", dev->name,
4543 adj_dev->name);
4544 4622
4545 if (!upper) { 4623 pr_debug("dev_hold for %s, because of link added from %s to %s\n",
4546 list_add_tail_rcu(&adj->list, &dev->lower_dev_list); 4624 adj_dev->name, dev->name, adj_dev->name);
4547 return 0; 4625
4626 if (dev_list == &dev->adj_list.lower) {
4627 sprintf(linkname, "lower_%s", adj_dev->name);
4628 ret = sysfs_create_link(&(dev->dev.kobj),
4629 &(adj_dev->dev.kobj), linkname);
4630 if (ret)
4631 goto free_adj;
4632 } else if (dev_list == &dev->adj_list.upper) {
4633 sprintf(linkname, "upper_%s", adj_dev->name);
4634 ret = sysfs_create_link(&(dev->dev.kobj),
4635 &(adj_dev->dev.kobj), linkname);
4636 if (ret)
4637 goto free_adj;
4548 } 4638 }
4549 4639
4550 /* Ensure that master upper link is always the first item in list. */ 4640 /* Ensure that master link is always the first item in list. */
4551 if (master) 4641 if (master) {
4552 list_add_rcu(&adj->list, &dev->upper_dev_list); 4642 ret = sysfs_create_link(&(dev->dev.kobj),
4553 else 4643 &(adj_dev->dev.kobj), "master");
4554 list_add_tail_rcu(&adj->list, &dev->upper_dev_list); 4644 if (ret)
4645 goto remove_symlinks;
4646
4647 list_add_rcu(&adj->list, dev_list);
4648 } else {
4649 list_add_tail_rcu(&adj->list, dev_list);
4650 }
4555 4651
4556 return 0; 4652 return 0;
4557}
4558 4653
4559static inline int __netdev_upper_dev_insert(struct net_device *dev, 4654remove_symlinks:
4560 struct net_device *udev, 4655 if (dev_list == &dev->adj_list.lower) {
4561 bool master, bool neighbour) 4656 sprintf(linkname, "lower_%s", adj_dev->name);
4562{ 4657 sysfs_remove_link(&(dev->dev.kobj), linkname);
4563 return __netdev_adjacent_dev_insert(dev, udev, neighbour, master, 4658 } else if (dev_list == &dev->adj_list.upper) {
4564 true); 4659 sprintf(linkname, "upper_%s", adj_dev->name);
4565} 4660 sysfs_remove_link(&(dev->dev.kobj), linkname);
4661 }
4566 4662
4567static inline int __netdev_lower_dev_insert(struct net_device *dev, 4663free_adj:
4568 struct net_device *ldev, 4664 kfree(adj);
4569 bool neighbour) 4665 dev_put(adj_dev);
4570{ 4666
4571 return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false, 4667 return ret;
4572 false);
4573} 4668}
4574 4669
4575void __netdev_adjacent_dev_remove(struct net_device *dev, 4670void __netdev_adjacent_dev_remove(struct net_device *dev,
4576 struct net_device *adj_dev, bool upper) 4671 struct net_device *adj_dev,
4672 struct list_head *dev_list)
4577{ 4673{
4578 struct netdev_adjacent *adj; 4674 struct netdev_adjacent *adj;
4675 char linkname[IFNAMSIZ+7];
4579 4676
4580 if (upper) 4677 adj = __netdev_find_adj(dev, adj_dev, dev_list);
4581 adj = __netdev_find_upper(dev, adj_dev);
4582 else
4583 adj = __netdev_find_lower(dev, adj_dev);
4584 4678
4585 if (!adj) 4679 if (!adj) {
4680 pr_err("tried to remove device %s from %s\n",
4681 dev->name, adj_dev->name);
4586 BUG(); 4682 BUG();
4683 }
4587 4684
4588 if (adj->ref_nr > 1) { 4685 if (adj->ref_nr > 1) {
4686 pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
4687 adj->ref_nr-1);
4589 adj->ref_nr--; 4688 adj->ref_nr--;
4590 return; 4689 return;
4591 } 4690 }
4592 4691
4692 if (adj->master)
4693 sysfs_remove_link(&(dev->dev.kobj), "master");
4694
4695 if (dev_list == &dev->adj_list.lower) {
4696 sprintf(linkname, "lower_%s", adj_dev->name);
4697 sysfs_remove_link(&(dev->dev.kobj), linkname);
4698 } else if (dev_list == &dev->adj_list.upper) {
4699 sprintf(linkname, "upper_%s", adj_dev->name);
4700 sysfs_remove_link(&(dev->dev.kobj), linkname);
4701 }
4702
4593 list_del_rcu(&adj->list); 4703 list_del_rcu(&adj->list);
4594 pr_debug("dev_put for %s, because of %s link removed from %s to %s\n", 4704 pr_debug("dev_put for %s, because link removed from %s to %s\n",
4595 adj_dev->name, upper ? "upper" : "lower", dev->name, 4705 adj_dev->name, dev->name, adj_dev->name);
4596 adj_dev->name);
4597 dev_put(adj_dev); 4706 dev_put(adj_dev);
4598 kfree_rcu(adj, rcu); 4707 kfree_rcu(adj, rcu);
4599} 4708}
4600 4709
4601static inline void __netdev_upper_dev_remove(struct net_device *dev, 4710int __netdev_adjacent_dev_link_lists(struct net_device *dev,
4602 struct net_device *udev) 4711 struct net_device *upper_dev,
4603{ 4712 struct list_head *up_list,
4604 return __netdev_adjacent_dev_remove(dev, udev, true); 4713 struct list_head *down_list,
4605} 4714 void *private, bool master)
4606
4607static inline void __netdev_lower_dev_remove(struct net_device *dev,
4608 struct net_device *ldev)
4609{
4610 return __netdev_adjacent_dev_remove(dev, ldev, false);
4611}
4612
4613int __netdev_adjacent_dev_insert_link(struct net_device *dev,
4614 struct net_device *upper_dev,
4615 bool master, bool neighbour)
4616{ 4715{
4617 int ret; 4716 int ret;
4618 4717
4619 ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour); 4718 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
4719 master);
4620 if (ret) 4720 if (ret)
4621 return ret; 4721 return ret;
4622 4722
4623 ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour); 4723 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
4724 false);
4624 if (ret) { 4725 if (ret) {
4625 __netdev_upper_dev_remove(dev, upper_dev); 4726 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
4626 return ret; 4727 return ret;
4627 } 4728 }
4628 4729
4629 return 0; 4730 return 0;
4630} 4731}
4631 4732
4632static inline int __netdev_adjacent_dev_link(struct net_device *dev, 4733int __netdev_adjacent_dev_link(struct net_device *dev,
4633 struct net_device *udev) 4734 struct net_device *upper_dev)
4634{ 4735{
4635 return __netdev_adjacent_dev_insert_link(dev, udev, false, false); 4736 return __netdev_adjacent_dev_link_lists(dev, upper_dev,
4737 &dev->all_adj_list.upper,
4738 &upper_dev->all_adj_list.lower,
4739 NULL, false);
4636} 4740}
4637 4741
4638static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, 4742void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
4639 struct net_device *udev, 4743 struct net_device *upper_dev,
4640 bool master) 4744 struct list_head *up_list,
4745 struct list_head *down_list)
4641{ 4746{
4642 return __netdev_adjacent_dev_insert_link(dev, udev, master, true); 4747 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
4748 __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
4643} 4749}
4644 4750
4645void __netdev_adjacent_dev_unlink(struct net_device *dev, 4751void __netdev_adjacent_dev_unlink(struct net_device *dev,
4646 struct net_device *upper_dev) 4752 struct net_device *upper_dev)
4647{ 4753{
4648 __netdev_upper_dev_remove(dev, upper_dev); 4754 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
4649 __netdev_lower_dev_remove(upper_dev, dev); 4755 &dev->all_adj_list.upper,
4756 &upper_dev->all_adj_list.lower);
4757}
4758
4759int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
4760 struct net_device *upper_dev,
4761 void *private, bool master)
4762{
4763 int ret = __netdev_adjacent_dev_link(dev, upper_dev);
4764
4765 if (ret)
4766 return ret;
4767
4768 ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
4769 &dev->adj_list.upper,
4770 &upper_dev->adj_list.lower,
4771 private, master);
4772 if (ret) {
4773 __netdev_adjacent_dev_unlink(dev, upper_dev);
4774 return ret;
4775 }
4776
4777 return 0;
4650} 4778}
4651 4779
4780void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
4781 struct net_device *upper_dev)
4782{
4783 __netdev_adjacent_dev_unlink(dev, upper_dev);
4784 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
4785 &dev->adj_list.upper,
4786 &upper_dev->adj_list.lower);
4787}
4652 4788
4653static int __netdev_upper_dev_link(struct net_device *dev, 4789static int __netdev_upper_dev_link(struct net_device *dev,
4654 struct net_device *upper_dev, bool master) 4790 struct net_device *upper_dev, bool master,
4791 void *private)
4655{ 4792{
4656 struct netdev_adjacent *i, *j, *to_i, *to_j; 4793 struct netdev_adjacent *i, *j, *to_i, *to_j;
4657 int ret = 0; 4794 int ret = 0;
@@ -4662,26 +4799,29 @@ static int __netdev_upper_dev_link(struct net_device *dev,
4662 return -EBUSY; 4799 return -EBUSY;
4663 4800
4664 /* To prevent loops, check if dev is not upper device to upper_dev. */ 4801 /* To prevent loops, check if dev is not upper device to upper_dev. */
4665 if (__netdev_find_upper(upper_dev, dev)) 4802 if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
4666 return -EBUSY; 4803 return -EBUSY;
4667 4804
4668 if (__netdev_find_upper(dev, upper_dev)) 4805 if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper))
4669 return -EEXIST; 4806 return -EEXIST;
4670 4807
4671 if (master && netdev_master_upper_dev_get(dev)) 4808 if (master && netdev_master_upper_dev_get(dev))
4672 return -EBUSY; 4809 return -EBUSY;
4673 4810
4674 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master); 4811 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
4812 master);
4675 if (ret) 4813 if (ret)
4676 return ret; 4814 return ret;
4677 4815
4678 /* Now that we linked these devs, make all the upper_dev's 4816 /* Now that we linked these devs, make all the upper_dev's
4679 * upper_dev_list visible to every dev's lower_dev_list and vice 4817 * all_adj_list.upper visible to every dev's all_adj_list.lower an
4680 * versa, and don't forget the devices itself. All of these 4818 * versa, and don't forget the devices itself. All of these
4681 * links are non-neighbours. 4819 * links are non-neighbours.
4682 */ 4820 */
4683 list_for_each_entry(i, &dev->lower_dev_list, list) { 4821 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4684 list_for_each_entry(j, &upper_dev->upper_dev_list, list) { 4822 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
4823 pr_debug("Interlinking %s with %s, non-neighbour\n",
4824 i->dev->name, j->dev->name);
4685 ret = __netdev_adjacent_dev_link(i->dev, j->dev); 4825 ret = __netdev_adjacent_dev_link(i->dev, j->dev);
4686 if (ret) 4826 if (ret)
4687 goto rollback_mesh; 4827 goto rollback_mesh;
@@ -4689,14 +4829,18 @@ static int __netdev_upper_dev_link(struct net_device *dev,
4689 } 4829 }
4690 4830
4691 /* add dev to every upper_dev's upper device */ 4831 /* add dev to every upper_dev's upper device */
4692 list_for_each_entry(i, &upper_dev->upper_dev_list, list) { 4832 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
4833 pr_debug("linking %s's upper device %s with %s\n",
4834 upper_dev->name, i->dev->name, dev->name);
4693 ret = __netdev_adjacent_dev_link(dev, i->dev); 4835 ret = __netdev_adjacent_dev_link(dev, i->dev);
4694 if (ret) 4836 if (ret)
4695 goto rollback_upper_mesh; 4837 goto rollback_upper_mesh;
4696 } 4838 }
4697 4839
4698 /* add upper_dev to every dev's lower device */ 4840 /* add upper_dev to every dev's lower device */
4699 list_for_each_entry(i, &dev->lower_dev_list, list) { 4841 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4842 pr_debug("linking %s's lower device %s with %s\n", dev->name,
4843 i->dev->name, upper_dev->name);
4700 ret = __netdev_adjacent_dev_link(i->dev, upper_dev); 4844 ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
4701 if (ret) 4845 if (ret)
4702 goto rollback_lower_mesh; 4846 goto rollback_lower_mesh;
@@ -4707,7 +4851,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
4707 4851
4708rollback_lower_mesh: 4852rollback_lower_mesh:
4709 to_i = i; 4853 to_i = i;
4710 list_for_each_entry(i, &dev->lower_dev_list, list) { 4854 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4711 if (i == to_i) 4855 if (i == to_i)
4712 break; 4856 break;
4713 __netdev_adjacent_dev_unlink(i->dev, upper_dev); 4857 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
@@ -4717,7 +4861,7 @@ rollback_lower_mesh:
4717 4861
4718rollback_upper_mesh: 4862rollback_upper_mesh:
4719 to_i = i; 4863 to_i = i;
4720 list_for_each_entry(i, &upper_dev->upper_dev_list, list) { 4864 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
4721 if (i == to_i) 4865 if (i == to_i)
4722 break; 4866 break;
4723 __netdev_adjacent_dev_unlink(dev, i->dev); 4867 __netdev_adjacent_dev_unlink(dev, i->dev);
@@ -4728,8 +4872,8 @@ rollback_upper_mesh:
4728rollback_mesh: 4872rollback_mesh:
4729 to_i = i; 4873 to_i = i;
4730 to_j = j; 4874 to_j = j;
4731 list_for_each_entry(i, &dev->lower_dev_list, list) { 4875 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4732 list_for_each_entry(j, &upper_dev->upper_dev_list, list) { 4876 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
4733 if (i == to_i && j == to_j) 4877 if (i == to_i && j == to_j)
4734 break; 4878 break;
4735 __netdev_adjacent_dev_unlink(i->dev, j->dev); 4879 __netdev_adjacent_dev_unlink(i->dev, j->dev);
@@ -4738,7 +4882,7 @@ rollback_mesh:
4738 break; 4882 break;
4739 } 4883 }
4740 4884
4741 __netdev_adjacent_dev_unlink(dev, upper_dev); 4885 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
4742 4886
4743 return ret; 4887 return ret;
4744} 4888}
@@ -4756,7 +4900,7 @@ rollback_mesh:
4756int netdev_upper_dev_link(struct net_device *dev, 4900int netdev_upper_dev_link(struct net_device *dev,
4757 struct net_device *upper_dev) 4901 struct net_device *upper_dev)
4758{ 4902{
4759 return __netdev_upper_dev_link(dev, upper_dev, false); 4903 return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
4760} 4904}
4761EXPORT_SYMBOL(netdev_upper_dev_link); 4905EXPORT_SYMBOL(netdev_upper_dev_link);
4762 4906
@@ -4774,10 +4918,18 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
4774int netdev_master_upper_dev_link(struct net_device *dev, 4918int netdev_master_upper_dev_link(struct net_device *dev,
4775 struct net_device *upper_dev) 4919 struct net_device *upper_dev)
4776{ 4920{
4777 return __netdev_upper_dev_link(dev, upper_dev, true); 4921 return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
4778} 4922}
4779EXPORT_SYMBOL(netdev_master_upper_dev_link); 4923EXPORT_SYMBOL(netdev_master_upper_dev_link);
4780 4924
4925int netdev_master_upper_dev_link_private(struct net_device *dev,
4926 struct net_device *upper_dev,
4927 void *private)
4928{
4929 return __netdev_upper_dev_link(dev, upper_dev, true, private);
4930}
4931EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
4932
4781/** 4933/**
4782 * netdev_upper_dev_unlink - Removes a link to upper device 4934 * netdev_upper_dev_unlink - Removes a link to upper device
4783 * @dev: device 4935 * @dev: device
@@ -4792,38 +4944,68 @@ void netdev_upper_dev_unlink(struct net_device *dev,
4792 struct netdev_adjacent *i, *j; 4944 struct netdev_adjacent *i, *j;
4793 ASSERT_RTNL(); 4945 ASSERT_RTNL();
4794 4946
4795 __netdev_adjacent_dev_unlink(dev, upper_dev); 4947 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
4796 4948
4797 /* Here is the tricky part. We must remove all dev's lower 4949 /* Here is the tricky part. We must remove all dev's lower
4798 * devices from all upper_dev's upper devices and vice 4950 * devices from all upper_dev's upper devices and vice
4799 * versa, to maintain the graph relationship. 4951 * versa, to maintain the graph relationship.
4800 */ 4952 */
4801 list_for_each_entry(i, &dev->lower_dev_list, list) 4953 list_for_each_entry(i, &dev->all_adj_list.lower, list)
4802 list_for_each_entry(j, &upper_dev->upper_dev_list, list) 4954 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
4803 __netdev_adjacent_dev_unlink(i->dev, j->dev); 4955 __netdev_adjacent_dev_unlink(i->dev, j->dev);
4804 4956
4805 /* remove also the devices itself from lower/upper device 4957 /* remove also the devices itself from lower/upper device
4806 * list 4958 * list
4807 */ 4959 */
4808 list_for_each_entry(i, &dev->lower_dev_list, list) 4960 list_for_each_entry(i, &dev->all_adj_list.lower, list)
4809 __netdev_adjacent_dev_unlink(i->dev, upper_dev); 4961 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
4810 4962
4811 list_for_each_entry(i, &upper_dev->upper_dev_list, list) 4963 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
4812 __netdev_adjacent_dev_unlink(dev, i->dev); 4964 __netdev_adjacent_dev_unlink(dev, i->dev);
4813 4965
4814 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); 4966 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4815} 4967}
4816EXPORT_SYMBOL(netdev_upper_dev_unlink); 4968EXPORT_SYMBOL(netdev_upper_dev_unlink);
4817 4969
4970void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
4971 struct net_device *lower_dev)
4972{
4973 struct netdev_adjacent *lower;
4974
4975 if (!lower_dev)
4976 return NULL;
4977 lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower);
4978 if (!lower)
4979 return NULL;
4980
4981 return lower->private;
4982}
4983EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu);
4984
4985void *netdev_lower_dev_get_private(struct net_device *dev,
4986 struct net_device *lower_dev)
4987{
4988 struct netdev_adjacent *lower;
4989
4990 if (!lower_dev)
4991 return NULL;
4992 lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
4993 if (!lower)
4994 return NULL;
4995
4996 return lower->private;
4997}
4998EXPORT_SYMBOL(netdev_lower_dev_get_private);
4999
4818static void dev_change_rx_flags(struct net_device *dev, int flags) 5000static void dev_change_rx_flags(struct net_device *dev, int flags)
4819{ 5001{
4820 const struct net_device_ops *ops = dev->netdev_ops; 5002 const struct net_device_ops *ops = dev->netdev_ops;
4821 5003
4822 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) 5004 if (ops->ndo_change_rx_flags)
4823 ops->ndo_change_rx_flags(dev, flags); 5005 ops->ndo_change_rx_flags(dev, flags);
4824} 5006}
4825 5007
4826static int __dev_set_promiscuity(struct net_device *dev, int inc) 5008static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
4827{ 5009{
4828 unsigned int old_flags = dev->flags; 5010 unsigned int old_flags = dev->flags;
4829 kuid_t uid; 5011 kuid_t uid;
@@ -4866,6 +5048,8 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
4866 5048
4867 dev_change_rx_flags(dev, IFF_PROMISC); 5049 dev_change_rx_flags(dev, IFF_PROMISC);
4868 } 5050 }
5051 if (notify)
5052 __dev_notify_flags(dev, old_flags, IFF_PROMISC);
4869 return 0; 5053 return 0;
4870} 5054}
4871 5055
@@ -4885,7 +5069,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc)
4885 unsigned int old_flags = dev->flags; 5069 unsigned int old_flags = dev->flags;
4886 int err; 5070 int err;
4887 5071
4888 err = __dev_set_promiscuity(dev, inc); 5072 err = __dev_set_promiscuity(dev, inc, true);
4889 if (err < 0) 5073 if (err < 0)
4890 return err; 5074 return err;
4891 if (dev->flags != old_flags) 5075 if (dev->flags != old_flags)
@@ -4894,22 +5078,9 @@ int dev_set_promiscuity(struct net_device *dev, int inc)
4894} 5078}
4895EXPORT_SYMBOL(dev_set_promiscuity); 5079EXPORT_SYMBOL(dev_set_promiscuity);
4896 5080
4897/** 5081static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
4898 * dev_set_allmulti - update allmulti count on a device
4899 * @dev: device
4900 * @inc: modifier
4901 *
4902 * Add or remove reception of all multicast frames to a device. While the
4903 * count in the device remains above zero the interface remains listening
4904 * to all interfaces. Once it hits zero the device reverts back to normal
4905 * filtering operation. A negative @inc value is used to drop the counter
4906 * when releasing a resource needing all multicasts.
4907 * Return 0 if successful or a negative errno code on error.
4908 */
4909
4910int dev_set_allmulti(struct net_device *dev, int inc)
4911{ 5082{
4912 unsigned int old_flags = dev->flags; 5083 unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
4913 5084
4914 ASSERT_RTNL(); 5085 ASSERT_RTNL();
4915 5086
@@ -4932,9 +5103,30 @@ int dev_set_allmulti(struct net_device *dev, int inc)
4932 if (dev->flags ^ old_flags) { 5103 if (dev->flags ^ old_flags) {
4933 dev_change_rx_flags(dev, IFF_ALLMULTI); 5104 dev_change_rx_flags(dev, IFF_ALLMULTI);
4934 dev_set_rx_mode(dev); 5105 dev_set_rx_mode(dev);
5106 if (notify)
5107 __dev_notify_flags(dev, old_flags,
5108 dev->gflags ^ old_gflags);
4935 } 5109 }
4936 return 0; 5110 return 0;
4937} 5111}
5112
5113/**
5114 * dev_set_allmulti - update allmulti count on a device
5115 * @dev: device
5116 * @inc: modifier
5117 *
5118 * Add or remove reception of all multicast frames to a device. While the
5119 * count in the device remains above zero the interface remains listening
5120 * to all interfaces. Once it hits zero the device reverts back to normal
5121 * filtering operation. A negative @inc value is used to drop the counter
5122 * when releasing a resource needing all multicasts.
5123 * Return 0 if successful or a negative errno code on error.
5124 */
5125
5126int dev_set_allmulti(struct net_device *dev, int inc)
5127{
5128 return __dev_set_allmulti(dev, inc, true);
5129}
4938EXPORT_SYMBOL(dev_set_allmulti); 5130EXPORT_SYMBOL(dev_set_allmulti);
4939 5131
4940/* 5132/*
@@ -4959,10 +5151,10 @@ void __dev_set_rx_mode(struct net_device *dev)
4959 * therefore calling __dev_set_promiscuity here is safe. 5151 * therefore calling __dev_set_promiscuity here is safe.
4960 */ 5152 */
4961 if (!netdev_uc_empty(dev) && !dev->uc_promisc) { 5153 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
4962 __dev_set_promiscuity(dev, 1); 5154 __dev_set_promiscuity(dev, 1, false);
4963 dev->uc_promisc = true; 5155 dev->uc_promisc = true;
4964 } else if (netdev_uc_empty(dev) && dev->uc_promisc) { 5156 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
4965 __dev_set_promiscuity(dev, -1); 5157 __dev_set_promiscuity(dev, -1, false);
4966 dev->uc_promisc = false; 5158 dev->uc_promisc = false;
4967 } 5159 }
4968 } 5160 }
@@ -5051,9 +5243,13 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
5051 5243
5052 if ((flags ^ dev->gflags) & IFF_PROMISC) { 5244 if ((flags ^ dev->gflags) & IFF_PROMISC) {
5053 int inc = (flags & IFF_PROMISC) ? 1 : -1; 5245 int inc = (flags & IFF_PROMISC) ? 1 : -1;
5246 unsigned int old_flags = dev->flags;
5054 5247
5055 dev->gflags ^= IFF_PROMISC; 5248 dev->gflags ^= IFF_PROMISC;
5056 dev_set_promiscuity(dev, inc); 5249
5250 if (__dev_set_promiscuity(dev, inc, false) >= 0)
5251 if (dev->flags != old_flags)
5252 dev_set_rx_mode(dev);
5057 } 5253 }
5058 5254
5059 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI 5255 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
@@ -5064,16 +5260,20 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
5064 int inc = (flags & IFF_ALLMULTI) ? 1 : -1; 5260 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
5065 5261
5066 dev->gflags ^= IFF_ALLMULTI; 5262 dev->gflags ^= IFF_ALLMULTI;
5067 dev_set_allmulti(dev, inc); 5263 __dev_set_allmulti(dev, inc, false);
5068 } 5264 }
5069 5265
5070 return ret; 5266 return ret;
5071} 5267}
5072 5268
5073void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) 5269void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
5270 unsigned int gchanges)
5074{ 5271{
5075 unsigned int changes = dev->flags ^ old_flags; 5272 unsigned int changes = dev->flags ^ old_flags;
5076 5273
5274 if (gchanges)
5275 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
5276
5077 if (changes & IFF_UP) { 5277 if (changes & IFF_UP) {
5078 if (dev->flags & IFF_UP) 5278 if (dev->flags & IFF_UP)
5079 call_netdevice_notifiers(NETDEV_UP, dev); 5279 call_netdevice_notifiers(NETDEV_UP, dev);
@@ -5102,17 +5302,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
5102int dev_change_flags(struct net_device *dev, unsigned int flags) 5302int dev_change_flags(struct net_device *dev, unsigned int flags)
5103{ 5303{
5104 int ret; 5304 int ret;
5105 unsigned int changes, old_flags = dev->flags; 5305 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
5106 5306
5107 ret = __dev_change_flags(dev, flags); 5307 ret = __dev_change_flags(dev, flags);
5108 if (ret < 0) 5308 if (ret < 0)
5109 return ret; 5309 return ret;
5110 5310
5111 changes = old_flags ^ dev->flags; 5311 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
5112 if (changes) 5312 __dev_notify_flags(dev, old_flags, changes);
5113 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
5114
5115 __dev_notify_flags(dev, old_flags);
5116 return ret; 5313 return ret;
5117} 5314}
5118EXPORT_SYMBOL(dev_change_flags); 5315EXPORT_SYMBOL(dev_change_flags);
@@ -5259,6 +5456,7 @@ static void net_set_todo(struct net_device *dev)
5259static void rollback_registered_many(struct list_head *head) 5456static void rollback_registered_many(struct list_head *head)
5260{ 5457{
5261 struct net_device *dev, *tmp; 5458 struct net_device *dev, *tmp;
5459 LIST_HEAD(close_head);
5262 5460
5263 BUG_ON(dev_boot_phase); 5461 BUG_ON(dev_boot_phase);
5264 ASSERT_RTNL(); 5462 ASSERT_RTNL();
@@ -5281,7 +5479,9 @@ static void rollback_registered_many(struct list_head *head)
5281 } 5479 }
5282 5480
5283 /* If device is running, close it first. */ 5481 /* If device is running, close it first. */
5284 dev_close_many(head); 5482 list_for_each_entry(dev, head, unreg_list)
5483 list_add_tail(&dev->close_list, &close_head);
5484 dev_close_many(&close_head);
5285 5485
5286 list_for_each_entry(dev, head, unreg_list) { 5486 list_for_each_entry(dev, head, unreg_list) {
5287 /* And unlink it from device chain. */ 5487 /* And unlink it from device chain. */
@@ -5304,7 +5504,7 @@ static void rollback_registered_many(struct list_head *head)
5304 5504
5305 if (!dev->rtnl_link_ops || 5505 if (!dev->rtnl_link_ops ||
5306 dev->rtnl_link_state == RTNL_LINK_INITIALIZED) 5506 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5307 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); 5507 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
5308 5508
5309 /* 5509 /*
5310 * Flush the unicast and multicast chains 5510 * Flush the unicast and multicast chains
@@ -5703,7 +5903,7 @@ int register_netdevice(struct net_device *dev)
5703 */ 5903 */
5704 if (!dev->rtnl_link_ops || 5904 if (!dev->rtnl_link_ops ||
5705 dev->rtnl_link_state == RTNL_LINK_INITIALIZED) 5905 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5706 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); 5906 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
5707 5907
5708out: 5908out:
5709 return ret; 5909 return ret;
@@ -6010,6 +6210,16 @@ void netdev_set_default_ethtool_ops(struct net_device *dev,
6010} 6210}
6011EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); 6211EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
6012 6212
6213void netdev_freemem(struct net_device *dev)
6214{
6215 char *addr = (char *)dev - dev->padded;
6216
6217 if (is_vmalloc_addr(addr))
6218 vfree(addr);
6219 else
6220 kfree(addr);
6221}
6222
6013/** 6223/**
6014 * alloc_netdev_mqs - allocate network device 6224 * alloc_netdev_mqs - allocate network device
6015 * @sizeof_priv: size of private data to allocate space for 6225 * @sizeof_priv: size of private data to allocate space for
@@ -6053,7 +6263,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6053 /* ensure 32-byte alignment of whole construct */ 6263 /* ensure 32-byte alignment of whole construct */
6054 alloc_size += NETDEV_ALIGN - 1; 6264 alloc_size += NETDEV_ALIGN - 1;
6055 6265
6056 p = kzalloc(alloc_size, GFP_KERNEL); 6266 p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
6267 if (!p)
6268 p = vzalloc(alloc_size);
6057 if (!p) 6269 if (!p)
6058 return NULL; 6270 return NULL;
6059 6271
@@ -6062,7 +6274,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6062 6274
6063 dev->pcpu_refcnt = alloc_percpu(int); 6275 dev->pcpu_refcnt = alloc_percpu(int);
6064 if (!dev->pcpu_refcnt) 6276 if (!dev->pcpu_refcnt)
6065 goto free_p; 6277 goto free_dev;
6066 6278
6067 if (dev_addr_init(dev)) 6279 if (dev_addr_init(dev))
6068 goto free_pcpu; 6280 goto free_pcpu;
@@ -6077,9 +6289,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6077 6289
6078 INIT_LIST_HEAD(&dev->napi_list); 6290 INIT_LIST_HEAD(&dev->napi_list);
6079 INIT_LIST_HEAD(&dev->unreg_list); 6291 INIT_LIST_HEAD(&dev->unreg_list);
6292 INIT_LIST_HEAD(&dev->close_list);
6080 INIT_LIST_HEAD(&dev->link_watch_list); 6293 INIT_LIST_HEAD(&dev->link_watch_list);
6081 INIT_LIST_HEAD(&dev->upper_dev_list); 6294 INIT_LIST_HEAD(&dev->adj_list.upper);
6082 INIT_LIST_HEAD(&dev->lower_dev_list); 6295 INIT_LIST_HEAD(&dev->adj_list.lower);
6296 INIT_LIST_HEAD(&dev->all_adj_list.upper);
6297 INIT_LIST_HEAD(&dev->all_adj_list.lower);
6083 dev->priv_flags = IFF_XMIT_DST_RELEASE; 6298 dev->priv_flags = IFF_XMIT_DST_RELEASE;
6084 setup(dev); 6299 setup(dev);
6085 6300
@@ -6112,8 +6327,8 @@ free_pcpu:
6112 kfree(dev->_rx); 6327 kfree(dev->_rx);
6113#endif 6328#endif
6114 6329
6115free_p: 6330free_dev:
6116 kfree(p); 6331 netdev_freemem(dev);
6117 return NULL; 6332 return NULL;
6118} 6333}
6119EXPORT_SYMBOL(alloc_netdev_mqs); 6334EXPORT_SYMBOL(alloc_netdev_mqs);
@@ -6150,7 +6365,7 @@ void free_netdev(struct net_device *dev)
6150 6365
6151 /* Compatibility with error handling in drivers */ 6366 /* Compatibility with error handling in drivers */
6152 if (dev->reg_state == NETREG_UNINITIALIZED) { 6367 if (dev->reg_state == NETREG_UNINITIALIZED) {
6153 kfree((char *)dev - dev->padded); 6368 netdev_freemem(dev);
6154 return; 6369 return;
6155 } 6370 }
6156 6371
@@ -6312,7 +6527,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
6312 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 6527 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6313 rcu_barrier(); 6528 rcu_barrier();
6314 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 6529 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6315 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); 6530 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
6316 6531
6317 /* 6532 /*
6318 * Flush the unicast and multicast chains 6533 * Flush the unicast and multicast chains
@@ -6351,7 +6566,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
6351 * Prevent userspace races by waiting until the network 6566 * Prevent userspace races by waiting until the network
6352 * device is fully setup before sending notifications. 6567 * device is fully setup before sending notifications.
6353 */ 6568 */
6354 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); 6569 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
6355 6570
6356 synchronize_net(); 6571 synchronize_net();
6357 err = 0; 6572 err = 0;
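The adjacency rework in the dev.c diff above replaces the single upper_dev_list/lower_dev_list with the adj_list/all_adj_list pairs, but keeps the per-entry reference counting: __netdev_adjacent_dev_insert() only allocates a new netdev_adjacent when the device is not already on the given list and otherwise bumps ref_nr, and __netdev_adjacent_dev_remove() unlinks and frees the entry only once the count would drop to zero (and BUG()s if asked to remove a device that was never linked). A minimal userspace sketch of that reference-counted adjacency pattern, using a plain singly-linked list and illustrative names rather than the kernel structures:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative stand-in for struct netdev_adjacent: one entry per
 * neighbour, with a reference count instead of duplicate entries. */
struct adj_entry {
    char name[16];
    unsigned int ref_nr;
    struct adj_entry *next;
};

static struct adj_entry *adj_find(struct adj_entry *head, const char *name)
{
    for (; head; head = head->next)
        if (!strcmp(head->name, name))
            return head;
    return NULL;
}

/* Mirrors the insert path: reuse the entry and bump ref_nr if present. */
static int adj_insert(struct adj_entry **head, const char *name)
{
    struct adj_entry *adj = adj_find(*head, name);

    if (adj) {
        adj->ref_nr++;
        return 0;
    }
    adj = calloc(1, sizeof(*adj));
    if (!adj)
        return -1;
    snprintf(adj->name, sizeof(adj->name), "%s", name);
    adj->ref_nr = 1;
    adj->next = *head;
    *head = adj;
    return 0;
}

/* Mirrors the remove path: only unlink and free when ref_nr reaches zero. */
static void adj_remove(struct adj_entry **head, const char *name)
{
    struct adj_entry **p = head, *adj;

    while (*p && strcmp((*p)->name, name))
        p = &(*p)->next;
    adj = *p;
    if (!adj)
        return;               /* the kernel BUG()s here instead */
    if (--adj->ref_nr)
        return;
    *p = adj->next;
    free(adj);
}

int main(void)
{
    struct adj_entry *uppers = NULL;

    adj_insert(&uppers, "bond0");
    adj_insert(&uppers, "bond0");   /* second link: ref_nr becomes 2 */
    adj_remove(&uppers, "bond0");   /* still present, ref_nr back to 1 */
    printf("bond0 ref_nr = %u\n", adj_find(uppers, "bond0")->ref_nr);
    adj_remove(&uppers, "bond0");   /* now actually unlinked and freed */
    printf("bond0 present: %s\n", adj_find(uppers, "bond0") ? "yes" : "no");
    return 0;
}

Compiled with any C99 compiler this prints ref_nr 1 after the first removal and reports the entry gone after the second, which is the behaviour the stacked-device code relies on when the same lower device is reachable through several paths.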
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 6cda4e2c2132..ec40a849fc42 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -752,7 +752,7 @@ int dev_mc_del_global(struct net_device *dev, const unsigned char *addr)
752EXPORT_SYMBOL(dev_mc_del_global); 752EXPORT_SYMBOL(dev_mc_del_global);
753 753
754/** 754/**
755 * dev_mc_sync - Synchronize device's unicast list to another device 755 * dev_mc_sync - Synchronize device's multicast list to another device
756 * @to: destination device 756 * @to: destination device
757 * @from: source device 757 * @from: source device
758 * 758 *
@@ -780,7 +780,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
780EXPORT_SYMBOL(dev_mc_sync); 780EXPORT_SYMBOL(dev_mc_sync);
781 781
782/** 782/**
783 * dev_mc_sync_multiple - Synchronize device's unicast list to another 783 * dev_mc_sync_multiple - Synchronize device's multicast list to another
784 * device, but allow for multiple calls to sync to multiple devices. 784 * device, but allow for multiple calls to sync to multiple devices.
785 * @to: destination device 785 * @to: destination device
786 * @from: source device 786 * @from: source device
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 5e78d44333b9..e70301eb7a4a 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -64,7 +64,6 @@ static struct genl_family net_drop_monitor_family = {
64 .hdrsize = 0, 64 .hdrsize = 0,
65 .name = "NET_DM", 65 .name = "NET_DM",
66 .version = 2, 66 .version = 2,
67 .maxattr = NET_DM_CMD_MAX,
68}; 67};
69 68
70static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); 69static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
@@ -106,6 +105,10 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
106 return skb; 105 return skb;
107} 106}
108 107
108static struct genl_multicast_group dropmon_mcgrps[] = {
109 { .name = "events", },
110};
111
109static void send_dm_alert(struct work_struct *work) 112static void send_dm_alert(struct work_struct *work)
110{ 113{
111 struct sk_buff *skb; 114 struct sk_buff *skb;
@@ -116,7 +119,8 @@ static void send_dm_alert(struct work_struct *work)
116 skb = reset_per_cpu_data(data); 119 skb = reset_per_cpu_data(data);
117 120
118 if (skb) 121 if (skb)
119 genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); 122 genlmsg_multicast(&net_drop_monitor_family, skb, 0,
123 0, GFP_KERNEL);
120} 124}
121 125
122/* 126/*
@@ -333,7 +337,7 @@ out:
333 return NOTIFY_DONE; 337 return NOTIFY_DONE;
334} 338}
335 339
336static struct genl_ops dropmon_ops[] = { 340static const struct genl_ops dropmon_ops[] = {
337 { 341 {
338 .cmd = NET_DM_CMD_CONFIG, 342 .cmd = NET_DM_CMD_CONFIG,
339 .doit = net_dm_cmd_config, 343 .doit = net_dm_cmd_config,
@@ -364,13 +368,13 @@ static int __init init_net_drop_monitor(void)
364 return -ENOSPC; 368 return -ENOSPC;
365 } 369 }
366 370
367 rc = genl_register_family_with_ops(&net_drop_monitor_family, 371 rc = genl_register_family_with_ops_groups(&net_drop_monitor_family,
368 dropmon_ops, 372 dropmon_ops, dropmon_mcgrps);
369 ARRAY_SIZE(dropmon_ops));
370 if (rc) { 373 if (rc) {
371 pr_err("Could not create drop monitor netlink family\n"); 374 pr_err("Could not create drop monitor netlink family\n");
372 return rc; 375 return rc;
373 } 376 }
377 WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT);
374 378
375 rc = register_netdevice_notifier(&dropmon_net_notifier); 379 rc = register_netdevice_notifier(&dropmon_net_notifier);
376 if (rc < 0) { 380 if (rc < 0) {
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 78e9d9223e40..30071dec287a 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -81,6 +81,8 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
81 [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", 81 [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
82 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", 82 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
83 [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", 83 [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
84 [NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation",
85 [NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation",
84 [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", 86 [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
85 [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation", 87 [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation",
86 88
@@ -94,6 +96,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
94 [NETIF_F_LOOPBACK_BIT] = "loopback", 96 [NETIF_F_LOOPBACK_BIT] = "loopback",
95 [NETIF_F_RXFCS_BIT] = "rx-fcs", 97 [NETIF_F_RXFCS_BIT] = "rx-fcs",
96 [NETIF_F_RXALL_BIT] = "rx-all", 98 [NETIF_F_RXALL_BIT] = "rx-all",
99 [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
97}; 100};
98 101
99static int ethtool_get_features(struct net_device *dev, void __user *useraddr) 102static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 2e654138433c..f409e0bd35c0 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -460,7 +460,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
460 if (frh->action && (frh->action != rule->action)) 460 if (frh->action && (frh->action != rule->action))
461 continue; 461 continue;
462 462
463 if (frh->table && (frh_get_table(frh, tb) != rule->table)) 463 if (frh_get_table(frh, tb) &&
464 (frh_get_table(frh, tb) != rule->table))
464 continue; 465 continue;
465 466
466 if (tb[FRA_PRIORITY] && 467 if (tb[FRA_PRIORITY] &&
diff --git a/net/core/filter.c b/net/core/filter.c
index 01b780856db2..ad30d626a5bd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -36,7 +36,6 @@
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37#include <asm/unaligned.h> 37#include <asm/unaligned.h>
38#include <linux/filter.h> 38#include <linux/filter.h>
39#include <linux/reciprocal_div.h>
40#include <linux/ratelimit.h> 39#include <linux/ratelimit.h>
41#include <linux/seccomp.h> 40#include <linux/seccomp.h>
42#include <linux/if_vlan.h> 41#include <linux/if_vlan.h>
@@ -166,7 +165,7 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
166 A /= X; 165 A /= X;
167 continue; 166 continue;
168 case BPF_S_ALU_DIV_K: 167 case BPF_S_ALU_DIV_K:
169 A = reciprocal_divide(A, K); 168 A /= K;
170 continue; 169 continue;
171 case BPF_S_ALU_MOD_X: 170 case BPF_S_ALU_MOD_X:
172 if (X == 0) 171 if (X == 0)
@@ -553,11 +552,6 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
553 /* Some instructions need special checks */ 552 /* Some instructions need special checks */
554 switch (code) { 553 switch (code) {
555 case BPF_S_ALU_DIV_K: 554 case BPF_S_ALU_DIV_K:
556 /* check for division by zero */
557 if (ftest->k == 0)
558 return -EINVAL;
559 ftest->k = reciprocal_value(ftest->k);
560 break;
561 case BPF_S_ALU_MOD_K: 555 case BPF_S_ALU_MOD_K:
562 /* check for division by zero */ 556 /* check for division by zero */
563 if (ftest->k == 0) 557 if (ftest->k == 0)
@@ -853,27 +847,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
853 to->code = decodes[code]; 847 to->code = decodes[code];
854 to->jt = filt->jt; 848 to->jt = filt->jt;
855 to->jf = filt->jf; 849 to->jf = filt->jf;
856 850 to->k = filt->k;
857 if (code == BPF_S_ALU_DIV_K) {
858 /*
859 * When loaded this rule user gave us X, which was
860 * translated into R = r(X). Now we calculate the
861 * RR = r(R) and report it back. If next time this
862 * value is loaded and RRR = r(RR) is calculated
863 * then the R == RRR will be true.
864 *
865 * One exception. X == 1 translates into R == 0 and
866 * we can't calculate RR out of it with r().
867 */
868
869 if (filt->k == 0)
870 to->k = 1;
871 else
872 to->k = reciprocal_value(filt->k);
873
874 BUG_ON(reciprocal_value(to->k) != filt->k);
875 } else
876 to->k = filt->k;
877} 851}
878 852
879int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) 853int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
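The filter.c hunks retire the reciprocal_divide() trick for BPF_S_ALU_DIV_K: the divisor is now stored exactly as the user supplied it and divided directly at run time, which in turn lets sk_decode_filter() report the constant back without the round-trip logic removed above. The retired optimisation is easier to see in isolation; the stand-alone userspace sketch below mirrors the old reciprocal_value()/reciprocal_divide() pair (an approximation of the removed helpers, not kernel code):

#include <stdint.h>
#include <stdio.h>

/* Old-style reciprocal: R = 2^32 / K + 1, then A / K is approximated by
 * (A * R) >> 32. Like the removed helper, K == 1 is a special case (R
 * wraps to 0), one reason the constant is now divided directly. */
static uint32_t recip_value(uint32_t k)
{
	return (uint32_t)(0xffffffffULL / k + 1);
}

static uint32_t recip_divide(uint32_t a, uint32_t r)
{
	return (uint32_t)(((uint64_t)a * r) >> 32);
}

int main(void)
{
	uint32_t a = 1000000, k = 3;

	printf("exact=%u approx=%u\n", a / k, recip_divide(a, recip_value(k)));
	return 0;
}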
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 8d7d0dd72db2..2fc5beaf5783 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -25,9 +25,35 @@ static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *i
25 memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst)); 25 memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
26} 26}
27 27
28/**
29 * skb_flow_get_ports - extract the upper layer ports and return them
30 * @skb: buffer to extract the ports from
31 * @thoff: transport header offset
32 * @ip_proto: protocol for which to get port offset
33 *
 34 * The function will try to retrieve the ports at offset thoff + poff, where poff
 35 * is the protocol port offset returned by proto_ports_offset().
36 */
37__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto)
38{
39 int poff = proto_ports_offset(ip_proto);
40
41 if (poff >= 0) {
42 __be32 *ports, _ports;
43
44 ports = skb_header_pointer(skb, thoff + poff,
45 sizeof(_ports), &_ports);
46 if (ports)
47 return *ports;
48 }
49
50 return 0;
51}
52EXPORT_SYMBOL(skb_flow_get_ports);
53
28bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) 54bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
29{ 55{
30 int poff, nhoff = skb_network_offset(skb); 56 int nhoff = skb_network_offset(skb);
31 u8 ip_proto; 57 u8 ip_proto;
32 __be16 proto = skb->protocol; 58 __be16 proto = skb->protocol;
33 59
@@ -40,15 +66,15 @@ again:
40 struct iphdr _iph; 66 struct iphdr _iph;
41ip: 67ip:
42 iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); 68 iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
43 if (!iph) 69 if (!iph || iph->ihl < 5)
44 return false; 70 return false;
71 nhoff += iph->ihl * 4;
45 72
73 ip_proto = iph->protocol;
46 if (ip_is_fragment(iph)) 74 if (ip_is_fragment(iph))
47 ip_proto = 0; 75 ip_proto = 0;
48 else 76
49 ip_proto = iph->protocol;
50 iph_to_flow_copy_addrs(flow, iph); 77 iph_to_flow_copy_addrs(flow, iph);
51 nhoff += iph->ihl * 4;
52 break; 78 break;
53 } 79 }
54 case __constant_htons(ETH_P_IPV6): { 80 case __constant_htons(ETH_P_IPV6): {
@@ -150,16 +176,7 @@ ipv6:
150 } 176 }
151 177
152 flow->ip_proto = ip_proto; 178 flow->ip_proto = ip_proto;
153 poff = proto_ports_offset(ip_proto); 179 flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto);
154 if (poff >= 0) {
155 __be32 *ports, _ports;
156
157 ports = skb_header_pointer(skb, nhoff + poff,
158 sizeof(_ports), &_ports);
159 if (ports)
160 flow->ports = *ports;
161 }
162
163 flow->thoff = (u16) nhoff; 180 flow->thoff = (u16) nhoff;
164 181
165 return true; 182 return true;
@@ -167,6 +184,22 @@ ipv6:
167EXPORT_SYMBOL(skb_flow_dissect); 184EXPORT_SYMBOL(skb_flow_dissect);
168 185
169static u32 hashrnd __read_mostly; 186static u32 hashrnd __read_mostly;
187static __always_inline void __flow_hash_secret_init(void)
188{
189 net_get_random_once(&hashrnd, sizeof(hashrnd));
190}
191
192static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c)
193{
194 __flow_hash_secret_init();
195 return jhash_3words(a, b, c, hashrnd);
196}
197
198static __always_inline u32 __flow_hash_1word(u32 a)
199{
200 __flow_hash_secret_init();
201 return jhash_1word(a, hashrnd);
202}
170 203
171/* 204/*
172 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses 205 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
@@ -193,9 +226,9 @@ void __skb_get_rxhash(struct sk_buff *skb)
193 swap(keys.port16[0], keys.port16[1]); 226 swap(keys.port16[0], keys.port16[1]);
194 } 227 }
195 228
196 hash = jhash_3words((__force u32)keys.dst, 229 hash = __flow_hash_3words((__force u32)keys.dst,
197 (__force u32)keys.src, 230 (__force u32)keys.src,
198 (__force u32)keys.ports, hashrnd); 231 (__force u32)keys.ports);
199 if (!hash) 232 if (!hash)
200 hash = 1; 233 hash = 1;
201 234
@@ -231,7 +264,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
231 hash = skb->sk->sk_hash; 264 hash = skb->sk->sk_hash;
232 else 265 else
233 hash = (__force u16) skb->protocol; 266 hash = (__force u16) skb->protocol;
234 hash = jhash_1word(hash, hashrnd); 267 hash = __flow_hash_1word(hash);
235 268
236 return (u16) (((u64) hash * qcount) >> 32) + qoffset; 269 return (u16) (((u64) hash * qcount) >> 32) + qoffset;
237} 270}
@@ -323,7 +356,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
323 else 356 else
324 hash = (__force u16) skb->protocol ^ 357 hash = (__force u16) skb->protocol ^
325 skb->rxhash; 358 skb->rxhash;
326 hash = jhash_1word(hash, hashrnd); 359 hash = __flow_hash_1word(hash);
327 queue_index = map->queues[ 360 queue_index = map->queues[
328 ((u64)hash * map->len) >> 32]; 361 ((u64)hash * map->len) >> 32];
329 } 362 }
@@ -362,27 +395,23 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
362EXPORT_SYMBOL(__netdev_pick_tx); 395EXPORT_SYMBOL(__netdev_pick_tx);
363 396
364struct netdev_queue *netdev_pick_tx(struct net_device *dev, 397struct netdev_queue *netdev_pick_tx(struct net_device *dev,
365 struct sk_buff *skb) 398 struct sk_buff *skb,
399 void *accel_priv)
366{ 400{
367 int queue_index = 0; 401 int queue_index = 0;
368 402
369 if (dev->real_num_tx_queues != 1) { 403 if (dev->real_num_tx_queues != 1) {
370 const struct net_device_ops *ops = dev->netdev_ops; 404 const struct net_device_ops *ops = dev->netdev_ops;
371 if (ops->ndo_select_queue) 405 if (ops->ndo_select_queue)
372 queue_index = ops->ndo_select_queue(dev, skb); 406 queue_index = ops->ndo_select_queue(dev, skb,
407 accel_priv);
373 else 408 else
374 queue_index = __netdev_pick_tx(dev, skb); 409 queue_index = __netdev_pick_tx(dev, skb);
375 queue_index = dev_cap_txqueue(dev, queue_index); 410
411 if (!accel_priv)
412 queue_index = dev_cap_txqueue(dev, queue_index);
376 } 413 }
377 414
378 skb_set_queue_mapping(skb, queue_index); 415 skb_set_queue_mapping(skb, queue_index);
379 return netdev_get_tx_queue(dev, queue_index); 416 return netdev_get_tx_queue(dev, queue_index);
380} 417}
381
382static int __init initialize_hashrnd(void)
383{
384 get_random_bytes(&hashrnd, sizeof(hashrnd));
385 return 0;
386}
387
388late_initcall_sync(initialize_hashrnd);
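Two things happen in flow_dissector.c: the L4 port lookup moves into the exported helper skb_flow_get_ports(), and the jhash seed is now initialised lazily on first use through net_get_random_once() rather than by the late_initcall removed at the end of the hunk. A hedged sketch of calling the new helper from outside the dissector (the function name, constant seed and use of the result are invented for illustration):

#include <linux/jhash.h>
#include <linux/skbuff.h>
#include <net/flow_keys.h>

/* Illustrative caller: assumes the transport header offset and L4 protocol
 * are already known, e.g. from a previous skb_flow_dissect(). */
static u32 example_l4_hash(const struct sk_buff *skb, int thoff, u8 ip_proto)
{
	/* 32-bit source/destination port pair, or 0 if the protocol has no
	 * ports or the header cannot be read. */
	__be32 ports = skb_flow_get_ports(skb, thoff, ip_proto);

	return jhash_1word((__force u32)ports, 0);	/* constant seed, demo only */
}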
diff --git a/net/core/iovec.c b/net/core/iovec.c
index b77eeecc0011..b61869429f4c 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -48,7 +48,8 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
48 if (err < 0) 48 if (err < 0)
49 return err; 49 return err;
50 } 50 }
51 m->msg_name = address; 51 if (m->msg_name)
52 m->msg_name = address;
52 } else { 53 } else {
53 m->msg_name = NULL; 54 m->msg_name = NULL;
54 } 55 }
@@ -100,7 +101,7 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
100EXPORT_SYMBOL(memcpy_toiovecend); 101EXPORT_SYMBOL(memcpy_toiovecend);
101 102
102/* 103/*
103 * Copy iovec from kernel. Returns -EFAULT on error. 104 * Copy iovec to kernel. Returns -EFAULT on error.
104 */ 105 */
105 106
106int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, 107int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6072610a8672..932c6d7cf666 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -867,7 +867,7 @@ static void neigh_invalidate(struct neighbour *neigh)
867static void neigh_probe(struct neighbour *neigh) 867static void neigh_probe(struct neighbour *neigh)
868 __releases(neigh->lock) 868 __releases(neigh->lock)
869{ 869{
870 struct sk_buff *skb = skb_peek(&neigh->arp_queue); 870 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
871 /* keep skb alive even if arp_queue overflows */ 871 /* keep skb alive even if arp_queue overflows */
872 if (skb) 872 if (skb)
873 skb = skb_copy(skb, GFP_ATOMIC); 873 skb = skb_copy(skb, GFP_ATOMIC);
@@ -1161,6 +1161,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1161 neigh->parms->reachable_time : 1161 neigh->parms->reachable_time :
1162 0))); 1162 0)));
1163 neigh->nud_state = new; 1163 neigh->nud_state = new;
1164 notify = 1;
1164 } 1165 }
1165 1166
1166 if (lladdr != neigh->ha) { 1167 if (lladdr != neigh->ha) {
@@ -1274,7 +1275,7 @@ int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1274 1275
1275 if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, 1276 if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1276 skb->len) < 0 && 1277 skb->len) < 0 &&
1277 dev->header_ops->rebuild(skb)) 1278 dev_rebuild_header(skb))
1278 return 0; 1279 return 0;
1279 1280
1280 return dev_queue_xmit(skb); 1281 return dev_queue_xmit(skb);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index d954b56b4e47..f3edf9635e02 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1263,7 +1263,7 @@ static void netdev_release(struct device *d)
1263 BUG_ON(dev->reg_state != NETREG_RELEASED); 1263 BUG_ON(dev->reg_state != NETREG_RELEASED);
1264 1264
1265 kfree(dev->ifalias); 1265 kfree(dev->ifalias);
1266 kfree((char *)dev - dev->padded); 1266 netdev_freemem(dev);
1267} 1267}
1268 1268
1269static const void *net_namespace(struct device *d) 1269static const void *net_namespace(struct device *d)
@@ -1344,17 +1344,19 @@ int netdev_register_kobject(struct net_device *net)
1344 return error; 1344 return error;
1345} 1345}
1346 1346
1347int netdev_class_create_file(struct class_attribute *class_attr) 1347int netdev_class_create_file_ns(struct class_attribute *class_attr,
1348 const void *ns)
1348{ 1349{
1349 return class_create_file(&net_class, class_attr); 1350 return class_create_file_ns(&net_class, class_attr, ns);
1350} 1351}
1351EXPORT_SYMBOL(netdev_class_create_file); 1352EXPORT_SYMBOL(netdev_class_create_file_ns);
1352 1353
1353void netdev_class_remove_file(struct class_attribute *class_attr) 1354void netdev_class_remove_file_ns(struct class_attribute *class_attr,
1355 const void *ns)
1354{ 1356{
1355 class_remove_file(&net_class, class_attr); 1357 class_remove_file_ns(&net_class, class_attr, ns);
1356} 1358}
1357EXPORT_SYMBOL(netdev_class_remove_file); 1359EXPORT_SYMBOL(netdev_class_remove_file_ns);
1358 1360
1359int netdev_kobject_init(void) 1361int netdev_kobject_init(void)
1360{ 1362{
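netdev_class_create_file() and netdev_class_remove_file() gain _ns() variants that carry a namespace tag down to class_create_file_ns()/class_remove_file_ns(). Existing callers presumably keep working through thin wrappers that pass a NULL tag; the pair below is a sketch of that assumption, not something shown in this diff:

#include <linux/netdevice.h>

/* Assumed compatibility wrappers (typically static inlines in a header);
 * a NULL tag keeps the old, non-namespaced behaviour. */
static inline int netdev_class_create_file(struct class_attribute *class_attr)
{
	return netdev_class_create_file_ns(class_attr, NULL);
}

static inline void netdev_class_remove_file(struct class_attribute *class_attr)
{
	netdev_class_remove_file_ns(class_attr, NULL);
}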
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index fc75c9e461b8..19fe9c717ced 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -375,7 +375,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
375 if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { 375 if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
376 struct netdev_queue *txq; 376 struct netdev_queue *txq;
377 377
378 txq = netdev_pick_tx(dev, skb); 378 txq = netdev_pick_tx(dev, skb, NULL);
379 379
380 /* try until next clock tick */ 380 /* try until next clock tick */
381 for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; 381 for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
@@ -386,8 +386,14 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
386 !vlan_hw_offload_capable(netif_skb_features(skb), 386 !vlan_hw_offload_capable(netif_skb_features(skb),
387 skb->vlan_proto)) { 387 skb->vlan_proto)) {
388 skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)); 388 skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
389 if (unlikely(!skb)) 389 if (unlikely(!skb)) {
390 break; 390 /* This is actually a packet drop, but we
391 * don't want the code at the end of this
392 * function to try and re-queue a NULL skb.
393 */
394 status = NETDEV_TX_OK;
395 goto unlock_txq;
396 }
391 skb->vlan_tci = 0; 397 skb->vlan_tci = 0;
392 } 398 }
393 399
@@ -395,6 +401,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
395 if (status == NETDEV_TX_OK) 401 if (status == NETDEV_TX_OK)
396 txq_trans_update(txq); 402 txq_trans_update(txq);
397 } 403 }
404 unlock_txq:
398 __netif_tx_unlock(txq); 405 __netif_tx_unlock(txq);
399 406
400 if (status == NETDEV_TX_OK) 407 if (status == NETDEV_TX_OK)
@@ -636,8 +643,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
636 643
637 netpoll_send_skb(np, send_skb); 644 netpoll_send_skb(np, send_skb);
638 645
639 /* If there are several rx_hooks for the same address, 646 /* If there are several rx_skb_hooks for the same
640 we're fine by sending a single reply */ 647 * address we're fine by sending a single reply
648 */
641 break; 649 break;
642 } 650 }
643 spin_unlock_irqrestore(&npinfo->rx_lock, flags); 651 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
@@ -719,8 +727,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
719 727
720 netpoll_send_skb(np, send_skb); 728 netpoll_send_skb(np, send_skb);
721 729
722 /* If there are several rx_hooks for the same address, 730 /* If there are several rx_skb_hooks for the same
723 we're fine by sending a single reply */ 731 * address, we're fine by sending a single reply
732 */
724 break; 733 break;
725 } 734 }
726 spin_unlock_irqrestore(&npinfo->rx_lock, flags); 735 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
@@ -756,11 +765,12 @@ static bool pkt_is_ns(struct sk_buff *skb)
756 765
757int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) 766int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
758{ 767{
759 int proto, len, ulen; 768 int proto, len, ulen, data_len;
760 int hits = 0; 769 int hits = 0, offset;
761 const struct iphdr *iph; 770 const struct iphdr *iph;
762 struct udphdr *uh; 771 struct udphdr *uh;
763 struct netpoll *np, *tmp; 772 struct netpoll *np, *tmp;
773 uint16_t source;
764 774
765 if (list_empty(&npinfo->rx_np)) 775 if (list_empty(&npinfo->rx_np))
766 goto out; 776 goto out;
@@ -820,7 +830,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
820 830
821 len -= iph->ihl*4; 831 len -= iph->ihl*4;
822 uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); 832 uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
833 offset = (unsigned char *)(uh + 1) - skb->data;
823 ulen = ntohs(uh->len); 834 ulen = ntohs(uh->len);
835 data_len = skb->len - offset;
836 source = ntohs(uh->source);
824 837
825 if (ulen != len) 838 if (ulen != len)
826 goto out; 839 goto out;
@@ -834,9 +847,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
834 if (np->local_port && np->local_port != ntohs(uh->dest)) 847 if (np->local_port && np->local_port != ntohs(uh->dest))
835 continue; 848 continue;
836 849
837 np->rx_hook(np, ntohs(uh->source), 850 np->rx_skb_hook(np, source, skb, offset, data_len);
838 (char *)(uh+1),
839 ulen - sizeof(struct udphdr));
840 hits++; 851 hits++;
841 } 852 }
842 } else { 853 } else {
@@ -859,7 +870,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
859 if (!pskb_may_pull(skb, sizeof(struct udphdr))) 870 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
860 goto out; 871 goto out;
861 uh = udp_hdr(skb); 872 uh = udp_hdr(skb);
873 offset = (unsigned char *)(uh + 1) - skb->data;
862 ulen = ntohs(uh->len); 874 ulen = ntohs(uh->len);
875 data_len = skb->len - offset;
876 source = ntohs(uh->source);
863 if (ulen != skb->len) 877 if (ulen != skb->len)
864 goto out; 878 goto out;
865 if (udp6_csum_init(skb, uh, IPPROTO_UDP)) 879 if (udp6_csum_init(skb, uh, IPPROTO_UDP))
@@ -872,9 +886,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
872 if (np->local_port && np->local_port != ntohs(uh->dest)) 886 if (np->local_port && np->local_port != ntohs(uh->dest))
873 continue; 887 continue;
874 888
875 np->rx_hook(np, ntohs(uh->source), 889 np->rx_skb_hook(np, source, skb, offset, data_len);
876 (char *)(uh+1),
877 ulen - sizeof(struct udphdr));
878 hits++; 890 hits++;
879 } 891 }
880#endif 892#endif
@@ -1062,7 +1074,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
1062 1074
1063 npinfo->netpoll = np; 1075 npinfo->netpoll = np;
1064 1076
1065 if (np->rx_hook) { 1077 if (np->rx_skb_hook) {
1066 spin_lock_irqsave(&npinfo->rx_lock, flags); 1078 spin_lock_irqsave(&npinfo->rx_lock, flags);
1067 npinfo->rx_flags |= NETPOLL_RX_ENABLED; 1079 npinfo->rx_flags |= NETPOLL_RX_ENABLED;
1068 list_add_tail(&np->rx, &npinfo->rx_np); 1080 list_add_tail(&np->rx, &npinfo->rx_np);
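The netpoll receive path swaps rx_hook(), which handed the consumer a raw pointer into the UDP payload, for rx_skb_hook(), which passes the skb itself together with the payload offset and length, so a consumer can copy out only what it needs even from a non-linear buffer. A hypothetical hook, with the signature inferred from the np->rx_skb_hook(np, source, skb, offset, data_len) call sites above:

#include <linux/netpoll.h>
#include <linux/skbuff.h>

/* Hypothetical consumer; everything except the argument list is invented. */
static void example_rx_skb_hook(struct netpoll *np, int source,
				struct sk_buff *skb, int offset, int data_len)
{
	char buf[128];
	int len = min(data_len, (int)sizeof(buf) - 1);

	/* Copy the UDP payload out of the (possibly fragmented) skb. */
	if (skb_copy_bits(skb, offset, buf, len) < 0)
		return;

	buf[len] = '\0';
	pr_info("netpoll rx from port %d: %s\n", source, buf);
}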
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index d9cd627e6a16..9b7cf6c85f82 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -222,11 +222,10 @@ static void net_prio_attach(struct cgroup_subsys_state *css,
222 struct cgroup_taskset *tset) 222 struct cgroup_taskset *tset)
223{ 223{
224 struct task_struct *p; 224 struct task_struct *p;
225 void *v; 225 void *v = (void *)(unsigned long)css->cgroup->id;
226 226
227 cgroup_taskset_for_each(p, css, tset) { 227 cgroup_taskset_for_each(p, css, tset) {
228 task_lock(p); 228 task_lock(p);
229 v = (void *)(unsigned long)task_netprioidx(p);
230 iterate_fd(p->files, 0, update_netprio, v); 229 iterate_fd(p->files, 0, update_netprio, v);
231 task_unlock(p); 230 task_unlock(p);
232 } 231 }
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 261357a66300..a797fff7f222 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2527,6 +2527,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
2527 if (x) { 2527 if (x) {
2528 int ret; 2528 int ret;
2529 __u8 *eth; 2529 __u8 *eth;
2530 struct iphdr *iph;
2531
2530 nhead = x->props.header_len - skb_headroom(skb); 2532 nhead = x->props.header_len - skb_headroom(skb);
2531 if (nhead > 0) { 2533 if (nhead > 0) {
2532 ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); 2534 ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
@@ -2548,6 +2550,11 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
2548 eth = (__u8 *) skb_push(skb, ETH_HLEN); 2550 eth = (__u8 *) skb_push(skb, ETH_HLEN);
2549 memcpy(eth, pkt_dev->hh, 12); 2551 memcpy(eth, pkt_dev->hh, 12);
2550 *(u16 *) &eth[12] = protocol; 2552 *(u16 *) &eth[12] = protocol;
2553
2554 /* Update IPv4 header len as well as checksum value */
2555 iph = ip_hdr(skb);
2556 iph->tot_len = htons(skb->len - ETH_HLEN);
2557 ip_send_check(iph);
2551 } 2558 }
2552 } 2559 }
2553 return 1; 2560 return 1;
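After the IPsec transform grows the skb, pktgen now rewrites iph->tot_len and recomputes the IPv4 header checksum through ip_send_check(). For readers unfamiliar with that helper, it is understood to boil down to zeroing the checksum field and re-running the one's-complement sum over the header words, roughly:

#include <linux/ip.h>
#include <net/ip.h>

/* Rough equivalent of ip_send_check(), shown only to explain the call added
 * above; the real helper lives in the IPv4 output code. */
static void example_ip_send_check(struct iphdr *iph)
{
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}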
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2a0e21de3060..cf67144d3e3c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1647,9 +1647,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
1647 } 1647 }
1648 1648
1649 dev->rtnl_link_state = RTNL_LINK_INITIALIZED; 1649 dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
1650 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
1651 1650
1652 __dev_notify_flags(dev, old_flags); 1651 __dev_notify_flags(dev, old_flags, ~0U);
1653 return 0; 1652 return 0;
1654} 1653}
1655EXPORT_SYMBOL(rtnl_configure_link); 1654EXPORT_SYMBOL(rtnl_configure_link);
@@ -1985,14 +1984,15 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
1985 return skb->len; 1984 return skb->len;
1986} 1985}
1987 1986
1988void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change) 1987void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
1988 gfp_t flags)
1989{ 1989{
1990 struct net *net = dev_net(dev); 1990 struct net *net = dev_net(dev);
1991 struct sk_buff *skb; 1991 struct sk_buff *skb;
1992 int err = -ENOBUFS; 1992 int err = -ENOBUFS;
1993 size_t if_info_size; 1993 size_t if_info_size;
1994 1994
1995 skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL); 1995 skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags);
1996 if (skb == NULL) 1996 if (skb == NULL)
1997 goto errout; 1997 goto errout;
1998 1998
@@ -2003,7 +2003,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
2003 kfree_skb(skb); 2003 kfree_skb(skb);
2004 goto errout; 2004 goto errout;
2005 } 2005 }
2006 rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); 2006 rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags);
2007 return; 2007 return;
2008errout: 2008errout:
2009 if (err < 0) 2009 if (err < 0)
@@ -2717,7 +2717,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
2717 case NETDEV_JOIN: 2717 case NETDEV_JOIN:
2718 break; 2718 break;
2719 default: 2719 default:
2720 rtmsg_ifinfo(RTM_NEWLINK, dev, 0); 2720 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
2721 break; 2721 break;
2722 } 2722 }
2723 return NOTIFY_DONE; 2723 return NOTIFY_DONE;
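rtmsg_ifinfo() now takes the allocation mask from its caller, so link notifications can also be generated from contexts that must not sleep; the callers shown here simply pass GFP_KERNEL as before. A minimal, hypothetical atomic-context caller:

#include <linux/rtnetlink.h>

/* Hypothetical caller in a context that cannot sleep; only the rtmsg_ifinfo()
 * signature reflects this diff. */
static void example_notify_link_event(struct net_device *dev)
{
	rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_ATOMIC);
}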
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 8d9d05edd2eb..897da56f3aff 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -7,6 +7,7 @@
7#include <linux/hrtimer.h> 7#include <linux/hrtimer.h>
8#include <linux/ktime.h> 8#include <linux/ktime.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/net.h>
10 11
11#include <net/secure_seq.h> 12#include <net/secure_seq.h>
12 13
@@ -15,20 +16,9 @@
15 16
16static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; 17static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
17 18
18static void net_secret_init(void) 19static __always_inline void net_secret_init(void)
19{ 20{
20 u32 tmp; 21 net_get_random_once(net_secret, sizeof(net_secret));
21 int i;
22
23 if (likely(net_secret[0]))
24 return;
25
26 for (i = NET_SECRET_SIZE; i > 0;) {
27 do {
28 get_random_bytes(&tmp, sizeof(tmp));
29 } while (!tmp);
30 cmpxchg(&net_secret[--i], 0, tmp);
31 }
32} 22}
33#endif 23#endif
34 24
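net_secret_init() drops its hand-rolled fill-once loop (per-word cmpxchg against zero) in favour of net_get_random_once(), which seeds the whole buffer on the first call and is expected to become nearly free afterwards via a static key; the supporting __net_get_random_once() machinery is added in the utils.c hunk at the end of this diff. The calling pattern, with invented names:

#include <linux/jhash.h>
#include <linux/net.h>

static u32 example_secret[4] ____cacheline_aligned;

/* Seed on first use; subsequent calls are cheap no-ops. */
static __always_inline void example_secret_init(void)
{
	net_get_random_once(example_secret, sizeof(example_secret));
}

static u32 example_cookie(__be32 saddr, __be32 daddr)
{
	example_secret_init();
	return jhash_2words((__force u32)saddr, (__force u32)daddr,
			    example_secret[0]);
}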
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d81cff119f73..06e72d3cdf60 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -476,6 +476,18 @@ void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
476} 476}
477EXPORT_SYMBOL(skb_add_rx_frag); 477EXPORT_SYMBOL(skb_add_rx_frag);
478 478
479void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
480 unsigned int truesize)
481{
482 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
483
484 skb_frag_size_add(frag, size);
485 skb->len += size;
486 skb->data_len += size;
487 skb->truesize += truesize;
488}
489EXPORT_SYMBOL(skb_coalesce_rx_frag);
490
479static void skb_drop_list(struct sk_buff **listp) 491static void skb_drop_list(struct sk_buff **listp)
480{ 492{
481 kfree_skb_list(*listp); 493 kfree_skb_list(*listp);
@@ -580,9 +592,6 @@ static void skb_release_head_state(struct sk_buff *skb)
580#if IS_ENABLED(CONFIG_NF_CONNTRACK) 592#if IS_ENABLED(CONFIG_NF_CONNTRACK)
581 nf_conntrack_put(skb->nfct); 593 nf_conntrack_put(skb->nfct);
582#endif 594#endif
583#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
584 nf_conntrack_put_reasm(skb->nfct_reasm);
585#endif
586#ifdef CONFIG_BRIDGE_NETFILTER 595#ifdef CONFIG_BRIDGE_NETFILTER
587 nf_bridge_put(skb->nf_bridge); 596 nf_bridge_put(skb->nf_bridge);
588#endif 597#endif
@@ -903,6 +912,9 @@ EXPORT_SYMBOL(skb_clone);
903 912
904static void skb_headers_offset_update(struct sk_buff *skb, int off) 913static void skb_headers_offset_update(struct sk_buff *skb, int off)
905{ 914{
915 /* Only adjust this if it actually is csum_start rather than csum */
916 if (skb->ip_summed == CHECKSUM_PARTIAL)
917 skb->csum_start += off;
906 /* {transport,network,mac}_header and tail are relative to skb->head */ 918 /* {transport,network,mac}_header and tail are relative to skb->head */
907 skb->transport_header += off; 919 skb->transport_header += off;
908 skb->network_header += off; 920 skb->network_header += off;
@@ -1036,8 +1048,8 @@ EXPORT_SYMBOL(__pskb_copy);
1036 * @ntail: room to add at tail 1048 * @ntail: room to add at tail
1037 * @gfp_mask: allocation priority 1049 * @gfp_mask: allocation priority
1038 * 1050 *
1039 * Expands (or creates identical copy, if &nhead and &ntail are zero) 1051 * Expands (or creates identical copy, if @nhead and @ntail are zero)
1040 * header of skb. &sk_buff itself is not changed. &sk_buff MUST have 1052 * header of @skb. &sk_buff itself is not changed. &sk_buff MUST have
1041 * reference count of 1. Returns zero in the case of success or error, 1053 * reference count of 1. Returns zero in the case of success or error,
1042 * if expansion failed. In the last case, &sk_buff is not changed. 1054 * if expansion failed. In the last case, &sk_buff is not changed.
1043 * 1055 *
@@ -1109,9 +1121,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
1109#endif 1121#endif
1110 skb->tail += off; 1122 skb->tail += off;
1111 skb_headers_offset_update(skb, nhead); 1123 skb_headers_offset_update(skb, nhead);
1112 /* Only adjust this if it actually is csum_start rather than csum */
1113 if (skb->ip_summed == CHECKSUM_PARTIAL)
1114 skb->csum_start += nhead;
1115 skb->cloned = 0; 1124 skb->cloned = 0;
1116 skb->hdr_len = 0; 1125 skb->hdr_len = 0;
1117 skb->nohdr = 0; 1126 skb->nohdr = 0;
@@ -1176,7 +1185,6 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
1176 NUMA_NO_NODE); 1185 NUMA_NO_NODE);
1177 int oldheadroom = skb_headroom(skb); 1186 int oldheadroom = skb_headroom(skb);
1178 int head_copy_len, head_copy_off; 1187 int head_copy_len, head_copy_off;
1179 int off;
1180 1188
1181 if (!n) 1189 if (!n)
1182 return NULL; 1190 return NULL;
@@ -1200,11 +1208,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
1200 1208
1201 copy_skb_header(n, skb); 1209 copy_skb_header(n, skb);
1202 1210
1203 off = newheadroom - oldheadroom; 1211 skb_headers_offset_update(n, newheadroom - oldheadroom);
1204 if (n->ip_summed == CHECKSUM_PARTIAL)
1205 n->csum_start += off;
1206
1207 skb_headers_offset_update(n, off);
1208 1212
1209 return n; 1213 return n;
1210} 1214}
@@ -1257,6 +1261,29 @@ free_skb:
1257EXPORT_SYMBOL(skb_pad); 1261EXPORT_SYMBOL(skb_pad);
1258 1262
1259/** 1263/**
1264 * pskb_put - add data to the tail of a potentially fragmented buffer
1265 * @skb: start of the buffer to use
1266 * @tail: tail fragment of the buffer to use
1267 * @len: amount of data to add
1268 *
1269 * This function extends the used data area of the potentially
1270 * fragmented buffer. @tail must be the last fragment of @skb -- or
1271 * @skb itself. If this would exceed the total buffer size the kernel
1272 * will panic. A pointer to the first byte of the extra data is
1273 * returned.
1274 */
1275
1276unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
1277{
1278 if (tail != skb) {
1279 skb->data_len += len;
1280 skb->len += len;
1281 }
1282 return skb_put(tail, len);
1283}
1284EXPORT_SYMBOL_GPL(pskb_put);
1285
1286/**
1260 * skb_put - add data to a buffer 1287 * skb_put - add data to a buffer
1261 * @skb: buffer to use 1288 * @skb: buffer to use
1262 * @len: amount of data to add 1289 * @len: amount of data to add
@@ -1933,9 +1960,8 @@ fault:
1933EXPORT_SYMBOL(skb_store_bits); 1960EXPORT_SYMBOL(skb_store_bits);
1934 1961
1935/* Checksum skb data. */ 1962/* Checksum skb data. */
1936 1963__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
1937__wsum skb_checksum(const struct sk_buff *skb, int offset, 1964 __wsum csum, const struct skb_checksum_ops *ops)
1938 int len, __wsum csum)
1939{ 1965{
1940 int start = skb_headlen(skb); 1966 int start = skb_headlen(skb);
1941 int i, copy = start - offset; 1967 int i, copy = start - offset;
@@ -1946,7 +1972,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
1946 if (copy > 0) { 1972 if (copy > 0) {
1947 if (copy > len) 1973 if (copy > len)
1948 copy = len; 1974 copy = len;
1949 csum = csum_partial(skb->data + offset, copy, csum); 1975 csum = ops->update(skb->data + offset, copy, csum);
1950 if ((len -= copy) == 0) 1976 if ((len -= copy) == 0)
1951 return csum; 1977 return csum;
1952 offset += copy; 1978 offset += copy;
@@ -1967,10 +1993,10 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
1967 if (copy > len) 1993 if (copy > len)
1968 copy = len; 1994 copy = len;
1969 vaddr = kmap_atomic(skb_frag_page(frag)); 1995 vaddr = kmap_atomic(skb_frag_page(frag));
1970 csum2 = csum_partial(vaddr + frag->page_offset + 1996 csum2 = ops->update(vaddr + frag->page_offset +
1971 offset - start, copy, 0); 1997 offset - start, copy, 0);
1972 kunmap_atomic(vaddr); 1998 kunmap_atomic(vaddr);
1973 csum = csum_block_add(csum, csum2, pos); 1999 csum = ops->combine(csum, csum2, pos, copy);
1974 if (!(len -= copy)) 2000 if (!(len -= copy))
1975 return csum; 2001 return csum;
1976 offset += copy; 2002 offset += copy;
@@ -1989,9 +2015,9 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
1989 __wsum csum2; 2015 __wsum csum2;
1990 if (copy > len) 2016 if (copy > len)
1991 copy = len; 2017 copy = len;
1992 csum2 = skb_checksum(frag_iter, offset - start, 2018 csum2 = __skb_checksum(frag_iter, offset - start,
1993 copy, 0); 2019 copy, 0, ops);
1994 csum = csum_block_add(csum, csum2, pos); 2020 csum = ops->combine(csum, csum2, pos, copy);
1995 if ((len -= copy) == 0) 2021 if ((len -= copy) == 0)
1996 return csum; 2022 return csum;
1997 offset += copy; 2023 offset += copy;
@@ -2003,6 +2029,18 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
2003 2029
2004 return csum; 2030 return csum;
2005} 2031}
2032EXPORT_SYMBOL(__skb_checksum);
2033
2034__wsum skb_checksum(const struct sk_buff *skb, int offset,
2035 int len, __wsum csum)
2036{
2037 const struct skb_checksum_ops ops = {
2038 .update = csum_partial_ext,
2039 .combine = csum_block_add_ext,
2040 };
2041
2042 return __skb_checksum(skb, offset, len, csum, &ops);
2043}
2006EXPORT_SYMBOL(skb_checksum); 2044EXPORT_SYMBOL(skb_checksum);
2007 2045
2008/* Both of above in one bottle. */ 2046/* Both of above in one bottle. */
@@ -2522,14 +2560,14 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
2522 * @data: destination pointer for data to be returned 2560 * @data: destination pointer for data to be returned
2523 * @st: state variable 2561 * @st: state variable
2524 * 2562 *
2525 * Reads a block of skb data at &consumed relative to the 2563 * Reads a block of skb data at @consumed relative to the
2526 * lower offset specified to skb_prepare_seq_read(). Assigns 2564 * lower offset specified to skb_prepare_seq_read(). Assigns
2527 * the head of the data block to &data and returns the length 2565 * the head of the data block to @data and returns the length
2528 * of the block or 0 if the end of the skb data or the upper 2566 * of the block or 0 if the end of the skb data or the upper
2529 * offset has been reached. 2567 * offset has been reached.
2530 * 2568 *
2531 * The caller is not required to consume all of the data 2569 * The caller is not required to consume all of the data
2532 * returned, i.e. &consumed is typically set to the number 2570 * returned, i.e. @consumed is typically set to the number
2533 * of bytes already consumed and the next call to 2571 * of bytes already consumed and the next call to
2534 * skb_seq_read() will return the remaining part of the block. 2572 * skb_seq_read() will return the remaining part of the block.
2535 * 2573 *
@@ -2758,6 +2796,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2758 struct sk_buff *segs = NULL; 2796 struct sk_buff *segs = NULL;
2759 struct sk_buff *tail = NULL; 2797 struct sk_buff *tail = NULL;
2760 struct sk_buff *fskb = skb_shinfo(skb)->frag_list; 2798 struct sk_buff *fskb = skb_shinfo(skb)->frag_list;
2799 skb_frag_t *skb_frag = skb_shinfo(skb)->frags;
2761 unsigned int mss = skb_shinfo(skb)->gso_size; 2800 unsigned int mss = skb_shinfo(skb)->gso_size;
2762 unsigned int doffset = skb->data - skb_mac_header(skb); 2801 unsigned int doffset = skb->data - skb_mac_header(skb);
2763 unsigned int offset = doffset; 2802 unsigned int offset = doffset;
@@ -2797,16 +2836,38 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2797 if (hsize > len || !sg) 2836 if (hsize > len || !sg)
2798 hsize = len; 2837 hsize = len;
2799 2838
2800 if (!hsize && i >= nfrags) { 2839 if (!hsize && i >= nfrags && skb_headlen(fskb) &&
2801 BUG_ON(fskb->len != len); 2840 (skb_headlen(fskb) == len || sg)) {
2841 BUG_ON(skb_headlen(fskb) > len);
2842
2843 i = 0;
2844 nfrags = skb_shinfo(fskb)->nr_frags;
2845 skb_frag = skb_shinfo(fskb)->frags;
2846 pos += skb_headlen(fskb);
2847
2848 while (pos < offset + len) {
2849 BUG_ON(i >= nfrags);
2850
2851 size = skb_frag_size(skb_frag);
2852 if (pos + size > offset + len)
2853 break;
2854
2855 i++;
2856 pos += size;
2857 skb_frag++;
2858 }
2802 2859
2803 pos += len;
2804 nskb = skb_clone(fskb, GFP_ATOMIC); 2860 nskb = skb_clone(fskb, GFP_ATOMIC);
2805 fskb = fskb->next; 2861 fskb = fskb->next;
2806 2862
2807 if (unlikely(!nskb)) 2863 if (unlikely(!nskb))
2808 goto err; 2864 goto err;
2809 2865
2866 if (unlikely(pskb_trim(nskb, len))) {
2867 kfree_skb(nskb);
2868 goto err;
2869 }
2870
2810 hsize = skb_end_offset(nskb); 2871 hsize = skb_end_offset(nskb);
2811 if (skb_cow_head(nskb, doffset + headroom)) { 2872 if (skb_cow_head(nskb, doffset + headroom)) {
2812 kfree_skb(nskb); 2873 kfree_skb(nskb);
@@ -2837,20 +2898,13 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2837 __copy_skb_header(nskb, skb); 2898 __copy_skb_header(nskb, skb);
2838 nskb->mac_len = skb->mac_len; 2899 nskb->mac_len = skb->mac_len;
2839 2900
2840 /* nskb and skb might have different headroom */ 2901 skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
2841 if (nskb->ip_summed == CHECKSUM_PARTIAL)
2842 nskb->csum_start += skb_headroom(nskb) - headroom;
2843
2844 skb_reset_mac_header(nskb);
2845 skb_set_network_header(nskb, skb->mac_len);
2846 nskb->transport_header = (nskb->network_header +
2847 skb_network_header_len(skb));
2848 2902
2849 skb_copy_from_linear_data_offset(skb, -tnl_hlen, 2903 skb_copy_from_linear_data_offset(skb, -tnl_hlen,
2850 nskb->data - tnl_hlen, 2904 nskb->data - tnl_hlen,
2851 doffset + tnl_hlen); 2905 doffset + tnl_hlen);
2852 2906
2853 if (fskb != skb_shinfo(skb)->frag_list) 2907 if (nskb->len == len + doffset)
2854 goto perform_csum_check; 2908 goto perform_csum_check;
2855 2909
2856 if (!sg) { 2910 if (!sg) {
@@ -2868,8 +2922,28 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2868 2922
2869 skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; 2923 skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
2870 2924
2871 while (pos < offset + len && i < nfrags) { 2925 while (pos < offset + len) {
2872 *frag = skb_shinfo(skb)->frags[i]; 2926 if (i >= nfrags) {
2927 BUG_ON(skb_headlen(fskb));
2928
2929 i = 0;
2930 nfrags = skb_shinfo(fskb)->nr_frags;
2931 skb_frag = skb_shinfo(fskb)->frags;
2932
2933 BUG_ON(!nfrags);
2934
2935 fskb = fskb->next;
2936 }
2937
2938 if (unlikely(skb_shinfo(nskb)->nr_frags >=
2939 MAX_SKB_FRAGS)) {
2940 net_warn_ratelimited(
2941 "skb_segment: too many frags: %u %u\n",
2942 pos, mss);
2943 goto err;
2944 }
2945
2946 *frag = *skb_frag;
2873 __skb_frag_ref(frag); 2947 __skb_frag_ref(frag);
2874 size = skb_frag_size(frag); 2948 size = skb_frag_size(frag);
2875 2949
@@ -2882,6 +2956,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2882 2956
2883 if (pos + size <= offset + len) { 2957 if (pos + size <= offset + len) {
2884 i++; 2958 i++;
2959 skb_frag++;
2885 pos += size; 2960 pos += size;
2886 } else { 2961 } else {
2887 skb_frag_size_sub(frag, pos + size - (offset + len)); 2962 skb_frag_size_sub(frag, pos + size - (offset + len));
@@ -2891,25 +2966,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2891 frag++; 2966 frag++;
2892 } 2967 }
2893 2968
2894 if (pos < offset + len) {
2895 struct sk_buff *fskb2 = fskb;
2896
2897 BUG_ON(pos + fskb->len != offset + len);
2898
2899 pos += fskb->len;
2900 fskb = fskb->next;
2901
2902 if (fskb2->next) {
2903 fskb2 = skb_clone(fskb2, GFP_ATOMIC);
2904 if (!fskb2)
2905 goto err;
2906 } else
2907 skb_get(fskb2);
2908
2909 SKB_FRAG_ASSERT(nskb);
2910 skb_shinfo(nskb)->frag_list = fskb2;
2911 }
2912
2913skip_fraglist: 2969skip_fraglist:
2914 nskb->data_len = len - hsize; 2970 nskb->data_len = len - hsize;
2915 nskb->len += nskb->data_len; 2971 nskb->len += nskb->data_len;
@@ -2936,32 +2992,30 @@ EXPORT_SYMBOL_GPL(skb_segment);
2936 2992
2937int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) 2993int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2938{ 2994{
2939 struct sk_buff *p = *head; 2995 struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
2940 struct sk_buff *nskb;
2941 struct skb_shared_info *skbinfo = skb_shinfo(skb);
2942 struct skb_shared_info *pinfo = skb_shinfo(p);
2943 unsigned int headroom;
2944 unsigned int len = skb_gro_len(skb);
2945 unsigned int offset = skb_gro_offset(skb); 2996 unsigned int offset = skb_gro_offset(skb);
2946 unsigned int headlen = skb_headlen(skb); 2997 unsigned int headlen = skb_headlen(skb);
2998 struct sk_buff *nskb, *lp, *p = *head;
2999 unsigned int len = skb_gro_len(skb);
2947 unsigned int delta_truesize; 3000 unsigned int delta_truesize;
3001 unsigned int headroom;
2948 3002
2949 if (p->len + len >= 65536) 3003 if (unlikely(p->len + len >= 65536))
2950 return -E2BIG; 3004 return -E2BIG;
2951 3005
2952 if (pinfo->frag_list) 3006 lp = NAPI_GRO_CB(p)->last ?: p;
2953 goto merge; 3007 pinfo = skb_shinfo(lp);
2954 else if (headlen <= offset) { 3008
3009 if (headlen <= offset) {
2955 skb_frag_t *frag; 3010 skb_frag_t *frag;
2956 skb_frag_t *frag2; 3011 skb_frag_t *frag2;
2957 int i = skbinfo->nr_frags; 3012 int i = skbinfo->nr_frags;
2958 int nr_frags = pinfo->nr_frags + i; 3013 int nr_frags = pinfo->nr_frags + i;
2959 3014
2960 offset -= headlen;
2961
2962 if (nr_frags > MAX_SKB_FRAGS) 3015 if (nr_frags > MAX_SKB_FRAGS)
2963 return -E2BIG; 3016 goto merge;
2964 3017
3018 offset -= headlen;
2965 pinfo->nr_frags = nr_frags; 3019 pinfo->nr_frags = nr_frags;
2966 skbinfo->nr_frags = 0; 3020 skbinfo->nr_frags = 0;
2967 3021
@@ -2992,7 +3046,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2992 unsigned int first_offset; 3046 unsigned int first_offset;
2993 3047
2994 if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS) 3048 if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
2995 return -E2BIG; 3049 goto merge;
2996 3050
2997 first_offset = skb->data - 3051 first_offset = skb->data -
2998 (unsigned char *)page_address(page) + 3052 (unsigned char *)page_address(page) +
@@ -3010,7 +3064,10 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
3010 delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); 3064 delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
3011 NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; 3065 NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
3012 goto done; 3066 goto done;
3013 } else if (skb_gro_len(p) != pinfo->gso_size) 3067 }
3068 if (pinfo->frag_list)
3069 goto merge;
3070 if (skb_gro_len(p) != pinfo->gso_size)
3014 return -E2BIG; 3071 return -E2BIG;
3015 3072
3016 headroom = skb_headroom(p); 3073 headroom = skb_headroom(p);
@@ -3062,16 +3119,24 @@ merge:
3062 3119
3063 __skb_pull(skb, offset); 3120 __skb_pull(skb, offset);
3064 3121
3065 NAPI_GRO_CB(p)->last->next = skb; 3122 if (!NAPI_GRO_CB(p)->last)
3123 skb_shinfo(p)->frag_list = skb;
3124 else
3125 NAPI_GRO_CB(p)->last->next = skb;
3066 NAPI_GRO_CB(p)->last = skb; 3126 NAPI_GRO_CB(p)->last = skb;
3067 skb_header_release(skb); 3127 skb_header_release(skb);
3128 lp = p;
3068 3129
3069done: 3130done:
3070 NAPI_GRO_CB(p)->count++; 3131 NAPI_GRO_CB(p)->count++;
3071 p->data_len += len; 3132 p->data_len += len;
3072 p->truesize += delta_truesize; 3133 p->truesize += delta_truesize;
3073 p->len += len; 3134 p->len += len;
3074 3135 if (lp != p) {
3136 lp->data_len += len;
3137 lp->truesize += delta_truesize;
3138 lp->len += len;
3139 }
3075 NAPI_GRO_CB(skb)->same_flow = 1; 3140 NAPI_GRO_CB(skb)->same_flow = 1;
3076 return 0; 3141 return 0;
3077} 3142}
@@ -3519,6 +3584,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
3519 skb->tstamp.tv64 = 0; 3584 skb->tstamp.tv64 = 0;
3520 skb->pkt_type = PACKET_HOST; 3585 skb->pkt_type = PACKET_HOST;
3521 skb->skb_iif = 0; 3586 skb->skb_iif = 0;
3587 skb->local_df = 0;
3522 skb_dst_drop(skb); 3588 skb_dst_drop(skb);
3523 skb->mark = 0; 3589 skb->mark = 0;
3524 secpath_reset(skb); 3590 secpath_reset(skb);
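The largest skbuff.c change turns skb_checksum() into a wrapper around __skb_checksum(), which performs the same walk over linear data, page frags and the frag list but calls an update/combine pair supplied by the caller through struct skb_checksum_ops. Supplying the ops explicitly looks like this hedged sketch, which simply reuses the csum_partial_ext/csum_block_add_ext pair that the wrapper above installs (the function name is invented):

#include <linux/skbuff.h>
#include <net/checksum.h>

/* Sketch: checksum an skb range with explicitly supplied ops. */
static __wsum example_skb_checksum(const struct sk_buff *skb,
				   int offset, int len)
{
	const struct skb_checksum_ops ops = {
		.update  = csum_partial_ext,
		.combine = csum_block_add_ext,
	};

	return __skb_checksum(skb, offset, len, 0, &ops);
}

A protocol that needs a different folding rule, which appears to be the point of the indirection, would plug its own update/combine callbacks into the same two slots.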
diff --git a/net/core/sock.c b/net/core/sock.c
index 0b39e7ae4383..5393b4b719d7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -475,12 +475,6 @@ discard_and_relse:
475} 475}
476EXPORT_SYMBOL(sk_receive_skb); 476EXPORT_SYMBOL(sk_receive_skb);
477 477
478void sk_reset_txq(struct sock *sk)
479{
480 sk_tx_queue_clear(sk);
481}
482EXPORT_SYMBOL(sk_reset_txq);
483
484struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) 478struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
485{ 479{
486 struct dst_entry *dst = __sk_dst_get(sk); 480 struct dst_entry *dst = __sk_dst_get(sk);
@@ -888,7 +882,7 @@ set_rcvbuf:
888 882
889 case SO_PEEK_OFF: 883 case SO_PEEK_OFF:
890 if (sock->ops->set_peek_off) 884 if (sock->ops->set_peek_off)
891 sock->ops->set_peek_off(sk, val); 885 ret = sock->ops->set_peek_off(sk, val);
892 else 886 else
893 ret = -EOPNOTSUPP; 887 ret = -EOPNOTSUPP;
894 break; 888 break;
@@ -914,6 +908,13 @@ set_rcvbuf:
914 } 908 }
915 break; 909 break;
916#endif 910#endif
911
912 case SO_MAX_PACING_RATE:
913 sk->sk_max_pacing_rate = val;
914 sk->sk_pacing_rate = min(sk->sk_pacing_rate,
915 sk->sk_max_pacing_rate);
916 break;
917
917 default: 918 default:
918 ret = -ENOPROTOOPT; 919 ret = -ENOPROTOOPT;
919 break; 920 break;
@@ -1177,6 +1178,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1177 break; 1178 break;
1178#endif 1179#endif
1179 1180
1181 case SO_MAX_PACING_RATE:
1182 v.val = sk->sk_max_pacing_rate;
1183 break;
1184
1180 default: 1185 default:
1181 return -ENOPROTOOPT; 1186 return -ENOPROTOOPT;
1182 } 1187 }
@@ -1836,7 +1841,17 @@ EXPORT_SYMBOL(sock_alloc_send_skb);
1836/* On 32bit arches, an skb frag is limited to 2^15 */ 1841/* On 32bit arches, an skb frag is limited to 2^15 */
1837#define SKB_FRAG_PAGE_ORDER get_order(32768) 1842#define SKB_FRAG_PAGE_ORDER get_order(32768)
1838 1843
1839bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) 1844/**
1845 * skb_page_frag_refill - check that a page_frag contains enough room
1846 * @sz: minimum size of the fragment we want to get
1847 * @pfrag: pointer to page_frag
1848 * @prio: priority for memory allocation
1849 *
1850 * Note: While this allocator tries to use high order pages, there is
1851 * no guarantee that allocations succeed. Therefore, @sz MUST be
 1852 * less than or equal to PAGE_SIZE.
1853 */
1854bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
1840{ 1855{
1841 int order; 1856 int order;
1842 1857
@@ -1845,16 +1860,16 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
1845 pfrag->offset = 0; 1860 pfrag->offset = 0;
1846 return true; 1861 return true;
1847 } 1862 }
1848 if (pfrag->offset < pfrag->size) 1863 if (pfrag->offset + sz <= pfrag->size)
1849 return true; 1864 return true;
1850 put_page(pfrag->page); 1865 put_page(pfrag->page);
1851 } 1866 }
1852 1867
1853 /* We restrict high order allocations to users that can afford to wait */ 1868 /* We restrict high order allocations to users that can afford to wait */
1854 order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0; 1869 order = (prio & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
1855 1870
1856 do { 1871 do {
1857 gfp_t gfp = sk->sk_allocation; 1872 gfp_t gfp = prio;
1858 1873
1859 if (order) 1874 if (order)
1860 gfp |= __GFP_COMP | __GFP_NOWARN; 1875 gfp |= __GFP_COMP | __GFP_NOWARN;
@@ -1866,6 +1881,15 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
1866 } 1881 }
1867 } while (--order >= 0); 1882 } while (--order >= 0);
1868 1883
1884 return false;
1885}
1886EXPORT_SYMBOL(skb_page_frag_refill);
1887
1888bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
1889{
1890 if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
1891 return true;
1892
1869 sk_enter_memory_pressure(sk); 1893 sk_enter_memory_pressure(sk);
1870 sk_stream_moderate_sndbuf(sk); 1894 sk_stream_moderate_sndbuf(sk);
1871 return false; 1895 return false;
@@ -2319,6 +2343,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
2319 sk->sk_ll_usec = sysctl_net_busy_read; 2343 sk->sk_ll_usec = sysctl_net_busy_read;
2320#endif 2344#endif
2321 2345
2346 sk->sk_max_pacing_rate = ~0U;
2322 sk->sk_pacing_rate = ~0U; 2347 sk->sk_pacing_rate = ~0U;
2323 /* 2348 /*
2324 * Before updating sk_refcnt, we must commit prior changes to memory 2349 * Before updating sk_refcnt, we must commit prior changes to memory
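sock.c introduces the SO_MAX_PACING_RATE socket option on both the set and get paths (with sk_pacing_rate clamped whenever the cap is lowered) and splits the transmit page-fragment refill into skb_page_frag_refill(), which takes the wanted size and gfp mask explicitly; sk_page_frag_refill() becomes a 32-byte wrapper that keeps the memory-pressure handling. From userspace the new option is an ordinary SOL_SOCKET integer; a minimal sketch:

#include <stdio.h>
#include <sys/socket.h>

/* Cap a socket's pacing rate, in bytes per second, using the option added
 * by this diff (requires headers/kernel that define SO_MAX_PACING_RATE). */
static int set_max_pacing_rate(int fd, unsigned int bytes_per_sec)
{
	if (setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
		       &bytes_per_sec, sizeof(bytes_per_sec)) < 0) {
		perror("setsockopt(SO_MAX_PACING_RATE)");
		return -1;
	}
	return 0;
}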
diff --git a/net/core/utils.c b/net/core/utils.c
index aa88e23fc87a..2f737bf90b3f 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -338,3 +338,52 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
338 csum_unfold(*sum))); 338 csum_unfold(*sum)));
339} 339}
340EXPORT_SYMBOL(inet_proto_csum_replace16); 340EXPORT_SYMBOL(inet_proto_csum_replace16);
341
342struct __net_random_once_work {
343 struct work_struct work;
344 struct static_key *key;
345};
346
347static void __net_random_once_deferred(struct work_struct *w)
348{
349 struct __net_random_once_work *work =
350 container_of(w, struct __net_random_once_work, work);
351 if (!static_key_enabled(work->key))
352 static_key_slow_inc(work->key);
353 kfree(work);
354}
355
356static void __net_random_once_disable_jump(struct static_key *key)
357{
358 struct __net_random_once_work *w;
359
360 w = kmalloc(sizeof(*w), GFP_ATOMIC);
361 if (!w)
362 return;
363
364 INIT_WORK(&w->work, __net_random_once_deferred);
365 w->key = key;
366 schedule_work(&w->work);
367}
368
369bool __net_get_random_once(void *buf, int nbytes, bool *done,
370 struct static_key *done_key)
371{
372 static DEFINE_SPINLOCK(lock);
373 unsigned long flags;
374
375 spin_lock_irqsave(&lock, flags);
376 if (*done) {
377 spin_unlock_irqrestore(&lock, flags);
378 return false;
379 }
380
381 get_random_bytes(buf, nbytes);
382 *done = true;
383 spin_unlock_irqrestore(&lock, flags);
384
385 __net_random_once_disable_jump(done_key);
386
387 return true;
388}
389EXPORT_SYMBOL(__net_get_random_once);
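__net_get_random_once() added above is the slow path behind the net_get_random_once() calls seen earlier in this diff: a spinlock serialises the very first caller, which fills the buffer and then schedules deferred work to flip the static key so later call sites can skip everything. Callers presumably reach it through a wrapper macro that provides the per-site bool and static_key; driving it directly, with invented names, would look roughly like this:

#include <linux/jump_label.h>
#include <linux/net.h>

static u32 example_seed;
static bool example_seed_done;
static struct static_key example_seed_key = STATIC_KEY_INIT_FALSE;

/* Fills example_seed exactly once; the scheduled work then enables the
 * static key so a real (macro-based) fast path can be patched out. */
static void example_seed_init(void)
{
	__net_get_random_once(&example_seed, sizeof(example_seed),
			      &example_seed_done, &example_seed_key);
}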