Diffstat (limited to 'net/core')
-rw-r--r--   net/core/Makefile              2
-rw-r--r--   net/core/dev.c               223
-rw-r--r--   net/core/ethtool.c            45
-rw-r--r--   net/core/fib_rules.c           3
-rw-r--r--   net/core/flow_dissector.c     21
-rw-r--r--   net/core/iovec.c             137
-rw-r--r--   net/core/neighbour.c          20
-rw-r--r--   net/core/net_namespace.c     213
-rw-r--r--   net/core/netpoll.c             2
-rw-r--r--   net/core/rtnetlink.c         138
-rw-r--r--   net/core/skbuff.c             59
-rw-r--r--   net/core/sock.c                3
-rw-r--r--   net/core/sysctl_net_core.c    13
13 files changed, 575 insertions, 304 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 235e6c50708d..fec0856dd6c0 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the Linux networking core. 2# Makefile for the Linux networking core.
3# 3#
4 4
5obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ 5obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \
6 gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o 6 gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o
7 7
8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o 8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 6c1556aeec29..d030575532a2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -371,9 +371,10 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
371static inline struct list_head *ptype_head(const struct packet_type *pt) 371static inline struct list_head *ptype_head(const struct packet_type *pt)
372{ 372{
373 if (pt->type == htons(ETH_P_ALL)) 373 if (pt->type == htons(ETH_P_ALL))
374 return &ptype_all; 374 return pt->dev ? &pt->dev->ptype_all : &ptype_all;
375 else 375 else
376 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; 376 return pt->dev ? &pt->dev->ptype_specific :
377 &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
377} 378}
378 379
379/** 380/**
@@ -1734,6 +1735,23 @@ static inline int deliver_skb(struct sk_buff *skb,
1734 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 1735 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1735} 1736}
1736 1737
1738static inline void deliver_ptype_list_skb(struct sk_buff *skb,
1739 struct packet_type **pt,
1740 struct net_device *dev, __be16 type,
1741 struct list_head *ptype_list)
1742{
1743 struct packet_type *ptype, *pt_prev = *pt;
1744
1745 list_for_each_entry_rcu(ptype, ptype_list, list) {
1746 if (ptype->type != type)
1747 continue;
1748 if (pt_prev)
1749 deliver_skb(skb, pt_prev, dev);
1750 pt_prev = ptype;
1751 }
1752 *pt = pt_prev;
1753}
1754
1737static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) 1755static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1738{ 1756{
1739 if (!ptype->af_packet_priv || !skb->sk) 1757 if (!ptype->af_packet_priv || !skb->sk)
@@ -1757,45 +1775,54 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1757 struct packet_type *ptype; 1775 struct packet_type *ptype;
1758 struct sk_buff *skb2 = NULL; 1776 struct sk_buff *skb2 = NULL;
1759 struct packet_type *pt_prev = NULL; 1777 struct packet_type *pt_prev = NULL;
1778 struct list_head *ptype_list = &ptype_all;
1760 1779
1761 rcu_read_lock(); 1780 rcu_read_lock();
1762 list_for_each_entry_rcu(ptype, &ptype_all, list) { 1781again:
1782 list_for_each_entry_rcu(ptype, ptype_list, list) {
1763 /* Never send packets back to the socket 1783 /* Never send packets back to the socket
1764 * they originated from - MvS (miquels@drinkel.ow.org) 1784 * they originated from - MvS (miquels@drinkel.ow.org)
1765 */ 1785 */
1766 if ((ptype->dev == dev || !ptype->dev) && 1786 if (skb_loop_sk(ptype, skb))
1767 (!skb_loop_sk(ptype, skb))) { 1787 continue;
1768 if (pt_prev) {
1769 deliver_skb(skb2, pt_prev, skb->dev);
1770 pt_prev = ptype;
1771 continue;
1772 }
1773 1788
1774 skb2 = skb_clone(skb, GFP_ATOMIC); 1789 if (pt_prev) {
1775 if (!skb2) 1790 deliver_skb(skb2, pt_prev, skb->dev);
1776 break; 1791 pt_prev = ptype;
1792 continue;
1793 }
1777 1794
1778 net_timestamp_set(skb2); 1795 /* need to clone skb, done only once */
1796 skb2 = skb_clone(skb, GFP_ATOMIC);
1797 if (!skb2)
1798 goto out_unlock;
1779 1799
1780 /* skb->nh should be correctly 1800 net_timestamp_set(skb2);
1781 set by sender, so that the second statement is
1782 just protection against buggy protocols.
1783 */
1784 skb_reset_mac_header(skb2);
1785
1786 if (skb_network_header(skb2) < skb2->data ||
1787 skb_network_header(skb2) > skb_tail_pointer(skb2)) {
1788 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1789 ntohs(skb2->protocol),
1790 dev->name);
1791 skb_reset_network_header(skb2);
1792 }
1793 1801
1794 skb2->transport_header = skb2->network_header; 1802 /* skb->nh should be correctly
1795 skb2->pkt_type = PACKET_OUTGOING; 1803 * set by sender, so that the second statement is
1796 pt_prev = ptype; 1804 * just protection against buggy protocols.
1805 */
1806 skb_reset_mac_header(skb2);
1807
1808 if (skb_network_header(skb2) < skb2->data ||
1809 skb_network_header(skb2) > skb_tail_pointer(skb2)) {
1810 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1811 ntohs(skb2->protocol),
1812 dev->name);
1813 skb_reset_network_header(skb2);
1797 } 1814 }
1815
1816 skb2->transport_header = skb2->network_header;
1817 skb2->pkt_type = PACKET_OUTGOING;
1818 pt_prev = ptype;
1798 } 1819 }
1820
1821 if (ptype_list == &ptype_all) {
1822 ptype_list = &dev->ptype_all;
1823 goto again;
1824 }
1825out_unlock:
1799 if (pt_prev) 1826 if (pt_prev)
1800 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); 1827 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1801 rcu_read_unlock(); 1828 rcu_read_unlock();
@@ -2549,7 +2576,7 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
2549 if (skb->encapsulation) 2576 if (skb->encapsulation)
2550 features &= dev->hw_enc_features; 2577 features &= dev->hw_enc_features;
2551 2578
2552 if (!vlan_tx_tag_present(skb)) { 2579 if (!skb_vlan_tag_present(skb)) {
2553 if (unlikely(protocol == htons(ETH_P_8021Q) || 2580 if (unlikely(protocol == htons(ETH_P_8021Q) ||
2554 protocol == htons(ETH_P_8021AD))) { 2581 protocol == htons(ETH_P_8021AD))) {
2555 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 2582 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2588,7 +2615,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
2588 unsigned int len; 2615 unsigned int len;
2589 int rc; 2616 int rc;
2590 2617
2591 if (!list_empty(&ptype_all)) 2618 if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
2592 dev_queue_xmit_nit(skb, dev); 2619 dev_queue_xmit_nit(skb, dev);
2593 2620
2594 len = skb->len; 2621 len = skb->len;
@@ -2630,7 +2657,7 @@ out:
2630static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, 2657static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
2631 netdev_features_t features) 2658 netdev_features_t features)
2632{ 2659{
2633 if (vlan_tx_tag_present(skb) && 2660 if (skb_vlan_tag_present(skb) &&
2634 !vlan_hw_offload_capable(features, skb->vlan_proto)) 2661 !vlan_hw_offload_capable(features, skb->vlan_proto))
2635 skb = __vlan_hwaccel_push_inside(skb); 2662 skb = __vlan_hwaccel_push_inside(skb);
2636 return skb; 2663 return skb;
@@ -3003,6 +3030,8 @@ static inline void ____napi_schedule(struct softnet_data *sd,
3003/* One global table that all flow-based protocols share. */ 3030/* One global table that all flow-based protocols share. */
3004struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; 3031struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
3005EXPORT_SYMBOL(rps_sock_flow_table); 3032EXPORT_SYMBOL(rps_sock_flow_table);
3033u32 rps_cpu_mask __read_mostly;
3034EXPORT_SYMBOL(rps_cpu_mask);
3006 3035
3007struct static_key rps_needed __read_mostly; 3036struct static_key rps_needed __read_mostly;
3008 3037
@@ -3059,16 +3088,17 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3059static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, 3088static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3060 struct rps_dev_flow **rflowp) 3089 struct rps_dev_flow **rflowp)
3061{ 3090{
3062 struct netdev_rx_queue *rxqueue; 3091 const struct rps_sock_flow_table *sock_flow_table;
3063 struct rps_map *map; 3092 struct netdev_rx_queue *rxqueue = dev->_rx;
3064 struct rps_dev_flow_table *flow_table; 3093 struct rps_dev_flow_table *flow_table;
3065 struct rps_sock_flow_table *sock_flow_table; 3094 struct rps_map *map;
3066 int cpu = -1; 3095 int cpu = -1;
3067 u16 tcpu; 3096 u32 tcpu;
3068 u32 hash; 3097 u32 hash;
3069 3098
3070 if (skb_rx_queue_recorded(skb)) { 3099 if (skb_rx_queue_recorded(skb)) {
3071 u16 index = skb_get_rx_queue(skb); 3100 u16 index = skb_get_rx_queue(skb);
3101
3072 if (unlikely(index >= dev->real_num_rx_queues)) { 3102 if (unlikely(index >= dev->real_num_rx_queues)) {
3073 WARN_ONCE(dev->real_num_rx_queues > 1, 3103 WARN_ONCE(dev->real_num_rx_queues > 1,
3074 "%s received packet on queue %u, but number " 3104 "%s received packet on queue %u, but number "
@@ -3076,39 +3106,40 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3076 dev->name, index, dev->real_num_rx_queues); 3106 dev->name, index, dev->real_num_rx_queues);
3077 goto done; 3107 goto done;
3078 } 3108 }
3079 rxqueue = dev->_rx + index; 3109 rxqueue += index;
3080 } else 3110 }
3081 rxqueue = dev->_rx;
3082 3111
3112 /* Avoid computing hash if RFS/RPS is not active for this rxqueue */
3113
3114 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3083 map = rcu_dereference(rxqueue->rps_map); 3115 map = rcu_dereference(rxqueue->rps_map);
3084 if (map) { 3116 if (!flow_table && !map)
3085 if (map->len == 1 &&
3086 !rcu_access_pointer(rxqueue->rps_flow_table)) {
3087 tcpu = map->cpus[0];
3088 if (cpu_online(tcpu))
3089 cpu = tcpu;
3090 goto done;
3091 }
3092 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
3093 goto done; 3117 goto done;
3094 }
3095 3118
3096 skb_reset_network_header(skb); 3119 skb_reset_network_header(skb);
3097 hash = skb_get_hash(skb); 3120 hash = skb_get_hash(skb);
3098 if (!hash) 3121 if (!hash)
3099 goto done; 3122 goto done;
3100 3123
3101 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3102 sock_flow_table = rcu_dereference(rps_sock_flow_table); 3124 sock_flow_table = rcu_dereference(rps_sock_flow_table);
3103 if (flow_table && sock_flow_table) { 3125 if (flow_table && sock_flow_table) {
3104 u16 next_cpu;
3105 struct rps_dev_flow *rflow; 3126 struct rps_dev_flow *rflow;
3127 u32 next_cpu;
3128 u32 ident;
3129
3130 /* First check into global flow table if there is a match */
3131 ident = sock_flow_table->ents[hash & sock_flow_table->mask];
3132 if ((ident ^ hash) & ~rps_cpu_mask)
3133 goto try_rps;
3134
3135 next_cpu = ident & rps_cpu_mask;
3106 3136
3137 /* OK, now we know there is a match,
3138 * we can look at the local (per receive queue) flow table
3139 */
3107 rflow = &flow_table->flows[hash & flow_table->mask]; 3140 rflow = &flow_table->flows[hash & flow_table->mask];
3108 tcpu = rflow->cpu; 3141 tcpu = rflow->cpu;
3109 3142
3110 next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask];
3111
3112 /* 3143 /*
3113 * If the desired CPU (where last recvmsg was done) is 3144 * If the desired CPU (where last recvmsg was done) is
3114 * different from current CPU (one in the rx-queue flow 3145 * different from current CPU (one in the rx-queue flow
@@ -3135,6 +3166,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3135 } 3166 }
3136 } 3167 }
3137 3168
3169try_rps:
3170
3138 if (map) { 3171 if (map) {
3139 tcpu = map->cpus[reciprocal_scale(hash, map->len)]; 3172 tcpu = map->cpus[reciprocal_scale(hash, map->len)];
3140 if (cpu_online(tcpu)) { 3173 if (cpu_online(tcpu)) {
@@ -3586,7 +3619,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
3586 struct packet_type *ptype, *pt_prev; 3619 struct packet_type *ptype, *pt_prev;
3587 rx_handler_func_t *rx_handler; 3620 rx_handler_func_t *rx_handler;
3588 struct net_device *orig_dev; 3621 struct net_device *orig_dev;
3589 struct net_device *null_or_dev;
3590 bool deliver_exact = false; 3622 bool deliver_exact = false;
3591 int ret = NET_RX_DROP; 3623 int ret = NET_RX_DROP;
3592 __be16 type; 3624 __be16 type;
@@ -3629,11 +3661,15 @@ another_round:
3629 goto skip_taps; 3661 goto skip_taps;
3630 3662
3631 list_for_each_entry_rcu(ptype, &ptype_all, list) { 3663 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3632 if (!ptype->dev || ptype->dev == skb->dev) { 3664 if (pt_prev)
3633 if (pt_prev) 3665 ret = deliver_skb(skb, pt_prev, orig_dev);
3634 ret = deliver_skb(skb, pt_prev, orig_dev); 3666 pt_prev = ptype;
3635 pt_prev = ptype; 3667 }
3636 } 3668
3669 list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
3670 if (pt_prev)
3671 ret = deliver_skb(skb, pt_prev, orig_dev);
3672 pt_prev = ptype;
3637 } 3673 }
3638 3674
3639skip_taps: 3675skip_taps:
@@ -3647,7 +3683,7 @@ ncls:
3647 if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) 3683 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
3648 goto drop; 3684 goto drop;
3649 3685
3650 if (vlan_tx_tag_present(skb)) { 3686 if (skb_vlan_tag_present(skb)) {
3651 if (pt_prev) { 3687 if (pt_prev) {
3652 ret = deliver_skb(skb, pt_prev, orig_dev); 3688 ret = deliver_skb(skb, pt_prev, orig_dev);
3653 pt_prev = NULL; 3689 pt_prev = NULL;
@@ -3679,8 +3715,8 @@ ncls:
3679 } 3715 }
3680 } 3716 }
3681 3717
3682 if (unlikely(vlan_tx_tag_present(skb))) { 3718 if (unlikely(skb_vlan_tag_present(skb))) {
3683 if (vlan_tx_tag_get_id(skb)) 3719 if (skb_vlan_tag_get_id(skb))
3684 skb->pkt_type = PACKET_OTHERHOST; 3720 skb->pkt_type = PACKET_OTHERHOST;
3685 /* Note: we might in the future use prio bits 3721 /* Note: we might in the future use prio bits
3686 * and set skb->priority like in vlan_do_receive() 3722 * and set skb->priority like in vlan_do_receive()
@@ -3689,19 +3725,21 @@ ncls:
3689 skb->vlan_tci = 0; 3725 skb->vlan_tci = 0;
3690 } 3726 }
3691 3727
3728 type = skb->protocol;
3729
3692 /* deliver only exact match when indicated */ 3730 /* deliver only exact match when indicated */
3693 null_or_dev = deliver_exact ? skb->dev : NULL; 3731 if (likely(!deliver_exact)) {
3732 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
3733 &ptype_base[ntohs(type) &
3734 PTYPE_HASH_MASK]);
3735 }
3694 3736
3695 type = skb->protocol; 3737 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
3696 list_for_each_entry_rcu(ptype, 3738 &orig_dev->ptype_specific);
3697 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 3739
3698 if (ptype->type == type && 3740 if (unlikely(skb->dev != orig_dev)) {
3699 (ptype->dev == null_or_dev || ptype->dev == skb->dev || 3741 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
3700 ptype->dev == orig_dev)) { 3742 &skb->dev->ptype_specific);
3701 if (pt_prev)
3702 ret = deliver_skb(skb, pt_prev, orig_dev);
3703 pt_prev = ptype;
3704 }
3705 } 3743 }
3706 3744
3707 if (pt_prev) { 3745 if (pt_prev) {
@@ -5294,6 +5332,26 @@ void netdev_upper_dev_unlink(struct net_device *dev,
5294} 5332}
5295EXPORT_SYMBOL(netdev_upper_dev_unlink); 5333EXPORT_SYMBOL(netdev_upper_dev_unlink);
5296 5334
5335/**
5336 * netdev_bonding_info_change - Dispatch event about slave change
5337 * @dev: device
5338 * @netdev_bonding_info: info to dispatch
5339 *
5340 * Send NETDEV_BONDING_INFO to netdev notifiers with info.
5341 * The caller must hold the RTNL lock.
5342 */
5343void netdev_bonding_info_change(struct net_device *dev,
5344 struct netdev_bonding_info *bonding_info)
5345{
5346 struct netdev_notifier_bonding_info info;
5347
5348 memcpy(&info.bonding_info, bonding_info,
5349 sizeof(struct netdev_bonding_info));
5350 call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
5351 &info.info);
5352}
5353EXPORT_SYMBOL(netdev_bonding_info_change);
5354
5297static void netdev_adjacent_add_links(struct net_device *dev) 5355static void netdev_adjacent_add_links(struct net_device *dev)
5298{ 5356{
5299 struct netdev_adjacent *iter; 5357 struct netdev_adjacent *iter;
@@ -6143,13 +6201,16 @@ static int netif_alloc_rx_queues(struct net_device *dev)
6143{ 6201{
6144 unsigned int i, count = dev->num_rx_queues; 6202 unsigned int i, count = dev->num_rx_queues;
6145 struct netdev_rx_queue *rx; 6203 struct netdev_rx_queue *rx;
6204 size_t sz = count * sizeof(*rx);
6146 6205
6147 BUG_ON(count < 1); 6206 BUG_ON(count < 1);
6148 6207
6149 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); 6208 rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
6150 if (!rx) 6209 if (!rx) {
6151 return -ENOMEM; 6210 rx = vzalloc(sz);
6152 6211 if (!rx)
6212 return -ENOMEM;
6213 }
6153 dev->_rx = rx; 6214 dev->_rx = rx;
6154 6215
6155 for (i = 0; i < count; i++) 6216 for (i = 0; i < count; i++)
@@ -6547,6 +6608,8 @@ void netdev_run_todo(void)
6547 6608
6548 /* paranoia */ 6609 /* paranoia */
6549 BUG_ON(netdev_refcnt_read(dev)); 6610 BUG_ON(netdev_refcnt_read(dev));
6611 BUG_ON(!list_empty(&dev->ptype_all));
6612 BUG_ON(!list_empty(&dev->ptype_specific));
6550 WARN_ON(rcu_access_pointer(dev->ip_ptr)); 6613 WARN_ON(rcu_access_pointer(dev->ip_ptr));
6551 WARN_ON(rcu_access_pointer(dev->ip6_ptr)); 6614 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
6552 WARN_ON(dev->dn_ptr); 6615 WARN_ON(dev->dn_ptr);
@@ -6729,6 +6792,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6729 INIT_LIST_HEAD(&dev->adj_list.lower); 6792 INIT_LIST_HEAD(&dev->adj_list.lower);
6730 INIT_LIST_HEAD(&dev->all_adj_list.upper); 6793 INIT_LIST_HEAD(&dev->all_adj_list.upper);
6731 INIT_LIST_HEAD(&dev->all_adj_list.lower); 6794 INIT_LIST_HEAD(&dev->all_adj_list.lower);
6795 INIT_LIST_HEAD(&dev->ptype_all);
6796 INIT_LIST_HEAD(&dev->ptype_specific);
6732 dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; 6797 dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
6733 setup(dev); 6798 setup(dev);
6734 6799
@@ -6779,7 +6844,7 @@ void free_netdev(struct net_device *dev)
6779 6844
6780 netif_free_tx_queues(dev); 6845 netif_free_tx_queues(dev);
6781#ifdef CONFIG_SYSFS 6846#ifdef CONFIG_SYSFS
6782 kfree(dev->_rx); 6847 kvfree(dev->_rx);
6783#endif 6848#endif
6784 6849
6785 kfree(rcu_dereference_protected(dev->ingress_queue, 1)); 6850 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
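
A minimal sketch of what the new per-device taps enable, assuming a module context and a hypothetical device name "eth0": with this change, a packet_type whose ->dev is set is placed on that device's private ptype_all list by ptype_head(), so dev_queue_xmit_nit() and __netif_receive_skb_core() only walk it for traffic on that device instead of scanning every handler globally.

#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/skbuff.h>

static int my_tap_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	pr_info("tap: %u bytes on %s\n", skb->len, dev->name);
	kfree_skb(skb);		/* deliver_skb() gave us our own reference */
	return 0;
}

static struct packet_type my_tap __read_mostly = {
	.type = cpu_to_be16(ETH_P_ALL),	/* tap all protocols ... */
	.func = my_tap_rcv,
	/* ... but only on the device set in ->dev below */
};

static int __init my_tap_init(void)
{
	struct net_device *dev = dev_get_by_name(&init_net, "eth0");

	if (!dev)
		return -ENODEV;
	my_tap.dev = dev;	/* lands on dev->ptype_all, not the global list */
	dev_add_pack(&my_tap);
	return 0;
}

static void __exit my_tap_exit(void)
{
	dev_remove_pack(&my_tap);
	dev_put(my_tap.dev);
}

module_init(my_tap_init);
module_exit(my_tap_exit);
MODULE_LICENSE("GPL");

Before this patch such a handler sat on the global ptype_all list and was filtered by per-packet "ptype->dev == dev" checks; afterwards unrelated devices never touch it.
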
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 550892cd6b3f..91f74f3eb204 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1597,20 +1597,31 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
1597 return err; 1597 return err;
1598} 1598}
1599 1599
1600static int __ethtool_get_module_info(struct net_device *dev,
1601 struct ethtool_modinfo *modinfo)
1602{
1603 const struct ethtool_ops *ops = dev->ethtool_ops;
1604 struct phy_device *phydev = dev->phydev;
1605
1606 if (phydev && phydev->drv && phydev->drv->module_info)
1607 return phydev->drv->module_info(phydev, modinfo);
1608
1609 if (ops->get_module_info)
1610 return ops->get_module_info(dev, modinfo);
1611
1612 return -EOPNOTSUPP;
1613}
1614
1600static int ethtool_get_module_info(struct net_device *dev, 1615static int ethtool_get_module_info(struct net_device *dev,
1601 void __user *useraddr) 1616 void __user *useraddr)
1602{ 1617{
1603 int ret; 1618 int ret;
1604 struct ethtool_modinfo modinfo; 1619 struct ethtool_modinfo modinfo;
1605 const struct ethtool_ops *ops = dev->ethtool_ops;
1606
1607 if (!ops->get_module_info)
1608 return -EOPNOTSUPP;
1609 1620
1610 if (copy_from_user(&modinfo, useraddr, sizeof(modinfo))) 1621 if (copy_from_user(&modinfo, useraddr, sizeof(modinfo)))
1611 return -EFAULT; 1622 return -EFAULT;
1612 1623
1613 ret = ops->get_module_info(dev, &modinfo); 1624 ret = __ethtool_get_module_info(dev, &modinfo);
1614 if (ret) 1625 if (ret)
1615 return ret; 1626 return ret;
1616 1627
@@ -1620,21 +1631,33 @@ static int ethtool_get_module_info(struct net_device *dev,
1620 return 0; 1631 return 0;
1621} 1632}
1622 1633
1634static int __ethtool_get_module_eeprom(struct net_device *dev,
1635 struct ethtool_eeprom *ee, u8 *data)
1636{
1637 const struct ethtool_ops *ops = dev->ethtool_ops;
1638 struct phy_device *phydev = dev->phydev;
1639
1640 if (phydev && phydev->drv && phydev->drv->module_eeprom)
1641 return phydev->drv->module_eeprom(phydev, ee, data);
1642
1643 if (ops->get_module_eeprom)
1644 return ops->get_module_eeprom(dev, ee, data);
1645
1646 return -EOPNOTSUPP;
1647}
1648
1623static int ethtool_get_module_eeprom(struct net_device *dev, 1649static int ethtool_get_module_eeprom(struct net_device *dev,
1624 void __user *useraddr) 1650 void __user *useraddr)
1625{ 1651{
1626 int ret; 1652 int ret;
1627 struct ethtool_modinfo modinfo; 1653 struct ethtool_modinfo modinfo;
1628 const struct ethtool_ops *ops = dev->ethtool_ops;
1629
1630 if (!ops->get_module_info || !ops->get_module_eeprom)
1631 return -EOPNOTSUPP;
1632 1654
1633 ret = ops->get_module_info(dev, &modinfo); 1655 ret = __ethtool_get_module_info(dev, &modinfo);
1634 if (ret) 1656 if (ret)
1635 return ret; 1657 return ret;
1636 1658
1637 return ethtool_get_any_eeprom(dev, useraddr, ops->get_module_eeprom, 1659 return ethtool_get_any_eeprom(dev, useraddr,
1660 __ethtool_get_module_eeprom,
1638 modinfo.eeprom_len); 1661 modinfo.eeprom_len);
1639} 1662}
1640 1663
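
A hedged userspace sketch of the path this refactor serves, assuming an interface named "eth0": ETHTOOL_GMODULEINFO now reaches phydev->drv->module_info() when the attached PHY driver implements it, and only falls back to the netdev's ethtool_ops otherwise, so a query like the one below can succeed even when the MAC driver has no get_module_info.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_modinfo modinfo = { .cmd = ETHTOOL_GMODULEINFO };
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* hypothetical device */
	ifr.ifr_data = (void *)&modinfo;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("ETHTOOL_GMODULEINFO");
		return 1;
	}
	printf("module type %u, eeprom length %u bytes\n",
	       modinfo.type, modinfo.eeprom_len);
	close(fd);
	return 0;
}

ethtool -m issues the same pair of requests (module info, then module EEPROM), which is why both helpers gained the phydev fallback together.
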
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 185c341fafbd..44706e81b2e0 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -609,7 +609,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
609 if (ops->fill(rule, skb, frh) < 0) 609 if (ops->fill(rule, skb, frh) < 0)
610 goto nla_put_failure; 610 goto nla_put_failure;
611 611
612 return nlmsg_end(skb, nlh); 612 nlmsg_end(skb, nlh);
613 return 0;
613 614
614nla_put_failure: 615nla_put_failure:
615 nlmsg_cancel(skb, nlh); 616 nlmsg_cancel(skb, nlh);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 45084938c403..2c35c02a931e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -178,6 +178,20 @@ ipv6:
178 return false; 178 return false;
179 } 179 }
180 } 180 }
181 case htons(ETH_P_TIPC): {
182 struct {
183 __be32 pre[3];
184 __be32 srcnode;
185 } *hdr, _hdr;
186 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
187 if (!hdr)
188 return false;
189 flow->src = hdr->srcnode;
190 flow->dst = 0;
191 flow->n_proto = proto;
192 flow->thoff = (u16)nhoff;
193 return true;
194 }
181 case htons(ETH_P_FCOE): 195 case htons(ETH_P_FCOE):
182 flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); 196 flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
183 /* fall through */ 197 /* fall through */
@@ -408,7 +422,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
408 dev_maps = rcu_dereference(dev->xps_maps); 422 dev_maps = rcu_dereference(dev->xps_maps);
409 if (dev_maps) { 423 if (dev_maps) {
410 map = rcu_dereference( 424 map = rcu_dereference(
411 dev_maps->cpu_map[raw_smp_processor_id()]); 425 dev_maps->cpu_map[skb->sender_cpu - 1]);
412 if (map) { 426 if (map) {
413 if (map->len == 1) 427 if (map->len == 1)
414 queue_index = map->queues[0]; 428 queue_index = map->queues[0];
@@ -454,6 +468,11 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
454{ 468{
455 int queue_index = 0; 469 int queue_index = 0;
456 470
471#ifdef CONFIG_XPS
472 if (skb->sender_cpu == 0)
473 skb->sender_cpu = raw_smp_processor_id() + 1;
474#endif
475
457 if (dev->real_num_tx_queues != 1) { 476 if (dev->real_num_tx_queues != 1) {
458 const struct net_device_ops *ops = dev->netdev_ops; 477 const struct net_device_ops *ops = dev->netdev_ops;
459 if (ops->ndo_select_queue) 478 if (ops->ndo_select_queue)
diff --git a/net/core/iovec.c b/net/core/iovec.c
deleted file mode 100644
index dcbe98b3726a..000000000000
--- a/net/core/iovec.c
+++ /dev/null
@@ -1,137 +0,0 @@
1/*
2 * iovec manipulation routines.
3 *
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 *
10 * Fixes:
11 * Andrew Lunn : Errors in iovec copying.
12 * Pedro Roque : Added memcpy_fromiovecend and
13 * csum_..._fromiovecend.
14 * Andi Kleen : fixed error handling for 2.1
15 * Alexey Kuznetsov: 2.1 optimisations
16 * Andi Kleen : Fix csum*fromiovecend for IPv6.
17 */
18
19#include <linux/errno.h>
20#include <linux/module.h>
21#include <linux/kernel.h>
22#include <linux/mm.h>
23#include <linux/net.h>
24#include <linux/in6.h>
25#include <asm/uaccess.h>
26#include <asm/byteorder.h>
27#include <net/checksum.h>
28#include <net/sock.h>
29
30/*
31 * And now for the all-in-one: copy and checksum from a user iovec
32 * directly to a datagram
33 * Calls to csum_partial but the last must be in 32 bit chunks
34 *
35 * ip_build_xmit must ensure that when fragmenting only the last
36 * call to this function will be unaligned also.
37 */
38int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
39 int offset, unsigned int len, __wsum *csump)
40{
41 __wsum csum = *csump;
42 int partial_cnt = 0, err = 0;
43
44 /* Skip over the finished iovecs */
45 while (offset >= iov->iov_len) {
46 offset -= iov->iov_len;
47 iov++;
48 }
49
50 while (len > 0) {
51 u8 __user *base = iov->iov_base + offset;
52 int copy = min_t(unsigned int, len, iov->iov_len - offset);
53
54 offset = 0;
55
56 /* There is a remnant from previous iov. */
57 if (partial_cnt) {
58 int par_len = 4 - partial_cnt;
59
60 /* iov component is too short ... */
61 if (par_len > copy) {
62 if (copy_from_user(kdata, base, copy))
63 goto out_fault;
64 kdata += copy;
65 base += copy;
66 partial_cnt += copy;
67 len -= copy;
68 iov++;
69 if (len)
70 continue;
71 *csump = csum_partial(kdata - partial_cnt,
72 partial_cnt, csum);
73 goto out;
74 }
75 if (copy_from_user(kdata, base, par_len))
76 goto out_fault;
77 csum = csum_partial(kdata - partial_cnt, 4, csum);
78 kdata += par_len;
79 base += par_len;
80 copy -= par_len;
81 len -= par_len;
82 partial_cnt = 0;
83 }
84
85 if (len > copy) {
86 partial_cnt = copy % 4;
87 if (partial_cnt) {
88 copy -= partial_cnt;
89 if (copy_from_user(kdata + copy, base + copy,
90 partial_cnt))
91 goto out_fault;
92 }
93 }
94
95 if (copy) {
96 csum = csum_and_copy_from_user(base, kdata, copy,
97 csum, &err);
98 if (err)
99 goto out;
100 }
101 len -= copy + partial_cnt;
102 kdata += copy + partial_cnt;
103 iov++;
104 }
105 *csump = csum;
106out:
107 return err;
108
109out_fault:
110 err = -EFAULT;
111 goto out;
112}
113EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
114
115unsigned long iov_pages(const struct iovec *iov, int offset,
116 unsigned long nr_segs)
117{
118 unsigned long seg, base;
119 int pages = 0, len, size;
120
121 while (nr_segs && (offset >= iov->iov_len)) {
122 offset -= iov->iov_len;
123 ++iov;
124 --nr_segs;
125 }
126
127 for (seg = 0; seg < nr_segs; seg++) {
128 base = (unsigned long)iov[seg].iov_base + offset;
129 len = iov[seg].iov_len - offset;
130 size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
131 pages += size;
132 offset = 0;
133 }
134
135 return pages;
136}
137EXPORT_SYMBOL(iov_pages);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8d614c93f86a..70fe9e10ac86 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1884,7 +1884,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1884 goto nla_put_failure; 1884 goto nla_put_failure;
1885 1885
1886 read_unlock_bh(&tbl->lock); 1886 read_unlock_bh(&tbl->lock);
1887 return nlmsg_end(skb, nlh); 1887 nlmsg_end(skb, nlh);
1888 return 0;
1888 1889
1889nla_put_failure: 1890nla_put_failure:
1890 read_unlock_bh(&tbl->lock); 1891 read_unlock_bh(&tbl->lock);
@@ -1917,7 +1918,8 @@ static int neightbl_fill_param_info(struct sk_buff *skb,
1917 goto errout; 1918 goto errout;
1918 1919
1919 read_unlock_bh(&tbl->lock); 1920 read_unlock_bh(&tbl->lock);
1920 return nlmsg_end(skb, nlh); 1921 nlmsg_end(skb, nlh);
1922 return 0;
1921errout: 1923errout:
1922 read_unlock_bh(&tbl->lock); 1924 read_unlock_bh(&tbl->lock);
1923 nlmsg_cancel(skb, nlh); 1925 nlmsg_cancel(skb, nlh);
@@ -2126,7 +2128,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2126 2128
2127 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, 2129 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2128 cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, 2130 cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2129 NLM_F_MULTI) <= 0) 2131 NLM_F_MULTI) < 0)
2130 break; 2132 break;
2131 2133
2132 nidx = 0; 2134 nidx = 0;
@@ -2142,7 +2144,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2142 NETLINK_CB(cb->skb).portid, 2144 NETLINK_CB(cb->skb).portid,
2143 cb->nlh->nlmsg_seq, 2145 cb->nlh->nlmsg_seq,
2144 RTM_NEWNEIGHTBL, 2146 RTM_NEWNEIGHTBL,
2145 NLM_F_MULTI) <= 0) 2147 NLM_F_MULTI) < 0)
2146 goto out; 2148 goto out;
2147 next: 2149 next:
2148 nidx++; 2150 nidx++;
@@ -2202,7 +2204,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2202 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) 2204 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2203 goto nla_put_failure; 2205 goto nla_put_failure;
2204 2206
2205 return nlmsg_end(skb, nlh); 2207 nlmsg_end(skb, nlh);
2208 return 0;
2206 2209
2207nla_put_failure: 2210nla_put_failure:
2208 nlmsg_cancel(skb, nlh); 2211 nlmsg_cancel(skb, nlh);
@@ -2232,7 +2235,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2232 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) 2235 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2233 goto nla_put_failure; 2236 goto nla_put_failure;
2234 2237
2235 return nlmsg_end(skb, nlh); 2238 nlmsg_end(skb, nlh);
2239 return 0;
2236 2240
2237nla_put_failure: 2241nla_put_failure:
2238 nlmsg_cancel(skb, nlh); 2242 nlmsg_cancel(skb, nlh);
@@ -2270,7 +2274,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2270 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2274 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2271 cb->nlh->nlmsg_seq, 2275 cb->nlh->nlmsg_seq,
2272 RTM_NEWNEIGH, 2276 RTM_NEWNEIGH,
2273 NLM_F_MULTI) <= 0) { 2277 NLM_F_MULTI) < 0) {
2274 rc = -1; 2278 rc = -1;
2275 goto out; 2279 goto out;
2276 } 2280 }
@@ -2307,7 +2311,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2307 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2311 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2308 cb->nlh->nlmsg_seq, 2312 cb->nlh->nlmsg_seq,
2309 RTM_NEWNEIGH, 2313 RTM_NEWNEIGH,
2310 NLM_F_MULTI, tbl) <= 0) { 2314 NLM_F_MULTI, tbl) < 0) {
2311 read_unlock_bh(&tbl->lock); 2315 read_unlock_bh(&tbl->lock);
2312 rc = -1; 2316 rc = -1;
2313 goto out; 2317 goto out;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ce780c722e48..cb5290b8c428 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -15,6 +15,10 @@
15#include <linux/file.h> 15#include <linux/file.h>
16#include <linux/export.h> 16#include <linux/export.h>
17#include <linux/user_namespace.h> 17#include <linux/user_namespace.h>
18#include <linux/net_namespace.h>
19#include <linux/rtnetlink.h>
20#include <net/sock.h>
21#include <net/netlink.h>
18#include <net/net_namespace.h> 22#include <net/net_namespace.h>
19#include <net/netns/generic.h> 23#include <net/netns/generic.h>
20 24
@@ -144,6 +148,78 @@ static void ops_free_list(const struct pernet_operations *ops,
144 } 148 }
145} 149}
146 150
151static int alloc_netid(struct net *net, struct net *peer, int reqid)
152{
153 int min = 0, max = 0;
154
155 ASSERT_RTNL();
156
157 if (reqid >= 0) {
158 min = reqid;
159 max = reqid + 1;
160 }
161
162 return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
163}
164
165/* This function is used by idr_for_each(). If net is equal to peer, the
166 * function returns the id so that idr_for_each() stops. Because we cannot
167 * returns the id 0 (idr_for_each() will not stop), we return the magic value
168 * NET_ID_ZERO (-1) for it.
169 */
170#define NET_ID_ZERO -1
171static int net_eq_idr(int id, void *net, void *peer)
172{
173 if (net_eq(net, peer))
174 return id ? : NET_ID_ZERO;
175 return 0;
176}
177
178static int __peernet2id(struct net *net, struct net *peer, bool alloc)
179{
180 int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
181
182 ASSERT_RTNL();
183
184 /* Magic value for id 0. */
185 if (id == NET_ID_ZERO)
186 return 0;
187 if (id > 0)
188 return id;
189
190 if (alloc)
191 return alloc_netid(net, peer, -1);
192
193 return -ENOENT;
194}
195
196/* This function returns the id of a peer netns. If no id is assigned, one will
197 * be allocated and returned.
198 */
199int peernet2id(struct net *net, struct net *peer)
200{
201 int id = __peernet2id(net, peer, true);
202
203 return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
204}
205EXPORT_SYMBOL(peernet2id);
206
207struct net *get_net_ns_by_id(struct net *net, int id)
208{
209 struct net *peer;
210
211 if (id < 0)
212 return NULL;
213
214 rcu_read_lock();
215 peer = idr_find(&net->netns_ids, id);
216 if (peer)
217 get_net(peer);
218 rcu_read_unlock();
219
220 return peer;
221}
222
147/* 223/*
148 * setup_net runs the initializers for the network namespace object. 224 * setup_net runs the initializers for the network namespace object.
149 */ 225 */
@@ -158,6 +234,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
158 atomic_set(&net->passive, 1); 234 atomic_set(&net->passive, 1);
159 net->dev_base_seq = 1; 235 net->dev_base_seq = 1;
160 net->user_ns = user_ns; 236 net->user_ns = user_ns;
237 idr_init(&net->netns_ids);
161 238
162#ifdef NETNS_REFCNT_DEBUG 239#ifdef NETNS_REFCNT_DEBUG
163 atomic_set(&net->use_count, 0); 240 atomic_set(&net->use_count, 0);
@@ -288,6 +365,14 @@ static void cleanup_net(struct work_struct *work)
288 list_for_each_entry(net, &net_kill_list, cleanup_list) { 365 list_for_each_entry(net, &net_kill_list, cleanup_list) {
289 list_del_rcu(&net->list); 366 list_del_rcu(&net->list);
290 list_add_tail(&net->exit_list, &net_exit_list); 367 list_add_tail(&net->exit_list, &net_exit_list);
368 for_each_net(tmp) {
369 int id = __peernet2id(tmp, net, false);
370
371 if (id >= 0)
372 idr_remove(&tmp->netns_ids, id);
373 }
374 idr_destroy(&net->netns_ids);
375
291 } 376 }
292 rtnl_unlock(); 377 rtnl_unlock();
293 378
@@ -361,6 +446,7 @@ struct net *get_net_ns_by_fd(int fd)
361 return ERR_PTR(-EINVAL); 446 return ERR_PTR(-EINVAL);
362} 447}
363#endif 448#endif
449EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
364 450
365struct net *get_net_ns_by_pid(pid_t pid) 451struct net *get_net_ns_by_pid(pid_t pid)
366{ 452{
@@ -402,6 +488,130 @@ static struct pernet_operations __net_initdata net_ns_ops = {
402 .exit = net_ns_net_exit, 488 .exit = net_ns_net_exit,
403}; 489};
404 490
491static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
492 [NETNSA_NONE] = { .type = NLA_UNSPEC },
493 [NETNSA_NSID] = { .type = NLA_S32 },
494 [NETNSA_PID] = { .type = NLA_U32 },
495 [NETNSA_FD] = { .type = NLA_U32 },
496};
497
498static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
499{
500 struct net *net = sock_net(skb->sk);
501 struct nlattr *tb[NETNSA_MAX + 1];
502 struct net *peer;
503 int nsid, err;
504
505 err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
506 rtnl_net_policy);
507 if (err < 0)
508 return err;
509 if (!tb[NETNSA_NSID])
510 return -EINVAL;
511 nsid = nla_get_s32(tb[NETNSA_NSID]);
512
513 if (tb[NETNSA_PID])
514 peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
515 else if (tb[NETNSA_FD])
516 peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
517 else
518 return -EINVAL;
519 if (IS_ERR(peer))
520 return PTR_ERR(peer);
521
522 if (__peernet2id(net, peer, false) >= 0) {
523 err = -EEXIST;
524 goto out;
525 }
526
527 err = alloc_netid(net, peer, nsid);
528 if (err > 0)
529 err = 0;
530out:
531 put_net(peer);
532 return err;
533}
534
535static int rtnl_net_get_size(void)
536{
537 return NLMSG_ALIGN(sizeof(struct rtgenmsg))
538 + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
539 ;
540}
541
542static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
543 int cmd, struct net *net, struct net *peer)
544{
545 struct nlmsghdr *nlh;
546 struct rtgenmsg *rth;
547 int id;
548
549 ASSERT_RTNL();
550
551 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
552 if (!nlh)
553 return -EMSGSIZE;
554
555 rth = nlmsg_data(nlh);
556 rth->rtgen_family = AF_UNSPEC;
557
558 id = __peernet2id(net, peer, false);
559 if (id < 0)
560 id = NETNSA_NSID_NOT_ASSIGNED;
561 if (nla_put_s32(skb, NETNSA_NSID, id))
562 goto nla_put_failure;
563
564 nlmsg_end(skb, nlh);
565 return 0;
566
567nla_put_failure:
568 nlmsg_cancel(skb, nlh);
569 return -EMSGSIZE;
570}
571
572static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
573{
574 struct net *net = sock_net(skb->sk);
575 struct nlattr *tb[NETNSA_MAX + 1];
576 struct sk_buff *msg;
577 int err = -ENOBUFS;
578 struct net *peer;
579
580 err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
581 rtnl_net_policy);
582 if (err < 0)
583 return err;
584 if (tb[NETNSA_PID])
585 peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
586 else if (tb[NETNSA_FD])
587 peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
588 else
589 return -EINVAL;
590
591 if (IS_ERR(peer))
592 return PTR_ERR(peer);
593
594 msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
595 if (!msg) {
596 err = -ENOMEM;
597 goto out;
598 }
599
600 err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
601 RTM_GETNSID, net, peer);
602 if (err < 0)
603 goto err_out;
604
605 err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
606 goto out;
607
608err_out:
609 nlmsg_free(msg);
610out:
611 put_net(peer);
612 return err;
613}
614
405static int __init net_ns_init(void) 615static int __init net_ns_init(void)
406{ 616{
407 struct net_generic *ng; 617 struct net_generic *ng;
@@ -435,6 +645,9 @@ static int __init net_ns_init(void)
435 645
436 register_pernet_subsys(&net_ns_ops); 646 register_pernet_subsys(&net_ns_ops);
437 647
648 rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
649 rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL);
650
438 return 0; 651 return 0;
439} 652}
440 653
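
A small in-kernel sketch, meant to sit alongside the code above; it relies on the peernet2id()/get_net_ns_by_id() pair this patch introduces and on holding the rtnl lock, which protects net->netns_ids. report_peer_id() is an illustrative name, not an existing helper; rtnetlink.c below uses peernet2id() the same way to fill IFLA_LINK_NETNSID.

#include <linux/rtnetlink.h>
#include <net/net_namespace.h>

static int report_peer_id(struct net *net, struct net *peer)
{
	int id;

	ASSERT_RTNL();			/* netns_ids idr is rtnl-protected */
	id = peernet2id(net, peer);	/* allocates an id on first use */

	if (id >= 0) {
		struct net *back = get_net_ns_by_id(net, id);

		if (back) {
			WARN_ON(back != peer);	/* the mapping is per "net" */
			put_net(back);		/* get_net_ns_by_id() took a ref */
		}
	}
	return id;	/* NETNSA_NSID_NOT_ASSIGNED (-1) if allocation failed */
}
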
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index e0ad5d16c9c5..c126a878c47c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -77,7 +77,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
77 77
78 features = netif_skb_features(skb); 78 features = netif_skb_features(skb);
79 79
80 if (vlan_tx_tag_present(skb) && 80 if (skb_vlan_tag_present(skb) &&
81 !vlan_hw_offload_capable(features, skb->vlan_proto)) { 81 !vlan_hw_offload_capable(features, skb->vlan_proto)) {
82 skb = __vlan_hwaccel_push_inside(skb); 82 skb = __vlan_hwaccel_push_inside(skb);
83 if (unlikely(!skb)) { 83 if (unlikely(!skb)) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5daabfda6f6f..5be499b6a2d2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -50,6 +50,7 @@
50#include <net/arp.h> 50#include <net/arp.h>
51#include <net/route.h> 51#include <net/route.h>
52#include <net/udp.h> 52#include <net/udp.h>
53#include <net/tcp.h>
53#include <net/sock.h> 54#include <net/sock.h>
54#include <net/pkt_sched.h> 55#include <net/pkt_sched.h>
55#include <net/fib_rules.h> 56#include <net/fib_rules.h>
@@ -669,9 +670,19 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
669 670
670 for (i = 0; i < RTAX_MAX; i++) { 671 for (i = 0; i < RTAX_MAX; i++) {
671 if (metrics[i]) { 672 if (metrics[i]) {
673 if (i == RTAX_CC_ALGO - 1) {
674 char tmp[TCP_CA_NAME_MAX], *name;
675
676 name = tcp_ca_get_name_by_key(metrics[i], tmp);
677 if (!name)
678 continue;
679 if (nla_put_string(skb, i + 1, name))
680 goto nla_put_failure;
681 } else {
682 if (nla_put_u32(skb, i + 1, metrics[i]))
683 goto nla_put_failure;
684 }
672 valid++; 685 valid++;
673 if (nla_put_u32(skb, i+1, metrics[i]))
674 goto nla_put_failure;
675 } 686 }
676 } 687 }
677 688
@@ -864,6 +875,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
864 + nla_total_size(1) /* IFLA_OPERSTATE */ 875 + nla_total_size(1) /* IFLA_OPERSTATE */
865 + nla_total_size(1) /* IFLA_LINKMODE */ 876 + nla_total_size(1) /* IFLA_LINKMODE */
866 + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ 877 + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
878 + nla_total_size(4) /* IFLA_LINK_NETNSID */
867 + nla_total_size(ext_filter_mask 879 + nla_total_size(ext_filter_mask
868 & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ 880 & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
869 + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ 881 + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
@@ -1158,6 +1170,18 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1158 goto nla_put_failure; 1170 goto nla_put_failure;
1159 } 1171 }
1160 1172
1173 if (dev->rtnl_link_ops &&
1174 dev->rtnl_link_ops->get_link_net) {
1175 struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
1176
1177 if (!net_eq(dev_net(dev), link_net)) {
1178 int id = peernet2id(dev_net(dev), link_net);
1179
1180 if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
1181 goto nla_put_failure;
1182 }
1183 }
1184
1161 if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) 1185 if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
1162 goto nla_put_failure; 1186 goto nla_put_failure;
1163 1187
@@ -1188,7 +1212,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1188 1212
1189 nla_nest_end(skb, af_spec); 1213 nla_nest_end(skb, af_spec);
1190 1214
1191 return nlmsg_end(skb, nlh); 1215 nlmsg_end(skb, nlh);
1216 return 0;
1192 1217
1193nla_put_failure: 1218nla_put_failure:
1194 nlmsg_cancel(skb, nlh); 1219 nlmsg_cancel(skb, nlh);
@@ -1223,6 +1248,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
1223 [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, 1248 [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
1224 [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */ 1249 [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
1225 [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, 1250 [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
1251 [IFLA_LINK_NETNSID] = { .type = NLA_S32 },
1226}; 1252};
1227 1253
1228static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { 1254static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1309,7 +1335,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1309 */ 1335 */
1310 WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); 1336 WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
1311 1337
1312 if (err <= 0) 1338 if (err < 0)
1313 goto out; 1339 goto out;
1314 1340
1315 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 1341 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -1990,7 +2016,7 @@ replay:
1990 struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0]; 2016 struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0];
1991 struct nlattr **data = NULL; 2017 struct nlattr **data = NULL;
1992 struct nlattr **slave_data = NULL; 2018 struct nlattr **slave_data = NULL;
1993 struct net *dest_net; 2019 struct net *dest_net, *link_net = NULL;
1994 2020
1995 if (ops) { 2021 if (ops) {
1996 if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { 2022 if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
@@ -2096,7 +2122,18 @@ replay:
2096 if (IS_ERR(dest_net)) 2122 if (IS_ERR(dest_net))
2097 return PTR_ERR(dest_net); 2123 return PTR_ERR(dest_net);
2098 2124
2099 dev = rtnl_create_link(dest_net, ifname, name_assign_type, ops, tb); 2125 if (tb[IFLA_LINK_NETNSID]) {
2126 int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
2127
2128 link_net = get_net_ns_by_id(dest_net, id);
2129 if (!link_net) {
2130 err = -EINVAL;
2131 goto out;
2132 }
2133 }
2134
2135 dev = rtnl_create_link(link_net ? : dest_net, ifname,
2136 name_assign_type, ops, tb);
2100 if (IS_ERR(dev)) { 2137 if (IS_ERR(dev)) {
2101 err = PTR_ERR(dev); 2138 err = PTR_ERR(dev);
2102 goto out; 2139 goto out;
@@ -2105,7 +2142,7 @@ replay:
2105 dev->ifindex = ifm->ifi_index; 2142 dev->ifindex = ifm->ifi_index;
2106 2143
2107 if (ops->newlink) { 2144 if (ops->newlink) {
2108 err = ops->newlink(net, dev, tb, data); 2145 err = ops->newlink(link_net ? : net, dev, tb, data);
2109 /* Drivers should call free_netdev() in ->destructor 2146 /* Drivers should call free_netdev() in ->destructor
2110 * and unregister it on failure after registration 2147 * and unregister it on failure after registration
2111 * so that device could be finally freed in rtnl_unlock. 2148 * so that device could be finally freed in rtnl_unlock.
@@ -2124,9 +2161,19 @@ replay:
2124 } 2161 }
2125 } 2162 }
2126 err = rtnl_configure_link(dev, ifm); 2163 err = rtnl_configure_link(dev, ifm);
2127 if (err < 0) 2164 if (err < 0) {
2128 unregister_netdevice(dev); 2165 unregister_netdevice(dev);
2166 goto out;
2167 }
2168
2169 if (link_net) {
2170 err = dev_change_net_namespace(dev, dest_net, ifname);
2171 if (err < 0)
2172 unregister_netdevice(dev);
2173 }
2129out: 2174out:
2175 if (link_net)
2176 put_net(link_net);
2130 put_net(dest_net); 2177 put_net(dest_net);
2131 return err; 2178 return err;
2132 } 2179 }
@@ -2309,7 +2356,8 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
2309 if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) 2356 if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr))
2310 goto nla_put_failure; 2357 goto nla_put_failure;
2311 2358
2312 return nlmsg_end(skb, nlh); 2359 nlmsg_end(skb, nlh);
2360 return 0;
2313 2361
2314nla_put_failure: 2362nla_put_failure:
2315 nlmsg_cancel(skb, nlh); 2363 nlmsg_cancel(skb, nlh);
@@ -2692,10 +2740,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
2692 idx); 2740 idx);
2693 } 2741 }
2694 2742
2695 idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
2696 if (dev->netdev_ops->ndo_fdb_dump) 2743 if (dev->netdev_ops->ndo_fdb_dump)
2697 idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev, 2744 idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
2698 idx); 2745 idx);
2746 else
2747 idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
2699 2748
2700 cops = NULL; 2749 cops = NULL;
2701 } 2750 }
@@ -2791,7 +2840,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
2791 2840
2792 nla_nest_end(skb, protinfo); 2841 nla_nest_end(skb, protinfo);
2793 2842
2794 return nlmsg_end(skb, nlh); 2843 nlmsg_end(skb, nlh);
2844 return 0;
2795nla_put_failure: 2845nla_put_failure:
2796 nlmsg_cancel(skb, nlh); 2846 nlmsg_cancel(skb, nlh);
2797 return -EMSGSIZE; 2847 return -EMSGSIZE;
@@ -2862,32 +2912,24 @@ static inline size_t bridge_nlmsg_size(void)
2862 + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ 2912 + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */
2863} 2913}
2864 2914
2865static int rtnl_bridge_notify(struct net_device *dev, u16 flags) 2915static int rtnl_bridge_notify(struct net_device *dev)
2866{ 2916{
2867 struct net *net = dev_net(dev); 2917 struct net *net = dev_net(dev);
2868 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2869 struct sk_buff *skb; 2918 struct sk_buff *skb;
2870 int err = -EOPNOTSUPP; 2919 int err = -EOPNOTSUPP;
2871 2920
2921 if (!dev->netdev_ops->ndo_bridge_getlink)
2922 return 0;
2923
2872 skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); 2924 skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC);
2873 if (!skb) { 2925 if (!skb) {
2874 err = -ENOMEM; 2926 err = -ENOMEM;
2875 goto errout; 2927 goto errout;
2876 } 2928 }
2877 2929
2878 if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && 2930 err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
2879 br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { 2931 if (err < 0)
2880 err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); 2932 goto errout;
2881 if (err < 0)
2882 goto errout;
2883 }
2884
2885 if ((flags & BRIDGE_FLAGS_SELF) &&
2886 dev->netdev_ops->ndo_bridge_getlink) {
2887 err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
2888 if (err < 0)
2889 goto errout;
2890 }
2891 2933
2892 if (!skb->len) 2934 if (!skb->len)
2893 goto errout; 2935 goto errout;
@@ -2909,7 +2951,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
2909 struct net_device *dev; 2951 struct net_device *dev;
2910 struct nlattr *br_spec, *attr = NULL; 2952 struct nlattr *br_spec, *attr = NULL;
2911 int rem, err = -EOPNOTSUPP; 2953 int rem, err = -EOPNOTSUPP;
2912 u16 oflags, flags = 0; 2954 u16 flags = 0;
2913 bool have_flags = false; 2955 bool have_flags = false;
2914 2956
2915 if (nlmsg_len(nlh) < sizeof(*ifm)) 2957 if (nlmsg_len(nlh) < sizeof(*ifm))
@@ -2939,8 +2981,6 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
2939 } 2981 }
2940 } 2982 }
2941 2983
2942 oflags = flags;
2943
2944 if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { 2984 if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
2945 struct net_device *br_dev = netdev_master_upper_dev_get(dev); 2985 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2946 2986
@@ -2949,7 +2989,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
2949 goto out; 2989 goto out;
2950 } 2990 }
2951 2991
2952 err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh); 2992 err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags);
2953 if (err) 2993 if (err)
2954 goto out; 2994 goto out;
2955 2995
@@ -2960,17 +3000,20 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
2960 if (!dev->netdev_ops->ndo_bridge_setlink) 3000 if (!dev->netdev_ops->ndo_bridge_setlink)
2961 err = -EOPNOTSUPP; 3001 err = -EOPNOTSUPP;
2962 else 3002 else
2963 err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); 3003 err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh,
2964 3004 flags);
2965 if (!err) 3005 if (!err) {
2966 flags &= ~BRIDGE_FLAGS_SELF; 3006 flags &= ~BRIDGE_FLAGS_SELF;
3007
3008 /* Generate event to notify upper layer of bridge
3009 * change
3010 */
3011 err = rtnl_bridge_notify(dev);
3012 }
2967 } 3013 }
2968 3014
2969 if (have_flags) 3015 if (have_flags)
2970 memcpy(nla_data(attr), &flags, sizeof(flags)); 3016 memcpy(nla_data(attr), &flags, sizeof(flags));
2971 /* Generate event to notify upper layer of bridge change */
2972 if (!err)
2973 err = rtnl_bridge_notify(dev, oflags);
2974out: 3017out:
2975 return err; 3018 return err;
2976} 3019}
@@ -2982,7 +3025,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
2982 struct net_device *dev; 3025 struct net_device *dev;
2983 struct nlattr *br_spec, *attr = NULL; 3026 struct nlattr *br_spec, *attr = NULL;
2984 int rem, err = -EOPNOTSUPP; 3027 int rem, err = -EOPNOTSUPP;
2985 u16 oflags, flags = 0; 3028 u16 flags = 0;
2986 bool have_flags = false; 3029 bool have_flags = false;
2987 3030
2988 if (nlmsg_len(nlh) < sizeof(*ifm)) 3031 if (nlmsg_len(nlh) < sizeof(*ifm))
@@ -3012,8 +3055,6 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
3012 } 3055 }
3013 } 3056 }
3014 3057
3015 oflags = flags;
3016
3017 if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { 3058 if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
3018 struct net_device *br_dev = netdev_master_upper_dev_get(dev); 3059 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
3019 3060
@@ -3022,7 +3063,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
3022 goto out; 3063 goto out;
3023 } 3064 }
3024 3065
3025 err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); 3066 err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh, flags);
3026 if (err) 3067 if (err)
3027 goto out; 3068 goto out;
3028 3069
@@ -3033,17 +3074,21 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
3033 if (!dev->netdev_ops->ndo_bridge_dellink) 3074 if (!dev->netdev_ops->ndo_bridge_dellink)
3034 err = -EOPNOTSUPP; 3075 err = -EOPNOTSUPP;
3035 else 3076 else
3036 err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); 3077 err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh,
3078 flags);
3037 3079
3038 if (!err) 3080 if (!err) {
3039 flags &= ~BRIDGE_FLAGS_SELF; 3081 flags &= ~BRIDGE_FLAGS_SELF;
3082
3083 /* Generate event to notify upper layer of bridge
3084 * change
3085 */
3086 err = rtnl_bridge_notify(dev);
3087 }
3040 } 3088 }
3041 3089
3042 if (have_flags) 3090 if (have_flags)
3043 memcpy(nla_data(attr), &flags, sizeof(flags)); 3091 memcpy(nla_data(attr), &flags, sizeof(flags));
3044 /* Generate event to notify upper layer of bridge change */
3045 if (!err)
3046 err = rtnl_bridge_notify(dev, oflags);
3047out: 3092out:
3048 return err; 3093 return err;
3049} 3094}
@@ -3133,6 +3178,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
3133 case NETDEV_UNREGISTER_FINAL: 3178 case NETDEV_UNREGISTER_FINAL:
3134 case NETDEV_RELEASE: 3179 case NETDEV_RELEASE:
3135 case NETDEV_JOIN: 3180 case NETDEV_JOIN:
3181 case NETDEV_BONDING_INFO:
3136 break; 3182 break;
3137 default: 3183 default:
3138 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); 3184 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
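
A hedged sketch of the driver side that rtnl_fill_ifinfo() above relies on: a virtual device whose rtnl_link_ops provides get_link_net(), so the kernel can translate the underlay namespace into IFLA_LINK_NETNSID via peernet2id(). "my_tunnel" and its private struct are illustrative, not an existing driver.

#include <linux/netdevice.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>

struct my_tunnel {
	struct net *link_net;	/* namespace holding the lower/underlay device */
};

static struct net *my_tunnel_get_link_net(const struct net_device *dev)
{
	struct my_tunnel *t = netdev_priv(dev);

	return t->link_net;
}

static struct rtnl_link_ops my_tunnel_link_ops __read_mostly = {
	.kind		= "my_tunnel",
	.get_link_net	= my_tunnel_get_link_net,
	/* .setup, .newlink, .priv_size, ... omitted; the ops would be
	 * registered with rtnl_link_register(&my_tunnel_link_ops) */
};

On the receive side of the same attribute, the rtnl_newlink() change above resolves IFLA_LINK_NETNSID with get_net_ns_by_id(), creates the device in that namespace, and then moves it to the target namespace, so a link can end up with its underlay in one netns and the netdev itself in another.
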
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 395c15b82087..88c613eab142 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -74,6 +74,8 @@
74#include <asm/uaccess.h> 74#include <asm/uaccess.h>
75#include <trace/events/skb.h> 75#include <trace/events/skb.h>
76#include <linux/highmem.h> 76#include <linux/highmem.h>
77#include <linux/capability.h>
78#include <linux/user_namespace.h>
77 79
78struct kmem_cache *skbuff_head_cache __read_mostly; 80struct kmem_cache *skbuff_head_cache __read_mostly;
79static struct kmem_cache *skbuff_fclone_cache __read_mostly; 81static struct kmem_cache *skbuff_fclone_cache __read_mostly;
@@ -677,13 +679,6 @@ static void skb_release_head_state(struct sk_buff *skb)
677#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 679#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
678 nf_bridge_put(skb->nf_bridge); 680 nf_bridge_put(skb->nf_bridge);
679#endif 681#endif
680/* XXX: IS this still necessary? - JHS */
681#ifdef CONFIG_NET_SCHED
682 skb->tc_index = 0;
683#ifdef CONFIG_NET_CLS_ACT
684 skb->tc_verd = 0;
685#endif
686#endif
687} 682}
688 683
689/* Free everything but the sk_buff shell. */ 684/* Free everything but the sk_buff shell. */
@@ -830,6 +825,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
830#ifdef CONFIG_NET_RX_BUSY_POLL 825#ifdef CONFIG_NET_RX_BUSY_POLL
831 CHECK_SKB_FIELD(napi_id); 826 CHECK_SKB_FIELD(napi_id);
832#endif 827#endif
828#ifdef CONFIG_XPS
829 CHECK_SKB_FIELD(sender_cpu);
830#endif
833#ifdef CONFIG_NET_SCHED 831#ifdef CONFIG_NET_SCHED
834 CHECK_SKB_FIELD(tc_index); 832 CHECK_SKB_FIELD(tc_index);
835#ifdef CONFIG_NET_CLS_ACT 833#ifdef CONFIG_NET_CLS_ACT
@@ -3697,11 +3695,28 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
3697 kfree_skb(skb); 3695 kfree_skb(skb);
3698} 3696}
3699 3697
3698static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
3699{
3700 bool ret;
3701
3702 if (likely(sysctl_tstamp_allow_data || tsonly))
3703 return true;
3704
3705 read_lock_bh(&sk->sk_callback_lock);
3706 ret = sk->sk_socket && sk->sk_socket->file &&
3707 file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW);
3708 read_unlock_bh(&sk->sk_callback_lock);
3709 return ret;
3710}
3711
3700void skb_complete_tx_timestamp(struct sk_buff *skb, 3712void skb_complete_tx_timestamp(struct sk_buff *skb,
3701 struct skb_shared_hwtstamps *hwtstamps) 3713 struct skb_shared_hwtstamps *hwtstamps)
3702{ 3714{
3703 struct sock *sk = skb->sk; 3715 struct sock *sk = skb->sk;
3704 3716
3717 if (!skb_may_tx_timestamp(sk, false))
3718 return;
3719
3705 /* take a reference to prevent skb_orphan() from freeing the socket */ 3720 /* take a reference to prevent skb_orphan() from freeing the socket */
3706 sock_hold(sk); 3721 sock_hold(sk);
3707 3722
@@ -3717,19 +3732,28 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
3717 struct sock *sk, int tstype) 3732 struct sock *sk, int tstype)
3718{ 3733{
3719 struct sk_buff *skb; 3734 struct sk_buff *skb;
3735 bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
3720 3736
3721 if (!sk) 3737 if (!sk || !skb_may_tx_timestamp(sk, tsonly))
3722 return; 3738 return;
3723 3739
3724 if (hwtstamps) 3740 if (tsonly)
3725 *skb_hwtstamps(orig_skb) = *hwtstamps; 3741 skb = alloc_skb(0, GFP_ATOMIC);
3726 else 3742 else
3727 orig_skb->tstamp = ktime_get_real(); 3743 skb = skb_clone(orig_skb, GFP_ATOMIC);
3728
3729 skb = skb_clone(orig_skb, GFP_ATOMIC);
3730 if (!skb) 3744 if (!skb)
3731 return; 3745 return;
3732 3746
3747 if (tsonly) {
3748 skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags;
3749 skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
3750 }
3751
3752 if (hwtstamps)
3753 *skb_hwtstamps(skb) = *hwtstamps;
3754 else
3755 skb->tstamp = ktime_get_real();
3756
3733 __skb_complete_tx_timestamp(skb, sk, tstype); 3757 __skb_complete_tx_timestamp(skb, sk, tstype);
3734} 3758}
3735EXPORT_SYMBOL_GPL(__skb_tstamp_tx); 3759EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
@@ -4148,6 +4172,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
4148 skb->ignore_df = 0; 4172 skb->ignore_df = 0;
4149 skb_dst_drop(skb); 4173 skb_dst_drop(skb);
4150 skb->mark = 0; 4174 skb->mark = 0;
4175 skb->sender_cpu = 0;
4151 skb_init_secmark(skb); 4176 skb_init_secmark(skb);
4152 secpath_reset(skb); 4177 secpath_reset(skb);
4153 nf_reset(skb); 4178 nf_reset(skb);
@@ -4204,7 +4229,7 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb)
4204 struct vlan_hdr *vhdr; 4229 struct vlan_hdr *vhdr;
4205 u16 vlan_tci; 4230 u16 vlan_tci;
4206 4231
4207 if (unlikely(vlan_tx_tag_present(skb))) { 4232 if (unlikely(skb_vlan_tag_present(skb))) {
4208 /* vlan_tci is already set-up so leave this for another time */ 4233 /* vlan_tci is already set-up so leave this for another time */
4209 return skb; 4234 return skb;
4210 } 4235 }
@@ -4290,7 +4315,7 @@ int skb_vlan_pop(struct sk_buff *skb)
4290 __be16 vlan_proto; 4315 __be16 vlan_proto;
4291 int err; 4316 int err;
4292 4317
4293 if (likely(vlan_tx_tag_present(skb))) { 4318 if (likely(skb_vlan_tag_present(skb))) {
4294 skb->vlan_tci = 0; 4319 skb->vlan_tci = 0;
4295 } else { 4320 } else {
4296 if (unlikely((skb->protocol != htons(ETH_P_8021Q) && 4321 if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
@@ -4320,7 +4345,7 @@ EXPORT_SYMBOL(skb_vlan_pop);
4320 4345
4321int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) 4346int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
4322{ 4347{
4323 if (vlan_tx_tag_present(skb)) { 4348 if (skb_vlan_tag_present(skb)) {
4324 unsigned int offset = skb->data - skb_mac_header(skb); 4349 unsigned int offset = skb->data - skb_mac_header(skb);
4325 int err; 4350 int err;
4326 4351
@@ -4330,7 +4355,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
4330 */ 4355 */
4331 __skb_push(skb, offset); 4356 __skb_push(skb, offset);
4332 err = __vlan_insert_tag(skb, skb->vlan_proto, 4357 err = __vlan_insert_tag(skb, skb->vlan_proto,
4333 vlan_tx_tag_get(skb)); 4358 skb_vlan_tag_get(skb));
4334 if (err) 4359 if (err)
4335 return err; 4360 return err;
4336 skb->protocol = skb->vlan_proto; 4361 skb->protocol = skb->vlan_proto;
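
A hedged userspace sketch of the feature the timestamping rework above enables: SOF_TIMESTAMPING_OPT_TSONLY requests an empty skb (alloc_skb(0, ...) in __skb_tstamp_tx()) on the error queue instead of a clone of the transmitted packet, and skb_may_tx_timestamp() only loops payload back to unprivileged senders when the new net.core.tstamp_allow_data sysctl is 1 or the socket's opener had CAP_NET_RAW.

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/net_tstamp.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int flags = SOF_TIMESTAMPING_TX_SOFTWARE |
		    SOF_TIMESTAMPING_SOFTWARE |
		    SOF_TIMESTAMPING_OPT_ID |
		    SOF_TIMESTAMPING_OPT_TSONLY;

	if (fd < 0)
		return 1;
	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags)) < 0)
		perror("SO_TIMESTAMPING");
	/* sendto() + recvmsg(fd, ..., MSG_ERRQUEUE) loop omitted; with
	 * OPT_TSONLY the error-queue message carries the timestamp and the
	 * OPT_ID counter in cmsgs but no copy of the packet payload. */
	return 0;
}
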
diff --git a/net/core/sock.c b/net/core/sock.c
index 1c7a33db1314..93c8b20c91e4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -325,6 +325,8 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
325int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); 325int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
326EXPORT_SYMBOL(sysctl_optmem_max); 326EXPORT_SYMBOL(sysctl_optmem_max);
327 327
328int sysctl_tstamp_allow_data __read_mostly = 1;
329
328struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE; 330struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
329EXPORT_SYMBOL_GPL(memalloc_socks); 331EXPORT_SYMBOL_GPL(memalloc_socks);
330 332
@@ -840,6 +842,7 @@ set_rcvbuf:
840 ret = -EINVAL; 842 ret = -EINVAL;
841 break; 843 break;
842 } 844 }
845
843 if (val & SOF_TIMESTAMPING_OPT_ID && 846 if (val & SOF_TIMESTAMPING_OPT_ID &&
844 !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { 847 !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
845 if (sk->sk_protocol == IPPROTO_TCP) { 848 if (sk->sk_protocol == IPPROTO_TCP) {
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 31baba2a71ce..eaa51ddf2368 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -52,7 +52,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
52 52
53 if (write) { 53 if (write) {
54 if (size) { 54 if (size) {
55 if (size > 1<<30) { 55 if (size > 1<<29) {
56 /* Enforce limit to prevent overflow */ 56 /* Enforce limit to prevent overflow */
57 mutex_unlock(&sock_flow_mutex); 57 mutex_unlock(&sock_flow_mutex);
58 return -EINVAL; 58 return -EINVAL;
@@ -65,7 +65,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
65 mutex_unlock(&sock_flow_mutex); 65 mutex_unlock(&sock_flow_mutex);
66 return -ENOMEM; 66 return -ENOMEM;
67 } 67 }
68 68 rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;
69 sock_table->mask = size - 1; 69 sock_table->mask = size - 1;
70 } else 70 } else
71 sock_table = orig_sock_table; 71 sock_table = orig_sock_table;
@@ -321,6 +321,15 @@ static struct ctl_table net_core_table[] = {
321 .mode = 0644, 321 .mode = 0644,
322 .proc_handler = proc_dointvec 322 .proc_handler = proc_dointvec
323 }, 323 },
324 {
325 .procname = "tstamp_allow_data",
326 .data = &sysctl_tstamp_allow_data,
327 .maxlen = sizeof(int),
328 .mode = 0644,
329 .proc_handler = proc_dointvec_minmax,
330 .extra1 = &zero,
331 .extra2 = &one
332 },
324#ifdef CONFIG_RPS 333#ifdef CONFIG_RPS
325 { 334 {
326 .procname = "rps_sock_flow_entries", 335 .procname = "rps_sock_flow_entries",