path: root/net/core
author	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-25 14:17:34 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-25 14:17:34 -0500
commit	4ba9920e5e9c0e16b5ed24292d45322907bb9035 (patch)
tree	7d023baea59ed0886ded1f0b6d1c6385690b88f7 /net/core
parent	82c477669a4665eb4e52030792051e0559ee2a36 (diff)
parent	8b662fe70c68282f78482dc272df0c4f355e49f5 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) BPF debugger and asm tool by Daniel Borkmann.

 2) Speed up create/bind in AF_PACKET, also from Daniel Borkmann.

 3) Correct reciprocal_divide and update users, from Hannes Frederic Sowa and Daniel Borkmann.

 4) Currently we only have a "set" operation for the hw timestamp socket ioctl, add a "get" operation to match. From Ben Hutchings.

 5) Add better trace events for debugging driver datapath problems, also from Ben Hutchings.

 6) Implement auto corking in TCP, from Eric Dumazet. Basically, if we have a small send and a previous packet is already in the qdisc or device queue, defer until TX completion or we get more data.

 7) Allow userspace to manage ipv6 temporary addresses, from Jiri Pirko.

 8) Add a qdisc bypass option for AF_PACKET sockets, from Daniel Borkmann.

 9) Share IP header compression code between Bluetooth and IEEE802154 layers, from Jukka Rissanen.

10) Fix ipv6 router reachability probing, from Jiri Benc.

11) Allow packets to be captured on macvtap devices, from Vlad Yasevich.

12) Support tunneling in GRO layer, from Jerry Chu.

13) Allow bonding to be configured fully using netlink, from Scott Feldman.

14) Allow AF_PACKET users to obtain the VLAN TPID, just like they can already get the TCI. From Atzm Watanabe.

15) New "Heavy Hitter" qdisc, from Terry Lam.

16) Significantly improve the IPSEC support in pktgen, from Fan Du.

17) Allow ipv4 tunnels to cache routes, just like sockets. From Tom Herbert.

18) Add Proportional Integral Enhanced packet scheduler, from Vijay Subramanian.

19) Allow openvswitch to mmap'd netlink, from Thomas Graf.

20) Key TCP metrics blobs also by source address, not just destination address. From Christoph Paasch.

21) Support 10G in generic phylib. From Andy Fleming.

22) Try to short-circuit GRO flow compares using device provided RX hash, if provided. From Tom Herbert.

The wireless and netfilter folks have been busy little bees too.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2064 commits)
  net/cxgb4: Fix referencing freed adapter
  ipv6: reallocate addrconf router for ipv6 address when lo device up
  fib_frontend: fix possible NULL pointer dereference
  rtnetlink: remove IFLA_BOND_SLAVE definition
  rtnetlink: remove check for fill_slave_info in rtnl_have_link_slave_info
  qlcnic: update version to 5.3.55
  qlcnic: Enhance logic to calculate msix vectors.
  qlcnic: Refactor interrupt coalescing code for all adapters.
  qlcnic: Update poll controller code path
  qlcnic: Interrupt code cleanup
  qlcnic: Enhance Tx timeout debugging.
  qlcnic: Use bool for rx_mac_learn.
  bonding: fix u64 division
  rtnetlink: add missing IFLA_BOND_AD_INFO_UNSPEC
  sfc: Use the correct maximum TX DMA ring size for SFC9100
  Add Shradha Shah as the sfc driver maintainer.
  net/vxlan: Share RX skb de-marking and checksum checks with ovs
  tulip: cleanup by using ARRAY_SIZE()
  ip_tunnel: clear IPCB in ip_tunnel_xmit() in case dst_link_failure() is called
  net/cxgb4: Don't retrieve stats during recovery
  ...
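To make item 8 above concrete, here is a minimal, hedged userspace sketch of opting an AF_PACKET socket out of the qdisc layer with the PACKET_QDISC_BYPASS socket option introduced in this cycle. Error handling is trimmed, "eth binding" is omitted, and older kernels/headers will not know the option, so treat this as an illustration rather than the canonical usage.

#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

/* Sketch: open a raw packet socket and ask the kernel to skip the
 * qdisc on transmit (PACKET_QDISC_BYPASS).  Packets sent on this
 * socket then go straight to the driver, bypassing traffic shaping. */
int open_bypass_socket(void)
{
	int one = 1;
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	if (fd < 0)
		return -1;

	if (setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS,
		       &one, sizeof(one)) < 0) {
		/* Option unknown on older kernels; fall back silently. */
	}
	return fd;
}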
Diffstat (limited to 'net/core')
-rw-r--r--	net/core/Makefile	3
-rw-r--r--	net/core/dev.c	562
-rw-r--r--	net/core/dev_addr_lists.c	115
-rw-r--r--	net/core/dev_ioctl.c	2
-rw-r--r--	net/core/flow_dissector.c	6
-rw-r--r--	net/core/neighbour.c	477
-rw-r--r--	net/core/net-sysfs.c	82
-rw-r--r--	net/core/net-sysfs.h	2
-rw-r--r--	net/core/netclassid_cgroup.c	120
-rw-r--r--	net/core/netpoll.c	4
-rw-r--r--	net/core/netprio_cgroup.c	2
-rw-r--r--	net/core/pktgen.c	88
-rw-r--r--	net/core/rtnetlink.c	176
-rw-r--r--	net/core/skbuff.c	366
-rw-r--r--	net/core/sock.c	43
-rw-r--r--	net/core/stream.c	2
-rw-r--r--	net/core/sysctl_net_core.c	3
17 files changed, 1368 insertions, 685 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index b33b996f5dd6..9628c20acff6 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -21,4 +21,5 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
 obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
 obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
-obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o
+obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
+obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 2e0c6a90f6f2..3721db716350 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -147,6 +147,8 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 struct list_head ptype_all __read_mostly;	/* Taps */
 static struct list_head offload_base __read_mostly;
 
+static int netif_rx_internal(struct sk_buff *skb);
+
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
  * semaphore.
@@ -480,7 +482,7 @@ EXPORT_SYMBOL(dev_add_offload);
  * and must not be freed until after all the CPU's have gone
  * through a quiescent state.
  */
-void __dev_remove_offload(struct packet_offload *po)
+static void __dev_remove_offload(struct packet_offload *po)
 {
 	struct list_head *head = &offload_base;
 	struct packet_offload *po1;
@@ -498,7 +500,6 @@ void __dev_remove_offload(struct packet_offload *po)
 out:
 	spin_unlock(&offload_lock);
 }
-EXPORT_SYMBOL(__dev_remove_offload);
 
 /**
  * dev_remove_offload - remove packet offload handler
@@ -1118,6 +1119,8 @@ rollback:
 
 	write_seqcount_end(&devnet_rename_seq);
 
+	netdev_adjacent_rename_links(dev, oldname);
+
 	write_lock_bh(&dev_base_lock);
 	hlist_del_rcu(&dev->name_hlist);
 	write_unlock_bh(&dev_base_lock);
@@ -1137,6 +1140,7 @@ rollback:
 			err = ret;
 			write_seqcount_begin(&devnet_rename_seq);
 			memcpy(dev->name, oldname, IFNAMSIZ);
+			memcpy(oldname, newname, IFNAMSIZ);
 			goto rollback;
 		} else {
 			pr_err("%s: name change rollback failed: %d\n",
@@ -1566,14 +1570,14 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
  * are as for raw_notifier_call_chain().
  */
 
-int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
-				  struct netdev_notifier_info *info)
+static int call_netdevice_notifiers_info(unsigned long val,
+					 struct net_device *dev,
+					 struct netdev_notifier_info *info)
 {
 	ASSERT_RTNL();
 	netdev_notifier_info_init(info, dev);
 	return raw_notifier_call_chain(&netdev_chain, val, info);
 }
-EXPORT_SYMBOL(call_netdevice_notifiers_info);
 
 /**
  * call_netdevice_notifiers - call all network notifier blocks
@@ -1699,7 +1703,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 	skb_scrub_packet(skb, true);
 	skb->protocol = eth_type_trans(skb, dev);
 
-	return netif_rx(skb);
+	return netif_rx_internal(skb);
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
 
@@ -2079,7 +2083,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 }
 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 /**
  * netif_set_real_num_rx_queues - set actual number of RX queues used
  * @dev: Network device
@@ -2145,30 +2149,42 @@ void __netif_schedule(struct Qdisc *q)
 }
 EXPORT_SYMBOL(__netif_schedule);
 
-void dev_kfree_skb_irq(struct sk_buff *skb)
+struct dev_kfree_skb_cb {
+	enum skb_free_reason reason;
+};
+
+static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
 {
-	if (atomic_dec_and_test(&skb->users)) {
-		struct softnet_data *sd;
-		unsigned long flags;
+	return (struct dev_kfree_skb_cb *)skb->cb;
+}
+
+void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
+{
+	unsigned long flags;
 
-		local_irq_save(flags);
-		sd = &__get_cpu_var(softnet_data);
-		skb->next = sd->completion_queue;
-		sd->completion_queue = skb;
-		raise_softirq_irqoff(NET_TX_SOFTIRQ);
-		local_irq_restore(flags);
+	if (likely(atomic_read(&skb->users) == 1)) {
+		smp_rmb();
+		atomic_set(&skb->users, 0);
+	} else if (likely(!atomic_dec_and_test(&skb->users))) {
+		return;
 	}
+	get_kfree_skb_cb(skb)->reason = reason;
+	local_irq_save(flags);
+	skb->next = __this_cpu_read(softnet_data.completion_queue);
+	__this_cpu_write(softnet_data.completion_queue, skb);
+	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+	local_irq_restore(flags);
 }
-EXPORT_SYMBOL(dev_kfree_skb_irq);
+EXPORT_SYMBOL(__dev_kfree_skb_irq);
 
-void dev_kfree_skb_any(struct sk_buff *skb)
+void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
 {
 	if (in_irq() || irqs_disabled())
-		dev_kfree_skb_irq(skb);
+		__dev_kfree_skb_irq(skb, reason);
 	else
 		dev_kfree_skb(skb);
 }
-EXPORT_SYMBOL(dev_kfree_skb_any);
+EXPORT_SYMBOL(__dev_kfree_skb_any);
 
 
 /**
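The hunk above makes the deferred-free helpers carry a reason, so net_tx_action() can emit consume_skb instead of kfree_skb trace events for packets that were transmitted successfully (useful with drop-monitor tooling). A rough, hedged sketch of how a driver's TX-completion path is expected to use the wrappers built on top of these helpers; the wrapper names (dev_consume_skb_any() for normal completions, dev_kfree_skb_any() for drops) live in linux/netdevice.h and should be checked against your tree, and the ring layout below is made up.

#include <linux/skbuff.h>
#include <linux/netdevice.h>

/* Hypothetical TX ring layout, for illustration only. */
struct foo_tx_buf { struct sk_buff *skb; };
struct foo_tx_ring {
	struct foo_tx_buf *bufs;
	unsigned int next_to_clean;
	unsigned int next_to_use;
	unsigned int count;
};

static void foo_clean_tx_ring(struct foo_tx_ring *ring)
{
	while (ring->next_to_clean != ring->next_to_use) {
		struct sk_buff *skb = ring->bufs[ring->next_to_clean].skb;

		if (skb) {
			/* TX completed successfully: free it as "consumed"
			 * so it shows up as consume_skb, not kfree_skb,
			 * in the trace stream. */
			dev_consume_skb_any(skb);
			ring->bufs[ring->next_to_clean].skb = NULL;
		}
		ring->next_to_clean = (ring->next_to_clean + 1) % ring->count;
	}
}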
@@ -2442,13 +2458,8 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
 {
 	struct dev_gso_cb *cb;
 
-	do {
-		struct sk_buff *nskb = skb->next;
-
-		skb->next = nskb->next;
-		nskb->next = NULL;
-		kfree_skb(nskb);
-	} while (skb->next);
+	kfree_skb_list(skb->next);
+	skb->next = NULL;
 
 	cb = DEV_GSO_CB(skb);
 	if (cb->destructor)
@@ -2523,21 +2534,6 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_skb_features);
 
-/*
- * Returns true if either:
- *	1. skb has frag_list and the device doesn't support FRAGLIST, or
- *	2. skb is fragmented and the device does not support SG.
- */
-static inline int skb_needs_linearize(struct sk_buff *skb,
-				      netdev_features_t features)
-{
-	return skb_is_nonlinear(skb) &&
-			((skb_has_frag_list(skb) &&
-				!(features & NETIF_F_FRAGLIST)) ||
-			(skb_shinfo(skb)->nr_frags &&
-				!(features & NETIF_F_SG)));
-}
-
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
@@ -2605,8 +2601,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		dev_queue_xmit_nit(skb, dev);
 
 		skb_len = skb->len;
-		rc = ops->ndo_start_xmit(skb, dev);
-
+		trace_net_dev_start_xmit(skb, dev);
+		rc = ops->ndo_start_xmit(skb, dev);
 		trace_net_dev_xmit(skb, rc, dev, skb_len);
 		if (rc == NETDEV_TX_OK)
 			txq_trans_update(txq);
@@ -2624,6 +2620,7 @@ gso:
 		dev_queue_xmit_nit(nskb, dev);
 
 		skb_len = nskb->len;
+		trace_net_dev_start_xmit(nskb, dev);
 		rc = ops->ndo_start_xmit(nskb, dev);
 		trace_net_dev_xmit(nskb, rc, dev, skb_len);
 		if (unlikely(rc != NETDEV_TX_OK)) {
@@ -2744,7 +2741,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	return rc;
 }
 
-#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 static void skb_update_prio(struct sk_buff *skb)
 {
 	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
@@ -2781,8 +2778,9 @@ int dev_loopback_xmit(struct sk_buff *skb)
 EXPORT_SYMBOL(dev_loopback_xmit);
 
 /**
- * dev_queue_xmit - transmit a buffer
+ * __dev_queue_xmit - transmit a buffer
  * @skb: buffer to transmit
+ * @accel_priv: private data used for L2 forwarding offload
  *
  * Queue a buffer for transmission to a network device. The caller must
  * have set the device and priority and built the buffer before calling
@@ -3014,7 +3012,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	}
 
 	skb_reset_network_header(skb);
-	if (!skb_get_rxhash(skb))
+	if (!skb_get_hash(skb))
 		goto done;
 
 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
@@ -3159,7 +3157,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
 	rcu_read_lock();
 	fl = rcu_dereference(sd->flow_limit);
 	if (fl) {
-		new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1);
+		new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
 		old_flow = fl->history[fl->history_head];
 		fl->history[fl->history_head] = new_flow;
 
@@ -3227,22 +3225,7 @@ enqueue:
 	return NET_RX_DROP;
 }
 
-/**
- * netif_rx - post buffer to the network code
- * @skb: buffer to post
- *
- * This function receives a packet from a device driver and queues it for
- * the upper (protocol) levels to process. It always succeeds. The buffer
- * may be dropped during processing for congestion control or by the
- * protocol layers.
- *
- * return values:
- * NET_RX_SUCCESS	(no congestion)
- * NET_RX_DROP		(packet was dropped)
- *
- */
-
-int netif_rx(struct sk_buff *skb)
+static int netif_rx_internal(struct sk_buff *skb)
 {
 	int ret;
 
@@ -3278,14 +3261,38 @@ int netif_rx(struct sk_buff *skb)
 	}
 	return ret;
 }
+
+/**
+ * netif_rx - post buffer to the network code
+ * @skb: buffer to post
+ *
+ * This function receives a packet from a device driver and queues it for
+ * the upper (protocol) levels to process. It always succeeds. The buffer
+ * may be dropped during processing for congestion control or by the
+ * protocol layers.
+ *
+ * return values:
+ * NET_RX_SUCCESS	(no congestion)
+ * NET_RX_DROP		(packet was dropped)
+ *
+ */
+
+int netif_rx(struct sk_buff *skb)
+{
+	trace_netif_rx_entry(skb);
+
+	return netif_rx_internal(skb);
+}
 EXPORT_SYMBOL(netif_rx);
 
 int netif_rx_ni(struct sk_buff *skb)
 {
 	int err;
 
+	trace_netif_rx_ni_entry(skb);
+
 	preempt_disable();
-	err = netif_rx(skb);
+	err = netif_rx_internal(skb);
 	if (local_softirq_pending())
 		do_softirq();
 	preempt_enable();
@@ -3311,7 +3318,10 @@ static void net_tx_action(struct softirq_action *h)
 			clist = clist->next;
 
 			WARN_ON(atomic_read(&skb->users));
-			trace_kfree_skb(skb, net_tx_action);
+			if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
+				trace_consume_skb(skb);
+			else
+				trace_kfree_skb(skb, net_tx_action);
 			__kfree_skb(skb);
 		}
 	}
@@ -3667,22 +3677,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	return ret;
 }
 
-/**
- * netif_receive_skb - process receive buffer from network
- * @skb: buffer to process
- *
- * netif_receive_skb() is the main receive data processing function.
- * It always succeeds. The buffer may be dropped during processing
- * for congestion control or by the protocol layers.
- *
- * This function may only be called from softirq context and interrupts
- * should be enabled.
- *
- * Return values (usually ignored):
- * NET_RX_SUCCESS: no congestion
- * NET_RX_DROP: packet was dropped
- */
-int netif_receive_skb(struct sk_buff *skb)
+static int netif_receive_skb_internal(struct sk_buff *skb)
 {
 	net_timestamp_check(netdev_tstamp_prequeue, skb);
 
@@ -3708,6 +3703,28 @@ int netif_receive_skb(struct sk_buff *skb)
 #endif
 	return __netif_receive_skb(skb);
 }
+
+/**
+ * netif_receive_skb - process receive buffer from network
+ * @skb: buffer to process
+ *
+ * netif_receive_skb() is the main receive data processing function.
+ * It always succeeds. The buffer may be dropped during processing
+ * for congestion control or by the protocol layers.
+ *
+ * This function may only be called from softirq context and interrupts
+ * should be enabled.
+ *
+ * Return values (usually ignored):
+ * NET_RX_SUCCESS: no congestion
+ * NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb(struct sk_buff *skb)
+{
+	trace_netif_receive_skb_entry(skb);
+
+	return netif_receive_skb_internal(skb);
+}
 EXPORT_SYMBOL(netif_receive_skb);
 
 /* Network device is going away, flush any packets still pending
@@ -3757,7 +3774,7 @@ static int napi_gro_complete(struct sk_buff *skb)
 		if (ptype->type != type || !ptype->callbacks.gro_complete)
 			continue;
 
-		err = ptype->callbacks.gro_complete(skb);
+		err = ptype->callbacks.gro_complete(skb, 0);
 		break;
 	}
 	rcu_read_unlock();
@@ -3769,7 +3786,7 @@ static int napi_gro_complete(struct sk_buff *skb)
 	}
 
 out:
-	return netif_receive_skb(skb);
+	return netif_receive_skb_internal(skb);
 }
 
 /* napi->gro_list contains packets ordered by age.
@@ -3805,10 +3822,18 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff *p;
 	unsigned int maclen = skb->dev->hard_header_len;
+	u32 hash = skb_get_hash_raw(skb);
 
 	for (p = napi->gro_list; p; p = p->next) {
 		unsigned long diffs;
 
+		NAPI_GRO_CB(p)->flush = 0;
+
+		if (hash != skb_get_hash_raw(p)) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+
 		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
 		diffs |= p->vlan_tci ^ skb->vlan_tci;
 		if (maclen == ETH_HLEN)
@@ -3819,7 +3844,23 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 				       skb_gro_mac_header(skb),
 				       maclen);
 		NAPI_GRO_CB(p)->same_flow = !diffs;
-		NAPI_GRO_CB(p)->flush = 0;
+	}
+}
+
+static void skb_gro_reset_offset(struct sk_buff *skb)
+{
+	const struct skb_shared_info *pinfo = skb_shinfo(skb);
+	const skb_frag_t *frag0 = &pinfo->frags[0];
+
+	NAPI_GRO_CB(skb)->data_offset = 0;
+	NAPI_GRO_CB(skb)->frag0 = NULL;
+	NAPI_GRO_CB(skb)->frag0_len = 0;
+
+	if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
+	    pinfo->nr_frags &&
+	    !PageHighMem(skb_frag_page(frag0))) {
+		NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
+		NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
 	}
 }
 
@@ -3838,7 +3879,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (skb_is_gso(skb) || skb_has_frag_list(skb))
 		goto normal;
 
+	skb_gro_reset_offset(skb);
 	gro_list_prepare(napi, skb);
+	NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
@@ -3850,6 +3893,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		NAPI_GRO_CB(skb)->same_flow = 0;
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
+		NAPI_GRO_CB(skb)->udp_mark = 0;
 
 		pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
 		break;
@@ -3874,10 +3918,23 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (same_flow)
 		goto ok;
 
-	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
+	if (NAPI_GRO_CB(skb)->flush)
 		goto normal;
 
-	napi->gro_count++;
+	if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
+		struct sk_buff *nskb = napi->gro_list;
+
+		/* locate the end of the list to select the 'oldest' flow */
+		while (nskb->next) {
+			pp = &nskb->next;
+			nskb = *pp;
+		}
+		*pp = NULL;
+		nskb->next = NULL;
+		napi_gro_complete(nskb);
+	} else {
+		napi->gro_count++;
+	}
 	NAPI_GRO_CB(skb)->count = 1;
 	NAPI_GRO_CB(skb)->age = jiffies;
 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
@@ -3915,12 +3972,39 @@ normal:
 	goto pull;
 }
 
+struct packet_offload *gro_find_receive_by_type(__be16 type)
+{
+	struct list_head *offload_head = &offload_base;
+	struct packet_offload *ptype;
+
+	list_for_each_entry_rcu(ptype, offload_head, list) {
+		if (ptype->type != type || !ptype->callbacks.gro_receive)
+			continue;
+		return ptype;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(gro_find_receive_by_type);
+
+struct packet_offload *gro_find_complete_by_type(__be16 type)
+{
+	struct list_head *offload_head = &offload_base;
+	struct packet_offload *ptype;
+
+	list_for_each_entry_rcu(ptype, offload_head, list) {
+		if (ptype->type != type || !ptype->callbacks.gro_complete)
+			continue;
+		return ptype;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(gro_find_complete_by_type);
 
 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
 	switch (ret) {
 	case GRO_NORMAL:
-		if (netif_receive_skb(skb))
+		if (netif_receive_skb_internal(skb))
 			ret = GRO_DROP;
 		break;
 
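gro_find_receive_by_type()/gro_find_complete_by_type(), added above, let encapsulation protocols chain into the next layer's GRO handlers, which is what the GRO tunneling work in this series (item 12 of the pull message) builds on. Below is a hedged sketch of how a hypothetical tunnel offload might use the receive-side lookup; everything except the two helpers and the packet_offload callbacks is made up, and real users (GRE/UDP encapsulation offloads) handle the outer header, flushing and same_flow bookkeeping that is omitted here.

#include <linux/netdevice.h>
#include <linux/if_ether.h>

/* Illustrative only: hand the inner packet of a hypothetical "foo"
 * encapsulation to the GRO handler registered for its ethertype. */
static struct sk_buff **foo_gro_receive(struct sk_buff **head,
					struct sk_buff *skb)
{
	struct sk_buff **pp = NULL;
	struct packet_offload *ptype;
	/* Assume the hypothetical foo header always carries IPv4. */
	__be16 inner_type = htons(ETH_P_IP);

	rcu_read_lock();
	ptype = gro_find_receive_by_type(inner_type);
	if (ptype)
		/* Delegate to the inner protocol's GRO receive callback. */
		pp = ptype->callbacks.gro_receive(head, skb);
	rcu_read_unlock();

	return pp;
}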
@@ -3943,26 +4027,9 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 	return ret;
 }
 
-static void skb_gro_reset_offset(struct sk_buff *skb)
-{
-	const struct skb_shared_info *pinfo = skb_shinfo(skb);
-	const skb_frag_t *frag0 = &pinfo->frags[0];
-
-	NAPI_GRO_CB(skb)->data_offset = 0;
-	NAPI_GRO_CB(skb)->frag0 = NULL;
-	NAPI_GRO_CB(skb)->frag0_len = 0;
-
-	if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
-	    pinfo->nr_frags &&
-	    !PageHighMem(skb_frag_page(frag0))) {
-		NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
-		NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
-	}
-}
-
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
-	skb_gro_reset_offset(skb);
+	trace_napi_gro_receive_entry(skb);
 
 	return napi_skb_finish(dev_gro_receive(napi, skb), skb);
 }
@@ -3986,8 +4053,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 
 	if (!skb) {
 		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
-		if (skb)
-			napi->skb = skb;
+		napi->skb = skb;
 	}
 	return skb;
 }
@@ -3998,12 +4064,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
 {
 	switch (ret) {
 	case GRO_NORMAL:
-	case GRO_HELD:
-		skb->protocol = eth_type_trans(skb, skb->dev);
-
-		if (ret == GRO_HELD)
-			skb_gro_pull(skb, -ETH_HLEN);
-		else if (netif_receive_skb(skb))
+		if (netif_receive_skb_internal(skb))
 			ret = GRO_DROP;
 		break;
 
@@ -4012,6 +4073,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
 		napi_reuse_skb(napi, skb);
 		break;
 
+	case GRO_HELD:
 	case GRO_MERGED:
 		break;
 	}
@@ -4022,36 +4084,15 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 {
 	struct sk_buff *skb = napi->skb;
-	struct ethhdr *eth;
-	unsigned int hlen;
-	unsigned int off;
 
 	napi->skb = NULL;
 
-	skb_reset_mac_header(skb);
-	skb_gro_reset_offset(skb);
-
-	off = skb_gro_offset(skb);
-	hlen = off + sizeof(*eth);
-	eth = skb_gro_header_fast(skb, off);
-	if (skb_gro_header_hard(skb, hlen)) {
-		eth = skb_gro_header_slow(skb, hlen, off);
-		if (unlikely(!eth)) {
-			napi_reuse_skb(napi, skb);
-			skb = NULL;
-			goto out;
-		}
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) {
+		napi_reuse_skb(napi, skb);
+		return NULL;
 	}
+	skb->protocol = eth_type_trans(skb, skb->dev);
 
-	skb_gro_pull(skb, sizeof(*eth));
-
-	/*
-	 * This works because the only protocols we care about don't require
-	 * special handling. We'll fix it up properly at the end.
-	 */
-	skb->protocol = eth->h_proto;
-
-out:
 	return skb;
 }
 
@@ -4062,12 +4103,14 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
 	if (!skb)
 		return GRO_DROP;
 
+	trace_napi_gro_frags_entry(skb);
+
 	return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
 }
 EXPORT_SYMBOL(napi_gro_frags);
 
 /*
- * net_rps_action sends any pending IPI's for rps.
+ * net_rps_action_and_irq_enable sends any pending IPI's for rps.
  * Note: called with local irq disabled, but exits with local irq enabled.
  */
 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
@@ -4272,17 +4315,10 @@ EXPORT_SYMBOL(netif_napi_add);
 
 void netif_napi_del(struct napi_struct *napi)
 {
-	struct sk_buff *skb, *next;
-
 	list_del_init(&napi->dev_list);
 	napi_free_frags(napi);
 
-	for (skb = napi->gro_list; skb; skb = next) {
-		next = skb->next;
-		skb->next = NULL;
-		kfree_skb(skb);
-	}
-
+	kfree_skb_list(napi->gro_list);
 	napi->gro_list = NULL;
 	napi->gro_count = 0;
 }
@@ -4399,19 +4435,6 @@ struct netdev_adjacent {
 	struct rcu_head rcu;
 };
 
-static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev,
-						     struct net_device *adj_dev,
-						     struct list_head *adj_list)
-{
-	struct netdev_adjacent *adj;
-
-	list_for_each_entry_rcu(adj, adj_list, list) {
-		if (adj->dev == adj_dev)
-			return adj;
-	}
-	return NULL;
-}
-
 static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
 						 struct net_device *adj_dev,
 						 struct list_head *adj_list)
@@ -4450,13 +4473,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev);
  * Find out if a device is linked to an upper device and return true in case
  * it is. The caller must hold the RTNL lock.
  */
-bool netdev_has_any_upper_dev(struct net_device *dev)
+static bool netdev_has_any_upper_dev(struct net_device *dev)
 {
 	ASSERT_RTNL();
 
 	return !list_empty(&dev->all_adj_list.upper);
 }
-EXPORT_SYMBOL(netdev_has_any_upper_dev);
 
 /**
  * netdev_master_upper_dev_get - Get master upper device
@@ -4576,6 +4598,27 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
 
 /**
+ * netdev_lower_get_first_private_rcu - Get the first ->private from the
+ *				       lower neighbour list, RCU
+ *				       variant
+ * @dev: device
+ *
+ * Gets the first netdev_adjacent->private from the dev's lower neighbour
+ * list. The caller must hold RCU read lock.
+ */
+void *netdev_lower_get_first_private_rcu(struct net_device *dev)
+{
+	struct netdev_adjacent *lower;
+
+	lower = list_first_or_null_rcu(&dev->adj_list.lower,
+			struct netdev_adjacent, list);
+	if (lower)
+		return lower->private;
+	return NULL;
+}
+EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
+
+/**
  * netdev_master_upper_dev_get_rcu - Get master upper device
  * @dev: device
  *
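netdev_lower_get_first_private_rcu(), added above, is aimed at upper devices (the macvlan/L2-forwarding-offload style users in this series) that stash a private pointer on the adjacency when linking to their lower device. A minimal, hypothetical sketch of reading that pointer back on a fast path; the struct and the policy of only looking at the first lower device are made up for illustration.

#include <linux/netdevice.h>
#include <linux/rcupdate.h>

/* Hypothetical private data the upper device attached when it was
 * linked to its lower device (illustrative only). */
struct foo_port {
	int id;
};

static int foo_get_lower_port_id(struct net_device *upper)
{
	struct foo_port *port;
	int id = -1;

	rcu_read_lock();
	port = netdev_lower_get_first_private_rcu(upper);
	if (port)
		id = port->id;	/* only dereference inside the RCU section */
	rcu_read_unlock();

	return id;
}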
@@ -4594,13 +4637,36 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
 
+int netdev_adjacent_sysfs_add(struct net_device *dev,
+			      struct net_device *adj_dev,
+			      struct list_head *dev_list)
+{
+	char linkname[IFNAMSIZ+7];
+	sprintf(linkname, dev_list == &dev->adj_list.upper ?
+		"upper_%s" : "lower_%s", adj_dev->name);
+	return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
+				 linkname);
+}
+void netdev_adjacent_sysfs_del(struct net_device *dev,
+			       char *name,
+			       struct list_head *dev_list)
+{
+	char linkname[IFNAMSIZ+7];
+	sprintf(linkname, dev_list == &dev->adj_list.upper ?
+		"upper_%s" : "lower_%s", name);
+	sysfs_remove_link(&(dev->dev.kobj), linkname);
+}
+
+#define netdev_adjacent_is_neigh_list(dev, dev_list) \
+	(dev_list == &dev->adj_list.upper || \
+	 dev_list == &dev->adj_list.lower)
+
 static int __netdev_adjacent_dev_insert(struct net_device *dev,
 					struct net_device *adj_dev,
 					struct list_head *dev_list,
 					void *private, bool master)
 {
 	struct netdev_adjacent *adj;
-	char linkname[IFNAMSIZ+7];
 	int ret;
 
 	adj = __netdev_find_adj(dev, adj_dev, dev_list);
@@ -4623,16 +4689,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 	pr_debug("dev_hold for %s, because of link added from %s to %s\n",
 		 adj_dev->name, dev->name, adj_dev->name);
 
-	if (dev_list == &dev->adj_list.lower) {
-		sprintf(linkname, "lower_%s", adj_dev->name);
-		ret = sysfs_create_link(&(dev->dev.kobj),
-					&(adj_dev->dev.kobj), linkname);
-		if (ret)
-			goto free_adj;
-	} else if (dev_list == &dev->adj_list.upper) {
-		sprintf(linkname, "upper_%s", adj_dev->name);
-		ret = sysfs_create_link(&(dev->dev.kobj),
-					&(adj_dev->dev.kobj), linkname);
+	if (netdev_adjacent_is_neigh_list(dev, dev_list)) {
+		ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
 		if (ret)
 			goto free_adj;
 	}
@@ -4652,14 +4710,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 	return 0;
 
 remove_symlinks:
-	if (dev_list == &dev->adj_list.lower) {
-		sprintf(linkname, "lower_%s", adj_dev->name);
-		sysfs_remove_link(&(dev->dev.kobj), linkname);
-	} else if (dev_list == &dev->adj_list.upper) {
-		sprintf(linkname, "upper_%s", adj_dev->name);
-		sysfs_remove_link(&(dev->dev.kobj), linkname);
-	}
-
+	if (netdev_adjacent_is_neigh_list(dev, dev_list))
+		netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
 free_adj:
 	kfree(adj);
 	dev_put(adj_dev);
@@ -4667,12 +4719,11 @@ free_adj:
 	return ret;
 }
 
-void __netdev_adjacent_dev_remove(struct net_device *dev,
-				  struct net_device *adj_dev,
-				  struct list_head *dev_list)
+static void __netdev_adjacent_dev_remove(struct net_device *dev,
+					 struct net_device *adj_dev,
+					 struct list_head *dev_list)
 {
 	struct netdev_adjacent *adj;
-	char linkname[IFNAMSIZ+7];
 
 	adj = __netdev_find_adj(dev, adj_dev, dev_list);
 
@@ -4692,13 +4743,8 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
 	if (adj->master)
 		sysfs_remove_link(&(dev->dev.kobj), "master");
 
-	if (dev_list == &dev->adj_list.lower) {
-		sprintf(linkname, "lower_%s", adj_dev->name);
-		sysfs_remove_link(&(dev->dev.kobj), linkname);
-	} else if (dev_list == &dev->adj_list.upper) {
-		sprintf(linkname, "upper_%s", adj_dev->name);
-		sysfs_remove_link(&(dev->dev.kobj), linkname);
-	}
+	if (netdev_adjacent_is_neigh_list(dev, dev_list))
+		netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
 
 	list_del_rcu(&adj->list);
 	pr_debug("dev_put for %s, because link removed from %s to %s\n",
@@ -4707,11 +4753,11 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
 	kfree_rcu(adj, rcu);
 }
 
-int __netdev_adjacent_dev_link_lists(struct net_device *dev,
-				     struct net_device *upper_dev,
-				     struct list_head *up_list,
-				     struct list_head *down_list,
-				     void *private, bool master)
+static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
+					    struct net_device *upper_dev,
+					    struct list_head *up_list,
+					    struct list_head *down_list,
+					    void *private, bool master)
 {
 	int ret;
 
@@ -4730,8 +4776,8 @@ int __netdev_adjacent_dev_link_lists(struct net_device *dev,
 	return 0;
 }
 
-int __netdev_adjacent_dev_link(struct net_device *dev,
-			       struct net_device *upper_dev)
+static int __netdev_adjacent_dev_link(struct net_device *dev,
+				      struct net_device *upper_dev)
 {
 	return __netdev_adjacent_dev_link_lists(dev, upper_dev,
 						&dev->all_adj_list.upper,
@@ -4739,26 +4785,26 @@ int __netdev_adjacent_dev_link(struct net_device *dev,
 						NULL, false);
 }
 
-void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
-					struct net_device *upper_dev,
-					struct list_head *up_list,
-					struct list_head *down_list)
+static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
+					       struct net_device *upper_dev,
+					       struct list_head *up_list,
+					       struct list_head *down_list)
 {
 	__netdev_adjacent_dev_remove(dev, upper_dev, up_list);
 	__netdev_adjacent_dev_remove(upper_dev, dev, down_list);
 }
 
-void __netdev_adjacent_dev_unlink(struct net_device *dev,
-				  struct net_device *upper_dev)
+static void __netdev_adjacent_dev_unlink(struct net_device *dev,
+					 struct net_device *upper_dev)
 {
 	__netdev_adjacent_dev_unlink_lists(dev, upper_dev,
 					   &dev->all_adj_list.upper,
 					   &upper_dev->all_adj_list.lower);
 }
 
-int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
-					 struct net_device *upper_dev,
-					 void *private, bool master)
+static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
+						struct net_device *upper_dev,
+						void *private, bool master)
 {
 	int ret = __netdev_adjacent_dev_link(dev, upper_dev);
 
@@ -4777,8 +4823,8 @@ int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
 	return 0;
 }
 
-void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
-					    struct net_device *upper_dev)
+static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
+						   struct net_device *upper_dev)
 {
 	__netdev_adjacent_dev_unlink(dev, upper_dev);
 	__netdev_adjacent_dev_unlink_lists(dev, upper_dev,
@@ -4967,20 +5013,24 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
-void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
-				       struct net_device *lower_dev)
+void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
 {
-	struct netdev_adjacent *lower;
+	struct netdev_adjacent *iter;
 
-	if (!lower_dev)
-		return NULL;
-	lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower);
-	if (!lower)
-		return NULL;
+	list_for_each_entry(iter, &dev->adj_list.upper, list) {
+		netdev_adjacent_sysfs_del(iter->dev, oldname,
+					  &iter->dev->adj_list.lower);
+		netdev_adjacent_sysfs_add(iter->dev, dev,
+					  &iter->dev->adj_list.lower);
+	}
 
-	return lower->private;
+	list_for_each_entry(iter, &dev->adj_list.lower, list) {
+		netdev_adjacent_sysfs_del(iter->dev, oldname,
+					  &iter->dev->adj_list.upper);
+		netdev_adjacent_sysfs_add(iter->dev, dev,
+					  &iter->dev->adj_list.upper);
+	}
 }
-EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu);
 
 void *netdev_lower_dev_get_private(struct net_device *dev,
 				   struct net_device *lower_dev)
@@ -5314,6 +5364,17 @@ int dev_change_flags(struct net_device *dev, unsigned int flags)
 }
 EXPORT_SYMBOL(dev_change_flags);
 
+static int __dev_set_mtu(struct net_device *dev, int new_mtu)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (ops->ndo_change_mtu)
+		return ops->ndo_change_mtu(dev, new_mtu);
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+
 /**
  * dev_set_mtu - Change maximum transfer unit
  * @dev: device
@@ -5323,8 +5384,7 @@ EXPORT_SYMBOL(dev_change_flags);
  */
 int dev_set_mtu(struct net_device *dev, int new_mtu)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
-	int err;
+	int err, orig_mtu;
 
 	if (new_mtu == dev->mtu)
 		return 0;
@@ -5336,14 +5396,25 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 	if (!netif_device_present(dev))
 		return -ENODEV;
 
-	err = 0;
-	if (ops->ndo_change_mtu)
-		err = ops->ndo_change_mtu(dev, new_mtu);
-	else
-		dev->mtu = new_mtu;
+	err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
+	err = notifier_to_errno(err);
+	if (err)
+		return err;
 
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+	orig_mtu = dev->mtu;
+	err = __dev_set_mtu(dev, new_mtu);
+
+	if (!err) {
+		err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+		err = notifier_to_errno(err);
+		if (err) {
+			/* setting mtu back and notifying everyone again,
+			 * so that they have a chance to revert changes.
+			 */
+			__dev_set_mtu(dev, orig_mtu);
+			call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+		}
+	}
 	return err;
 }
 EXPORT_SYMBOL(dev_set_mtu);
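The reworked dev_set_mtu() above first lets notifier listeners veto the change (NETDEV_PRECHANGEMTU), then applies it, and rolls back if a NETDEV_CHANGEMTU listener objects. A hedged sketch of a listener that refuses MTU changes while it is busy; the callback shape follows the usual netdevice notifier pattern, but the "busy" policy is made up, and note that at PRECHANGEMTU time dev->mtu still holds the old value.

#include <linux/netdevice.h>
#include <linux/notifier.h>

static bool foo_busy;	/* hypothetical module state */

static int foo_netdev_event(struct notifier_block *nb,
			    unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	switch (event) {
	case NETDEV_PRECHANGEMTU:
		/* Returning an error here makes dev_set_mtu() bail out
		 * before the new MTU is applied anywhere. */
		if (foo_busy)
			return notifier_from_errno(-EBUSY);
		break;
	case NETDEV_CHANGEMTU:
		/* dev->mtu already holds the new value at this point. */
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block foo_netdev_notifier = {
	.notifier_call = foo_netdev_event,
};
/* registered elsewhere with register_netdevice_notifier() */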
@@ -5697,7 +5768,7 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 }
 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 static int netif_alloc_rx_queues(struct net_device *dev)
 {
 	unsigned int i, count = dev->num_rx_queues;
@@ -5836,13 +5907,8 @@ int register_netdevice(struct net_device *dev)
 	dev->features |= NETIF_F_SOFT_FEATURES;
 	dev->wanted_features = dev->features & dev->hw_features;
 
-	/* Turn on no cache copy if HW is doing checksum */
 	if (!(dev->flags & IFF_LOOPBACK)) {
 		dev->hw_features |= NETIF_F_NOCACHE_COPY;
-		if (dev->features & NETIF_F_ALL_CSUM) {
-			dev->wanted_features |= NETIF_F_NOCACHE_COPY;
-			dev->features |= NETIF_F_NOCACHE_COPY;
-		}
 	}
 
 	/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
@@ -6247,7 +6313,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 		return NULL;
 	}
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 	if (rxqs < 1) {
 		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
 		return NULL;
@@ -6303,7 +6369,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	if (netif_alloc_netdev_queues(dev))
 		goto free_all;
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 	dev->num_rx_queues = rxqs;
 	dev->real_num_rx_queues = rxqs;
 	if (netif_alloc_rx_queues(dev))
@@ -6323,7 +6389,7 @@ free_all:
 free_pcpu:
 	free_percpu(dev->pcpu_refcnt);
 	netif_free_tx_queues(dev);
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 	kfree(dev->_rx);
 #endif
 
@@ -6348,7 +6414,7 @@ void free_netdev(struct net_device *dev)
 	release_net(dev_net(dev));
 
 	netif_free_tx_queues(dev);
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 	kfree(dev->_rx);
 #endif
 
@@ -6618,11 +6684,11 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 
 	/* Process offline CPU's input_pkt_queue */
 	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
-		netif_rx(skb);
+		netif_rx_internal(skb);
 		input_queue_head_incr(oldsd);
 	}
 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
-		netif_rx(skb);
+		netif_rx_internal(skb);
 		input_queue_head_incr(oldsd);
 	}
 
@@ -6935,28 +7001,18 @@ static int __init net_dev_init(void)
 	for_each_possible_cpu(i) {
 		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
-		memset(sd, 0, sizeof(*sd));
 		skb_queue_head_init(&sd->input_pkt_queue);
 		skb_queue_head_init(&sd->process_queue);
-		sd->completion_queue = NULL;
 		INIT_LIST_HEAD(&sd->poll_list);
-		sd->output_queue = NULL;
 		sd->output_queue_tailp = &sd->output_queue;
 #ifdef CONFIG_RPS
 		sd->csd.func = rps_trigger_softirq;
 		sd->csd.info = sd;
-		sd->csd.flags = 0;
 		sd->cpu = i;
 #endif
 
 		sd->backlog.poll = process_backlog;
 		sd->backlog.weight = weight_p;
-		sd->backlog.gro_list = NULL;
-		sd->backlog.gro_count = 0;
-
-#ifdef CONFIG_NET_FLOW_LIMIT
-		sd->flow_limit = NULL;
-#endif
 	}
 
 	dev_boot_phase = 0;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index ec40a849fc42..329d5794e7dc 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -38,7 +38,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
 	ha->type = addr_type;
 	ha->refcount = 1;
 	ha->global_use = global;
-	ha->synced = sync;
+	ha->synced = sync ? 1 : 0;
 	ha->sync_cnt = 0;
 	list_add_tail_rcu(&ha->list, &list->list);
 	list->count++;
@@ -48,7 +48,8 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
 
 static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
 			    const unsigned char *addr, int addr_len,
-			    unsigned char addr_type, bool global, bool sync)
+			    unsigned char addr_type, bool global, bool sync,
+			    int sync_count)
 {
 	struct netdev_hw_addr *ha;
 
@@ -66,10 +67,10 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
 			ha->global_use = true;
 		}
 		if (sync) {
-			if (ha->synced)
+			if (ha->synced && sync_count)
 				return -EEXIST;
 			else
-				ha->synced = true;
+				ha->synced++;
 		}
 		ha->refcount++;
 		return 0;
@@ -84,7 +85,8 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list,
 			 const unsigned char *addr, int addr_len,
 			 unsigned char addr_type)
 {
-	return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false);
+	return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false,
+				0);
 }
 
 static int __hw_addr_del_entry(struct netdev_hw_addr_list *list,
@@ -101,7 +103,7 @@ static int __hw_addr_del_entry(struct netdev_hw_addr_list *list,
 		ha->global_use = false;
 
 	if (sync)
-		ha->synced = false;
+		ha->synced--;
 
 	if (--ha->refcount)
 		return 0;
@@ -139,7 +141,7 @@ static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list,
 	int err;
 
 	err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type,
-			       false, true);
+			       false, true, ha->sync_cnt);
 	if (err && err != -EEXIST)
 		return err;
 
@@ -186,47 +188,6 @@ static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list,
 	return err;
 }
 
-int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
-			   struct netdev_hw_addr_list *from_list,
-			   int addr_len, unsigned char addr_type)
-{
-	int err;
-	struct netdev_hw_addr *ha, *ha2;
-	unsigned char type;
-
-	list_for_each_entry(ha, &from_list->list, list) {
-		type = addr_type ? addr_type : ha->type;
-		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
-		if (err)
-			goto unroll;
-	}
-	return 0;
-
-unroll:
-	list_for_each_entry(ha2, &from_list->list, list) {
-		if (ha2 == ha)
-			break;
-		type = addr_type ? addr_type : ha2->type;
-		__hw_addr_del(to_list, ha2->addr, addr_len, type);
-	}
-	return err;
-}
-EXPORT_SYMBOL(__hw_addr_add_multiple);
-
-void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
-			    struct netdev_hw_addr_list *from_list,
-			    int addr_len, unsigned char addr_type)
-{
-	struct netdev_hw_addr *ha;
-	unsigned char type;
-
-	list_for_each_entry(ha, &from_list->list, list) {
-		type = addr_type ? addr_type : ha->type;
-		__hw_addr_del(to_list, ha->addr, addr_len, type);
-	}
-}
-EXPORT_SYMBOL(__hw_addr_del_multiple);
-
 /* This function only works where there is a strict 1-1 relationship
  * between source and destionation of they synch. If you ever need to
  * sync addresses to more then 1 destination, you need to use
@@ -264,7 +225,7 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
 }
 EXPORT_SYMBOL(__hw_addr_unsync);
 
-void __hw_addr_flush(struct netdev_hw_addr_list *list)
+static void __hw_addr_flush(struct netdev_hw_addr_list *list)
 {
 	struct netdev_hw_addr *ha, *tmp;
 
@@ -274,7 +235,6 @@ void __hw_addr_flush(struct netdev_hw_addr_list *list)
 	}
 	list->count = 0;
 }
-EXPORT_SYMBOL(__hw_addr_flush);
 
 void __hw_addr_init(struct netdev_hw_addr_list *list)
 {
@@ -400,59 +360,6 @@ int dev_addr_del(struct net_device *dev, const unsigned char *addr,
 }
 EXPORT_SYMBOL(dev_addr_del);
 
-/**
- * dev_addr_add_multiple - Add device addresses from another device
- * @to_dev: device to which addresses will be added
- * @from_dev: device from which addresses will be added
- * @addr_type: address type - 0 means type will be used from from_dev
- *
- * Add device addresses of the one device to another.
- **
- * The caller must hold the rtnl_mutex.
- */
-int dev_addr_add_multiple(struct net_device *to_dev,
-			  struct net_device *from_dev,
-			  unsigned char addr_type)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	if (from_dev->addr_len != to_dev->addr_len)
-		return -EINVAL;
-	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
-				     to_dev->addr_len, addr_type);
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_addr_add_multiple);
-
-/**
- * dev_addr_del_multiple - Delete device addresses by another device
- * @to_dev: device where the addresses will be deleted
- * @from_dev: device supplying the addresses to be deleted
- * @addr_type: address type - 0 means type will be used from from_dev
- *
- * Deletes addresses in to device by the list of addresses in from device.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_addr_del_multiple(struct net_device *to_dev,
-			  struct net_device *from_dev,
-			  unsigned char addr_type)
-{
-	ASSERT_RTNL();
-
-	if (from_dev->addr_len != to_dev->addr_len)
-		return -EINVAL;
-	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
-			       to_dev->addr_len, addr_type);
-	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
-	return 0;
-}
-EXPORT_SYMBOL(dev_addr_del_multiple);
-
 /*
  * Unicast list handling functions
  */
@@ -676,7 +583,7 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
676 583
677 netif_addr_lock_bh(dev); 584 netif_addr_lock_bh(dev);
678 err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, 585 err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
679 NETDEV_HW_ADDR_T_MULTICAST, global, false); 586 NETDEV_HW_ADDR_T_MULTICAST, global, false, 0);
680 if (!err) 587 if (!err)
681 __dev_set_rx_mode(dev); 588 __dev_set_rx_mode(dev);
682 netif_addr_unlock_bh(dev); 589 netif_addr_unlock_bh(dev);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 5b7d0e1d0664..cf999e09bcd2 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -327,6 +327,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
327 cmd == SIOCBRADDIF || 327 cmd == SIOCBRADDIF ||
328 cmd == SIOCBRDELIF || 328 cmd == SIOCBRDELIF ||
329 cmd == SIOCSHWTSTAMP || 329 cmd == SIOCSHWTSTAMP ||
330 cmd == SIOCGHWTSTAMP ||
330 cmd == SIOCWANDEV) { 331 cmd == SIOCWANDEV) {
331 err = -EOPNOTSUPP; 332 err = -EOPNOTSUPP;
332 if (ops->ndo_do_ioctl) { 333 if (ops->ndo_do_ioctl) {
@@ -546,6 +547,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
546 */ 547 */
547 default: 548 default:
548 if (cmd == SIOCWANDEV || 549 if (cmd == SIOCWANDEV ||
550 cmd == SIOCGHWTSTAMP ||
549 (cmd >= SIOCDEVPRIVATE && 551 (cmd >= SIOCDEVPRIVATE &&
550 cmd <= SIOCDEVPRIVATE + 15)) { 552 cmd <= SIOCDEVPRIVATE + 15)) {
551 dev_load(net, ifr.ifr_name); 553 dev_load(net, ifr.ifr_name);
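SIOCGHWTSTAMP is only added to the two pass-through lists above, so drivers that already implement ndo_do_ioctl can report their current hardware timestamping state. A minimal userspace sketch of the intended use, assuming headers recent enough to define SIOCGHWTSTAMP; the interface name is an example only:

#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/sockios.h>
#include <linux/net_tstamp.h>

/* Sketch: read back the hw timestamp config of "eth0" on an open socket fd. */
static int get_hwtstamp_config(int sock, struct hwtstamp_config *cfg)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	memset(cfg, 0, sizeof(*cfg));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)cfg;

	/* SIOCGHWTSTAMP mirrors SIOCSHWTSTAMP but only reads the state. */
	return ioctl(sock, SIOCGHWTSTAMP, &ifr);
}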
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2fc5beaf5783..87577d447554 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -202,12 +202,12 @@ static __always_inline u32 __flow_hash_1word(u32 a)
202} 202}
203 203
204/* 204/*
205 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses 205 * __skb_get_hash: calculate a flow hash based on src/dst addresses
206 * and src/dst port numbers. Sets rxhash in skb to non-zero hash value 206 * and src/dst port numbers. Sets rxhash in skb to non-zero hash value
207 * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb 207 * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb
208 * if hash is a canonical 4-tuple hash over transport ports. 208 * if hash is a canonical 4-tuple hash over transport ports.
209 */ 209 */
210void __skb_get_rxhash(struct sk_buff *skb) 210void __skb_get_hash(struct sk_buff *skb)
211{ 211{
212 struct flow_keys keys; 212 struct flow_keys keys;
213 u32 hash; 213 u32 hash;
@@ -234,7 +234,7 @@ void __skb_get_rxhash(struct sk_buff *skb)
234 234
235 skb->rxhash = hash; 235 skb->rxhash = hash;
236} 236}
237EXPORT_SYMBOL(__skb_get_rxhash); 237EXPORT_SYMBOL(__skb_get_hash);
238 238
239/* 239/*
240 * Returns a Tx hash based on the given packet descriptor a Tx queues' number 240 * Returns a Tx hash based on the given packet descriptor a Tx queues' number
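The __skb_get_rxhash() to __skb_get_hash() rename pairs with an inline wrapper in the skbuff header (outside this diff); presumably the wrapper only falls back to the software flow dissector when no valid layer-4 hash is already present, roughly:

/* Assumed fast-path wrapper: keep a hash the NIC (or an earlier call)
 * already provided, otherwise dissect the packet in software. */
static inline __u32 skb_get_hash(struct sk_buff *skb)
{
	if (!skb->l4_rxhash)
		__skb_get_hash(skb);

	return skb->rxhash;
}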
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 932c6d7cf666..b9e9e0d38672 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -38,6 +38,8 @@
38#include <linux/random.h> 38#include <linux/random.h>
39#include <linux/string.h> 39#include <linux/string.h>
40#include <linux/log2.h> 40#include <linux/log2.h>
41#include <linux/inetdevice.h>
42#include <net/addrconf.h>
41 43
42#define DEBUG 44#define DEBUG
43#define NEIGH_DEBUG 1 45#define NEIGH_DEBUG 1
@@ -115,7 +117,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
115 117
116unsigned long neigh_rand_reach_time(unsigned long base) 118unsigned long neigh_rand_reach_time(unsigned long base)
117{ 119{
118 return base ? (net_random() % base) + (base >> 1) : 0; 120 return base ? (prandom_u32() % base) + (base >> 1) : 0;
119} 121}
120EXPORT_SYMBOL(neigh_rand_reach_time); 122EXPORT_SYMBOL(neigh_rand_reach_time);
121 123
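net_random() was historically just a thin alias for the kernel PRNG, so conversions like the one above are mechanical; the assumed definition being retired tree-wide is roughly:

	#define net_random()	prandom_u32()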
@@ -497,7 +499,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
497 goto out_neigh_release; 499 goto out_neigh_release;
498 } 500 }
499 501
500 n->confirmed = jiffies - (n->parms->base_reachable_time << 1); 502 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
501 503
502 write_lock_bh(&tbl->lock); 504 write_lock_bh(&tbl->lock);
503 nht = rcu_dereference_protected(tbl->nht, 505 nht = rcu_dereference_protected(tbl->nht,
@@ -776,7 +778,7 @@ static void neigh_periodic_work(struct work_struct *work)
776 tbl->last_rand = jiffies; 778 tbl->last_rand = jiffies;
777 for (p = &tbl->parms; p; p = p->next) 779 for (p = &tbl->parms; p; p = p->next)
778 p->reachable_time = 780 p->reachable_time =
779 neigh_rand_reach_time(p->base_reachable_time); 781 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
780 } 782 }
781 783
782 for (i = 0 ; i < (1 << nht->hash_shift); i++) { 784 for (i = 0 ; i < (1 << nht->hash_shift); i++) {
@@ -799,7 +801,7 @@ static void neigh_periodic_work(struct work_struct *work)
799 801
800 if (atomic_read(&n->refcnt) == 1 && 802 if (atomic_read(&n->refcnt) == 1 &&
801 (state == NUD_FAILED || 803 (state == NUD_FAILED ||
802 time_after(jiffies, n->used + n->parms->gc_staletime))) { 804 time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
803 *np = n->next; 805 *np = n->next;
804 n->dead = 1; 806 n->dead = 1;
805 write_unlock(&n->lock); 807 write_unlock(&n->lock);
@@ -822,12 +824,12 @@ next_elt:
822 lockdep_is_held(&tbl->lock)); 824 lockdep_is_held(&tbl->lock));
823 } 825 }
824out: 826out:
825 /* Cycle through all hash buckets every base_reachable_time/2 ticks. 827 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
826 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 828 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
827 * base_reachable_time. 829 * BASE_REACHABLE_TIME.
828 */ 830 */
829 schedule_delayed_work(&tbl->gc_work, 831 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
830 tbl->parms.base_reachable_time >> 1); 832 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
831 write_unlock_bh(&tbl->lock); 833 write_unlock_bh(&tbl->lock);
832} 834}
833 835
@@ -835,8 +837,9 @@ static __inline__ int neigh_max_probes(struct neighbour *n)
835{ 837{
836 struct neigh_parms *p = n->parms; 838 struct neigh_parms *p = n->parms;
837 return (n->nud_state & NUD_PROBE) ? 839 return (n->nud_state & NUD_PROBE) ?
838 p->ucast_probes : 840 NEIGH_VAR(p, UCAST_PROBES) :
839 p->ucast_probes + p->app_probes + p->mcast_probes; 841 NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
842 NEIGH_VAR(p, MCAST_PROBES);
840} 843}
841 844
842static void neigh_invalidate(struct neighbour *neigh) 845static void neigh_invalidate(struct neighbour *neigh)
@@ -901,12 +904,13 @@ static void neigh_timer_handler(unsigned long arg)
901 neigh_dbg(2, "neigh %p is still alive\n", neigh); 904 neigh_dbg(2, "neigh %p is still alive\n", neigh);
902 next = neigh->confirmed + neigh->parms->reachable_time; 905 next = neigh->confirmed + neigh->parms->reachable_time;
903 } else if (time_before_eq(now, 906 } else if (time_before_eq(now,
904 neigh->used + neigh->parms->delay_probe_time)) { 907 neigh->used +
908 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
905 neigh_dbg(2, "neigh %p is delayed\n", neigh); 909 neigh_dbg(2, "neigh %p is delayed\n", neigh);
906 neigh->nud_state = NUD_DELAY; 910 neigh->nud_state = NUD_DELAY;
907 neigh->updated = jiffies; 911 neigh->updated = jiffies;
908 neigh_suspect(neigh); 912 neigh_suspect(neigh);
909 next = now + neigh->parms->delay_probe_time; 913 next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
910 } else { 914 } else {
911 neigh_dbg(2, "neigh %p is suspected\n", neigh); 915 neigh_dbg(2, "neigh %p is suspected\n", neigh);
912 neigh->nud_state = NUD_STALE; 916 neigh->nud_state = NUD_STALE;
@@ -916,7 +920,8 @@ static void neigh_timer_handler(unsigned long arg)
916 } 920 }
917 } else if (state & NUD_DELAY) { 921 } else if (state & NUD_DELAY) {
918 if (time_before_eq(now, 922 if (time_before_eq(now,
919 neigh->confirmed + neigh->parms->delay_probe_time)) { 923 neigh->confirmed +
924 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
920 neigh_dbg(2, "neigh %p is now reachable\n", neigh); 925 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
921 neigh->nud_state = NUD_REACHABLE; 926 neigh->nud_state = NUD_REACHABLE;
922 neigh->updated = jiffies; 927 neigh->updated = jiffies;
@@ -928,11 +933,11 @@ static void neigh_timer_handler(unsigned long arg)
928 neigh->nud_state = NUD_PROBE; 933 neigh->nud_state = NUD_PROBE;
929 neigh->updated = jiffies; 934 neigh->updated = jiffies;
930 atomic_set(&neigh->probes, 0); 935 atomic_set(&neigh->probes, 0);
931 next = now + neigh->parms->retrans_time; 936 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
932 } 937 }
933 } else { 938 } else {
934 /* NUD_PROBE|NUD_INCOMPLETE */ 939 /* NUD_PROBE|NUD_INCOMPLETE */
935 next = now + neigh->parms->retrans_time; 940 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
936 } 941 }
937 942
938 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && 943 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
@@ -973,13 +978,16 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
973 goto out_unlock_bh; 978 goto out_unlock_bh;
974 979
975 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { 980 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
976 if (neigh->parms->mcast_probes + neigh->parms->app_probes) { 981 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
982 NEIGH_VAR(neigh->parms, APP_PROBES)) {
977 unsigned long next, now = jiffies; 983 unsigned long next, now = jiffies;
978 984
979 atomic_set(&neigh->probes, neigh->parms->ucast_probes); 985 atomic_set(&neigh->probes,
986 NEIGH_VAR(neigh->parms, UCAST_PROBES));
980 neigh->nud_state = NUD_INCOMPLETE; 987 neigh->nud_state = NUD_INCOMPLETE;
981 neigh->updated = now; 988 neigh->updated = now;
982 next = now + max(neigh->parms->retrans_time, HZ/2); 989 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
990 HZ/2);
983 neigh_add_timer(neigh, next); 991 neigh_add_timer(neigh, next);
984 immediate_probe = true; 992 immediate_probe = true;
985 } else { 993 } else {
@@ -994,14 +1002,14 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
994 neigh_dbg(2, "neigh %p is delayed\n", neigh); 1002 neigh_dbg(2, "neigh %p is delayed\n", neigh);
995 neigh->nud_state = NUD_DELAY; 1003 neigh->nud_state = NUD_DELAY;
996 neigh->updated = jiffies; 1004 neigh->updated = jiffies;
997 neigh_add_timer(neigh, 1005 neigh_add_timer(neigh, jiffies +
998 jiffies + neigh->parms->delay_probe_time); 1006 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
999 } 1007 }
1000 1008
1001 if (neigh->nud_state == NUD_INCOMPLETE) { 1009 if (neigh->nud_state == NUD_INCOMPLETE) {
1002 if (skb) { 1010 if (skb) {
1003 while (neigh->arp_queue_len_bytes + skb->truesize > 1011 while (neigh->arp_queue_len_bytes + skb->truesize >
1004 neigh->parms->queue_len_bytes) { 1012 NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1005 struct sk_buff *buff; 1013 struct sk_buff *buff;
1006 1014
1007 buff = __skb_dequeue(&neigh->arp_queue); 1015 buff = __skb_dequeue(&neigh->arp_queue);
@@ -1171,7 +1179,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1171 neigh_update_hhs(neigh); 1179 neigh_update_hhs(neigh);
1172 if (!(new & NUD_CONNECTED)) 1180 if (!(new & NUD_CONNECTED))
1173 neigh->confirmed = jiffies - 1181 neigh->confirmed = jiffies -
1174 (neigh->parms->base_reachable_time << 1); 1182 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1175 notify = 1; 1183 notify = 1;
1176 } 1184 }
1177 if (new == old) 1185 if (new == old)
@@ -1231,6 +1239,21 @@ out:
1231} 1239}
1232EXPORT_SYMBOL(neigh_update); 1240EXPORT_SYMBOL(neigh_update);
1233 1241
1242/* Update the neigh to listen temporarily for probe responses, even if it is
1243 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1244 */
1245void __neigh_set_probe_once(struct neighbour *neigh)
1246{
1247 neigh->updated = jiffies;
1248 if (!(neigh->nud_state & NUD_FAILED))
1249 return;
1250 neigh->nud_state = NUD_PROBE;
1251 atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES));
1252 neigh_add_timer(neigh,
1253 jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1254}
1255EXPORT_SYMBOL(__neigh_set_probe_once);
1256
1234struct neighbour *neigh_event_ns(struct neigh_table *tbl, 1257struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1235 u8 *lladdr, void *saddr, 1258 u8 *lladdr, void *saddr,
1236 struct net_device *dev) 1259 struct net_device *dev)
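A hypothetical caller of the newly exported helper, e.g. a protocol that wants to re-probe a neighbour stuck in NUD_FAILED, might look roughly like this (function name is illustrative, not part of this diff):

/* Sketch: force a one-off unicast probe of a possibly-failed neighbour. */
static void probe_neigh_once(struct neighbour *neigh)
{
	write_lock_bh(&neigh->lock);	/* the helper requires neigh->lock held for writing */
	__neigh_set_probe_once(neigh);
	write_unlock_bh(&neigh->lock);
	/* The protocol then sends its own solicitation; the retransmit timer
	 * armed inside the helper takes care of retries. */
}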
@@ -1392,9 +1415,11 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1392 struct sk_buff *skb) 1415 struct sk_buff *skb)
1393{ 1416{
1394 unsigned long now = jiffies; 1417 unsigned long now = jiffies;
1395 unsigned long sched_next = now + (net_random() % p->proxy_delay);
1396 1418
1397 if (tbl->proxy_queue.qlen > p->proxy_qlen) { 1419 unsigned long sched_next = now + (prandom_u32() %
1420 NEIGH_VAR(p, PROXY_DELAY));
1421
1422 if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1398 kfree_skb(skb); 1423 kfree_skb(skb);
1399 return; 1424 return;
1400 } 1425 }
@@ -1441,7 +1466,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1441 p->tbl = tbl; 1466 p->tbl = tbl;
1442 atomic_set(&p->refcnt, 1); 1467 atomic_set(&p->refcnt, 1);
1443 p->reachable_time = 1468 p->reachable_time =
1444 neigh_rand_reach_time(p->base_reachable_time); 1469 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1445 dev_hold(dev); 1470 dev_hold(dev);
1446 p->dev = dev; 1471 p->dev = dev;
1447 write_pnet(&p->net, hold_net(net)); 1472 write_pnet(&p->net, hold_net(net));
@@ -1458,6 +1483,8 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1458 p->next = tbl->parms.next; 1483 p->next = tbl->parms.next;
1459 tbl->parms.next = p; 1484 tbl->parms.next = p;
1460 write_unlock_bh(&tbl->lock); 1485 write_unlock_bh(&tbl->lock);
1486
1487 neigh_parms_data_state_cleanall(p);
1461 } 1488 }
1462 return p; 1489 return p;
1463} 1490}
@@ -1510,7 +1537,7 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1510 write_pnet(&tbl->parms.net, &init_net); 1537 write_pnet(&tbl->parms.net, &init_net);
1511 atomic_set(&tbl->parms.refcnt, 1); 1538 atomic_set(&tbl->parms.refcnt, 1);
1512 tbl->parms.reachable_time = 1539 tbl->parms.reachable_time =
1513 neigh_rand_reach_time(tbl->parms.base_reachable_time); 1540 neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1514 1541
1515 tbl->stats = alloc_percpu(struct neigh_statistics); 1542 tbl->stats = alloc_percpu(struct neigh_statistics);
1516 if (!tbl->stats) 1543 if (!tbl->stats)
@@ -1538,7 +1565,8 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1538 1565
1539 rwlock_init(&tbl->lock); 1566 rwlock_init(&tbl->lock);
1540 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); 1567 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1541 schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); 1568 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1569 tbl->parms.reachable_time);
1542 setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl); 1570 setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1543 skb_queue_head_init_class(&tbl->proxy_queue, 1571 skb_queue_head_init_class(&tbl->proxy_queue,
1544 &neigh_table_proxy_queue_class); 1572 &neigh_table_proxy_queue_class);
@@ -1778,24 +1806,32 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1778 if ((parms->dev && 1806 if ((parms->dev &&
1779 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) || 1807 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1780 nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) || 1808 nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1781 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) || 1809 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1810 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1782 /* approximative value for deprecated QUEUE_LEN (in packets) */ 1811 /* approximative value for deprecated QUEUE_LEN (in packets) */
1783 nla_put_u32(skb, NDTPA_QUEUE_LEN, 1812 nla_put_u32(skb, NDTPA_QUEUE_LEN,
1784 parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) || 1813 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1785 nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) || 1814 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1786 nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) || 1815 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1787 nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) || 1816 nla_put_u32(skb, NDTPA_UCAST_PROBES,
1788 nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) || 1817 NEIGH_VAR(parms, UCAST_PROBES)) ||
1818 nla_put_u32(skb, NDTPA_MCAST_PROBES,
1819 NEIGH_VAR(parms, MCAST_PROBES)) ||
1789 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) || 1820 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1790 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, 1821 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1791 parms->base_reachable_time) || 1822 NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
1792 nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) || 1823 nla_put_msecs(skb, NDTPA_GC_STALETIME,
1824 NEIGH_VAR(parms, GC_STALETIME)) ||
1793 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME, 1825 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1794 parms->delay_probe_time) || 1826 NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
1795 nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) || 1827 nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1796 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) || 1828 NEIGH_VAR(parms, RETRANS_TIME)) ||
1797 nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) || 1829 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1798 nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime)) 1830 NEIGH_VAR(parms, ANYCAST_DELAY)) ||
1831 nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1832 NEIGH_VAR(parms, PROXY_DELAY)) ||
1833 nla_put_msecs(skb, NDTPA_LOCKTIME,
1834 NEIGH_VAR(parms, LOCKTIME)))
1799 goto nla_put_failure; 1835 goto nla_put_failure;
1800 return nla_nest_end(skb, nest); 1836 return nla_nest_end(skb, nest);
1801 1837
@@ -2011,44 +2047,57 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
2011 2047
2012 switch (i) { 2048 switch (i) {
2013 case NDTPA_QUEUE_LEN: 2049 case NDTPA_QUEUE_LEN:
2014 p->queue_len_bytes = nla_get_u32(tbp[i]) * 2050 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2015 SKB_TRUESIZE(ETH_FRAME_LEN); 2051 nla_get_u32(tbp[i]) *
2052 SKB_TRUESIZE(ETH_FRAME_LEN));
2016 break; 2053 break;
2017 case NDTPA_QUEUE_LENBYTES: 2054 case NDTPA_QUEUE_LENBYTES:
2018 p->queue_len_bytes = nla_get_u32(tbp[i]); 2055 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2056 nla_get_u32(tbp[i]));
2019 break; 2057 break;
2020 case NDTPA_PROXY_QLEN: 2058 case NDTPA_PROXY_QLEN:
2021 p->proxy_qlen = nla_get_u32(tbp[i]); 2059 NEIGH_VAR_SET(p, PROXY_QLEN,
2060 nla_get_u32(tbp[i]));
2022 break; 2061 break;
2023 case NDTPA_APP_PROBES: 2062 case NDTPA_APP_PROBES:
2024 p->app_probes = nla_get_u32(tbp[i]); 2063 NEIGH_VAR_SET(p, APP_PROBES,
2064 nla_get_u32(tbp[i]));
2025 break; 2065 break;
2026 case NDTPA_UCAST_PROBES: 2066 case NDTPA_UCAST_PROBES:
2027 p->ucast_probes = nla_get_u32(tbp[i]); 2067 NEIGH_VAR_SET(p, UCAST_PROBES,
2068 nla_get_u32(tbp[i]));
2028 break; 2069 break;
2029 case NDTPA_MCAST_PROBES: 2070 case NDTPA_MCAST_PROBES:
2030 p->mcast_probes = nla_get_u32(tbp[i]); 2071 NEIGH_VAR_SET(p, MCAST_PROBES,
2072 nla_get_u32(tbp[i]));
2031 break; 2073 break;
2032 case NDTPA_BASE_REACHABLE_TIME: 2074 case NDTPA_BASE_REACHABLE_TIME:
2033 p->base_reachable_time = nla_get_msecs(tbp[i]); 2075 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2076 nla_get_msecs(tbp[i]));
2034 break; 2077 break;
2035 case NDTPA_GC_STALETIME: 2078 case NDTPA_GC_STALETIME:
2036 p->gc_staletime = nla_get_msecs(tbp[i]); 2079 NEIGH_VAR_SET(p, GC_STALETIME,
2080 nla_get_msecs(tbp[i]));
2037 break; 2081 break;
2038 case NDTPA_DELAY_PROBE_TIME: 2082 case NDTPA_DELAY_PROBE_TIME:
2039 p->delay_probe_time = nla_get_msecs(tbp[i]); 2083 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2084 nla_get_msecs(tbp[i]));
2040 break; 2085 break;
2041 case NDTPA_RETRANS_TIME: 2086 case NDTPA_RETRANS_TIME:
2042 p->retrans_time = nla_get_msecs(tbp[i]); 2087 NEIGH_VAR_SET(p, RETRANS_TIME,
2088 nla_get_msecs(tbp[i]));
2043 break; 2089 break;
2044 case NDTPA_ANYCAST_DELAY: 2090 case NDTPA_ANYCAST_DELAY:
2045 p->anycast_delay = nla_get_msecs(tbp[i]); 2091 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2092 nla_get_msecs(tbp[i]));
2046 break; 2093 break;
2047 case NDTPA_PROXY_DELAY: 2094 case NDTPA_PROXY_DELAY:
2048 p->proxy_delay = nla_get_msecs(tbp[i]); 2095 NEIGH_VAR_SET(p, PROXY_DELAY,
2096 nla_get_msecs(tbp[i]));
2049 break; 2097 break;
2050 case NDTPA_LOCKTIME: 2098 case NDTPA_LOCKTIME:
2051 p->locktime = nla_get_msecs(tbp[i]); 2099 NEIGH_VAR_SET(p, LOCKTIME,
2100 nla_get_msecs(tbp[i]));
2052 break; 2101 break;
2053 } 2102 }
2054 } 2103 }
@@ -2789,133 +2838,167 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write,
2789 return ret; 2838 return ret;
2790} 2839}
2791 2840
2792enum { 2841static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2793 NEIGH_VAR_MCAST_PROBE, 2842 int family)
2794 NEIGH_VAR_UCAST_PROBE, 2843{
2795 NEIGH_VAR_APP_PROBE, 2844 switch (family) {
2796 NEIGH_VAR_RETRANS_TIME, 2845 case AF_INET:
2797 NEIGH_VAR_BASE_REACHABLE_TIME, 2846 return __in_dev_arp_parms_get_rcu(dev);
2798 NEIGH_VAR_DELAY_PROBE_TIME, 2847 case AF_INET6:
2799 NEIGH_VAR_GC_STALETIME, 2848 return __in6_dev_nd_parms_get_rcu(dev);
2800 NEIGH_VAR_QUEUE_LEN, 2849 }
2801 NEIGH_VAR_QUEUE_LEN_BYTES, 2850 return NULL;
2802 NEIGH_VAR_PROXY_QLEN, 2851}
2803 NEIGH_VAR_ANYCAST_DELAY, 2852
2804 NEIGH_VAR_PROXY_DELAY, 2853static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
2805 NEIGH_VAR_LOCKTIME, 2854 int index)
2806 NEIGH_VAR_RETRANS_TIME_MS, 2855{
2807 NEIGH_VAR_BASE_REACHABLE_TIME_MS, 2856 struct net_device *dev;
2808 NEIGH_VAR_GC_INTERVAL, 2857 int family = neigh_parms_family(p);
2809 NEIGH_VAR_GC_THRESH1, 2858
2810 NEIGH_VAR_GC_THRESH2, 2859 rcu_read_lock();
2811 NEIGH_VAR_GC_THRESH3, 2860 for_each_netdev_rcu(net, dev) {
2812 NEIGH_VAR_MAX 2861 struct neigh_parms *dst_p =
2813}; 2862 neigh_get_dev_parms_rcu(dev, family);
2863
2864 if (dst_p && !test_bit(index, dst_p->data_state))
2865 dst_p->data[index] = p->data[index];
2866 }
2867 rcu_read_unlock();
2868}
2869
2870static void neigh_proc_update(struct ctl_table *ctl, int write)
2871{
2872 struct net_device *dev = ctl->extra1;
2873 struct neigh_parms *p = ctl->extra2;
2874 struct net *net = neigh_parms_net(p);
2875 int index = (int *) ctl->data - p->data;
2876
2877 if (!write)
2878 return;
2879
2880 set_bit(index, p->data_state);
2881 if (!dev) /* NULL dev means this is default value */
2882 neigh_copy_dflt_parms(net, p, index);
2883}
2884
2885static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
2886 void __user *buffer,
2887 size_t *lenp, loff_t *ppos)
2888{
2889 struct ctl_table tmp = *ctl;
2890 int ret;
2891
2892 tmp.extra1 = &zero;
2893 tmp.extra2 = &int_max;
2894
2895 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2896 neigh_proc_update(ctl, write);
2897 return ret;
2898}
2899
2900int neigh_proc_dointvec(struct ctl_table *ctl, int write,
2901 void __user *buffer, size_t *lenp, loff_t *ppos)
2902{
2903 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2904
2905 neigh_proc_update(ctl, write);
2906 return ret;
2907}
2908EXPORT_SYMBOL(neigh_proc_dointvec);
2909
2910int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
2911 void __user *buffer,
2912 size_t *lenp, loff_t *ppos)
2913{
2914 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
2915
2916 neigh_proc_update(ctl, write);
2917 return ret;
2918}
2919EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
2920
2921static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
2922 void __user *buffer,
2923 size_t *lenp, loff_t *ppos)
2924{
2925 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
2926
2927 neigh_proc_update(ctl, write);
2928 return ret;
2929}
2930
2931int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
2932 void __user *buffer,
2933 size_t *lenp, loff_t *ppos)
2934{
2935 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
2936
2937 neigh_proc_update(ctl, write);
2938 return ret;
2939}
2940EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
2941
2942static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
2943 void __user *buffer,
2944 size_t *lenp, loff_t *ppos)
2945{
2946 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
2947
2948 neigh_proc_update(ctl, write);
2949 return ret;
2950}
2951
2952#define NEIGH_PARMS_DATA_OFFSET(index) \
2953 (&((struct neigh_parms *) 0)->data[index])
2954
2955#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
2956 [NEIGH_VAR_ ## attr] = { \
2957 .procname = name, \
2958 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
2959 .maxlen = sizeof(int), \
2960 .mode = mval, \
2961 .proc_handler = proc, \
2962 }
2963
2964#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
2965 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
2966
2967#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
2968 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
2969
2970#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
2971 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
2972
2973#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
2974 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
2975
2976#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
2977 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
2978
2979#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
2980 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
2814 2981
2815static struct neigh_sysctl_table { 2982static struct neigh_sysctl_table {
2816 struct ctl_table_header *sysctl_header; 2983 struct ctl_table_header *sysctl_header;
2817 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; 2984 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2818} neigh_sysctl_template __read_mostly = { 2985} neigh_sysctl_template __read_mostly = {
2819 .neigh_vars = { 2986 .neigh_vars = {
2820 [NEIGH_VAR_MCAST_PROBE] = { 2987 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
2821 .procname = "mcast_solicit", 2988 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
2822 .maxlen = sizeof(int), 2989 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
2823 .mode = 0644, 2990 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
2824 .extra1 = &zero, 2991 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
2825 .extra2 = &int_max, 2992 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
2826 .proc_handler = proc_dointvec_minmax, 2993 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
2827 }, 2994 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
2828 [NEIGH_VAR_UCAST_PROBE] = { 2995 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
2829 .procname = "ucast_solicit", 2996 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
2830 .maxlen = sizeof(int), 2997 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
2831 .mode = 0644, 2998 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
2832 .extra1 = &zero, 2999 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
2833 .extra2 = &int_max, 3000 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
2834 .proc_handler = proc_dointvec_minmax, 3001 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
2835 },
2836 [NEIGH_VAR_APP_PROBE] = {
2837 .procname = "app_solicit",
2838 .maxlen = sizeof(int),
2839 .mode = 0644,
2840 .extra1 = &zero,
2841 .extra2 = &int_max,
2842 .proc_handler = proc_dointvec_minmax,
2843 },
2844 [NEIGH_VAR_RETRANS_TIME] = {
2845 .procname = "retrans_time",
2846 .maxlen = sizeof(int),
2847 .mode = 0644,
2848 .proc_handler = proc_dointvec_userhz_jiffies,
2849 },
2850 [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2851 .procname = "base_reachable_time",
2852 .maxlen = sizeof(int),
2853 .mode = 0644,
2854 .proc_handler = proc_dointvec_jiffies,
2855 },
2856 [NEIGH_VAR_DELAY_PROBE_TIME] = {
2857 .procname = "delay_first_probe_time",
2858 .maxlen = sizeof(int),
2859 .mode = 0644,
2860 .proc_handler = proc_dointvec_jiffies,
2861 },
2862 [NEIGH_VAR_GC_STALETIME] = {
2863 .procname = "gc_stale_time",
2864 .maxlen = sizeof(int),
2865 .mode = 0644,
2866 .proc_handler = proc_dointvec_jiffies,
2867 },
2868 [NEIGH_VAR_QUEUE_LEN] = {
2869 .procname = "unres_qlen",
2870 .maxlen = sizeof(int),
2871 .mode = 0644,
2872 .proc_handler = proc_unres_qlen,
2873 },
2874 [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2875 .procname = "unres_qlen_bytes",
2876 .maxlen = sizeof(int),
2877 .mode = 0644,
2878 .extra1 = &zero,
2879 .proc_handler = proc_dointvec_minmax,
2880 },
2881 [NEIGH_VAR_PROXY_QLEN] = {
2882 .procname = "proxy_qlen",
2883 .maxlen = sizeof(int),
2884 .mode = 0644,
2885 .extra1 = &zero,
2886 .extra2 = &int_max,
2887 .proc_handler = proc_dointvec_minmax,
2888 },
2889 [NEIGH_VAR_ANYCAST_DELAY] = {
2890 .procname = "anycast_delay",
2891 .maxlen = sizeof(int),
2892 .mode = 0644,
2893 .proc_handler = proc_dointvec_userhz_jiffies,
2894 },
2895 [NEIGH_VAR_PROXY_DELAY] = {
2896 .procname = "proxy_delay",
2897 .maxlen = sizeof(int),
2898 .mode = 0644,
2899 .proc_handler = proc_dointvec_userhz_jiffies,
2900 },
2901 [NEIGH_VAR_LOCKTIME] = {
2902 .procname = "locktime",
2903 .maxlen = sizeof(int),
2904 .mode = 0644,
2905 .proc_handler = proc_dointvec_userhz_jiffies,
2906 },
2907 [NEIGH_VAR_RETRANS_TIME_MS] = {
2908 .procname = "retrans_time_ms",
2909 .maxlen = sizeof(int),
2910 .mode = 0644,
2911 .proc_handler = proc_dointvec_ms_jiffies,
2912 },
2913 [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2914 .procname = "base_reachable_time_ms",
2915 .maxlen = sizeof(int),
2916 .mode = 0644,
2917 .proc_handler = proc_dointvec_ms_jiffies,
2918 },
2919 [NEIGH_VAR_GC_INTERVAL] = { 3002 [NEIGH_VAR_GC_INTERVAL] = {
2920 .procname = "gc_interval", 3003 .procname = "gc_interval",
2921 .maxlen = sizeof(int), 3004 .maxlen = sizeof(int),
@@ -2951,31 +3034,23 @@ static struct neigh_sysctl_table {
2951}; 3034};
2952 3035
2953int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, 3036int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2954 char *p_name, proc_handler *handler) 3037 proc_handler *handler)
2955{ 3038{
3039 int i;
2956 struct neigh_sysctl_table *t; 3040 struct neigh_sysctl_table *t;
2957 const char *dev_name_source = NULL; 3041 const char *dev_name_source;
2958 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; 3042 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3043 char *p_name;
2959 3044
2960 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); 3045 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2961 if (!t) 3046 if (!t)
2962 goto err; 3047 goto err;
2963 3048
2964 t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes; 3049 for (i = 0; i < ARRAY_SIZE(t->neigh_vars); i++) {
2965 t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes; 3050 t->neigh_vars[i].data += (long) p;
2966 t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes; 3051 t->neigh_vars[i].extra1 = dev;
2967 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time; 3052 t->neigh_vars[i].extra2 = p;
2968 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time; 3053 }
2969 t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time;
2970 t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime;
2971 t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes;
2972 t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes;
2973 t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen;
2974 t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay;
2975 t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2976 t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2977 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time;
2978 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time;
2979 3054
2980 if (dev) { 3055 if (dev) {
2981 dev_name_source = dev->name; 3056 dev_name_source = dev->name;
@@ -2990,26 +3065,32 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2990 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3; 3065 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2991 } 3066 }
2992 3067
2993
2994 if (handler) { 3068 if (handler) {
2995 /* RetransTime */ 3069 /* RetransTime */
2996 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; 3070 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2997 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2998 /* ReachableTime */ 3071 /* ReachableTime */
2999 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; 3072 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3000 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
3001 /* RetransTime (in milliseconds)*/ 3073 /* RetransTime (in milliseconds)*/
3002 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; 3074 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3003 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
3004 /* ReachableTime (in milliseconds) */ 3075 /* ReachableTime (in milliseconds) */
3005 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; 3076 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3006 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
3007 } 3077 }
3008 3078
3009 /* Don't export sysctls to unprivileged users */ 3079 /* Don't export sysctls to unprivileged users */
3010 if (neigh_parms_net(p)->user_ns != &init_user_ns) 3080 if (neigh_parms_net(p)->user_ns != &init_user_ns)
3011 t->neigh_vars[0].procname = NULL; 3081 t->neigh_vars[0].procname = NULL;
3012 3082
3083 switch (neigh_parms_family(p)) {
3084 case AF_INET:
3085 p_name = "ipv4";
3086 break;
3087 case AF_INET6:
3088 p_name = "ipv6";
3089 break;
3090 default:
3091 BUG();
3092 }
3093
3013 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", 3094 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3014 p_name, dev_name_source); 3095 p_name, dev_name_source);
3015 t->sysctl_header = 3096 t->sysctl_header =
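The long list of per-field .data assignments is replaced by a single rebasing loop: the template stores &((struct neigh_parms *)0)->data[index], i.e. an offset disguised as a pointer, and adding (long)p at registration time turns that offset into the real per-parms address. The same pattern in isolation, as a standalone sketch with illustrative names:

#include <stdio.h>

/* Stand-ins for neigh_parms and ctl_table, not kernel code. */
struct parms { int data[3]; };
struct entry { const char *name; int *data; };

#define DATA_OFFSET(idx) (&((struct parms *)0)->data[idx])

static struct entry template[] = {
	{ "retrans_time",        DATA_OFFSET(0) },
	{ "base_reachable_time", DATA_OFFSET(1) },
	{ "gc_stale_time",       DATA_OFFSET(2) },
};

int main(void)
{
	struct parms p = { { 1, 30, 60 } };
	struct entry t[3];
	int i;

	for (i = 0; i < 3; i++) {
		t[i] = template[i];
		/* rebase: stored offset + base address -> real field address */
		t[i].data = (int *)((long)t[i].data + (long)&p);
		printf("%s = %d\n", t[i].name, *t[i].data);
	}
	return 0;
}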
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f3edf9635e02..93886246a0b4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -498,17 +498,7 @@ static struct attribute_group wireless_group = {
498#define net_class_groups NULL 498#define net_class_groups NULL
499#endif /* CONFIG_SYSFS */ 499#endif /* CONFIG_SYSFS */
500 500
501#ifdef CONFIG_RPS 501#ifdef CONFIG_SYSFS
502/*
503 * RX queue sysfs structures and functions.
504 */
505struct rx_queue_attribute {
506 struct attribute attr;
507 ssize_t (*show)(struct netdev_rx_queue *queue,
508 struct rx_queue_attribute *attr, char *buf);
509 ssize_t (*store)(struct netdev_rx_queue *queue,
510 struct rx_queue_attribute *attr, const char *buf, size_t len);
511};
512#define to_rx_queue_attr(_attr) container_of(_attr, \ 502#define to_rx_queue_attr(_attr) container_of(_attr, \
513 struct rx_queue_attribute, attr) 503 struct rx_queue_attribute, attr)
514 504
@@ -543,6 +533,7 @@ static const struct sysfs_ops rx_queue_sysfs_ops = {
543 .store = rx_queue_attr_store, 533 .store = rx_queue_attr_store,
544}; 534};
545 535
536#ifdef CONFIG_RPS
546static ssize_t show_rps_map(struct netdev_rx_queue *queue, 537static ssize_t show_rps_map(struct netdev_rx_queue *queue,
547 struct rx_queue_attribute *attribute, char *buf) 538 struct rx_queue_attribute *attribute, char *buf)
548{ 539{
@@ -676,8 +667,8 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
676 while ((mask | (mask >> 1)) != mask) 667 while ((mask | (mask >> 1)) != mask)
677 mask |= (mask >> 1); 668 mask |= (mask >> 1);
678 /* On 64 bit arches, must check mask fits in table->mask (u32), 669 /* On 64 bit arches, must check mask fits in table->mask (u32),
679 * and on 32bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1) 670 * and on 32bit arches, must check
680 * doesnt overflow. 671 * RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow.
681 */ 672 */
682#if BITS_PER_LONG > 32 673#if BITS_PER_LONG > 32
683 if (mask > (unsigned long)(u32)mask) 674 if (mask > (unsigned long)(u32)mask)
@@ -718,16 +709,20 @@ static struct rx_queue_attribute rps_cpus_attribute =
718static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute = 709static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
719 __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR, 710 __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
720 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); 711 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
712#endif /* CONFIG_RPS */
721 713
722static struct attribute *rx_queue_default_attrs[] = { 714static struct attribute *rx_queue_default_attrs[] = {
715#ifdef CONFIG_RPS
723 &rps_cpus_attribute.attr, 716 &rps_cpus_attribute.attr,
724 &rps_dev_flow_table_cnt_attribute.attr, 717 &rps_dev_flow_table_cnt_attribute.attr,
718#endif
725 NULL 719 NULL
726}; 720};
727 721
728static void rx_queue_release(struct kobject *kobj) 722static void rx_queue_release(struct kobject *kobj)
729{ 723{
730 struct netdev_rx_queue *queue = to_rx_queue(kobj); 724 struct netdev_rx_queue *queue = to_rx_queue(kobj);
725#ifdef CONFIG_RPS
731 struct rps_map *map; 726 struct rps_map *map;
732 struct rps_dev_flow_table *flow_table; 727 struct rps_dev_flow_table *flow_table;
733 728
@@ -743,15 +738,29 @@ static void rx_queue_release(struct kobject *kobj)
743 RCU_INIT_POINTER(queue->rps_flow_table, NULL); 738 RCU_INIT_POINTER(queue->rps_flow_table, NULL);
744 call_rcu(&flow_table->rcu, rps_dev_flow_table_release); 739 call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
745 } 740 }
741#endif
746 742
747 memset(kobj, 0, sizeof(*kobj)); 743 memset(kobj, 0, sizeof(*kobj));
748 dev_put(queue->dev); 744 dev_put(queue->dev);
749} 745}
750 746
747static const void *rx_queue_namespace(struct kobject *kobj)
748{
749 struct netdev_rx_queue *queue = to_rx_queue(kobj);
750 struct device *dev = &queue->dev->dev;
751 const void *ns = NULL;
752
753 if (dev->class && dev->class->ns_type)
754 ns = dev->class->namespace(dev);
755
756 return ns;
757}
758
751static struct kobj_type rx_queue_ktype = { 759static struct kobj_type rx_queue_ktype = {
752 .sysfs_ops = &rx_queue_sysfs_ops, 760 .sysfs_ops = &rx_queue_sysfs_ops,
753 .release = rx_queue_release, 761 .release = rx_queue_release,
754 .default_attrs = rx_queue_default_attrs, 762 .default_attrs = rx_queue_default_attrs,
763 .namespace = rx_queue_namespace
755}; 764};
756 765
757static int rx_queue_add_kobject(struct net_device *net, int index) 766static int rx_queue_add_kobject(struct net_device *net, int index)
@@ -763,25 +772,36 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
763 kobj->kset = net->queues_kset; 772 kobj->kset = net->queues_kset;
764 error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, 773 error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
765 "rx-%u", index); 774 "rx-%u", index);
766 if (error) { 775 if (error)
767 kobject_put(kobj); 776 goto exit;
768 return error; 777
778 if (net->sysfs_rx_queue_group) {
779 error = sysfs_create_group(kobj, net->sysfs_rx_queue_group);
780 if (error)
781 goto exit;
769 } 782 }
770 783
771 kobject_uevent(kobj, KOBJ_ADD); 784 kobject_uevent(kobj, KOBJ_ADD);
772 dev_hold(queue->dev); 785 dev_hold(queue->dev);
773 786
774 return error; 787 return error;
788exit:
789 kobject_put(kobj);
790 return error;
775} 791}
776#endif /* CONFIG_RPS */ 792#endif /* CONFIG_SYSFS */
777 793
778int 794int
779net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) 795net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
780{ 796{
781#ifdef CONFIG_RPS 797#ifdef CONFIG_SYSFS
782 int i; 798 int i;
783 int error = 0; 799 int error = 0;
784 800
801#ifndef CONFIG_RPS
802 if (!net->sysfs_rx_queue_group)
803 return 0;
804#endif
785 for (i = old_num; i < new_num; i++) { 805 for (i = old_num; i < new_num; i++) {
786 error = rx_queue_add_kobject(net, i); 806 error = rx_queue_add_kobject(net, i);
787 if (error) { 807 if (error) {
@@ -790,8 +810,12 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
790 } 810 }
791 } 811 }
792 812
793 while (--i >= new_num) 813 while (--i >= new_num) {
814 if (net->sysfs_rx_queue_group)
815 sysfs_remove_group(&net->_rx[i].kobj,
816 net->sysfs_rx_queue_group);
794 kobject_put(&net->_rx[i].kobj); 817 kobject_put(&net->_rx[i].kobj);
818 }
795 819
796 return error; 820 return error;
797#else 821#else
@@ -1082,10 +1106,23 @@ static void netdev_queue_release(struct kobject *kobj)
1082 dev_put(queue->dev); 1106 dev_put(queue->dev);
1083} 1107}
1084 1108
1109static const void *netdev_queue_namespace(struct kobject *kobj)
1110{
1111 struct netdev_queue *queue = to_netdev_queue(kobj);
1112 struct device *dev = &queue->dev->dev;
1113 const void *ns = NULL;
1114
1115 if (dev->class && dev->class->ns_type)
1116 ns = dev->class->namespace(dev);
1117
1118 return ns;
1119}
1120
1085static struct kobj_type netdev_queue_ktype = { 1121static struct kobj_type netdev_queue_ktype = {
1086 .sysfs_ops = &netdev_queue_sysfs_ops, 1122 .sysfs_ops = &netdev_queue_sysfs_ops,
1087 .release = netdev_queue_release, 1123 .release = netdev_queue_release,
1088 .default_attrs = netdev_queue_default_attrs, 1124 .default_attrs = netdev_queue_default_attrs,
1125 .namespace = netdev_queue_namespace,
1089}; 1126};
1090 1127
1091static int netdev_queue_add_kobject(struct net_device *net, int index) 1128static int netdev_queue_add_kobject(struct net_device *net, int index)
@@ -1155,9 +1192,6 @@ static int register_queue_kobjects(struct net_device *net)
1155 NULL, &net->dev.kobj); 1192 NULL, &net->dev.kobj);
1156 if (!net->queues_kset) 1193 if (!net->queues_kset)
1157 return -ENOMEM; 1194 return -ENOMEM;
1158#endif
1159
1160#ifdef CONFIG_RPS
1161 real_rx = net->real_num_rx_queues; 1195 real_rx = net->real_num_rx_queues;
1162#endif 1196#endif
1163 real_tx = net->real_num_tx_queues; 1197 real_tx = net->real_num_tx_queues;
@@ -1184,7 +1218,7 @@ static void remove_queue_kobjects(struct net_device *net)
1184{ 1218{
1185 int real_rx = 0, real_tx = 0; 1219 int real_rx = 0, real_tx = 0;
1186 1220
1187#ifdef CONFIG_RPS 1221#ifdef CONFIG_SYSFS
1188 real_rx = net->real_num_rx_queues; 1222 real_rx = net->real_num_rx_queues;
1189#endif 1223#endif
1190 real_tx = net->real_num_tx_queues; 1224 real_tx = net->real_num_tx_queues;
@@ -1358,7 +1392,7 @@ void netdev_class_remove_file_ns(struct class_attribute *class_attr,
1358} 1392}
1359EXPORT_SYMBOL(netdev_class_remove_file_ns); 1393EXPORT_SYMBOL(netdev_class_remove_file_ns);
1360 1394
1361int netdev_kobject_init(void) 1395int __init netdev_kobject_init(void)
1362{ 1396{
1363 kobj_ns_type_register(&net_ns_type_operations); 1397 kobj_ns_type_register(&net_ns_type_operations);
1364 return class_register(&net_class); 1398 return class_register(&net_class);
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index bd7751ec1c4d..2745a1b51e03 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -1,7 +1,7 @@
1#ifndef __NET_SYSFS_H__ 1#ifndef __NET_SYSFS_H__
2#define __NET_SYSFS_H__ 2#define __NET_SYSFS_H__
3 3
4int netdev_kobject_init(void); 4int __init netdev_kobject_init(void);
5int netdev_register_kobject(struct net_device *); 5int netdev_register_kobject(struct net_device *);
6void netdev_unregister_kobject(struct net_device *); 6void netdev_unregister_kobject(struct net_device *);
7int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); 7int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
new file mode 100644
index 000000000000..719efd541668
--- /dev/null
+++ b/net/core/netclassid_cgroup.c
@@ -0,0 +1,120 @@
1/*
2 * net/core/netclassid_cgroup.c Classid Cgroupfs Handling
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Thomas Graf <tgraf@suug.ch>
10 */
11
12#include <linux/module.h>
13#include <linux/slab.h>
14#include <linux/cgroup.h>
15#include <linux/fdtable.h>
16#include <net/cls_cgroup.h>
17#include <net/sock.h>
18
19static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
20{
21 return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
22}
23
24struct cgroup_cls_state *task_cls_state(struct task_struct *p)
25{
26 return css_cls_state(task_css(p, net_cls_subsys_id));
27}
28EXPORT_SYMBOL_GPL(task_cls_state);
29
30static struct cgroup_subsys_state *
31cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
32{
33 struct cgroup_cls_state *cs;
34
35 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
36 if (!cs)
37 return ERR_PTR(-ENOMEM);
38
39 return &cs->css;
40}
41
42static int cgrp_css_online(struct cgroup_subsys_state *css)
43{
44 struct cgroup_cls_state *cs = css_cls_state(css);
45 struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
46
47 if (parent)
48 cs->classid = parent->classid;
49
50 return 0;
51}
52
53static void cgrp_css_free(struct cgroup_subsys_state *css)
54{
55 kfree(css_cls_state(css));
56}
57
58static int update_classid(const void *v, struct file *file, unsigned n)
59{
60 int err;
61 struct socket *sock = sock_from_file(file, &err);
62
63 if (sock)
64 sock->sk->sk_classid = (u32)(unsigned long)v;
65
66 return 0;
67}
68
69static void cgrp_attach(struct cgroup_subsys_state *css,
70 struct cgroup_taskset *tset)
71{
72 struct cgroup_cls_state *cs = css_cls_state(css);
73 void *v = (void *)(unsigned long)cs->classid;
74 struct task_struct *p;
75
76 cgroup_taskset_for_each(p, css, tset) {
77 task_lock(p);
78 iterate_fd(p->files, 0, update_classid, v);
79 task_unlock(p);
80 }
81}
82
83static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
84{
85 return css_cls_state(css)->classid;
86}
87
88static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
89 u64 value)
90{
91 css_cls_state(css)->classid = (u32) value;
92
93 return 0;
94}
95
96static struct cftype ss_files[] = {
97 {
98 .name = "classid",
99 .read_u64 = read_classid,
100 .write_u64 = write_classid,
101 },
102 { } /* terminate */
103};
104
105struct cgroup_subsys net_cls_subsys = {
106 .name = "net_cls",
107 .css_alloc = cgrp_css_alloc,
108 .css_online = cgrp_css_online,
109 .css_free = cgrp_css_free,
110 .attach = cgrp_attach,
111 .subsys_id = net_cls_subsys_id,
112 .base_cftypes = ss_files,
113 .module = THIS_MODULE,
114};
115
116static int __init init_netclassid_cgroup(void)
117{
118 return cgroup_load_subsys(&net_cls_subsys);
119}
120__initcall(init_netclassid_cgroup);
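The split-out net_cls controller keeps the existing user interface; a hypothetical consumer tags a group (and, via cgrp_attach() above, the open sockets of its tasks) like this, assuming a typical cgroup-v1 mount point, which is not part of this diff:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Assumed mount point; adjust to wherever net_cls is actually mounted. */
	const char *path = "/sys/fs/cgroup/net_cls/group0/net_cls.classid";
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return 1;
	/* 0x00100001 == major 0x10, minor 0x0001, matched by the cgroup
	 * classifier (cls_cgroup) on egress. */
	dprintf(fd, "0x00100001\n");
	close(fd);
	return 0;
}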
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 19fe9c717ced..c03f3dec4763 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -520,8 +520,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
520 skb->protocol = eth->h_proto = htons(ETH_P_IP); 520 skb->protocol = eth->h_proto = htons(ETH_P_IP);
521 } 521 }
522 522
523 memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN); 523 ether_addr_copy(eth->h_source, np->dev->dev_addr);
524 memcpy(eth->h_dest, np->remote_mac, ETH_ALEN); 524 ether_addr_copy(eth->h_dest, np->remote_mac);
525 525
526 skb->dev = np->dev; 526 skb->dev = np->dev;
527 527
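The memcpy()-to-ether_addr_copy() conversions here and in pktgen below rely on the helper from etherdevice.h (not part of this diff), assumed to be roughly a 6-byte MAC copy that the compiler can turn into two stores on platforms with efficient unaligned access; both pointers must be at least 16-bit aligned:

/* Sketch of the assumed helper. */
static inline void ether_addr_copy(u8 *dst, const u8 *src)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
	*(u32 *)dst = *(const u32 *)src;
	*(u16 *)(dst + 4) = *(const u16 *)(src + 4);
#else
	u16 *a = (u16 *)dst;
	const u16 *b = (const u16 *)src;

	a[0] = b[0];
	a[1] = b[1];
	a[2] = b[2];
#endif
}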
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 56cbb69ba024..9043caedcd08 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -30,7 +30,7 @@
30#define PRIOMAP_MIN_SZ 128 30#define PRIOMAP_MIN_SZ 128
31 31
32/* 32/*
33 * Extend @dev->priomap so that it's large enough to accomodate 33 * Extend @dev->priomap so that it's large enough to accommodate
34 * @target_idx. @dev->priomap.priomap_len > @target_idx after successful 34 * @target_idx. @dev->priomap.priomap_len > @target_idx after successful
35 * return. Must be called under rtnl lock. 35 * return. Must be called under rtnl lock.
36 */ 36 */
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a797fff7f222..fdac61cac1bd 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -389,6 +389,9 @@ struct pktgen_dev {
389#ifdef CONFIG_XFRM 389#ifdef CONFIG_XFRM
390 __u8 ipsmode; /* IPSEC mode (config) */ 390 __u8 ipsmode; /* IPSEC mode (config) */
391 __u8 ipsproto; /* IPSEC type (config) */ 391 __u8 ipsproto; /* IPSEC type (config) */
392 __u32 spi;
393 struct dst_entry dst;
394 struct dst_ops dstops;
392#endif 395#endif
393 char result[512]; 396 char result[512];
394}; 397};
@@ -654,8 +657,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
654 } 657 }
655 658
656#ifdef CONFIG_XFRM 659#ifdef CONFIG_XFRM
657 if (pkt_dev->flags & F_IPSEC_ON) 660 if (pkt_dev->flags & F_IPSEC_ON) {
658 seq_printf(seq, "IPSEC "); 661 seq_printf(seq, "IPSEC ");
662 if (pkt_dev->spi)
663 seq_printf(seq, "spi:%u", pkt_dev->spi);
664 }
659#endif 665#endif
660 666
661 if (pkt_dev->flags & F_MACSRC_RND) 667 if (pkt_dev->flags & F_MACSRC_RND)
@@ -1434,7 +1440,7 @@ static ssize_t pktgen_if_write(struct file *file,
1434 if (!mac_pton(valstr, pkt_dev->dst_mac)) 1440 if (!mac_pton(valstr, pkt_dev->dst_mac))
1435 return -EINVAL; 1441 return -EINVAL;
1436 /* Set up Dest MAC */ 1442 /* Set up Dest MAC */
1437 memcpy(&pkt_dev->hh[0], pkt_dev->dst_mac, ETH_ALEN); 1443 ether_addr_copy(&pkt_dev->hh[0], pkt_dev->dst_mac);
1438 1444
1439 sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac); 1445 sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac);
1440 return count; 1446 return count;
@@ -1451,7 +1457,7 @@ static ssize_t pktgen_if_write(struct file *file,
1451 if (!mac_pton(valstr, pkt_dev->src_mac)) 1457 if (!mac_pton(valstr, pkt_dev->src_mac))
1452 return -EINVAL; 1458 return -EINVAL;
1453 /* Set up Src MAC */ 1459 /* Set up Src MAC */
1454 memcpy(&pkt_dev->hh[6], pkt_dev->src_mac, ETH_ALEN); 1460 ether_addr_copy(&pkt_dev->hh[6], pkt_dev->src_mac);
1455 1461
1456 sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac); 1462 sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac);
1457 return count; 1463 return count;
@@ -1476,7 +1482,18 @@ static ssize_t pktgen_if_write(struct file *file,
1476 sprintf(pg_result, "OK: flows=%u", pkt_dev->cflows); 1482 sprintf(pg_result, "OK: flows=%u", pkt_dev->cflows);
1477 return count; 1483 return count;
1478 } 1484 }
1485#ifdef CONFIG_XFRM
1486 if (!strcmp(name, "spi")) {
1487 len = num_arg(&user_buffer[i], 10, &value);
1488 if (len < 0)
1489 return len;
1479 1490
1491 i += len;
1492 pkt_dev->spi = value;
1493 sprintf(pg_result, "OK: spi=%u", pkt_dev->spi);
1494 return count;
1495 }
1496#endif
1480 if (!strcmp(name, "flowlen")) { 1497 if (!strcmp(name, "flowlen")) {
1481 len = num_arg(&user_buffer[i], 10, &value); 1498 len = num_arg(&user_buffer[i], 10, &value);
1482 if (len < 0) 1499 if (len < 0)
@@ -2043,10 +2060,10 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
2043 /* Default to the interface's mac if not explicitly set. */ 2060 /* Default to the interface's mac if not explicitly set. */
2044 2061
2045 if (is_zero_ether_addr(pkt_dev->src_mac)) 2062 if (is_zero_ether_addr(pkt_dev->src_mac))
2046 memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, ETH_ALEN); 2063 ether_addr_copy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr);
2047 2064
2048 /* Set up Dest MAC */ 2065 /* Set up Dest MAC */
2049 memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN); 2066 ether_addr_copy(&(pkt_dev->hh[0]), pkt_dev->dst_mac);
2050 2067
2051 if (pkt_dev->flags & F_IPV6) { 2068 if (pkt_dev->flags & F_IPV6) {
2052 int i, set = 0, err = 1; 2069 int i, set = 0, err = 1;
@@ -2233,13 +2250,21 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
2233 struct xfrm_state *x = pkt_dev->flows[flow].x; 2250 struct xfrm_state *x = pkt_dev->flows[flow].x;
2234 struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id); 2251 struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id);
2235 if (!x) { 2252 if (!x) {
2236 /*slow path: we dont already have xfrm_state*/ 2253
2237 x = xfrm_stateonly_find(pn->net, DUMMY_MARK, 2254 if (pkt_dev->spi) {
2238 (xfrm_address_t *)&pkt_dev->cur_daddr, 2255 /* We need as quick as possible to find the right SA
2239 (xfrm_address_t *)&pkt_dev->cur_saddr, 2256 * Searching with minimum criteria to archieve this.
2240 AF_INET, 2257 */
2241 pkt_dev->ipsmode, 2258 x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET);
2242 pkt_dev->ipsproto, 0); 2259 } else {
2260 /* slow path: we dont already have xfrm_state */
2261 x = xfrm_stateonly_find(pn->net, DUMMY_MARK,
2262 (xfrm_address_t *)&pkt_dev->cur_daddr,
2263 (xfrm_address_t *)&pkt_dev->cur_saddr,
2264 AF_INET,
2265 pkt_dev->ipsmode,
2266 pkt_dev->ipsproto, 0);
2267 }
2243 if (x) { 2268 if (x) {
2244 pkt_dev->flows[flow].x = x; 2269 pkt_dev->flows[flow].x = x;
2245 set_pkt_overhead(pkt_dev); 2270 set_pkt_overhead(pkt_dev);
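The new "spi" knob is driven through the usual pktgen procfs interface; a hypothetical configuration helper (device path and SPI value are examples only, and the SA is assumed to exist already, e.g. set up with "ip xfrm state add ... spi 0x100") might look like:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Sketch: point pktgen's IPsec path at a pre-existing SA by SPI. */
static int pktgen_use_spi(const char *devfile, const char *spi_line)
{
	int fd = open(devfile, O_WRONLY);	/* e.g. "/proc/net/pktgen/eth0" */

	if (fd < 0)
		return -1;
	write(fd, "flag IPSEC\n", 11);
	write(fd, spi_line, strlen(spi_line));	/* e.g. "spi 256\n" */
	close(fd);
	return 0;
}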
@@ -2475,31 +2500,47 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2475 2500
2476 2501
2477#ifdef CONFIG_XFRM 2502#ifdef CONFIG_XFRM
2503static u32 pktgen_dst_metrics[RTAX_MAX + 1] = {
2504
2505 [RTAX_HOPLIMIT] = 0x5, /* Set a static hoplimit */
2506};
2507
2478static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) 2508static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
2479{ 2509{
2480 struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; 2510 struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
2481 int err = 0; 2511 int err = 0;
2512 struct net *net = dev_net(pkt_dev->odev);
2482 2513
2483 if (!x) 2514 if (!x)
2484 return 0; 2515 return 0;
2485 /* XXX: we dont support tunnel mode for now until 2516 /* XXX: we dont support tunnel mode for now until
2486 * we resolve the dst issue */ 2517 * we resolve the dst issue */
2487 if (x->props.mode != XFRM_MODE_TRANSPORT) 2518 if ((x->props.mode != XFRM_MODE_TRANSPORT) && (pkt_dev->spi == 0))
2488 return 0; 2519 return 0;
2489 2520
 2490 spin_lock(&x->lock); 2521 /* But when the user specifies a valid SPI, the transformation
 2522 * supports both transport/tunnel modes and ESP/AH types.
2523 */
2524 if ((x->props.mode == XFRM_MODE_TUNNEL) && (pkt_dev->spi != 0))
2525 skb->_skb_refdst = (unsigned long)&pkt_dev->dst | SKB_DST_NOREF;
2491 2526
2527 rcu_read_lock_bh();
2492 err = x->outer_mode->output(x, skb); 2528 err = x->outer_mode->output(x, skb);
2493 if (err) 2529 rcu_read_unlock_bh();
2530 if (err) {
2531 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
2494 goto error; 2532 goto error;
2533 }
2495 err = x->type->output(x, skb); 2534 err = x->type->output(x, skb);
2496 if (err) 2535 if (err) {
2536 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR);
2497 goto error; 2537 goto error;
2498 2538 }
2539 spin_lock_bh(&x->lock);
2499 x->curlft.bytes += skb->len; 2540 x->curlft.bytes += skb->len;
2500 x->curlft.packets++; 2541 x->curlft.packets++;
2542 spin_unlock_bh(&x->lock);
2501error: 2543error:
2502 spin_unlock(&x->lock);
2503 return err; 2544 return err;
2504} 2545}
2505 2546
@@ -3542,6 +3583,17 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3542#ifdef CONFIG_XFRM 3583#ifdef CONFIG_XFRM
3543 pkt_dev->ipsmode = XFRM_MODE_TRANSPORT; 3584 pkt_dev->ipsmode = XFRM_MODE_TRANSPORT;
3544 pkt_dev->ipsproto = IPPROTO_ESP; 3585 pkt_dev->ipsproto = IPPROTO_ESP;
3586
 3587 /* xfrm tunnel mode needs an additional dst to extract the outer
 3588 * ip header protocol/ttl/id fields, so create a phony one here
 3589 * instead of looking up a valid rt, which would definitely hurt
 3590 * performance under such circumstances.
3591 */
3592 pkt_dev->dstops.family = AF_INET;
3593 pkt_dev->dst.dev = pkt_dev->odev;
3594 dst_init_metrics(&pkt_dev->dst, pktgen_dst_metrics, false);
3595 pkt_dev->dst.child = &pkt_dev->dst;
3596 pkt_dev->dst.ops = &pkt_dev->dstops;
3545#endif 3597#endif
3546 3598
3547 return add_dev_to_thread(t, pkt_dev); 3599 return add_dev_to_thread(t, pkt_dev);
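
A rough userspace sketch of driving these pktgen IPsec knobs: pktgen devices are configured by writing commands into /proc/net/pktgen/<ifname>. The device path and the exact command spellings ("flag IPSEC", "spi 256") are assumptions based on this series; check Documentation/networking/pktgen.txt for the running kernel.

/* Minimal sketch, assuming the pktgen proc interface and command names above. */
#include <stdio.h>
#include <stdlib.h>

static void pgset(const char *path, const char *cmd)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		exit(1);
	}
	fprintf(f, "%s\n", cmd);	/* one pktgen command per write */
	fclose(f);
}

int main(void)
{
	const char *dev = "/proc/net/pktgen/eth0";	/* hypothetical device */

	pgset(dev, "flag IPSEC");	/* enable IPsec transformation */
	pgset(dev, "spi 256");		/* select the SA by SPI, using the fast lookup path above */
	return 0;
}
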
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index cf67144d3e3c..393b1bc9a618 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -365,6 +365,22 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops)
365} 365}
366EXPORT_SYMBOL_GPL(rtnl_link_unregister); 366EXPORT_SYMBOL_GPL(rtnl_link_unregister);
367 367
368static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev)
369{
370 struct net_device *master_dev;
371 const struct rtnl_link_ops *ops;
372
373 master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
374 if (!master_dev)
375 return 0;
376 ops = master_dev->rtnl_link_ops;
377 if (!ops->get_slave_size)
378 return 0;
379 /* IFLA_INFO_SLAVE_DATA + nested data */
380 return nla_total_size(sizeof(struct nlattr)) +
381 ops->get_slave_size(master_dev, dev);
382}
383
368static size_t rtnl_link_get_size(const struct net_device *dev) 384static size_t rtnl_link_get_size(const struct net_device *dev)
369{ 385{
370 const struct rtnl_link_ops *ops = dev->rtnl_link_ops; 386 const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
@@ -385,6 +401,8 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
385 /* IFLA_INFO_XSTATS */ 401 /* IFLA_INFO_XSTATS */
386 size += nla_total_size(ops->get_xstats_size(dev)); 402 size += nla_total_size(ops->get_xstats_size(dev));
387 403
404 size += rtnl_link_get_slave_info_data_size(dev);
405
388 return size; 406 return size;
389} 407}
390 408
@@ -403,34 +421,16 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
403} 421}
404 422
405/** 423/**
406 * __rtnl_af_register - Register rtnl_af_ops with rtnetlink.
407 * @ops: struct rtnl_af_ops * to register
408 *
409 * The caller must hold the rtnl_mutex.
410 *
411 * Returns 0 on success or a negative error code.
412 */
413int __rtnl_af_register(struct rtnl_af_ops *ops)
414{
415 list_add_tail(&ops->list, &rtnl_af_ops);
416 return 0;
417}
418EXPORT_SYMBOL_GPL(__rtnl_af_register);
419
420/**
421 * rtnl_af_register - Register rtnl_af_ops with rtnetlink. 424 * rtnl_af_register - Register rtnl_af_ops with rtnetlink.
422 * @ops: struct rtnl_af_ops * to register 425 * @ops: struct rtnl_af_ops * to register
423 * 426 *
424 * Returns 0 on success or a negative error code. 427 * Returns 0 on success or a negative error code.
425 */ 428 */
426int rtnl_af_register(struct rtnl_af_ops *ops) 429void rtnl_af_register(struct rtnl_af_ops *ops)
427{ 430{
428 int err;
429
430 rtnl_lock(); 431 rtnl_lock();
431 err = __rtnl_af_register(ops); 432 list_add_tail(&ops->list, &rtnl_af_ops);
432 rtnl_unlock(); 433 rtnl_unlock();
433 return err;
434} 434}
435EXPORT_SYMBOL_GPL(rtnl_af_register); 435EXPORT_SYMBOL_GPL(rtnl_af_register);
436 436
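
With __rtnl_af_register removed and rtnl_af_register now returning void, a per-family module registers its ops unconditionally. The snippet below is a minimal sketch; example_af_ops, its callbacks and the module boilerplate are invented for illustration.

/* Sketch only: callback fields omitted, names hypothetical. */
#include <linux/module.h>
#include <net/rtnetlink.h>

static struct rtnl_af_ops example_af_ops = {
	.family	= AF_INET6,
	/* .fill_link_af, .get_link_af_size, .set_link_af as needed */
};

static int __init example_init(void)
{
	rtnl_af_register(&example_af_ops);	/* can no longer fail */
	return 0;
}
module_init(example_init);

static void __exit example_exit(void)
{
	rtnl_af_unregister(&example_af_ops);
}
module_exit(example_exit);

MODULE_LICENSE("GPL");
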
@@ -477,40 +477,100 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev)
477 return size; 477 return size;
478} 478}
479 479
480static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) 480static bool rtnl_have_link_slave_info(const struct net_device *dev)
481{ 481{
482 const struct rtnl_link_ops *ops = dev->rtnl_link_ops; 482 struct net_device *master_dev;
483 struct nlattr *linkinfo, *data;
484 int err = -EMSGSIZE;
485 483
486 linkinfo = nla_nest_start(skb, IFLA_LINKINFO); 484 master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
487 if (linkinfo == NULL) 485 if (master_dev && master_dev->rtnl_link_ops)
488 goto out; 486 return true;
487 return false;
488}
489
490static int rtnl_link_slave_info_fill(struct sk_buff *skb,
491 const struct net_device *dev)
492{
493 struct net_device *master_dev;
494 const struct rtnl_link_ops *ops;
495 struct nlattr *slave_data;
496 int err;
497
498 master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
499 if (!master_dev)
500 return 0;
501 ops = master_dev->rtnl_link_ops;
502 if (!ops)
503 return 0;
504 if (nla_put_string(skb, IFLA_INFO_SLAVE_KIND, ops->kind) < 0)
505 return -EMSGSIZE;
506 if (ops->fill_slave_info) {
507 slave_data = nla_nest_start(skb, IFLA_INFO_SLAVE_DATA);
508 if (!slave_data)
509 return -EMSGSIZE;
510 err = ops->fill_slave_info(skb, master_dev, dev);
511 if (err < 0)
512 goto err_cancel_slave_data;
513 nla_nest_end(skb, slave_data);
514 }
515 return 0;
516
517err_cancel_slave_data:
518 nla_nest_cancel(skb, slave_data);
519 return err;
520}
521
522static int rtnl_link_info_fill(struct sk_buff *skb,
523 const struct net_device *dev)
524{
525 const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
526 struct nlattr *data;
527 int err;
489 528
529 if (!ops)
530 return 0;
490 if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0) 531 if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0)
491 goto err_cancel_link; 532 return -EMSGSIZE;
492 if (ops->fill_xstats) { 533 if (ops->fill_xstats) {
493 err = ops->fill_xstats(skb, dev); 534 err = ops->fill_xstats(skb, dev);
494 if (err < 0) 535 if (err < 0)
495 goto err_cancel_link; 536 return err;
496 } 537 }
497 if (ops->fill_info) { 538 if (ops->fill_info) {
498 data = nla_nest_start(skb, IFLA_INFO_DATA); 539 data = nla_nest_start(skb, IFLA_INFO_DATA);
499 if (data == NULL) { 540 if (data == NULL)
500 err = -EMSGSIZE; 541 return -EMSGSIZE;
501 goto err_cancel_link;
502 }
503 err = ops->fill_info(skb, dev); 542 err = ops->fill_info(skb, dev);
504 if (err < 0) 543 if (err < 0)
505 goto err_cancel_data; 544 goto err_cancel_data;
506 nla_nest_end(skb, data); 545 nla_nest_end(skb, data);
507 } 546 }
508
509 nla_nest_end(skb, linkinfo);
510 return 0; 547 return 0;
511 548
512err_cancel_data: 549err_cancel_data:
513 nla_nest_cancel(skb, data); 550 nla_nest_cancel(skb, data);
551 return err;
552}
553
554static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev)
555{
556 struct nlattr *linkinfo;
557 int err = -EMSGSIZE;
558
559 linkinfo = nla_nest_start(skb, IFLA_LINKINFO);
560 if (linkinfo == NULL)
561 goto out;
562
563 err = rtnl_link_info_fill(skb, dev);
564 if (err < 0)
565 goto err_cancel_link;
566
567 err = rtnl_link_slave_info_fill(skb, dev);
568 if (err < 0)
569 goto err_cancel_link;
570
571 nla_nest_end(skb, linkinfo);
572 return 0;
573
514err_cancel_link: 574err_cancel_link:
515 nla_nest_cancel(skb, linkinfo); 575 nla_nest_cancel(skb, linkinfo);
516out: 576out:
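
To make the new IFLA_INFO_SLAVE_KIND/IFLA_INFO_SLAVE_DATA dump path concrete, here is a hedged sketch of the provider side. The attribute enum, the queue-id value and the "example_master" kind are invented; a real master driver (bonding is the first in-tree user of this series) fills in its own per-slave state.

/* Hypothetical per-slave attributes, for illustration only. */
#include <net/netlink.h>
#include <net/rtnetlink.h>

enum {
	IFLA_EXAMPLE_SLAVE_UNSPEC,
	IFLA_EXAMPLE_SLAVE_QUEUE_ID,
	__IFLA_EXAMPLE_SLAVE_MAX,
};
#define IFLA_EXAMPLE_SLAVE_MAX	(__IFLA_EXAMPLE_SLAVE_MAX - 1)

static size_t example_get_slave_size(const struct net_device *master,
				     const struct net_device *slave)
{
	/* room for IFLA_EXAMPLE_SLAVE_QUEUE_ID */
	return nla_total_size(sizeof(u16));
}

static int example_fill_slave_info(struct sk_buff *skb,
				   const struct net_device *master,
				   const struct net_device *slave)
{
	/* a real driver puts its per-slave state here; 0 is a placeholder */
	if (nla_put_u16(skb, IFLA_EXAMPLE_SLAVE_QUEUE_ID, 0))
		return -EMSGSIZE;
	return 0;
}

static struct rtnl_link_ops example_link_ops __read_mostly = {
	.kind		 = "example_master",
	.get_slave_size	 = example_get_slave_size,
	.fill_slave_info = example_fill_slave_info,
};
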
@@ -1019,7 +1079,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1019 if (rtnl_port_fill(skb, dev)) 1079 if (rtnl_port_fill(skb, dev))
1020 goto nla_put_failure; 1080 goto nla_put_failure;
1021 1081
1022 if (dev->rtnl_link_ops) { 1082 if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) {
1023 if (rtnl_link_fill(skb, dev) < 0) 1083 if (rtnl_link_fill(skb, dev) < 0)
1024 goto nla_put_failure; 1084 goto nla_put_failure;
1025 } 1085 }
@@ -1142,6 +1202,8 @@ EXPORT_SYMBOL(ifla_policy);
1142static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { 1202static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
1143 [IFLA_INFO_KIND] = { .type = NLA_STRING }, 1203 [IFLA_INFO_KIND] = { .type = NLA_STRING },
1144 [IFLA_INFO_DATA] = { .type = NLA_NESTED }, 1204 [IFLA_INFO_DATA] = { .type = NLA_NESTED },
1205 [IFLA_INFO_SLAVE_KIND] = { .type = NLA_STRING },
1206 [IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED },
1145}; 1207};
1146 1208
1147static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { 1209static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
@@ -1729,7 +1791,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)
1729{ 1791{
1730 struct net *net = sock_net(skb->sk); 1792 struct net *net = sock_net(skb->sk);
1731 const struct rtnl_link_ops *ops; 1793 const struct rtnl_link_ops *ops;
1794 const struct rtnl_link_ops *m_ops = NULL;
1732 struct net_device *dev; 1795 struct net_device *dev;
1796 struct net_device *master_dev = NULL;
1733 struct ifinfomsg *ifm; 1797 struct ifinfomsg *ifm;
1734 char kind[MODULE_NAME_LEN]; 1798 char kind[MODULE_NAME_LEN];
1735 char ifname[IFNAMSIZ]; 1799 char ifname[IFNAMSIZ];
@@ -1759,6 +1823,12 @@ replay:
1759 dev = NULL; 1823 dev = NULL;
1760 } 1824 }
1761 1825
1826 if (dev) {
1827 master_dev = netdev_master_upper_dev_get(dev);
1828 if (master_dev)
1829 m_ops = master_dev->rtnl_link_ops;
1830 }
1831
1762 err = validate_linkmsg(dev, tb); 1832 err = validate_linkmsg(dev, tb);
1763 if (err < 0) 1833 if (err < 0)
1764 return err; 1834 return err;
@@ -1780,7 +1850,10 @@ replay:
1780 } 1850 }
1781 1851
1782 if (1) { 1852 if (1) {
1783 struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL; 1853 struct nlattr *attr[ops ? ops->maxtype + 1 : 0];
1854 struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0];
1855 struct nlattr **data = NULL;
1856 struct nlattr **slave_data = NULL;
1784 struct net *dest_net; 1857 struct net *dest_net;
1785 1858
1786 if (ops) { 1859 if (ops) {
@@ -1799,6 +1872,24 @@ replay:
1799 } 1872 }
1800 } 1873 }
1801 1874
1875 if (m_ops) {
1876 if (m_ops->slave_maxtype &&
1877 linkinfo[IFLA_INFO_SLAVE_DATA]) {
1878 err = nla_parse_nested(slave_attr,
1879 m_ops->slave_maxtype,
1880 linkinfo[IFLA_INFO_SLAVE_DATA],
1881 m_ops->slave_policy);
1882 if (err < 0)
1883 return err;
1884 slave_data = slave_attr;
1885 }
1886 if (m_ops->slave_validate) {
1887 err = m_ops->slave_validate(tb, slave_data);
1888 if (err < 0)
1889 return err;
1890 }
1891 }
1892
1802 if (dev) { 1893 if (dev) {
1803 int modified = 0; 1894 int modified = 0;
1804 1895
@@ -1818,6 +1909,17 @@ replay:
1818 modified = 1; 1909 modified = 1;
1819 } 1910 }
1820 1911
1912 if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
1913 if (!m_ops || !m_ops->slave_changelink)
1914 return -EOPNOTSUPP;
1915
1916 err = m_ops->slave_changelink(master_dev, dev,
1917 tb, slave_data);
1918 if (err < 0)
1919 return err;
1920 modified = 1;
1921 }
1922
1821 return do_setlink(dev, ifm, tb, ifname, modified); 1923 return do_setlink(dev, ifm, tb, ifname, modified);
1822 } 1924 }
1823 1925
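
And the write side that the rtnl_newlink() changes above dispatch to, continuing the hypothetical attributes from the earlier sketch; the policy and changelink handler are illustrative, not taken from an in-tree driver.

static const struct nla_policy
example_slave_policy[IFLA_EXAMPLE_SLAVE_MAX + 1] = {
	[IFLA_EXAMPLE_SLAVE_QUEUE_ID]	= { .type = NLA_U16 },
};

static int example_slave_changelink(struct net_device *master,
				    struct net_device *slave,
				    struct nlattr *tb[],
				    struct nlattr *data[])
{
	if (!data || !data[IFLA_EXAMPLE_SLAVE_QUEUE_ID])
		return 0;
	/* apply nla_get_u16(data[IFLA_EXAMPLE_SLAVE_QUEUE_ID]) to the slave */
	return 0;
}

/* wired into the rtnl_link_ops shown earlier:
 *	.slave_maxtype	  = IFLA_EXAMPLE_SLAVE_MAX,
 *	.slave_policy	  = example_slave_policy,
 *	.slave_changelink = example_slave_changelink,
 */
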
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0b5149c5bc4a..8f519dbb358b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -65,6 +65,7 @@
65#include <net/dst.h> 65#include <net/dst.h>
66#include <net/sock.h> 66#include <net/sock.h>
67#include <net/checksum.h> 67#include <net/checksum.h>
68#include <net/ip6_checksum.h>
68#include <net/xfrm.h> 69#include <net/xfrm.h>
69 70
70#include <asm/uaccess.h> 71#include <asm/uaccess.h>
@@ -682,9 +683,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
682 new->inner_network_header = old->inner_network_header; 683 new->inner_network_header = old->inner_network_header;
683 new->inner_mac_header = old->inner_mac_header; 684 new->inner_mac_header = old->inner_mac_header;
684 skb_dst_copy(new, old); 685 skb_dst_copy(new, old);
685 new->rxhash = old->rxhash; 686 skb_copy_hash(new, old);
686 new->ooo_okay = old->ooo_okay; 687 new->ooo_okay = old->ooo_okay;
687 new->l4_rxhash = old->l4_rxhash;
688 new->no_fcs = old->no_fcs; 688 new->no_fcs = old->no_fcs;
689 new->encapsulation = old->encapsulation; 689 new->encapsulation = old->encapsulation;
690#ifdef CONFIG_XFRM 690#ifdef CONFIG_XFRM
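
skb_copy_hash() replaces the open-coded rxhash/l4_rxhash copy removed above. As a hedged reading (the real helper lives in include/linux/skbuff.h and the field names may differ between releases), it amounts to:

/* Approximate behaviour only; not the in-tree definition. */
#include <linux/skbuff.h>

static inline void skb_copy_hash_sketch(struct sk_buff *to,
					const struct sk_buff *from)
{
	to->rxhash = from->rxhash;
	to->l4_rxhash = from->l4_rxhash;
}
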
@@ -2092,6 +2092,91 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
2092} 2092}
2093EXPORT_SYMBOL(skb_copy_and_csum_bits); 2093EXPORT_SYMBOL(skb_copy_and_csum_bits);
2094 2094
2095 /**
2096 * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
2097 * @from: source buffer
2098 *
2099 * Calculates the amount of linear headroom needed in the 'to' skb passed
2100 * into skb_zerocopy().
2101 */
2102unsigned int
2103skb_zerocopy_headlen(const struct sk_buff *from)
2104{
2105 unsigned int hlen = 0;
2106
2107 if (!from->head_frag ||
2108 skb_headlen(from) < L1_CACHE_BYTES ||
2109 skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
2110 hlen = skb_headlen(from);
2111
2112 if (skb_has_frag_list(from))
2113 hlen = from->len;
2114
2115 return hlen;
2116}
2117EXPORT_SYMBOL_GPL(skb_zerocopy_headlen);
2118
2119/**
2120 * skb_zerocopy - Zero copy skb to skb
2121 * @to: destination buffer
 2122 * @from: source buffer
2123 * @len: number of bytes to copy from source buffer
2124 * @hlen: size of linear headroom in destination buffer
2125 *
2126 * Copies up to `len` bytes from `from` to `to` by creating references
2127 * to the frags in the source buffer.
2128 *
2129 * The `hlen` as calculated by skb_zerocopy_headlen() specifies the
2130 * headroom in the `to` buffer.
2131 */
2132void
2133skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
2134{
2135 int i, j = 0;
2136 int plen = 0; /* length of skb->head fragment */
2137 struct page *page;
2138 unsigned int offset;
2139
2140 BUG_ON(!from->head_frag && !hlen);
2141
2142 /* dont bother with small payloads */
2143 if (len <= skb_tailroom(to)) {
2144 skb_copy_bits(from, 0, skb_put(to, len), len);
2145 return;
2146 }
2147
2148 if (hlen) {
2149 skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
2150 len -= hlen;
2151 } else {
2152 plen = min_t(int, skb_headlen(from), len);
2153 if (plen) {
2154 page = virt_to_head_page(from->head);
2155 offset = from->data - (unsigned char *)page_address(page);
2156 __skb_fill_page_desc(to, 0, page, offset, plen);
2157 get_page(page);
2158 j = 1;
2159 len -= plen;
2160 }
2161 }
2162
2163 to->truesize += len + plen;
2164 to->len += len + plen;
2165 to->data_len += len + plen;
2166
2167 for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
2168 if (!len)
2169 break;
2170 skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
2171 skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
2172 len -= skb_shinfo(to)->frags[j].size;
2173 skb_frag_ref(to, j);
2174 j++;
2175 }
2176 skb_shinfo(to)->nr_frags = j;
2177}
2178EXPORT_SYMBOL_GPL(skb_zerocopy);
2179
2095void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) 2180void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
2096{ 2181{
2097 __wsum csum; 2182 __wsum csum;
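
A hedged usage sketch for the two new helpers above; the extra headroom constant and error handling are illustrative, and the intended first consumer is the netlink/openvswitch zero-copy path mentioned in the merge description.

#include <linux/skbuff.h>

/* Build a new skb that references the payload of 'from' without copying
 * the frags; 'len' bytes are taken, 'hlen' of them into the linear area.
 */
static struct sk_buff *example_zerocopy_clone(const struct sk_buff *from,
					      unsigned int len)
{
	unsigned int hlen = skb_zerocopy_headlen(from);
	struct sk_buff *to;

	to = alloc_skb(hlen + 64, GFP_ATOMIC);	/* 64: illustrative extra room */
	if (!to)
		return NULL;

	skb_zerocopy(to, from, len, hlen);
	return to;
}
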
@@ -2952,10 +3037,7 @@ perform_csum_check:
2952 return segs; 3037 return segs;
2953 3038
2954err: 3039err:
2955 while ((skb = segs)) { 3040 kfree_skb_list(segs);
2956 segs = skb->next;
2957 kfree_skb(skb);
2958 }
2959 return ERR_PTR(err); 3041 return ERR_PTR(err);
2960} 3042}
2961EXPORT_SYMBOL_GPL(skb_segment); 3043EXPORT_SYMBOL_GPL(skb_segment);
@@ -3438,6 +3520,278 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
3438} 3520}
3439EXPORT_SYMBOL_GPL(skb_partial_csum_set); 3521EXPORT_SYMBOL_GPL(skb_partial_csum_set);
3440 3522
3523static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len,
3524 unsigned int max)
3525{
3526 if (skb_headlen(skb) >= len)
3527 return 0;
3528
3529 /* If we need to pullup then pullup to the max, so we
3530 * won't need to do it again.
3531 */
3532 if (max > skb->len)
3533 max = skb->len;
3534
3535 if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
3536 return -ENOMEM;
3537
3538 if (skb_headlen(skb) < len)
3539 return -EPROTO;
3540
3541 return 0;
3542}
3543
3544/* This value should be large enough to cover a tagged ethernet header plus
3545 * maximally sized IP and TCP or UDP headers.
3546 */
3547#define MAX_IP_HDR_LEN 128
3548
3549static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate)
3550{
3551 unsigned int off;
3552 bool fragment;
3553 int err;
3554
3555 fragment = false;
3556
3557 err = skb_maybe_pull_tail(skb,
3558 sizeof(struct iphdr),
3559 MAX_IP_HDR_LEN);
3560 if (err < 0)
3561 goto out;
3562
3563 if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
3564 fragment = true;
3565
3566 off = ip_hdrlen(skb);
3567
3568 err = -EPROTO;
3569
3570 if (fragment)
3571 goto out;
3572
3573 switch (ip_hdr(skb)->protocol) {
3574 case IPPROTO_TCP:
3575 err = skb_maybe_pull_tail(skb,
3576 off + sizeof(struct tcphdr),
3577 MAX_IP_HDR_LEN);
3578 if (err < 0)
3579 goto out;
3580
3581 if (!skb_partial_csum_set(skb, off,
3582 offsetof(struct tcphdr, check))) {
3583 err = -EPROTO;
3584 goto out;
3585 }
3586
3587 if (recalculate)
3588 tcp_hdr(skb)->check =
3589 ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
3590 ip_hdr(skb)->daddr,
3591 skb->len - off,
3592 IPPROTO_TCP, 0);
3593 break;
3594 case IPPROTO_UDP:
3595 err = skb_maybe_pull_tail(skb,
3596 off + sizeof(struct udphdr),
3597 MAX_IP_HDR_LEN);
3598 if (err < 0)
3599 goto out;
3600
3601 if (!skb_partial_csum_set(skb, off,
3602 offsetof(struct udphdr, check))) {
3603 err = -EPROTO;
3604 goto out;
3605 }
3606
3607 if (recalculate)
3608 udp_hdr(skb)->check =
3609 ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
3610 ip_hdr(skb)->daddr,
3611 skb->len - off,
3612 IPPROTO_UDP, 0);
3613 break;
3614 default:
3615 goto out;
3616 }
3617
3618 err = 0;
3619
3620out:
3621 return err;
3622}
3623
3624/* This value should be large enough to cover a tagged ethernet header plus
3625 * an IPv6 header, all options, and a maximal TCP or UDP header.
3626 */
3627#define MAX_IPV6_HDR_LEN 256
3628
3629#define OPT_HDR(type, skb, off) \
3630 (type *)(skb_network_header(skb) + (off))
3631
3632static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
3633{
3634 int err;
3635 u8 nexthdr;
3636 unsigned int off;
3637 unsigned int len;
3638 bool fragment;
3639 bool done;
3640
3641 fragment = false;
3642 done = false;
3643
3644 off = sizeof(struct ipv6hdr);
3645
3646 err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
3647 if (err < 0)
3648 goto out;
3649
3650 nexthdr = ipv6_hdr(skb)->nexthdr;
3651
3652 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
3653 while (off <= len && !done) {
3654 switch (nexthdr) {
3655 case IPPROTO_DSTOPTS:
3656 case IPPROTO_HOPOPTS:
3657 case IPPROTO_ROUTING: {
3658 struct ipv6_opt_hdr *hp;
3659
3660 err = skb_maybe_pull_tail(skb,
3661 off +
3662 sizeof(struct ipv6_opt_hdr),
3663 MAX_IPV6_HDR_LEN);
3664 if (err < 0)
3665 goto out;
3666
3667 hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
3668 nexthdr = hp->nexthdr;
3669 off += ipv6_optlen(hp);
3670 break;
3671 }
3672 case IPPROTO_AH: {
3673 struct ip_auth_hdr *hp;
3674
3675 err = skb_maybe_pull_tail(skb,
3676 off +
3677 sizeof(struct ip_auth_hdr),
3678 MAX_IPV6_HDR_LEN);
3679 if (err < 0)
3680 goto out;
3681
3682 hp = OPT_HDR(struct ip_auth_hdr, skb, off);
3683 nexthdr = hp->nexthdr;
3684 off += ipv6_authlen(hp);
3685 break;
3686 }
3687 case IPPROTO_FRAGMENT: {
3688 struct frag_hdr *hp;
3689
3690 err = skb_maybe_pull_tail(skb,
3691 off +
3692 sizeof(struct frag_hdr),
3693 MAX_IPV6_HDR_LEN);
3694 if (err < 0)
3695 goto out;
3696
3697 hp = OPT_HDR(struct frag_hdr, skb, off);
3698
3699 if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
3700 fragment = true;
3701
3702 nexthdr = hp->nexthdr;
3703 off += sizeof(struct frag_hdr);
3704 break;
3705 }
3706 default:
3707 done = true;
3708 break;
3709 }
3710 }
3711
3712 err = -EPROTO;
3713
3714 if (!done || fragment)
3715 goto out;
3716
3717 switch (nexthdr) {
3718 case IPPROTO_TCP:
3719 err = skb_maybe_pull_tail(skb,
3720 off + sizeof(struct tcphdr),
3721 MAX_IPV6_HDR_LEN);
3722 if (err < 0)
3723 goto out;
3724
3725 if (!skb_partial_csum_set(skb, off,
3726 offsetof(struct tcphdr, check))) {
3727 err = -EPROTO;
3728 goto out;
3729 }
3730
3731 if (recalculate)
3732 tcp_hdr(skb)->check =
3733 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3734 &ipv6_hdr(skb)->daddr,
3735 skb->len - off,
3736 IPPROTO_TCP, 0);
3737 break;
3738 case IPPROTO_UDP:
3739 err = skb_maybe_pull_tail(skb,
3740 off + sizeof(struct udphdr),
3741 MAX_IPV6_HDR_LEN);
3742 if (err < 0)
3743 goto out;
3744
3745 if (!skb_partial_csum_set(skb, off,
3746 offsetof(struct udphdr, check))) {
3747 err = -EPROTO;
3748 goto out;
3749 }
3750
3751 if (recalculate)
3752 udp_hdr(skb)->check =
3753 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3754 &ipv6_hdr(skb)->daddr,
3755 skb->len - off,
3756 IPPROTO_UDP, 0);
3757 break;
3758 default:
3759 goto out;
3760 }
3761
3762 err = 0;
3763
3764out:
3765 return err;
3766}
3767
3768/**
3769 * skb_checksum_setup - set up partial checksum offset
3770 * @skb: the skb to set up
3771 * @recalculate: if true the pseudo-header checksum will be recalculated
3772 */
3773int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
3774{
3775 int err;
3776
3777 switch (skb->protocol) {
3778 case htons(ETH_P_IP):
3779 err = skb_checksum_setup_ip(skb, recalculate);
3780 break;
3781
3782 case htons(ETH_P_IPV6):
3783 err = skb_checksum_setup_ipv6(skb, recalculate);
3784 break;
3785
3786 default:
3787 err = -EPROTO;
3788 break;
3789 }
3790
3791 return err;
3792}
3793EXPORT_SYMBOL(skb_checksum_setup);
3794
3441void __skb_warn_lro_forwarding(const struct sk_buff *skb) 3795void __skb_warn_lro_forwarding(const struct sk_buff *skb)
3442{ 3796{
3443 net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", 3797 net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
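
A hedged sketch of how a receive path (for example a paravirtualised driver handing packets up from a guest) might use the new skb_checksum_setup() helper; the function name is invented and the surrounding driver context is omitted.

#include <linux/skbuff.h>

static int example_rx_checksum_fixup(struct sk_buff *skb)
{
	int err = 0;

	/* CHECKSUM_PARTIAL packets need csum_start/csum_offset pointing at a
	 * real TCP/UDP checksum field; passing true also recalculates the
	 * pseudo-header checksum.
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL)
		err = skb_checksum_setup(skb, true);

	return err;	/* -EPROTO: headers could not be parsed */
}
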
diff --git a/net/core/sock.c b/net/core/sock.c
index 5393b4b719d7..0c127dcdf6a8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -925,8 +925,8 @@ set_rcvbuf:
925EXPORT_SYMBOL(sock_setsockopt); 925EXPORT_SYMBOL(sock_setsockopt);
926 926
927 927
928void cred_to_ucred(struct pid *pid, const struct cred *cred, 928static void cred_to_ucred(struct pid *pid, const struct cred *cred,
929 struct ucred *ucred) 929 struct ucred *ucred)
930{ 930{
931 ucred->pid = pid_vnr(pid); 931 ucred->pid = pid_vnr(pid);
932 ucred->uid = ucred->gid = -1; 932 ucred->uid = ucred->gid = -1;
@@ -937,7 +937,6 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred,
937 ucred->gid = from_kgid_munged(current_ns, cred->egid); 937 ucred->gid = from_kgid_munged(current_ns, cred->egid);
938 } 938 }
939} 939}
940EXPORT_SYMBOL_GPL(cred_to_ucred);
941 940
942int sock_getsockopt(struct socket *sock, int level, int optname, 941int sock_getsockopt(struct socket *sock, int level, int optname,
943 char __user *optval, int __user *optlen) 942 char __user *optval, int __user *optlen)
@@ -1168,6 +1167,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1168 v.val = sock_flag(sk, SOCK_FILTER_LOCKED); 1167 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1169 break; 1168 break;
1170 1169
1170 case SO_BPF_EXTENSIONS:
1171 v.val = bpf_tell_extensions();
1172 break;
1173
1171 case SO_SELECT_ERR_QUEUE: 1174 case SO_SELECT_ERR_QUEUE:
1172 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); 1175 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1173 break; 1176 break;
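
From userspace the new option is a plain SOL_SOCKET getsockopt. The sketch below assumes the installed headers already define SO_BPF_EXTENSIONS; the returned value indicates the highest ancillary BPF extension the kernel's filter supports.

#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int ext = 0;
	socklen_t len = sizeof(ext);

	if (fd < 0 ||
	    getsockopt(fd, SOL_SOCKET, SO_BPF_EXTENSIONS, &ext, &len) < 0) {
		perror("SO_BPF_EXTENSIONS");
		return 1;
	}
	printf("BPF ancillary extensions supported up to %d\n", ext);
	return 0;
}
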
@@ -1308,19 +1311,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
1308 module_put(owner); 1311 module_put(owner);
1309} 1312}
1310 1313
1311#if IS_ENABLED(CONFIG_NET_CLS_CGROUP) 1314#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
1312void sock_update_classid(struct sock *sk)
1313{
1314 u32 classid;
1315
1316 classid = task_cls_classid(current);
1317 if (classid != sk->sk_classid)
1318 sk->sk_classid = classid;
1319}
1320EXPORT_SYMBOL(sock_update_classid);
1321#endif
1322
1323#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
1324void sock_update_netprioidx(struct sock *sk) 1315void sock_update_netprioidx(struct sock *sk)
1325{ 1316{
1326 if (in_interrupt()) 1317 if (in_interrupt())
@@ -1666,22 +1657,6 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1666EXPORT_SYMBOL(sock_wmalloc); 1657EXPORT_SYMBOL(sock_wmalloc);
1667 1658
1668/* 1659/*
1669 * Allocate a skb from the socket's receive buffer.
1670 */
1671struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1672 gfp_t priority)
1673{
1674 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1675 struct sk_buff *skb = alloc_skb(size, priority);
1676 if (skb) {
1677 skb_set_owner_r(skb, sk);
1678 return skb;
1679 }
1680 }
1681 return NULL;
1682}
1683
1684/*
1685 * Allocate a memory block from the socket's option memory buffer. 1660 * Allocate a memory block from the socket's option memory buffer.
1686 */ 1661 */
1687void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) 1662void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
@@ -1865,9 +1840,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
1865 put_page(pfrag->page); 1840 put_page(pfrag->page);
1866 } 1841 }
1867 1842
1868 /* We restrict high order allocations to users that can afford to wait */ 1843 order = SKB_FRAG_PAGE_ORDER;
1869 order = (prio & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
1870
1871 do { 1844 do {
1872 gfp_t gfp = prio; 1845 gfp_t gfp = prio;
1873 1846
diff --git a/net/core/stream.c b/net/core/stream.c
index 512f0a24269b..301c05f26060 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -122,7 +122,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
122 DEFINE_WAIT(wait); 122 DEFINE_WAIT(wait);
123 123
124 if (sk_stream_memory_free(sk)) 124 if (sk_stream_memory_free(sk))
125 current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2; 125 current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2;
126 126
127 while (1) { 127 while (1) {
128 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 128 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cca444190907..cf9cd13509a7 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -122,7 +122,8 @@ static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
122 synchronize_rcu(); 122 synchronize_rcu();
123 kfree(cur); 123 kfree(cur);
124 } else if (!cur && cpumask_test_cpu(i, mask)) { 124 } else if (!cur && cpumask_test_cpu(i, mask)) {
125 cur = kzalloc(len, GFP_KERNEL); 125 cur = kzalloc_node(len, GFP_KERNEL,
126 cpu_to_node(i));
126 if (!cur) { 127 if (!cur) {
127 /* not unwinding previous changes */ 128 /* not unwinding previous changes */
128 ret = -ENOMEM; 129 ret = -ENOMEM;