Diffstat (limited to 'net/core')
-rw-r--r--  net/core/Makefile            |   3
-rw-r--r--  net/core/dev.c               | 588
-rw-r--r--  net/core/dev_addr_lists.c    | 115
-rw-r--r--  net/core/dev_ioctl.c         |   2
-rw-r--r--  net/core/fib_rules.c         |   7
-rw-r--r--  net/core/filter.c            |  30
-rw-r--r--  net/core/flow_dissector.c    |  26
-rw-r--r--  net/core/neighbour.c         | 483
-rw-r--r--  net/core/net-sysfs.c         |  82
-rw-r--r--  net/core/net-sysfs.h         |   2
-rw-r--r--  net/core/netclassid_cgroup.c | 120
-rw-r--r--  net/core/netpoll.c           |  10
-rw-r--r--  net/core/netprio_cgroup.c    |  10
-rw-r--r--  net/core/pktgen.c            |  88
-rw-r--r--  net/core/rtnetlink.c         | 205
-rw-r--r--  net/core/skbuff.c            | 526
-rw-r--r--  net/core/sock.c              |  54
-rw-r--r--  net/core/stream.c            |   2
-rw-r--r--  net/core/sysctl_net_core.c   |   3
19 files changed, 1512 insertions(+), 844 deletions(-)
diff --git a/net/core/Makefile b/net/core/Makefile
index b33b996f5dd6..9628c20acff6 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -21,4 +21,5 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o
21obj-$(CONFIG_TRACEPOINTS) += net-traces.o 21obj-$(CONFIG_TRACEPOINTS) += net-traces.o
22obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o 22obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
23obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o 23obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
24obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o 24obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
25obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 0ce469e5ec80..b1b0c8d4d7df 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -147,6 +147,8 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
147struct list_head ptype_all __read_mostly; /* Taps */ 147struct list_head ptype_all __read_mostly; /* Taps */
148static struct list_head offload_base __read_mostly; 148static struct list_head offload_base __read_mostly;
149 149
150static int netif_rx_internal(struct sk_buff *skb);
151
150/* 152/*
151 * The @dev_base_head list is protected by @dev_base_lock and the rtnl 153 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
152 * semaphore. 154 * semaphore.
@@ -480,7 +482,7 @@ EXPORT_SYMBOL(dev_add_offload);
480 * and must not be freed until after all the CPU's have gone 482 * and must not be freed until after all the CPU's have gone
481 * through a quiescent state. 483 * through a quiescent state.
482 */ 484 */
483void __dev_remove_offload(struct packet_offload *po) 485static void __dev_remove_offload(struct packet_offload *po)
484{ 486{
485 struct list_head *head = &offload_base; 487 struct list_head *head = &offload_base;
486 struct packet_offload *po1; 488 struct packet_offload *po1;
@@ -498,7 +500,6 @@ void __dev_remove_offload(struct packet_offload *po)
498out: 500out:
499 spin_unlock(&offload_lock); 501 spin_unlock(&offload_lock);
500} 502}
501EXPORT_SYMBOL(__dev_remove_offload);
502 503
503/** 504/**
504 * dev_remove_offload - remove packet offload handler 505 * dev_remove_offload - remove packet offload handler
@@ -1118,6 +1119,8 @@ rollback:
1118 1119
1119 write_seqcount_end(&devnet_rename_seq); 1120 write_seqcount_end(&devnet_rename_seq);
1120 1121
1122 netdev_adjacent_rename_links(dev, oldname);
1123
1121 write_lock_bh(&dev_base_lock); 1124 write_lock_bh(&dev_base_lock);
1122 hlist_del_rcu(&dev->name_hlist); 1125 hlist_del_rcu(&dev->name_hlist);
1123 write_unlock_bh(&dev_base_lock); 1126 write_unlock_bh(&dev_base_lock);
@@ -1137,6 +1140,7 @@ rollback:
1137 err = ret; 1140 err = ret;
1138 write_seqcount_begin(&devnet_rename_seq); 1141 write_seqcount_begin(&devnet_rename_seq);
1139 memcpy(dev->name, oldname, IFNAMSIZ); 1142 memcpy(dev->name, oldname, IFNAMSIZ);
1143 memcpy(oldname, newname, IFNAMSIZ);
1140 goto rollback; 1144 goto rollback;
1141 } else { 1145 } else {
1142 pr_err("%s: name change rollback failed: %d\n", 1146 pr_err("%s: name change rollback failed: %d\n",
@@ -1566,14 +1570,14 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
1566 * are as for raw_notifier_call_chain(). 1570 * are as for raw_notifier_call_chain().
1567 */ 1571 */
1568 1572
1569int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, 1573static int call_netdevice_notifiers_info(unsigned long val,
1570 struct netdev_notifier_info *info) 1574 struct net_device *dev,
1575 struct netdev_notifier_info *info)
1571{ 1576{
1572 ASSERT_RTNL(); 1577 ASSERT_RTNL();
1573 netdev_notifier_info_init(info, dev); 1578 netdev_notifier_info_init(info, dev);
1574 return raw_notifier_call_chain(&netdev_chain, val, info); 1579 return raw_notifier_call_chain(&netdev_chain, val, info);
1575} 1580}
1576EXPORT_SYMBOL(call_netdevice_notifiers_info);
1577 1581
1578/** 1582/**
1579 * call_netdevice_notifiers - call all network notifier blocks 1583 * call_netdevice_notifiers - call all network notifier blocks
@@ -1699,7 +1703,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1699 skb_scrub_packet(skb, true); 1703 skb_scrub_packet(skb, true);
1700 skb->protocol = eth_type_trans(skb, dev); 1704 skb->protocol = eth_type_trans(skb, dev);
1701 1705
1702 return netif_rx(skb); 1706 return netif_rx_internal(skb);
1703} 1707}
1704EXPORT_SYMBOL_GPL(dev_forward_skb); 1708EXPORT_SYMBOL_GPL(dev_forward_skb);
1705 1709
@@ -2079,7 +2083,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
2079} 2083}
2080EXPORT_SYMBOL(netif_set_real_num_tx_queues); 2084EXPORT_SYMBOL(netif_set_real_num_tx_queues);
2081 2085
2082#ifdef CONFIG_RPS 2086#ifdef CONFIG_SYSFS
2083/** 2087/**
2084 * netif_set_real_num_rx_queues - set actual number of RX queues used 2088 * netif_set_real_num_rx_queues - set actual number of RX queues used
2085 * @dev: Network device 2089 * @dev: Network device
@@ -2145,30 +2149,42 @@ void __netif_schedule(struct Qdisc *q)
2145} 2149}
2146EXPORT_SYMBOL(__netif_schedule); 2150EXPORT_SYMBOL(__netif_schedule);
2147 2151
2148void dev_kfree_skb_irq(struct sk_buff *skb) 2152struct dev_kfree_skb_cb {
2153 enum skb_free_reason reason;
2154};
2155
2156static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
2149{ 2157{
2150 if (atomic_dec_and_test(&skb->users)) { 2158 return (struct dev_kfree_skb_cb *)skb->cb;
2151 struct softnet_data *sd; 2159}
2152 unsigned long flags; 2160
2161void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
2162{
2163 unsigned long flags;
2153 2164
2154 local_irq_save(flags); 2165 if (likely(atomic_read(&skb->users) == 1)) {
2155 sd = &__get_cpu_var(softnet_data); 2166 smp_rmb();
2156 skb->next = sd->completion_queue; 2167 atomic_set(&skb->users, 0);
2157 sd->completion_queue = skb; 2168 } else if (likely(!atomic_dec_and_test(&skb->users))) {
2158 raise_softirq_irqoff(NET_TX_SOFTIRQ); 2169 return;
2159 local_irq_restore(flags);
2160 } 2170 }
2171 get_kfree_skb_cb(skb)->reason = reason;
2172 local_irq_save(flags);
2173 skb->next = __this_cpu_read(softnet_data.completion_queue);
2174 __this_cpu_write(softnet_data.completion_queue, skb);
2175 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2176 local_irq_restore(flags);
2161} 2177}
2162EXPORT_SYMBOL(dev_kfree_skb_irq); 2178EXPORT_SYMBOL(__dev_kfree_skb_irq);
2163 2179
2164void dev_kfree_skb_any(struct sk_buff *skb) 2180void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
2165{ 2181{
2166 if (in_irq() || irqs_disabled()) 2182 if (in_irq() || irqs_disabled())
2167 dev_kfree_skb_irq(skb); 2183 __dev_kfree_skb_irq(skb, reason);
2168 else 2184 else
2169 dev_kfree_skb(skb); 2185 dev_kfree_skb(skb);
2170} 2186}
2171EXPORT_SYMBOL(dev_kfree_skb_any); 2187EXPORT_SYMBOL(__dev_kfree_skb_any);
2172 2188
2173 2189
2174/** 2190/**
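The hunk above replaces dev_kfree_skb_irq()/dev_kfree_skb_any() with reason-aware variants so that net_tx_action() can report a completed transmit as consume_skb rather than kfree_skb. A minimal sketch of how a driver's TX-completion path might use the new helper; the handler, its xmit_ok flag, and SKB_REASON_DROPPED (the companion value of SKB_REASON_CONSUMED, defined in netdevice.h rather than in this diff) are assumptions:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical TX-completion handler; only the __dev_kfree_skb_any() calls
 * come from the API introduced above.
 */
static void example_tx_complete(struct sk_buff *skb, bool xmit_ok)
{
        if (xmit_ok)
                /* shows up as consume_skb in net_tx_action()'s trace output */
                __dev_kfree_skb_any(skb, SKB_REASON_CONSUMED);
        else
                /* shows up as kfree_skb, i.e. an actual drop */
                __dev_kfree_skb_any(skb, SKB_REASON_DROPPED);
}
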
@@ -2404,7 +2420,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
2404 * 2. No high memory really exists on this machine. 2420 * 2. No high memory really exists on this machine.
2405 */ 2421 */
2406 2422
2407static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) 2423static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
2408{ 2424{
2409#ifdef CONFIG_HIGHMEM 2425#ifdef CONFIG_HIGHMEM
2410 int i; 2426 int i;
@@ -2442,13 +2458,8 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
2442{ 2458{
2443 struct dev_gso_cb *cb; 2459 struct dev_gso_cb *cb;
2444 2460
2445 do { 2461 kfree_skb_list(skb->next);
2446 struct sk_buff *nskb = skb->next; 2462 skb->next = NULL;
2447
2448 skb->next = nskb->next;
2449 nskb->next = NULL;
2450 kfree_skb(nskb);
2451 } while (skb->next);
2452 2463
2453 cb = DEV_GSO_CB(skb); 2464 cb = DEV_GSO_CB(skb);
2454 if (cb->destructor) 2465 if (cb->destructor)
@@ -2484,34 +2495,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
2484} 2495}
2485 2496
2486static netdev_features_t harmonize_features(struct sk_buff *skb, 2497static netdev_features_t harmonize_features(struct sk_buff *skb,
2487 netdev_features_t features) 2498 const struct net_device *dev,
2499 netdev_features_t features)
2488{ 2500{
2489 if (skb->ip_summed != CHECKSUM_NONE && 2501 if (skb->ip_summed != CHECKSUM_NONE &&
2490 !can_checksum_protocol(features, skb_network_protocol(skb))) { 2502 !can_checksum_protocol(features, skb_network_protocol(skb))) {
2491 features &= ~NETIF_F_ALL_CSUM; 2503 features &= ~NETIF_F_ALL_CSUM;
2492 } else if (illegal_highdma(skb->dev, skb)) { 2504 } else if (illegal_highdma(dev, skb)) {
2493 features &= ~NETIF_F_SG; 2505 features &= ~NETIF_F_SG;
2494 } 2506 }
2495 2507
2496 return features; 2508 return features;
2497} 2509}
2498 2510
2499netdev_features_t netif_skb_features(struct sk_buff *skb) 2511netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
2512 const struct net_device *dev)
2500{ 2513{
2501 __be16 protocol = skb->protocol; 2514 __be16 protocol = skb->protocol;
2502 netdev_features_t features = skb->dev->features; 2515 netdev_features_t features = dev->features;
2503 2516
2504 if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) 2517 if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
2505 features &= ~NETIF_F_GSO_MASK; 2518 features &= ~NETIF_F_GSO_MASK;
2506 2519
2507 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { 2520 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
2508 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 2521 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2509 protocol = veh->h_vlan_encapsulated_proto; 2522 protocol = veh->h_vlan_encapsulated_proto;
2510 } else if (!vlan_tx_tag_present(skb)) { 2523 } else if (!vlan_tx_tag_present(skb)) {
2511 return harmonize_features(skb, features); 2524 return harmonize_features(skb, dev, features);
2512 } 2525 }
2513 2526
2514 features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | 2527 features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
2515 NETIF_F_HW_VLAN_STAG_TX); 2528 NETIF_F_HW_VLAN_STAG_TX);
2516 2529
2517 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) 2530 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
@@ -2519,24 +2532,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
2519 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | 2532 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2520 NETIF_F_HW_VLAN_STAG_TX; 2533 NETIF_F_HW_VLAN_STAG_TX;
2521 2534
2522 return harmonize_features(skb, features); 2535 return harmonize_features(skb, dev, features);
2523}
2524EXPORT_SYMBOL(netif_skb_features);
2525
2526/*
2527 * Returns true if either:
2528 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
2529 * 2. skb is fragmented and the device does not support SG.
2530 */
2531static inline int skb_needs_linearize(struct sk_buff *skb,
2532 netdev_features_t features)
2533{
2534 return skb_is_nonlinear(skb) &&
2535 ((skb_has_frag_list(skb) &&
2536 !(features & NETIF_F_FRAGLIST)) ||
2537 (skb_shinfo(skb)->nr_frags &&
2538 !(features & NETIF_F_SG)));
2539} 2536}
2537EXPORT_SYMBOL(netif_skb_dev_features);
2540 2538
2541int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, 2539int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2542 struct netdev_queue *txq) 2540 struct netdev_queue *txq)
@@ -2605,8 +2603,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2605 dev_queue_xmit_nit(skb, dev); 2603 dev_queue_xmit_nit(skb, dev);
2606 2604
2607 skb_len = skb->len; 2605 skb_len = skb->len;
2608 rc = ops->ndo_start_xmit(skb, dev); 2606 trace_net_dev_start_xmit(skb, dev);
2609 2607 rc = ops->ndo_start_xmit(skb, dev);
2610 trace_net_dev_xmit(skb, rc, dev, skb_len); 2608 trace_net_dev_xmit(skb, rc, dev, skb_len);
2611 if (rc == NETDEV_TX_OK) 2609 if (rc == NETDEV_TX_OK)
2612 txq_trans_update(txq); 2610 txq_trans_update(txq);
@@ -2624,6 +2622,7 @@ gso:
2624 dev_queue_xmit_nit(nskb, dev); 2622 dev_queue_xmit_nit(nskb, dev);
2625 2623
2626 skb_len = nskb->len; 2624 skb_len = nskb->len;
2625 trace_net_dev_start_xmit(nskb, dev);
2627 rc = ops->ndo_start_xmit(nskb, dev); 2626 rc = ops->ndo_start_xmit(nskb, dev);
2628 trace_net_dev_xmit(nskb, rc, dev, skb_len); 2627 trace_net_dev_xmit(nskb, rc, dev, skb_len);
2629 if (unlikely(rc != NETDEV_TX_OK)) { 2628 if (unlikely(rc != NETDEV_TX_OK)) {
@@ -2744,7 +2743,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2744 return rc; 2743 return rc;
2745} 2744}
2746 2745
2747#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) 2746#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
2748static void skb_update_prio(struct sk_buff *skb) 2747static void skb_update_prio(struct sk_buff *skb)
2749{ 2748{
2750 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); 2749 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
@@ -2781,8 +2780,9 @@ int dev_loopback_xmit(struct sk_buff *skb)
2781EXPORT_SYMBOL(dev_loopback_xmit); 2780EXPORT_SYMBOL(dev_loopback_xmit);
2782 2781
2783/** 2782/**
2784 * dev_queue_xmit - transmit a buffer 2783 * __dev_queue_xmit - transmit a buffer
2785 * @skb: buffer to transmit 2784 * @skb: buffer to transmit
2785 * @accel_priv: private data used for L2 forwarding offload
2786 * 2786 *
2787 * Queue a buffer for transmission to a network device. The caller must 2787 * Queue a buffer for transmission to a network device. The caller must
2788 * have set the device and priority and built the buffer before calling 2788 * have set the device and priority and built the buffer before calling
@@ -2805,7 +2805,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
2805 * the BH enable code must have IRQs enabled so that it will not deadlock. 2805 * the BH enable code must have IRQs enabled so that it will not deadlock.
2806 * --BLG 2806 * --BLG
2807 */ 2807 */
2808int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) 2808static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
2809{ 2809{
2810 struct net_device *dev = skb->dev; 2810 struct net_device *dev = skb->dev;
2811 struct netdev_queue *txq; 2811 struct netdev_queue *txq;
@@ -3014,7 +3014,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3014 } 3014 }
3015 3015
3016 skb_reset_network_header(skb); 3016 skb_reset_network_header(skb);
3017 if (!skb_get_rxhash(skb)) 3017 if (!skb_get_hash(skb))
3018 goto done; 3018 goto done;
3019 3019
3020 flow_table = rcu_dereference(rxqueue->rps_flow_table); 3020 flow_table = rcu_dereference(rxqueue->rps_flow_table);
@@ -3159,7 +3159,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
3159 rcu_read_lock(); 3159 rcu_read_lock();
3160 fl = rcu_dereference(sd->flow_limit); 3160 fl = rcu_dereference(sd->flow_limit);
3161 if (fl) { 3161 if (fl) {
3162 new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1); 3162 new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
3163 old_flow = fl->history[fl->history_head]; 3163 old_flow = fl->history[fl->history_head];
3164 fl->history[fl->history_head] = new_flow; 3164 fl->history[fl->history_head] = new_flow;
3165 3165
@@ -3227,22 +3227,7 @@ enqueue:
3227 return NET_RX_DROP; 3227 return NET_RX_DROP;
3228} 3228}
3229 3229
3230/** 3230static int netif_rx_internal(struct sk_buff *skb)
3231 * netif_rx - post buffer to the network code
3232 * @skb: buffer to post
3233 *
3234 * This function receives a packet from a device driver and queues it for
3235 * the upper (protocol) levels to process. It always succeeds. The buffer
3236 * may be dropped during processing for congestion control or by the
3237 * protocol layers.
3238 *
3239 * return values:
3240 * NET_RX_SUCCESS (no congestion)
3241 * NET_RX_DROP (packet was dropped)
3242 *
3243 */
3244
3245int netif_rx(struct sk_buff *skb)
3246{ 3231{
3247 int ret; 3232 int ret;
3248 3233
@@ -3278,14 +3263,38 @@ int netif_rx(struct sk_buff *skb)
3278 } 3263 }
3279 return ret; 3264 return ret;
3280} 3265}
3266
3267/**
3268 * netif_rx - post buffer to the network code
3269 * @skb: buffer to post
3270 *
3271 * This function receives a packet from a device driver and queues it for
3272 * the upper (protocol) levels to process. It always succeeds. The buffer
3273 * may be dropped during processing for congestion control or by the
3274 * protocol layers.
3275 *
3276 * return values:
3277 * NET_RX_SUCCESS (no congestion)
3278 * NET_RX_DROP (packet was dropped)
3279 *
3280 */
3281
3282int netif_rx(struct sk_buff *skb)
3283{
3284 trace_netif_rx_entry(skb);
3285
3286 return netif_rx_internal(skb);
3287}
3281EXPORT_SYMBOL(netif_rx); 3288EXPORT_SYMBOL(netif_rx);
3282 3289
3283int netif_rx_ni(struct sk_buff *skb) 3290int netif_rx_ni(struct sk_buff *skb)
3284{ 3291{
3285 int err; 3292 int err;
3286 3293
3294 trace_netif_rx_ni_entry(skb);
3295
3287 preempt_disable(); 3296 preempt_disable();
3288 err = netif_rx(skb); 3297 err = netif_rx_internal(skb);
3289 if (local_softirq_pending()) 3298 if (local_softirq_pending())
3290 do_softirq(); 3299 do_softirq();
3291 preempt_enable(); 3300 preempt_enable();
@@ -3311,7 +3320,10 @@ static void net_tx_action(struct softirq_action *h)
3311 clist = clist->next; 3320 clist = clist->next;
3312 3321
3313 WARN_ON(atomic_read(&skb->users)); 3322 WARN_ON(atomic_read(&skb->users));
3314 trace_kfree_skb(skb, net_tx_action); 3323 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
3324 trace_consume_skb(skb);
3325 else
3326 trace_kfree_skb(skb, net_tx_action);
3315 __kfree_skb(skb); 3327 __kfree_skb(skb);
3316 } 3328 }
3317 } 3329 }
@@ -3667,22 +3679,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
3667 return ret; 3679 return ret;
3668} 3680}
3669 3681
3670/** 3682static int netif_receive_skb_internal(struct sk_buff *skb)
3671 * netif_receive_skb - process receive buffer from network
3672 * @skb: buffer to process
3673 *
3674 * netif_receive_skb() is the main receive data processing function.
3675 * It always succeeds. The buffer may be dropped during processing
3676 * for congestion control or by the protocol layers.
3677 *
3678 * This function may only be called from softirq context and interrupts
3679 * should be enabled.
3680 *
3681 * Return values (usually ignored):
3682 * NET_RX_SUCCESS: no congestion
3683 * NET_RX_DROP: packet was dropped
3684 */
3685int netif_receive_skb(struct sk_buff *skb)
3686{ 3683{
3687 net_timestamp_check(netdev_tstamp_prequeue, skb); 3684 net_timestamp_check(netdev_tstamp_prequeue, skb);
3688 3685
@@ -3708,6 +3705,28 @@ int netif_receive_skb(struct sk_buff *skb)
3708#endif 3705#endif
3709 return __netif_receive_skb(skb); 3706 return __netif_receive_skb(skb);
3710} 3707}
3708
3709/**
3710 * netif_receive_skb - process receive buffer from network
3711 * @skb: buffer to process
3712 *
3713 * netif_receive_skb() is the main receive data processing function.
3714 * It always succeeds. The buffer may be dropped during processing
3715 * for congestion control or by the protocol layers.
3716 *
3717 * This function may only be called from softirq context and interrupts
3718 * should be enabled.
3719 *
3720 * Return values (usually ignored):
3721 * NET_RX_SUCCESS: no congestion
3722 * NET_RX_DROP: packet was dropped
3723 */
3724int netif_receive_skb(struct sk_buff *skb)
3725{
3726 trace_netif_receive_skb_entry(skb);
3727
3728 return netif_receive_skb_internal(skb);
3729}
3711EXPORT_SYMBOL(netif_receive_skb); 3730EXPORT_SYMBOL(netif_receive_skb);
3712 3731
3713/* Network device is going away, flush any packets still pending 3732/* Network device is going away, flush any packets still pending
@@ -3757,7 +3776,7 @@ static int napi_gro_complete(struct sk_buff *skb)
3757 if (ptype->type != type || !ptype->callbacks.gro_complete) 3776 if (ptype->type != type || !ptype->callbacks.gro_complete)
3758 continue; 3777 continue;
3759 3778
3760 err = ptype->callbacks.gro_complete(skb); 3779 err = ptype->callbacks.gro_complete(skb, 0);
3761 break; 3780 break;
3762 } 3781 }
3763 rcu_read_unlock(); 3782 rcu_read_unlock();
@@ -3769,7 +3788,7 @@ static int napi_gro_complete(struct sk_buff *skb)
3769 } 3788 }
3770 3789
3771out: 3790out:
3772 return netif_receive_skb(skb); 3791 return netif_receive_skb_internal(skb);
3773} 3792}
3774 3793
3775/* napi->gro_list contains packets ordered by age. 3794/* napi->gro_list contains packets ordered by age.
@@ -3805,10 +3824,18 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
3805{ 3824{
3806 struct sk_buff *p; 3825 struct sk_buff *p;
3807 unsigned int maclen = skb->dev->hard_header_len; 3826 unsigned int maclen = skb->dev->hard_header_len;
3827 u32 hash = skb_get_hash_raw(skb);
3808 3828
3809 for (p = napi->gro_list; p; p = p->next) { 3829 for (p = napi->gro_list; p; p = p->next) {
3810 unsigned long diffs; 3830 unsigned long diffs;
3811 3831
3832 NAPI_GRO_CB(p)->flush = 0;
3833
3834 if (hash != skb_get_hash_raw(p)) {
3835 NAPI_GRO_CB(p)->same_flow = 0;
3836 continue;
3837 }
3838
3812 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; 3839 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3813 diffs |= p->vlan_tci ^ skb->vlan_tci; 3840 diffs |= p->vlan_tci ^ skb->vlan_tci;
3814 if (maclen == ETH_HLEN) 3841 if (maclen == ETH_HLEN)
@@ -3819,7 +3846,23 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
3819 skb_gro_mac_header(skb), 3846 skb_gro_mac_header(skb),
3820 maclen); 3847 maclen);
3821 NAPI_GRO_CB(p)->same_flow = !diffs; 3848 NAPI_GRO_CB(p)->same_flow = !diffs;
3822 NAPI_GRO_CB(p)->flush = 0; 3849 }
3850}
3851
3852static void skb_gro_reset_offset(struct sk_buff *skb)
3853{
3854 const struct skb_shared_info *pinfo = skb_shinfo(skb);
3855 const skb_frag_t *frag0 = &pinfo->frags[0];
3856
3857 NAPI_GRO_CB(skb)->data_offset = 0;
3858 NAPI_GRO_CB(skb)->frag0 = NULL;
3859 NAPI_GRO_CB(skb)->frag0_len = 0;
3860
3861 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
3862 pinfo->nr_frags &&
3863 !PageHighMem(skb_frag_page(frag0))) {
3864 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3865 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
3823 } 3866 }
3824} 3867}
3825 3868
@@ -3838,7 +3881,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
3838 if (skb_is_gso(skb) || skb_has_frag_list(skb)) 3881 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3839 goto normal; 3882 goto normal;
3840 3883
3884 skb_gro_reset_offset(skb);
3841 gro_list_prepare(napi, skb); 3885 gro_list_prepare(napi, skb);
3886 NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
3842 3887
3843 rcu_read_lock(); 3888 rcu_read_lock();
3844 list_for_each_entry_rcu(ptype, head, list) { 3889 list_for_each_entry_rcu(ptype, head, list) {
@@ -3850,6 +3895,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
3850 NAPI_GRO_CB(skb)->same_flow = 0; 3895 NAPI_GRO_CB(skb)->same_flow = 0;
3851 NAPI_GRO_CB(skb)->flush = 0; 3896 NAPI_GRO_CB(skb)->flush = 0;
3852 NAPI_GRO_CB(skb)->free = 0; 3897 NAPI_GRO_CB(skb)->free = 0;
3898 NAPI_GRO_CB(skb)->udp_mark = 0;
3853 3899
3854 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); 3900 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
3855 break; 3901 break;
@@ -3874,10 +3920,23 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
3874 if (same_flow) 3920 if (same_flow)
3875 goto ok; 3921 goto ok;
3876 3922
3877 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) 3923 if (NAPI_GRO_CB(skb)->flush)
3878 goto normal; 3924 goto normal;
3879 3925
3880 napi->gro_count++; 3926 if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
3927 struct sk_buff *nskb = napi->gro_list;
3928
3929 /* locate the end of the list to select the 'oldest' flow */
3930 while (nskb->next) {
3931 pp = &nskb->next;
3932 nskb = *pp;
3933 }
3934 *pp = NULL;
3935 nskb->next = NULL;
3936 napi_gro_complete(nskb);
3937 } else {
3938 napi->gro_count++;
3939 }
3881 NAPI_GRO_CB(skb)->count = 1; 3940 NAPI_GRO_CB(skb)->count = 1;
3882 NAPI_GRO_CB(skb)->age = jiffies; 3941 NAPI_GRO_CB(skb)->age = jiffies;
3883 skb_shinfo(skb)->gso_size = skb_gro_len(skb); 3942 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
@@ -3915,12 +3974,39 @@ normal:
3915 goto pull; 3974 goto pull;
3916} 3975}
3917 3976
3977struct packet_offload *gro_find_receive_by_type(__be16 type)
3978{
3979 struct list_head *offload_head = &offload_base;
3980 struct packet_offload *ptype;
3981
3982 list_for_each_entry_rcu(ptype, offload_head, list) {
3983 if (ptype->type != type || !ptype->callbacks.gro_receive)
3984 continue;
3985 return ptype;
3986 }
3987 return NULL;
3988}
3989EXPORT_SYMBOL(gro_find_receive_by_type);
3990
3991struct packet_offload *gro_find_complete_by_type(__be16 type)
3992{
3993 struct list_head *offload_head = &offload_base;
3994 struct packet_offload *ptype;
3995
3996 list_for_each_entry_rcu(ptype, offload_head, list) {
3997 if (ptype->type != type || !ptype->callbacks.gro_complete)
3998 continue;
3999 return ptype;
4000 }
4001 return NULL;
4002}
4003EXPORT_SYMBOL(gro_find_complete_by_type);
3918 4004
3919static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) 4005static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3920{ 4006{
3921 switch (ret) { 4007 switch (ret) {
3922 case GRO_NORMAL: 4008 case GRO_NORMAL:
3923 if (netif_receive_skb(skb)) 4009 if (netif_receive_skb_internal(skb))
3924 ret = GRO_DROP; 4010 ret = GRO_DROP;
3925 break; 4011 break;
3926 4012
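gro_find_receive_by_type() and gro_find_complete_by_type(), added in the hunk above, let an encapsulation offload look up the inner protocol's handler instead of walking offload_base by hand. A hedged sketch of an outer gro_receive callback using the lookup; the callback itself, its inner_proto argument, and the give-up-on-miss policy are assumptions, not part of this diff:

#include <linux/netdevice.h>

/* Assumes the caller already holds rcu_read_lock(), as dev_gro_receive()
 * does when it invokes gro_receive callbacks.
 */
static struct sk_buff **example_encap_gro_receive(struct sk_buff **head,
                                                  struct sk_buff *skb,
                                                  __be16 inner_proto)
{
        struct packet_offload *ptype;

        ptype = gro_find_receive_by_type(inner_proto);
        if (!ptype)
                return NULL;    /* no inner handler: stop trying to aggregate */
        return ptype->callbacks.gro_receive(head, skb);
}
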
@@ -3943,26 +4029,9 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3943 return ret; 4029 return ret;
3944} 4030}
3945 4031
3946static void skb_gro_reset_offset(struct sk_buff *skb)
3947{
3948 const struct skb_shared_info *pinfo = skb_shinfo(skb);
3949 const skb_frag_t *frag0 = &pinfo->frags[0];
3950
3951 NAPI_GRO_CB(skb)->data_offset = 0;
3952 NAPI_GRO_CB(skb)->frag0 = NULL;
3953 NAPI_GRO_CB(skb)->frag0_len = 0;
3954
3955 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
3956 pinfo->nr_frags &&
3957 !PageHighMem(skb_frag_page(frag0))) {
3958 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3959 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
3960 }
3961}
3962
3963gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 4032gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3964{ 4033{
3965 skb_gro_reset_offset(skb); 4034 trace_napi_gro_receive_entry(skb);
3966 4035
3967 return napi_skb_finish(dev_gro_receive(napi, skb), skb); 4036 return napi_skb_finish(dev_gro_receive(napi, skb), skb);
3968} 4037}
@@ -3986,8 +4055,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
3986 4055
3987 if (!skb) { 4056 if (!skb) {
3988 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); 4057 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3989 if (skb) 4058 napi->skb = skb;
3990 napi->skb = skb;
3991 } 4059 }
3992 return skb; 4060 return skb;
3993} 4061}
@@ -3998,12 +4066,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
3998{ 4066{
3999 switch (ret) { 4067 switch (ret) {
4000 case GRO_NORMAL: 4068 case GRO_NORMAL:
4001 case GRO_HELD: 4069 if (netif_receive_skb_internal(skb))
4002 skb->protocol = eth_type_trans(skb, skb->dev);
4003
4004 if (ret == GRO_HELD)
4005 skb_gro_pull(skb, -ETH_HLEN);
4006 else if (netif_receive_skb(skb))
4007 ret = GRO_DROP; 4070 ret = GRO_DROP;
4008 break; 4071 break;
4009 4072
@@ -4012,6 +4075,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
4012 napi_reuse_skb(napi, skb); 4075 napi_reuse_skb(napi, skb);
4013 break; 4076 break;
4014 4077
4078 case GRO_HELD:
4015 case GRO_MERGED: 4079 case GRO_MERGED:
4016 break; 4080 break;
4017 } 4081 }
@@ -4022,36 +4086,15 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
4022static struct sk_buff *napi_frags_skb(struct napi_struct *napi) 4086static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
4023{ 4087{
4024 struct sk_buff *skb = napi->skb; 4088 struct sk_buff *skb = napi->skb;
4025 struct ethhdr *eth;
4026 unsigned int hlen;
4027 unsigned int off;
4028 4089
4029 napi->skb = NULL; 4090 napi->skb = NULL;
4030 4091
4031 skb_reset_mac_header(skb); 4092 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) {
4032 skb_gro_reset_offset(skb); 4093 napi_reuse_skb(napi, skb);
4033 4094 return NULL;
4034 off = skb_gro_offset(skb);
4035 hlen = off + sizeof(*eth);
4036 eth = skb_gro_header_fast(skb, off);
4037 if (skb_gro_header_hard(skb, hlen)) {
4038 eth = skb_gro_header_slow(skb, hlen, off);
4039 if (unlikely(!eth)) {
4040 napi_reuse_skb(napi, skb);
4041 skb = NULL;
4042 goto out;
4043 }
4044 } 4095 }
4096 skb->protocol = eth_type_trans(skb, skb->dev);
4045 4097
4046 skb_gro_pull(skb, sizeof(*eth));
4047
4048 /*
4049 * This works because the only protocols we care about don't require
4050 * special handling. We'll fix it up properly at the end.
4051 */
4052 skb->protocol = eth->h_proto;
4053
4054out:
4055 return skb; 4098 return skb;
4056} 4099}
4057 4100
@@ -4062,12 +4105,14 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
4062 if (!skb) 4105 if (!skb)
4063 return GRO_DROP; 4106 return GRO_DROP;
4064 4107
4108 trace_napi_gro_frags_entry(skb);
4109
4065 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); 4110 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
4066} 4111}
4067EXPORT_SYMBOL(napi_gro_frags); 4112EXPORT_SYMBOL(napi_gro_frags);
4068 4113
4069/* 4114/*
4070 * net_rps_action sends any pending IPI's for rps. 4115 * net_rps_action_and_irq_enable sends any pending IPI's for rps.
4071 * Note: called with local irq disabled, but exits with local irq enabled. 4116 * Note: called with local irq disabled, but exits with local irq enabled.
4072 */ 4117 */
4073static void net_rps_action_and_irq_enable(struct softnet_data *sd) 4118static void net_rps_action_and_irq_enable(struct softnet_data *sd)
@@ -4272,17 +4317,10 @@ EXPORT_SYMBOL(netif_napi_add);
4272 4317
4273void netif_napi_del(struct napi_struct *napi) 4318void netif_napi_del(struct napi_struct *napi)
4274{ 4319{
4275 struct sk_buff *skb, *next;
4276
4277 list_del_init(&napi->dev_list); 4320 list_del_init(&napi->dev_list);
4278 napi_free_frags(napi); 4321 napi_free_frags(napi);
4279 4322
4280 for (skb = napi->gro_list; skb; skb = next) { 4323 kfree_skb_list(napi->gro_list);
4281 next = skb->next;
4282 skb->next = NULL;
4283 kfree_skb(skb);
4284 }
4285
4286 napi->gro_list = NULL; 4324 napi->gro_list = NULL;
4287 napi->gro_count = 0; 4325 napi->gro_count = 0;
4288} 4326}
@@ -4399,19 +4437,6 @@ struct netdev_adjacent {
4399 struct rcu_head rcu; 4437 struct rcu_head rcu;
4400}; 4438};
4401 4439
4402static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev,
4403 struct net_device *adj_dev,
4404 struct list_head *adj_list)
4405{
4406 struct netdev_adjacent *adj;
4407
4408 list_for_each_entry_rcu(adj, adj_list, list) {
4409 if (adj->dev == adj_dev)
4410 return adj;
4411 }
4412 return NULL;
4413}
4414
4415static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, 4440static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
4416 struct net_device *adj_dev, 4441 struct net_device *adj_dev,
4417 struct list_head *adj_list) 4442 struct list_head *adj_list)
@@ -4450,13 +4475,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev);
4450 * Find out if a device is linked to an upper device and return true in case 4475 * Find out if a device is linked to an upper device and return true in case
4451 * it is. The caller must hold the RTNL lock. 4476 * it is. The caller must hold the RTNL lock.
4452 */ 4477 */
4453bool netdev_has_any_upper_dev(struct net_device *dev) 4478static bool netdev_has_any_upper_dev(struct net_device *dev)
4454{ 4479{
4455 ASSERT_RTNL(); 4480 ASSERT_RTNL();
4456 4481
4457 return !list_empty(&dev->all_adj_list.upper); 4482 return !list_empty(&dev->all_adj_list.upper);
4458} 4483}
4459EXPORT_SYMBOL(netdev_has_any_upper_dev);
4460 4484
4461/** 4485/**
4462 * netdev_master_upper_dev_get - Get master upper device 4486 * netdev_master_upper_dev_get - Get master upper device
@@ -4576,6 +4600,27 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
4576EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); 4600EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
4577 4601
4578/** 4602/**
4603 * netdev_lower_get_first_private_rcu - Get the first ->private from the
4604 * lower neighbour list, RCU
4605 * variant
4606 * @dev: device
4607 *
4608 * Gets the first netdev_adjacent->private from the dev's lower neighbour
4609 * list. The caller must hold RCU read lock.
4610 */
4611void *netdev_lower_get_first_private_rcu(struct net_device *dev)
4612{
4613 struct netdev_adjacent *lower;
4614
4615 lower = list_first_or_null_rcu(&dev->adj_list.lower,
4616 struct netdev_adjacent, list);
4617 if (lower)
4618 return lower->private;
4619 return NULL;
4620}
4621EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
4622
4623/**
4579 * netdev_master_upper_dev_get_rcu - Get master upper device 4624 * netdev_master_upper_dev_get_rcu - Get master upper device
4580 * @dev: device 4625 * @dev: device
4581 * 4626 *
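netdev_lower_get_first_private_rcu(), added in the hunk above, returns the ->private stored for the first lower device under RCU instead of requiring RTNL. A minimal sketch of a master device's fast path using it; struct example_slave_priv and example_do_something() are hypothetical placeholders:

#include <linux/netdevice.h>
#include <linux/rcupdate.h>

static void example_poll_first_slave(struct net_device *master)
{
        struct example_slave_priv *priv;        /* hypothetical per-slave data */

        rcu_read_lock();
        priv = netdev_lower_get_first_private_rcu(master);
        if (priv)
                example_do_something(priv);     /* hypothetical consumer */
        rcu_read_unlock();
}
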
@@ -4594,13 +4639,36 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4594} 4639}
4595EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); 4640EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4596 4641
4642static int netdev_adjacent_sysfs_add(struct net_device *dev,
4643 struct net_device *adj_dev,
4644 struct list_head *dev_list)
4645{
4646 char linkname[IFNAMSIZ+7];
4647 sprintf(linkname, dev_list == &dev->adj_list.upper ?
4648 "upper_%s" : "lower_%s", adj_dev->name);
4649 return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
4650 linkname);
4651}
4652static void netdev_adjacent_sysfs_del(struct net_device *dev,
4653 char *name,
4654 struct list_head *dev_list)
4655{
4656 char linkname[IFNAMSIZ+7];
4657 sprintf(linkname, dev_list == &dev->adj_list.upper ?
4658 "upper_%s" : "lower_%s", name);
4659 sysfs_remove_link(&(dev->dev.kobj), linkname);
4660}
4661
4662#define netdev_adjacent_is_neigh_list(dev, dev_list) \
4663 (dev_list == &dev->adj_list.upper || \
4664 dev_list == &dev->adj_list.lower)
4665
4597static int __netdev_adjacent_dev_insert(struct net_device *dev, 4666static int __netdev_adjacent_dev_insert(struct net_device *dev,
4598 struct net_device *adj_dev, 4667 struct net_device *adj_dev,
4599 struct list_head *dev_list, 4668 struct list_head *dev_list,
4600 void *private, bool master) 4669 void *private, bool master)
4601{ 4670{
4602 struct netdev_adjacent *adj; 4671 struct netdev_adjacent *adj;
4603 char linkname[IFNAMSIZ+7];
4604 int ret; 4672 int ret;
4605 4673
4606 adj = __netdev_find_adj(dev, adj_dev, dev_list); 4674 adj = __netdev_find_adj(dev, adj_dev, dev_list);
@@ -4623,16 +4691,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
4623 pr_debug("dev_hold for %s, because of link added from %s to %s\n", 4691 pr_debug("dev_hold for %s, because of link added from %s to %s\n",
4624 adj_dev->name, dev->name, adj_dev->name); 4692 adj_dev->name, dev->name, adj_dev->name);
4625 4693
4626 if (dev_list == &dev->adj_list.lower) { 4694 if (netdev_adjacent_is_neigh_list(dev, dev_list)) {
4627 sprintf(linkname, "lower_%s", adj_dev->name); 4695 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
4628 ret = sysfs_create_link(&(dev->dev.kobj),
4629 &(adj_dev->dev.kobj), linkname);
4630 if (ret)
4631 goto free_adj;
4632 } else if (dev_list == &dev->adj_list.upper) {
4633 sprintf(linkname, "upper_%s", adj_dev->name);
4634 ret = sysfs_create_link(&(dev->dev.kobj),
4635 &(adj_dev->dev.kobj), linkname);
4636 if (ret) 4696 if (ret)
4637 goto free_adj; 4697 goto free_adj;
4638 } 4698 }
@@ -4652,14 +4712,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
4652 return 0; 4712 return 0;
4653 4713
4654remove_symlinks: 4714remove_symlinks:
4655 if (dev_list == &dev->adj_list.lower) { 4715 if (netdev_adjacent_is_neigh_list(dev, dev_list))
4656 sprintf(linkname, "lower_%s", adj_dev->name); 4716 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
4657 sysfs_remove_link(&(dev->dev.kobj), linkname);
4658 } else if (dev_list == &dev->adj_list.upper) {
4659 sprintf(linkname, "upper_%s", adj_dev->name);
4660 sysfs_remove_link(&(dev->dev.kobj), linkname);
4661 }
4662
4663free_adj: 4717free_adj:
4664 kfree(adj); 4718 kfree(adj);
4665 dev_put(adj_dev); 4719 dev_put(adj_dev);
@@ -4667,12 +4721,11 @@ free_adj:
4667 return ret; 4721 return ret;
4668} 4722}
4669 4723
4670void __netdev_adjacent_dev_remove(struct net_device *dev, 4724static void __netdev_adjacent_dev_remove(struct net_device *dev,
4671 struct net_device *adj_dev, 4725 struct net_device *adj_dev,
4672 struct list_head *dev_list) 4726 struct list_head *dev_list)
4673{ 4727{
4674 struct netdev_adjacent *adj; 4728 struct netdev_adjacent *adj;
4675 char linkname[IFNAMSIZ+7];
4676 4729
4677 adj = __netdev_find_adj(dev, adj_dev, dev_list); 4730 adj = __netdev_find_adj(dev, adj_dev, dev_list);
4678 4731
@@ -4692,13 +4745,8 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
4692 if (adj->master) 4745 if (adj->master)
4693 sysfs_remove_link(&(dev->dev.kobj), "master"); 4746 sysfs_remove_link(&(dev->dev.kobj), "master");
4694 4747
4695 if (dev_list == &dev->adj_list.lower) { 4748 if (netdev_adjacent_is_neigh_list(dev, dev_list))
4696 sprintf(linkname, "lower_%s", adj_dev->name); 4749 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
4697 sysfs_remove_link(&(dev->dev.kobj), linkname);
4698 } else if (dev_list == &dev->adj_list.upper) {
4699 sprintf(linkname, "upper_%s", adj_dev->name);
4700 sysfs_remove_link(&(dev->dev.kobj), linkname);
4701 }
4702 4750
4703 list_del_rcu(&adj->list); 4751 list_del_rcu(&adj->list);
4704 pr_debug("dev_put for %s, because link removed from %s to %s\n", 4752 pr_debug("dev_put for %s, because link removed from %s to %s\n",
@@ -4707,11 +4755,11 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
4707 kfree_rcu(adj, rcu); 4755 kfree_rcu(adj, rcu);
4708} 4756}
4709 4757
4710int __netdev_adjacent_dev_link_lists(struct net_device *dev, 4758static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
4711 struct net_device *upper_dev, 4759 struct net_device *upper_dev,
4712 struct list_head *up_list, 4760 struct list_head *up_list,
4713 struct list_head *down_list, 4761 struct list_head *down_list,
4714 void *private, bool master) 4762 void *private, bool master)
4715{ 4763{
4716 int ret; 4764 int ret;
4717 4765
@@ -4730,8 +4778,8 @@ int __netdev_adjacent_dev_link_lists(struct net_device *dev,
4730 return 0; 4778 return 0;
4731} 4779}
4732 4780
4733int __netdev_adjacent_dev_link(struct net_device *dev, 4781static int __netdev_adjacent_dev_link(struct net_device *dev,
4734 struct net_device *upper_dev) 4782 struct net_device *upper_dev)
4735{ 4783{
4736 return __netdev_adjacent_dev_link_lists(dev, upper_dev, 4784 return __netdev_adjacent_dev_link_lists(dev, upper_dev,
4737 &dev->all_adj_list.upper, 4785 &dev->all_adj_list.upper,
@@ -4739,26 +4787,26 @@ int __netdev_adjacent_dev_link(struct net_device *dev,
4739 NULL, false); 4787 NULL, false);
4740} 4788}
4741 4789
4742void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, 4790static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
4743 struct net_device *upper_dev, 4791 struct net_device *upper_dev,
4744 struct list_head *up_list, 4792 struct list_head *up_list,
4745 struct list_head *down_list) 4793 struct list_head *down_list)
4746{ 4794{
4747 __netdev_adjacent_dev_remove(dev, upper_dev, up_list); 4795 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
4748 __netdev_adjacent_dev_remove(upper_dev, dev, down_list); 4796 __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
4749} 4797}
4750 4798
4751void __netdev_adjacent_dev_unlink(struct net_device *dev, 4799static void __netdev_adjacent_dev_unlink(struct net_device *dev,
4752 struct net_device *upper_dev) 4800 struct net_device *upper_dev)
4753{ 4801{
4754 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 4802 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
4755 &dev->all_adj_list.upper, 4803 &dev->all_adj_list.upper,
4756 &upper_dev->all_adj_list.lower); 4804 &upper_dev->all_adj_list.lower);
4757} 4805}
4758 4806
4759int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, 4807static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
4760 struct net_device *upper_dev, 4808 struct net_device *upper_dev,
4761 void *private, bool master) 4809 void *private, bool master)
4762{ 4810{
4763 int ret = __netdev_adjacent_dev_link(dev, upper_dev); 4811 int ret = __netdev_adjacent_dev_link(dev, upper_dev);
4764 4812
@@ -4777,8 +4825,8 @@ int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
4777 return 0; 4825 return 0;
4778} 4826}
4779 4827
4780void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, 4828static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
4781 struct net_device *upper_dev) 4829 struct net_device *upper_dev)
4782{ 4830{
4783 __netdev_adjacent_dev_unlink(dev, upper_dev); 4831 __netdev_adjacent_dev_unlink(dev, upper_dev);
4784 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 4832 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
@@ -4967,20 +5015,24 @@ void netdev_upper_dev_unlink(struct net_device *dev,
4967} 5015}
4968EXPORT_SYMBOL(netdev_upper_dev_unlink); 5016EXPORT_SYMBOL(netdev_upper_dev_unlink);
4969 5017
4970void *netdev_lower_dev_get_private_rcu(struct net_device *dev, 5018void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
4971 struct net_device *lower_dev)
4972{ 5019{
4973 struct netdev_adjacent *lower; 5020 struct netdev_adjacent *iter;
4974 5021
4975 if (!lower_dev) 5022 list_for_each_entry(iter, &dev->adj_list.upper, list) {
4976 return NULL; 5023 netdev_adjacent_sysfs_del(iter->dev, oldname,
4977 lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower); 5024 &iter->dev->adj_list.lower);
4978 if (!lower) 5025 netdev_adjacent_sysfs_add(iter->dev, dev,
4979 return NULL; 5026 &iter->dev->adj_list.lower);
5027 }
4980 5028
4981 return lower->private; 5029 list_for_each_entry(iter, &dev->adj_list.lower, list) {
5030 netdev_adjacent_sysfs_del(iter->dev, oldname,
5031 &iter->dev->adj_list.upper);
5032 netdev_adjacent_sysfs_add(iter->dev, dev,
5033 &iter->dev->adj_list.upper);
5034 }
4982} 5035}
4983EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu);
4984 5036
4985void *netdev_lower_dev_get_private(struct net_device *dev, 5037void *netdev_lower_dev_get_private(struct net_device *dev,
4986 struct net_device *lower_dev) 5038 struct net_device *lower_dev)
@@ -5314,6 +5366,17 @@ int dev_change_flags(struct net_device *dev, unsigned int flags)
5314} 5366}
5315EXPORT_SYMBOL(dev_change_flags); 5367EXPORT_SYMBOL(dev_change_flags);
5316 5368
5369static int __dev_set_mtu(struct net_device *dev, int new_mtu)
5370{
5371 const struct net_device_ops *ops = dev->netdev_ops;
5372
5373 if (ops->ndo_change_mtu)
5374 return ops->ndo_change_mtu(dev, new_mtu);
5375
5376 dev->mtu = new_mtu;
5377 return 0;
5378}
5379
5317/** 5380/**
5318 * dev_set_mtu - Change maximum transfer unit 5381 * dev_set_mtu - Change maximum transfer unit
5319 * @dev: device 5382 * @dev: device
@@ -5323,8 +5386,7 @@ EXPORT_SYMBOL(dev_change_flags);
5323 */ 5386 */
5324int dev_set_mtu(struct net_device *dev, int new_mtu) 5387int dev_set_mtu(struct net_device *dev, int new_mtu)
5325{ 5388{
5326 const struct net_device_ops *ops = dev->netdev_ops; 5389 int err, orig_mtu;
5327 int err;
5328 5390
5329 if (new_mtu == dev->mtu) 5391 if (new_mtu == dev->mtu)
5330 return 0; 5392 return 0;
@@ -5336,14 +5398,25 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
5336 if (!netif_device_present(dev)) 5398 if (!netif_device_present(dev))
5337 return -ENODEV; 5399 return -ENODEV;
5338 5400
5339 err = 0; 5401 err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
5340 if (ops->ndo_change_mtu) 5402 err = notifier_to_errno(err);
5341 err = ops->ndo_change_mtu(dev, new_mtu); 5403 if (err)
5342 else 5404 return err;
5343 dev->mtu = new_mtu;
5344 5405
5345 if (!err) 5406 orig_mtu = dev->mtu;
5346 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); 5407 err = __dev_set_mtu(dev, new_mtu);
5408
5409 if (!err) {
5410 err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
5411 err = notifier_to_errno(err);
5412 if (err) {
5413 /* setting mtu back and notifying everyone again,
5414 * so that they have a chance to revert changes.
5415 */
5416 __dev_set_mtu(dev, orig_mtu);
5417 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
5418 }
5419 }
5347 return err; 5420 return err;
5348} 5421}
5349EXPORT_SYMBOL(dev_set_mtu); 5422EXPORT_SYMBOL(dev_set_mtu);
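dev_set_mtu() now fires NETDEV_PRECHANGEMTU before dev->mtu is modified and rolls the value back if a NETDEV_CHANGEMTU listener objects. A hedged sketch of a notifier that vetoes an MTU change early; example_mtu_forbidden() is a hypothetical policy check:

#include <linux/netdevice.h>
#include <linux/notifier.h>

static int example_netdev_event(struct notifier_block *nb,
                                unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);

        if (event == NETDEV_PRECHANGEMTU && example_mtu_forbidden(dev))
                /* rejecting here aborts the change before dev->mtu is touched */
                return notifier_from_errno(-EINVAL);

        return NOTIFY_DONE;
}
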
@@ -5697,7 +5770,7 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5697} 5770}
5698EXPORT_SYMBOL(netif_stacked_transfer_operstate); 5771EXPORT_SYMBOL(netif_stacked_transfer_operstate);
5699 5772
5700#ifdef CONFIG_RPS 5773#ifdef CONFIG_SYSFS
5701static int netif_alloc_rx_queues(struct net_device *dev) 5774static int netif_alloc_rx_queues(struct net_device *dev)
5702{ 5775{
5703 unsigned int i, count = dev->num_rx_queues; 5776 unsigned int i, count = dev->num_rx_queues;
@@ -5836,13 +5909,8 @@ int register_netdevice(struct net_device *dev)
5836 dev->features |= NETIF_F_SOFT_FEATURES; 5909 dev->features |= NETIF_F_SOFT_FEATURES;
5837 dev->wanted_features = dev->features & dev->hw_features; 5910 dev->wanted_features = dev->features & dev->hw_features;
5838 5911
5839 /* Turn on no cache copy if HW is doing checksum */
5840 if (!(dev->flags & IFF_LOOPBACK)) { 5912 if (!(dev->flags & IFF_LOOPBACK)) {
5841 dev->hw_features |= NETIF_F_NOCACHE_COPY; 5913 dev->hw_features |= NETIF_F_NOCACHE_COPY;
5842 if (dev->features & NETIF_F_ALL_CSUM) {
5843 dev->wanted_features |= NETIF_F_NOCACHE_COPY;
5844 dev->features |= NETIF_F_NOCACHE_COPY;
5845 }
5846 } 5914 }
5847 5915
5848 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. 5916 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
@@ -6229,7 +6297,7 @@ void netdev_freemem(struct net_device *dev)
6229 * @rxqs: the number of RX subqueues to allocate 6297 * @rxqs: the number of RX subqueues to allocate
6230 * 6298 *
6231 * Allocates a struct net_device with private data area for driver use 6299 * Allocates a struct net_device with private data area for driver use
6232 * and performs basic initialization. Also allocates subquue structs 6300 * and performs basic initialization. Also allocates subqueue structs
6233 * for each queue on the device. 6301 * for each queue on the device.
6234 */ 6302 */
6235struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, 6303struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
@@ -6247,7 +6315,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6247 return NULL; 6315 return NULL;
6248 } 6316 }
6249 6317
6250#ifdef CONFIG_RPS 6318#ifdef CONFIG_SYSFS
6251 if (rxqs < 1) { 6319 if (rxqs < 1) {
6252 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); 6320 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
6253 return NULL; 6321 return NULL;
@@ -6303,7 +6371,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6303 if (netif_alloc_netdev_queues(dev)) 6371 if (netif_alloc_netdev_queues(dev))
6304 goto free_all; 6372 goto free_all;
6305 6373
6306#ifdef CONFIG_RPS 6374#ifdef CONFIG_SYSFS
6307 dev->num_rx_queues = rxqs; 6375 dev->num_rx_queues = rxqs;
6308 dev->real_num_rx_queues = rxqs; 6376 dev->real_num_rx_queues = rxqs;
6309 if (netif_alloc_rx_queues(dev)) 6377 if (netif_alloc_rx_queues(dev))
@@ -6323,7 +6391,7 @@ free_all:
6323free_pcpu: 6391free_pcpu:
6324 free_percpu(dev->pcpu_refcnt); 6392 free_percpu(dev->pcpu_refcnt);
6325 netif_free_tx_queues(dev); 6393 netif_free_tx_queues(dev);
6326#ifdef CONFIG_RPS 6394#ifdef CONFIG_SYSFS
6327 kfree(dev->_rx); 6395 kfree(dev->_rx);
6328#endif 6396#endif
6329 6397
@@ -6348,7 +6416,7 @@ void free_netdev(struct net_device *dev)
6348 release_net(dev_net(dev)); 6416 release_net(dev_net(dev));
6349 6417
6350 netif_free_tx_queues(dev); 6418 netif_free_tx_queues(dev);
6351#ifdef CONFIG_RPS 6419#ifdef CONFIG_SYSFS
6352 kfree(dev->_rx); 6420 kfree(dev->_rx);
6353#endif 6421#endif
6354 6422
@@ -6618,11 +6686,11 @@ static int dev_cpu_callback(struct notifier_block *nfb,
6618 6686
6619 /* Process offline CPU's input_pkt_queue */ 6687 /* Process offline CPU's input_pkt_queue */
6620 while ((skb = __skb_dequeue(&oldsd->process_queue))) { 6688 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
6621 netif_rx(skb); 6689 netif_rx_internal(skb);
6622 input_queue_head_incr(oldsd); 6690 input_queue_head_incr(oldsd);
6623 } 6691 }
6624 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { 6692 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
6625 netif_rx(skb); 6693 netif_rx_internal(skb);
6626 input_queue_head_incr(oldsd); 6694 input_queue_head_incr(oldsd);
6627 } 6695 }
6628 6696
@@ -6935,28 +7003,18 @@ static int __init net_dev_init(void)
6935 for_each_possible_cpu(i) { 7003 for_each_possible_cpu(i) {
6936 struct softnet_data *sd = &per_cpu(softnet_data, i); 7004 struct softnet_data *sd = &per_cpu(softnet_data, i);
6937 7005
6938 memset(sd, 0, sizeof(*sd));
6939 skb_queue_head_init(&sd->input_pkt_queue); 7006 skb_queue_head_init(&sd->input_pkt_queue);
6940 skb_queue_head_init(&sd->process_queue); 7007 skb_queue_head_init(&sd->process_queue);
6941 sd->completion_queue = NULL;
6942 INIT_LIST_HEAD(&sd->poll_list); 7008 INIT_LIST_HEAD(&sd->poll_list);
6943 sd->output_queue = NULL;
6944 sd->output_queue_tailp = &sd->output_queue; 7009 sd->output_queue_tailp = &sd->output_queue;
6945#ifdef CONFIG_RPS 7010#ifdef CONFIG_RPS
6946 sd->csd.func = rps_trigger_softirq; 7011 sd->csd.func = rps_trigger_softirq;
6947 sd->csd.info = sd; 7012 sd->csd.info = sd;
6948 sd->csd.flags = 0;
6949 sd->cpu = i; 7013 sd->cpu = i;
6950#endif 7014#endif
6951 7015
6952 sd->backlog.poll = process_backlog; 7016 sd->backlog.poll = process_backlog;
6953 sd->backlog.weight = weight_p; 7017 sd->backlog.weight = weight_p;
6954 sd->backlog.gro_list = NULL;
6955 sd->backlog.gro_count = 0;
6956
6957#ifdef CONFIG_NET_FLOW_LIMIT
6958 sd->flow_limit = NULL;
6959#endif
6960 } 7018 }
6961 7019
6962 dev_boot_phase = 0; 7020 dev_boot_phase = 0;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index ec40a849fc42..329d5794e7dc 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -38,7 +38,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
38 ha->type = addr_type; 38 ha->type = addr_type;
39 ha->refcount = 1; 39 ha->refcount = 1;
40 ha->global_use = global; 40 ha->global_use = global;
41 ha->synced = sync; 41 ha->synced = sync ? 1 : 0;
42 ha->sync_cnt = 0; 42 ha->sync_cnt = 0;
43 list_add_tail_rcu(&ha->list, &list->list); 43 list_add_tail_rcu(&ha->list, &list->list);
44 list->count++; 44 list->count++;
@@ -48,7 +48,8 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
48 48
49static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, 49static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
50 const unsigned char *addr, int addr_len, 50 const unsigned char *addr, int addr_len,
51 unsigned char addr_type, bool global, bool sync) 51 unsigned char addr_type, bool global, bool sync,
52 int sync_count)
52{ 53{
53 struct netdev_hw_addr *ha; 54 struct netdev_hw_addr *ha;
54 55
@@ -66,10 +67,10 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
66 ha->global_use = true; 67 ha->global_use = true;
67 } 68 }
68 if (sync) { 69 if (sync) {
69 if (ha->synced) 70 if (ha->synced && sync_count)
70 return -EEXIST; 71 return -EEXIST;
71 else 72 else
72 ha->synced = true; 73 ha->synced++;
73 } 74 }
74 ha->refcount++; 75 ha->refcount++;
75 return 0; 76 return 0;
@@ -84,7 +85,8 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list,
84 const unsigned char *addr, int addr_len, 85 const unsigned char *addr, int addr_len,
85 unsigned char addr_type) 86 unsigned char addr_type)
86{ 87{
87 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false); 88 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false,
89 0);
88} 90}
89 91
90static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, 92static int __hw_addr_del_entry(struct netdev_hw_addr_list *list,
@@ -101,7 +103,7 @@ static int __hw_addr_del_entry(struct netdev_hw_addr_list *list,
101 ha->global_use = false; 103 ha->global_use = false;
102 104
103 if (sync) 105 if (sync)
104 ha->synced = false; 106 ha->synced--;
105 107
106 if (--ha->refcount) 108 if (--ha->refcount)
107 return 0; 109 return 0;
@@ -139,7 +141,7 @@ static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list,
139 int err; 141 int err;
140 142
141 err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, 143 err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type,
142 false, true); 144 false, true, ha->sync_cnt);
143 if (err && err != -EEXIST) 145 if (err && err != -EEXIST)
144 return err; 146 return err;
145 147
@@ -186,47 +188,6 @@ static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list,
186 return err; 188 return err;
187} 189}
188 190
189int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
190 struct netdev_hw_addr_list *from_list,
191 int addr_len, unsigned char addr_type)
192{
193 int err;
194 struct netdev_hw_addr *ha, *ha2;
195 unsigned char type;
196
197 list_for_each_entry(ha, &from_list->list, list) {
198 type = addr_type ? addr_type : ha->type;
199 err = __hw_addr_add(to_list, ha->addr, addr_len, type);
200 if (err)
201 goto unroll;
202 }
203 return 0;
204
205unroll:
206 list_for_each_entry(ha2, &from_list->list, list) {
207 if (ha2 == ha)
208 break;
209 type = addr_type ? addr_type : ha2->type;
210 __hw_addr_del(to_list, ha2->addr, addr_len, type);
211 }
212 return err;
213}
214EXPORT_SYMBOL(__hw_addr_add_multiple);
215
216void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
217 struct netdev_hw_addr_list *from_list,
218 int addr_len, unsigned char addr_type)
219{
220 struct netdev_hw_addr *ha;
221 unsigned char type;
222
223 list_for_each_entry(ha, &from_list->list, list) {
224 type = addr_type ? addr_type : ha->type;
225 __hw_addr_del(to_list, ha->addr, addr_len, type);
226 }
227}
228EXPORT_SYMBOL(__hw_addr_del_multiple);
229
230/* This function only works where there is a strict 1-1 relationship 191/* This function only works where there is a strict 1-1 relationship
231 * between source and destionation of they synch. If you ever need to 192 * between source and destionation of they synch. If you ever need to
232 * sync addresses to more then 1 destination, you need to use 193 * sync addresses to more then 1 destination, you need to use
@@ -264,7 +225,7 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
264} 225}
265EXPORT_SYMBOL(__hw_addr_unsync); 226EXPORT_SYMBOL(__hw_addr_unsync);
266 227
267void __hw_addr_flush(struct netdev_hw_addr_list *list) 228static void __hw_addr_flush(struct netdev_hw_addr_list *list)
268{ 229{
269 struct netdev_hw_addr *ha, *tmp; 230 struct netdev_hw_addr *ha, *tmp;
270 231
@@ -274,7 +235,6 @@ void __hw_addr_flush(struct netdev_hw_addr_list *list)
274 } 235 }
275 list->count = 0; 236 list->count = 0;
276} 237}
277EXPORT_SYMBOL(__hw_addr_flush);
278 238
279void __hw_addr_init(struct netdev_hw_addr_list *list) 239void __hw_addr_init(struct netdev_hw_addr_list *list)
280{ 240{
@@ -400,59 +360,6 @@ int dev_addr_del(struct net_device *dev, const unsigned char *addr,
400} 360}
401EXPORT_SYMBOL(dev_addr_del); 361EXPORT_SYMBOL(dev_addr_del);
402 362
403/**
404 * dev_addr_add_multiple - Add device addresses from another device
405 * @to_dev: device to which addresses will be added
406 * @from_dev: device from which addresses will be added
407 * @addr_type: address type - 0 means type will be used from from_dev
408 *
409 * Add device addresses of the one device to another.
410 **
411 * The caller must hold the rtnl_mutex.
412 */
413int dev_addr_add_multiple(struct net_device *to_dev,
414 struct net_device *from_dev,
415 unsigned char addr_type)
416{
417 int err;
418
419 ASSERT_RTNL();
420
421 if (from_dev->addr_len != to_dev->addr_len)
422 return -EINVAL;
423 err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
424 to_dev->addr_len, addr_type);
425 if (!err)
426 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
427 return err;
428}
429EXPORT_SYMBOL(dev_addr_add_multiple);
430
431/**
432 * dev_addr_del_multiple - Delete device addresses by another device
433 * @to_dev: device where the addresses will be deleted
434 * @from_dev: device supplying the addresses to be deleted
435 * @addr_type: address type - 0 means type will be used from from_dev
436 *
437 * Deletes addresses in to device by the list of addresses in from device.
438 *
439 * The caller must hold the rtnl_mutex.
440 */
441int dev_addr_del_multiple(struct net_device *to_dev,
442 struct net_device *from_dev,
443 unsigned char addr_type)
444{
445 ASSERT_RTNL();
446
447 if (from_dev->addr_len != to_dev->addr_len)
448 return -EINVAL;
449 __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
450 to_dev->addr_len, addr_type);
451 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
452 return 0;
453}
454EXPORT_SYMBOL(dev_addr_del_multiple);
455
456/* 363/*
457 * Unicast list handling functions 364 * Unicast list handling functions
458 */ 365 */
@@ -676,7 +583,7 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
676 583
677 netif_addr_lock_bh(dev); 584 netif_addr_lock_bh(dev);
678 err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, 585 err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
679 NETDEV_HW_ADDR_T_MULTICAST, global, false); 586 NETDEV_HW_ADDR_T_MULTICAST, global, false, 0);
680 if (!err) 587 if (!err)
681 __dev_set_rx_mode(dev); 588 __dev_set_rx_mode(dev);
682 netif_addr_unlock_bh(dev); 589 netif_addr_unlock_bh(dev);
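
With dev_addr_add_multiple()/dev_addr_del_multiple() and their __hw_addr_*_multiple() backends removed, the strict 1:1 sync helpers referenced in the comment above remain the supported way to propagate addresses between stacked devices. A minimal sketch of that pattern, assuming a hypothetical upper driver whose private data carries a lowerdev pointer:

/* Illustrative sketch only; "struct my_priv" and "lowerdev" are assumed names. */
static void upper_dev_set_rx_mode(struct net_device *dev)
{
	struct my_priv *priv = netdev_priv(dev);

	/* Each secondary unicast/multicast address is synced to exactly
	 * one lower device, matching the 1:1 model described above.
	 */
	dev_uc_sync(priv->lowerdev, dev);
	dev_mc_sync(priv->lowerdev, dev);
}
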
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 5b7d0e1d0664..cf999e09bcd2 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -327,6 +327,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
327 cmd == SIOCBRADDIF || 327 cmd == SIOCBRADDIF ||
328 cmd == SIOCBRDELIF || 328 cmd == SIOCBRDELIF ||
329 cmd == SIOCSHWTSTAMP || 329 cmd == SIOCSHWTSTAMP ||
330 cmd == SIOCGHWTSTAMP ||
330 cmd == SIOCWANDEV) { 331 cmd == SIOCWANDEV) {
331 err = -EOPNOTSUPP; 332 err = -EOPNOTSUPP;
332 if (ops->ndo_do_ioctl) { 333 if (ops->ndo_do_ioctl) {
@@ -546,6 +547,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
546 */ 547 */
547 default: 548 default:
548 if (cmd == SIOCWANDEV || 549 if (cmd == SIOCWANDEV ||
550 cmd == SIOCGHWTSTAMP ||
549 (cmd >= SIOCDEVPRIVATE && 551 (cmd >= SIOCDEVPRIVATE &&
550 cmd <= SIOCDEVPRIVATE + 15)) { 552 cmd <= SIOCDEVPRIVATE + 15)) {
551 dev_load(net, ifr.ifr_name); 553 dev_load(net, ifr.ifr_name);
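
Both hunks above route the new SIOCGHWTSTAMP ioctl the same way as SIOCSHWTSTAMP: it is forwarded to ndo_do_ioctl() and triggers dev_load() for the named interface, so userspace can read back the current hardware timestamping configuration without modifying it. A rough userspace sketch, assuming kernel headers that define SIOCGHWTSTAMP (the interface name and error handling are illustrative):

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/sockios.h>
#include <linux/net_tstamp.h>

int main(void)
{
	struct hwtstamp_config cfg = { 0 };
	struct ifreq ifr = { 0 };
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* illustrative name */
	ifr.ifr_data = (void *)&cfg;

	/* Read back the current config without changing it. */
	if (ioctl(fd, SIOCGHWTSTAMP, &ifr) < 0)
		perror("SIOCGHWTSTAMP");
	else
		printf("tx_type=%d rx_filter=%d\n", cfg.tx_type, cfg.rx_filter);
	return 0;
}
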
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index f409e0bd35c0..185c341fafbd 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -745,6 +745,13 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
745 attach_rules(&ops->rules_list, dev); 745 attach_rules(&ops->rules_list, dev);
746 break; 746 break;
747 747
748 case NETDEV_CHANGENAME:
749 list_for_each_entry(ops, &net->rules_ops, list) {
750 detach_rules(&ops->rules_list, dev);
751 attach_rules(&ops->rules_list, dev);
752 }
753 break;
754
748 case NETDEV_UNREGISTER: 755 case NETDEV_UNREGISTER:
749 list_for_each_entry(ops, &net->rules_ops, list) 756 list_for_each_entry(ops, &net->rules_ops, list)
750 detach_rules(&ops->rules_list, dev); 757 detach_rules(&ops->rules_list, dev);
diff --git a/net/core/filter.c b/net/core/filter.c
index 01b780856db2..ad30d626a5bd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -36,7 +36,6 @@
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37#include <asm/unaligned.h> 37#include <asm/unaligned.h>
38#include <linux/filter.h> 38#include <linux/filter.h>
39#include <linux/reciprocal_div.h>
40#include <linux/ratelimit.h> 39#include <linux/ratelimit.h>
41#include <linux/seccomp.h> 40#include <linux/seccomp.h>
42#include <linux/if_vlan.h> 41#include <linux/if_vlan.h>
@@ -166,7 +165,7 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
166 A /= X; 165 A /= X;
167 continue; 166 continue;
168 case BPF_S_ALU_DIV_K: 167 case BPF_S_ALU_DIV_K:
169 A = reciprocal_divide(A, K); 168 A /= K;
170 continue; 169 continue;
171 case BPF_S_ALU_MOD_X: 170 case BPF_S_ALU_MOD_X:
172 if (X == 0) 171 if (X == 0)
@@ -553,11 +552,6 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
553 /* Some instructions need special checks */ 552 /* Some instructions need special checks */
554 switch (code) { 553 switch (code) {
555 case BPF_S_ALU_DIV_K: 554 case BPF_S_ALU_DIV_K:
556 /* check for division by zero */
557 if (ftest->k == 0)
558 return -EINVAL;
559 ftest->k = reciprocal_value(ftest->k);
560 break;
561 case BPF_S_ALU_MOD_K: 555 case BPF_S_ALU_MOD_K:
562 /* check for division by zero */ 556 /* check for division by zero */
563 if (ftest->k == 0) 557 if (ftest->k == 0)
@@ -853,27 +847,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
853 to->code = decodes[code]; 847 to->code = decodes[code];
854 to->jt = filt->jt; 848 to->jt = filt->jt;
855 to->jf = filt->jf; 849 to->jf = filt->jf;
856 850 to->k = filt->k;
857 if (code == BPF_S_ALU_DIV_K) {
858 /*
859 * When loaded this rule user gave us X, which was
860 * translated into R = r(X). Now we calculate the
861 * RR = r(R) and report it back. If next time this
862 * value is loaded and RRR = r(RR) is calculated
863 * then the R == RRR will be true.
864 *
865 * One exception. X == 1 translates into R == 0 and
866 * we can't calculate RR out of it with r().
867 */
868
869 if (filt->k == 0)
870 to->k = 1;
871 else
872 to->k = reciprocal_value(filt->k);
873
874 BUG_ON(reciprocal_value(to->k) != filt->k);
875 } else
876 to->k = filt->k;
877} 851}
878 852
879int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) 853int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
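
The filter changes above drop the cached reciprocal multiply for BPF_S_ALU_DIV_K and divide by K directly, because the reciprocal approximation is not exact for every divisor/dividend pair (it also forced the awkward re-encoding in sk_decode_filter() that is removed here). The standalone program below illustrates the difference; the two helpers are local stand-ins written to mirror the removed reciprocal_value()/reciprocal_divide() behaviour and should be read as an approximation of the original library code, not a copy of it:

#include <stdint.h>
#include <stdio.h>

/* Local stand-ins for the removed helpers (illustrative only). */
static uint32_t recip_value(uint32_t k)
{
	return (uint32_t)(((1ULL << 32) + k - 1) / k);
}

static uint32_t recip_divide(uint32_t a, uint32_t r)
{
	return (uint32_t)(((uint64_t)a * r) >> 32);
}

int main(void)
{
	/* Look for operands where the old fast path and a true divide disagree. */
	for (uint32_t k = 3; k < 1000; k += 2) {
		uint32_t r = recip_value(k);

		for (uint64_t a = 0xffffff00ULL; a <= 0xffffffffULL; a++) {
			uint32_t approx = recip_divide((uint32_t)a, r);
			uint32_t exact  = (uint32_t)a / k;

			if (approx != exact) {
				printf("k=%u a=%u old=%u exact=%u\n",
				       k, (uint32_t)a, approx, exact);
				return 0;
			}
		}
	}
	printf("no mismatch found in the scanned range\n");
	return 0;
}
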
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2fc5beaf5783..e29e810663d7 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -202,12 +202,12 @@ static __always_inline u32 __flow_hash_1word(u32 a)
202} 202}
203 203
204/* 204/*
205 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses 205 * __skb_get_hash: calculate a flow hash based on src/dst addresses
206 * and src/dst port numbers. Sets rxhash in skb to non-zero hash value 206 * and src/dst port numbers. Sets rxhash in skb to non-zero hash value
207 * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb 207 * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb
208 * if hash is a canonical 4-tuple hash over transport ports. 208 * if hash is a canonical 4-tuple hash over transport ports.
209 */ 209 */
210void __skb_get_rxhash(struct sk_buff *skb) 210void __skb_get_hash(struct sk_buff *skb)
211{ 211{
212 struct flow_keys keys; 212 struct flow_keys keys;
213 u32 hash; 213 u32 hash;
@@ -234,7 +234,7 @@ void __skb_get_rxhash(struct sk_buff *skb)
234 234
235 skb->rxhash = hash; 235 skb->rxhash = hash;
236} 236}
237EXPORT_SYMBOL(__skb_get_rxhash); 237EXPORT_SYMBOL(__skb_get_hash);
238 238
239/* 239/*
240 * Returns a Tx hash based on the given packet descriptor a Tx queues' number 240 * Returns a Tx hash based on the given packet descriptor a Tx queues' number
@@ -323,17 +323,6 @@ u32 __skb_get_poff(const struct sk_buff *skb)
323 return poff; 323 return poff;
324} 324}
325 325
326static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
327{
328 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
329 net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
330 dev->name, queue_index,
331 dev->real_num_tx_queues);
332 return 0;
333 }
334 return queue_index;
335}
336
337static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) 326static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
338{ 327{
339#ifdef CONFIG_XPS 328#ifdef CONFIG_XPS
@@ -372,7 +361,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
372#endif 361#endif
373} 362}
374 363
375u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) 364static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
376{ 365{
377 struct sock *sk = skb->sk; 366 struct sock *sk = skb->sk;
378 int queue_index = sk_tx_queue_get(sk); 367 int queue_index = sk_tx_queue_get(sk);
@@ -392,7 +381,6 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
392 381
393 return queue_index; 382 return queue_index;
394} 383}
395EXPORT_SYMBOL(__netdev_pick_tx);
396 384
397struct netdev_queue *netdev_pick_tx(struct net_device *dev, 385struct netdev_queue *netdev_pick_tx(struct net_device *dev,
398 struct sk_buff *skb, 386 struct sk_buff *skb,
@@ -403,13 +391,13 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
403 if (dev->real_num_tx_queues != 1) { 391 if (dev->real_num_tx_queues != 1) {
404 const struct net_device_ops *ops = dev->netdev_ops; 392 const struct net_device_ops *ops = dev->netdev_ops;
405 if (ops->ndo_select_queue) 393 if (ops->ndo_select_queue)
406 queue_index = ops->ndo_select_queue(dev, skb, 394 queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
407 accel_priv); 395 __netdev_pick_tx);
408 else 396 else
409 queue_index = __netdev_pick_tx(dev, skb); 397 queue_index = __netdev_pick_tx(dev, skb);
410 398
411 if (!accel_priv) 399 if (!accel_priv)
412 queue_index = dev_cap_txqueue(dev, queue_index); 400 queue_index = netdev_cap_txqueue(dev, queue_index);
413 } 401 }
414 402
415 skb_set_queue_mapping(skb, queue_index); 403 skb_set_queue_mapping(skb, queue_index);
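
Two things change above: __skb_get_rxhash() becomes __skb_get_hash(), and the queue-index clamping moves from the local dev_cap_txqueue() (removed) to a shared netdev_cap_txqueue() helper, while ndo_select_queue() now also receives accel_priv and a fallback selector. Based on the code removed above, the shared helper can be expected to behave like this sketch (names and message text taken from the removed function):

/* Sketch mirroring the removed dev_cap_txqueue(); the shared
 * netdev_cap_txqueue() helper is assumed to be equivalent.
 */
static inline u16 netdev_cap_txqueue_sketch(struct net_device *dev, u16 queue_index)
{
	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
				     dev->name, queue_index,
				     dev->real_num_tx_queues);
		return 0;
	}
	return queue_index;
}
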
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 932c6d7cf666..e16129019c66 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -38,6 +38,8 @@
38#include <linux/random.h> 38#include <linux/random.h>
39#include <linux/string.h> 39#include <linux/string.h>
40#include <linux/log2.h> 40#include <linux/log2.h>
41#include <linux/inetdevice.h>
42#include <net/addrconf.h>
41 43
42#define DEBUG 44#define DEBUG
43#define NEIGH_DEBUG 1 45#define NEIGH_DEBUG 1
@@ -115,7 +117,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
115 117
116unsigned long neigh_rand_reach_time(unsigned long base) 118unsigned long neigh_rand_reach_time(unsigned long base)
117{ 119{
118 return base ? (net_random() % base) + (base >> 1) : 0; 120 return base ? (prandom_u32() % base) + (base >> 1) : 0;
119} 121}
120EXPORT_SYMBOL(neigh_rand_reach_time); 122EXPORT_SYMBOL(neigh_rand_reach_time);
121 123
@@ -497,7 +499,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
497 goto out_neigh_release; 499 goto out_neigh_release;
498 } 500 }
499 501
500 n->confirmed = jiffies - (n->parms->base_reachable_time << 1); 502 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
501 503
502 write_lock_bh(&tbl->lock); 504 write_lock_bh(&tbl->lock);
503 nht = rcu_dereference_protected(tbl->nht, 505 nht = rcu_dereference_protected(tbl->nht,
@@ -764,9 +766,6 @@ static void neigh_periodic_work(struct work_struct *work)
764 nht = rcu_dereference_protected(tbl->nht, 766 nht = rcu_dereference_protected(tbl->nht,
765 lockdep_is_held(&tbl->lock)); 767 lockdep_is_held(&tbl->lock));
766 768
767 if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
768 goto out;
769
770 /* 769 /*
771 * periodically recompute ReachableTime from random function 770 * periodically recompute ReachableTime from random function
772 */ 771 */
@@ -776,9 +775,12 @@ static void neigh_periodic_work(struct work_struct *work)
776 tbl->last_rand = jiffies; 775 tbl->last_rand = jiffies;
777 for (p = &tbl->parms; p; p = p->next) 776 for (p = &tbl->parms; p; p = p->next)
778 p->reachable_time = 777 p->reachable_time =
779 neigh_rand_reach_time(p->base_reachable_time); 778 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
780 } 779 }
781 780
781 if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
782 goto out;
783
782 for (i = 0 ; i < (1 << nht->hash_shift); i++) { 784 for (i = 0 ; i < (1 << nht->hash_shift); i++) {
783 np = &nht->hash_buckets[i]; 785 np = &nht->hash_buckets[i];
784 786
@@ -799,7 +801,7 @@ static void neigh_periodic_work(struct work_struct *work)
799 801
800 if (atomic_read(&n->refcnt) == 1 && 802 if (atomic_read(&n->refcnt) == 1 &&
801 (state == NUD_FAILED || 803 (state == NUD_FAILED ||
802 time_after(jiffies, n->used + n->parms->gc_staletime))) { 804 time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
803 *np = n->next; 805 *np = n->next;
804 n->dead = 1; 806 n->dead = 1;
805 write_unlock(&n->lock); 807 write_unlock(&n->lock);
@@ -822,12 +824,12 @@ next_elt:
822 lockdep_is_held(&tbl->lock)); 824 lockdep_is_held(&tbl->lock));
823 } 825 }
824out: 826out:
825 /* Cycle through all hash buckets every base_reachable_time/2 ticks. 827 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
826 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 828 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
827 * base_reachable_time. 829 * BASE_REACHABLE_TIME.
828 */ 830 */
829 schedule_delayed_work(&tbl->gc_work, 831 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
830 tbl->parms.base_reachable_time >> 1); 832 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
831 write_unlock_bh(&tbl->lock); 833 write_unlock_bh(&tbl->lock);
832} 834}
833 835
@@ -835,8 +837,9 @@ static __inline__ int neigh_max_probes(struct neighbour *n)
835{ 837{
836 struct neigh_parms *p = n->parms; 838 struct neigh_parms *p = n->parms;
837 return (n->nud_state & NUD_PROBE) ? 839 return (n->nud_state & NUD_PROBE) ?
838 p->ucast_probes : 840 NEIGH_VAR(p, UCAST_PROBES) :
839 p->ucast_probes + p->app_probes + p->mcast_probes; 841 NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
842 NEIGH_VAR(p, MCAST_PROBES);
840} 843}
841 844
842static void neigh_invalidate(struct neighbour *neigh) 845static void neigh_invalidate(struct neighbour *neigh)
@@ -901,12 +904,13 @@ static void neigh_timer_handler(unsigned long arg)
901 neigh_dbg(2, "neigh %p is still alive\n", neigh); 904 neigh_dbg(2, "neigh %p is still alive\n", neigh);
902 next = neigh->confirmed + neigh->parms->reachable_time; 905 next = neigh->confirmed + neigh->parms->reachable_time;
903 } else if (time_before_eq(now, 906 } else if (time_before_eq(now,
904 neigh->used + neigh->parms->delay_probe_time)) { 907 neigh->used +
908 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
905 neigh_dbg(2, "neigh %p is delayed\n", neigh); 909 neigh_dbg(2, "neigh %p is delayed\n", neigh);
906 neigh->nud_state = NUD_DELAY; 910 neigh->nud_state = NUD_DELAY;
907 neigh->updated = jiffies; 911 neigh->updated = jiffies;
908 neigh_suspect(neigh); 912 neigh_suspect(neigh);
909 next = now + neigh->parms->delay_probe_time; 913 next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
910 } else { 914 } else {
911 neigh_dbg(2, "neigh %p is suspected\n", neigh); 915 neigh_dbg(2, "neigh %p is suspected\n", neigh);
912 neigh->nud_state = NUD_STALE; 916 neigh->nud_state = NUD_STALE;
@@ -916,7 +920,8 @@ static void neigh_timer_handler(unsigned long arg)
916 } 920 }
917 } else if (state & NUD_DELAY) { 921 } else if (state & NUD_DELAY) {
918 if (time_before_eq(now, 922 if (time_before_eq(now,
919 neigh->confirmed + neigh->parms->delay_probe_time)) { 923 neigh->confirmed +
924 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
920 neigh_dbg(2, "neigh %p is now reachable\n", neigh); 925 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
921 neigh->nud_state = NUD_REACHABLE; 926 neigh->nud_state = NUD_REACHABLE;
922 neigh->updated = jiffies; 927 neigh->updated = jiffies;
@@ -928,11 +933,11 @@ static void neigh_timer_handler(unsigned long arg)
928 neigh->nud_state = NUD_PROBE; 933 neigh->nud_state = NUD_PROBE;
929 neigh->updated = jiffies; 934 neigh->updated = jiffies;
930 atomic_set(&neigh->probes, 0); 935 atomic_set(&neigh->probes, 0);
931 next = now + neigh->parms->retrans_time; 936 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
932 } 937 }
933 } else { 938 } else {
934 /* NUD_PROBE|NUD_INCOMPLETE */ 939 /* NUD_PROBE|NUD_INCOMPLETE */
935 next = now + neigh->parms->retrans_time; 940 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
936 } 941 }
937 942
938 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && 943 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
@@ -973,13 +978,16 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
973 goto out_unlock_bh; 978 goto out_unlock_bh;
974 979
975 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { 980 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
976 if (neigh->parms->mcast_probes + neigh->parms->app_probes) { 981 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
982 NEIGH_VAR(neigh->parms, APP_PROBES)) {
977 unsigned long next, now = jiffies; 983 unsigned long next, now = jiffies;
978 984
979 atomic_set(&neigh->probes, neigh->parms->ucast_probes); 985 atomic_set(&neigh->probes,
986 NEIGH_VAR(neigh->parms, UCAST_PROBES));
980 neigh->nud_state = NUD_INCOMPLETE; 987 neigh->nud_state = NUD_INCOMPLETE;
981 neigh->updated = now; 988 neigh->updated = now;
982 next = now + max(neigh->parms->retrans_time, HZ/2); 989 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
990 HZ/2);
983 neigh_add_timer(neigh, next); 991 neigh_add_timer(neigh, next);
984 immediate_probe = true; 992 immediate_probe = true;
985 } else { 993 } else {
@@ -994,14 +1002,14 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
994 neigh_dbg(2, "neigh %p is delayed\n", neigh); 1002 neigh_dbg(2, "neigh %p is delayed\n", neigh);
995 neigh->nud_state = NUD_DELAY; 1003 neigh->nud_state = NUD_DELAY;
996 neigh->updated = jiffies; 1004 neigh->updated = jiffies;
997 neigh_add_timer(neigh, 1005 neigh_add_timer(neigh, jiffies +
998 jiffies + neigh->parms->delay_probe_time); 1006 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
999 } 1007 }
1000 1008
1001 if (neigh->nud_state == NUD_INCOMPLETE) { 1009 if (neigh->nud_state == NUD_INCOMPLETE) {
1002 if (skb) { 1010 if (skb) {
1003 while (neigh->arp_queue_len_bytes + skb->truesize > 1011 while (neigh->arp_queue_len_bytes + skb->truesize >
1004 neigh->parms->queue_len_bytes) { 1012 NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1005 struct sk_buff *buff; 1013 struct sk_buff *buff;
1006 1014
1007 buff = __skb_dequeue(&neigh->arp_queue); 1015 buff = __skb_dequeue(&neigh->arp_queue);
@@ -1171,7 +1179,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1171 neigh_update_hhs(neigh); 1179 neigh_update_hhs(neigh);
1172 if (!(new & NUD_CONNECTED)) 1180 if (!(new & NUD_CONNECTED))
1173 neigh->confirmed = jiffies - 1181 neigh->confirmed = jiffies -
1174 (neigh->parms->base_reachable_time << 1); 1182 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1175 notify = 1; 1183 notify = 1;
1176 } 1184 }
1177 if (new == old) 1185 if (new == old)
@@ -1231,6 +1239,21 @@ out:
1231} 1239}
1232EXPORT_SYMBOL(neigh_update); 1240EXPORT_SYMBOL(neigh_update);
1233 1241
1242/* Update the neigh to listen temporarily for probe responses, even if it is
1243 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1244 */
1245void __neigh_set_probe_once(struct neighbour *neigh)
1246{
1247 neigh->updated = jiffies;
1248 if (!(neigh->nud_state & NUD_FAILED))
1249 return;
1250 neigh->nud_state = NUD_PROBE;
1251 atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES));
1252 neigh_add_timer(neigh,
1253 jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1254}
1255EXPORT_SYMBOL(__neigh_set_probe_once);
1256
1234struct neighbour *neigh_event_ns(struct neigh_table *tbl, 1257struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1235 u8 *lladdr, void *saddr, 1258 u8 *lladdr, void *saddr,
1236 struct net_device *dev) 1259 struct net_device *dev)
@@ -1392,9 +1415,11 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1392 struct sk_buff *skb) 1415 struct sk_buff *skb)
1393{ 1416{
1394 unsigned long now = jiffies; 1417 unsigned long now = jiffies;
1395 unsigned long sched_next = now + (net_random() % p->proxy_delay);
1396 1418
1397 if (tbl->proxy_queue.qlen > p->proxy_qlen) { 1419 unsigned long sched_next = now + (prandom_u32() %
1420 NEIGH_VAR(p, PROXY_DELAY));
1421
1422 if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1398 kfree_skb(skb); 1423 kfree_skb(skb);
1399 return; 1424 return;
1400 } 1425 }
@@ -1441,7 +1466,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1441 p->tbl = tbl; 1466 p->tbl = tbl;
1442 atomic_set(&p->refcnt, 1); 1467 atomic_set(&p->refcnt, 1);
1443 p->reachable_time = 1468 p->reachable_time =
1444 neigh_rand_reach_time(p->base_reachable_time); 1469 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1445 dev_hold(dev); 1470 dev_hold(dev);
1446 p->dev = dev; 1471 p->dev = dev;
1447 write_pnet(&p->net, hold_net(net)); 1472 write_pnet(&p->net, hold_net(net));
@@ -1458,6 +1483,8 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1458 p->next = tbl->parms.next; 1483 p->next = tbl->parms.next;
1459 tbl->parms.next = p; 1484 tbl->parms.next = p;
1460 write_unlock_bh(&tbl->lock); 1485 write_unlock_bh(&tbl->lock);
1486
1487 neigh_parms_data_state_cleanall(p);
1461 } 1488 }
1462 return p; 1489 return p;
1463} 1490}
@@ -1510,7 +1537,7 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1510 write_pnet(&tbl->parms.net, &init_net); 1537 write_pnet(&tbl->parms.net, &init_net);
1511 atomic_set(&tbl->parms.refcnt, 1); 1538 atomic_set(&tbl->parms.refcnt, 1);
1512 tbl->parms.reachable_time = 1539 tbl->parms.reachable_time =
1513 neigh_rand_reach_time(tbl->parms.base_reachable_time); 1540 neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1514 1541
1515 tbl->stats = alloc_percpu(struct neigh_statistics); 1542 tbl->stats = alloc_percpu(struct neigh_statistics);
1516 if (!tbl->stats) 1543 if (!tbl->stats)
@@ -1538,7 +1565,8 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1538 1565
1539 rwlock_init(&tbl->lock); 1566 rwlock_init(&tbl->lock);
1540 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); 1567 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1541 schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); 1568 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1569 tbl->parms.reachable_time);
1542 setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl); 1570 setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1543 skb_queue_head_init_class(&tbl->proxy_queue, 1571 skb_queue_head_init_class(&tbl->proxy_queue,
1544 &neigh_table_proxy_queue_class); 1572 &neigh_table_proxy_queue_class);
@@ -1778,24 +1806,32 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1778 if ((parms->dev && 1806 if ((parms->dev &&
1779 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) || 1807 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1780 nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) || 1808 nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1781 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) || 1809 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1810 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1782 /* approximative value for deprecated QUEUE_LEN (in packets) */ 1811 /* approximative value for deprecated QUEUE_LEN (in packets) */
1783 nla_put_u32(skb, NDTPA_QUEUE_LEN, 1812 nla_put_u32(skb, NDTPA_QUEUE_LEN,
1784 parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) || 1813 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1785 nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) || 1814 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1786 nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) || 1815 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1787 nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) || 1816 nla_put_u32(skb, NDTPA_UCAST_PROBES,
1788 nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) || 1817 NEIGH_VAR(parms, UCAST_PROBES)) ||
1818 nla_put_u32(skb, NDTPA_MCAST_PROBES,
1819 NEIGH_VAR(parms, MCAST_PROBES)) ||
1789 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) || 1820 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1790 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, 1821 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1791 parms->base_reachable_time) || 1822 NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
1792 nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) || 1823 nla_put_msecs(skb, NDTPA_GC_STALETIME,
1824 NEIGH_VAR(parms, GC_STALETIME)) ||
1793 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME, 1825 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1794 parms->delay_probe_time) || 1826 NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
1795 nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) || 1827 nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1796 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) || 1828 NEIGH_VAR(parms, RETRANS_TIME)) ||
1797 nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) || 1829 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1798 nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime)) 1830 NEIGH_VAR(parms, ANYCAST_DELAY)) ||
1831 nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1832 NEIGH_VAR(parms, PROXY_DELAY)) ||
1833 nla_put_msecs(skb, NDTPA_LOCKTIME,
1834 NEIGH_VAR(parms, LOCKTIME)))
1799 goto nla_put_failure; 1835 goto nla_put_failure;
1800 return nla_nest_end(skb, nest); 1836 return nla_nest_end(skb, nest);
1801 1837
@@ -2011,44 +2047,57 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
2011 2047
2012 switch (i) { 2048 switch (i) {
2013 case NDTPA_QUEUE_LEN: 2049 case NDTPA_QUEUE_LEN:
2014 p->queue_len_bytes = nla_get_u32(tbp[i]) * 2050 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2015 SKB_TRUESIZE(ETH_FRAME_LEN); 2051 nla_get_u32(tbp[i]) *
2052 SKB_TRUESIZE(ETH_FRAME_LEN));
2016 break; 2053 break;
2017 case NDTPA_QUEUE_LENBYTES: 2054 case NDTPA_QUEUE_LENBYTES:
2018 p->queue_len_bytes = nla_get_u32(tbp[i]); 2055 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2056 nla_get_u32(tbp[i]));
2019 break; 2057 break;
2020 case NDTPA_PROXY_QLEN: 2058 case NDTPA_PROXY_QLEN:
2021 p->proxy_qlen = nla_get_u32(tbp[i]); 2059 NEIGH_VAR_SET(p, PROXY_QLEN,
2060 nla_get_u32(tbp[i]));
2022 break; 2061 break;
2023 case NDTPA_APP_PROBES: 2062 case NDTPA_APP_PROBES:
2024 p->app_probes = nla_get_u32(tbp[i]); 2063 NEIGH_VAR_SET(p, APP_PROBES,
2064 nla_get_u32(tbp[i]));
2025 break; 2065 break;
2026 case NDTPA_UCAST_PROBES: 2066 case NDTPA_UCAST_PROBES:
2027 p->ucast_probes = nla_get_u32(tbp[i]); 2067 NEIGH_VAR_SET(p, UCAST_PROBES,
2068 nla_get_u32(tbp[i]));
2028 break; 2069 break;
2029 case NDTPA_MCAST_PROBES: 2070 case NDTPA_MCAST_PROBES:
2030 p->mcast_probes = nla_get_u32(tbp[i]); 2071 NEIGH_VAR_SET(p, MCAST_PROBES,
2072 nla_get_u32(tbp[i]));
2031 break; 2073 break;
2032 case NDTPA_BASE_REACHABLE_TIME: 2074 case NDTPA_BASE_REACHABLE_TIME:
2033 p->base_reachable_time = nla_get_msecs(tbp[i]); 2075 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2076 nla_get_msecs(tbp[i]));
2034 break; 2077 break;
2035 case NDTPA_GC_STALETIME: 2078 case NDTPA_GC_STALETIME:
2036 p->gc_staletime = nla_get_msecs(tbp[i]); 2079 NEIGH_VAR_SET(p, GC_STALETIME,
2080 nla_get_msecs(tbp[i]));
2037 break; 2081 break;
2038 case NDTPA_DELAY_PROBE_TIME: 2082 case NDTPA_DELAY_PROBE_TIME:
2039 p->delay_probe_time = nla_get_msecs(tbp[i]); 2083 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2084 nla_get_msecs(tbp[i]));
2040 break; 2085 break;
2041 case NDTPA_RETRANS_TIME: 2086 case NDTPA_RETRANS_TIME:
2042 p->retrans_time = nla_get_msecs(tbp[i]); 2087 NEIGH_VAR_SET(p, RETRANS_TIME,
2088 nla_get_msecs(tbp[i]));
2043 break; 2089 break;
2044 case NDTPA_ANYCAST_DELAY: 2090 case NDTPA_ANYCAST_DELAY:
2045 p->anycast_delay = nla_get_msecs(tbp[i]); 2091 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2092 nla_get_msecs(tbp[i]));
2046 break; 2093 break;
2047 case NDTPA_PROXY_DELAY: 2094 case NDTPA_PROXY_DELAY:
2048 p->proxy_delay = nla_get_msecs(tbp[i]); 2095 NEIGH_VAR_SET(p, PROXY_DELAY,
2096 nla_get_msecs(tbp[i]));
2049 break; 2097 break;
2050 case NDTPA_LOCKTIME: 2098 case NDTPA_LOCKTIME:
2051 p->locktime = nla_get_msecs(tbp[i]); 2099 NEIGH_VAR_SET(p, LOCKTIME,
2100 nla_get_msecs(tbp[i]));
2052 break; 2101 break;
2053 } 2102 }
2054 } 2103 }
@@ -2789,133 +2838,167 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write,
2789 return ret; 2838 return ret;
2790} 2839}
2791 2840
2792enum { 2841static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2793 NEIGH_VAR_MCAST_PROBE, 2842 int family)
2794 NEIGH_VAR_UCAST_PROBE, 2843{
2795 NEIGH_VAR_APP_PROBE, 2844 switch (family) {
2796 NEIGH_VAR_RETRANS_TIME, 2845 case AF_INET:
2797 NEIGH_VAR_BASE_REACHABLE_TIME, 2846 return __in_dev_arp_parms_get_rcu(dev);
2798 NEIGH_VAR_DELAY_PROBE_TIME, 2847 case AF_INET6:
2799 NEIGH_VAR_GC_STALETIME, 2848 return __in6_dev_nd_parms_get_rcu(dev);
2800 NEIGH_VAR_QUEUE_LEN, 2849 }
2801 NEIGH_VAR_QUEUE_LEN_BYTES, 2850 return NULL;
2802 NEIGH_VAR_PROXY_QLEN, 2851}
2803 NEIGH_VAR_ANYCAST_DELAY, 2852
2804 NEIGH_VAR_PROXY_DELAY, 2853static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
2805 NEIGH_VAR_LOCKTIME, 2854 int index)
2806 NEIGH_VAR_RETRANS_TIME_MS, 2855{
2807 NEIGH_VAR_BASE_REACHABLE_TIME_MS, 2856 struct net_device *dev;
2808 NEIGH_VAR_GC_INTERVAL, 2857 int family = neigh_parms_family(p);
2809 NEIGH_VAR_GC_THRESH1, 2858
2810 NEIGH_VAR_GC_THRESH2, 2859 rcu_read_lock();
2811 NEIGH_VAR_GC_THRESH3, 2860 for_each_netdev_rcu(net, dev) {
2812 NEIGH_VAR_MAX 2861 struct neigh_parms *dst_p =
2813}; 2862 neigh_get_dev_parms_rcu(dev, family);
2863
2864 if (dst_p && !test_bit(index, dst_p->data_state))
2865 dst_p->data[index] = p->data[index];
2866 }
2867 rcu_read_unlock();
2868}
2869
2870static void neigh_proc_update(struct ctl_table *ctl, int write)
2871{
2872 struct net_device *dev = ctl->extra1;
2873 struct neigh_parms *p = ctl->extra2;
2874 struct net *net = neigh_parms_net(p);
2875 int index = (int *) ctl->data - p->data;
2876
2877 if (!write)
2878 return;
2879
2880 set_bit(index, p->data_state);
2881 if (!dev) /* NULL dev means this is default value */
2882 neigh_copy_dflt_parms(net, p, index);
2883}
2884
2885static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
2886 void __user *buffer,
2887 size_t *lenp, loff_t *ppos)
2888{
2889 struct ctl_table tmp = *ctl;
2890 int ret;
2891
2892 tmp.extra1 = &zero;
2893 tmp.extra2 = &int_max;
2894
2895 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2896 neigh_proc_update(ctl, write);
2897 return ret;
2898}
2899
2900int neigh_proc_dointvec(struct ctl_table *ctl, int write,
2901 void __user *buffer, size_t *lenp, loff_t *ppos)
2902{
2903 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2904
2905 neigh_proc_update(ctl, write);
2906 return ret;
2907}
2908EXPORT_SYMBOL(neigh_proc_dointvec);
2909
2910int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
2911 void __user *buffer,
2912 size_t *lenp, loff_t *ppos)
2913{
2914 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
2915
2916 neigh_proc_update(ctl, write);
2917 return ret;
2918}
2919EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
2920
2921static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
2922 void __user *buffer,
2923 size_t *lenp, loff_t *ppos)
2924{
2925 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
2926
2927 neigh_proc_update(ctl, write);
2928 return ret;
2929}
2930
2931int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
2932 void __user *buffer,
2933 size_t *lenp, loff_t *ppos)
2934{
2935 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
2936
2937 neigh_proc_update(ctl, write);
2938 return ret;
2939}
2940EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
2941
2942static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
2943 void __user *buffer,
2944 size_t *lenp, loff_t *ppos)
2945{
2946 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
2947
2948 neigh_proc_update(ctl, write);
2949 return ret;
2950}
2951
2952#define NEIGH_PARMS_DATA_OFFSET(index) \
2953 (&((struct neigh_parms *) 0)->data[index])
2954
2955#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
2956 [NEIGH_VAR_ ## attr] = { \
2957 .procname = name, \
2958 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
2959 .maxlen = sizeof(int), \
2960 .mode = mval, \
2961 .proc_handler = proc, \
2962 }
2963
2964#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
2965 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
2966
2967#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
2968 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
2969
2970#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
2971 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
2972
2973#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
2974 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
2975
2976#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
2977 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
2978
2979#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
2980 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
2814 2981
2815static struct neigh_sysctl_table { 2982static struct neigh_sysctl_table {
2816 struct ctl_table_header *sysctl_header; 2983 struct ctl_table_header *sysctl_header;
2817 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; 2984 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2818} neigh_sysctl_template __read_mostly = { 2985} neigh_sysctl_template __read_mostly = {
2819 .neigh_vars = { 2986 .neigh_vars = {
2820 [NEIGH_VAR_MCAST_PROBE] = { 2987 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
2821 .procname = "mcast_solicit", 2988 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
2822 .maxlen = sizeof(int), 2989 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
2823 .mode = 0644, 2990 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
2824 .extra1 = &zero, 2991 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
2825 .extra2 = &int_max, 2992 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
2826 .proc_handler = proc_dointvec_minmax, 2993 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
2827 }, 2994 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
2828 [NEIGH_VAR_UCAST_PROBE] = { 2995 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
2829 .procname = "ucast_solicit", 2996 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
2830 .maxlen = sizeof(int), 2997 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
2831 .mode = 0644, 2998 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
2832 .extra1 = &zero, 2999 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
2833 .extra2 = &int_max, 3000 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
2834 .proc_handler = proc_dointvec_minmax, 3001 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
2835 },
2836 [NEIGH_VAR_APP_PROBE] = {
2837 .procname = "app_solicit",
2838 .maxlen = sizeof(int),
2839 .mode = 0644,
2840 .extra1 = &zero,
2841 .extra2 = &int_max,
2842 .proc_handler = proc_dointvec_minmax,
2843 },
2844 [NEIGH_VAR_RETRANS_TIME] = {
2845 .procname = "retrans_time",
2846 .maxlen = sizeof(int),
2847 .mode = 0644,
2848 .proc_handler = proc_dointvec_userhz_jiffies,
2849 },
2850 [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2851 .procname = "base_reachable_time",
2852 .maxlen = sizeof(int),
2853 .mode = 0644,
2854 .proc_handler = proc_dointvec_jiffies,
2855 },
2856 [NEIGH_VAR_DELAY_PROBE_TIME] = {
2857 .procname = "delay_first_probe_time",
2858 .maxlen = sizeof(int),
2859 .mode = 0644,
2860 .proc_handler = proc_dointvec_jiffies,
2861 },
2862 [NEIGH_VAR_GC_STALETIME] = {
2863 .procname = "gc_stale_time",
2864 .maxlen = sizeof(int),
2865 .mode = 0644,
2866 .proc_handler = proc_dointvec_jiffies,
2867 },
2868 [NEIGH_VAR_QUEUE_LEN] = {
2869 .procname = "unres_qlen",
2870 .maxlen = sizeof(int),
2871 .mode = 0644,
2872 .proc_handler = proc_unres_qlen,
2873 },
2874 [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2875 .procname = "unres_qlen_bytes",
2876 .maxlen = sizeof(int),
2877 .mode = 0644,
2878 .extra1 = &zero,
2879 .proc_handler = proc_dointvec_minmax,
2880 },
2881 [NEIGH_VAR_PROXY_QLEN] = {
2882 .procname = "proxy_qlen",
2883 .maxlen = sizeof(int),
2884 .mode = 0644,
2885 .extra1 = &zero,
2886 .extra2 = &int_max,
2887 .proc_handler = proc_dointvec_minmax,
2888 },
2889 [NEIGH_VAR_ANYCAST_DELAY] = {
2890 .procname = "anycast_delay",
2891 .maxlen = sizeof(int),
2892 .mode = 0644,
2893 .proc_handler = proc_dointvec_userhz_jiffies,
2894 },
2895 [NEIGH_VAR_PROXY_DELAY] = {
2896 .procname = "proxy_delay",
2897 .maxlen = sizeof(int),
2898 .mode = 0644,
2899 .proc_handler = proc_dointvec_userhz_jiffies,
2900 },
2901 [NEIGH_VAR_LOCKTIME] = {
2902 .procname = "locktime",
2903 .maxlen = sizeof(int),
2904 .mode = 0644,
2905 .proc_handler = proc_dointvec_userhz_jiffies,
2906 },
2907 [NEIGH_VAR_RETRANS_TIME_MS] = {
2908 .procname = "retrans_time_ms",
2909 .maxlen = sizeof(int),
2910 .mode = 0644,
2911 .proc_handler = proc_dointvec_ms_jiffies,
2912 },
2913 [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2914 .procname = "base_reachable_time_ms",
2915 .maxlen = sizeof(int),
2916 .mode = 0644,
2917 .proc_handler = proc_dointvec_ms_jiffies,
2918 },
2919 [NEIGH_VAR_GC_INTERVAL] = { 3002 [NEIGH_VAR_GC_INTERVAL] = {
2920 .procname = "gc_interval", 3003 .procname = "gc_interval",
2921 .maxlen = sizeof(int), 3004 .maxlen = sizeof(int),
@@ -2951,31 +3034,23 @@ static struct neigh_sysctl_table {
2951}; 3034};
2952 3035
2953int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, 3036int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2954 char *p_name, proc_handler *handler) 3037 proc_handler *handler)
2955{ 3038{
3039 int i;
2956 struct neigh_sysctl_table *t; 3040 struct neigh_sysctl_table *t;
2957 const char *dev_name_source = NULL; 3041 const char *dev_name_source;
2958 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; 3042 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3043 char *p_name;
2959 3044
2960 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); 3045 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2961 if (!t) 3046 if (!t)
2962 goto err; 3047 goto err;
2963 3048
2964 t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes; 3049 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
2965 t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes; 3050 t->neigh_vars[i].data += (long) p;
2966 t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes; 3051 t->neigh_vars[i].extra1 = dev;
2967 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time; 3052 t->neigh_vars[i].extra2 = p;
2968 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time; 3053 }
2969 t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time;
2970 t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime;
2971 t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes;
2972 t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes;
2973 t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen;
2974 t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay;
2975 t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2976 t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2977 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time;
2978 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time;
2979 3054
2980 if (dev) { 3055 if (dev) {
2981 dev_name_source = dev->name; 3056 dev_name_source = dev->name;
@@ -2990,26 +3065,32 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2990 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3; 3065 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2991 } 3066 }
2992 3067
2993
2994 if (handler) { 3068 if (handler) {
2995 /* RetransTime */ 3069 /* RetransTime */
2996 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; 3070 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2997 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2998 /* ReachableTime */ 3071 /* ReachableTime */
2999 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; 3072 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3000 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
3001 /* RetransTime (in milliseconds)*/ 3073 /* RetransTime (in milliseconds)*/
3002 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; 3074 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3003 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
3004 /* ReachableTime (in milliseconds) */ 3075 /* ReachableTime (in milliseconds) */
3005 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; 3076 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3006 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
3007 } 3077 }
3008 3078
3009 /* Don't export sysctls to unprivileged users */ 3079 /* Don't export sysctls to unprivileged users */
3010 if (neigh_parms_net(p)->user_ns != &init_user_ns) 3080 if (neigh_parms_net(p)->user_ns != &init_user_ns)
3011 t->neigh_vars[0].procname = NULL; 3081 t->neigh_vars[0].procname = NULL;
3012 3082
3083 switch (neigh_parms_family(p)) {
3084 case AF_INET:
3085 p_name = "ipv4";
3086 break;
3087 case AF_INET6:
3088 p_name = "ipv6";
3089 break;
3090 default:
3091 BUG();
3092 }
3093
3013 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", 3094 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3014 p_name, dev_name_source); 3095 p_name, dev_name_source);
3015 t->sysctl_header = 3096 t->sysctl_header =
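
The neighbour changes above replace direct struct fields (retrans_time, base_reachable_time, and so on) with per-parms values accessed through NEIGH_VAR()/NEIGH_VAR_SET(). That indexed layout is what lets neigh_sysctl_register() wire every sysctl entry up in a single loop and lets neigh_proc_update() record which values were explicitly written so defaults can still be copied to per-device parms. A rough sketch of what those accessors amount to, inferred from this diff (the real definitions live in include/net/neighbour.h):

/* Illustrative sketch only: per-parms settings live in an int array indexed
 * by the NEIGH_VAR_* enum, with a bitmap recording which entries have been
 * explicitly configured.
 */
#define NEIGH_VAR(p, attr)	((p)->data[NEIGH_VAR_ ## attr])

static inline void neigh_var_set_sketch(struct neigh_parms *p, int index, int val)
{
	set_bit(index, p->data_state);	/* mark as explicitly configured */
	p->data[index] = val;
}

#define NEIGH_VAR_SET(p, attr, val) \
	neigh_var_set_sketch(p, NEIGH_VAR_ ## attr, val)
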
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f3edf9635e02..93886246a0b4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -498,17 +498,7 @@ static struct attribute_group wireless_group = {
498#define net_class_groups NULL 498#define net_class_groups NULL
499#endif /* CONFIG_SYSFS */ 499#endif /* CONFIG_SYSFS */
500 500
501#ifdef CONFIG_RPS 501#ifdef CONFIG_SYSFS
502/*
503 * RX queue sysfs structures and functions.
504 */
505struct rx_queue_attribute {
506 struct attribute attr;
507 ssize_t (*show)(struct netdev_rx_queue *queue,
508 struct rx_queue_attribute *attr, char *buf);
509 ssize_t (*store)(struct netdev_rx_queue *queue,
510 struct rx_queue_attribute *attr, const char *buf, size_t len);
511};
512#define to_rx_queue_attr(_attr) container_of(_attr, \ 502#define to_rx_queue_attr(_attr) container_of(_attr, \
513 struct rx_queue_attribute, attr) 503 struct rx_queue_attribute, attr)
514 504
@@ -543,6 +533,7 @@ static const struct sysfs_ops rx_queue_sysfs_ops = {
543 .store = rx_queue_attr_store, 533 .store = rx_queue_attr_store,
544}; 534};
545 535
536#ifdef CONFIG_RPS
546static ssize_t show_rps_map(struct netdev_rx_queue *queue, 537static ssize_t show_rps_map(struct netdev_rx_queue *queue,
547 struct rx_queue_attribute *attribute, char *buf) 538 struct rx_queue_attribute *attribute, char *buf)
548{ 539{
@@ -676,8 +667,8 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
676 while ((mask | (mask >> 1)) != mask) 667 while ((mask | (mask >> 1)) != mask)
677 mask |= (mask >> 1); 668 mask |= (mask >> 1);
678 /* On 64 bit arches, must check mask fits in table->mask (u32), 669 /* On 64 bit arches, must check mask fits in table->mask (u32),
679 * and on 32bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1) 670 * and on 32bit arches, must check
680 * doesnt overflow. 671 * RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow.
681 */ 672 */
682#if BITS_PER_LONG > 32 673#if BITS_PER_LONG > 32
683 if (mask > (unsigned long)(u32)mask) 674 if (mask > (unsigned long)(u32)mask)
@@ -718,16 +709,20 @@ static struct rx_queue_attribute rps_cpus_attribute =
718static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute = 709static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
719 __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR, 710 __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
720 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); 711 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
712#endif /* CONFIG_RPS */
721 713
722static struct attribute *rx_queue_default_attrs[] = { 714static struct attribute *rx_queue_default_attrs[] = {
715#ifdef CONFIG_RPS
723 &rps_cpus_attribute.attr, 716 &rps_cpus_attribute.attr,
724 &rps_dev_flow_table_cnt_attribute.attr, 717 &rps_dev_flow_table_cnt_attribute.attr,
718#endif
725 NULL 719 NULL
726}; 720};
727 721
728static void rx_queue_release(struct kobject *kobj) 722static void rx_queue_release(struct kobject *kobj)
729{ 723{
730 struct netdev_rx_queue *queue = to_rx_queue(kobj); 724 struct netdev_rx_queue *queue = to_rx_queue(kobj);
725#ifdef CONFIG_RPS
731 struct rps_map *map; 726 struct rps_map *map;
732 struct rps_dev_flow_table *flow_table; 727 struct rps_dev_flow_table *flow_table;
733 728
@@ -743,15 +738,29 @@ static void rx_queue_release(struct kobject *kobj)
743 RCU_INIT_POINTER(queue->rps_flow_table, NULL); 738 RCU_INIT_POINTER(queue->rps_flow_table, NULL);
744 call_rcu(&flow_table->rcu, rps_dev_flow_table_release); 739 call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
745 } 740 }
741#endif
746 742
747 memset(kobj, 0, sizeof(*kobj)); 743 memset(kobj, 0, sizeof(*kobj));
748 dev_put(queue->dev); 744 dev_put(queue->dev);
749} 745}
750 746
747static const void *rx_queue_namespace(struct kobject *kobj)
748{
749 struct netdev_rx_queue *queue = to_rx_queue(kobj);
750 struct device *dev = &queue->dev->dev;
751 const void *ns = NULL;
752
753 if (dev->class && dev->class->ns_type)
754 ns = dev->class->namespace(dev);
755
756 return ns;
757}
758
751static struct kobj_type rx_queue_ktype = { 759static struct kobj_type rx_queue_ktype = {
752 .sysfs_ops = &rx_queue_sysfs_ops, 760 .sysfs_ops = &rx_queue_sysfs_ops,
753 .release = rx_queue_release, 761 .release = rx_queue_release,
754 .default_attrs = rx_queue_default_attrs, 762 .default_attrs = rx_queue_default_attrs,
763 .namespace = rx_queue_namespace
755}; 764};
756 765
757static int rx_queue_add_kobject(struct net_device *net, int index) 766static int rx_queue_add_kobject(struct net_device *net, int index)
@@ -763,25 +772,36 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
763 kobj->kset = net->queues_kset; 772 kobj->kset = net->queues_kset;
764 error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, 773 error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
765 "rx-%u", index); 774 "rx-%u", index);
766 if (error) { 775 if (error)
767 kobject_put(kobj); 776 goto exit;
768 return error; 777
778 if (net->sysfs_rx_queue_group) {
779 error = sysfs_create_group(kobj, net->sysfs_rx_queue_group);
780 if (error)
781 goto exit;
769 } 782 }
770 783
771 kobject_uevent(kobj, KOBJ_ADD); 784 kobject_uevent(kobj, KOBJ_ADD);
772 dev_hold(queue->dev); 785 dev_hold(queue->dev);
773 786
774 return error; 787 return error;
788exit:
789 kobject_put(kobj);
790 return error;
775} 791}
776#endif /* CONFIG_RPS */ 792#endif /* CONFIG_SYSFS */

777 793
778int 794int
779net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) 795net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
780{ 796{
781#ifdef CONFIG_RPS 797#ifdef CONFIG_SYSFS
782 int i; 798 int i;
783 int error = 0; 799 int error = 0;
784 800
801#ifndef CONFIG_RPS
802 if (!net->sysfs_rx_queue_group)
803 return 0;
804#endif
785 for (i = old_num; i < new_num; i++) { 805 for (i = old_num; i < new_num; i++) {
786 error = rx_queue_add_kobject(net, i); 806 error = rx_queue_add_kobject(net, i);
787 if (error) { 807 if (error) {
@@ -790,8 +810,12 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
790 } 810 }
791 } 811 }
792 812
793 while (--i >= new_num) 813 while (--i >= new_num) {
814 if (net->sysfs_rx_queue_group)
815 sysfs_remove_group(&net->_rx[i].kobj,
816 net->sysfs_rx_queue_group);
794 kobject_put(&net->_rx[i].kobj); 817 kobject_put(&net->_rx[i].kobj);
818 }
795 819
796 return error; 820 return error;
797#else 821#else
@@ -1082,10 +1106,23 @@ static void netdev_queue_release(struct kobject *kobj)
1082 dev_put(queue->dev); 1106 dev_put(queue->dev);
1083} 1107}
1084 1108
1109static const void *netdev_queue_namespace(struct kobject *kobj)
1110{
1111 struct netdev_queue *queue = to_netdev_queue(kobj);
1112 struct device *dev = &queue->dev->dev;
1113 const void *ns = NULL;
1114
1115 if (dev->class && dev->class->ns_type)
1116 ns = dev->class->namespace(dev);
1117
1118 return ns;
1119}
1120
1085static struct kobj_type netdev_queue_ktype = { 1121static struct kobj_type netdev_queue_ktype = {
1086 .sysfs_ops = &netdev_queue_sysfs_ops, 1122 .sysfs_ops = &netdev_queue_sysfs_ops,
1087 .release = netdev_queue_release, 1123 .release = netdev_queue_release,
1088 .default_attrs = netdev_queue_default_attrs, 1124 .default_attrs = netdev_queue_default_attrs,
1125 .namespace = netdev_queue_namespace,
1089}; 1126};
1090 1127
1091static int netdev_queue_add_kobject(struct net_device *net, int index) 1128static int netdev_queue_add_kobject(struct net_device *net, int index)
@@ -1155,9 +1192,6 @@ static int register_queue_kobjects(struct net_device *net)
1155 NULL, &net->dev.kobj); 1192 NULL, &net->dev.kobj);
1156 if (!net->queues_kset) 1193 if (!net->queues_kset)
1157 return -ENOMEM; 1194 return -ENOMEM;
1158#endif
1159
1160#ifdef CONFIG_RPS
1161 real_rx = net->real_num_rx_queues; 1195 real_rx = net->real_num_rx_queues;
1162#endif 1196#endif
1163 real_tx = net->real_num_tx_queues; 1197 real_tx = net->real_num_tx_queues;
@@ -1184,7 +1218,7 @@ static void remove_queue_kobjects(struct net_device *net)
1184{ 1218{
1185 int real_rx = 0, real_tx = 0; 1219 int real_rx = 0, real_tx = 0;
1186 1220
1187#ifdef CONFIG_RPS 1221#ifdef CONFIG_SYSFS
1188 real_rx = net->real_num_rx_queues; 1222 real_rx = net->real_num_rx_queues;
1189#endif 1223#endif
1190 real_tx = net->real_num_tx_queues; 1224 real_tx = net->real_num_tx_queues;
@@ -1358,7 +1392,7 @@ void netdev_class_remove_file_ns(struct class_attribute *class_attr,
1358} 1392}
1359EXPORT_SYMBOL(netdev_class_remove_file_ns); 1393EXPORT_SYMBOL(netdev_class_remove_file_ns);
1360 1394
1361int netdev_kobject_init(void) 1395int __init netdev_kobject_init(void)
1362{ 1396{
1363 kobj_ns_type_register(&net_ns_type_operations); 1397 kobj_ns_type_register(&net_ns_type_operations);
1364 return class_register(&net_class); 1398 return class_register(&net_class);
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index bd7751ec1c4d..2745a1b51e03 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -1,7 +1,7 @@
1#ifndef __NET_SYSFS_H__ 1#ifndef __NET_SYSFS_H__
2#define __NET_SYSFS_H__ 2#define __NET_SYSFS_H__
3 3
4int netdev_kobject_init(void); 4int __init netdev_kobject_init(void);
5int netdev_register_kobject(struct net_device *); 5int netdev_register_kobject(struct net_device *);
6void netdev_unregister_kobject(struct net_device *); 6void netdev_unregister_kobject(struct net_device *);
7int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); 7int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
new file mode 100644
index 000000000000..719efd541668
--- /dev/null
+++ b/net/core/netclassid_cgroup.c
@@ -0,0 +1,120 @@
1/*
2 * net/core/netclassid_cgroup.c Classid Cgroupfs Handling
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Thomas Graf <tgraf@suug.ch>
10 */
11
12#include <linux/module.h>
13#include <linux/slab.h>
14#include <linux/cgroup.h>
15#include <linux/fdtable.h>
16#include <net/cls_cgroup.h>
17#include <net/sock.h>
18
19static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
20{
21 return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
22}
23
24struct cgroup_cls_state *task_cls_state(struct task_struct *p)
25{
26 return css_cls_state(task_css(p, net_cls_subsys_id));
27}
28EXPORT_SYMBOL_GPL(task_cls_state);
29
30static struct cgroup_subsys_state *
31cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
32{
33 struct cgroup_cls_state *cs;
34
35 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
36 if (!cs)
37 return ERR_PTR(-ENOMEM);
38
39 return &cs->css;
40}
41
42static int cgrp_css_online(struct cgroup_subsys_state *css)
43{
44 struct cgroup_cls_state *cs = css_cls_state(css);
45 struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
46
47 if (parent)
48 cs->classid = parent->classid;
49
50 return 0;
51}
52
53static void cgrp_css_free(struct cgroup_subsys_state *css)
54{
55 kfree(css_cls_state(css));
56}
57
58static int update_classid(const void *v, struct file *file, unsigned n)
59{
60 int err;
61 struct socket *sock = sock_from_file(file, &err);
62
63 if (sock)
64 sock->sk->sk_classid = (u32)(unsigned long)v;
65
66 return 0;
67}
68
69static void cgrp_attach(struct cgroup_subsys_state *css,
70 struct cgroup_taskset *tset)
71{
72 struct cgroup_cls_state *cs = css_cls_state(css);
73 void *v = (void *)(unsigned long)cs->classid;
74 struct task_struct *p;
75
76 cgroup_taskset_for_each(p, css, tset) {
77 task_lock(p);
78 iterate_fd(p->files, 0, update_classid, v);
79 task_unlock(p);
80 }
81}
82
83static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
84{
85 return css_cls_state(css)->classid;
86}
87
88static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
89 u64 value)
90{
91 css_cls_state(css)->classid = (u32) value;
92
93 return 0;
94}
95
96static struct cftype ss_files[] = {
97 {
98 .name = "classid",
99 .read_u64 = read_classid,
100 .write_u64 = write_classid,
101 },
102 { } /* terminate */
103};
104
105struct cgroup_subsys net_cls_subsys = {
106 .name = "net_cls",
107 .css_alloc = cgrp_css_alloc,
108 .css_online = cgrp_css_online,
109 .css_free = cgrp_css_free,
110 .attach = cgrp_attach,
111 .subsys_id = net_cls_subsys_id,
112 .base_cftypes = ss_files,
113 .module = THIS_MODULE,
114};
115
116static int __init init_netclassid_cgroup(void)
117{
118 return cgroup_load_subsys(&net_cls_subsys);
119}
120__initcall(init_netclassid_cgroup);
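
The new net_cls controller simply stamps sk->sk_classid on every socket owned by tasks in the cgroup (including already-open sockets, via the iterate_fd() walk in cgrp_attach()); classifiers such as cls_cgroup can then match on that value. A hedged userspace sketch of driving it, assuming the controller is mounted at /sys/fs/cgroup/net_cls (mount point and group name are illustrative):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* classid is conventionally written as 0xAAAABBBB (major:minor). */
	const char *grp = "/sys/fs/cgroup/net_cls/web";	/* illustrative path */
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/net_cls.classid", grp);
	f = fopen(path, "w");
	if (f) {
		fprintf(f, "0x100001\n");	/* major 0x10, minor 0x0001 */
		fclose(f);
	}

	snprintf(path, sizeof(path), "%s/tasks", grp);
	f = fopen(path, "w");
	if (f) {
		fprintf(f, "%d\n", getpid());	/* move this task into the group */
		fclose(f);
	}
	return 0;
}
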
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 19fe9c717ced..df9e6b1a9759 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -520,8 +520,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
520 skb->protocol = eth->h_proto = htons(ETH_P_IP); 520 skb->protocol = eth->h_proto = htons(ETH_P_IP);
521 } 521 }
522 522
523 memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN); 523 ether_addr_copy(eth->h_source, np->dev->dev_addr);
524 memcpy(eth->h_dest, np->remote_mac, ETH_ALEN); 524 ether_addr_copy(eth->h_dest, np->remote_mac);
525 525
526 skb->dev = np->dev; 526 skb->dev = np->dev;
527 527
@@ -742,7 +742,7 @@ static bool pkt_is_ns(struct sk_buff *skb)
742 struct nd_msg *msg; 742 struct nd_msg *msg;
743 struct ipv6hdr *hdr; 743 struct ipv6hdr *hdr;
744 744
745 if (skb->protocol != htons(ETH_P_ARP)) 745 if (skb->protocol != htons(ETH_P_IPV6))
746 return false; 746 return false;
747 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) 747 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
748 return false; 748 return false;
@@ -948,6 +948,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
948{ 948{
949 char *cur=opt, *delim; 949 char *cur=opt, *delim;
950 int ipv6; 950 int ipv6;
951 bool ipversion_set = false;
951 952
952 if (*cur != '@') { 953 if (*cur != '@') {
953 if ((delim = strchr(cur, '@')) == NULL) 954 if ((delim = strchr(cur, '@')) == NULL)
@@ -960,6 +961,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
960 cur++; 961 cur++;
961 962
962 if (*cur != '/') { 963 if (*cur != '/') {
964 ipversion_set = true;
963 if ((delim = strchr(cur, '/')) == NULL) 965 if ((delim = strchr(cur, '/')) == NULL)
964 goto parse_failed; 966 goto parse_failed;
965 *delim = 0; 967 *delim = 0;
@@ -1002,7 +1004,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
1002 ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); 1004 ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
1003 if (ipv6 < 0) 1005 if (ipv6 < 0)
1004 goto parse_failed; 1006 goto parse_failed;
1005 else if (np->ipv6 != (bool)ipv6) 1007 else if (ipversion_set && np->ipv6 != (bool)ipv6)
1006 goto parse_failed; 1008 goto parse_failed;
1007 else 1009 else
1008 np->ipv6 = (bool)ipv6; 1010 np->ipv6 = (bool)ipv6;
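
Three separate things land in netpoll.c: the MAC copies switch from memcpy(..., ETH_ALEN) to ether_addr_copy(), pkt_is_ns() now checks for ETH_P_IPV6 instead of the mistaken ETH_P_ARP, and netpoll_parse_options() tracks ipversion_set so the remote address only has to match a local IP version that was actually given. For reference, ether_addr_copy() amounts to the following on architectures without efficient unaligned access, assuming both addresses are at least 16-bit aligned (this sketch mirrors include/linux/etherdevice.h and is not part of the patch):

static inline void example_ether_addr_copy(u8 *dst, const u8 *src)
{
        u16 *a = (u16 *)dst;
        const u16 *b = (const u16 *)src;

        a[0] = b[0];
        a[1] = b[1];
        a[2] = b[2];
}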
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 9b7cf6c85f82..9043caedcd08 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -30,7 +30,7 @@
30#define PRIOMAP_MIN_SZ 128 30#define PRIOMAP_MIN_SZ 128
31 31
32/* 32/*
33 * Extend @dev->priomap so that it's large enough to accomodate 33 * Extend @dev->priomap so that it's large enough to accommodate
34 * @target_idx. @dev->priomap.priomap_len > @target_idx after successful 34 * @target_idx. @dev->priomap.priomap_len > @target_idx after successful
35 * return. Must be called under rtnl lock. 35 * return. Must be called under rtnl lock.
36 */ 36 */
@@ -173,14 +173,14 @@ static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft)
173 return css->cgroup->id; 173 return css->cgroup->id;
174} 174}
175 175
176static int read_priomap(struct cgroup_subsys_state *css, struct cftype *cft, 176static int read_priomap(struct seq_file *sf, void *v)
177 struct cgroup_map_cb *cb)
178{ 177{
179 struct net_device *dev; 178 struct net_device *dev;
180 179
181 rcu_read_lock(); 180 rcu_read_lock();
182 for_each_netdev_rcu(&init_net, dev) 181 for_each_netdev_rcu(&init_net, dev)
183 cb->fill(cb, dev->name, netprio_prio(css, dev)); 182 seq_printf(sf, "%s %u\n", dev->name,
183 netprio_prio(seq_css(sf), dev));
184 rcu_read_unlock(); 184 rcu_read_unlock();
185 return 0; 185 return 0;
186} 186}
@@ -238,7 +238,7 @@ static struct cftype ss_files[] = {
238 }, 238 },
239 { 239 {
240 .name = "ifpriomap", 240 .name = "ifpriomap",
241 .read_map = read_priomap, 241 .seq_show = read_priomap,
242 .write_string = write_priomap, 242 .write_string = write_priomap,
243 }, 243 },
244 { } /* terminate */ 244 { } /* terminate */
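
netprio_cgroup.c drops the old cgroup_map_cb read callback in favour of the generic seq_file interface: read_priomap() becomes a plain seq_show handler, recovers its css through seq_css(), and prints one "ifname prio" pair per line with seq_printf(). A minimal sketch of the converted cftype pattern, with placeholder names (only seq_css(), seq_printf() and the .seq_show member are real API here):

static int example_seq_show(struct seq_file *sf, void *v)
{
        /* seq_css() recovers the cgroup_subsys_state of the opened file */
        seq_printf(sf, "%d\n", seq_css(sf)->cgroup->id);
        return 0;
}

static struct cftype example_files[] = {
        {
                .name           = "example",
                .seq_show       = example_seq_show,
        },
        { }     /* terminate */
};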
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a797fff7f222..fdac61cac1bd 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -389,6 +389,9 @@ struct pktgen_dev {
389#ifdef CONFIG_XFRM 389#ifdef CONFIG_XFRM
390 __u8 ipsmode; /* IPSEC mode (config) */ 390 __u8 ipsmode; /* IPSEC mode (config) */
391 __u8 ipsproto; /* IPSEC type (config) */ 391 __u8 ipsproto; /* IPSEC type (config) */
392 __u32 spi;
393 struct dst_entry dst;
394 struct dst_ops dstops;
392#endif 395#endif
393 char result[512]; 396 char result[512];
394}; 397};
@@ -654,8 +657,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
654 } 657 }
655 658
656#ifdef CONFIG_XFRM 659#ifdef CONFIG_XFRM
657 if (pkt_dev->flags & F_IPSEC_ON) 660 if (pkt_dev->flags & F_IPSEC_ON) {
658 seq_printf(seq, "IPSEC "); 661 seq_printf(seq, "IPSEC ");
662 if (pkt_dev->spi)
663 seq_printf(seq, "spi:%u", pkt_dev->spi);
664 }
659#endif 665#endif
660 666
661 if (pkt_dev->flags & F_MACSRC_RND) 667 if (pkt_dev->flags & F_MACSRC_RND)
@@ -1434,7 +1440,7 @@ static ssize_t pktgen_if_write(struct file *file,
1434 if (!mac_pton(valstr, pkt_dev->dst_mac)) 1440 if (!mac_pton(valstr, pkt_dev->dst_mac))
1435 return -EINVAL; 1441 return -EINVAL;
1436 /* Set up Dest MAC */ 1442 /* Set up Dest MAC */
1437 memcpy(&pkt_dev->hh[0], pkt_dev->dst_mac, ETH_ALEN); 1443 ether_addr_copy(&pkt_dev->hh[0], pkt_dev->dst_mac);
1438 1444
1439 sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac); 1445 sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac);
1440 return count; 1446 return count;
@@ -1451,7 +1457,7 @@ static ssize_t pktgen_if_write(struct file *file,
1451 if (!mac_pton(valstr, pkt_dev->src_mac)) 1457 if (!mac_pton(valstr, pkt_dev->src_mac))
1452 return -EINVAL; 1458 return -EINVAL;
1453 /* Set up Src MAC */ 1459 /* Set up Src MAC */
1454 memcpy(&pkt_dev->hh[6], pkt_dev->src_mac, ETH_ALEN); 1460 ether_addr_copy(&pkt_dev->hh[6], pkt_dev->src_mac);
1455 1461
1456 sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac); 1462 sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac);
1457 return count; 1463 return count;
@@ -1476,7 +1482,18 @@ static ssize_t pktgen_if_write(struct file *file,
1476 sprintf(pg_result, "OK: flows=%u", pkt_dev->cflows); 1482 sprintf(pg_result, "OK: flows=%u", pkt_dev->cflows);
1477 return count; 1483 return count;
1478 } 1484 }
1485#ifdef CONFIG_XFRM
1486 if (!strcmp(name, "spi")) {
1487 len = num_arg(&user_buffer[i], 10, &value);
1488 if (len < 0)
1489 return len;
1479 1490
1491 i += len;
1492 pkt_dev->spi = value;
1493 sprintf(pg_result, "OK: spi=%u", pkt_dev->spi);
1494 return count;
1495 }
1496#endif
1480 if (!strcmp(name, "flowlen")) { 1497 if (!strcmp(name, "flowlen")) {
1481 len = num_arg(&user_buffer[i], 10, &value); 1498 len = num_arg(&user_buffer[i], 10, &value);
1482 if (len < 0) 1499 if (len < 0)
@@ -2043,10 +2060,10 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
2043 /* Default to the interface's mac if not explicitly set. */ 2060 /* Default to the interface's mac if not explicitly set. */
2044 2061
2045 if (is_zero_ether_addr(pkt_dev->src_mac)) 2062 if (is_zero_ether_addr(pkt_dev->src_mac))
2046 memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, ETH_ALEN); 2063 ether_addr_copy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr);
2047 2064
2048 /* Set up Dest MAC */ 2065 /* Set up Dest MAC */
2049 memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN); 2066 ether_addr_copy(&(pkt_dev->hh[0]), pkt_dev->dst_mac);
2050 2067
2051 if (pkt_dev->flags & F_IPV6) { 2068 if (pkt_dev->flags & F_IPV6) {
2052 int i, set = 0, err = 1; 2069 int i, set = 0, err = 1;
@@ -2233,13 +2250,21 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
2233 struct xfrm_state *x = pkt_dev->flows[flow].x; 2250 struct xfrm_state *x = pkt_dev->flows[flow].x;
2234 struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id); 2251 struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id);
2235 if (!x) { 2252 if (!x) {
2236 /*slow path: we dont already have xfrm_state*/ 2253
2237 x = xfrm_stateonly_find(pn->net, DUMMY_MARK, 2254 if (pkt_dev->spi) {
2238 (xfrm_address_t *)&pkt_dev->cur_daddr, 2255 /* We need to find the right SA as quickly as possible.
2239 (xfrm_address_t *)&pkt_dev->cur_saddr, 2256 * Searching with minimum criteria to achieve this.
2240 AF_INET, 2257 */
2241 pkt_dev->ipsmode, 2258 x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET);
2242 pkt_dev->ipsproto, 0); 2259 } else {
 2260 /* slow path: we don't already have xfrm_state */
2261 x = xfrm_stateonly_find(pn->net, DUMMY_MARK,
2262 (xfrm_address_t *)&pkt_dev->cur_daddr,
2263 (xfrm_address_t *)&pkt_dev->cur_saddr,
2264 AF_INET,
2265 pkt_dev->ipsmode,
2266 pkt_dev->ipsproto, 0);
2267 }
2243 if (x) { 2268 if (x) {
2244 pkt_dev->flows[flow].x = x; 2269 pkt_dev->flows[flow].x = x;
2245 set_pkt_overhead(pkt_dev); 2270 set_pkt_overhead(pkt_dev);
@@ -2475,31 +2500,47 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2475 2500
2476 2501
2477#ifdef CONFIG_XFRM 2502#ifdef CONFIG_XFRM
2503static u32 pktgen_dst_metrics[RTAX_MAX + 1] = {
2504
2505 [RTAX_HOPLIMIT] = 0x5, /* Set a static hoplimit */
2506};
2507
2478static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) 2508static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
2479{ 2509{
2480 struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; 2510 struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
2481 int err = 0; 2511 int err = 0;
2512 struct net *net = dev_net(pkt_dev->odev);
2482 2513
2483 if (!x) 2514 if (!x)
2484 return 0; 2515 return 0;
2485 /* XXX: we dont support tunnel mode for now until 2516 /* XXX: we dont support tunnel mode for now until
2486 * we resolve the dst issue */ 2517 * we resolve the dst issue */
2487 if (x->props.mode != XFRM_MODE_TRANSPORT) 2518 if ((x->props.mode != XFRM_MODE_TRANSPORT) && (pkt_dev->spi == 0))
2488 return 0; 2519 return 0;
2489 2520
2490 spin_lock(&x->lock); 2521 /* But when the user specifies a valid SPI, the transformation
2522 * supports both transport/tunnel mode + ESP/AH type.
2523 */
2524 if ((x->props.mode == XFRM_MODE_TUNNEL) && (pkt_dev->spi != 0))
2525 skb->_skb_refdst = (unsigned long)&pkt_dev->dst | SKB_DST_NOREF;
2491 2526
2527 rcu_read_lock_bh();
2492 err = x->outer_mode->output(x, skb); 2528 err = x->outer_mode->output(x, skb);
2493 if (err) 2529 rcu_read_unlock_bh();
2530 if (err) {
2531 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
2494 goto error; 2532 goto error;
2533 }
2495 err = x->type->output(x, skb); 2534 err = x->type->output(x, skb);
2496 if (err) 2535 if (err) {
2536 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR);
2497 goto error; 2537 goto error;
2498 2538 }
2539 spin_lock_bh(&x->lock);
2499 x->curlft.bytes += skb->len; 2540 x->curlft.bytes += skb->len;
2500 x->curlft.packets++; 2541 x->curlft.packets++;
2542 spin_unlock_bh(&x->lock);
2501error: 2543error:
2502 spin_unlock(&x->lock);
2503 return err; 2544 return err;
2504} 2545}
2505 2546
@@ -3542,6 +3583,17 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3542#ifdef CONFIG_XFRM 3583#ifdef CONFIG_XFRM
3543 pkt_dev->ipsmode = XFRM_MODE_TRANSPORT; 3584 pkt_dev->ipsmode = XFRM_MODE_TRANSPORT;
3544 pkt_dev->ipsproto = IPPROTO_ESP; 3585 pkt_dev->ipsproto = IPPROTO_ESP;
3586
 3587 /* xfrm tunnel mode needs an additional dst to extract the outer
 3588 * ip header protocol/ttl/id fields, so create a phony one here
 3589 * instead of looking up a valid rt, which would definitely hurt
 3590 * performance in this case.
3591 */
3592 pkt_dev->dstops.family = AF_INET;
3593 pkt_dev->dst.dev = pkt_dev->odev;
3594 dst_init_metrics(&pkt_dev->dst, pktgen_dst_metrics, false);
3595 pkt_dev->dst.child = &pkt_dev->dst;
3596 pkt_dev->dst.ops = &pkt_dev->dstops;
3545#endif 3597#endif
3546 3598
3547 return add_dev_to_thread(t, pkt_dev); 3599 return add_dev_to_thread(t, pkt_dev);
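
The pktgen changes add a per-device "spi" knob: when it is set, get_ipsec_sa() looks the SA up directly with xfrm_state_lookup_byspi() instead of searching by address, and a phony dst/dst_ops pair is attached to the device so ESP/AH in tunnel mode can be generated without a real route lookup. An illustrative userspace sketch, assuming an SA with that SPI already exists and using a made-up interface name (the proc commands themselves match the parser in this patch):

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void pgset(const char *dev, const char *cmd)
{
        int fd = open(dev, O_WRONLY);

        if (fd < 0)
                return;
        write(fd, cmd, strlen(cmd));
        close(fd);
}

int main(void)
{
        pgset("/proc/net/pktgen/eth0", "flag IPSEC\n"); /* sets F_IPSEC_ON */
        pgset("/proc/net/pktgen/eth0", "spi 256\n");    /* decimal, per num_arg(.., 10, ..) */
        return 0;
}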
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index cf67144d3e3c..120eecc0f5a4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -365,6 +365,22 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops)
365} 365}
366EXPORT_SYMBOL_GPL(rtnl_link_unregister); 366EXPORT_SYMBOL_GPL(rtnl_link_unregister);
367 367
368static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev)
369{
370 struct net_device *master_dev;
371 const struct rtnl_link_ops *ops;
372
373 master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
374 if (!master_dev)
375 return 0;
376 ops = master_dev->rtnl_link_ops;
377 if (!ops || !ops->get_slave_size)
378 return 0;
379 /* IFLA_INFO_SLAVE_DATA + nested data */
380 return nla_total_size(sizeof(struct nlattr)) +
381 ops->get_slave_size(master_dev, dev);
382}
383
368static size_t rtnl_link_get_size(const struct net_device *dev) 384static size_t rtnl_link_get_size(const struct net_device *dev)
369{ 385{
370 const struct rtnl_link_ops *ops = dev->rtnl_link_ops; 386 const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
@@ -385,6 +401,8 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
385 /* IFLA_INFO_XSTATS */ 401 /* IFLA_INFO_XSTATS */
386 size += nla_total_size(ops->get_xstats_size(dev)); 402 size += nla_total_size(ops->get_xstats_size(dev));
387 403
404 size += rtnl_link_get_slave_info_data_size(dev);
405
388 return size; 406 return size;
389} 407}
390 408
@@ -403,34 +421,16 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
403} 421}
404 422
405/** 423/**
406 * __rtnl_af_register - Register rtnl_af_ops with rtnetlink.
407 * @ops: struct rtnl_af_ops * to register
408 *
409 * The caller must hold the rtnl_mutex.
410 *
411 * Returns 0 on success or a negative error code.
412 */
413int __rtnl_af_register(struct rtnl_af_ops *ops)
414{
415 list_add_tail(&ops->list, &rtnl_af_ops);
416 return 0;
417}
418EXPORT_SYMBOL_GPL(__rtnl_af_register);
419
420/**
421 * rtnl_af_register - Register rtnl_af_ops with rtnetlink. 424 * rtnl_af_register - Register rtnl_af_ops with rtnetlink.
422 * @ops: struct rtnl_af_ops * to register 425 * @ops: struct rtnl_af_ops * to register
423 * 426 *
424 * Returns 0 on success or a negative error code. 427 * Returns 0 on success or a negative error code.
425 */ 428 */
426int rtnl_af_register(struct rtnl_af_ops *ops) 429void rtnl_af_register(struct rtnl_af_ops *ops)
427{ 430{
428 int err;
429
430 rtnl_lock(); 431 rtnl_lock();
431 err = __rtnl_af_register(ops); 432 list_add_tail(&ops->list, &rtnl_af_ops);
432 rtnl_unlock(); 433 rtnl_unlock();
433 return err;
434} 434}
435EXPORT_SYMBOL_GPL(rtnl_af_register); 435EXPORT_SYMBOL_GPL(rtnl_af_register);
436 436
@@ -477,40 +477,100 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev)
477 return size; 477 return size;
478} 478}
479 479
480static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) 480static bool rtnl_have_link_slave_info(const struct net_device *dev)
481{ 481{
482 const struct rtnl_link_ops *ops = dev->rtnl_link_ops; 482 struct net_device *master_dev;
483 struct nlattr *linkinfo, *data;
484 int err = -EMSGSIZE;
485 483
486 linkinfo = nla_nest_start(skb, IFLA_LINKINFO); 484 master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
487 if (linkinfo == NULL) 485 if (master_dev && master_dev->rtnl_link_ops)
488 goto out; 486 return true;
487 return false;
488}
489
490static int rtnl_link_slave_info_fill(struct sk_buff *skb,
491 const struct net_device *dev)
492{
493 struct net_device *master_dev;
494 const struct rtnl_link_ops *ops;
495 struct nlattr *slave_data;
496 int err;
497
498 master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
499 if (!master_dev)
500 return 0;
501 ops = master_dev->rtnl_link_ops;
502 if (!ops)
503 return 0;
504 if (nla_put_string(skb, IFLA_INFO_SLAVE_KIND, ops->kind) < 0)
505 return -EMSGSIZE;
506 if (ops->fill_slave_info) {
507 slave_data = nla_nest_start(skb, IFLA_INFO_SLAVE_DATA);
508 if (!slave_data)
509 return -EMSGSIZE;
510 err = ops->fill_slave_info(skb, master_dev, dev);
511 if (err < 0)
512 goto err_cancel_slave_data;
513 nla_nest_end(skb, slave_data);
514 }
515 return 0;
489 516
517err_cancel_slave_data:
518 nla_nest_cancel(skb, slave_data);
519 return err;
520}
521
522static int rtnl_link_info_fill(struct sk_buff *skb,
523 const struct net_device *dev)
524{
525 const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
526 struct nlattr *data;
527 int err;
528
529 if (!ops)
530 return 0;
490 if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0) 531 if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0)
491 goto err_cancel_link; 532 return -EMSGSIZE;
492 if (ops->fill_xstats) { 533 if (ops->fill_xstats) {
493 err = ops->fill_xstats(skb, dev); 534 err = ops->fill_xstats(skb, dev);
494 if (err < 0) 535 if (err < 0)
495 goto err_cancel_link; 536 return err;
496 } 537 }
497 if (ops->fill_info) { 538 if (ops->fill_info) {
498 data = nla_nest_start(skb, IFLA_INFO_DATA); 539 data = nla_nest_start(skb, IFLA_INFO_DATA);
499 if (data == NULL) { 540 if (data == NULL)
500 err = -EMSGSIZE; 541 return -EMSGSIZE;
501 goto err_cancel_link;
502 }
503 err = ops->fill_info(skb, dev); 542 err = ops->fill_info(skb, dev);
504 if (err < 0) 543 if (err < 0)
505 goto err_cancel_data; 544 goto err_cancel_data;
506 nla_nest_end(skb, data); 545 nla_nest_end(skb, data);
507 } 546 }
508
509 nla_nest_end(skb, linkinfo);
510 return 0; 547 return 0;
511 548
512err_cancel_data: 549err_cancel_data:
513 nla_nest_cancel(skb, data); 550 nla_nest_cancel(skb, data);
551 return err;
552}
553
554static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev)
555{
556 struct nlattr *linkinfo;
557 int err = -EMSGSIZE;
558
559 linkinfo = nla_nest_start(skb, IFLA_LINKINFO);
560 if (linkinfo == NULL)
561 goto out;
562
563 err = rtnl_link_info_fill(skb, dev);
564 if (err < 0)
565 goto err_cancel_link;
566
567 err = rtnl_link_slave_info_fill(skb, dev);
568 if (err < 0)
569 goto err_cancel_link;
570
571 nla_nest_end(skb, linkinfo);
572 return 0;
573
514err_cancel_link: 574err_cancel_link:
515 nla_nest_cancel(skb, linkinfo); 575 nla_nest_cancel(skb, linkinfo);
516out: 576out:
@@ -1019,7 +1079,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1019 if (rtnl_port_fill(skb, dev)) 1079 if (rtnl_port_fill(skb, dev))
1020 goto nla_put_failure; 1080 goto nla_put_failure;
1021 1081
1022 if (dev->rtnl_link_ops) { 1082 if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) {
1023 if (rtnl_link_fill(skb, dev) < 0) 1083 if (rtnl_link_fill(skb, dev) < 0)
1024 goto nla_put_failure; 1084 goto nla_put_failure;
1025 } 1085 }
@@ -1142,6 +1202,8 @@ EXPORT_SYMBOL(ifla_policy);
1142static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { 1202static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
1143 [IFLA_INFO_KIND] = { .type = NLA_STRING }, 1203 [IFLA_INFO_KIND] = { .type = NLA_STRING },
1144 [IFLA_INFO_DATA] = { .type = NLA_NESTED }, 1204 [IFLA_INFO_DATA] = { .type = NLA_NESTED },
1205 [IFLA_INFO_SLAVE_KIND] = { .type = NLA_STRING },
1206 [IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED },
1145}; 1207};
1146 1208
1147static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { 1209static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
@@ -1729,7 +1791,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)
1729{ 1791{
1730 struct net *net = sock_net(skb->sk); 1792 struct net *net = sock_net(skb->sk);
1731 const struct rtnl_link_ops *ops; 1793 const struct rtnl_link_ops *ops;
1794 const struct rtnl_link_ops *m_ops = NULL;
1732 struct net_device *dev; 1795 struct net_device *dev;
1796 struct net_device *master_dev = NULL;
1733 struct ifinfomsg *ifm; 1797 struct ifinfomsg *ifm;
1734 char kind[MODULE_NAME_LEN]; 1798 char kind[MODULE_NAME_LEN];
1735 char ifname[IFNAMSIZ]; 1799 char ifname[IFNAMSIZ];
@@ -1759,6 +1823,12 @@ replay:
1759 dev = NULL; 1823 dev = NULL;
1760 } 1824 }
1761 1825
1826 if (dev) {
1827 master_dev = netdev_master_upper_dev_get(dev);
1828 if (master_dev)
1829 m_ops = master_dev->rtnl_link_ops;
1830 }
1831
1762 err = validate_linkmsg(dev, tb); 1832 err = validate_linkmsg(dev, tb);
1763 if (err < 0) 1833 if (err < 0)
1764 return err; 1834 return err;
@@ -1780,7 +1850,10 @@ replay:
1780 } 1850 }
1781 1851
1782 if (1) { 1852 if (1) {
1783 struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL; 1853 struct nlattr *attr[ops ? ops->maxtype + 1 : 0];
1854 struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0];
1855 struct nlattr **data = NULL;
1856 struct nlattr **slave_data = NULL;
1784 struct net *dest_net; 1857 struct net *dest_net;
1785 1858
1786 if (ops) { 1859 if (ops) {
@@ -1799,6 +1872,24 @@ replay:
1799 } 1872 }
1800 } 1873 }
1801 1874
1875 if (m_ops) {
1876 if (m_ops->slave_maxtype &&
1877 linkinfo[IFLA_INFO_SLAVE_DATA]) {
1878 err = nla_parse_nested(slave_attr,
1879 m_ops->slave_maxtype,
1880 linkinfo[IFLA_INFO_SLAVE_DATA],
1881 m_ops->slave_policy);
1882 if (err < 0)
1883 return err;
1884 slave_data = slave_attr;
1885 }
1886 if (m_ops->slave_validate) {
1887 err = m_ops->slave_validate(tb, slave_data);
1888 if (err < 0)
1889 return err;
1890 }
1891 }
1892
1802 if (dev) { 1893 if (dev) {
1803 int modified = 0; 1894 int modified = 0;
1804 1895
@@ -1818,6 +1909,17 @@ replay:
1818 modified = 1; 1909 modified = 1;
1819 } 1910 }
1820 1911
1912 if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
1913 if (!m_ops || !m_ops->slave_changelink)
1914 return -EOPNOTSUPP;
1915
1916 err = m_ops->slave_changelink(master_dev, dev,
1917 tb, slave_data);
1918 if (err < 0)
1919 return err;
1920 modified = 1;
1921 }
1922
1821 return do_setlink(dev, ifm, tb, ifname, modified); 1923 return do_setlink(dev, ifm, tb, ifname, modified);
1822 } 1924 }
1823 1925
@@ -1861,16 +1963,21 @@ replay:
1861 1963
1862 dev->ifindex = ifm->ifi_index; 1964 dev->ifindex = ifm->ifi_index;
1863 1965
1864 if (ops->newlink) 1966 if (ops->newlink) {
1865 err = ops->newlink(net, dev, tb, data); 1967 err = ops->newlink(net, dev, tb, data);
1866 else 1968 /* Drivers should call free_netdev() in ->destructor
1969 * and unregister it on failure so that device could be
1970 * finally freed in rtnl_unlock.
1971 */
1972 if (err < 0)
1973 goto out;
1974 } else {
1867 err = register_netdevice(dev); 1975 err = register_netdevice(dev);
1868 1976 if (err < 0) {
1869 if (err < 0) { 1977 free_netdev(dev);
1870 free_netdev(dev); 1978 goto out;
1871 goto out; 1979 }
1872 } 1980 }
1873
1874 err = rtnl_configure_link(dev, ifm); 1981 err = rtnl_configure_link(dev, ifm);
1875 if (err < 0) 1982 if (err < 0)
1876 unregister_netdevice(dev); 1983 unregister_netdevice(dev);
@@ -2014,12 +2121,13 @@ EXPORT_SYMBOL(rtmsg_ifinfo);
2014static int nlmsg_populate_fdb_fill(struct sk_buff *skb, 2121static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
2015 struct net_device *dev, 2122 struct net_device *dev,
2016 u8 *addr, u32 pid, u32 seq, 2123 u8 *addr, u32 pid, u32 seq,
2017 int type, unsigned int flags) 2124 int type, unsigned int flags,
2125 int nlflags)
2018{ 2126{
2019 struct nlmsghdr *nlh; 2127 struct nlmsghdr *nlh;
2020 struct ndmsg *ndm; 2128 struct ndmsg *ndm;
2021 2129
2022 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), NLM_F_MULTI); 2130 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), nlflags);
2023 if (!nlh) 2131 if (!nlh)
2024 return -EMSGSIZE; 2132 return -EMSGSIZE;
2025 2133
@@ -2057,7 +2165,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type)
2057 if (!skb) 2165 if (!skb)
2058 goto errout; 2166 goto errout;
2059 2167
2060 err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF); 2168 err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF, 0);
2061 if (err < 0) { 2169 if (err < 0) {
2062 kfree_skb(skb); 2170 kfree_skb(skb);
2063 goto errout; 2171 goto errout;
@@ -2282,7 +2390,8 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
2282 2390
2283 err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 2391 err = nlmsg_populate_fdb_fill(skb, dev, ha->addr,
2284 portid, seq, 2392 portid, seq,
2285 RTM_NEWNEIGH, NTF_SELF); 2393 RTM_NEWNEIGH, NTF_SELF,
2394 NLM_F_MULTI);
2286 if (err < 0) 2395 if (err < 0)
2287 return err; 2396 return err;
2288skip: 2397skip:
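
The rtnetlink work teaches link dumps and RTM_NEWLINK about slave devices: rtnl_link_fill() is split into rtnl_link_info_fill() and rtnl_link_slave_info_fill(), so an enslaved device reports IFLA_INFO_SLAVE_KIND plus an IFLA_INFO_SLAVE_DATA nest filled by its master's rtnl_link_ops, and rtnl_newlink() parses IFLA_INFO_SLAVE_DATA against the master's slave_policy before calling slave_changelink(). A hedged sketch of the master-driver side, with made-up EXAMPLE_* attributes (only the rtnl_link_ops members and the netlink helpers are real API):

enum {
        EXAMPLE_SLAVE_UNSPEC,
        EXAMPLE_SLAVE_PRIO,
        __EXAMPLE_SLAVE_MAX,
};
#define EXAMPLE_SLAVE_MAX (__EXAMPLE_SLAVE_MAX - 1)

static const struct nla_policy example_slave_policy[EXAMPLE_SLAVE_MAX + 1] = {
        [EXAMPLE_SLAVE_PRIO]    = { .type = NLA_U16 },
};

static int example_slave_changelink(struct net_device *master,
                                    struct net_device *slave,
                                    struct nlattr *tb[], struct nlattr *data[])
{
        if (!data || !data[EXAMPLE_SLAVE_PRIO])
                return 0;
        /* apply nla_get_u16(data[EXAMPLE_SLAVE_PRIO]) to this slave here */
        return 0;
}

static size_t example_get_slave_size(const struct net_device *master,
                                     const struct net_device *slave)
{
        return nla_total_size(sizeof(u16));     /* EXAMPLE_SLAVE_PRIO */
}

static int example_fill_slave_info(struct sk_buff *skb,
                                   const struct net_device *master,
                                   const struct net_device *slave)
{
        return nla_put_u16(skb, EXAMPLE_SLAVE_PRIO, 0);
}

static struct rtnl_link_ops example_link_ops __read_mostly = {
        .kind                   = "example",
        .slave_maxtype          = EXAMPLE_SLAVE_MAX,
        .slave_policy           = example_slave_policy,
        .slave_changelink       = example_slave_changelink,
        .get_slave_size         = example_get_slave_size,
        .fill_slave_info        = example_fill_slave_info,
};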
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 06e72d3cdf60..869c7afe3b07 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -47,6 +47,8 @@
47#include <linux/in.h> 47#include <linux/in.h>
48#include <linux/inet.h> 48#include <linux/inet.h>
49#include <linux/slab.h> 49#include <linux/slab.h>
50#include <linux/tcp.h>
51#include <linux/udp.h>
50#include <linux/netdevice.h> 52#include <linux/netdevice.h>
51#ifdef CONFIG_NET_CLS_ACT 53#ifdef CONFIG_NET_CLS_ACT
52#include <net/pkt_sched.h> 54#include <net/pkt_sched.h>
@@ -65,6 +67,7 @@
65#include <net/dst.h> 67#include <net/dst.h>
66#include <net/sock.h> 68#include <net/sock.h>
67#include <net/checksum.h> 69#include <net/checksum.h>
70#include <net/ip6_checksum.h>
68#include <net/xfrm.h> 71#include <net/xfrm.h>
69 72
70#include <asm/uaccess.h> 73#include <asm/uaccess.h>
@@ -74,36 +77,6 @@
74struct kmem_cache *skbuff_head_cache __read_mostly; 77struct kmem_cache *skbuff_head_cache __read_mostly;
75static struct kmem_cache *skbuff_fclone_cache __read_mostly; 78static struct kmem_cache *skbuff_fclone_cache __read_mostly;
76 79
77static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
78 struct pipe_buffer *buf)
79{
80 put_page(buf->page);
81}
82
83static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
84 struct pipe_buffer *buf)
85{
86 get_page(buf->page);
87}
88
89static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
90 struct pipe_buffer *buf)
91{
92 return 1;
93}
94
95
96/* Pipe buffer operations for a socket. */
97static const struct pipe_buf_operations sock_pipe_buf_ops = {
98 .can_merge = 0,
99 .map = generic_pipe_buf_map,
100 .unmap = generic_pipe_buf_unmap,
101 .confirm = generic_pipe_buf_confirm,
102 .release = sock_pipe_buf_release,
103 .steal = sock_pipe_buf_steal,
104 .get = sock_pipe_buf_get,
105};
106
107/** 80/**
108 * skb_panic - private function for out-of-line support 81 * skb_panic - private function for out-of-line support
109 * @skb: buffer 82 * @skb: buffer
@@ -712,9 +685,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
712 new->inner_network_header = old->inner_network_header; 685 new->inner_network_header = old->inner_network_header;
713 new->inner_mac_header = old->inner_mac_header; 686 new->inner_mac_header = old->inner_mac_header;
714 skb_dst_copy(new, old); 687 skb_dst_copy(new, old);
715 new->rxhash = old->rxhash; 688 skb_copy_hash(new, old);
716 new->ooo_okay = old->ooo_okay; 689 new->ooo_okay = old->ooo_okay;
717 new->l4_rxhash = old->l4_rxhash;
718 new->no_fcs = old->no_fcs; 690 new->no_fcs = old->no_fcs;
719 new->encapsulation = old->encapsulation; 691 new->encapsulation = old->encapsulation;
720#ifdef CONFIG_XFRM 692#ifdef CONFIG_XFRM
@@ -735,9 +707,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
735 new->mark = old->mark; 707 new->mark = old->mark;
736 new->skb_iif = old->skb_iif; 708 new->skb_iif = old->skb_iif;
737 __nf_copy(new, old); 709 __nf_copy(new, old);
738#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
739 new->nf_trace = old->nf_trace;
740#endif
741#ifdef CONFIG_NET_SCHED 710#ifdef CONFIG_NET_SCHED
742 new->tc_index = old->tc_index; 711 new->tc_index = old->tc_index;
743#ifdef CONFIG_NET_CLS_ACT 712#ifdef CONFIG_NET_CLS_ACT
@@ -1830,7 +1799,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
1830 .partial = partial, 1799 .partial = partial,
1831 .nr_pages_max = MAX_SKB_FRAGS, 1800 .nr_pages_max = MAX_SKB_FRAGS,
1832 .flags = flags, 1801 .flags = flags,
1833 .ops = &sock_pipe_buf_ops, 1802 .ops = &nosteal_pipe_buf_ops,
1834 .spd_release = sock_spd_release, 1803 .spd_release = sock_spd_release,
1835 }; 1804 };
1836 struct sk_buff *frag_iter; 1805 struct sk_buff *frag_iter;
@@ -2122,6 +2091,91 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
2122} 2091}
2123EXPORT_SYMBOL(skb_copy_and_csum_bits); 2092EXPORT_SYMBOL(skb_copy_and_csum_bits);
2124 2093
2094 /**
2095 * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
2096 * @from: source buffer
2097 *
2098 * Calculates the amount of linear headroom needed in the 'to' skb passed
2099 * into skb_zerocopy().
2100 */
2101unsigned int
2102skb_zerocopy_headlen(const struct sk_buff *from)
2103{
2104 unsigned int hlen = 0;
2105
2106 if (!from->head_frag ||
2107 skb_headlen(from) < L1_CACHE_BYTES ||
2108 skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
2109 hlen = skb_headlen(from);
2110
2111 if (skb_has_frag_list(from))
2112 hlen = from->len;
2113
2114 return hlen;
2115}
2116EXPORT_SYMBOL_GPL(skb_zerocopy_headlen);
2117
2118/**
2119 * skb_zerocopy - Zero copy skb to skb
2120 * @to: destination buffer
2121 * @from: source buffer
2122 * @len: number of bytes to copy from source buffer
2123 * @hlen: size of linear headroom in destination buffer
2124 *
2125 * Copies up to `len` bytes from `from` to `to` by creating references
2126 * to the frags in the source buffer.
2127 *
2128 * The `hlen` as calculated by skb_zerocopy_headlen() specifies the
2129 * headroom in the `to` buffer.
2130 */
2131void
2132skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
2133{
2134 int i, j = 0;
2135 int plen = 0; /* length of skb->head fragment */
2136 struct page *page;
2137 unsigned int offset;
2138
2139 BUG_ON(!from->head_frag && !hlen);
2140
 2141 /* don't bother with small payloads */
2142 if (len <= skb_tailroom(to)) {
2143 skb_copy_bits(from, 0, skb_put(to, len), len);
2144 return;
2145 }
2146
2147 if (hlen) {
2148 skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
2149 len -= hlen;
2150 } else {
2151 plen = min_t(int, skb_headlen(from), len);
2152 if (plen) {
2153 page = virt_to_head_page(from->head);
2154 offset = from->data - (unsigned char *)page_address(page);
2155 __skb_fill_page_desc(to, 0, page, offset, plen);
2156 get_page(page);
2157 j = 1;
2158 len -= plen;
2159 }
2160 }
2161
2162 to->truesize += len + plen;
2163 to->len += len + plen;
2164 to->data_len += len + plen;
2165
2166 for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
2167 if (!len)
2168 break;
2169 skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
2170 skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
2171 len -= skb_shinfo(to)->frags[j].size;
2172 skb_frag_ref(to, j);
2173 j++;
2174 }
2175 skb_shinfo(to)->nr_frags = j;
2176}
2177EXPORT_SYMBOL_GPL(skb_zerocopy);
2178
2125void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) 2179void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
2126{ 2180{
2127 __wsum csum; 2181 __wsum csum;
@@ -2784,81 +2838,84 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
2784 2838
2785/** 2839/**
2786 * skb_segment - Perform protocol segmentation on skb. 2840 * skb_segment - Perform protocol segmentation on skb.
2787 * @skb: buffer to segment 2841 * @head_skb: buffer to segment
2788 * @features: features for the output path (see dev->features) 2842 * @features: features for the output path (see dev->features)
2789 * 2843 *
2790 * This function performs segmentation on the given skb. It returns 2844 * This function performs segmentation on the given skb. It returns
2791 * a pointer to the first in a list of new skbs for the segments. 2845 * a pointer to the first in a list of new skbs for the segments.
2792 * In case of error it returns ERR_PTR(err). 2846 * In case of error it returns ERR_PTR(err).
2793 */ 2847 */
2794struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) 2848struct sk_buff *skb_segment(struct sk_buff *head_skb,
2849 netdev_features_t features)
2795{ 2850{
2796 struct sk_buff *segs = NULL; 2851 struct sk_buff *segs = NULL;
2797 struct sk_buff *tail = NULL; 2852 struct sk_buff *tail = NULL;
2798 struct sk_buff *fskb = skb_shinfo(skb)->frag_list; 2853 struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
2799 skb_frag_t *skb_frag = skb_shinfo(skb)->frags; 2854 skb_frag_t *frag = skb_shinfo(head_skb)->frags;
2800 unsigned int mss = skb_shinfo(skb)->gso_size; 2855 unsigned int mss = skb_shinfo(head_skb)->gso_size;
2801 unsigned int doffset = skb->data - skb_mac_header(skb); 2856 unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
2857 struct sk_buff *frag_skb = head_skb;
2802 unsigned int offset = doffset; 2858 unsigned int offset = doffset;
2803 unsigned int tnl_hlen = skb_tnl_header_len(skb); 2859 unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
2804 unsigned int headroom; 2860 unsigned int headroom;
2805 unsigned int len; 2861 unsigned int len;
2806 __be16 proto; 2862 __be16 proto;
2807 bool csum; 2863 bool csum;
2808 int sg = !!(features & NETIF_F_SG); 2864 int sg = !!(features & NETIF_F_SG);
2809 int nfrags = skb_shinfo(skb)->nr_frags; 2865 int nfrags = skb_shinfo(head_skb)->nr_frags;
2810 int err = -ENOMEM; 2866 int err = -ENOMEM;
2811 int i = 0; 2867 int i = 0;
2812 int pos; 2868 int pos;
2813 2869
2814 proto = skb_network_protocol(skb); 2870 proto = skb_network_protocol(head_skb);
2815 if (unlikely(!proto)) 2871 if (unlikely(!proto))
2816 return ERR_PTR(-EINVAL); 2872 return ERR_PTR(-EINVAL);
2817 2873
2818 csum = !!can_checksum_protocol(features, proto); 2874 csum = !!can_checksum_protocol(features, proto);
2819 __skb_push(skb, doffset); 2875 __skb_push(head_skb, doffset);
2820 headroom = skb_headroom(skb); 2876 headroom = skb_headroom(head_skb);
2821 pos = skb_headlen(skb); 2877 pos = skb_headlen(head_skb);
2822 2878
2823 do { 2879 do {
2824 struct sk_buff *nskb; 2880 struct sk_buff *nskb;
2825 skb_frag_t *frag; 2881 skb_frag_t *nskb_frag;
2826 int hsize; 2882 int hsize;
2827 int size; 2883 int size;
2828 2884
2829 len = skb->len - offset; 2885 len = head_skb->len - offset;
2830 if (len > mss) 2886 if (len > mss)
2831 len = mss; 2887 len = mss;
2832 2888
2833 hsize = skb_headlen(skb) - offset; 2889 hsize = skb_headlen(head_skb) - offset;
2834 if (hsize < 0) 2890 if (hsize < 0)
2835 hsize = 0; 2891 hsize = 0;
2836 if (hsize > len || !sg) 2892 if (hsize > len || !sg)
2837 hsize = len; 2893 hsize = len;
2838 2894
2839 if (!hsize && i >= nfrags && skb_headlen(fskb) && 2895 if (!hsize && i >= nfrags && skb_headlen(list_skb) &&
2840 (skb_headlen(fskb) == len || sg)) { 2896 (skb_headlen(list_skb) == len || sg)) {
2841 BUG_ON(skb_headlen(fskb) > len); 2897 BUG_ON(skb_headlen(list_skb) > len);
2842 2898
2843 i = 0; 2899 i = 0;
2844 nfrags = skb_shinfo(fskb)->nr_frags; 2900 nfrags = skb_shinfo(list_skb)->nr_frags;
2845 skb_frag = skb_shinfo(fskb)->frags; 2901 frag = skb_shinfo(list_skb)->frags;
2846 pos += skb_headlen(fskb); 2902 frag_skb = list_skb;
2903 pos += skb_headlen(list_skb);
2847 2904
2848 while (pos < offset + len) { 2905 while (pos < offset + len) {
2849 BUG_ON(i >= nfrags); 2906 BUG_ON(i >= nfrags);
2850 2907
2851 size = skb_frag_size(skb_frag); 2908 size = skb_frag_size(frag);
2852 if (pos + size > offset + len) 2909 if (pos + size > offset + len)
2853 break; 2910 break;
2854 2911
2855 i++; 2912 i++;
2856 pos += size; 2913 pos += size;
2857 skb_frag++; 2914 frag++;
2858 } 2915 }
2859 2916
2860 nskb = skb_clone(fskb, GFP_ATOMIC); 2917 nskb = skb_clone(list_skb, GFP_ATOMIC);
2861 fskb = fskb->next; 2918 list_skb = list_skb->next;
2862 2919
2863 if (unlikely(!nskb)) 2920 if (unlikely(!nskb))
2864 goto err; 2921 goto err;
@@ -2879,7 +2936,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2879 __skb_push(nskb, doffset); 2936 __skb_push(nskb, doffset);
2880 } else { 2937 } else {
2881 nskb = __alloc_skb(hsize + doffset + headroom, 2938 nskb = __alloc_skb(hsize + doffset + headroom,
2882 GFP_ATOMIC, skb_alloc_rx_flag(skb), 2939 GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
2883 NUMA_NO_NODE); 2940 NUMA_NO_NODE);
2884 2941
2885 if (unlikely(!nskb)) 2942 if (unlikely(!nskb))
@@ -2895,12 +2952,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2895 segs = nskb; 2952 segs = nskb;
2896 tail = nskb; 2953 tail = nskb;
2897 2954
2898 __copy_skb_header(nskb, skb); 2955 __copy_skb_header(nskb, head_skb);
2899 nskb->mac_len = skb->mac_len; 2956 nskb->mac_len = head_skb->mac_len;
2900 2957
2901 skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); 2958 skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
2902 2959
2903 skb_copy_from_linear_data_offset(skb, -tnl_hlen, 2960 skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
2904 nskb->data - tnl_hlen, 2961 nskb->data - tnl_hlen,
2905 doffset + tnl_hlen); 2962 doffset + tnl_hlen);
2906 2963
@@ -2909,30 +2966,32 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2909 2966
2910 if (!sg) { 2967 if (!sg) {
2911 nskb->ip_summed = CHECKSUM_NONE; 2968 nskb->ip_summed = CHECKSUM_NONE;
2912 nskb->csum = skb_copy_and_csum_bits(skb, offset, 2969 nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
2913 skb_put(nskb, len), 2970 skb_put(nskb, len),
2914 len, 0); 2971 len, 0);
2915 continue; 2972 continue;
2916 } 2973 }
2917 2974
2918 frag = skb_shinfo(nskb)->frags; 2975 nskb_frag = skb_shinfo(nskb)->frags;
2919 2976
2920 skb_copy_from_linear_data_offset(skb, offset, 2977 skb_copy_from_linear_data_offset(head_skb, offset,
2921 skb_put(nskb, hsize), hsize); 2978 skb_put(nskb, hsize), hsize);
2922 2979
2923 skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; 2980 skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags &
2981 SKBTX_SHARED_FRAG;
2924 2982
2925 while (pos < offset + len) { 2983 while (pos < offset + len) {
2926 if (i >= nfrags) { 2984 if (i >= nfrags) {
2927 BUG_ON(skb_headlen(fskb)); 2985 BUG_ON(skb_headlen(list_skb));
2928 2986
2929 i = 0; 2987 i = 0;
2930 nfrags = skb_shinfo(fskb)->nr_frags; 2988 nfrags = skb_shinfo(list_skb)->nr_frags;
2931 skb_frag = skb_shinfo(fskb)->frags; 2989 frag = skb_shinfo(list_skb)->frags;
2990 frag_skb = list_skb;
2932 2991
2933 BUG_ON(!nfrags); 2992 BUG_ON(!nfrags);
2934 2993
2935 fskb = fskb->next; 2994 list_skb = list_skb->next;
2936 } 2995 }
2937 2996
2938 if (unlikely(skb_shinfo(nskb)->nr_frags >= 2997 if (unlikely(skb_shinfo(nskb)->nr_frags >=
@@ -2943,27 +3002,30 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2943 goto err; 3002 goto err;
2944 } 3003 }
2945 3004
2946 *frag = *skb_frag; 3005 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
2947 __skb_frag_ref(frag); 3006 goto err;
2948 size = skb_frag_size(frag); 3007
3008 *nskb_frag = *frag;
3009 __skb_frag_ref(nskb_frag);
3010 size = skb_frag_size(nskb_frag);
2949 3011
2950 if (pos < offset) { 3012 if (pos < offset) {
2951 frag->page_offset += offset - pos; 3013 nskb_frag->page_offset += offset - pos;
2952 skb_frag_size_sub(frag, offset - pos); 3014 skb_frag_size_sub(nskb_frag, offset - pos);
2953 } 3015 }
2954 3016
2955 skb_shinfo(nskb)->nr_frags++; 3017 skb_shinfo(nskb)->nr_frags++;
2956 3018
2957 if (pos + size <= offset + len) { 3019 if (pos + size <= offset + len) {
2958 i++; 3020 i++;
2959 skb_frag++; 3021 frag++;
2960 pos += size; 3022 pos += size;
2961 } else { 3023 } else {
2962 skb_frag_size_sub(frag, pos + size - (offset + len)); 3024 skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
2963 goto skip_fraglist; 3025 goto skip_fraglist;
2964 } 3026 }
2965 3027
2966 frag++; 3028 nskb_frag++;
2967 } 3029 }
2968 3030
2969skip_fraglist: 3031skip_fraglist:
@@ -2977,15 +3039,12 @@ perform_csum_check:
2977 nskb->len - doffset, 0); 3039 nskb->len - doffset, 0);
2978 nskb->ip_summed = CHECKSUM_NONE; 3040 nskb->ip_summed = CHECKSUM_NONE;
2979 } 3041 }
2980 } while ((offset += len) < skb->len); 3042 } while ((offset += len) < head_skb->len);
2981 3043
2982 return segs; 3044 return segs;
2983 3045
2984err: 3046err:
2985 while ((skb = segs)) { 3047 kfree_skb_list(segs);
2986 segs = skb->next;
2987 kfree_skb(skb);
2988 }
2989 return ERR_PTR(err); 3048 return ERR_PTR(err);
2990} 3049}
2991EXPORT_SYMBOL_GPL(skb_segment); 3050EXPORT_SYMBOL_GPL(skb_segment);
@@ -3468,6 +3527,278 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
3468} 3527}
3469EXPORT_SYMBOL_GPL(skb_partial_csum_set); 3528EXPORT_SYMBOL_GPL(skb_partial_csum_set);
3470 3529
3530static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len,
3531 unsigned int max)
3532{
3533 if (skb_headlen(skb) >= len)
3534 return 0;
3535
3536 /* If we need to pullup then pullup to the max, so we
3537 * won't need to do it again.
3538 */
3539 if (max > skb->len)
3540 max = skb->len;
3541
3542 if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
3543 return -ENOMEM;
3544
3545 if (skb_headlen(skb) < len)
3546 return -EPROTO;
3547
3548 return 0;
3549}
3550
3551/* This value should be large enough to cover a tagged ethernet header plus
3552 * maximally sized IP and TCP or UDP headers.
3553 */
3554#define MAX_IP_HDR_LEN 128
3555
3556static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate)
3557{
3558 unsigned int off;
3559 bool fragment;
3560 int err;
3561
3562 fragment = false;
3563
3564 err = skb_maybe_pull_tail(skb,
3565 sizeof(struct iphdr),
3566 MAX_IP_HDR_LEN);
3567 if (err < 0)
3568 goto out;
3569
3570 if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
3571 fragment = true;
3572
3573 off = ip_hdrlen(skb);
3574
3575 err = -EPROTO;
3576
3577 if (fragment)
3578 goto out;
3579
3580 switch (ip_hdr(skb)->protocol) {
3581 case IPPROTO_TCP:
3582 err = skb_maybe_pull_tail(skb,
3583 off + sizeof(struct tcphdr),
3584 MAX_IP_HDR_LEN);
3585 if (err < 0)
3586 goto out;
3587
3588 if (!skb_partial_csum_set(skb, off,
3589 offsetof(struct tcphdr, check))) {
3590 err = -EPROTO;
3591 goto out;
3592 }
3593
3594 if (recalculate)
3595 tcp_hdr(skb)->check =
3596 ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
3597 ip_hdr(skb)->daddr,
3598 skb->len - off,
3599 IPPROTO_TCP, 0);
3600 break;
3601 case IPPROTO_UDP:
3602 err = skb_maybe_pull_tail(skb,
3603 off + sizeof(struct udphdr),
3604 MAX_IP_HDR_LEN);
3605 if (err < 0)
3606 goto out;
3607
3608 if (!skb_partial_csum_set(skb, off,
3609 offsetof(struct udphdr, check))) {
3610 err = -EPROTO;
3611 goto out;
3612 }
3613
3614 if (recalculate)
3615 udp_hdr(skb)->check =
3616 ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
3617 ip_hdr(skb)->daddr,
3618 skb->len - off,
3619 IPPROTO_UDP, 0);
3620 break;
3621 default:
3622 goto out;
3623 }
3624
3625 err = 0;
3626
3627out:
3628 return err;
3629}
3630
3631/* This value should be large enough to cover a tagged ethernet header plus
3632 * an IPv6 header, all options, and a maximal TCP or UDP header.
3633 */
3634#define MAX_IPV6_HDR_LEN 256
3635
3636#define OPT_HDR(type, skb, off) \
3637 (type *)(skb_network_header(skb) + (off))
3638
3639static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
3640{
3641 int err;
3642 u8 nexthdr;
3643 unsigned int off;
3644 unsigned int len;
3645 bool fragment;
3646 bool done;
3647
3648 fragment = false;
3649 done = false;
3650
3651 off = sizeof(struct ipv6hdr);
3652
3653 err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
3654 if (err < 0)
3655 goto out;
3656
3657 nexthdr = ipv6_hdr(skb)->nexthdr;
3658
3659 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
3660 while (off <= len && !done) {
3661 switch (nexthdr) {
3662 case IPPROTO_DSTOPTS:
3663 case IPPROTO_HOPOPTS:
3664 case IPPROTO_ROUTING: {
3665 struct ipv6_opt_hdr *hp;
3666
3667 err = skb_maybe_pull_tail(skb,
3668 off +
3669 sizeof(struct ipv6_opt_hdr),
3670 MAX_IPV6_HDR_LEN);
3671 if (err < 0)
3672 goto out;
3673
3674 hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
3675 nexthdr = hp->nexthdr;
3676 off += ipv6_optlen(hp);
3677 break;
3678 }
3679 case IPPROTO_AH: {
3680 struct ip_auth_hdr *hp;
3681
3682 err = skb_maybe_pull_tail(skb,
3683 off +
3684 sizeof(struct ip_auth_hdr),
3685 MAX_IPV6_HDR_LEN);
3686 if (err < 0)
3687 goto out;
3688
3689 hp = OPT_HDR(struct ip_auth_hdr, skb, off);
3690 nexthdr = hp->nexthdr;
3691 off += ipv6_authlen(hp);
3692 break;
3693 }
3694 case IPPROTO_FRAGMENT: {
3695 struct frag_hdr *hp;
3696
3697 err = skb_maybe_pull_tail(skb,
3698 off +
3699 sizeof(struct frag_hdr),
3700 MAX_IPV6_HDR_LEN);
3701 if (err < 0)
3702 goto out;
3703
3704 hp = OPT_HDR(struct frag_hdr, skb, off);
3705
3706 if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
3707 fragment = true;
3708
3709 nexthdr = hp->nexthdr;
3710 off += sizeof(struct frag_hdr);
3711 break;
3712 }
3713 default:
3714 done = true;
3715 break;
3716 }
3717 }
3718
3719 err = -EPROTO;
3720
3721 if (!done || fragment)
3722 goto out;
3723
3724 switch (nexthdr) {
3725 case IPPROTO_TCP:
3726 err = skb_maybe_pull_tail(skb,
3727 off + sizeof(struct tcphdr),
3728 MAX_IPV6_HDR_LEN);
3729 if (err < 0)
3730 goto out;
3731
3732 if (!skb_partial_csum_set(skb, off,
3733 offsetof(struct tcphdr, check))) {
3734 err = -EPROTO;
3735 goto out;
3736 }
3737
3738 if (recalculate)
3739 tcp_hdr(skb)->check =
3740 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3741 &ipv6_hdr(skb)->daddr,
3742 skb->len - off,
3743 IPPROTO_TCP, 0);
3744 break;
3745 case IPPROTO_UDP:
3746 err = skb_maybe_pull_tail(skb,
3747 off + sizeof(struct udphdr),
3748 MAX_IPV6_HDR_LEN);
3749 if (err < 0)
3750 goto out;
3751
3752 if (!skb_partial_csum_set(skb, off,
3753 offsetof(struct udphdr, check))) {
3754 err = -EPROTO;
3755 goto out;
3756 }
3757
3758 if (recalculate)
3759 udp_hdr(skb)->check =
3760 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3761 &ipv6_hdr(skb)->daddr,
3762 skb->len - off,
3763 IPPROTO_UDP, 0);
3764 break;
3765 default:
3766 goto out;
3767 }
3768
3769 err = 0;
3770
3771out:
3772 return err;
3773}
3774
3775/**
3776 * skb_checksum_setup - set up partial checksum offset
3777 * @skb: the skb to set up
3778 * @recalculate: if true the pseudo-header checksum will be recalculated
3779 */
3780int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
3781{
3782 int err;
3783
3784 switch (skb->protocol) {
3785 case htons(ETH_P_IP):
3786 err = skb_checksum_setup_ip(skb, recalculate);
3787 break;
3788
3789 case htons(ETH_P_IPV6):
3790 err = skb_checksum_setup_ipv6(skb, recalculate);
3791 break;
3792
3793 default:
3794 err = -EPROTO;
3795 break;
3796 }
3797
3798 return err;
3799}
3800EXPORT_SYMBOL(skb_checksum_setup);
3801
3471void __skb_warn_lro_forwarding(const struct sk_buff *skb) 3802void __skb_warn_lro_forwarding(const struct sk_buff *skb)
3472{ 3803{
3473 net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", 3804 net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
@@ -3592,3 +3923,26 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
3592 nf_reset_trace(skb); 3923 nf_reset_trace(skb);
3593} 3924}
3594EXPORT_SYMBOL_GPL(skb_scrub_packet); 3925EXPORT_SYMBOL_GPL(skb_scrub_packet);
3926
3927/**
3928 * skb_gso_transport_seglen - Return length of individual segments of a gso packet
3929 *
3930 * @skb: GSO skb
3931 *
3932 * skb_gso_transport_seglen is used to determine the real size of the
3933 * individual segments, including Layer4 headers (TCP/UDP).
3934 *
3935 * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
3936 */
3937unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
3938{
3939 const struct skb_shared_info *shinfo = skb_shinfo(skb);
3940 unsigned int hdr_len;
3941
3942 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
3943 hdr_len = tcp_hdrlen(skb);
3944 else
3945 hdr_len = sizeof(struct udphdr);
3946 return hdr_len + shinfo->gso_size;
3947}
3948EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
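
The skbuff.c part of the series does several independent things: the socket-splice pipe_buf_operations are dropped in favour of the shared nosteal_pipe_buf_ops, skb_zerocopy()/skb_zerocopy_headlen() move into the core so multiple users can share them, skb_segment() renames its locals (head_skb, list_skb, frag_skb) and orphans frags from the correct source skb, and two new helpers are exported: skb_checksum_setup() and skb_gso_transport_seglen(). A hedged sketch of a skb_checksum_setup() caller, for example a paravirtual receive path that must re-establish the partial-checksum state before handing the packet up (the function name is illustrative):

static int example_rx_checksum_fixup(struct sk_buff *skb)
{
        /* second argument: recalculate the pseudo-header checksum */
        int err = skb_checksum_setup(skb, true);

        if (err) {              /* e.g. -EPROTO for non-IP or malformed headers */
                kfree_skb(skb);
                return err;
        }
        return netif_rx(skb);
}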
diff --git a/net/core/sock.c b/net/core/sock.c
index 5393b4b719d7..c0fc6bdad1e3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -925,8 +925,8 @@ set_rcvbuf:
925EXPORT_SYMBOL(sock_setsockopt); 925EXPORT_SYMBOL(sock_setsockopt);
926 926
927 927
928void cred_to_ucred(struct pid *pid, const struct cred *cred, 928static void cred_to_ucred(struct pid *pid, const struct cred *cred,
929 struct ucred *ucred) 929 struct ucred *ucred)
930{ 930{
931 ucred->pid = pid_vnr(pid); 931 ucred->pid = pid_vnr(pid);
932 ucred->uid = ucred->gid = -1; 932 ucred->uid = ucred->gid = -1;
@@ -937,7 +937,6 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred,
937 ucred->gid = from_kgid_munged(current_ns, cred->egid); 937 ucred->gid = from_kgid_munged(current_ns, cred->egid);
938 } 938 }
939} 939}
940EXPORT_SYMBOL_GPL(cred_to_ucred);
941 940
942int sock_getsockopt(struct socket *sock, int level, int optname, 941int sock_getsockopt(struct socket *sock, int level, int optname,
943 char __user *optval, int __user *optlen) 942 char __user *optval, int __user *optlen)
@@ -1168,6 +1167,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1168 v.val = sock_flag(sk, SOCK_FILTER_LOCKED); 1167 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1169 break; 1168 break;
1170 1169
1170 case SO_BPF_EXTENSIONS:
1171 v.val = bpf_tell_extensions();
1172 break;
1173
1171 case SO_SELECT_ERR_QUEUE: 1174 case SO_SELECT_ERR_QUEUE:
1172 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); 1175 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1173 break; 1176 break;
@@ -1308,19 +1311,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
1308 module_put(owner); 1311 module_put(owner);
1309} 1312}
1310 1313
1311#if IS_ENABLED(CONFIG_NET_CLS_CGROUP) 1314#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
1312void sock_update_classid(struct sock *sk)
1313{
1314 u32 classid;
1315
1316 classid = task_cls_classid(current);
1317 if (classid != sk->sk_classid)
1318 sk->sk_classid = classid;
1319}
1320EXPORT_SYMBOL(sock_update_classid);
1321#endif
1322
1323#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
1324void sock_update_netprioidx(struct sock *sk) 1315void sock_update_netprioidx(struct sock *sk)
1325{ 1316{
1326 if (in_interrupt()) 1317 if (in_interrupt())
@@ -1666,22 +1657,6 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1666EXPORT_SYMBOL(sock_wmalloc); 1657EXPORT_SYMBOL(sock_wmalloc);
1667 1658
1668/* 1659/*
1669 * Allocate a skb from the socket's receive buffer.
1670 */
1671struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1672 gfp_t priority)
1673{
1674 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1675 struct sk_buff *skb = alloc_skb(size, priority);
1676 if (skb) {
1677 skb_set_owner_r(skb, sk);
1678 return skb;
1679 }
1680 }
1681 return NULL;
1682}
1683
1684/*
1685 * Allocate a memory block from the socket's option memory buffer. 1660 * Allocate a memory block from the socket's option memory buffer.
1686 */ 1661 */
1687void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) 1662void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
@@ -1800,7 +1775,9 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1800 while (order) { 1775 while (order) {
1801 if (npages >= 1 << order) { 1776 if (npages >= 1 << order) {
1802 page = alloc_pages(sk->sk_allocation | 1777 page = alloc_pages(sk->sk_allocation |
1803 __GFP_COMP | __GFP_NOWARN, 1778 __GFP_COMP |
1779 __GFP_NOWARN |
1780 __GFP_NORETRY,
1804 order); 1781 order);
1805 if (page) 1782 if (page)
1806 goto fill_page; 1783 goto fill_page;
@@ -1865,14 +1842,12 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
1865 put_page(pfrag->page); 1842 put_page(pfrag->page);
1866 } 1843 }
1867 1844
1868 /* We restrict high order allocations to users that can afford to wait */ 1845 order = SKB_FRAG_PAGE_ORDER;
1869 order = (prio & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
1870
1871 do { 1846 do {
1872 gfp_t gfp = prio; 1847 gfp_t gfp = prio;
1873 1848
1874 if (order) 1849 if (order)
1875 gfp |= __GFP_COMP | __GFP_NOWARN; 1850 gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
1876 pfrag->page = alloc_pages(gfp, order); 1851 pfrag->page = alloc_pages(gfp, order);
1877 if (likely(pfrag->page)) { 1852 if (likely(pfrag->page)) {
1878 pfrag->offset = 0; 1853 pfrag->offset = 0;
@@ -2382,10 +2357,13 @@ void release_sock(struct sock *sk)
2382 if (sk->sk_backlog.tail) 2357 if (sk->sk_backlog.tail)
2383 __release_sock(sk); 2358 __release_sock(sk);
2384 2359
2360 /* Warning : release_cb() might need to release sk ownership,
2361 * ie call sock_release_ownership(sk) before us.
2362 */
2385 if (sk->sk_prot->release_cb) 2363 if (sk->sk_prot->release_cb)
2386 sk->sk_prot->release_cb(sk); 2364 sk->sk_prot->release_cb(sk);
2387 2365
2388 sk->sk_lock.owned = 0; 2366 sock_release_ownership(sk);
2389 if (waitqueue_active(&sk->sk_lock.wq)) 2367 if (waitqueue_active(&sk->sk_lock.wq))
2390 wake_up(&sk->sk_lock.wq); 2368 wake_up(&sk->sk_lock.wq);
2391 spin_unlock_bh(&sk->sk_lock.slock); 2369 spin_unlock_bh(&sk->sk_lock.slock);
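
sock.c picks up the new SO_BPF_EXTENSIONS getsockopt (backed by bpf_tell_extensions(), which advertises the level of SKF_AD_* ancillary extensions the BPF filter core supports), makes cred_to_ucred() static, drops the unused sock_rmalloc(), stops retrying high-order page allocations by adding __GFP_NORETRY, and lets release_cb() take over socket ownership via sock_release_ownership(). An illustrative userspace probe for the new option; the fallback value 48 matches asm-generic/socket.h of that era, while current libc headers normally define it:

#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_BPF_EXTENSIONS
#define SO_BPF_EXTENSIONS 48
#endif

int main(void)
{
        int ext = 0;
        socklen_t len = sizeof(ext);
        int s = socket(AF_INET, SOCK_DGRAM, 0);

        if (s >= 0 && getsockopt(s, SOL_SOCKET, SO_BPF_EXTENSIONS, &ext, &len) == 0)
                printf("bpf ancillary extensions: %d\n", ext);
        return 0;
}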
diff --git a/net/core/stream.c b/net/core/stream.c
index 512f0a24269b..301c05f26060 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -122,7 +122,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
122 DEFINE_WAIT(wait); 122 DEFINE_WAIT(wait);
123 123
124 if (sk_stream_memory_free(sk)) 124 if (sk_stream_memory_free(sk))
125 current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2; 125 current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2;
126 126
127 while (1) { 127 while (1) {
128 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 128 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cca444190907..cf9cd13509a7 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -122,7 +122,8 @@ static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
122 synchronize_rcu(); 122 synchronize_rcu();
123 kfree(cur); 123 kfree(cur);
124 } else if (!cur && cpumask_test_cpu(i, mask)) { 124 } else if (!cur && cpumask_test_cpu(i, mask)) {
125 cur = kzalloc(len, GFP_KERNEL); 125 cur = kzalloc_node(len, GFP_KERNEL,
126 cpu_to_node(i));
126 if (!cur) { 127 if (!cur) {
127 /* not unwinding previous changes */ 128 /* not unwinding previous changes */
128 ret = -ENOMEM; 129 ret = -ENOMEM;