author    Linus Torvalds <torvalds@linux-foundation.org>  2008-12-28 15:49:40 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2008-12-28 15:49:40 -0500
commit    0191b625ca5a46206d2fb862bb08f36f2fcb3b31 (patch)
tree      454d1842b1833d976da62abcbd5c47521ebe9bd7 /net/core
parent    54a696bd07c14d3b1192d03ce7269bc59b45209a (diff)
parent    eb56092fc168bf5af199d47af50c0d84a96db898 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1429 commits)
  net: Allow dependancies of FDDI & Tokenring to be modular.
  igb: Fix build warning when DCA is disabled.
  net: Fix warning fallout from recent NAPI interface changes.
  gro: Fix potential use after free
  sfc: If AN is enabled, always read speed/duplex from the AN advertising bits
  sfc: When disabling the NIC, close the device rather than unregistering it
  sfc: SFT9001: Add cable diagnostics
  sfc: Add support for multiple PHY self-tests
  sfc: Merge top-level functions for self-tests
  sfc: Clean up PHY mode management in loopback self-test
  sfc: Fix unreliable link detection in some loopback modes
  sfc: Generate unique names for per-NIC workqueues
  802.3ad: use standard ethhdr instead of ad_header
  802.3ad: generalize out mac address initializer
  802.3ad: initialize ports LACPDU from const initializer
  802.3ad: remove typedef around ad_system
  802.3ad: turn ports is_individual into a bool
  802.3ad: turn ports is_enabled into a bool
  802.3ad: make ntt bool
  ixgbe: Fix set_ringparam in ixgbe to use the same memory pools.
  ...

Fixed trivial IPv4/6 address printing conflicts in fs/cifs/connect.c due
to the conversion to %pI (in this networking merge) and the addition of
doing IPv6 addresses (from the earlier merge of CIFS).
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/datagram.c         |    5
-rw-r--r--  net/core/dev.c              |  458
-rw-r--r--  net/core/dst.c              |    6
-rw-r--r--  net/core/ethtool.c          |   53
-rw-r--r--  net/core/fib_rules.c        |    7
-rw-r--r--  net/core/filter.c           |   19
-rw-r--r--  net/core/flow.c             |    6
-rw-r--r--  net/core/gen_estimator.c    |   97
-rw-r--r--  net/core/neighbour.c        |   73
-rw-r--r--  net/core/net-sysfs.c        |   15
-rw-r--r--  net/core/net_namespace.c    |    2
-rw-r--r--  net/core/netpoll.c          |   20
-rw-r--r--  net/core/pktgen.c           |   42
-rw-r--r--  net/core/rtnetlink.c        |   15
-rw-r--r--  net/core/skbuff.c           |  277
-rw-r--r--  net/core/sock.c             |   46
-rw-r--r--  net/core/sysctl_net_core.c  |   68
17 files changed, 923 insertions, 286 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ee631843c2f5..5e2ac0c4b07c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -209,7 +209,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 {
         kfree_skb(skb);
-        sk_mem_reclaim(sk);
+        sk_mem_reclaim_partial(sk);
 }
 
 /**
@@ -248,8 +248,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
                 spin_unlock_bh(&sk->sk_receive_queue.lock);
         }
 
-        kfree_skb(skb);
-        sk_mem_reclaim(sk);
+        skb_free_datagram(sk, skb);
         return err;
 }
 
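For orientation (not part of the patch): these two helpers are the standard exit paths of a datagram recvmsg() implementation, which is why folding the reclaim into skb_free_datagram() and switching to sk_mem_reclaim_partial() pays off on every received packet. A minimal sketch of that calling pattern with a hypothetical protocol — skb_recv_datagram(), skb_copy_datagram_iovec(), skb_free_datagram() and skb_kill_datagram() are the real 2.6.28-era helpers:

/* Hypothetical protocol recvmsg path (sketch, not from this patch). */
static int example_recvmsg(struct sock *sk, struct msghdr *msg,
                           size_t len, int flags)
{
        struct sk_buff *skb;
        int err;

        skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
        if (!skb)
                return err;

        err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
        if (err) {
                /* error path: drops the skb and, after this patch,
                 * reclaims socket memory via skb_free_datagram() */
                skb_kill_datagram(sk, skb, flags);
                return err;
        }

        /* normal path: sk_mem_reclaim_partial() keeps some forward
         * allocation cached instead of returning it all at once */
        skb_free_datagram(sk, skb);
        return len;
}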
diff --git a/net/core/dev.c b/net/core/dev.c
index 89912ae6de65..446424027d24 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -108,7 +108,6 @@
 #include <linux/init.h>
 #include <linux/kmod.h>
 #include <linux/module.h>
-#include <linux/kallsyms.h>
 #include <linux/netpoll.h>
 #include <linux/rcupdate.h>
 #include <linux/delay.h>
@@ -130,6 +129,9 @@
 
 #include "net-sysfs.h"
 
+/* Instead of increasing this, you should create a hash table. */
+#define MAX_GRO_SKBS 8
+
 /*
  * The list of packet types we will receive (as opposed to discard)
  * and the routines to invoke.
@@ -281,8 +283,8 @@ static const unsigned short netdev_lock_type[] =
          ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
          ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
          ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
-         ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
-         ARPHRD_NONE};
+         ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
+         ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
 
 static const char *netdev_lock_name[] =
         {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
@@ -298,8 +300,8 @@ static const char *netdev_lock_name[] =
298 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", 300 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
299 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", 301 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
300 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", 302 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
301 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID", 303 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
302 "_xmit_NONE"}; 304 "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
303 305
304static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; 306static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
305static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; 307static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
@@ -924,10 +926,15 @@ int dev_change_name(struct net_device *dev, const char *newname)
         strlcpy(dev->name, newname, IFNAMSIZ);
 
 rollback:
-        ret = device_rename(&dev->dev, dev->name);
-        if (ret) {
-                memcpy(dev->name, oldname, IFNAMSIZ);
-                return ret;
+        /* For now only devices in the initial network namespace
+         * are in sysfs.
+         */
+        if (net == &init_net) {
+                ret = device_rename(&dev->dev, dev->name);
+                if (ret) {
+                        memcpy(dev->name, oldname, IFNAMSIZ);
+                        return ret;
+                }
         }
 
         write_lock_bh(&dev_base_lock);
@@ -1055,6 +1062,7 @@ void dev_load(struct net *net, const char *name)
  */
 int dev_open(struct net_device *dev)
 {
+        const struct net_device_ops *ops = dev->netdev_ops;
         int ret = 0;
 
         ASSERT_RTNL();
@@ -1077,11 +1085,11 @@ int dev_open(struct net_device *dev)
          */
         set_bit(__LINK_STATE_START, &dev->state);
 
-        if (dev->validate_addr)
-                ret = dev->validate_addr(dev);
+        if (ops->ndo_validate_addr)
+                ret = ops->ndo_validate_addr(dev);
 
-        if (!ret && dev->open)
-                ret = dev->open(dev);
+        if (!ret && ops->ndo_open)
+                ret = ops->ndo_open(dev);
 
         /*
          * If it went open OK then:
1125 */ 1133 */
 int dev_close(struct net_device *dev)
 {
+        const struct net_device_ops *ops = dev->netdev_ops;
         ASSERT_RTNL();
 
         might_sleep();
@@ -1157,8 +1166,8 @@ int dev_close(struct net_device *dev)
          * We allow it to be called even after a DETACH hot-plug
          * event.
          */
-        if (dev->stop)
-                dev->stop(dev);
+        if (ops->ndo_stop)
+                ops->ndo_stop(dev);
 
         /*
          * Device is now down.
@@ -1527,8 +1536,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
         __be16 type = skb->protocol;
         int err;
 
-        BUG_ON(skb_shinfo(skb)->frag_list);
-
         skb_reset_mac_header(skb);
         skb->mac_len = skb->network_header - skb->mac_header;
         __skb_pull(skb, skb->mac_len);
@@ -1654,6 +1661,9 @@ static int dev_gso_segment(struct sk_buff *skb)
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                         struct netdev_queue *txq)
 {
+        const struct net_device_ops *ops = dev->netdev_ops;
+
+        prefetch(&dev->netdev_ops->ndo_start_xmit);
         if (likely(!skb->next)) {
                 if (!list_empty(&ptype_all))
                         dev_queue_xmit_nit(skb, dev);
@@ -1665,7 +1675,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                         goto gso;
         }
 
-        return dev->hard_start_xmit(skb, dev);
+        return ops->ndo_start_xmit(skb, dev);
         }
 
 gso:
@@ -1675,7 +1685,7 @@ gso:
 
         skb->next = nskb->next;
         nskb->next = NULL;
-        rc = dev->hard_start_xmit(nskb, dev);
+        rc = ops->ndo_start_xmit(nskb, dev);
         if (unlikely(rc)) {
                 nskb->next = skb->next;
                 skb->next = nskb;
@@ -1749,10 +1759,11 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
                                         struct sk_buff *skb)
 {
+        const struct net_device_ops *ops = dev->netdev_ops;
         u16 queue_index = 0;
 
-        if (dev->select_queue)
-                queue_index = dev->select_queue(dev, skb);
+        if (ops->ndo_select_queue)
+                queue_index = ops->ndo_select_queue(dev, skb);
         else if (dev->real_num_tx_queues > 1)
                 queue_index = simple_tx_hash(dev, skb);
 
@@ -2251,8 +2262,10 @@ int netif_receive_skb(struct sk_buff *skb)
         rcu_read_lock();
 
         /* Don't receive packets in an exiting network namespace */
-        if (!net_alive(dev_net(skb->dev)))
+        if (!net_alive(dev_net(skb->dev))) {
+                kfree_skb(skb);
                 goto out;
+        }
 
 #ifdef CONFIG_NET_CLS_ACT
         if (skb->tc_verd & TC_NCLS) {
@@ -2325,6 +2338,125 @@ static void flush_backlog(void *arg)
         }
 }
 
+static int napi_gro_complete(struct sk_buff *skb)
+{
+        struct packet_type *ptype;
+        __be16 type = skb->protocol;
+        struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+        int err = -ENOENT;
+
+        if (!skb_shinfo(skb)->frag_list)
+                goto out;
+
+        rcu_read_lock();
+        list_for_each_entry_rcu(ptype, head, list) {
+                if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+                        continue;
+
+                err = ptype->gro_complete(skb);
+                break;
+        }
+        rcu_read_unlock();
+
+        if (err) {
+                WARN_ON(&ptype->list == head);
+                kfree_skb(skb);
+                return NET_RX_SUCCESS;
+        }
+
+out:
+        __skb_push(skb, -skb_network_offset(skb));
+        return netif_receive_skb(skb);
+}
+
+void napi_gro_flush(struct napi_struct *napi)
+{
+        struct sk_buff *skb, *next;
+
+        for (skb = napi->gro_list; skb; skb = next) {
+                next = skb->next;
+                skb->next = NULL;
+                napi_gro_complete(skb);
+        }
+
+        napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(napi_gro_flush);
+
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+        struct sk_buff **pp = NULL;
+        struct packet_type *ptype;
+        __be16 type = skb->protocol;
+        struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+        int count = 0;
+        int same_flow;
+        int mac_len;
+
+        if (!(skb->dev->features & NETIF_F_GRO))
+                goto normal;
+
+        rcu_read_lock();
+        list_for_each_entry_rcu(ptype, head, list) {
+                struct sk_buff *p;
+
+                if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+                        continue;
+
+                skb_reset_network_header(skb);
+                mac_len = skb->network_header - skb->mac_header;
+                skb->mac_len = mac_len;
+                NAPI_GRO_CB(skb)->same_flow = 0;
+                NAPI_GRO_CB(skb)->flush = 0;
+
+                for (p = napi->gro_list; p; p = p->next) {
+                        count++;
+                        NAPI_GRO_CB(p)->same_flow =
+                                p->mac_len == mac_len &&
+                                !memcmp(skb_mac_header(p), skb_mac_header(skb),
+                                        mac_len);
+                        NAPI_GRO_CB(p)->flush = 0;
+                }
+
+                pp = ptype->gro_receive(&napi->gro_list, skb);
+                break;
+        }
+        rcu_read_unlock();
+
+        if (&ptype->list == head)
+                goto normal;
+
+        same_flow = NAPI_GRO_CB(skb)->same_flow;
+
+        if (pp) {
+                struct sk_buff *nskb = *pp;
+
+                *pp = nskb->next;
+                nskb->next = NULL;
+                napi_gro_complete(nskb);
+                count--;
+        }
+
+        if (same_flow)
+                goto ok;
+
+        if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
+                __skb_push(skb, -skb_network_offset(skb));
+                goto normal;
+        }
+
+        NAPI_GRO_CB(skb)->count = 1;
+        skb->next = napi->gro_list;
+        napi->gro_list = skb;
+
+ok:
+        return NET_RX_SUCCESS;
+
+normal:
+        return netif_receive_skb(skb);
+}
+EXPORT_SYMBOL(napi_gro_receive);
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
         int work = 0;
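A GRO-capable driver opts in by setting NETIF_F_GRO in dev->features and handing received packets to napi_gro_receive() instead of netif_receive_skb(); held packets accumulate on napi->gro_list until napi_gro_flush() — called from napi_complete(), below — pushes them up the stack. A hedged sketch of a poll routine under the new API; everything named example_* is a placeholder, the napi_* and eth_type_trans() calls are the real interface:

/* Hypothetical adapter glue for the sketch. */
struct example_adapter {
        struct napi_struct napi;
        struct net_device *netdev;
};
static struct sk_buff *example_rx_next(struct example_adapter *a); /* stub */
static void example_enable_rx_irq(struct example_adapter *a);      /* stub */

static int example_poll(struct napi_struct *napi, int budget)
{
        struct example_adapter *adapter =
                container_of(napi, struct example_adapter, napi);
        struct sk_buff *skb;
        int work_done = 0;

        while (work_done < budget && (skb = example_rx_next(adapter))) {
                skb->protocol = eth_type_trans(skb, adapter->netdev);
                napi_gro_receive(napi, skb);        /* may hold or merge */
                work_done++;
        }

        if (work_done < budget) {
                napi_complete(napi);                /* flushes gro_list */
                example_enable_rx_irq(adapter);
        }
        return work_done;
}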
@@ -2344,9 +2476,11 @@ static int process_backlog(struct napi_struct *napi, int quota)
                 }
                 local_irq_enable();
 
-                netif_receive_skb(skb);
+                napi_gro_receive(napi, skb);
         } while (++work < quota && jiffies == start_time);
 
+        napi_gro_flush(napi);
+
         return work;
 }
 
@@ -2367,11 +2501,73 @@ void __napi_schedule(struct napi_struct *n)
 }
 EXPORT_SYMBOL(__napi_schedule);
 
+void __napi_complete(struct napi_struct *n)
+{
+        BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+        BUG_ON(n->gro_list);
+
+        list_del(&n->poll_list);
+        smp_mb__before_clear_bit();
+        clear_bit(NAPI_STATE_SCHED, &n->state);
+}
+EXPORT_SYMBOL(__napi_complete);
+
+void napi_complete(struct napi_struct *n)
+{
+        unsigned long flags;
+
+        /*
+         * don't let napi dequeue from the cpu poll list
+         * just in case its running on a different cpu
+         */
+        if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
+                return;
+
+        napi_gro_flush(n);
+        local_irq_save(flags);
+        __napi_complete(n);
+        local_irq_restore(flags);
+}
+EXPORT_SYMBOL(napi_complete);
+
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+                    int (*poll)(struct napi_struct *, int), int weight)
+{
+        INIT_LIST_HEAD(&napi->poll_list);
+        napi->gro_list = NULL;
+        napi->poll = poll;
+        napi->weight = weight;
+        list_add(&napi->dev_list, &dev->napi_list);
+#ifdef CONFIG_NETPOLL
+        napi->dev = dev;
+        spin_lock_init(&napi->poll_lock);
+        napi->poll_owner = -1;
+#endif
+        set_bit(NAPI_STATE_SCHED, &napi->state);
+}
+EXPORT_SYMBOL(netif_napi_add);
+
+void netif_napi_del(struct napi_struct *napi)
+{
+        struct sk_buff *skb, *next;
+
+        list_del_init(&napi->dev_list);
+
+        for (skb = napi->gro_list; skb; skb = next) {
+                next = skb->next;
+                skb->next = NULL;
+                kfree_skb(skb);
+        }
+
+        napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(netif_napi_del);
+
 
 static void net_rx_action(struct softirq_action *h)
 {
         struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
-        unsigned long start_time = jiffies;
+        unsigned long time_limit = jiffies + 2;
         int budget = netdev_budget;
         void *have;
 
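The lifecycle half of the API: netif_napi_add() now also links the context into dev->napi_list, which is what lets free_netdev() (further down) clean up with netif_napi_del() even if the driver forgets. A sketch of registration and teardown for the hypothetical adapter above:

static int example_probe(struct example_adapter *adapter)
{
        /* 64 is the conventional NAPI weight for Ethernet drivers. */
        netif_napi_add(adapter->netdev, &adapter->napi, example_poll, 64);
        return register_netdev(adapter->netdev);
}

static void example_remove(struct example_adapter *adapter)
{
        unregister_netdev(adapter->netdev);
        /* free_netdev() walks dev->napi_list, calling netif_napi_del()
         * for each entry and dropping any skbs left on gro_list. */
        free_netdev(adapter->netdev);
}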
@@ -2382,13 +2578,10 @@ static void net_rx_action(struct softirq_action *h)
                 int work, weight;
 
                 /* If softirq window is exhuasted then punt.
-                 *
-                 * Note that this is a slight policy change from the
-                 * previous NAPI code, which would allow up to 2
-                 * jiffies to pass before breaking out.  The test
-                 * used to be "jiffies - start_time > 1".
+                 * Allow this to run for 2 jiffies since which will allow
+                 * an average latency of 1.5/HZ.
                  */
-                if (unlikely(budget <= 0 || jiffies != start_time))
+                if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
                         goto softnet_break;
 
                 local_irq_enable();
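A note on the "1.5/HZ" figure in the new comment (our reading, not text from the patch): net_rx_action() enters at a uniformly random offset within the current tick and the time_after() test trips at a tick boundary, so the usable window lies between one and two ticks:

        E[window]  ≈  (1/HZ + 2/HZ) / 2  =  1.5/HZ

i.e. roughly 1.5 ms at HZ=1000 and 15 ms at HZ=100. The old jiffies != start_time test broke out at the first tick boundary, an average of only 0.5/HZ.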
@@ -2615,7 +2808,7 @@ void dev_seq_stop(struct seq_file *seq, void *v)
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
-        struct net_device_stats *stats = dev->get_stats(dev);
+        const struct net_device_stats *stats = dev_get_stats(dev);
 
         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
@@ -2797,31 +2990,6 @@ static void ptype_seq_stop(struct seq_file *seq, void *v)
         rcu_read_unlock();
 }
 
-static void ptype_seq_decode(struct seq_file *seq, void *sym)
-{
-#ifdef CONFIG_KALLSYMS
-        unsigned long offset = 0, symsize;
-        const char *symname;
-        char *modname;
-        char namebuf[128];
-
-        symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
-                                  &modname, namebuf);
-
-        if (symname) {
-                char *delim = ":";
-
-                if (!modname)
-                        modname = delim = "";
-                seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
-                           symname, offset);
-                return;
-        }
-#endif
-
-        seq_printf(seq, "[%p]", sym);
-}
-
 static int ptype_seq_show(struct seq_file *seq, void *v)
 {
         struct packet_type *pt = v;
@@ -2834,10 +3002,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
                 else
                         seq_printf(seq, "%04x", ntohs(pt->type));
 
-                seq_printf(seq, " %-8s ",
-                           pt->dev ? pt->dev->name : "");
-                ptype_seq_decode(seq, pt->func);
-                seq_putc(seq, '\n');
+                seq_printf(seq, " %-8s %pF\n",
+                           pt->dev ? pt->dev->name : "", pt->func);
         }
 
         return 0;
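%pF is the vsprintf extension that resolves a code pointer through kallsyms (falling back to the raw address when kallsyms is compiled out) — exactly what the removed ptype_seq_decode(), and the <linux/kallsyms.h> include dropped at the top of this file, used to do by hand. A minimal, hypothetical use:

/* Sketch: print a packet_type handler symbolically. */
static void example_report_handler(const struct packet_type *pt)
{
        /* prints e.g. "handler: ip_rcv+0x0/0x2a0" with kallsyms,
         * or a raw pointer without it */
        printk(KERN_DEBUG "handler: %pF\n", pt->func);
}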
@@ -2954,8 +3120,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
-        if (dev->flags & IFF_UP && dev->change_rx_flags)
-                dev->change_rx_flags(dev, flags);
+        const struct net_device_ops *ops = dev->netdev_ops;
+
+        if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
+                ops->ndo_change_rx_flags(dev, flags);
 }
 
 static int __dev_set_promiscuity(struct net_device *dev, int inc)
@@ -3079,6 +3247,8 @@ int dev_set_allmulti(struct net_device *dev, int inc)
  */
 void __dev_set_rx_mode(struct net_device *dev)
 {
+        const struct net_device_ops *ops = dev->netdev_ops;
+
         /* dev_open will call this function so the list will stay sane. */
         if (!(dev->flags&IFF_UP))
                 return;
@@ -3086,8 +3256,8 @@ void __dev_set_rx_mode(struct net_device *dev)
         if (!netif_device_present(dev))
                 return;
 
-        if (dev->set_rx_mode)
-                dev->set_rx_mode(dev);
+        if (ops->ndo_set_rx_mode)
+                ops->ndo_set_rx_mode(dev);
         else {
                 /* Unicast addresses changes may only happen under the rtnl,
                  * therefore calling __dev_set_promiscuity here is safe.
@@ -3100,8 +3270,8 @@ void __dev_set_rx_mode(struct net_device *dev)
                         dev->uc_promisc = 0;
                 }
 
-                if (dev->set_multicast_list)
-                        dev->set_multicast_list(dev);
+                if (ops->ndo_set_multicast_list)
+                        ops->ndo_set_multicast_list(dev);
         }
 }
 
@@ -3460,6 +3630,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
  */
 int dev_set_mtu(struct net_device *dev, int new_mtu)
 {
+        const struct net_device_ops *ops = dev->netdev_ops;
         int err;
 
         if (new_mtu == dev->mtu)
@@ -3473,10 +3644,11 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
                 return -ENODEV;
 
         err = 0;
-        if (dev->change_mtu)
-                err = dev->change_mtu(dev, new_mtu);
+        if (ops->ndo_change_mtu)
+                err = ops->ndo_change_mtu(dev, new_mtu);
         else
                 dev->mtu = new_mtu;
+
         if (!err && dev->flags & IFF_UP)
                 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
         return err;
@@ -3491,15 +3663,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
  */
 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 {
+        const struct net_device_ops *ops = dev->netdev_ops;
         int err;
 
-        if (!dev->set_mac_address)
+        if (!ops->ndo_set_mac_address)
                 return -EOPNOTSUPP;
         if (sa->sa_family != dev->type)
                 return -EINVAL;
         if (!netif_device_present(dev))
                 return -ENODEV;
-        err = dev->set_mac_address(dev, sa);
+        err = ops->ndo_set_mac_address(dev, sa);
         if (!err)
                 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
         return err;
@@ -3579,10 +3752,13 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 {
         int err;
         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+        const struct net_device_ops *ops;
 
         if (!dev)
                 return -ENODEV;
 
+        ops = dev->netdev_ops;
+
         switch (cmd) {
         case SIOCSIFFLAGS:        /* Set interface flags */
                 return dev_change_flags(dev, ifr->ifr_flags);
@@ -3606,15 +3782,15 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                 return 0;
 
         case SIOCSIFMAP:
-                if (dev->set_config) {
+                if (ops->ndo_set_config) {
                         if (!netif_device_present(dev))
                                 return -ENODEV;
-                        return dev->set_config(dev, &ifr->ifr_map);
+                        return ops->ndo_set_config(dev, &ifr->ifr_map);
                 }
                 return -EOPNOTSUPP;
 
         case SIOCADDMULTI:
-                if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
+                if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
                     ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
                         return -EINVAL;
                 if (!netif_device_present(dev))
@@ -3623,7 +3799,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                                   dev->addr_len, 1);
 
         case SIOCDELMULTI:
-                if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
+                if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
                     ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
                         return -EINVAL;
                 if (!netif_device_present(dev))
@@ -3661,10 +3837,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                            cmd == SIOCBRDELIF ||
                            cmd == SIOCWANDEV) {
                         err = -EOPNOTSUPP;
-                        if (dev->do_ioctl) {
+                        if (ops->ndo_do_ioctl) {
                                 if (netif_device_present(dev))
-                                        err = dev->do_ioctl(dev, ifr,
-                                                            cmd);
+                                        err = ops->ndo_do_ioctl(dev, ifr, cmd);
                                 else
                                         err = -ENODEV;
                         }
@@ -3925,8 +4100,8 @@ static void rollback_registered(struct net_device *dev)
          */
         dev_addr_discard(dev);
 
-        if (dev->uninit)
-                dev->uninit(dev);
+        if (dev->netdev_ops->ndo_uninit)
+                dev->netdev_ops->ndo_uninit(dev);
 
         /* Notifier chain MUST detach us from master device. */
         WARN_ON(dev->master);
@@ -4016,7 +4191,7 @@ int register_netdevice(struct net_device *dev)
         struct hlist_head *head;
         struct hlist_node *p;
         int ret;
-        struct net *net;
+        struct net *net = dev_net(dev);
 
         BUG_ON(dev_boot_phase);
         ASSERT_RTNL();
@@ -4025,8 +4200,7 @@ int register_netdevice(struct net_device *dev)
 
         /* When net_device's are persistent, this will be fatal. */
         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
-        BUG_ON(!dev_net(dev));
-        net = dev_net(dev);
+        BUG_ON(!net);
 
         spin_lock_init(&dev->addr_list_lock);
         netdev_set_addr_lockdep_class(dev);
@@ -4034,9 +4208,46 @@ int register_netdevice(struct net_device *dev)
 
         dev->iflink = -1;
 
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
+        /* Netdevice_ops API compatiability support.
+         * This is temporary until all network devices are converted.
+         */
+        if (dev->netdev_ops) {
+                const struct net_device_ops *ops = dev->netdev_ops;
+
+                dev->init = ops->ndo_init;
+                dev->uninit = ops->ndo_uninit;
+                dev->open = ops->ndo_open;
+                dev->change_rx_flags = ops->ndo_change_rx_flags;
+                dev->set_rx_mode = ops->ndo_set_rx_mode;
+                dev->set_multicast_list = ops->ndo_set_multicast_list;
+                dev->set_mac_address = ops->ndo_set_mac_address;
+                dev->validate_addr = ops->ndo_validate_addr;
+                dev->do_ioctl = ops->ndo_do_ioctl;
+                dev->set_config = ops->ndo_set_config;
+                dev->change_mtu = ops->ndo_change_mtu;
+                dev->tx_timeout = ops->ndo_tx_timeout;
+                dev->get_stats = ops->ndo_get_stats;
+                dev->vlan_rx_register = ops->ndo_vlan_rx_register;
+                dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
+                dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+                dev->poll_controller = ops->ndo_poll_controller;
+#endif
+        } else {
+                char drivername[64];
+                pr_info("%s (%s): not using net_device_ops yet\n",
+                        dev->name, netdev_drivername(dev, drivername, 64));
+
+                /* This works only because net_device_ops and the
+                   compatiablity structure are the same. */
+                dev->netdev_ops = (void *) &(dev->init);
+        }
+#endif
+
         /* Init, if this function is available */
-        if (dev->init) {
-                ret = dev->init(dev);
+        if (dev->netdev_ops->ndo_init) {
+                ret = dev->netdev_ops->ndo_init(dev);
                 if (ret) {
                         if (ret > 0)
                                 ret = -EIO;
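For drivers, the conversion this shim supports looks like the sketch below: the per-device function pointers move into one shared, const ops table. All example_* names are hypothetical; the ndo_* fields and the eth_* helpers are real:

static int example_open(struct net_device *dev);
static int example_stop(struct net_device *dev);
static int example_start_xmit(struct sk_buff *skb, struct net_device *dev);
static void example_set_rx_mode(struct net_device *dev);

static const struct net_device_ops example_netdev_ops = {
        .ndo_open               = example_open,
        .ndo_stop               = example_stop,
        .ndo_start_xmit         = example_start_xmit,
        .ndo_set_multicast_list = example_set_rx_mode,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_change_mtu         = eth_change_mtu,
};

static void example_setup(struct net_device *dev)
{
        ether_setup(dev);
        /* one const pointer replaces a dozen writable fields that
         * used to live directly in struct net_device */
        dev->netdev_ops = &example_netdev_ops;
}

Note the compatibility trick above only works while the ndo_* members of struct net_device_ops keep the exact layout of the old init..poll_controller fields in struct net_device — hence the loud "temporary" comment in the shim.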
@@ -4114,8 +4325,8 @@ out:
         return ret;
 
 err_uninit:
-        if (dev->uninit)
-                dev->uninit(dev);
+        if (dev->netdev_ops->ndo_uninit)
+                dev->netdev_ops->ndo_uninit(dev);
         goto out;
 }
 
@@ -4271,10 +4482,24 @@ void netdev_run_todo(void)
         }
 }
 
-static struct net_device_stats *internal_stats(struct net_device *dev)
-{
-        return &dev->stats;
+/**
+ * dev_get_stats - get network device statistics
+ * @dev: device to get statistics from
+ *
+ * Get network statistics from device. The device driver may provide
+ * its own method by setting dev->netdev_ops->get_stats; otherwise
+ * the internal statistics structure is used.
+ */
+const struct net_device_stats *dev_get_stats(struct net_device *dev)
+ {
+        const struct net_device_ops *ops = dev->netdev_ops;
+
+        if (ops->ndo_get_stats)
+                return ops->ndo_get_stats(dev);
+        else
+                return &dev->stats;
 }
+EXPORT_SYMBOL(dev_get_stats);
 
 static void netdev_init_one_queue(struct net_device *dev,
                                   struct netdev_queue *queue,
@@ -4343,18 +4568,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
         dev->num_tx_queues = queue_count;
         dev->real_num_tx_queues = queue_count;
 
-        if (sizeof_priv) {
-                dev->priv = ((char *)dev +
-                             ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
-                              & ~NETDEV_ALIGN_CONST));
-        }
-
         dev->gso_max_size = GSO_MAX_SIZE;
 
         netdev_init_queues(dev);
 
-        dev->get_stats = internal_stats;
-        netpoll_netdev_init(dev);
+        INIT_LIST_HEAD(&dev->napi_list);
         setup(dev);
         strcpy(dev->name, name);
         return dev;
@@ -4371,10 +4589,15 @@ EXPORT_SYMBOL(alloc_netdev_mq);
  */
 void free_netdev(struct net_device *dev)
 {
+        struct napi_struct *p, *n;
+
         release_net(dev_net(dev));
 
         kfree(dev->_tx);
 
+        list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
+                netif_napi_del(p);
+
         /* Compatibility with error handling in drivers */
         if (dev->reg_state == NETREG_UNINITIALIZED) {
                 kfree((char *)dev - dev->padded);
@@ -4467,6 +4690,15 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
         if (dev->features & NETIF_F_NETNS_LOCAL)
                 goto out;
 
+#ifdef CONFIG_SYSFS
+        /* Don't allow real devices to be moved when sysfs
+         * is enabled.
+         */
+        err = -EINVAL;
+        if (dev->dev.parent)
+                goto out;
+#endif
+
         /* Ensure the device has been registrered */
         err = -EINVAL;
         if (dev->reg_state != NETREG_REGISTERED)
@@ -4524,6 +4756,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
          */
         dev_addr_discard(dev);
 
+        netdev_unregister_kobject(dev);
+
         /* Actually switch the network namespace */
         dev_net_set(dev, net);
 
@@ -4540,7 +4774,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
         }
 
         /* Fixup kobjects */
-        netdev_unregister_kobject(dev);
         err = netdev_register_kobject(dev);
         WARN_ON(err);
 
@@ -4847,6 +5080,12 @@ static void __net_exit default_device_exit(struct net *net)
                 if (dev->features & NETIF_F_NETNS_LOCAL)
                         continue;
 
+                /* Delete virtual devices */
+                if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
+                        dev->rtnl_link_ops->dellink(dev);
+                        continue;
+                }
+
                 /* Push remaing network devices to init_net */
                 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
                 err = dev_change_net_namespace(dev, &init_net, fb_name);
@@ -4893,9 +5132,6 @@ static int __init net_dev_init(void)
         if (register_pernet_subsys(&netdev_net_ops))
                 goto out;
 
-        if (register_pernet_device(&default_device_ops))
-                goto out;
-
         /*
          * Initialise the packet receive queues.
          */
@@ -4910,12 +5146,28 @@ static int __init net_dev_init(void)
 
                 queue->backlog.poll = process_backlog;
                 queue->backlog.weight = weight_p;
+                queue->backlog.gro_list = NULL;
         }
 
-        netdev_dma_register();
-
         dev_boot_phase = 0;
 
+        /* The loopback device is special if any other network devices
+         * is present in a network namespace the loopback device must
+         * be present. Since we now dynamically allocate and free the
+         * loopback device ensure this invariant is maintained by
+         * keeping the loopback device as the first device on the
+         * list of network devices.  Ensuring the loopback devices
+         * is the first device that appears and the last network device
+         * that disappears.
+         */
+        if (register_pernet_device(&loopback_net_ops))
+                goto out;
+
+        if (register_pernet_device(&default_device_ops))
+                goto out;
+
+        netdev_dma_register();
+
         open_softirq(NET_TX_SOFTIRQ, net_tx_action);
         open_softirq(NET_RX_SOFTIRQ, net_rx_action);
 
diff --git a/net/core/dst.c b/net/core/dst.c
index 09c1530f4681..57bc4d5b8d08 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -263,9 +263,11 @@ again:
 void dst_release(struct dst_entry *dst)
 {
         if (dst) {
-                WARN_ON(atomic_read(&dst->__refcnt) < 1);
+                int newrefcnt;
+
                 smp_mb__before_atomic_dec();
-                atomic_dec(&dst->__refcnt);
+                newrefcnt = atomic_dec_return(&dst->__refcnt);
+                WARN_ON(newrefcnt < 0);
         }
 }
 EXPORT_SYMBOL(dst_release);
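Why this matters (our gloss, not patch text): atomic_read() followed by a separate atomic_dec() leaves a window in which another CPU's decrement is invisible to the check, so the old WARN_ON could miss a refcount underflow. Roughly:

/* Sketch of the window the patch closes (__refcnt starts at 1):
 *
 *   CPU0                              CPU1
 *   atomic_read() == 1, no warning
 *                                     atomic_read() == 1, no warning
 *   atomic_dec()  -> 0
 *                                     atomic_dec()  -> -1, never caught
 *
 * atomic_dec_return() fuses the tested value with the decrement, so
 * whichever CPU drives the count negative is guaranteed to see it.
 */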
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 14ada537f895..947710a36ced 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -528,6 +528,22 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
         return dev->ethtool_ops->set_tx_csum(dev, edata.data);
 }
 
+static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
+{
+        struct ethtool_value edata;
+
+        if (!dev->ethtool_ops->set_rx_csum)
+                return -EOPNOTSUPP;
+
+        if (copy_from_user(&edata, useraddr, sizeof(edata)))
+                return -EFAULT;
+
+        if (!edata.data && dev->ethtool_ops->set_sg)
+                dev->features &= ~NETIF_F_GRO;
+
+        return dev->ethtool_ops->set_rx_csum(dev, edata.data);
+}
+
 static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
 {
         struct ethtool_value edata;
@@ -599,6 +615,34 @@ static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
         return 0;
 }
 
+static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
+{
+        struct ethtool_value edata = { ETHTOOL_GGRO };
+
+        edata.data = dev->features & NETIF_F_GRO;
+        if (copy_to_user(useraddr, &edata, sizeof(edata)))
+                return -EFAULT;
+        return 0;
+}
+
+static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
+{
+        struct ethtool_value edata;
+
+        if (copy_from_user(&edata, useraddr, sizeof(edata)))
+                return -EFAULT;
+
+        if (edata.data) {
+                if (!dev->ethtool_ops->get_rx_csum ||
+                    !dev->ethtool_ops->get_rx_csum(dev))
+                        return -EINVAL;
+                dev->features |= NETIF_F_GRO;
+        } else
+                dev->features &= ~NETIF_F_GRO;
+
+        return 0;
+}
+
 static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
 {
         struct ethtool_test test;
@@ -932,8 +976,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
                                      dev->ethtool_ops->get_rx_csum);
                 break;
         case ETHTOOL_SRXCSUM:
-                rc = ethtool_set_value(dev, useraddr,
-                                       dev->ethtool_ops->set_rx_csum);
+                rc = ethtool_set_rx_csum(dev, useraddr);
                 break;
         case ETHTOOL_GTXCSUM:
                 rc = ethtool_get_value(dev, useraddr, ethcmd,
@@ -1014,6 +1057,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
         case ETHTOOL_SRXFH:
                 rc = ethtool_set_rxhash(dev, useraddr);
                 break;
+        case ETHTOOL_GGRO:
+                rc = ethtool_get_gro(dev, useraddr);
+                break;
+        case ETHTOOL_SGRO:
+                rc = ethtool_set_gro(dev, useraddr);
+                break;
         default:
                 rc = -EOPNOTSUPP;
         }
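From userspace the new pair rides the usual SIOCETHTOOL ioctl with a struct ethtool_value. A minimal sketch (assumes <linux/ethtool.h> and <linux/sockios.h> of this era; error handling abbreviated):

#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

/* Toggle GRO on an interface, e.g. set_gro("eth0", 1). */
static int set_gro(const char *ifname, unsigned int on)
{
        struct ethtool_value eval = { .cmd = ETHTOOL_SGRO, .data = on };
        struct ifreq ifr;
        int fd, ret;

        fd = socket(AF_INET, SOCK_DGRAM, 0);
        if (fd < 0)
                return -1;

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
        ifr.ifr_data = (void *)&eval;

        /* enabling fails with EINVAL if RX checksumming is off,
         * mirroring ethtool_set_gro() above */
        ret = ioctl(fd, SIOCETHTOOL, &ifr);
        close(fd);
        return ret;
}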
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 79de3b14a8d1..32b3a0152d7a 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -664,17 +664,18 @@ static int __init fib_rules_init(void)
         rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
         rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
 
-        err = register_netdevice_notifier(&fib_rules_notifier);
+        err = register_pernet_subsys(&fib_rules_net_ops);
         if (err < 0)
                 goto fail;
 
-        err = register_pernet_subsys(&fib_rules_net_ops);
+        err = register_netdevice_notifier(&fib_rules_notifier);
         if (err < 0)
                 goto fail_unregister;
+
         return 0;
 
 fail_unregister:
-        unregister_netdevice_notifier(&fib_rules_notifier);
+        unregister_pernet_subsys(&fib_rules_net_ops);
 fail:
         rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
         rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
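The point of the swap: the pernet subsystem must be registered before the netdevice notifier, since the notifier can fire against per-net state as soon as it is registered; the unwind label is updated so teardown stays the mirror image of setup. The general shape of the idiom:

/* Generic init/unwind skeleton; register_foo/bar are placeholders. */
static int __init example_init(void)
{
        int err;

        err = register_foo();                /* step 1: prerequisite */
        if (err < 0)
                goto fail;

        err = register_bar();                /* step 2: depends on 1 */
        if (err < 0)
                goto fail_unregister;

        return 0;

fail_unregister:
        unregister_foo();                    /* undo step 1 only */
fail:
        return err;
}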
diff --git a/net/core/filter.c b/net/core/filter.c
index df3744355839..d1d779ca096d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -319,6 +319,25 @@ load_b:
                         A = 0;
                         continue;
                 }
+                case SKF_AD_NLATTR_NEST: {
+                        struct nlattr *nla;
+
+                        if (skb_is_nonlinear(skb))
+                                return 0;
+                        if (A > skb->len - sizeof(struct nlattr))
+                                return 0;
+
+                        nla = (struct nlattr *)&skb->data[A];
+                        if (nla->nla_len > A - skb->len)
+                                return 0;
+
+                        nla = nla_find_nested(nla, X);
+                        if (nla)
+                                A = (void *)nla - (void *)skb->data;
+                        else
+                                A = 0;
+                        continue;
+                }
                 default:
                         return 0;
                 }
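Classic socket filters reach this from userspace as an "ancillary" load: A carries the byte offset of the enclosing attribute, X the nested attribute type to search for, and the load leaves the found attribute's offset (or 0) in A. A hedged sketch — the offset/type constants are made up, the macros and SKF_AD_* values are the real <linux/filter.h> interface:

#include <linux/filter.h>

static struct sock_filter example_prog[] = {
        BPF_STMT(BPF_LD | BPF_IMM, 4),           /* A = outer attr offset */
        BPF_STMT(BPF_LDX | BPF_W | BPF_IMM, 7),  /* X = nested attr type */
        BPF_STMT(BPF_LD | BPF_B | BPF_ABS,
                 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /* A == 0: not found */
        BPF_STMT(BPF_RET | BPF_K, 0),            /* drop */
        BPF_STMT(BPF_RET | BPF_K, 0xffff),       /* accept */
};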
diff --git a/net/core/flow.c b/net/core/flow.c
index 5cf81052d044..96015871ecea 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -165,7 +165,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
         return 0;
 }
 
-void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
+void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
                         flow_resolve_t resolver)
 {
         struct flow_cache_entry *fle, **head;
@@ -225,7 +225,7 @@ nocache:
                 void *obj;
                 atomic_t *obj_ref;
 
-                err = resolver(key, family, dir, &obj, &obj_ref);
+                err = resolver(net, key, family, dir, &obj, &obj_ref);
 
                 if (fle && !err) {
                         fle->genid = atomic_read(&flow_cache_genid);
@@ -307,7 +307,7 @@ void flow_cache_flush(void)
         put_online_cpus();
 }
 
-static void __devinit flow_cache_cpu_prepare(int cpu)
+static void __init flow_cache_cpu_prepare(int cpu)
 {
         struct tasklet_struct *tasklet;
         unsigned long order;
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 57abe8266be1..9cc9f95b109e 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -31,6 +31,7 @@
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
+#include <linux/rbtree.h>
 #include <net/sock.h>
 #include <net/gen_stats.h>
 
@@ -89,6 +90,7 @@ struct gen_estimator
         u32 avpps;
         u32 avbps;
         struct rcu_head e_rcu;
+        struct rb_node node;
 };
 
 struct gen_estimator_head
@@ -102,6 +104,9 @@ static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
 /* Protects against NULL dereference */
 static DEFINE_RWLOCK(est_lock);
 
+/* Protects against soft lockup during large deletion */
+static struct rb_root est_root = RB_ROOT;
+
 static void est_timer(unsigned long arg)
 {
         int idx = (int)arg;
@@ -139,6 +144,46 @@ skip:
         rcu_read_unlock();
 }
 
+static void gen_add_node(struct gen_estimator *est)
+{
+        struct rb_node **p = &est_root.rb_node, *parent = NULL;
+
+        while (*p) {
+                struct gen_estimator *e;
+
+                parent = *p;
+                e = rb_entry(parent, struct gen_estimator, node);
+
+                if (est->bstats > e->bstats)
+                        p = &parent->rb_right;
+                else
+                        p = &parent->rb_left;
+        }
+        rb_link_node(&est->node, parent, p);
+        rb_insert_color(&est->node, &est_root);
+}
+
+static
+struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats,
+                                    const struct gnet_stats_rate_est *rate_est)
+{
+        struct rb_node *p = est_root.rb_node;
+
+        while (p) {
+                struct gen_estimator *e;
+
+                e = rb_entry(p, struct gen_estimator, node);
+
+                if (bstats > e->bstats)
+                        p = p->rb_right;
+                else if (bstats < e->bstats || rate_est != e->rate_est)
+                        p = p->rb_left;
+                else
+                        return e;
+        }
+        return NULL;
+}
+
 /**
  * gen_new_estimator - create a new rate estimator
  * @bstats: basic statistics
@@ -194,8 +239,11 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
         mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
 
         list_add_rcu(&est->list, &elist[idx].list);
+        gen_add_node(est);
+
         return 0;
 }
+EXPORT_SYMBOL(gen_new_estimator);
 
 static void __gen_kill_estimator(struct rcu_head *head)
 {
@@ -209,36 +257,27 @@ static void __gen_kill_estimator(struct rcu_head *head)
  * @bstats: basic statistics
  * @rate_est: rate estimator statistics
  *
- * Removes the rate estimator specified by &bstats and &rate_est
- * and deletes the timer.
+ * Removes the rate estimator specified by &bstats and &rate_est.
  *
  * NOTE: Called under rtnl_mutex
  */
 void gen_kill_estimator(struct gnet_stats_basic *bstats,
                         struct gnet_stats_rate_est *rate_est)
 {
-        int idx;
-        struct gen_estimator *e, *n;
-
-        for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
-
-                /* Skip non initialized indexes */
-                if (!elist[idx].timer.function)
-                        continue;
-
-                list_for_each_entry_safe(e, n, &elist[idx].list, list) {
-                        if (e->rate_est != rate_est || e->bstats != bstats)
-                                continue;
-
-                        write_lock_bh(&est_lock);
-                        e->bstats = NULL;
-                        write_unlock_bh(&est_lock);
-
-                        list_del_rcu(&e->list);
-                        call_rcu(&e->e_rcu, __gen_kill_estimator);
-                }
-        }
+        struct gen_estimator *e;
+
+        while ((e = gen_find_node(bstats, rate_est))) {
+                rb_erase(&e->node, &est_root);
+
+                write_lock_bh(&est_lock);
+                e->bstats = NULL;
+                write_unlock_bh(&est_lock);
+
+                list_del_rcu(&e->list);
+                call_rcu(&e->e_rcu, __gen_kill_estimator);
+        }
 }
+EXPORT_SYMBOL(gen_kill_estimator);
 
 /**
  * gen_replace_estimator - replace rate estimator configuration
@@ -259,8 +298,20 @@ int gen_replace_estimator(struct gnet_stats_basic *bstats,
         gen_kill_estimator(bstats, rate_est);
         return gen_new_estimator(bstats, rate_est, stats_lock, opt);
 }
+EXPORT_SYMBOL(gen_replace_estimator);
 
+/**
+ * gen_estimator_active - test if estimator is currently in use
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ *
+ * Returns true if estimator is active, and false if not.
+ */
+bool gen_estimator_active(const struct gnet_stats_basic *bstats,
+                          const struct gnet_stats_rate_est *rate_est)
+{
+        ASSERT_RTNL();
 
-EXPORT_SYMBOL(gen_kill_estimator);
-EXPORT_SYMBOL(gen_new_estimator);
-EXPORT_SYMBOL(gen_replace_estimator);
+        return gen_find_node(bstats, rate_est) != NULL;
+}
+EXPORT_SYMBOL(gen_estimator_active);
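Note the tree is keyed by the raw bstats pointer (compared with < and >), with rate_est only checked at the leaves — hence the while loop in gen_kill_estimator(), which handles several estimators sharing one bstats block. The new gen_estimator_active() predicate is what callers consult; a hedged sketch modelled on how an action or qdisc might use it (field names borrowed from the tc police action of this era, treat them as illustrative):

/* Sketch: refuse a configuration that needs a live rate estimator. */
static int example_check_estimator(struct tcf_police *p)
{
        if (!gen_estimator_active(&p->tcf_bstats, &p->tcf_rate_est))
                return -EINVAL;        /* no estimator bound */
        return 0;
}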
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1dc728b38589..9c3717a23cf7 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -531,9 +531,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
         if (!n)
                 goto out;
 
-#ifdef CONFIG_NET_NS
-        n->net = hold_net(net);
-#endif
+        write_pnet(&n->net, hold_net(net));
         memcpy(n->key, pkey, key_len);
         n->dev = dev;
         if (dev)
@@ -1329,9 +1327,9 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
                                       struct neigh_table *tbl)
 {
         struct neigh_parms *p, *ref;
-        struct net *net;
+        struct net *net = dev_net(dev);
+        const struct net_device_ops *ops = dev->netdev_ops;
 
-        net = dev_net(dev);
         ref = lookup_neigh_params(tbl, net, 0);
         if (!ref)
                 return NULL;
@@ -1340,20 +1338,17 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
         if (p) {
                 p->tbl = tbl;
                 atomic_set(&p->refcnt, 1);
-                INIT_RCU_HEAD(&p->rcu_head);
                 p->reachable_time =
                                 neigh_rand_reach_time(p->base_reachable_time);
 
-                if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
+                if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
                         kfree(p);
                         return NULL;
                 }
 
                 dev_hold(dev);
                 p->dev = dev;
-#ifdef CONFIG_NET_NS
-                p->net = hold_net(net);
-#endif
+                write_pnet(&p->net, hold_net(net));
                 p->sysctl_table = NULL;
                 write_lock_bh(&tbl->lock);
                 p->next = tbl->parms.next;
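write_pnet() and its read_pnet() counterpart compile to nothing / &init_net when CONFIG_NET_NS is off, which is what lets the three-line #ifdef blocks here collapse to single statements. Their shape, paraphrased from include/net/net_namespace.h of this era:

static inline void write_pnet(struct net **pnet, struct net *net)
{
#ifdef CONFIG_NET_NS
        *pnet = net;
#endif
}

static inline struct net *read_pnet(struct net * const *pnet)
{
#ifdef CONFIG_NET_NS
        return *pnet;
#else
        return &init_net;
#endif
}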
@@ -1408,11 +1403,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
         unsigned long now = jiffies;
         unsigned long phsize;
 
-#ifdef CONFIG_NET_NS
-        tbl->parms.net = &init_net;
-#endif
+        write_pnet(&tbl->parms.net, &init_net);
         atomic_set(&tbl->parms.refcnt, 1);
-        INIT_RCU_HEAD(&tbl->parms.rcu_head);
         tbl->parms.reachable_time =
                         neigh_rand_reach_time(tbl->parms.base_reachable_time);
 
@@ -1426,9 +1418,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
                 panic("cannot create neighbour cache statistics");
 
 #ifdef CONFIG_PROC_FS
-        tbl->pde = proc_create_data(tbl->id, 0, init_net.proc_net_stat,
-                                    &neigh_stat_seq_fops, tbl);
-        if (!tbl->pde)
+        if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
+                              &neigh_stat_seq_fops, tbl))
                 panic("cannot create neighbour proc dir entry");
 #endif
 
@@ -2568,128 +2559,128 @@ static struct neigh_sysctl_table {
2568 .procname = "mcast_solicit", 2559 .procname = "mcast_solicit",
2569 .maxlen = sizeof(int), 2560 .maxlen = sizeof(int),
2570 .mode = 0644, 2561 .mode = 0644,
2571 .proc_handler = &proc_dointvec, 2562 .proc_handler = proc_dointvec,
2572 }, 2563 },
2573 { 2564 {
2574 .ctl_name = NET_NEIGH_UCAST_SOLICIT, 2565 .ctl_name = NET_NEIGH_UCAST_SOLICIT,
2575 .procname = "ucast_solicit", 2566 .procname = "ucast_solicit",
2576 .maxlen = sizeof(int), 2567 .maxlen = sizeof(int),
2577 .mode = 0644, 2568 .mode = 0644,
2578 .proc_handler = &proc_dointvec, 2569 .proc_handler = proc_dointvec,
2579 }, 2570 },
2580 { 2571 {
2581 .ctl_name = NET_NEIGH_APP_SOLICIT, 2572 .ctl_name = NET_NEIGH_APP_SOLICIT,
2582 .procname = "app_solicit", 2573 .procname = "app_solicit",
2583 .maxlen = sizeof(int), 2574 .maxlen = sizeof(int),
2584 .mode = 0644, 2575 .mode = 0644,
2585 .proc_handler = &proc_dointvec, 2576 .proc_handler = proc_dointvec,
2586 }, 2577 },
2587 { 2578 {
2588 .procname = "retrans_time", 2579 .procname = "retrans_time",
2589 .maxlen = sizeof(int), 2580 .maxlen = sizeof(int),
2590 .mode = 0644, 2581 .mode = 0644,
2591 .proc_handler = &proc_dointvec_userhz_jiffies, 2582 .proc_handler = proc_dointvec_userhz_jiffies,
2592 }, 2583 },
2593 { 2584 {
2594 .ctl_name = NET_NEIGH_REACHABLE_TIME, 2585 .ctl_name = NET_NEIGH_REACHABLE_TIME,
2595 .procname = "base_reachable_time", 2586 .procname = "base_reachable_time",
2596 .maxlen = sizeof(int), 2587 .maxlen = sizeof(int),
2597 .mode = 0644, 2588 .mode = 0644,
2598 .proc_handler = &proc_dointvec_jiffies, 2589 .proc_handler = proc_dointvec_jiffies,
2599 .strategy = &sysctl_jiffies, 2590 .strategy = sysctl_jiffies,
2600 }, 2591 },
2601 { 2592 {
2602 .ctl_name = NET_NEIGH_DELAY_PROBE_TIME, 2593 .ctl_name = NET_NEIGH_DELAY_PROBE_TIME,
2603 .procname = "delay_first_probe_time", 2594 .procname = "delay_first_probe_time",
2604 .maxlen = sizeof(int), 2595 .maxlen = sizeof(int),
2605 .mode = 0644, 2596 .mode = 0644,
2606 .proc_handler = &proc_dointvec_jiffies, 2597 .proc_handler = proc_dointvec_jiffies,
2607 .strategy = &sysctl_jiffies, 2598 .strategy = sysctl_jiffies,
2608 }, 2599 },
2609 { 2600 {
2610 .ctl_name = NET_NEIGH_GC_STALE_TIME, 2601 .ctl_name = NET_NEIGH_GC_STALE_TIME,
2611 .procname = "gc_stale_time", 2602 .procname = "gc_stale_time",
2612 .maxlen = sizeof(int), 2603 .maxlen = sizeof(int),
2613 .mode = 0644, 2604 .mode = 0644,
2614 .proc_handler = &proc_dointvec_jiffies, 2605 .proc_handler = proc_dointvec_jiffies,
2615 .strategy = &sysctl_jiffies, 2606 .strategy = sysctl_jiffies,
2616 }, 2607 },
2617 { 2608 {
2618 .ctl_name = NET_NEIGH_UNRES_QLEN, 2609 .ctl_name = NET_NEIGH_UNRES_QLEN,
2619 .procname = "unres_qlen", 2610 .procname = "unres_qlen",
2620 .maxlen = sizeof(int), 2611 .maxlen = sizeof(int),
2621 .mode = 0644, 2612 .mode = 0644,
2622 .proc_handler = &proc_dointvec, 2613 .proc_handler = proc_dointvec,
2623 }, 2614 },
2624 { 2615 {
2625 .ctl_name = NET_NEIGH_PROXY_QLEN, 2616 .ctl_name = NET_NEIGH_PROXY_QLEN,
2626 .procname = "proxy_qlen", 2617 .procname = "proxy_qlen",
2627 .maxlen = sizeof(int), 2618 .maxlen = sizeof(int),
2628 .mode = 0644, 2619 .mode = 0644,
2629 .proc_handler = &proc_dointvec, 2620 .proc_handler = proc_dointvec,
2630 }, 2621 },
2631 { 2622 {
2632 .procname = "anycast_delay", 2623 .procname = "anycast_delay",
2633 .maxlen = sizeof(int), 2624 .maxlen = sizeof(int),
2634 .mode = 0644, 2625 .mode = 0644,
2635 .proc_handler = &proc_dointvec_userhz_jiffies, 2626 .proc_handler = proc_dointvec_userhz_jiffies,
2636 }, 2627 },
2637 { 2628 {
2638 .procname = "proxy_delay", 2629 .procname = "proxy_delay",
2639 .maxlen = sizeof(int), 2630 .maxlen = sizeof(int),
2640 .mode = 0644, 2631 .mode = 0644,
2641 .proc_handler = &proc_dointvec_userhz_jiffies, 2632 .proc_handler = proc_dointvec_userhz_jiffies,
2642 }, 2633 },
2643 { 2634 {
2644 .procname = "locktime", 2635 .procname = "locktime",
2645 .maxlen = sizeof(int), 2636 .maxlen = sizeof(int),
2646 .mode = 0644, 2637 .mode = 0644,
2647 .proc_handler = &proc_dointvec_userhz_jiffies, 2638 .proc_handler = proc_dointvec_userhz_jiffies,
2648 }, 2639 },
2649 { 2640 {
2650 .ctl_name = NET_NEIGH_RETRANS_TIME_MS, 2641 .ctl_name = NET_NEIGH_RETRANS_TIME_MS,
2651 .procname = "retrans_time_ms", 2642 .procname = "retrans_time_ms",
2652 .maxlen = sizeof(int), 2643 .maxlen = sizeof(int),
2653 .mode = 0644, 2644 .mode = 0644,
2654 .proc_handler = &proc_dointvec_ms_jiffies, 2645 .proc_handler = proc_dointvec_ms_jiffies,
2655 .strategy = &sysctl_ms_jiffies, 2646 .strategy = sysctl_ms_jiffies,
2656 }, 2647 },
2657 { 2648 {
2658 .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, 2649 .ctl_name = NET_NEIGH_REACHABLE_TIME_MS,
2659 .procname = "base_reachable_time_ms", 2650 .procname = "base_reachable_time_ms",
2660 .maxlen = sizeof(int), 2651 .maxlen = sizeof(int),
2661 .mode = 0644, 2652 .mode = 0644,
2662 .proc_handler = &proc_dointvec_ms_jiffies, 2653 .proc_handler = proc_dointvec_ms_jiffies,
2663 .strategy = &sysctl_ms_jiffies, 2654 .strategy = sysctl_ms_jiffies,
2664 }, 2655 },
2665 { 2656 {
2666 .ctl_name = NET_NEIGH_GC_INTERVAL, 2657 .ctl_name = NET_NEIGH_GC_INTERVAL,
2667 .procname = "gc_interval", 2658 .procname = "gc_interval",
2668 .maxlen = sizeof(int), 2659 .maxlen = sizeof(int),
2669 .mode = 0644, 2660 .mode = 0644,
2670 .proc_handler = &proc_dointvec_jiffies, 2661 .proc_handler = proc_dointvec_jiffies,
2671 .strategy = &sysctl_jiffies, 2662 .strategy = sysctl_jiffies,
2672 }, 2663 },
2673 { 2664 {
2674 .ctl_name = NET_NEIGH_GC_THRESH1, 2665 .ctl_name = NET_NEIGH_GC_THRESH1,
2675 .procname = "gc_thresh1", 2666 .procname = "gc_thresh1",
2676 .maxlen = sizeof(int), 2667 .maxlen = sizeof(int),
2677 .mode = 0644, 2668 .mode = 0644,
2678 .proc_handler = &proc_dointvec, 2669 .proc_handler = proc_dointvec,
2679 }, 2670 },
2680 { 2671 {
2681 .ctl_name = NET_NEIGH_GC_THRESH2, 2672 .ctl_name = NET_NEIGH_GC_THRESH2,
2682 .procname = "gc_thresh2", 2673 .procname = "gc_thresh2",
2683 .maxlen = sizeof(int), 2674 .maxlen = sizeof(int),
2684 .mode = 0644, 2675 .mode = 0644,
2685 .proc_handler = &proc_dointvec, 2676 .proc_handler = proc_dointvec,
2686 }, 2677 },
2687 { 2678 {
2688 .ctl_name = NET_NEIGH_GC_THRESH3, 2679 .ctl_name = NET_NEIGH_GC_THRESH3,
2689 .procname = "gc_thresh3", 2680 .procname = "gc_thresh3",
2690 .maxlen = sizeof(int), 2681 .maxlen = sizeof(int),
2691 .mode = 0644, 2682 .mode = 0644,
2692 .proc_handler = &proc_dointvec, 2683 .proc_handler = proc_dointvec,
2693 }, 2684 },
2694 {}, 2685 {},
2695 }, 2686 },
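
A pattern worth noting across this whole sysctl sweep: the initializers drop the redundant `&` in front of handler functions. A function name already decays to a function pointer, so `proc_dointvec` and `&proc_dointvec` are equivalent; the shorter form is what the rest of the tree now uses. A minimal sketch of the new-style entry (the knob name and backing variable are illustrative):

	static int example_value;

	static struct ctl_table example_table[] = {
		{
			.ctl_name	= CTL_UNNUMBERED,
			.procname	= "example_value",
			.data		= &example_value,	/* '&' still needed for data */
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,	/* no '&' on the handler */
		},
		{ .ctl_name = 0 }
	};
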
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 92d6b9467314..6ac29a46e23e 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -270,7 +270,6 @@ static ssize_t netstat_show(const struct device *d,
270 unsigned long offset) 270 unsigned long offset)
271{ 271{
272 struct net_device *dev = to_net_dev(d); 272 struct net_device *dev = to_net_dev(d);
273 struct net_device_stats *stats;
274 ssize_t ret = -EINVAL; 273 ssize_t ret = -EINVAL;
275 274
276 WARN_ON(offset > sizeof(struct net_device_stats) || 275 WARN_ON(offset > sizeof(struct net_device_stats) ||
@@ -278,7 +277,7 @@ static ssize_t netstat_show(const struct device *d,
278 277
279 read_lock(&dev_base_lock); 278 read_lock(&dev_base_lock);
280 if (dev_isalive(dev)) { 279 if (dev_isalive(dev)) {
281 stats = dev->get_stats(dev); 280 const struct net_device_stats *stats = dev_get_stats(dev);
282 ret = sprintf(buf, fmt_ulong, 281 ret = sprintf(buf, fmt_ulong,
283 *(unsigned long *)(((u8 *) stats) + offset)); 282 *(unsigned long *)(((u8 *) stats) + offset));
284 } 283 }
@@ -428,6 +427,9 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
428 struct net_device *dev = to_net_dev(d); 427 struct net_device *dev = to_net_dev(d);
429 int retval; 428 int retval;
430 429
430 if (!net_eq(dev_net(dev), &init_net))
431 return 0;
432
431 /* pass interface to uevent. */ 433 /* pass interface to uevent. */
432 retval = add_uevent_var(env, "INTERFACE=%s", dev->name); 434 retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
433 if (retval) 435 if (retval)
@@ -476,6 +478,10 @@ void netdev_unregister_kobject(struct net_device * net)
476 struct device *dev = &(net->dev); 478 struct device *dev = &(net->dev);
477 479
478 kobject_get(&dev->kobj); 480 kobject_get(&dev->kobj);
481
482 if (dev_net(net) != &init_net)
483 return;
484
479 device_del(dev); 485 device_del(dev);
480} 486}
481 487
@@ -490,7 +496,7 @@ int netdev_register_kobject(struct net_device *net)
490 dev->groups = groups; 496 dev->groups = groups;
491 497
492 BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); 498 BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ);
493 strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); 499 dev_set_name(dev, net->name);
494 500
495#ifdef CONFIG_SYSFS 501#ifdef CONFIG_SYSFS
496 *groups++ = &netstat_group; 502 *groups++ = &netstat_group;
@@ -501,6 +507,9 @@ int netdev_register_kobject(struct net_device *net)
501#endif 507#endif
502#endif /* CONFIG_SYSFS */ 508#endif /* CONFIG_SYSFS */
503 509
510 if (dev_net(net) != &init_net)
511 return 0;
512
504 return device_add(dev); 513 return device_add(dev);
505} 514}
506 515
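
Both this file and rtnetlink.c below stop calling the per-device `dev->get_stats` hook directly and go through the new `dev_get_stats()` helper introduced by this series, which returns a const pointer and hides whether the driver supplies its own stats callback. A caller sketch, assuming the accessor keeps the signature used in this diff:

	static unsigned long example_rx_packets(struct net_device *dev)
	{
		const struct net_device_stats *stats = dev_get_stats(dev);

		return stats->rx_packets;
	}
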
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1895a4ca9c4f..55cffad2f328 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -47,7 +47,6 @@ static __net_init int setup_net(struct net *net)
47 goto out; 47 goto out;
48 48
49 ng->len = INITIAL_NET_GEN_PTRS; 49 ng->len = INITIAL_NET_GEN_PTRS;
50 INIT_RCU_HEAD(&ng->rcu);
51 rcu_assign_pointer(net->gen, ng); 50 rcu_assign_pointer(net->gen, ng);
52 51
53 error = 0; 52 error = 0;
@@ -478,7 +477,6 @@ int net_assign_generic(struct net *net, int id, void *data)
478 */ 477 */
479 478
480 ng->len = id; 479 ng->len = id;
481 INIT_RCU_HEAD(&ng->rcu);
482 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len); 480 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len);
483 481
484 rcu_assign_pointer(net->gen, ng); 482 rcu_assign_pointer(net->gen, ng);
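
The two `INIT_RCU_HEAD()` calls can simply go: an rcu_head needs no initialization before it is handed to call_rcu(), since the RCU core fills in the callback and next pointers itself. A minimal sketch of the idiom (structure and function names are illustrative):

	struct example {
		int payload;
		struct rcu_head rcu;	/* never explicitly initialized */
	};

	static void example_free(struct rcu_head *head)
	{
		kfree(container_of(head, struct example, rcu));
	}

	static void example_release(struct example *e)
	{
		call_rcu(&e->rcu, example_free);
	}
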
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index dadac6281f20..755414cd49d1 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -58,6 +58,7 @@ static void queue_process(struct work_struct *work)
58 58
59 while ((skb = skb_dequeue(&npinfo->txq))) { 59 while ((skb = skb_dequeue(&npinfo->txq))) {
60 struct net_device *dev = skb->dev; 60 struct net_device *dev = skb->dev;
61 const struct net_device_ops *ops = dev->netdev_ops;
61 struct netdev_queue *txq; 62 struct netdev_queue *txq;
62 63
63 if (!netif_device_present(dev) || !netif_running(dev)) { 64 if (!netif_device_present(dev) || !netif_running(dev)) {
@@ -71,7 +72,7 @@ static void queue_process(struct work_struct *work)
71 __netif_tx_lock(txq, smp_processor_id()); 72 __netif_tx_lock(txq, smp_processor_id());
72 if (netif_tx_queue_stopped(txq) || 73 if (netif_tx_queue_stopped(txq) ||
73 netif_tx_queue_frozen(txq) || 74 netif_tx_queue_frozen(txq) ||
74 dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { 75 ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
75 skb_queue_head(&npinfo->txq, skb); 76 skb_queue_head(&npinfo->txq, skb);
76 __netif_tx_unlock(txq); 77 __netif_tx_unlock(txq);
77 local_irq_restore(flags); 78 local_irq_restore(flags);
@@ -174,12 +175,13 @@ static void service_arp_queue(struct netpoll_info *npi)
174void netpoll_poll(struct netpoll *np) 175void netpoll_poll(struct netpoll *np)
175{ 176{
176 struct net_device *dev = np->dev; 177 struct net_device *dev = np->dev;
178 const struct net_device_ops *ops = dev->netdev_ops;
177 179
178 if (!dev || !netif_running(dev) || !dev->poll_controller) 180 if (!dev || !netif_running(dev) || !ops->ndo_poll_controller)
179 return; 181 return;
180 182
181 /* Process pending work on NIC */ 183 /* Process pending work on NIC */
182 dev->poll_controller(dev); 184 ops->ndo_poll_controller(dev);
183 185
184 poll_napi(dev); 186 poll_napi(dev);
185 187
@@ -274,6 +276,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
274 int status = NETDEV_TX_BUSY; 276 int status = NETDEV_TX_BUSY;
275 unsigned long tries; 277 unsigned long tries;
276 struct net_device *dev = np->dev; 278 struct net_device *dev = np->dev;
279 const struct net_device_ops *ops = dev->netdev_ops;
277 struct netpoll_info *npinfo = np->dev->npinfo; 280 struct netpoll_info *npinfo = np->dev->npinfo;
278 281
279 if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { 282 if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
@@ -294,7 +297,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
294 tries > 0; --tries) { 297 tries > 0; --tries) {
295 if (__netif_tx_trylock(txq)) { 298 if (__netif_tx_trylock(txq)) {
296 if (!netif_tx_queue_stopped(txq)) 299 if (!netif_tx_queue_stopped(txq))
297 status = dev->hard_start_xmit(skb, dev); 300 status = ops->ndo_start_xmit(skb, dev);
298 __netif_tx_unlock(txq); 301 __netif_tx_unlock(txq);
299 302
300 if (status == NETDEV_TX_OK) 303 if (status == NETDEV_TX_OK)
@@ -345,7 +348,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
345 udph->check = csum_tcpudp_magic(htonl(np->local_ip), 348 udph->check = csum_tcpudp_magic(htonl(np->local_ip),
346 htonl(np->remote_ip), 349 htonl(np->remote_ip),
347 udp_len, IPPROTO_UDP, 350 udp_len, IPPROTO_UDP,
348 csum_partial((unsigned char *)udph, udp_len, 0)); 351 csum_partial(udph, udp_len, 0));
349 if (udph->check == 0) 352 if (udph->check == 0)
350 udph->check = CSUM_MANGLED_0; 353 udph->check = CSUM_MANGLED_0;
351 354
@@ -555,7 +558,6 @@ out:
555 558
556void netpoll_print_options(struct netpoll *np) 559void netpoll_print_options(struct netpoll *np)
557{ 560{
558 DECLARE_MAC_BUF(mac);
559 printk(KERN_INFO "%s: local port %d\n", 561 printk(KERN_INFO "%s: local port %d\n",
560 np->name, np->local_port); 562 np->name, np->local_port);
561 printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", 563 printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
@@ -566,8 +568,8 @@ void netpoll_print_options(struct netpoll *np)
566 np->name, np->remote_port); 568 np->name, np->remote_port);
567 printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", 569 printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
568 np->name, HIPQUAD(np->remote_ip)); 570 np->name, HIPQUAD(np->remote_ip));
569 printk(KERN_INFO "%s: remote ethernet address %s\n", 571 printk(KERN_INFO "%s: remote ethernet address %pM\n",
570 np->name, print_mac(mac, np->remote_mac)); 572 np->name, np->remote_mac);
571} 573}
572 574
573int netpoll_parse_options(struct netpoll *np, char *opt) 575int netpoll_parse_options(struct netpoll *np, char *opt)
@@ -697,7 +699,7 @@ int netpoll_setup(struct netpoll *np)
697 atomic_inc(&npinfo->refcnt); 699 atomic_inc(&npinfo->refcnt);
698 } 700 }
699 701
700 if (!ndev->poll_controller) { 702 if (!ndev->netdev_ops->ndo_poll_controller) {
701 printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", 703 printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
702 np->name, np->dev_name); 704 np->name, np->dev_name);
703 err = -ENOTSUPP; 705 err = -ENOTSUPP;
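
netpoll now reaches `ndo_start_xmit` and `ndo_poll_controller` through the device's `netdev_ops` table rather than through function pointers embedded in `struct net_device` itself; rtnetlink and pktgen below get the same treatment. A sketch of the driver side this conversion assumes (driver function names are illustrative):

	static int example_start_xmit(struct sk_buff *skb, struct net_device *dev);
	static void example_poll_controller(struct net_device *dev);

	static const struct net_device_ops example_netdev_ops = {
		.ndo_start_xmit		= example_start_xmit,
		.ndo_poll_controller	= example_poll_controller,
	};

	/* in the driver's setup path: dev->netdev_ops = &example_netdev_ops; */
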
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8997e912aaaf..65498483325a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -422,6 +422,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
422 const char *ifname); 422 const char *ifname);
423static int pktgen_device_event(struct notifier_block *, unsigned long, void *); 423static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
424static void pktgen_run_all_threads(void); 424static void pktgen_run_all_threads(void);
425static void pktgen_reset_all_threads(void);
425static void pktgen_stop_all_threads_ifs(void); 426static void pktgen_stop_all_threads_ifs(void);
426static int pktgen_stop_device(struct pktgen_dev *pkt_dev); 427static int pktgen_stop_device(struct pktgen_dev *pkt_dev);
427static void pktgen_stop(struct pktgen_thread *t); 428static void pktgen_stop(struct pktgen_thread *t);
@@ -480,6 +481,9 @@ static ssize_t pgctrl_write(struct file *file, const char __user * buf,
480 else if (!strcmp(data, "start")) 481 else if (!strcmp(data, "start"))
481 pktgen_run_all_threads(); 482 pktgen_run_all_threads();
482 483
484 else if (!strcmp(data, "reset"))
485 pktgen_reset_all_threads();
486
483 else 487 else
484 printk(KERN_WARNING "pktgen: Unknown command: %s\n", data); 488 printk(KERN_WARNING "pktgen: Unknown command: %s\n", data);
485 489
@@ -509,7 +513,6 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
509 __u64 sa; 513 __u64 sa;
510 __u64 stopped; 514 __u64 stopped;
511 __u64 now = getCurUs(); 515 __u64 now = getCurUs();
512 DECLARE_MAC_BUF(mac);
513 516
514 seq_printf(seq, 517 seq_printf(seq,
515 "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", 518 "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n",
@@ -554,12 +557,12 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
554 557
555 seq_puts(seq, " src_mac: "); 558 seq_puts(seq, " src_mac: ");
556 559
557 seq_printf(seq, "%s ", 560 seq_printf(seq, "%pM ",
558 print_mac(mac, is_zero_ether_addr(pkt_dev->src_mac) ? 561 is_zero_ether_addr(pkt_dev->src_mac) ?
559 pkt_dev->odev->dev_addr : pkt_dev->src_mac)); 562 pkt_dev->odev->dev_addr : pkt_dev->src_mac);
560 563
561 seq_printf(seq, "dst_mac: "); 564 seq_printf(seq, "dst_mac: ");
562 seq_printf(seq, "%s\n", print_mac(mac, pkt_dev->dst_mac)); 565 seq_printf(seq, "%pM\n", pkt_dev->dst_mac);
563 566
564 seq_printf(seq, 567 seq_printf(seq,
565 " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", 568 " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n",
@@ -2162,7 +2165,8 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
2162 struct xfrm_state *x = pkt_dev->flows[flow].x; 2165 struct xfrm_state *x = pkt_dev->flows[flow].x;
2163 if (!x) { 2166 if (!x) {
2164 /*slow path: we dont already have xfrm_state*/ 2167 /*slow path: we dont already have xfrm_state*/
2165 x = xfrm_stateonly_find((xfrm_address_t *)&pkt_dev->cur_daddr, 2168 x = xfrm_stateonly_find(&init_net,
2169 (xfrm_address_t *)&pkt_dev->cur_daddr,
2166 (xfrm_address_t *)&pkt_dev->cur_saddr, 2170 (xfrm_address_t *)&pkt_dev->cur_saddr,
2167 AF_INET, 2171 AF_INET,
2168 pkt_dev->ipsmode, 2172 pkt_dev->ipsmode,
@@ -3169,6 +3173,24 @@ static void pktgen_run_all_threads(void)
3169 pktgen_wait_all_threads_run(); 3173 pktgen_wait_all_threads_run();
3170} 3174}
3171 3175
3176static void pktgen_reset_all_threads(void)
3177{
3178 struct pktgen_thread *t;
3179
3180 pr_debug("pktgen: entering pktgen_reset_all_threads.\n");
3181
3182 mutex_lock(&pktgen_thread_lock);
3183
3184 list_for_each_entry(t, &pktgen_threads, th_list)
3185 t->control |= (T_REMDEVALL);
3186
3187 mutex_unlock(&pktgen_thread_lock);
3188
3189 schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */
3190
3191 pktgen_wait_all_threads_run();
3192}
3193
3172static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) 3194static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
3173{ 3195{
3174 __u64 total_us, bps, mbps, pps, idle; 3196 __u64 total_us, bps, mbps, pps, idle;
@@ -3331,14 +3353,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
3331 3353
3332static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) 3354static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
3333{ 3355{
3334 struct net_device *odev = NULL; 3356 struct net_device *odev = pkt_dev->odev;
3357 int (*xmit)(struct sk_buff *, struct net_device *)
3358 = odev->netdev_ops->ndo_start_xmit;
3335 struct netdev_queue *txq; 3359 struct netdev_queue *txq;
3336 __u64 idle_start = 0; 3360 __u64 idle_start = 0;
3337 u16 queue_map; 3361 u16 queue_map;
3338 int ret; 3362 int ret;
3339 3363
3340 odev = pkt_dev->odev;
3341
3342 if (pkt_dev->delay_us || pkt_dev->delay_ns) { 3364 if (pkt_dev->delay_us || pkt_dev->delay_ns) {
3343 u64 now; 3365 u64 now;
3344 3366
@@ -3419,7 +3441,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
3419 3441
3420 atomic_inc(&(pkt_dev->skb->users)); 3442 atomic_inc(&(pkt_dev->skb->users));
3421 retry_now: 3443 retry_now:
3422 ret = odev->hard_start_xmit(pkt_dev->skb, odev); 3444 ret = (*xmit)(pkt_dev->skb, odev);
3423 if (likely(ret == NETDEV_TX_OK)) { 3445 if (likely(ret == NETDEV_TX_OK)) {
3424 pkt_dev->last_ok = 1; 3446 pkt_dev->last_ok = 1;
3425 pkt_dev->sofar++; 3447 pkt_dev->sofar++;
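
The new "reset" command gives pgctrl a one-step way to detach every device from every pktgen thread (via T_REMDEVALL) instead of stopping and reconfiguring each thread by hand. A userspace sketch of issuing it, assuming the usual /proc/net/pktgen/pgctrl control file:

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/net/pktgen/pgctrl", "w");

		if (!f)
			return 1;
		fputs("reset", f);
		return fclose(f) ? 1 : 0;
	}
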
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4dfb6b4d4559..790dd205bb5d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -551,7 +551,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
551} 551}
552 552
553static void copy_rtnl_link_stats(struct rtnl_link_stats *a, 553static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
554 struct net_device_stats *b) 554 const struct net_device_stats *b)
555{ 555{
556 a->rx_packets = b->rx_packets; 556 a->rx_packets = b->rx_packets;
557 a->tx_packets = b->tx_packets; 557 a->tx_packets = b->tx_packets;
@@ -609,7 +609,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
609 struct netdev_queue *txq; 609 struct netdev_queue *txq;
610 struct ifinfomsg *ifm; 610 struct ifinfomsg *ifm;
611 struct nlmsghdr *nlh; 611 struct nlmsghdr *nlh;
612 struct net_device_stats *stats; 612 const struct net_device_stats *stats;
613 struct nlattr *attr; 613 struct nlattr *attr;
614 614
615 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); 615 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -666,7 +666,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
666 if (attr == NULL) 666 if (attr == NULL)
667 goto nla_put_failure; 667 goto nla_put_failure;
668 668
669 stats = dev->get_stats(dev); 669 stats = dev_get_stats(dev);
670 copy_rtnl_link_stats(nla_data(attr), stats); 670 copy_rtnl_link_stats(nla_data(attr), stats);
671 671
672 if (dev->rtnl_link_ops) { 672 if (dev->rtnl_link_ops) {
@@ -762,6 +762,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
762static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, 762static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
763 struct nlattr **tb, char *ifname, int modified) 763 struct nlattr **tb, char *ifname, int modified)
764{ 764{
765 const struct net_device_ops *ops = dev->netdev_ops;
765 int send_addr_notify = 0; 766 int send_addr_notify = 0;
766 int err; 767 int err;
767 768
@@ -783,7 +784,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
783 struct rtnl_link_ifmap *u_map; 784 struct rtnl_link_ifmap *u_map;
784 struct ifmap k_map; 785 struct ifmap k_map;
785 786
786 if (!dev->set_config) { 787 if (!ops->ndo_set_config) {
787 err = -EOPNOTSUPP; 788 err = -EOPNOTSUPP;
788 goto errout; 789 goto errout;
789 } 790 }
@@ -801,7 +802,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
801 k_map.dma = (unsigned char) u_map->dma; 802 k_map.dma = (unsigned char) u_map->dma;
802 k_map.port = (unsigned char) u_map->port; 803 k_map.port = (unsigned char) u_map->port;
803 804
804 err = dev->set_config(dev, &k_map); 805 err = ops->ndo_set_config(dev, &k_map);
805 if (err < 0) 806 if (err < 0)
806 goto errout; 807 goto errout;
807 808
@@ -812,7 +813,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
812 struct sockaddr *sa; 813 struct sockaddr *sa;
813 int len; 814 int len;
814 815
815 if (!dev->set_mac_address) { 816 if (!ops->ndo_set_mac_address) {
816 err = -EOPNOTSUPP; 817 err = -EOPNOTSUPP;
817 goto errout; 818 goto errout;
818 } 819 }
@@ -831,7 +832,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
831 sa->sa_family = dev->type; 832 sa->sa_family = dev->type;
832 memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), 833 memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
833 dev->addr_len); 834 dev->addr_len);
834 err = dev->set_mac_address(dev, sa); 835 err = ops->ndo_set_mac_address(dev, sa);
835 kfree(sa); 836 kfree(sa);
836 if (err) 837 if (err)
837 goto errout; 838 goto errout;
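
`do_setlink()` likewise now consults the ops table for its configuration hooks. A sketch of the `ndo_set_mac_address` implementation it expects, in the shape of the common Ethernet helper (function name illustrative):

	static int example_set_mac_address(struct net_device *dev, void *p)
	{
		struct sockaddr *addr = p;

		if (!is_valid_ether_addr(addr->sa_data))
			return -EADDRNOTAVAIL;
		memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
		return 0;
	}
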
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 65f7757465bd..b8d0abb26433 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -501,7 +501,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
501 new->network_header = old->network_header; 501 new->network_header = old->network_header;
502 new->mac_header = old->mac_header; 502 new->mac_header = old->mac_header;
503 new->dst = dst_clone(old->dst); 503 new->dst = dst_clone(old->dst);
504#ifdef CONFIG_INET 504#ifdef CONFIG_XFRM
505 new->sp = secpath_get(old->sp); 505 new->sp = secpath_get(old->sp);
506#endif 506#endif
507 memcpy(new->cb, old->cb, sizeof(old->cb)); 507 memcpy(new->cb, old->cb, sizeof(old->cb));
@@ -556,6 +556,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
556 C(truesize); 556 C(truesize);
557#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) 557#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
558 C(do_not_encrypt); 558 C(do_not_encrypt);
559 C(requeue);
559#endif 560#endif
560 atomic_set(&n->users, 1); 561 atomic_set(&n->users, 1);
561 562
@@ -2017,6 +2018,148 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
2017 skb_split_no_header(skb, skb1, len, pos); 2018 skb_split_no_header(skb, skb1, len, pos);
2018} 2019}
2019 2020
2021/* Shifting from/to a cloned skb is a no-go.
2022 *
2023 * Caller cannot keep skb_shinfo related pointers past calling here!
2024 */
2025static int skb_prepare_for_shift(struct sk_buff *skb)
2026{
2027 return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
2028}
2029
2030/**
2031 * skb_shift - Shifts paged data partially from skb to another
2032 * @tgt: buffer into which tail data gets added
2033 * @skb: buffer from which the paged data comes
2034 * @shiftlen: shift up to this many bytes
2035 *
2036 * Attempts to shift up to shiftlen worth of bytes, which may be less than
2037 * the length of the skb, from skb to tgt. Returns the number of bytes shifted.
2038 * It's up to the caller to free skb if everything was shifted.
2039 *
2040 * If @tgt runs out of frags, the whole operation is aborted.
2041 *
2042 * Skb cannot include anything else but paged data while tgt is allowed
2043 * to have non-paged data as well.
2044 *
2045 * TODO: full sized shift could be optimized but that would need
2046 * specialized skb free'er to handle frags without up-to-date nr_frags.
2047 */
2048int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
2049{
2050 int from, to, merge, todo;
2051 struct skb_frag_struct *fragfrom, *fragto;
2052
2053 BUG_ON(shiftlen > skb->len);
2054 BUG_ON(skb_headlen(skb)); /* Would corrupt stream */
2055
2056 todo = shiftlen;
2057 from = 0;
2058 to = skb_shinfo(tgt)->nr_frags;
2059 fragfrom = &skb_shinfo(skb)->frags[from];
2060
2061 /* Actual merge is delayed until the point when we know we can
2062 * commit all, so that we don't have to undo partial changes
2063 */
2064 if (!to ||
2065 !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) {
2066 merge = -1;
2067 } else {
2068 merge = to - 1;
2069
2070 todo -= fragfrom->size;
2071 if (todo < 0) {
2072 if (skb_prepare_for_shift(skb) ||
2073 skb_prepare_for_shift(tgt))
2074 return 0;
2075
2076 /* All previous frag pointers might be stale! */
2077 fragfrom = &skb_shinfo(skb)->frags[from];
2078 fragto = &skb_shinfo(tgt)->frags[merge];
2079
2080 fragto->size += shiftlen;
2081 fragfrom->size -= shiftlen;
2082 fragfrom->page_offset += shiftlen;
2083
2084 goto onlymerged;
2085 }
2086
2087 from++;
2088 }
2089
2090 /* Skip full, not-fitting skb to avoid expensive operations */
2091 if ((shiftlen == skb->len) &&
2092 (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to))
2093 return 0;
2094
2095 if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt))
2096 return 0;
2097
2098 while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) {
2099 if (to == MAX_SKB_FRAGS)
2100 return 0;
2101
2102 fragfrom = &skb_shinfo(skb)->frags[from];
2103 fragto = &skb_shinfo(tgt)->frags[to];
2104
2105 if (todo >= fragfrom->size) {
2106 *fragto = *fragfrom;
2107 todo -= fragfrom->size;
2108 from++;
2109 to++;
2110
2111 } else {
2112 get_page(fragfrom->page);
2113 fragto->page = fragfrom->page;
2114 fragto->page_offset = fragfrom->page_offset;
2115 fragto->size = todo;
2116
2117 fragfrom->page_offset += todo;
2118 fragfrom->size -= todo;
2119 todo = 0;
2120
2121 to++;
2122 break;
2123 }
2124 }
2125
2126 /* Ready to "commit" this state change to tgt */
2127 skb_shinfo(tgt)->nr_frags = to;
2128
2129 if (merge >= 0) {
2130 fragfrom = &skb_shinfo(skb)->frags[0];
2131 fragto = &skb_shinfo(tgt)->frags[merge];
2132
2133 fragto->size += fragfrom->size;
2134 put_page(fragfrom->page);
2135 }
2136
2137 /* Reposition in the original skb */
2138 to = 0;
2139 while (from < skb_shinfo(skb)->nr_frags)
2140 skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
2141 skb_shinfo(skb)->nr_frags = to;
2142
2143 BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags);
2144
2145onlymerged:
2146 /* Most likely the tgt won't ever need its checksum anymore, skb on
2147 * the other hand might need it if it needs to be resent
2148 */
2149 tgt->ip_summed = CHECKSUM_PARTIAL;
2150 skb->ip_summed = CHECKSUM_PARTIAL;
2151
2152 /* Yak, is it really working this way? Some helper please? */
2153 skb->len -= shiftlen;
2154 skb->data_len -= shiftlen;
2155 skb->truesize -= shiftlen;
2156 tgt->len += shiftlen;
2157 tgt->data_len += shiftlen;
2158 tgt->truesize += shiftlen;
2159
2160 return shiftlen;
2161}
2162
2020/** 2163/**
2021 * skb_prepare_seq_read - Prepare a sequential read of skb data 2164 * skb_prepare_seq_read - Prepare a sequential read of skb data
2022 * @skb: the buffer to read 2165 * @skb: the buffer to read
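
The net effect of skb_shift() above is to splice paged data off the front of @skb onto the tail of @tgt, merging into @tgt's last frag when the pages line up; it returns either 0 (aborted, e.g. un-sharing a clone failed or @tgt ran out of frag slots) or the full shift length. A caller sketch under those assumptions (its real user, added elsewhere in this merge, is TCP's SACK processing):

	/* Sketch: try to merge all of skb's paged payload into prev.
	 * skb must be unlinked, contain no linear data, and the caller
	 * must not rely on its shinfo pointers afterwards. */
	static bool example_merge(struct sk_buff *prev, struct sk_buff *skb)
	{
		int len = skb->len;

		if (skb_shift(prev, skb, len) != len)
			return false;	/* aborted, nothing changed */

		kfree_skb(skb);		/* fully shifted; freeing is on us */
		return true;
	}
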
@@ -2285,6 +2428,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
2285{ 2428{
2286 struct sk_buff *segs = NULL; 2429 struct sk_buff *segs = NULL;
2287 struct sk_buff *tail = NULL; 2430 struct sk_buff *tail = NULL;
2431 struct sk_buff *fskb = skb_shinfo(skb)->frag_list;
2288 unsigned int mss = skb_shinfo(skb)->gso_size; 2432 unsigned int mss = skb_shinfo(skb)->gso_size;
2289 unsigned int doffset = skb->data - skb_mac_header(skb); 2433 unsigned int doffset = skb->data - skb_mac_header(skb);
2290 unsigned int offset = doffset; 2434 unsigned int offset = doffset;
@@ -2304,7 +2448,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
2304 struct sk_buff *nskb; 2448 struct sk_buff *nskb;
2305 skb_frag_t *frag; 2449 skb_frag_t *frag;
2306 int hsize; 2450 int hsize;
2307 int k;
2308 int size; 2451 int size;
2309 2452
2310 len = skb->len - offset; 2453 len = skb->len - offset;
@@ -2317,9 +2460,36 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
2317 if (hsize > len || !sg) 2460 if (hsize > len || !sg)
2318 hsize = len; 2461 hsize = len;
2319 2462
2320 nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC); 2463 if (!hsize && i >= nfrags) {
2321 if (unlikely(!nskb)) 2464 BUG_ON(fskb->len != len);
2322 goto err; 2465
2466 pos += len;
2467 nskb = skb_clone(fskb, GFP_ATOMIC);
2468 fskb = fskb->next;
2469
2470 if (unlikely(!nskb))
2471 goto err;
2472
2473 hsize = skb_end_pointer(nskb) - nskb->head;
2474 if (skb_cow_head(nskb, doffset + headroom)) {
2475 kfree_skb(nskb);
2476 goto err;
2477 }
2478
2479 nskb->truesize += skb_end_pointer(nskb) - nskb->head -
2480 hsize;
2481 skb_release_head_state(nskb);
2482 __skb_push(nskb, doffset);
2483 } else {
2484 nskb = alloc_skb(hsize + doffset + headroom,
2485 GFP_ATOMIC);
2486
2487 if (unlikely(!nskb))
2488 goto err;
2489
2490 skb_reserve(nskb, headroom);
2491 __skb_put(nskb, doffset);
2492 }
2323 2493
2324 if (segs) 2494 if (segs)
2325 tail->next = nskb; 2495 tail->next = nskb;
@@ -2330,13 +2500,15 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
2330 __copy_skb_header(nskb, skb); 2500 __copy_skb_header(nskb, skb);
2331 nskb->mac_len = skb->mac_len; 2501 nskb->mac_len = skb->mac_len;
2332 2502
2333 skb_reserve(nskb, headroom);
2334 skb_reset_mac_header(nskb); 2503 skb_reset_mac_header(nskb);
2335 skb_set_network_header(nskb, skb->mac_len); 2504 skb_set_network_header(nskb, skb->mac_len);
2336 nskb->transport_header = (nskb->network_header + 2505 nskb->transport_header = (nskb->network_header +
2337 skb_network_header_len(skb)); 2506 skb_network_header_len(skb));
2338 skb_copy_from_linear_data(skb, skb_put(nskb, doffset), 2507 skb_copy_from_linear_data(skb, nskb->data, doffset);
2339 doffset); 2508
2509 if (pos >= offset + len)
2510 continue;
2511
2340 if (!sg) { 2512 if (!sg) {
2341 nskb->ip_summed = CHECKSUM_NONE; 2513 nskb->ip_summed = CHECKSUM_NONE;
2342 nskb->csum = skb_copy_and_csum_bits(skb, offset, 2514 nskb->csum = skb_copy_and_csum_bits(skb, offset,
@@ -2346,14 +2518,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
2346 } 2518 }
2347 2519
2348 frag = skb_shinfo(nskb)->frags; 2520 frag = skb_shinfo(nskb)->frags;
2349 k = 0;
2350 2521
2351 skb_copy_from_linear_data_offset(skb, offset, 2522 skb_copy_from_linear_data_offset(skb, offset,
2352 skb_put(nskb, hsize), hsize); 2523 skb_put(nskb, hsize), hsize);
2353 2524
2354 while (pos < offset + len) { 2525 while (pos < offset + len && i < nfrags) {
2355 BUG_ON(i >= nfrags);
2356
2357 *frag = skb_shinfo(skb)->frags[i]; 2526 *frag = skb_shinfo(skb)->frags[i];
2358 get_page(frag->page); 2527 get_page(frag->page);
2359 size = frag->size; 2528 size = frag->size;
@@ -2363,20 +2532,39 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
2363 frag->size -= offset - pos; 2532 frag->size -= offset - pos;
2364 } 2533 }
2365 2534
2366 k++; 2535 skb_shinfo(nskb)->nr_frags++;
2367 2536
2368 if (pos + size <= offset + len) { 2537 if (pos + size <= offset + len) {
2369 i++; 2538 i++;
2370 pos += size; 2539 pos += size;
2371 } else { 2540 } else {
2372 frag->size -= pos + size - (offset + len); 2541 frag->size -= pos + size - (offset + len);
2373 break; 2542 goto skip_fraglist;
2374 } 2543 }
2375 2544
2376 frag++; 2545 frag++;
2377 } 2546 }
2378 2547
2379 skb_shinfo(nskb)->nr_frags = k; 2548 if (pos < offset + len) {
2549 struct sk_buff *fskb2 = fskb;
2550
2551 BUG_ON(pos + fskb->len != offset + len);
2552
2553 pos += fskb->len;
2554 fskb = fskb->next;
2555
2556 if (fskb2->next) {
2557 fskb2 = skb_clone(fskb2, GFP_ATOMIC);
2558 if (!fskb2)
2559 goto err;
2560 } else
2561 skb_get(fskb2);
2562
2563 BUG_ON(skb_shinfo(nskb)->frag_list);
2564 skb_shinfo(nskb)->frag_list = fskb2;
2565 }
2566
2567skip_fraglist:
2380 nskb->data_len = len - hsize; 2568 nskb->data_len = len - hsize;
2381 nskb->len += nskb->data_len; 2569 nskb->len += nskb->data_len;
2382 nskb->truesize += nskb->data_len; 2570 nskb->truesize += nskb->data_len;
@@ -2394,6 +2582,65 @@ err:
2394 2582
2395EXPORT_SYMBOL_GPL(skb_segment); 2583EXPORT_SYMBOL_GPL(skb_segment);
2396 2584
2585int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2586{
2587 struct sk_buff *p = *head;
2588 struct sk_buff *nskb;
2589 unsigned int headroom;
2590 unsigned int hlen = p->data - skb_mac_header(p);
2591
2592 if (hlen + p->len + skb->len >= 65536)
2593 return -E2BIG;
2594
2595 if (skb_shinfo(p)->frag_list)
2596 goto merge;
2597
2598 headroom = skb_headroom(p);
2599 nskb = netdev_alloc_skb(p->dev, headroom);
2600 if (unlikely(!nskb))
2601 return -ENOMEM;
2602
2603 __copy_skb_header(nskb, p);
2604 nskb->mac_len = p->mac_len;
2605
2606 skb_reserve(nskb, headroom);
2607
2608 skb_set_mac_header(nskb, -hlen);
2609 skb_set_network_header(nskb, skb_network_offset(p));
2610 skb_set_transport_header(nskb, skb_transport_offset(p));
2611
2612 memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen);
2613
2614 *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
2615 skb_shinfo(nskb)->frag_list = p;
2616 skb_header_release(p);
2617 nskb->prev = p;
2618
2619 nskb->data_len += p->len;
2620 nskb->truesize += p->len;
2621 nskb->len += p->len;
2622
2623 *head = nskb;
2624 nskb->next = p->next;
2625 p->next = NULL;
2626
2627 p = nskb;
2628
2629merge:
2630 NAPI_GRO_CB(p)->count++;
2631 p->prev->next = skb;
2632 p->prev = skb;
2633 skb_header_release(skb);
2634
2635 p->data_len += skb->len;
2636 p->truesize += skb->len;
2637 p->len += skb->len;
2638
2639 NAPI_GRO_CB(skb)->same_flow = 1;
2640 return 0;
2641}
2642EXPORT_SYMBOL_GPL(skb_gro_receive);
2643
2397void __init skb_init(void) 2644void __init skb_init(void)
2398{ 2645{
2399 skbuff_head_cache = kmem_cache_create("skbuff_head_cache", 2646 skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
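
skb_gro_receive() is the merge primitive of the new GRO core added in this series: held packets are chained onto a frag_list head and accounted via NAPI_GRO_CB. Drivers opt in simply by handing received packets to napi_gro_receive() from their poll routine instead of netif_receive_skb(). A rough driver-side sketch; example_rx_next() stands in for the driver's RX-ring accessor:

	extern struct sk_buff *example_rx_next(void);	/* hypothetical */

	static int example_poll(struct napi_struct *napi, int budget)
	{
		int done = 0;
		struct sk_buff *skb;

		while (done < budget && (skb = example_rx_next()) != NULL) {
			napi_gro_receive(napi, skb);	/* was netif_receive_skb(skb) */
			done++;
		}
		if (done < budget)
			napi_complete(napi);	/* flushes any held GRO packets */
		return done;
	}
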
diff --git a/net/core/sock.c b/net/core/sock.c
index edf7220889a4..f3a0d08cbb48 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1071,7 +1071,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1071 newsk->sk_sleep = NULL; 1071 newsk->sk_sleep = NULL;
1072 1072
1073 if (newsk->sk_prot->sockets_allocated) 1073 if (newsk->sk_prot->sockets_allocated)
1074 atomic_inc(newsk->sk_prot->sockets_allocated); 1074 percpu_counter_inc(newsk->sk_prot->sockets_allocated);
1075 } 1075 }
1076out: 1076out:
1077 return newsk; 1077 return newsk;
@@ -1463,8 +1463,12 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
1463 } 1463 }
1464 1464
1465 if (prot->memory_pressure) { 1465 if (prot->memory_pressure) {
1466 if (!*prot->memory_pressure || 1466 int alloc;
1467 prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) * 1467
1468 if (!*prot->memory_pressure)
1469 return 1;
1470 alloc = percpu_counter_read_positive(prot->sockets_allocated);
1471 if (prot->sysctl_mem[2] > alloc *
1468 sk_mem_pages(sk->sk_wmem_queued + 1472 sk_mem_pages(sk->sk_wmem_queued +
1469 atomic_read(&sk->sk_rmem_alloc) + 1473 atomic_read(&sk->sk_rmem_alloc) +
1470 sk->sk_forward_alloc)) 1474 sk->sk_forward_alloc))
@@ -2037,7 +2041,8 @@ int proto_register(struct proto *prot, int alloc_slab)
2037{ 2041{
2038 if (alloc_slab) { 2042 if (alloc_slab) {
2039 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, 2043 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
2040 SLAB_HWCACHE_ALIGN, NULL); 2044 SLAB_HWCACHE_ALIGN | prot->slab_flags,
2045 NULL);
2041 2046
2042 if (prot->slab == NULL) { 2047 if (prot->slab == NULL) {
2043 printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", 2048 printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
@@ -2076,7 +2081,9 @@ int proto_register(struct proto *prot, int alloc_slab)
2076 prot->twsk_prot->twsk_slab = 2081 prot->twsk_prot->twsk_slab =
2077 kmem_cache_create(prot->twsk_prot->twsk_slab_name, 2082 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
2078 prot->twsk_prot->twsk_obj_size, 2083 prot->twsk_prot->twsk_obj_size,
2079 0, SLAB_HWCACHE_ALIGN, 2084 0,
2085 SLAB_HWCACHE_ALIGN |
2086 prot->slab_flags,
2080 NULL); 2087 NULL);
2081 if (prot->twsk_prot->twsk_slab == NULL) 2088 if (prot->twsk_prot->twsk_slab == NULL)
2082 goto out_free_timewait_sock_slab_name; 2089 goto out_free_timewait_sock_slab_name;
@@ -2164,7 +2171,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
2164 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", 2171 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
2165 proto->name, 2172 proto->name,
2166 proto->obj_size, 2173 proto->obj_size,
2167 proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1, 2174 sock_prot_inuse_get(seq_file_net(seq), proto),
2168 proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1, 2175 proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
2169 proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", 2176 proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
2170 proto->max_header, 2177 proto->max_header,
@@ -2218,7 +2225,8 @@ static const struct seq_operations proto_seq_ops = {
2218 2225
2219static int proto_seq_open(struct inode *inode, struct file *file) 2226static int proto_seq_open(struct inode *inode, struct file *file)
2220{ 2227{
2221 return seq_open(file, &proto_seq_ops); 2228 return seq_open_net(inode, file, &proto_seq_ops,
2229 sizeof(struct seq_net_private));
2222} 2230}
2223 2231
2224static const struct file_operations proto_seq_fops = { 2232static const struct file_operations proto_seq_fops = {
@@ -2226,13 +2234,31 @@ static const struct file_operations proto_seq_fops = {
2226 .open = proto_seq_open, 2234 .open = proto_seq_open,
2227 .read = seq_read, 2235 .read = seq_read,
2228 .llseek = seq_lseek, 2236 .llseek = seq_lseek,
2229 .release = seq_release, 2237 .release = seq_release_net,
2238};
2239
2240static __net_init int proto_init_net(struct net *net)
2241{
2242 if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops))
2243 return -ENOMEM;
2244
2245 return 0;
2246}
2247
2248static __net_exit void proto_exit_net(struct net *net)
2249{
2250 proc_net_remove(net, "protocols");
2251}
2252
2253
2254static __net_initdata struct pernet_operations proto_net_ops = {
2255 .init = proto_init_net,
2256 .exit = proto_exit_net,
2230}; 2257};
2231 2258
2232static int __init proto_init(void) 2259static int __init proto_init(void)
2233{ 2260{
2234 /* register /proc/net/protocols */ 2261 return register_pernet_subsys(&proto_net_ops);
2235 return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
2236} 2262}
2237 2263
2238subsys_initcall(proto_init); 2264subsys_initcall(proto_init);
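
sockets_allocated moves from a plain atomic_t to a percpu_counter, so the hot inc/dec paths touch only a CPU-local variable and the (approximate) total is folded together only when a reader asks for it. A sketch of the pattern with an illustrative counter:

	#include <linux/percpu_counter.h>

	static struct percpu_counter example_allocated;

	static int __init example_init(void)
	{
		return percpu_counter_init(&example_allocated, 0);
	}

	static void example_alloc_one(void)
	{
		percpu_counter_inc(&example_allocated);	/* cheap, CPU-local */
	}

	static s64 example_in_use(void)
	{
		/* approximate, but clamped so it never reads negative */
		return percpu_counter_read_positive(&example_allocated);
	}
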
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index f686467ff12b..83d3398559ea 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -12,7 +12,6 @@
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <net/sock.h> 14#include <net/sock.h>
15#include <net/xfrm.h>
16 15
17static struct ctl_table net_core_table[] = { 16static struct ctl_table net_core_table[] = {
18#ifdef CONFIG_NET 17#ifdef CONFIG_NET
@@ -22,7 +21,7 @@ static struct ctl_table net_core_table[] = {
22 .data = &sysctl_wmem_max, 21 .data = &sysctl_wmem_max,
23 .maxlen = sizeof(int), 22 .maxlen = sizeof(int),
24 .mode = 0644, 23 .mode = 0644,
25 .proc_handler = &proc_dointvec 24 .proc_handler = proc_dointvec
26 }, 25 },
27 { 26 {
28 .ctl_name = NET_CORE_RMEM_MAX, 27 .ctl_name = NET_CORE_RMEM_MAX,
@@ -30,7 +29,7 @@ static struct ctl_table net_core_table[] = {
30 .data = &sysctl_rmem_max, 29 .data = &sysctl_rmem_max,
31 .maxlen = sizeof(int), 30 .maxlen = sizeof(int),
32 .mode = 0644, 31 .mode = 0644,
33 .proc_handler = &proc_dointvec 32 .proc_handler = proc_dointvec
34 }, 33 },
35 { 34 {
36 .ctl_name = NET_CORE_WMEM_DEFAULT, 35 .ctl_name = NET_CORE_WMEM_DEFAULT,
@@ -38,7 +37,7 @@ static struct ctl_table net_core_table[] = {
38 .data = &sysctl_wmem_default, 37 .data = &sysctl_wmem_default,
39 .maxlen = sizeof(int), 38 .maxlen = sizeof(int),
40 .mode = 0644, 39 .mode = 0644,
41 .proc_handler = &proc_dointvec 40 .proc_handler = proc_dointvec
42 }, 41 },
43 { 42 {
44 .ctl_name = NET_CORE_RMEM_DEFAULT, 43 .ctl_name = NET_CORE_RMEM_DEFAULT,
@@ -46,7 +45,7 @@ static struct ctl_table net_core_table[] = {
46 .data = &sysctl_rmem_default, 45 .data = &sysctl_rmem_default,
47 .maxlen = sizeof(int), 46 .maxlen = sizeof(int),
48 .mode = 0644, 47 .mode = 0644,
49 .proc_handler = &proc_dointvec 48 .proc_handler = proc_dointvec
50 }, 49 },
51 { 50 {
52 .ctl_name = NET_CORE_DEV_WEIGHT, 51 .ctl_name = NET_CORE_DEV_WEIGHT,
@@ -54,7 +53,7 @@ static struct ctl_table net_core_table[] = {
54 .data = &weight_p, 53 .data = &weight_p,
55 .maxlen = sizeof(int), 54 .maxlen = sizeof(int),
56 .mode = 0644, 55 .mode = 0644,
57 .proc_handler = &proc_dointvec 56 .proc_handler = proc_dointvec
58 }, 57 },
59 { 58 {
60 .ctl_name = NET_CORE_MAX_BACKLOG, 59 .ctl_name = NET_CORE_MAX_BACKLOG,
@@ -62,7 +61,7 @@ static struct ctl_table net_core_table[] = {
62 .data = &netdev_max_backlog, 61 .data = &netdev_max_backlog,
63 .maxlen = sizeof(int), 62 .maxlen = sizeof(int),
64 .mode = 0644, 63 .mode = 0644,
65 .proc_handler = &proc_dointvec 64 .proc_handler = proc_dointvec
66 }, 65 },
67 { 66 {
68 .ctl_name = NET_CORE_MSG_COST, 67 .ctl_name = NET_CORE_MSG_COST,
@@ -70,8 +69,8 @@ static struct ctl_table net_core_table[] = {
70 .data = &net_ratelimit_state.interval, 69 .data = &net_ratelimit_state.interval,
71 .maxlen = sizeof(int), 70 .maxlen = sizeof(int),
72 .mode = 0644, 71 .mode = 0644,
73 .proc_handler = &proc_dointvec_jiffies, 72 .proc_handler = proc_dointvec_jiffies,
74 .strategy = &sysctl_jiffies, 73 .strategy = sysctl_jiffies,
75 }, 74 },
76 { 75 {
77 .ctl_name = NET_CORE_MSG_BURST, 76 .ctl_name = NET_CORE_MSG_BURST,
@@ -79,7 +78,7 @@ static struct ctl_table net_core_table[] = {
79 .data = &net_ratelimit_state.burst, 78 .data = &net_ratelimit_state.burst,
80 .maxlen = sizeof(int), 79 .maxlen = sizeof(int),
81 .mode = 0644, 80 .mode = 0644,
82 .proc_handler = &proc_dointvec, 81 .proc_handler = proc_dointvec,
83 }, 82 },
84 { 83 {
85 .ctl_name = NET_CORE_OPTMEM_MAX, 84 .ctl_name = NET_CORE_OPTMEM_MAX,
@@ -87,42 +86,8 @@ static struct ctl_table net_core_table[] = {
87 .data = &sysctl_optmem_max, 86 .data = &sysctl_optmem_max,
88 .maxlen = sizeof(int), 87 .maxlen = sizeof(int),
89 .mode = 0644, 88 .mode = 0644,
90 .proc_handler = &proc_dointvec 89 .proc_handler = proc_dointvec
91 }, 90 },
92#ifdef CONFIG_XFRM
93 {
94 .ctl_name = NET_CORE_AEVENT_ETIME,
95 .procname = "xfrm_aevent_etime",
96 .data = &sysctl_xfrm_aevent_etime,
97 .maxlen = sizeof(u32),
98 .mode = 0644,
99 .proc_handler = &proc_dointvec
100 },
101 {
102 .ctl_name = NET_CORE_AEVENT_RSEQTH,
103 .procname = "xfrm_aevent_rseqth",
104 .data = &sysctl_xfrm_aevent_rseqth,
105 .maxlen = sizeof(u32),
106 .mode = 0644,
107 .proc_handler = &proc_dointvec
108 },
109 {
110 .ctl_name = CTL_UNNUMBERED,
111 .procname = "xfrm_larval_drop",
112 .data = &sysctl_xfrm_larval_drop,
113 .maxlen = sizeof(int),
114 .mode = 0644,
115 .proc_handler = &proc_dointvec
116 },
117 {
118 .ctl_name = CTL_UNNUMBERED,
119 .procname = "xfrm_acq_expires",
120 .data = &sysctl_xfrm_acq_expires,
121 .maxlen = sizeof(int),
122 .mode = 0644,
123 .proc_handler = &proc_dointvec
124 },
125#endif /* CONFIG_XFRM */
126#endif /* CONFIG_NET */ 91#endif /* CONFIG_NET */
127 { 92 {
128 .ctl_name = NET_CORE_BUDGET, 93 .ctl_name = NET_CORE_BUDGET,
@@ -130,7 +95,7 @@ static struct ctl_table net_core_table[] = {
130 .data = &netdev_budget, 95 .data = &netdev_budget,
131 .maxlen = sizeof(int), 96 .maxlen = sizeof(int),
132 .mode = 0644, 97 .mode = 0644,
133 .proc_handler = &proc_dointvec 98 .proc_handler = proc_dointvec
134 }, 99 },
135 { 100 {
136 .ctl_name = NET_CORE_WARNINGS, 101 .ctl_name = NET_CORE_WARNINGS,
@@ -138,7 +103,7 @@ static struct ctl_table net_core_table[] = {
138 .data = &net_msg_warn, 103 .data = &net_msg_warn,
139 .maxlen = sizeof(int), 104 .maxlen = sizeof(int),
140 .mode = 0644, 105 .mode = 0644,
141 .proc_handler = &proc_dointvec 106 .proc_handler = proc_dointvec
142 }, 107 },
143 { .ctl_name = 0 } 108 { .ctl_name = 0 }
144}; 109};
@@ -150,12 +115,12 @@ static struct ctl_table netns_core_table[] = {
150 .data = &init_net.core.sysctl_somaxconn, 115 .data = &init_net.core.sysctl_somaxconn,
151 .maxlen = sizeof(int), 116 .maxlen = sizeof(int),
152 .mode = 0644, 117 .mode = 0644,
153 .proc_handler = &proc_dointvec 118 .proc_handler = proc_dointvec
154 }, 119 },
155 { .ctl_name = 0 } 120 { .ctl_name = 0 }
156}; 121};
157 122
158static __net_initdata struct ctl_path net_core_path[] = { 123__net_initdata struct ctl_path net_core_path[] = {
159 { .procname = "net", .ctl_name = CTL_NET, }, 124 { .procname = "net", .ctl_name = CTL_NET, },
160 { .procname = "core", .ctl_name = NET_CORE, }, 125 { .procname = "core", .ctl_name = NET_CORE, },
161 { }, 126 { },
@@ -207,8 +172,11 @@ static __net_initdata struct pernet_operations sysctl_core_ops = {
207 172
208static __init int sysctl_core_init(void) 173static __init int sysctl_core_init(void)
209{ 174{
175 static struct ctl_table empty[1];
176
177 register_sysctl_paths(net_core_path, empty);
210 register_net_sysctl_rotable(net_core_path, net_core_table); 178 register_net_sysctl_rotable(net_core_path, net_core_table);
211 return register_pernet_subsys(&sysctl_core_ops); 179 return register_pernet_subsys(&sysctl_core_ops);
212} 180}
213 181
214__initcall(sysctl_core_init); 182fs_initcall(sysctl_core_init);
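
The rewritten init first registers an empty table on net_core_path so the net.core directory itself exists even before any per-net tables arrive, then registers the read-only global table and the per-namespace ops; switching from __initcall to fs_initcall only runs it earlier, ahead of ordinary device initcalls. Roughly what the per-net init behind sysctl_core_ops (not shown in this excerpt) has to do, sketched with illustrative names:

	static int __net_init example_sysctl_init(struct net *net)
	{
		struct ctl_table *tbl;

		/* each namespace gets its own copy pointing at its own data */
		tbl = kmemdup(netns_core_table, sizeof(netns_core_table), GFP_KERNEL);
		if (tbl == NULL)
			return -ENOMEM;
		tbl[0].data = &net->core.sysctl_somaxconn;

		/* the returned header would be stashed for the exit path */
		if (register_net_sysctl_table(net, net_core_path, tbl) == NULL) {
			kfree(tbl);
			return -ENOMEM;
		}
		return 0;
	}
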