Diffstat (limited to 'net/core')
-rw-r--r--  net/core/datagram.c         1
-rw-r--r--  net/core/dev.c             55
-rw-r--r--  net/core/dst.c             25
-rw-r--r--  net/core/ethtool.c         45
-rw-r--r--  net/core/fib_rules.c        4
-rw-r--r--  net/core/flow_dissector.c   5
-rw-r--r--  net/core/neighbour.c       31
-rw-r--r--  net/core/net-sysfs.c       74
-rw-r--r--  net/core/net_namespace.c    4
-rw-r--r--  net/core/netpoll.c         10
-rw-r--r--  net/core/netprio_cgroup.c 133
-rw-r--r--  net/core/rtnetlink.c       57
-rw-r--r--  net/core/scm.c             22
-rw-r--r--  net/core/skbuff.c          73
-rw-r--r--  net/core/sock.c            15
-rw-r--r--  net/core/sock_diag.c       42
16 files changed, 358 insertions(+), 238 deletions(-)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ae6acf6a3dea..0337e2b76862 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -248,7 +248,6 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
 	unlock_sock_fast(sk, slow);
 
 	/* skb is now orphaned, can be freed outside of locked section */
-	trace_kfree_skb(skb, skb_free_datagram_locked);
 	__kfree_skb(skb);
 }
 EXPORT_SYMBOL(skb_free_datagram_locked);
diff --git a/net/core/dev.c b/net/core/dev.c
index 84f01ba81a34..0ebaea16632f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1632,6 +1632,8 @@ static inline int deliver_skb(struct sk_buff *skb,
 			      struct packet_type *pt_prev,
 			      struct net_device *orig_dev)
 {
+	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+		return -ENOMEM;
 	atomic_inc(&skb->users);
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
@@ -1691,7 +1693,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
-/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+/**
+ * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
 * @dev: Network device
 * @txq: number of queues available
 *
@@ -1793,6 +1796,18 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
 EXPORT_SYMBOL(netif_set_real_num_rx_queues);
 #endif
 
+/**
+ * netif_get_num_default_rss_queues - default number of RSS queues
+ *
+ * This routine should set an upper limit on the number of RSS queues
+ * used by default by multiqueue devices.
+ */
+int netif_get_num_default_rss_queues(void)
+{
+	return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
+}
+EXPORT_SYMBOL(netif_get_num_default_rss_queues);
+
 static inline void __netif_reschedule(struct Qdisc *q)
 {
 	struct softnet_data *sd;
@@ -2444,8 +2459,12 @@ static void skb_update_prio(struct sk_buff *skb)
 {
 	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
 
-	if ((!skb->priority) && (skb->sk) && map)
-		skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
+	if (!skb->priority && skb->sk && map) {
+		unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
+
+		if (prioidx < map->priomap_len)
+			skb->priority = map->priomap[prioidx];
+	}
 }
 #else
 #define skb_update_prio(skb)
@@ -2455,6 +2474,23 @@ static DEFINE_PER_CPU(int, xmit_recursion);
 #define RECURSION_LIMIT 10
 
 /**
+ * dev_loopback_xmit - loop back @skb
+ * @skb: buffer to transmit
+ */
+int dev_loopback_xmit(struct sk_buff *skb)
+{
+	skb_reset_mac_header(skb);
+	__skb_pull(skb, skb_network_offset(skb));
+	skb->pkt_type = PACKET_LOOPBACK;
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+	WARN_ON(!skb_dst(skb));
+	skb_dst_force(skb);
+	netif_rx_ni(skb);
+	return 0;
+}
+EXPORT_SYMBOL(dev_loopback_xmit);
+
+/**
 * dev_queue_xmit - transmit a buffer
 * @skb: buffer to transmit
 *
@@ -3137,8 +3173,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	if (netpoll_receive_skb(skb))
 		return NET_RX_DROP;
 
-	if (!skb->skb_iif)
-		skb->skb_iif = skb->dev->ifindex;
 	orig_dev = skb->dev;
 
 	skb_reset_network_header(skb);
@@ -3150,6 +3184,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	rcu_read_lock();
 
 another_round:
+	skb->skb_iif = skb->dev->ifindex;
 
 	__this_cpu_inc(softnet_data.processed);
 
@@ -3228,7 +3263,10 @@ ncls:
 	}
 
 	if (pt_prev) {
-		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+		if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+			ret = -ENOMEM;
+		else
+			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 	} else {
 		atomic_long_inc(&skb->dev->rx_dropped);
 		kfree_skb(skb);
@@ -5642,7 +5680,7 @@ int netdev_refcnt_read(const struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_refcnt_read);
 
-/*
+/**
 * netdev_wait_allrefs - wait until all references are gone.
 *
 * This is called when unregistering network devices.
@@ -6279,7 +6317,8 @@ static struct hlist_head *netdev_create_hash(void)
 /* Initialize per network namespace state */
 static int __net_init netdev_init(struct net *net)
 {
-	INIT_LIST_HEAD(&net->dev_base_head);
+	if (net != &init_net)
+		INIT_LIST_HEAD(&net->dev_base_head);
 
 	net->dev_name_head = netdev_create_hash();
 	if (net->dev_name_head == NULL)
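
The new netif_get_num_default_rss_queues() gives drivers one shared policy for sizing RSS by default instead of each driver capping at num_online_cpus() on its own. A minimal sketch of a driver probe path using it; example_setup_rss and EXAMPLE_HW_MAX_QUEUES are hypothetical names, not part of this diff:

static int example_setup_rss(struct net_device *netdev)
{
	/* never advertise more queues than the hardware supports, and
	 * respect the stack's default RSS ceiling as well */
	int nqueues = min_t(int, EXAMPLE_HW_MAX_QUEUES,
			    netif_get_num_default_rss_queues());

	return netif_set_real_num_rx_queues(netdev, nqueues);
}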
diff --git a/net/core/dst.c b/net/core/dst.c
index 43d94cedbf7c..069d51d29414 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -94,7 +94,7 @@ loop:
 		 * But we do not have state "obsoleted, but
 		 * referenced by parent", so it is right.
 		 */
-		if (dst->obsolete > 1)
+		if (dst->obsolete > 0)
 			continue;
 
 		___dst_free(dst);
@@ -152,7 +152,7 @@ EXPORT_SYMBOL(dst_discard);
 const u32 dst_default_metrics[RTAX_MAX];
 
 void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
-		int initial_ref, int initial_obsolete, int flags)
+		int initial_ref, int initial_obsolete, unsigned short flags)
 {
 	struct dst_entry *dst;
 
@@ -171,7 +171,6 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 	dst_init_metrics(dst, dst_default_metrics, true);
 	dst->expires = 0UL;
 	dst->path = dst;
-	RCU_INIT_POINTER(dst->_neighbour, NULL);
 #ifdef CONFIG_XFRM
 	dst->xfrm = NULL;
 #endif
@@ -188,6 +187,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 	dst->__use = 0;
 	dst->lastuse = jiffies;
 	dst->flags = flags;
+	dst->pending_confirm = 0;
 	dst->next = NULL;
 	if (!(flags & DST_NOCOUNT))
 		dst_entries_add(ops, 1);
@@ -202,7 +202,7 @@ static void ___dst_free(struct dst_entry *dst)
 	 */
 	if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
 		dst->input = dst->output = dst_discard;
-	dst->obsolete = 2;
+	dst->obsolete = DST_OBSOLETE_DEAD;
 }
 
 void __dst_free(struct dst_entry *dst)
@@ -224,19 +224,12 @@ EXPORT_SYMBOL(__dst_free);
 struct dst_entry *dst_destroy(struct dst_entry * dst)
 {
 	struct dst_entry *child;
-	struct neighbour *neigh;
 
 	smp_rmb();
 
 again:
-	neigh = rcu_dereference_protected(dst->_neighbour, 1);
 	child = dst->child;
 
-	if (neigh) {
-		RCU_INIT_POINTER(dst->_neighbour, NULL);
-		neigh_release(neigh);
-	}
-
 	if (!(dst->flags & DST_NOCOUNT))
 		dst_entries_add(dst->ops, -1);
 
@@ -360,19 +353,9 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 	if (!unregister) {
 		dst->input = dst->output = dst_discard;
 	} else {
-		struct neighbour *neigh;
-
 		dst->dev = dev_net(dst->dev)->loopback_dev;
 		dev_hold(dst->dev);
 		dev_put(dev);
-		rcu_read_lock();
-		neigh = dst_get_neighbour_noref(dst);
-		if (neigh && neigh->dev == dev) {
-			neigh->dev = dst->dev;
-			dev_hold(dst->dev);
-			dev_put(dev);
-		}
-		rcu_read_unlock();
 	}
 }
 
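
The switch from bare integers to DST_OBSOLETE_DEAD (and from `obsolete > 1` to `obsolete > 0`) reads naturally against the named states that accompany this series in include/net/dst.h. Quoted here from memory as context, so treat the exact comments as paraphrase:

#define DST_OBSOLETE_NONE	0	/* entry is usable as-is */
#define DST_OBSOLETE_DEAD	2	/* entry is scheduled for freeing */
#define DST_OBSOLETE_FORCE_CHK	-1	/* always revalidate via ->check() */
#define DST_OBSOLETE_KILL	-2	/* ->check() must fail */

With DEAD as the only positive state, `dst->obsolete > 0` in the gc loop keeps skipping dead-but-referenced entries while the negative states remain subject to revalidation.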
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 9c2afb480270..cbf033dcaf1f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -729,6 +729,40 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
 	return dev->ethtool_ops->set_wol(dev, &wol);
 }
 
+static int ethtool_get_eee(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_eee edata;
+	int rc;
+
+	if (!dev->ethtool_ops->get_eee)
+		return -EOPNOTSUPP;
+
+	memset(&edata, 0, sizeof(struct ethtool_eee));
+	edata.cmd = ETHTOOL_GEEE;
+	rc = dev->ethtool_ops->get_eee(dev, &edata);
+
+	if (rc)
+		return rc;
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int ethtool_set_eee(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_eee edata;
+
+	if (!dev->ethtool_ops->set_eee)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_eee(dev, &edata);
+}
+
 static int ethtool_nway_reset(struct net_device *dev)
 {
 	if (!dev->ethtool_ops->nway_reset)
@@ -1409,6 +1443,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GSET:
 	case ETHTOOL_GDRVINFO:
 	case ETHTOOL_GMSGLVL:
+	case ETHTOOL_GLINK:
 	case ETHTOOL_GCOALESCE:
 	case ETHTOOL_GRINGPARAM:
 	case ETHTOOL_GPAUSEPARAM:
@@ -1417,6 +1452,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GSG:
 	case ETHTOOL_GSSET_INFO:
 	case ETHTOOL_GSTRINGS:
+	case ETHTOOL_GSTATS:
 	case ETHTOOL_GTSO:
 	case ETHTOOL_GPERMADDR:
 	case ETHTOOL_GUFO:
@@ -1429,8 +1465,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GRXCLSRLCNT:
 	case ETHTOOL_GRXCLSRULE:
 	case ETHTOOL_GRXCLSRLALL:
+	case ETHTOOL_GRXFHINDIR:
 	case ETHTOOL_GFEATURES:
+	case ETHTOOL_GCHANNELS:
 	case ETHTOOL_GET_TS_INFO:
+	case ETHTOOL_GEEE:
 		break;
 	default:
 		if (!capable(CAP_NET_ADMIN))
@@ -1471,6 +1510,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		rc = ethtool_set_value_void(dev, useraddr,
 					    dev->ethtool_ops->set_msglevel);
 		break;
+	case ETHTOOL_GEEE:
+		rc = ethtool_get_eee(dev, useraddr);
+		break;
+	case ETHTOOL_SEEE:
+		rc = ethtool_set_eee(dev, useraddr);
+		break;
 	case ETHTOOL_NWAY_RST:
 		rc = ethtool_nway_reset(dev);
 		break;
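
From userspace the new commands travel through the usual SIOCETHTOOL ioctl. A minimal sketch of reading EEE state; error handling is trimmed and the helper name is ours, but the ioctl plumbing itself is the standard ethtool pattern:

#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

/* fd is any AF_INET socket; returns 0 on success, -1 on error */
int example_get_eee(int fd, const char *ifname, struct ethtool_eee *eee)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	memset(eee, 0, sizeof(*eee));
	eee->cmd = ETHTOOL_GEEE;	/* the command added by this diff */
	ifr.ifr_data = (void *)eee;
	return ioctl(fd, SIOCETHTOOL, &ifr);
}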
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 72cceb79d0d4..ab7db83236c9 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -151,6 +151,8 @@ static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 
 	list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
 		list_del_rcu(&rule->list);
+		if (ops->delete)
+			ops->delete(rule);
 		fib_rule_put(rule);
 	}
 }
@@ -499,6 +501,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).pid);
+		if (ops->delete)
+			ops->delete(rule);
 		fib_rule_put(rule);
 		flush_route_cache(ops);
 		rules_ops_put(ops);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index a225089df5b6..466820b6e344 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -4,6 +4,7 @@
 #include <linux/ipv6.h>
 #include <linux/if_vlan.h>
 #include <net/ip.h>
+#include <net/ipv6.h>
 #include <linux/if_tunnel.h>
 #include <linux/if_pppox.h>
 #include <linux/ppp_defs.h>
@@ -55,8 +56,8 @@ ipv6:
 			return false;
 
 		ip_proto = iph->nexthdr;
-		flow->src = iph->saddr.s6_addr32[3];
-		flow->dst = iph->daddr.s6_addr32[3];
+		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
+		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
 		nhoff += sizeof(struct ipv6hdr);
 		break;
 	}
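
Hashing only s6_addr32[3] made flows whose addresses differ elsewhere collide; ipv6_addr_hash() folds the whole 128-bit address into the 32-bit flow key. Its definition (added to net/ipv6.h in the same series, quoted from memory) is a plain XOR fold:

static inline u32 ipv6_addr_hash(const struct in6_addr *a)
{
	/* every word of the address now perturbs the flow hash */
	return (__force u32)(a->s6_addr32[0] ^ a->s6_addr32[1] ^
			     a->s6_addr32[2] ^ a->s6_addr32[3]);
}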
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d81d026138f0..117afaf51268 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -474,8 +474,8 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
 }
 EXPORT_SYMBOL(neigh_lookup_nodev);
 
-struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
-			       struct net_device *dev)
+struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
+				 struct net_device *dev, bool want_ref)
 {
 	u32 hash_val;
 	int key_len = tbl->key_len;
@@ -535,14 +535,16 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 	     n1 = rcu_dereference_protected(n1->next,
 			lockdep_is_held(&tbl->lock))) {
 		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
-			neigh_hold(n1);
+			if (want_ref)
+				neigh_hold(n1);
 			rc = n1;
 			goto out_tbl_unlock;
 		}
 	}
 
 	n->dead = 0;
-	neigh_hold(n);
+	if (want_ref)
+		neigh_hold(n);
 	rcu_assign_pointer(n->next,
 			   rcu_dereference_protected(nht->hash_buckets[hash_val],
 						     lockdep_is_held(&tbl->lock)));
@@ -558,7 +560,7 @@ out_neigh_release:
 	neigh_release(n);
 	goto out;
 }
-EXPORT_SYMBOL(neigh_create);
+EXPORT_SYMBOL(__neigh_create);
 
 static u32 pneigh_hash(const void *pkey, int key_len)
 {
@@ -1199,10 +1201,23 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 			write_unlock_bh(&neigh->lock);
 
 			rcu_read_lock();
-			/* On shaper/eql skb->dst->neighbour != neigh :( */
-			if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL)
-				n1 = n2;
+
+			/* Why not just use 'neigh' as-is?  The problem is that
+			 * things such as shaper, eql, and sch_teql can end up
+			 * using alternative, different, neigh objects to output
+			 * the packet in the output path.  So what we need to do
+			 * here is re-lookup the top-level neigh in the path so
+			 * we can reinject the packet there.
+			 */
+			n2 = NULL;
+			if (dst) {
+				n2 = dst_neigh_lookup_skb(dst, skb);
+				if (n2)
+					n1 = n2;
+			}
 			n1->output(n1, skb);
+			if (n2)
+				neigh_release(n2);
 			rcu_read_unlock();
 
 			write_lock_bh(&neigh->lock);
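
With the want_ref parameter in place, the old neigh_create() presumably survives as a header wrapper so existing callers keep their take-a-reference semantics unchanged; something along these lines in include/net/neighbour.h:

static inline struct neighbour *neigh_create(struct neigh_table *tbl,
					     const void *pkey,
					     struct net_device *dev)
{
	/* legacy behaviour: return the entry holding an extra ref */
	return __neigh_create(tbl, pkey, dev, true);
}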
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index fdf9e61d0651..72607174ea5a 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -417,72 +417,6 @@ static struct attribute_group netstat_group = {
 	.name = "statistics",
 	.attrs  = netstat_attrs,
 };
-
-#ifdef CONFIG_WIRELESS_EXT_SYSFS
-/* helper function that does all the locking etc for wireless stats */
-static ssize_t wireless_show(struct device *d, char *buf,
-			     ssize_t (*format)(const struct iw_statistics *,
-					       char *))
-{
-	struct net_device *dev = to_net_dev(d);
-	const struct iw_statistics *iw;
-	ssize_t ret = -EINVAL;
-
-	if (!rtnl_trylock())
-		return restart_syscall();
-	if (dev_isalive(dev)) {
-		iw = get_wireless_stats(dev);
-		if (iw)
-			ret = (*format)(iw, buf);
-	}
-	rtnl_unlock();
-
-	return ret;
-}
-
-/* show function template for wireless fields */
-#define WIRELESS_SHOW(name, field, format_string)			\
-static ssize_t format_iw_##name(const struct iw_statistics *iw, char *buf) \
-{									\
-	return sprintf(buf, format_string, iw->field);			\
-}									\
-static ssize_t show_iw_##name(struct device *d,			\
-			      struct device_attribute *attr, char *buf)	\
-{									\
-	return wireless_show(d, buf, format_iw_##name);			\
-}									\
-static DEVICE_ATTR(name, S_IRUGO, show_iw_##name, NULL)
-
-WIRELESS_SHOW(status, status, fmt_hex);
-WIRELESS_SHOW(link, qual.qual, fmt_dec);
-WIRELESS_SHOW(level, qual.level, fmt_dec);
-WIRELESS_SHOW(noise, qual.noise, fmt_dec);
-WIRELESS_SHOW(nwid, discard.nwid, fmt_dec);
-WIRELESS_SHOW(crypt, discard.code, fmt_dec);
-WIRELESS_SHOW(fragment, discard.fragment, fmt_dec);
-WIRELESS_SHOW(misc, discard.misc, fmt_dec);
-WIRELESS_SHOW(retries, discard.retries, fmt_dec);
-WIRELESS_SHOW(beacon, miss.beacon, fmt_dec);
-
-static struct attribute *wireless_attrs[] = {
-	&dev_attr_status.attr,
-	&dev_attr_link.attr,
-	&dev_attr_level.attr,
-	&dev_attr_noise.attr,
-	&dev_attr_nwid.attr,
-	&dev_attr_crypt.attr,
-	&dev_attr_fragment.attr,
-	&dev_attr_retries.attr,
-	&dev_attr_misc.attr,
-	&dev_attr_beacon.attr,
-	NULL
-};
-
-static struct attribute_group wireless_group = {
-	.name = "wireless",
-	.attrs = wireless_attrs,
-};
-#endif
 #endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_RPS
487 421
488#ifdef CONFIG_RPS 422#ifdef CONFIG_RPS
@@ -1463,14 +1397,6 @@ int netdev_register_kobject(struct net_device *net)
 		groups++;
 
 	*groups++ = &netstat_group;
-#ifdef CONFIG_WIRELESS_EXT_SYSFS
-	if (net->ieee80211_ptr)
-		*groups++ = &wireless_group;
-#ifdef CONFIG_WIRELESS_EXT
-	else if (net->wireless_handlers)
-		*groups++ = &wireless_group;
-#endif
-#endif
 #endif /* CONFIG_SYSFS */
 
 	error = device_add(dev);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index dddbacb8f28c..42f1e1c7514f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,7 +27,9 @@ static DEFINE_MUTEX(net_mutex);
 LIST_HEAD(net_namespace_list);
 EXPORT_SYMBOL_GPL(net_namespace_list);
 
-struct net init_net;
+struct net init_net = {
+	.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
+};
 EXPORT_SYMBOL(init_net);
 
 #define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index f9f40b932e4b..b4c90e42b443 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -715,14 +715,16 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 }
 EXPORT_SYMBOL(netpoll_parse_options);
 
-int __netpoll_setup(struct netpoll *np)
+int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
 {
-	struct net_device *ndev = np->dev;
 	struct netpoll_info *npinfo;
 	const struct net_device_ops *ops;
 	unsigned long flags;
 	int err;
 
+	np->dev = ndev;
+	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
+
 	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
 	    !ndev->netdev_ops->ndo_poll_controller) {
 		np_err(np, "%s doesn't support polling, aborting\n",
@@ -851,13 +853,11 @@ int netpoll_setup(struct netpoll *np)
 		np_info(np, "local IP %pI4\n", &np->local_ip);
 	}
 
-	np->dev = ndev;
-
 	/* fill up the skb queue */
 	refill_skbs();
 
 	rtnl_lock();
-	err = __netpoll_setup(np);
+	err = __netpoll_setup(np, ndev);
 	rtnl_unlock();
 
 	if (err)
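
Callers that previously had to set np->dev themselves before calling __netpoll_setup() now just pass the device, and np->dev_name is filled in as well. A hypothetical in-kernel caller (the function name is ours) reduces to a one-liner under RTNL:

static int example_enable_netpoll(struct net_device *ndev, struct netpoll *np)
{
	ASSERT_RTNL();
	/* latches np->dev and np->dev_name internally now */
	return __netpoll_setup(np, ndev);
}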
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 5b8aa2fae48b..ed0c0431fcd8 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -25,6 +25,8 @@
 #include <net/sock.h>
 #include <net/netprio_cgroup.h>
 
+#include <linux/fdtable.h>
+
 #define PRIOIDX_SZ 128
 
 static unsigned long prioidx_map[PRIOIDX_SZ];
@@ -49,8 +51,9 @@ static int get_prioidx(u32 *prio)
 		return -ENOSPC;
 	}
 	set_bit(prioidx, prioidx_map);
+	if (atomic_read(&max_prioidx) < prioidx)
+		atomic_set(&max_prioidx, prioidx);
 	spin_unlock_irqrestore(&prioidx_map_lock, flags);
-	atomic_set(&max_prioidx, prioidx);
 	*prio = prioidx;
 	return 0;
 }
@@ -64,7 +67,7 @@ static void put_prioidx(u32 idx)
 	spin_unlock_irqrestore(&prioidx_map_lock, flags);
 }
 
-static void extend_netdev_table(struct net_device *dev, u32 new_len)
+static int extend_netdev_table(struct net_device *dev, u32 new_len)
 {
 	size_t new_size = sizeof(struct netprio_map) +
 				((sizeof(u32) * new_len));
@@ -76,7 +79,7 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len)
 
 	if (!new_priomap) {
 		pr_warn("Unable to alloc new priomap!\n");
-		return;
+		return -ENOMEM;
 	}
 
 	for (i = 0;
@@ -89,46 +92,79 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len)
 	rcu_assign_pointer(dev->priomap, new_priomap);
 	if (old_priomap)
 		kfree_rcu(old_priomap, rcu);
+	return 0;
+}
+
+static int write_update_netdev_table(struct net_device *dev)
+{
+	int ret = 0;
+	u32 max_len;
+	struct netprio_map *map;
+
+	rtnl_lock();
+	max_len = atomic_read(&max_prioidx) + 1;
+	map = rtnl_dereference(dev->priomap);
+	if (!map || map->priomap_len < max_len)
+		ret = extend_netdev_table(dev, max_len);
+	rtnl_unlock();
+
+	return ret;
 }
 
-static void update_netdev_tables(void)
+static int update_netdev_tables(void)
 {
+	int ret = 0;
 	struct net_device *dev;
-	u32 max_len = atomic_read(&max_prioidx) + 1;
+	u32 max_len;
 	struct netprio_map *map;
 
 	rtnl_lock();
+	max_len = atomic_read(&max_prioidx) + 1;
 	for_each_netdev(&init_net, dev) {
 		map = rtnl_dereference(dev->priomap);
-		if ((!map) ||
-		    (map->priomap_len < max_len))
-			extend_netdev_table(dev, max_len);
+		/*
+		 * don't allocate priomap if we didn't
+		 * change net_prio.ifpriomap (map == NULL),
+		 * this will speed up skb_update_prio.
+		 */
+		if (map && map->priomap_len < max_len) {
+			ret = extend_netdev_table(dev, max_len);
+			if (ret < 0)
+				break;
+		}
 	}
 	rtnl_unlock();
+	return ret;
 }
 
 static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
 {
 	struct cgroup_netprio_state *cs;
-	int ret;
+	int ret = -EINVAL;
 
 	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
 	if (!cs)
 		return ERR_PTR(-ENOMEM);
 
-	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
-		kfree(cs);
-		return ERR_PTR(-EINVAL);
-	}
+	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx)
+		goto out;
 
 	ret = get_prioidx(&cs->prioidx);
-	if (ret != 0) {
+	if (ret < 0) {
 		pr_warn("No space in priority index array\n");
-		kfree(cs);
-		return ERR_PTR(ret);
+		goto out;
+	}
+
+	ret = update_netdev_tables();
+	if (ret < 0) {
+		put_prioidx(cs->prioidx);
+		goto out;
 	}
 
 	return &cs->css;
+out:
+	kfree(cs);
+	return ERR_PTR(ret);
 }
133 169
134static void cgrp_destroy(struct cgroup *cgrp) 170static void cgrp_destroy(struct cgroup *cgrp)
@@ -141,7 +177,7 @@ static void cgrp_destroy(struct cgroup *cgrp)
 	rtnl_lock();
 	for_each_netdev(&init_net, dev) {
 		map = rtnl_dereference(dev->priomap);
-		if (map)
+		if (map && cs->prioidx < map->priomap_len)
 			map->priomap[cs->prioidx] = 0;
 	}
 	rtnl_unlock();
@@ -165,7 +201,7 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft,
 	rcu_read_lock();
 	for_each_netdev_rcu(&init_net, dev) {
 		map = rcu_dereference(dev->priomap);
-		priority = map ? map->priomap[prioidx] : 0;
+		priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0;
 		cb->fill(cb, dev->name, priority);
 	}
 	rcu_read_unlock();
@@ -198,7 +234,7 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
 
 	/*
 	 *Separate the devname from the associated priority
-	 *and advance the priostr poitner to the priority value
+	 *and advance the priostr pointer to the priority value
 	 */
 	*priostr = '\0';
 	priostr++;
@@ -220,13 +256,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
 	if (!dev)
 		goto out_free_devname;
 
-	update_netdev_tables();
-	ret = 0;
+	ret = write_update_netdev_table(dev);
+	if (ret < 0)
+		goto out_put_dev;
+
 	rcu_read_lock();
 	map = rcu_dereference(dev->priomap);
 	if (map)
 		map->priomap[prioidx] = priority;
 	rcu_read_unlock();
+
+out_put_dev:
 	dev_put(dev);
 
 out_free_devname:
@@ -234,6 +274,56 @@ out_free_devname:
 	return ret;
 }
 
+void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+{
+	struct task_struct *p;
+	char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
+
+	if (!tmp) {
+		pr_warn("Unable to attach cgrp due to alloc failure!\n");
+		return;
+	}
+
+	cgroup_taskset_for_each(p, cgrp, tset) {
+		unsigned int fd;
+		struct fdtable *fdt;
+		struct files_struct *files;
+
+		task_lock(p);
+		files = p->files;
+		if (!files) {
+			task_unlock(p);
+			continue;
+		}
+
+		rcu_read_lock();
+		fdt = files_fdtable(files);
+		for (fd = 0; fd < fdt->max_fds; fd++) {
+			char *path;
+			struct file *file;
+			struct socket *sock;
+			unsigned long s;
+			int rv, err = 0;
+
+			file = fcheck_files(files, fd);
+			if (!file)
+				continue;
+
+			path = d_path(&file->f_path, tmp, PAGE_SIZE);
+			rv = sscanf(path, "socket:[%lu]", &s);
+			if (rv <= 0)
+				continue;
+
+			sock = sock_from_file(file, &err);
+			if (!err)
+				sock_update_netprioidx(sock->sk, p);
+		}
+		rcu_read_unlock();
+		task_unlock(p);
+	}
+	kfree(tmp);
+}
+
 static struct cftype ss_files[] = {
 	{
 		.name = "prioidx",
239 .name = "prioidx", 329 .name = "prioidx",
@@ -251,6 +341,7 @@ struct cgroup_subsys net_prio_subsys = {
 	.name		= "net_prio",
 	.create		= cgrp_create,
 	.destroy	= cgrp_destroy,
+	.attach		= net_prio_attach,
 #ifdef CONFIG_NETPRIO_CGROUP
 	.subsys_id	= net_prio_subsys_id,
 #endif
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 21318d15bbc3..334b930e0de3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -541,19 +541,6 @@ static const int rta_max[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWACTION)]    = TCAA_MAX,
 };
 
-void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
-{
-	struct rtattr *rta;
-	int size = RTA_LENGTH(attrlen);
-
-	rta = (struct rtattr *)skb_put(skb, RTA_ALIGN(size));
-	rta->rta_type = attrtype;
-	rta->rta_len = size;
-	memcpy(RTA_DATA(rta), data, attrlen);
-	memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size);
-}
-EXPORT_SYMBOL(__rta_fill);
-
 int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo)
 {
 	struct sock *rtnl = net->rtnl;
@@ -628,7 +615,7 @@ nla_put_failure:
 EXPORT_SYMBOL(rtnetlink_put_metrics);
 
 int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
-		       u32 ts, u32 tsage, long expires, u32 error)
+		       long expires, u32 error)
 {
 	struct rta_cacheinfo ci = {
 		.rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse),
@@ -636,8 +623,6 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
 		.rta_clntref = atomic_read(&(dst->__refcnt)),
 		.rta_error = error,
 		.rta_id =  id,
-		.rta_ts = ts,
-		.rta_tsage = tsage,
 	};
 
 	if (expires)
@@ -786,6 +771,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(4) /* IFLA_LINK */
 	       + nla_total_size(4) /* IFLA_MASTER */
 	       + nla_total_size(4) /* IFLA_PROMISCUITY */
+	       + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */
+	       + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(ext_filter_mask
@@ -904,6 +891,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	    nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
 	    nla_put_u32(skb, IFLA_GROUP, dev->group) ||
 	    nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
+	    nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
+#ifdef CONFIG_RPS
+	    nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
+#endif
 	    (dev->ifindex != dev->iflink &&
 	     nla_put_u32(skb, IFLA_LINK, dev->iflink)) ||
 	    (dev->master &&
@@ -1121,6 +1112,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_AF_SPEC]		= { .type = NLA_NESTED },
 	[IFLA_EXT_MASK]		= { .type = NLA_U32 },
 	[IFLA_PROMISCUITY]	= { .type = NLA_U32 },
+	[IFLA_NUM_TX_QUEUES]	= { .type = NLA_U32 },
+	[IFLA_NUM_RX_QUEUES]	= { .type = NLA_U32 },
 };
 EXPORT_SYMBOL(ifla_policy);
 
1126 1119
@@ -1639,17 +1632,22 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
 {
 	int err;
 	struct net_device *dev;
-	unsigned int num_queues = 1;
+	unsigned int num_tx_queues = 1;
+	unsigned int num_rx_queues = 1;
 
-	if (ops->get_tx_queues) {
-		err = ops->get_tx_queues(src_net, tb);
-		if (err < 0)
-			goto err;
-		num_queues = err;
-	}
+	if (tb[IFLA_NUM_TX_QUEUES])
+		num_tx_queues = nla_get_u32(tb[IFLA_NUM_TX_QUEUES]);
+	else if (ops->get_num_tx_queues)
+		num_tx_queues = ops->get_num_tx_queues();
+
+	if (tb[IFLA_NUM_RX_QUEUES])
+		num_rx_queues = nla_get_u32(tb[IFLA_NUM_RX_QUEUES]);
+	else if (ops->get_num_rx_queues)
+		num_rx_queues = ops->get_num_rx_queues();
 
 	err = -ENOMEM;
-	dev = alloc_netdev_mq(ops->priv_size, ifname, ops->setup, num_queues);
+	dev = alloc_netdev_mqs(ops->priv_size, ifname, ops->setup,
+			       num_tx_queues, num_rx_queues);
 	if (!dev)
 		goto err;
 
@@ -2189,7 +2187,7 @@ skip:
 }
 
 /**
- * ndo_dflt_fdb_dump: default netdevice operation to dump an FDB table.
+ * ndo_dflt_fdb_dump - default netdevice operation to dump an FDB table.
 * @nlh: netlink message header
 * @dev: netdevice
 *
@@ -2366,8 +2364,13 @@ static struct notifier_block rtnetlink_dev_notifier = {
 static int __net_init rtnetlink_net_init(struct net *net)
 {
 	struct sock *sk;
-	sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
-				   rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
+	struct netlink_kernel_cfg cfg = {
+		.groups		= RTNLGRP_MAX,
+		.input		= rtnetlink_rcv,
+		.cb_mutex	= &rtnl_mutex,
+	};
+
+	sk = netlink_kernel_create(net, NETLINK_ROUTE, THIS_MODULE, &cfg);
 	if (!sk)
 		return -ENOMEM;
 	net->rtnl = sk;
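
On the kernel side, link types opt into the new queue-count defaults with the get_num_tx_queues/get_num_rx_queues callbacks, which apply only when userspace did not pass IFLA_NUM_TX_QUEUES/IFLA_NUM_RX_QUEUES. A hypothetical rtnl_link_ops wiring, with made-up names and an arbitrary count:

static unsigned int example_get_num_queues(void)
{
	return 4;	/* illustrative default, not from this diff */
}

static struct rtnl_link_ops example_link_ops = {
	.kind			= "example",
	.get_num_tx_queues	= example_get_num_queues,
	.get_num_rx_queues	= example_get_num_queues,
};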
diff --git a/net/core/scm.c b/net/core/scm.c
index 611c5efd4cb0..8f6ccfd68ef4 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -109,25 +109,9 @@ void __scm_destroy(struct scm_cookie *scm)
 
 	if (fpl) {
 		scm->fp = NULL;
-		if (current->scm_work_list) {
-			list_add_tail(&fpl->list, current->scm_work_list);
-		} else {
-			LIST_HEAD(work_list);
-
-			current->scm_work_list = &work_list;
-
-			list_add(&fpl->list, &work_list);
-			while (!list_empty(&work_list)) {
-				fpl = list_first_entry(&work_list, struct scm_fp_list, list);
-
-				list_del(&fpl->list);
-				for (i=fpl->count-1; i>=0; i--)
-					fput(fpl->fp[i]);
-				kfree(fpl);
-			}
-
-			current->scm_work_list = NULL;
-		}
+		for (i=fpl->count-1; i>=0; i--)
+			fput(fpl->fp[i]);
+		kfree(fpl);
 	}
 }
 EXPORT_SYMBOL(__scm_destroy);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 46a3d23d259e..368f65c15e4f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -160,8 +160,8 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 *	@node: numa node to allocate memory on
 *
 *	Allocate a new &sk_buff. The returned buffer has no headroom and a
- *	tail room of size bytes. The object has a reference count of one.
- *	The return is the buffer. On a failure the return is %NULL.
+ *	tail room of at least size bytes. The object has a reference count
+ *	of one. The return is the buffer. On a failure the return is %NULL.
 *
 *	Buffers may only be allocated from interrupts using a @gfp_mask of
 *	%GFP_ATOMIC.
@@ -296,9 +296,12 @@ EXPORT_SYMBOL(build_skb);
 struct netdev_alloc_cache {
 	struct page *page;
 	unsigned int offset;
+	unsigned int pagecnt_bias;
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
+#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
+
 /**
 * netdev_alloc_frag - allocate a page fragment
 * @fragsz: fragment size
@@ -317,17 +320,26 @@ void *netdev_alloc_frag(unsigned int fragsz)
 	if (unlikely(!nc->page)) {
 refill:
 		nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+		if (unlikely(!nc->page))
+			goto end;
+recycle:
+		atomic_set(&nc->page->_count, NETDEV_PAGECNT_BIAS);
+		nc->pagecnt_bias = NETDEV_PAGECNT_BIAS;
 		nc->offset = 0;
 	}
-	if (likely(nc->page)) {
-		if (nc->offset + fragsz > PAGE_SIZE) {
-			put_page(nc->page);
-			goto refill;
-		}
-		data = page_address(nc->page) + nc->offset;
-		nc->offset += fragsz;
-		get_page(nc->page);
+
+	if (nc->offset + fragsz > PAGE_SIZE) {
+		/* avoid unnecessary locked operations if possible */
+		if ((atomic_read(&nc->page->_count) == nc->pagecnt_bias) ||
+		    atomic_sub_and_test(nc->pagecnt_bias, &nc->page->_count))
+			goto recycle;
+		goto refill;
 	}
+
+	data = page_address(nc->page) + nc->offset;
+	nc->offset += fragsz;
+	nc->pagecnt_bias--;
+end:
 	local_irq_restore(flags);
 	return data;
 }
@@ -353,7 +365,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 	unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) +
 			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
-	if (fragsz <= PAGE_SIZE && !(gfp_mask & __GFP_WAIT)) {
+	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
 		void *data = netdev_alloc_frag(fragsz);
 
 		if (likely(data)) {
@@ -713,7 +725,8 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
 
-/* skb_copy_ubufs - copy userspace skb frags buffers to kernel
+/**
+ *	skb_copy_ubufs	-	copy userspace skb frags buffers to kernel
 *	@skb: the skb to modify
 *	@gfp_mask: allocation priority
 *
@@ -738,7 +751,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 		u8 *vaddr;
 		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
 
-		page = alloc_page(GFP_ATOMIC);
+		page = alloc_page(gfp_mask);
 		if (!page) {
 			while (head) {
 				struct page *next = (struct page *)head->private;
@@ -756,22 +769,22 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 	}
 
 	/* skb frags release userspace buffers */
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+	for (i = 0; i < num_frags; i++)
 		skb_frag_unref(skb, i);
 
 	uarg->callback(uarg);
 
 	/* skb frags point to kernel buffers */
-	for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) {
-		__skb_fill_page_desc(skb, i-1, head, 0,
-				     skb_shinfo(skb)->frags[i - 1].size);
+	for (i = num_frags - 1; i >= 0; i--) {
+		__skb_fill_page_desc(skb, i, head, 0,
+				     skb_shinfo(skb)->frags[i].size);
 		head = (struct page *)head->private;
 	}
 
 	skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
 	return 0;
 }
-
+EXPORT_SYMBOL_GPL(skb_copy_ubufs);
 
 /**
 *	skb_clone	-	duplicate an sk_buff
@@ -791,10 +804,8 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 {
 	struct sk_buff *n;
 
-	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-		if (skb_copy_ubufs(skb, gfp_mask))
-			return NULL;
-	}
+	if (skb_orphan_frags(skb, gfp_mask))
+		return NULL;
 
 	n = skb + 1;
 	if (skb->fclone == SKB_FCLONE_ORIG &&
@@ -914,12 +925,10 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
 	if (skb_shinfo(skb)->nr_frags) {
 		int i;
 
-		if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-			if (skb_copy_ubufs(skb, gfp_mask)) {
-				kfree_skb(n);
-				n = NULL;
-				goto out;
-			}
+		if (skb_orphan_frags(skb, gfp_mask)) {
+			kfree_skb(n);
+			n = NULL;
+			goto out;
 		}
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
@@ -992,10 +1001,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	 */
 	if (skb_cloned(skb)) {
 		/* copy this zero copy skb frags */
-		if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-			if (skb_copy_ubufs(skb, gfp_mask))
-				goto nofrags;
-		}
+		if (skb_orphan_frags(skb, gfp_mask))
+			goto nofrags;
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 			skb_frag_ref(skb, i);
 
@@ -2614,7 +2621,7 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
 EXPORT_SYMBOL(skb_find_text);
 
 /**
- * skb_append_datato_frags:	- append the user data to a skb
+ * skb_append_datato_frags - append the user data to a skb
 * @sk: sock  structure
 * @skb: skb structure to be appened with user data.
 * @getfrag: call back function to be used for getting the user data
diff --git a/net/core/sock.c b/net/core/sock.c
index 9e5b71fda6ec..2676a88f533e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1180,12 +1180,12 @@ void sock_update_classid(struct sock *sk)
 }
 EXPORT_SYMBOL(sock_update_classid);
 
-void sock_update_netprioidx(struct sock *sk)
+void sock_update_netprioidx(struct sock *sk, struct task_struct *task)
 {
 	if (in_interrupt())
 		return;
 
-	sk->sk_cgrp_prioidx = task_netprioidx(current);
+	sk->sk_cgrp_prioidx = task_netprioidx(task);
 }
 EXPORT_SYMBOL_GPL(sock_update_netprioidx);
 #endif
@@ -1215,7 +1215,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		atomic_set(&sk->sk_wmem_alloc, 1);
 
 		sock_update_classid(sk);
-		sock_update_netprioidx(sk);
+		sock_update_netprioidx(sk, current);
 	}
 
 	return sk;
@@ -1465,6 +1465,11 @@ void sock_rfree(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(sock_rfree);
 
+void sock_edemux(struct sk_buff *skb)
+{
+	sock_put(skb->sk);
+}
+EXPORT_SYMBOL(sock_edemux);
 
 int sock_i_uid(struct sock *sk)
 {
@@ -2154,6 +2159,10 @@ void release_sock(struct sock *sk)
 	spin_lock_bh(&sk->sk_lock.slock);
 	if (sk->sk_backlog.tail)
 		__release_sock(sk);
+
+	if (sk->sk_prot->release_cb)
+		sk->sk_prot->release_cb(sk);
+
 	sk->sk_lock.owned = 0;
 	if (waitqueue_active(&sk->sk_lock.wq))
 		wake_up(&sk->sk_lock.wq);
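
The release_cb hook runs from release_sock() with the socket spinlock held, right after the backlog is drained, so a protocol can queue up work from contexts where the socket was user-owned and flush it here (TCP adopts this shortly after this series for its deferred tasks). A hypothetical protocol would wire it up like so; the example_* names are ours, not from this diff:

static void example_release_cb(struct sock *sk)
{
	/* invoked under sk->sk_lock.slock, after __release_sock() */
	pr_debug("flushing deferred work for sk %p\n", sk);
}

static struct proto example_prot = {
	.name		= "EXAMPLE",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct sock),
	.release_cb	= example_release_cb,
};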
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 5fd146720f39..9d8755e4a7a5 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -4,7 +4,6 @@
 #include <net/netlink.h>
 #include <net/net_namespace.h>
 #include <linux/module.h>
-#include <linux/rtnetlink.h>
 #include <net/sock.h>
 
 #include <linux/inet_diag.h>
@@ -35,9 +34,7 @@ EXPORT_SYMBOL_GPL(sock_diag_save_cookie);
 
 int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
 {
-	__u32 *mem;
-
-	mem = RTA_DATA(__RTA_PUT(skb, attrtype, SK_MEMINFO_VARS * sizeof(__u32)));
+	u32 mem[SK_MEMINFO_VARS];
 
 	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
 	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
@@ -46,11 +43,9 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
 	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
 	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
 	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
+	mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
 
-	return 0;
-
-rtattr_failure:
-	return -EMSGSIZE;
+	return nla_put(skb, attrtype, sizeof(mem), &mem);
 }
 EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
 
56 51
@@ -120,7 +115,7 @@ static inline void sock_diag_unlock_handler(const struct sock_diag_handler *h)
 static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	int err;
-	struct sock_diag_req *req = NLMSG_DATA(nlh);
+	struct sock_diag_req *req = nlmsg_data(nlh);
 	const struct sock_diag_handler *hndl;
 
 	if (nlmsg_len(nlh) < sizeof(*req))
@@ -171,19 +166,36 @@ static void sock_diag_rcv(struct sk_buff *skb)
 	mutex_unlock(&sock_diag_mutex);
 }
 
-struct sock *sock_diag_nlsk;
-EXPORT_SYMBOL_GPL(sock_diag_nlsk);
+static int __net_init diag_net_init(struct net *net)
+{
+	struct netlink_kernel_cfg cfg = {
+		.input	= sock_diag_rcv,
+	};
+
+	net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG,
+					       THIS_MODULE, &cfg);
+	return net->diag_nlsk == NULL ? -ENOMEM : 0;
+}
+
+static void __net_exit diag_net_exit(struct net *net)
+{
+	netlink_kernel_release(net->diag_nlsk);
+	net->diag_nlsk = NULL;
+}
+
+static struct pernet_operations diag_net_ops = {
+	.init = diag_net_init,
+	.exit = diag_net_exit,
+};
 
 static int __init sock_diag_init(void)
 {
-	sock_diag_nlsk = netlink_kernel_create(&init_net, NETLINK_SOCK_DIAG, 0,
-					       sock_diag_rcv, NULL, THIS_MODULE);
-	return sock_diag_nlsk == NULL ? -ENOMEM : 0;
+	return register_pernet_subsys(&diag_net_ops);
 }
 
 static void __exit sock_diag_exit(void)
 {
-	netlink_kernel_release(sock_diag_nlsk);
+	unregister_pernet_subsys(&diag_net_ops);
 }
 
 module_init(sock_diag_init);