author     Linus Torvalds <torvalds@linux-foundation.org>  2012-07-24 13:01:50 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-07-24 13:01:50 -0400
commit     3c4cfadef6a1665d9cd02a543782d03d3e6740c6 (patch)
tree       3df72faaacd494d5ac8c9668df4f529b1b5e4457 /net/core
parent     e017507f37d5cb8b541df165a824958bc333bec3 (diff)
parent     320f5ea0cedc08ef65d67e056bcb9d181386ef2c (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David S Miller:

 1) Remove the ipv4 routing cache. Now lookups go directly into the FIB
    trie and use prebuilt routes cached there.

    No more garbage collection, no more rDOS attacks on the routing
    cache. Instead we now get predictable and consistent performance,
    no matter what the pattern of traffic we service.

    This has been almost 2 years in the making. Special thanks to
    Julian Anastasov, Eric Dumazet, Steffen Klassert, and others who
    have helped along the way.

    I'm sure that with a change of this magnitude there will be some
    kind of fallout, but such things ought to be simple to fix at this
    point. Luckily I'm not European so I'll be around all of August to
    fix things :-)

    The major stages of this work here are each fronted by a forced
    merge commit whose commit message contains a top-level description
    of the motivations and implementation issues.

 2) Pre-demux of established ipv4 TCP sockets, saves a route demux on
    input.

 3) TCP SYN/ACK performance tweaks from Eric Dumazet.

 4) Add namespace support for netfilter L4 conntrack helpers, from Gao
    Feng.

 5) Add config mechanism for Energy Efficient Ethernet to ethtool, from
    Yuval Mintz.

 6) Remove quadratic behavior from /proc/net/unix, from Eric Dumazet.

 7) Support for connection tracker helpers in userspace, from Pablo
    Neira Ayuso.

 8) Allow userspace driven TX load balancing functions in TEAM driver,
    from Jiri Pirko.

 9) Kill off NLMSG_PUT and RTA_PUT macros, more gross stuff with
    embedded gotos.

10) TCP Small Queues, essentially minimize the amount of TCP data
    queued up in the packet scheduler layer. Whereas the existing BQL
    (Byte Queue Limits) limits the pkt_sched --> netdevice queuing
    levels, this controls the TCP --> pkt_sched queueing levels. From
    Eric Dumazet.

11) Reduce the number of get_page/put_page ops done on SKB fragments,
    from Alexander Duyck.

12) Implement protection against blind resets in TCP (RFC 5961), from
    Eric Dumazet.

13) Support the client side of TCP Fast Open, basically the ability to
    send data in the SYN exchange, from Yuchung Cheng.

    Basically, the sender queues up data with a sendmsg() call using
    MSG_FASTOPEN, then they do the connect() which emits the queued up
    fastopen data.

14) Avoid all the problems we get into in TCP when timers or PMTU
    events hit a locked socket. The TCP Small Queues changes added a
    tcp_release_cb() that allows us to queue work up to the
    release_sock() caller, and that's what we use here too. From Eric
    Dumazet.

15) Zero copy on TX support for TUN driver, from Michael S. Tsirkin.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1870 commits)
  genetlink: define lockdep_genl_is_held() when CONFIG_LOCKDEP
  r8169: revert "add byte queue limit support".
  ipv4: Change rt->rt_iif encoding.
  net: Make skb->skb_iif always track skb->dev
  ipv4: Prepare for change of rt->rt_iif encoding.
  ipv4: Remove all RTCF_DIRECTSRC handliing.
  ipv4: Really ignore ICMP address requests/replies.
  decnet: Don't set RTCF_DIRECTSRC.
  net/ipv4/ip_vti.c: Fix __rcu warnings detected by sparse.
  ipv4: Remove redundant assignment
  rds: set correct msg_namelen
  openvswitch: potential NULL deref in sample()
  tcp: dont drop MTU reduction indications
  bnx2x: Add new 57840 device IDs
  tcp: avoid oops in tcp_metrics and reset tcpm_stamp
  niu: Change niu_rbr_fill() to use unlikely() to check niu_rbr_add_page() return value
  niu: Fix to check for dma mapping errors.
  net: Fix references to out-of-scope variables in put_cmsg_compat()
  net: ethernet: davinci_emac: add pm_runtime support
  net: ethernet: davinci_emac: Remove unnecessary #include
  ...
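Item 13's client-side API can be illustrated with a short userspace sketch (an illustration only, not part of the pull: the address, port, and payload below are placeholders, and MSG_FASTOPEN needs a kernel with this series applied). A single sendto() with MSG_FASTOPEN takes the place of the usual connect()+send() pair:

#include <stdio.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/socket.h>

#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN	0x20000000	/* value from linux/socket.h */
#endif

int main(void)
{
	const char req[] = "GET / HTTP/1.0\r\n\r\n";	/* placeholder payload */
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port   = htons(80),		/* placeholder port */
	};
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	inet_pton(AF_INET, "192.0.2.1", &addr.sin_addr);	/* placeholder host */

	/* One call replaces connect()+send(): the payload rides in the SYN
	 * when a Fast Open cookie is available, otherwise the kernel falls
	 * back to a normal three-way handshake and sends it afterwards. */
	if (sendto(fd, req, sizeof(req) - 1, MSG_FASTOPEN,
		   (struct sockaddr *)&addr, sizeof(addr)) < 0)
		perror("sendto(MSG_FASTOPEN)");
	return 0;
}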
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/datagram.c         1
-rw-r--r--  net/core/dev.c             44
-rw-r--r--  net/core/dst.c             25
-rw-r--r--  net/core/ethtool.c         45
-rw-r--r--  net/core/fib_rules.c        4
-rw-r--r--  net/core/flow_dissector.c   5
-rw-r--r--  net/core/neighbour.c       31
-rw-r--r--  net/core/net-sysfs.c       74
-rw-r--r--  net/core/netpoll.c         10
-rw-r--r--  net/core/netprio_cgroup.c  53
-rw-r--r--  net/core/rtnetlink.c       57
-rw-r--r--  net/core/skbuff.c          71
-rw-r--r--  net/core/sock.c            15
-rw-r--r--  net/core/sock_diag.c       42
14 files changed, 284 insertions, 193 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ae6acf6a3de..0337e2b7686 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -248,7 +248,6 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
 	unlock_sock_fast(sk, slow);
 
 	/* skb is now orphaned, can be freed outside of locked section */
-	trace_kfree_skb(skb, skb_free_datagram_locked);
 	__kfree_skb(skb);
 }
 EXPORT_SYMBOL(skb_free_datagram_locked);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1cb0d8a6aa6..0ebaea16632 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1632,6 +1632,8 @@ static inline int deliver_skb(struct sk_buff *skb,
 			      struct packet_type *pt_prev,
 			      struct net_device *orig_dev)
 {
+	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+		return -ENOMEM;
 	atomic_inc(&skb->users);
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
@@ -1691,7 +1693,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
-/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+/**
+ * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
  * @dev: Network device
  * @txq: number of queues available
  *
@@ -1793,6 +1796,18 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
 EXPORT_SYMBOL(netif_set_real_num_rx_queues);
 #endif
 
+/**
+ * netif_get_num_default_rss_queues - default number of RSS queues
+ *
+ * This routine should set an upper limit on the number of RSS queues
+ * used by default by multiqueue devices.
+ */
+int netif_get_num_default_rss_queues(void)
+{
+	return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
+}
+EXPORT_SYMBOL(netif_get_num_default_rss_queues);
+
 static inline void __netif_reschedule(struct Qdisc *q)
 {
 	struct softnet_data *sd;
@@ -2459,6 +2474,23 @@ static DEFINE_PER_CPU(int, xmit_recursion);
 #define RECURSION_LIMIT 10
 
 /**
+ * dev_loopback_xmit - loop back @skb
+ * @skb: buffer to transmit
+ */
+int dev_loopback_xmit(struct sk_buff *skb)
+{
+	skb_reset_mac_header(skb);
+	__skb_pull(skb, skb_network_offset(skb));
+	skb->pkt_type = PACKET_LOOPBACK;
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+	WARN_ON(!skb_dst(skb));
+	skb_dst_force(skb);
+	netif_rx_ni(skb);
+	return 0;
+}
+EXPORT_SYMBOL(dev_loopback_xmit);
+
+/**
  * dev_queue_xmit - transmit a buffer
  * @skb: buffer to transmit
  *
@@ -3141,8 +3173,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	if (netpoll_receive_skb(skb))
 		return NET_RX_DROP;
 
-	if (!skb->skb_iif)
-		skb->skb_iif = skb->dev->ifindex;
 	orig_dev = skb->dev;
 
 	skb_reset_network_header(skb);
@@ -3154,6 +3184,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	rcu_read_lock();
 
 another_round:
+	skb->skb_iif = skb->dev->ifindex;
 
 	__this_cpu_inc(softnet_data.processed);
 
@@ -3232,7 +3263,10 @@ ncls:
 	}
 
 	if (pt_prev) {
-		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+		if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+			ret = -ENOMEM;
+		else
+			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 	} else {
 		atomic_long_inc(&skb->dev->rx_dropped);
 		kfree_skb(skb);
@@ -5646,7 +5680,7 @@ int netdev_refcnt_read(const struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_refcnt_read);
 
-/*
+/**
  * netdev_wait_allrefs - wait until all references are gone.
  *
  * This is called when unregistering network devices.
diff --git a/net/core/dst.c b/net/core/dst.c
index 43d94cedbf7..069d51d2941 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -94,7 +94,7 @@ loop:
 			 * But we do not have state "obsoleted, but
 			 * referenced by parent", so it is right.
 			 */
-			if (dst->obsolete > 1)
+			if (dst->obsolete > 0)
 				continue;
 
 			___dst_free(dst);
@@ -152,7 +152,7 @@ EXPORT_SYMBOL(dst_discard);
 const u32 dst_default_metrics[RTAX_MAX];
 
 void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
-		int initial_ref, int initial_obsolete, int flags)
+		int initial_ref, int initial_obsolete, unsigned short flags)
 {
 	struct dst_entry *dst;
 
@@ -171,7 +171,6 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 	dst_init_metrics(dst, dst_default_metrics, true);
 	dst->expires = 0UL;
 	dst->path = dst;
-	RCU_INIT_POINTER(dst->_neighbour, NULL);
 #ifdef CONFIG_XFRM
 	dst->xfrm = NULL;
 #endif
@@ -188,6 +187,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 	dst->__use = 0;
 	dst->lastuse = jiffies;
 	dst->flags = flags;
+	dst->pending_confirm = 0;
 	dst->next = NULL;
 	if (!(flags & DST_NOCOUNT))
 		dst_entries_add(ops, 1);
@@ -202,7 +202,7 @@ static void ___dst_free(struct dst_entry *dst)
 	 */
 	if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
 		dst->input = dst->output = dst_discard;
-	dst->obsolete = 2;
+	dst->obsolete = DST_OBSOLETE_DEAD;
 }
 
 void __dst_free(struct dst_entry *dst)
@@ -224,19 +224,12 @@ EXPORT_SYMBOL(__dst_free);
 struct dst_entry *dst_destroy(struct dst_entry * dst)
 {
 	struct dst_entry *child;
-	struct neighbour *neigh;
 
 	smp_rmb();
 
 again:
-	neigh = rcu_dereference_protected(dst->_neighbour, 1);
 	child = dst->child;
 
-	if (neigh) {
-		RCU_INIT_POINTER(dst->_neighbour, NULL);
-		neigh_release(neigh);
-	}
-
 	if (!(dst->flags & DST_NOCOUNT))
 		dst_entries_add(dst->ops, -1);
 
@@ -360,19 +353,9 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 	if (!unregister) {
 		dst->input = dst->output = dst_discard;
 	} else {
-		struct neighbour *neigh;
-
 		dst->dev = dev_net(dst->dev)->loopback_dev;
 		dev_hold(dst->dev);
 		dev_put(dev);
-		rcu_read_lock();
-		neigh = dst_get_neighbour_noref(dst);
-		if (neigh && neigh->dev == dev) {
-			neigh->dev = dst->dev;
-			dev_hold(dst->dev);
-			dev_put(dev);
-		}
-		rcu_read_unlock();
 	}
 }
 
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 9c2afb48027..cbf033dcaf1 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -729,6 +729,40 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
 	return dev->ethtool_ops->set_wol(dev, &wol);
 }
 
+static int ethtool_get_eee(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_eee edata;
+	int rc;
+
+	if (!dev->ethtool_ops->get_eee)
+		return -EOPNOTSUPP;
+
+	memset(&edata, 0, sizeof(struct ethtool_eee));
+	edata.cmd = ETHTOOL_GEEE;
+	rc = dev->ethtool_ops->get_eee(dev, &edata);
+
+	if (rc)
+		return rc;
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int ethtool_set_eee(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_eee edata;
+
+	if (!dev->ethtool_ops->set_eee)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_eee(dev, &edata);
+}
+
 static int ethtool_nway_reset(struct net_device *dev)
 {
 	if (!dev->ethtool_ops->nway_reset)
@@ -1409,6 +1443,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GSET:
 	case ETHTOOL_GDRVINFO:
 	case ETHTOOL_GMSGLVL:
+	case ETHTOOL_GLINK:
 	case ETHTOOL_GCOALESCE:
 	case ETHTOOL_GRINGPARAM:
 	case ETHTOOL_GPAUSEPARAM:
@@ -1417,6 +1452,7 @@
 	case ETHTOOL_GSG:
 	case ETHTOOL_GSSET_INFO:
 	case ETHTOOL_GSTRINGS:
+	case ETHTOOL_GSTATS:
 	case ETHTOOL_GTSO:
 	case ETHTOOL_GPERMADDR:
 	case ETHTOOL_GUFO:
@@ -1429,8 +1465,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GRXCLSRLCNT:
 	case ETHTOOL_GRXCLSRULE:
 	case ETHTOOL_GRXCLSRLALL:
+	case ETHTOOL_GRXFHINDIR:
 	case ETHTOOL_GFEATURES:
+	case ETHTOOL_GCHANNELS:
 	case ETHTOOL_GET_TS_INFO:
+	case ETHTOOL_GEEE:
 		break;
 	default:
 		if (!capable(CAP_NET_ADMIN))
@@ -1471,6 +1510,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		rc = ethtool_set_value_void(dev, useraddr,
 					    dev->ethtool_ops->set_msglevel);
 		break;
+	case ETHTOOL_GEEE:
+		rc = ethtool_get_eee(dev, useraddr);
+		break;
+	case ETHTOOL_SEEE:
+		rc = ethtool_set_eee(dev, useraddr);
+		break;
 	case ETHTOOL_NWAY_RST:
 		rc = ethtool_nway_reset(dev);
 		break;
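The ETHTOOL_GEEE/ETHTOOL_SEEE commands added above are driven from userspace through the standard SIOCETHTOOL ioctl. A minimal sketch of a GEEE query, assuming the struct ethtool_eee layout this series adds to linux/ethtool.h; the interface name "eth0" is a placeholder:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_eee eee = { .cmd = ETHTOOL_GEEE };
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);	/* any socket will do */

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* placeholder ifname */
	ifr.ifr_data = (void *)&eee;

	/* Routed by dev_ethtool() to the driver's get_eee() hook. */
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		perror("ETHTOOL_GEEE");
	else
		printf("EEE enabled=%u active=%u\n",
		       eee.eee_enabled, eee.eee_active);
	return 0;
}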
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 72cceb79d0d..ab7db83236c 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -151,6 +151,8 @@ static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 
 	list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
 		list_del_rcu(&rule->list);
+		if (ops->delete)
+			ops->delete(rule);
 		fib_rule_put(rule);
 	}
 }
@@ -499,6 +501,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).pid);
+		if (ops->delete)
+			ops->delete(rule);
 		fib_rule_put(rule);
 		flush_route_cache(ops);
 		rules_ops_put(ops);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index a225089df5b..466820b6e34 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -4,6 +4,7 @@
 #include <linux/ipv6.h>
 #include <linux/if_vlan.h>
 #include <net/ip.h>
+#include <net/ipv6.h>
 #include <linux/if_tunnel.h>
 #include <linux/if_pppox.h>
 #include <linux/ppp_defs.h>
@@ -55,8 +56,8 @@ ipv6:
 			return false;
 
 		ip_proto = iph->nexthdr;
-		flow->src = iph->saddr.s6_addr32[3];
-		flow->dst = iph->daddr.s6_addr32[3];
+		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
+		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
 		nhoff += sizeof(struct ipv6hdr);
 		break;
 	}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d81d026138f..117afaf5126 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -474,8 +474,8 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
 }
 EXPORT_SYMBOL(neigh_lookup_nodev);
 
-struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
-			       struct net_device *dev)
+struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
+				 struct net_device *dev, bool want_ref)
 {
 	u32 hash_val;
 	int key_len = tbl->key_len;
@@ -535,14 +535,16 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 	     n1 = rcu_dereference_protected(n1->next,
 			lockdep_is_held(&tbl->lock))) {
 		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
-			neigh_hold(n1);
+			if (want_ref)
+				neigh_hold(n1);
 			rc = n1;
 			goto out_tbl_unlock;
 		}
 	}
 
 	n->dead = 0;
-	neigh_hold(n);
+	if (want_ref)
+		neigh_hold(n);
 	rcu_assign_pointer(n->next,
 			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
@@ -558,7 +560,7 @@ out_neigh_release:
 	neigh_release(n);
 	goto out;
 }
-EXPORT_SYMBOL(neigh_create);
+EXPORT_SYMBOL(__neigh_create);
 
 static u32 pneigh_hash(const void *pkey, int key_len)
 {
@@ -1199,10 +1201,23 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 			write_unlock_bh(&neigh->lock);
 
 			rcu_read_lock();
-			/* On shaper/eql skb->dst->neighbour != neigh :( */
-			if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL)
-				n1 = n2;
+
+			/* Why not just use 'neigh' as-is?  The problem is that
+			 * things such as shaper, eql, and sch_teql can end up
+			 * using alternative, different, neigh objects to output
+			 * the packet in the output path.  So what we need to do
+			 * here is re-lookup the top-level neigh in the path so
+			 * we can reinject the packet there.
+			 */
+			n2 = NULL;
+			if (dst) {
+				n2 = dst_neigh_lookup_skb(dst, skb);
+				if (n2)
+					n1 = n2;
+			}
 			n1->output(n1, skb);
+			if (n2)
+				neigh_release(n2);
 			rcu_read_unlock();
 
 			write_lock_bh(&neigh->lock);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index fdf9e61d065..72607174ea5 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -417,72 +417,6 @@ static struct attribute_group netstat_group = {
 	.name  = "statistics",
 	.attrs  = netstat_attrs,
 };
-
-#ifdef CONFIG_WIRELESS_EXT_SYSFS
-/* helper function that does all the locking etc for wireless stats */
-static ssize_t wireless_show(struct device *d, char *buf,
-			     ssize_t (*format)(const struct iw_statistics *,
-					       char *))
-{
-	struct net_device *dev = to_net_dev(d);
-	const struct iw_statistics *iw;
-	ssize_t ret = -EINVAL;
-
-	if (!rtnl_trylock())
-		return restart_syscall();
-	if (dev_isalive(dev)) {
-		iw = get_wireless_stats(dev);
-		if (iw)
-			ret = (*format)(iw, buf);
-	}
-	rtnl_unlock();
-
-	return ret;
-}
-
-/* show function template for wireless fields */
-#define WIRELESS_SHOW(name, field, format_string)			\
-static ssize_t format_iw_##name(const struct iw_statistics *iw, char *buf) \
-{									\
-	return sprintf(buf, format_string, iw->field);			\
-}									\
-static ssize_t show_iw_##name(struct device *d,			\
-			      struct device_attribute *attr, char *buf) \
-{									\
-	return wireless_show(d, buf, format_iw_##name);			\
-}									\
-static DEVICE_ATTR(name, S_IRUGO, show_iw_##name, NULL)
-
-WIRELESS_SHOW(status, status, fmt_hex);
-WIRELESS_SHOW(link, qual.qual, fmt_dec);
-WIRELESS_SHOW(level, qual.level, fmt_dec);
-WIRELESS_SHOW(noise, qual.noise, fmt_dec);
-WIRELESS_SHOW(nwid, discard.nwid, fmt_dec);
-WIRELESS_SHOW(crypt, discard.code, fmt_dec);
-WIRELESS_SHOW(fragment, discard.fragment, fmt_dec);
-WIRELESS_SHOW(misc, discard.misc, fmt_dec);
-WIRELESS_SHOW(retries, discard.retries, fmt_dec);
-WIRELESS_SHOW(beacon, miss.beacon, fmt_dec);
-
-static struct attribute *wireless_attrs[] = {
-	&dev_attr_status.attr,
-	&dev_attr_link.attr,
-	&dev_attr_level.attr,
-	&dev_attr_noise.attr,
-	&dev_attr_nwid.attr,
-	&dev_attr_crypt.attr,
-	&dev_attr_fragment.attr,
-	&dev_attr_retries.attr,
-	&dev_attr_misc.attr,
-	&dev_attr_beacon.attr,
-	NULL
-};
-
-static struct attribute_group wireless_group = {
-	.name = "wireless",
-	.attrs = wireless_attrs,
-};
-#endif
 #endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_RPS
@@ -1463,14 +1397,6 @@ int netdev_register_kobject(struct net_device *net)
 		groups++;
 
 	*groups++ = &netstat_group;
-#ifdef CONFIG_WIRELESS_EXT_SYSFS
-	if (net->ieee80211_ptr)
-		*groups++ = &wireless_group;
-#ifdef CONFIG_WIRELESS_EXT
-	else if (net->wireless_handlers)
-		*groups++ = &wireless_group;
-#endif
-#endif
 #endif /* CONFIG_SYSFS */
 
 	error = device_add(dev);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index f9f40b932e4..b4c90e42b44 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -715,14 +715,16 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 }
 EXPORT_SYMBOL(netpoll_parse_options);
 
-int __netpoll_setup(struct netpoll *np)
+int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
 {
-	struct net_device *ndev = np->dev;
 	struct netpoll_info *npinfo;
 	const struct net_device_ops *ops;
 	unsigned long flags;
 	int err;
 
+	np->dev = ndev;
+	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
+
 	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
 	    !ndev->netdev_ops->ndo_poll_controller) {
 		np_err(np, "%s doesn't support polling, aborting\n",
@@ -851,13 +853,11 @@ int netpoll_setup(struct netpoll *np)
 		np_info(np, "local IP %pI4\n", &np->local_ip);
 	}
 
-	np->dev = ndev;
-
 	/* fill up the skb queue */
 	refill_skbs();
 
 	rtnl_lock();
-	err = __netpoll_setup(np);
+	err = __netpoll_setup(np, ndev);
 	rtnl_unlock();
 
 	if (err)
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index b2e9caa1ad1..63d15e8f80e 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -25,6 +25,8 @@
 #include <net/sock.h>
 #include <net/netprio_cgroup.h>
 
+#include <linux/fdtable.h>
+
 #define PRIOIDX_SZ 128
 
 static unsigned long prioidx_map[PRIOIDX_SZ];
@@ -272,6 +274,56 @@ out_free_devname:
 	return ret;
 }
 
+void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+{
+	struct task_struct *p;
+	char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
+
+	if (!tmp) {
+		pr_warn("Unable to attach cgrp due to alloc failure!\n");
+		return;
+	}
+
+	cgroup_taskset_for_each(p, cgrp, tset) {
+		unsigned int fd;
+		struct fdtable *fdt;
+		struct files_struct *files;
+
+		task_lock(p);
+		files = p->files;
+		if (!files) {
+			task_unlock(p);
+			continue;
+		}
+
+		rcu_read_lock();
+		fdt = files_fdtable(files);
+		for (fd = 0; fd < fdt->max_fds; fd++) {
+			char *path;
+			struct file *file;
+			struct socket *sock;
+			unsigned long s;
+			int rv, err = 0;
+
+			file = fcheck_files(files, fd);
+			if (!file)
+				continue;
+
+			path = d_path(&file->f_path, tmp, PAGE_SIZE);
+			rv = sscanf(path, "socket:[%lu]", &s);
+			if (rv <= 0)
+				continue;
+
+			sock = sock_from_file(file, &err);
+			if (!err)
+				sock_update_netprioidx(sock->sk, p);
+		}
+		rcu_read_unlock();
+		task_unlock(p);
+	}
+	kfree(tmp);
+}
+
 static struct cftype ss_files[] = {
 	{
 		.name = "prioidx",
@@ -289,6 +341,7 @@ struct cgroup_subsys net_prio_subsys = {
 	.name		= "net_prio",
 	.create		= cgrp_create,
 	.destroy	= cgrp_destroy,
+	.attach		= net_prio_attach,
 #ifdef CONFIG_NETPRIO_CGROUP
 	.subsys_id	= net_prio_subsys_id,
 #endif
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 21318d15bbc..334b930e0de 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -541,19 +541,6 @@ static const int rta_max[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWACTION)]    = TCAA_MAX,
 };
 
-void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
-{
-	struct rtattr *rta;
-	int size = RTA_LENGTH(attrlen);
-
-	rta = (struct rtattr *)skb_put(skb, RTA_ALIGN(size));
-	rta->rta_type = attrtype;
-	rta->rta_len = size;
-	memcpy(RTA_DATA(rta), data, attrlen);
-	memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size);
-}
-EXPORT_SYMBOL(__rta_fill);
-
 int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo)
 {
 	struct sock *rtnl = net->rtnl;
@@ -628,7 +615,7 @@ nla_put_failure:
 EXPORT_SYMBOL(rtnetlink_put_metrics);
 
 int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
-		       u32 ts, u32 tsage, long expires, u32 error)
+		       long expires, u32 error)
 {
 	struct rta_cacheinfo ci = {
 		.rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse),
@@ -636,8 +623,6 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
 		.rta_clntref = atomic_read(&(dst->__refcnt)),
 		.rta_error = error,
 		.rta_id =  id,
-		.rta_ts = ts,
-		.rta_tsage = tsage,
 	};
 
 	if (expires)
@@ -786,6 +771,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(4) /* IFLA_LINK */
 	       + nla_total_size(4) /* IFLA_MASTER */
 	       + nla_total_size(4) /* IFLA_PROMISCUITY */
+	       + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */
+	       + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(ext_filter_mask
@@ -904,6 +891,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	    nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
 	    nla_put_u32(skb, IFLA_GROUP, dev->group) ||
 	    nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
+	    nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
+#ifdef CONFIG_RPS
+	    nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
+#endif
 	    (dev->ifindex != dev->iflink &&
 	     nla_put_u32(skb, IFLA_LINK, dev->iflink)) ||
 	    (dev->master &&
@@ -1121,6 +1112,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_AF_SPEC]		= { .type = NLA_NESTED },
 	[IFLA_EXT_MASK]		= { .type = NLA_U32 },
 	[IFLA_PROMISCUITY]	= { .type = NLA_U32 },
+	[IFLA_NUM_TX_QUEUES]	= { .type = NLA_U32 },
+	[IFLA_NUM_RX_QUEUES]	= { .type = NLA_U32 },
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -1639,17 +1632,22 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
 {
 	int err;
 	struct net_device *dev;
-	unsigned int num_queues = 1;
+	unsigned int num_tx_queues = 1;
+	unsigned int num_rx_queues = 1;
 
-	if (ops->get_tx_queues) {
-		err = ops->get_tx_queues(src_net, tb);
-		if (err < 0)
-			goto err;
-		num_queues = err;
-	}
+	if (tb[IFLA_NUM_TX_QUEUES])
+		num_tx_queues = nla_get_u32(tb[IFLA_NUM_TX_QUEUES]);
+	else if (ops->get_num_tx_queues)
+		num_tx_queues = ops->get_num_tx_queues();
+
+	if (tb[IFLA_NUM_RX_QUEUES])
+		num_rx_queues = nla_get_u32(tb[IFLA_NUM_RX_QUEUES]);
+	else if (ops->get_num_rx_queues)
+		num_rx_queues = ops->get_num_rx_queues();
 
 	err = -ENOMEM;
-	dev = alloc_netdev_mq(ops->priv_size, ifname, ops->setup, num_queues);
+	dev = alloc_netdev_mqs(ops->priv_size, ifname, ops->setup,
+			       num_tx_queues, num_rx_queues);
 	if (!dev)
 		goto err;
 
@@ -2189,7 +2187,7 @@ skip:
 }
 
 /**
- * ndo_dflt_fdb_dump: default netdevice operation to dump an FDB table.
+ * ndo_dflt_fdb_dump - default netdevice operation to dump an FDB table.
  * @nlh: netlink message header
  * @dev: netdevice
 *
@@ -2366,8 +2364,13 @@ static struct notifier_block rtnetlink_dev_notifier = {
 static int __net_init rtnetlink_net_init(struct net *net)
 {
 	struct sock *sk;
-	sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
-				   rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
+	struct netlink_kernel_cfg cfg = {
+		.groups		= RTNLGRP_MAX,
+		.input		= rtnetlink_rcv,
+		.cb_mutex	= &rtnl_mutex,
+	};
+
+	sk = netlink_kernel_create(net, NETLINK_ROUTE, THIS_MODULE, &cfg);
 	if (!sk)
 		return -ENOMEM;
 	net->rtnl = sk;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d124306b81f..368f65c15e4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -160,8 +160,8 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  * @node: numa node to allocate memory on
  *
  * Allocate a new &sk_buff. The returned buffer has no headroom and a
- * tail room of size bytes. The object has a reference count of one.
- * The return is the buffer. On a failure the return is %NULL.
+ * tail room of at least size bytes. The object has a reference count
+ * of one. The return is the buffer. On a failure the return is %NULL.
  *
  * Buffers may only be allocated from interrupts using a @gfp_mask of
  * %GFP_ATOMIC.
@@ -296,9 +296,12 @@ EXPORT_SYMBOL(build_skb);
 struct netdev_alloc_cache {
 	struct page *page;
 	unsigned int offset;
+	unsigned int pagecnt_bias;
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
+#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
+
 /**
  * netdev_alloc_frag - allocate a page fragment
  * @fragsz: fragment size
@@ -317,17 +320,26 @@ void *netdev_alloc_frag(unsigned int fragsz)
 	if (unlikely(!nc->page)) {
 refill:
 		nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+		if (unlikely(!nc->page))
+			goto end;
+recycle:
+		atomic_set(&nc->page->_count, NETDEV_PAGECNT_BIAS);
+		nc->pagecnt_bias = NETDEV_PAGECNT_BIAS;
 		nc->offset = 0;
 	}
-	if (likely(nc->page)) {
-		if (nc->offset + fragsz > PAGE_SIZE) {
-			put_page(nc->page);
-			goto refill;
-		}
-		data = page_address(nc->page) + nc->offset;
-		nc->offset += fragsz;
-		get_page(nc->page);
+
+	if (nc->offset + fragsz > PAGE_SIZE) {
+		/* avoid unnecessary locked operations if possible */
+		if ((atomic_read(&nc->page->_count) == nc->pagecnt_bias) ||
+		    atomic_sub_and_test(nc->pagecnt_bias, &nc->page->_count))
+			goto recycle;
+		goto refill;
 	}
+
+	data = page_address(nc->page) + nc->offset;
+	nc->offset += fragsz;
+	nc->pagecnt_bias--;
+end:
 	local_irq_restore(flags);
 	return data;
 }
@@ -713,7 +725,8 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
 
-/* skb_copy_ubufs - copy userspace skb frags buffers to kernel
+/**
+ * skb_copy_ubufs - copy userspace skb frags buffers to kernel
  * @skb: the skb to modify
  * @gfp_mask: allocation priority
  *
@@ -738,7 +751,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 		u8 *vaddr;
 		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
 
-		page = alloc_page(GFP_ATOMIC);
+		page = alloc_page(gfp_mask);
 		if (!page) {
 			while (head) {
 				struct page *next = (struct page *)head->private;
@@ -756,22 +769,22 @@
 	}
 
 	/* skb frags release userspace buffers */
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+	for (i = 0; i < num_frags; i++)
 		skb_frag_unref(skb, i);
 
 	uarg->callback(uarg);
 
 	/* skb frags point to kernel buffers */
-	for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) {
-		__skb_fill_page_desc(skb, i-1, head, 0,
-				     skb_shinfo(skb)->frags[i - 1].size);
+	for (i = num_frags - 1; i >= 0; i--) {
+		__skb_fill_page_desc(skb, i, head, 0,
+				     skb_shinfo(skb)->frags[i].size);
 		head = (struct page *)head->private;
 	}
 
 	skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
 	return 0;
 }
-
+EXPORT_SYMBOL_GPL(skb_copy_ubufs);
 
 /**
  * skb_clone - duplicate an sk_buff
@@ -791,10 +804,8 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 {
 	struct sk_buff *n;
 
-	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-		if (skb_copy_ubufs(skb, gfp_mask))
-			return NULL;
-	}
+	if (skb_orphan_frags(skb, gfp_mask))
+		return NULL;
 
 	n = skb + 1;
 	if (skb->fclone == SKB_FCLONE_ORIG &&
@@ -914,12 +925,10 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
 	if (skb_shinfo(skb)->nr_frags) {
 		int i;
 
-		if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-			if (skb_copy_ubufs(skb, gfp_mask)) {
-				kfree_skb(n);
-				n = NULL;
-				goto out;
-			}
+		if (skb_orphan_frags(skb, gfp_mask)) {
+			kfree_skb(n);
+			n = NULL;
+			goto out;
 		}
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
@@ -992,10 +1001,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	 */
 	if (skb_cloned(skb)) {
 		/* copy this zero copy skb frags */
-		if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-			if (skb_copy_ubufs(skb, gfp_mask))
-				goto nofrags;
-		}
+		if (skb_orphan_frags(skb, gfp_mask))
+			goto nofrags;
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 			skb_frag_ref(skb, i);
 
@@ -2614,7 +2621,7 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
 EXPORT_SYMBOL(skb_find_text);
 
 /**
- * skb_append_datato_frags: - append the user data to a skb
+ * skb_append_datato_frags - append the user data to a skb
  * @sk: sock structure
 * @skb: skb structure to be appened with user data.
 * @getfrag: call back function to be used for getting the user data
diff --git a/net/core/sock.c b/net/core/sock.c
index 9e5b71fda6e..2676a88f533 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1180,12 +1180,12 @@ void sock_update_classid(struct sock *sk)
 }
 EXPORT_SYMBOL(sock_update_classid);
 
-void sock_update_netprioidx(struct sock *sk)
+void sock_update_netprioidx(struct sock *sk, struct task_struct *task)
 {
 	if (in_interrupt())
 		return;
 
-	sk->sk_cgrp_prioidx = task_netprioidx(current);
+	sk->sk_cgrp_prioidx = task_netprioidx(task);
 }
 EXPORT_SYMBOL_GPL(sock_update_netprioidx);
 #endif
@@ -1215,7 +1215,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		atomic_set(&sk->sk_wmem_alloc, 1);
 
 		sock_update_classid(sk);
-		sock_update_netprioidx(sk);
+		sock_update_netprioidx(sk, current);
 	}
 
 	return sk;
@@ -1465,6 +1465,11 @@ void sock_rfree(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(sock_rfree);
 
+void sock_edemux(struct sk_buff *skb)
+{
+	sock_put(skb->sk);
+}
+EXPORT_SYMBOL(sock_edemux);
 
 int sock_i_uid(struct sock *sk)
 {
@@ -2154,6 +2159,10 @@ void release_sock(struct sock *sk)
 	spin_lock_bh(&sk->sk_lock.slock);
 	if (sk->sk_backlog.tail)
 		__release_sock(sk);
+
+	if (sk->sk_prot->release_cb)
+		sk->sk_prot->release_cb(sk);
+
 	sk->sk_lock.owned = 0;
 	if (waitqueue_active(&sk->sk_lock.wq))
 		wake_up(&sk->sk_lock.wq);
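The release_cb hook wired into release_sock() above is the mechanism item 14 of the log relies on: a timer or softirq path that finds the socket owned can record deferred work, and the protocol handles it when the owner drops the lock (tcp_release_cb() is the in-tree user). A hypothetical protocol-side sketch; all myproto_* names are illustrative:

#include <net/sock.h>
#include <net/inet_sock.h>

/* Hypothetical protocol with one deferrable event. */
struct myproto_sock {
	struct inet_sock	inet;
	unsigned long		deferred_flags;	/* set from timer/softirq */
};

static void myproto_do_deferred(struct sock *sk)
{
	/* e.g. a retransmit or PMTU update the fast path postponed
	 * because the socket was owned by user context. */
}

static void myproto_release_cb(struct sock *sk)
{
	struct myproto_sock *mp = (struct myproto_sock *)sk;

	/* Called from release_sock() after the backlog is drained,
	 * per the hunk above, while the socket spinlock is held. */
	if (test_and_clear_bit(0, &mp->deferred_flags))
		myproto_do_deferred(sk);
}

static struct proto myproto_prot = {
	.name		= "MYPROTO",
	.obj_size	= sizeof(struct myproto_sock),
	.release_cb	= myproto_release_cb,
	/* ... remaining ops elided ... */
};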
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 5fd146720f3..9d8755e4a7a 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -4,7 +4,6 @@
 #include <net/netlink.h>
 #include <net/net_namespace.h>
 #include <linux/module.h>
-#include <linux/rtnetlink.h>
 #include <net/sock.h>
 
 #include <linux/inet_diag.h>
@@ -35,9 +34,7 @@ EXPORT_SYMBOL_GPL(sock_diag_save_cookie);
 
 int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
 {
-	__u32 *mem;
-
-	mem = RTA_DATA(__RTA_PUT(skb, attrtype, SK_MEMINFO_VARS * sizeof(__u32)));
+	u32 mem[SK_MEMINFO_VARS];
 
 	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
 	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
@@ -46,11 +43,9 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
 	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
 	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
 	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
+	mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
 
-	return 0;
-
-rtattr_failure:
-	return -EMSGSIZE;
+	return nla_put(skb, attrtype, sizeof(mem), &mem);
 }
 EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
 
@@ -120,7 +115,7 @@ static inline void sock_diag_unlock_handler(const struct sock_diag_handler *h)
 static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	int err;
-	struct sock_diag_req *req = NLMSG_DATA(nlh);
+	struct sock_diag_req *req = nlmsg_data(nlh);
 	const struct sock_diag_handler *hndl;
 
 	if (nlmsg_len(nlh) < sizeof(*req))
@@ -171,19 +166,36 @@ static void sock_diag_rcv(struct sk_buff *skb)
 	mutex_unlock(&sock_diag_mutex);
 }
 
-struct sock *sock_diag_nlsk;
-EXPORT_SYMBOL_GPL(sock_diag_nlsk);
+static int __net_init diag_net_init(struct net *net)
+{
+	struct netlink_kernel_cfg cfg = {
+		.input	= sock_diag_rcv,
+	};
+
+	net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG,
+					       THIS_MODULE, &cfg);
+	return net->diag_nlsk == NULL ? -ENOMEM : 0;
+}
+
+static void __net_exit diag_net_exit(struct net *net)
+{
+	netlink_kernel_release(net->diag_nlsk);
+	net->diag_nlsk = NULL;
+}
+
+static struct pernet_operations diag_net_ops = {
+	.init = diag_net_init,
+	.exit = diag_net_exit,
+};
 
 static int __init sock_diag_init(void)
 {
-	sock_diag_nlsk = netlink_kernel_create(&init_net, NETLINK_SOCK_DIAG, 0,
-					       sock_diag_rcv, NULL, THIS_MODULE);
-	return sock_diag_nlsk == NULL ? -ENOMEM : 0;
+	return register_pernet_subsys(&diag_net_ops);
}
 
 static void __exit sock_diag_exit(void)
 {
-	netlink_kernel_release(sock_diag_nlsk);
+	unregister_pernet_subsys(&diag_net_ops);
 }
 
 module_init(sock_diag_init);