author     Linus Torvalds <torvalds@linux-foundation.org>  2012-10-02 16:38:27 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-10-02 16:38:27 -0400
commit     aecdc33e111b2c447b622e287c6003726daa1426 (patch)
tree       3e7657eae4b785e1a1fb5dfb225dbae0b2f0cfc6 /net/core
parent     a20acf99f75e49271381d65db097c9763060a1e8 (diff)
parent     a3a6cab5ea10cca64d036851fe0d932448f2fe4f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller:

 1) GRE now works over ipv6, from Dmitry Kozlov.

 2) Make SCTP more network namespace aware, from Eric Biederman.

 3) TEAM driver now works with non-ethernet devices, from Jiri Pirko.

 4) Make openvswitch network namespace aware, from Pravin B Shelar.

 5) IPV6 NAT implementation, from Patrick McHardy.

 6) Server side support for TCP Fast Open, from Jerry Chu and others.

 7) Packet BPF filter supports MOD and XOR, from Eric Dumazet and Daniel Borkmann.

 8) Increase the loopback default MTU to 64K, from Eric Dumazet.

 9) Use a per-task rather than per-socket page fragment allocator for outgoing networking traffic. This benefits processes that have very many mostly idle sockets, which is quite common. From Eric Dumazet.

10) Use up to 32K for page fragment allocations, with fallbacks to smaller sizes when higher order page allocations fail. Benefits are a) less segments for driver to process b) less calls to page allocator c) less waste of space. From Eric Dumazet.

11) Allow GRO to be used on GRE tunnels, from Eric Dumazet.

12) VXLAN device driver, one way to handle VLAN issues such as the limitation of 4096 VLAN IDs yet still have some level of isolation. From Stephen Hemminger.

13) As usual there is a large boatload of driver changes, with the scale perhaps tilted towards the wireless side this time around.

Fix up various fairly trivial conflicts, mostly caused by the user namespace changes.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1012 commits)
  hyperv: Add buffer for extended info after the RNDIS response message.
  hyperv: Report actual status in receive completion packet
  hyperv: Remove extra allocated space for recv_pkt_list elements
  hyperv: Fix page buffer handling in rndis_filter_send_request()
  hyperv: Fix the missing return value in rndis_filter_set_packet_filter()
  hyperv: Fix the max_xfer_size in RNDIS initialization
  vxlan: put UDP socket in correct namespace
  vxlan: Depend on CONFIG_INET
  sfc: Fix the reported priorities of different filter types
  sfc: Remove EFX_FILTER_FLAG_RX_OVERRIDE_IP
  sfc: Fix loopback self-test with separate_tx_channels=1
  sfc: Fix MCDI structure field lookup
  sfc: Add parentheses around use of bitfield macro arguments
  sfc: Fix null function pointer in efx_sriov_channel_type
  vxlan: virtual extensible lan
  igmp: export symbol ip_mc_leave_group
  netlink: add attributes to fdb interface
  tg3: unconditionally select HWMON support when tg3 is enabled.
  Revert "net: ti cpsw ethernet: allow reading phy interface mode from DT"
  gre: fix sparse warning
  ...
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/dev.c             | 109
-rw-r--r--  net/core/dev_addr_lists.c  |  40
-rw-r--r--  net/core/dst.c             |   2
-rw-r--r--  net/core/ethtool.c         |  12
-rw-r--r--  net/core/fib_rules.c       |   6
-rw-r--r--  net/core/filter.c          |  27
-rw-r--r--  net/core/link_watch.c      |   8
-rw-r--r--  net/core/neighbour.c       |   8
-rw-r--r--  net/core/net-sysfs.c       |  18
-rw-r--r--  net/core/netpoll.c         |   5
-rw-r--r--  net/core/netprio_cgroup.c  |  41
-rw-r--r--  net/core/request_sock.c    |  95
-rw-r--r--  net/core/rtnetlink.c       |  38
-rw-r--r--  net/core/scm.c             |  17
-rw-r--r--  net/core/secure_seq.c      |   1
-rw-r--r--  net/core/skbuff.c          |  86
-rw-r--r--  net/core/sock.c            |  64
-rw-r--r--  net/core/sock_diag.c       |   3
-rw-r--r--  net/core/utils.c           |  20
19 files changed, 394 insertions, 206 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 17e912f9b711..1e0a1847c3bb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -959,18 +959,30 @@ int dev_alloc_name(struct net_device *dev, const char *name)
959} 959}
960EXPORT_SYMBOL(dev_alloc_name); 960EXPORT_SYMBOL(dev_alloc_name);
961 961
962static int dev_get_valid_name(struct net_device *dev, const char *name) 962static int dev_alloc_name_ns(struct net *net,
963 struct net_device *dev,
964 const char *name)
963{ 965{
964 struct net *net; 966 char buf[IFNAMSIZ];
967 int ret;
965 968
966 BUG_ON(!dev_net(dev)); 969 ret = __dev_alloc_name(net, name, buf);
967 net = dev_net(dev); 970 if (ret >= 0)
971 strlcpy(dev->name, buf, IFNAMSIZ);
972 return ret;
973}
974
975static int dev_get_valid_name(struct net *net,
976 struct net_device *dev,
977 const char *name)
978{
979 BUG_ON(!net);
968 980
969 if (!dev_valid_name(name)) 981 if (!dev_valid_name(name))
970 return -EINVAL; 982 return -EINVAL;
971 983
972 if (strchr(name, '%')) 984 if (strchr(name, '%'))
973 return dev_alloc_name(dev, name); 985 return dev_alloc_name_ns(net, dev, name);
974 else if (__dev_get_by_name(net, name)) 986 else if (__dev_get_by_name(net, name))
975 return -EEXIST; 987 return -EEXIST;
976 else if (dev->name != name) 988 else if (dev->name != name)
@@ -1006,7 +1018,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
1006 1018
1007 memcpy(oldname, dev->name, IFNAMSIZ); 1019 memcpy(oldname, dev->name, IFNAMSIZ);
1008 1020
1009 err = dev_get_valid_name(dev, newname); 1021 err = dev_get_valid_name(net, dev, newname);
1010 if (err < 0) 1022 if (err < 0)
1011 return err; 1023 return err;
1012 1024
@@ -1109,11 +1121,23 @@ void netdev_state_change(struct net_device *dev)
1109} 1121}
1110EXPORT_SYMBOL(netdev_state_change); 1122EXPORT_SYMBOL(netdev_state_change);
1111 1123
1112int netdev_bonding_change(struct net_device *dev, unsigned long event) 1124/**
1125 * netdev_notify_peers - notify network peers about existence of @dev
1126 * @dev: network device
1127 *
1128 * Generate traffic such that interested network peers are aware of
1129 * @dev, such as by generating a gratuitous ARP. This may be used when
1130 * a device wants to inform the rest of the network about some sort of
1131 * reconfiguration such as a failover event or virtual machine
1132 * migration.
1133 */
1134void netdev_notify_peers(struct net_device *dev)
1113{ 1135{
1114 return call_netdevice_notifiers(event, dev); 1136 rtnl_lock();
1137 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1138 rtnl_unlock();
1115} 1139}
1116EXPORT_SYMBOL(netdev_bonding_change); 1140EXPORT_SYMBOL(netdev_notify_peers);
1117 1141
1118/** 1142/**
1119 * dev_load - load a network module 1143 * dev_load - load a network module
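The kernel-doc above converts the old netdev_bonding_change() into a general-purpose netdev_notify_peers() helper. A minimal sketch (not part of this diff) of how a hypothetical virtual-NIC driver might use it after a failover or migration event; note the helper takes the rtnl lock itself, so the caller must not already hold it:

/* Hypothetical driver callback, for illustration only. */
static void mydrv_migration_complete(struct net_device *netdev)
{
	/*
	 * Re-announce the device so switches and peers refresh their
	 * forwarding tables; the NETDEV_NOTIFY_PEERS notifier chain
	 * typically results in a gratuitous ARP being sent.  Must be
	 * called without rtnl held, since netdev_notify_peers()
	 * acquires it internally.
	 */
	netdev_notify_peers(netdev);
}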
@@ -1394,7 +1418,6 @@ rollback:
1394 nb->notifier_call(nb, NETDEV_DOWN, dev); 1418 nb->notifier_call(nb, NETDEV_DOWN, dev);
1395 } 1419 }
1396 nb->notifier_call(nb, NETDEV_UNREGISTER, dev); 1420 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1397 nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1398 } 1421 }
1399 } 1422 }
1400 1423
@@ -1436,7 +1459,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
1436 nb->notifier_call(nb, NETDEV_DOWN, dev); 1459 nb->notifier_call(nb, NETDEV_DOWN, dev);
1437 } 1460 }
1438 nb->notifier_call(nb, NETDEV_UNREGISTER, dev); 1461 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1439 nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1440 } 1462 }
1441 } 1463 }
1442unlock: 1464unlock:
@@ -2175,9 +2197,7 @@ EXPORT_SYMBOL(netif_skb_features);
2175/* 2197/*
2176 * Returns true if either: 2198 * Returns true if either:
2177 * 1. skb has frag_list and the device doesn't support FRAGLIST, or 2199 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
2178 * 2. skb is fragmented and the device does not support SG, or if 2200 * 2. skb is fragmented and the device does not support SG.
2179 * at least one of fragments is in highmem and device does not
2180 * support DMA from it.
2181 */ 2201 */
2182static inline int skb_needs_linearize(struct sk_buff *skb, 2202static inline int skb_needs_linearize(struct sk_buff *skb,
2183 int features) 2203 int features)
@@ -2206,9 +2226,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2206 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 2226 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2207 skb_dst_drop(skb); 2227 skb_dst_drop(skb);
2208 2228
2209 if (!list_empty(&ptype_all))
2210 dev_queue_xmit_nit(skb, dev);
2211
2212 features = netif_skb_features(skb); 2229 features = netif_skb_features(skb);
2213 2230
2214 if (vlan_tx_tag_present(skb) && 2231 if (vlan_tx_tag_present(skb) &&
@@ -2243,6 +2260,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2243 } 2260 }
2244 } 2261 }
2245 2262
2263 if (!list_empty(&ptype_all))
2264 dev_queue_xmit_nit(skb, dev);
2265
2246 skb_len = skb->len; 2266 skb_len = skb->len;
2247 rc = ops->ndo_start_xmit(skb, dev); 2267 rc = ops->ndo_start_xmit(skb, dev);
2248 trace_net_dev_xmit(skb, rc, dev, skb_len); 2268 trace_net_dev_xmit(skb, rc, dev, skb_len);
@@ -2265,6 +2285,9 @@ gso:
2265 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 2285 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2266 skb_dst_drop(nskb); 2286 skb_dst_drop(nskb);
2267 2287
2288 if (!list_empty(&ptype_all))
2289 dev_queue_xmit_nit(nskb, dev);
2290
2268 skb_len = nskb->len; 2291 skb_len = nskb->len;
2269 rc = ops->ndo_start_xmit(nskb, dev); 2292 rc = ops->ndo_start_xmit(nskb, dev);
2270 trace_net_dev_xmit(nskb, rc, dev, skb_len); 2293 trace_net_dev_xmit(nskb, rc, dev, skb_len);
@@ -2374,8 +2397,8 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
2374#endif 2397#endif
2375} 2398}
2376 2399
2377static struct netdev_queue *dev_pick_tx(struct net_device *dev, 2400struct netdev_queue *netdev_pick_tx(struct net_device *dev,
2378 struct sk_buff *skb) 2401 struct sk_buff *skb)
2379{ 2402{
2380 int queue_index; 2403 int queue_index;
2381 const struct net_device_ops *ops = dev->netdev_ops; 2404 const struct net_device_ops *ops = dev->netdev_ops;
@@ -2549,7 +2572,7 @@ int dev_queue_xmit(struct sk_buff *skb)
2549 2572
2550 skb_update_prio(skb); 2573 skb_update_prio(skb);
2551 2574
2552 txq = dev_pick_tx(dev, skb); 2575 txq = netdev_pick_tx(dev, skb);
2553 q = rcu_dereference_bh(txq->qdisc); 2576 q = rcu_dereference_bh(txq->qdisc);
2554 2577
2555#ifdef CONFIG_NET_CLS_ACT 2578#ifdef CONFIG_NET_CLS_ACT
@@ -2622,6 +2645,8 @@ EXPORT_SYMBOL(dev_queue_xmit);
2622 =======================================================================*/ 2645 =======================================================================*/
2623 2646
2624int netdev_max_backlog __read_mostly = 1000; 2647int netdev_max_backlog __read_mostly = 1000;
2648EXPORT_SYMBOL(netdev_max_backlog);
2649
2625int netdev_tstamp_prequeue __read_mostly = 1; 2650int netdev_tstamp_prequeue __read_mostly = 1;
2626int netdev_budget __read_mostly = 300; 2651int netdev_budget __read_mostly = 300;
2627int weight_p __read_mostly = 64; /* old backlog weight */ 2652int weight_p __read_mostly = 64; /* old backlog weight */
@@ -5239,12 +5264,12 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
5239 */ 5264 */
5240static int dev_new_index(struct net *net) 5265static int dev_new_index(struct net *net)
5241{ 5266{
5242 static int ifindex; 5267 int ifindex = net->ifindex;
5243 for (;;) { 5268 for (;;) {
5244 if (++ifindex <= 0) 5269 if (++ifindex <= 0)
5245 ifindex = 1; 5270 ifindex = 1;
5246 if (!__dev_get_by_index(net, ifindex)) 5271 if (!__dev_get_by_index(net, ifindex))
5247 return ifindex; 5272 return net->ifindex = ifindex;
5248 } 5273 }
5249} 5274}
5250 5275
@@ -5322,10 +5347,6 @@ static void rollback_registered_many(struct list_head *head)
5322 netdev_unregister_kobject(dev); 5347 netdev_unregister_kobject(dev);
5323 } 5348 }
5324 5349
5325 /* Process any work delayed until the end of the batch */
5326 dev = list_first_entry(head, struct net_device, unreg_list);
5327 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
5328
5329 synchronize_net(); 5350 synchronize_net();
5330 5351
5331 list_for_each_entry(dev, head, unreg_list) 5352 list_for_each_entry(dev, head, unreg_list)
@@ -5583,7 +5604,7 @@ int register_netdevice(struct net_device *dev)
5583 5604
5584 dev->iflink = -1; 5605 dev->iflink = -1;
5585 5606
5586 ret = dev_get_valid_name(dev, dev->name); 5607 ret = dev_get_valid_name(net, dev, dev->name);
5587 if (ret < 0) 5608 if (ret < 0)
5588 goto out; 5609 goto out;
5589 5610
@@ -5597,7 +5618,12 @@ int register_netdevice(struct net_device *dev)
5597 } 5618 }
5598 } 5619 }
5599 5620
5600 dev->ifindex = dev_new_index(net); 5621 ret = -EBUSY;
5622 if (!dev->ifindex)
5623 dev->ifindex = dev_new_index(net);
5624 else if (__dev_get_by_index(net, dev->ifindex))
5625 goto err_uninit;
5626
5601 if (dev->iflink == -1) 5627 if (dev->iflink == -1)
5602 dev->iflink = dev->ifindex; 5628 dev->iflink = dev->ifindex;
5603 5629
@@ -5640,6 +5666,8 @@ int register_netdevice(struct net_device *dev)
5640 5666
5641 set_bit(__LINK_STATE_PRESENT, &dev->state); 5667 set_bit(__LINK_STATE_PRESENT, &dev->state);
5642 5668
5669 linkwatch_init_dev(dev);
5670
5643 dev_init_scheduler(dev); 5671 dev_init_scheduler(dev);
5644 dev_hold(dev); 5672 dev_hold(dev);
5645 list_netdevice(dev); 5673 list_netdevice(dev);
@@ -5773,9 +5801,12 @@ static void netdev_wait_allrefs(struct net_device *dev)
5773 5801
5774 /* Rebroadcast unregister notification */ 5802 /* Rebroadcast unregister notification */
5775 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5803 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5776 /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
5777 * should have already handle it the first time */
5778 5804
5805 __rtnl_unlock();
5806 rcu_barrier();
5807 rtnl_lock();
5808
5809 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
5779 if (test_bit(__LINK_STATE_LINKWATCH_PENDING, 5810 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5780 &dev->state)) { 5811 &dev->state)) {
5781 /* We must not have linkwatch events 5812 /* We must not have linkwatch events
@@ -5837,9 +5868,8 @@ void netdev_run_todo(void)
5837 5868
5838 __rtnl_unlock(); 5869 __rtnl_unlock();
5839 5870
5840 /* Wait for rcu callbacks to finish before attempting to drain 5871
5841 * the device list. This usually avoids a 250ms wait. 5872 /* Wait for rcu callbacks to finish before next phase */
5842 */
5843 if (!list_empty(&list)) 5873 if (!list_empty(&list))
5844 rcu_barrier(); 5874 rcu_barrier();
5845 5875
@@ -5848,6 +5878,10 @@ void netdev_run_todo(void)
5848 = list_first_entry(&list, struct net_device, todo_list); 5878 = list_first_entry(&list, struct net_device, todo_list);
5849 list_del(&dev->todo_list); 5879 list_del(&dev->todo_list);
5850 5880
5881 rtnl_lock();
5882 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
5883 __rtnl_unlock();
5884
5851 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { 5885 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
5852 pr_err("network todo '%s' but state %d\n", 5886 pr_err("network todo '%s' but state %d\n",
5853 dev->name, dev->reg_state); 5887 dev->name, dev->reg_state);
@@ -5943,6 +5977,8 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
5943 return queue; 5977 return queue;
5944} 5978}
5945 5979
5980static const struct ethtool_ops default_ethtool_ops;
5981
5946/** 5982/**
5947 * alloc_netdev_mqs - allocate network device 5983 * alloc_netdev_mqs - allocate network device
5948 * @sizeof_priv: size of private data to allocate space for 5984 * @sizeof_priv: size of private data to allocate space for
@@ -6030,6 +6066,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6030 6066
6031 strcpy(dev->name, name); 6067 strcpy(dev->name, name);
6032 dev->group = INIT_NETDEV_GROUP; 6068 dev->group = INIT_NETDEV_GROUP;
6069 if (!dev->ethtool_ops)
6070 dev->ethtool_ops = &default_ethtool_ops;
6033 return dev; 6071 return dev;
6034 6072
6035free_all: 6073free_all:
@@ -6214,7 +6252,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
6214 /* We get here if we can't use the current device name */ 6252 /* We get here if we can't use the current device name */
6215 if (!pat) 6253 if (!pat)
6216 goto out; 6254 goto out;
6217 if (dev_get_valid_name(dev, pat) < 0) 6255 if (dev_get_valid_name(net, dev, pat) < 0)
6218 goto out; 6256 goto out;
6219 } 6257 }
6220 6258
@@ -6242,7 +6280,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
6242 the device is just moving and can keep their slaves up. 6280 the device is just moving and can keep their slaves up.
6243 */ 6281 */
6244 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 6282 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6245 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); 6283 rcu_barrier();
6284 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6246 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); 6285 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
6247 6286
6248 /* 6287 /*
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index c4cc2bc49f06..87cc17db2d56 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -22,7 +22,7 @@
22 */ 22 */
23 23
24static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, 24static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
25 unsigned char *addr, int addr_len, 25 const unsigned char *addr, int addr_len,
26 unsigned char addr_type, bool global) 26 unsigned char addr_type, bool global)
27{ 27{
28 struct netdev_hw_addr *ha; 28 struct netdev_hw_addr *ha;
@@ -46,7 +46,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
46} 46}
47 47
48static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, 48static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
49 unsigned char *addr, int addr_len, 49 const unsigned char *addr, int addr_len,
50 unsigned char addr_type, bool global) 50 unsigned char addr_type, bool global)
51{ 51{
52 struct netdev_hw_addr *ha; 52 struct netdev_hw_addr *ha;
@@ -72,14 +72,15 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
72 return __hw_addr_create_ex(list, addr, addr_len, addr_type, global); 72 return __hw_addr_create_ex(list, addr, addr_len, addr_type, global);
73} 73}
74 74
75static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, 75static int __hw_addr_add(struct netdev_hw_addr_list *list,
76 int addr_len, unsigned char addr_type) 76 const unsigned char *addr, int addr_len,
77 unsigned char addr_type)
77{ 78{
78 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); 79 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
79} 80}
80 81
81static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, 82static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
82 unsigned char *addr, int addr_len, 83 const unsigned char *addr, int addr_len,
83 unsigned char addr_type, bool global) 84 unsigned char addr_type, bool global)
84{ 85{
85 struct netdev_hw_addr *ha; 86 struct netdev_hw_addr *ha;
@@ -104,8 +105,9 @@ static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
104 return -ENOENT; 105 return -ENOENT;
105} 106}
106 107
107static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, 108static int __hw_addr_del(struct netdev_hw_addr_list *list,
108 int addr_len, unsigned char addr_type) 109 const unsigned char *addr, int addr_len,
110 unsigned char addr_type)
109{ 111{
110 return __hw_addr_del_ex(list, addr, addr_len, addr_type, false); 112 return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
111} 113}
@@ -278,7 +280,7 @@ EXPORT_SYMBOL(dev_addr_init);
278 * 280 *
279 * The caller must hold the rtnl_mutex. 281 * The caller must hold the rtnl_mutex.
280 */ 282 */
281int dev_addr_add(struct net_device *dev, unsigned char *addr, 283int dev_addr_add(struct net_device *dev, const unsigned char *addr,
282 unsigned char addr_type) 284 unsigned char addr_type)
283{ 285{
284 int err; 286 int err;
@@ -303,7 +305,7 @@ EXPORT_SYMBOL(dev_addr_add);
303 * 305 *
304 * The caller must hold the rtnl_mutex. 306 * The caller must hold the rtnl_mutex.
305 */ 307 */
306int dev_addr_del(struct net_device *dev, unsigned char *addr, 308int dev_addr_del(struct net_device *dev, const unsigned char *addr,
307 unsigned char addr_type) 309 unsigned char addr_type)
308{ 310{
309 int err; 311 int err;
@@ -390,7 +392,7 @@ EXPORT_SYMBOL(dev_addr_del_multiple);
390 * @dev: device 392 * @dev: device
391 * @addr: address to add 393 * @addr: address to add
392 */ 394 */
393int dev_uc_add_excl(struct net_device *dev, unsigned char *addr) 395int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr)
394{ 396{
395 struct netdev_hw_addr *ha; 397 struct netdev_hw_addr *ha;
396 int err; 398 int err;
@@ -421,7 +423,7 @@ EXPORT_SYMBOL(dev_uc_add_excl);
421 * Add a secondary unicast address to the device or increase 423 * Add a secondary unicast address to the device or increase
422 * the reference count if it already exists. 424 * the reference count if it already exists.
423 */ 425 */
424int dev_uc_add(struct net_device *dev, unsigned char *addr) 426int dev_uc_add(struct net_device *dev, const unsigned char *addr)
425{ 427{
426 int err; 428 int err;
427 429
@@ -443,7 +445,7 @@ EXPORT_SYMBOL(dev_uc_add);
443 * Release reference to a secondary unicast address and remove it 445 * Release reference to a secondary unicast address and remove it
444 * from the device if the reference count drops to zero. 446 * from the device if the reference count drops to zero.
445 */ 447 */
446int dev_uc_del(struct net_device *dev, unsigned char *addr) 448int dev_uc_del(struct net_device *dev, const unsigned char *addr)
447{ 449{
448 int err; 450 int err;
449 451
@@ -543,7 +545,7 @@ EXPORT_SYMBOL(dev_uc_init);
543 * @dev: device 545 * @dev: device
544 * @addr: address to add 546 * @addr: address to add
545 */ 547 */
546int dev_mc_add_excl(struct net_device *dev, unsigned char *addr) 548int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr)
547{ 549{
548 struct netdev_hw_addr *ha; 550 struct netdev_hw_addr *ha;
549 int err; 551 int err;
@@ -566,7 +568,7 @@ out:
566} 568}
567EXPORT_SYMBOL(dev_mc_add_excl); 569EXPORT_SYMBOL(dev_mc_add_excl);
568 570
569static int __dev_mc_add(struct net_device *dev, unsigned char *addr, 571static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
570 bool global) 572 bool global)
571{ 573{
572 int err; 574 int err;
@@ -587,7 +589,7 @@ static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
587 * Add a multicast address to the device or increase 589 * Add a multicast address to the device or increase
588 * the reference count if it already exists. 590 * the reference count if it already exists.
589 */ 591 */
590int dev_mc_add(struct net_device *dev, unsigned char *addr) 592int dev_mc_add(struct net_device *dev, const unsigned char *addr)
591{ 593{
592 return __dev_mc_add(dev, addr, false); 594 return __dev_mc_add(dev, addr, false);
593} 595}
@@ -600,13 +602,13 @@ EXPORT_SYMBOL(dev_mc_add);
600 * 602 *
601 * Add a global multicast address to the device. 603 * Add a global multicast address to the device.
602 */ 604 */
603int dev_mc_add_global(struct net_device *dev, unsigned char *addr) 605int dev_mc_add_global(struct net_device *dev, const unsigned char *addr)
604{ 606{
605 return __dev_mc_add(dev, addr, true); 607 return __dev_mc_add(dev, addr, true);
606} 608}
607EXPORT_SYMBOL(dev_mc_add_global); 609EXPORT_SYMBOL(dev_mc_add_global);
608 610
609static int __dev_mc_del(struct net_device *dev, unsigned char *addr, 611static int __dev_mc_del(struct net_device *dev, const unsigned char *addr,
610 bool global) 612 bool global)
611{ 613{
612 int err; 614 int err;
@@ -628,7 +630,7 @@ static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
628 * Release reference to a multicast address and remove it 630 * Release reference to a multicast address and remove it
629 * from the device if the reference count drops to zero. 631 * from the device if the reference count drops to zero.
630 */ 632 */
631int dev_mc_del(struct net_device *dev, unsigned char *addr) 633int dev_mc_del(struct net_device *dev, const unsigned char *addr)
632{ 634{
633 return __dev_mc_del(dev, addr, false); 635 return __dev_mc_del(dev, addr, false);
634} 636}
@@ -642,7 +644,7 @@ EXPORT_SYMBOL(dev_mc_del);
642 * Release reference to a multicast address and remove it 644 * Release reference to a multicast address and remove it
643 * from the device if the reference count drops to zero. 645 * from the device if the reference count drops to zero.
644 */ 646 */
645int dev_mc_del_global(struct net_device *dev, unsigned char *addr) 647int dev_mc_del_global(struct net_device *dev, const unsigned char *addr)
646{ 648{
647 return __dev_mc_del(dev, addr, true); 649 return __dev_mc_del(dev, addr, true);
648} 650}
diff --git a/net/core/dst.c b/net/core/dst.c
index b8d7c700541d..ee6153e2cf43 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -374,7 +374,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
374 struct dst_entry *dst, *last = NULL; 374 struct dst_entry *dst, *last = NULL;
375 375
376 switch (event) { 376 switch (event) {
377 case NETDEV_UNREGISTER: 377 case NETDEV_UNREGISTER_FINAL:
378 case NETDEV_DOWN: 378 case NETDEV_DOWN:
379 mutex_lock(&dst_gc_mutex); 379 mutex_lock(&dst_gc_mutex);
380 for (dst = dst_busy_list; dst; dst = dst->next) { 380 for (dst = dst_busy_list; dst; dst = dst->next) {
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index cbf033dcaf1f..4d64cc2e3fa9 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1426,18 +1426,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1426 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) 1426 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
1427 return -EFAULT; 1427 return -EFAULT;
1428 1428
1429 if (!dev->ethtool_ops) {
1430 /* A few commands do not require any driver support,
1431 * are unprivileged, and do not change anything, so we
1432 * can take a shortcut to them. */
1433 if (ethcmd == ETHTOOL_GDRVINFO)
1434 return ethtool_get_drvinfo(dev, useraddr);
1435 else if (ethcmd == ETHTOOL_GET_TS_INFO)
1436 return ethtool_get_ts_info(dev, useraddr);
1437 else
1438 return -EOPNOTSUPP;
1439 }
1440
1441 /* Allow some commands to be done by anyone */ 1429 /* Allow some commands to be done by anyone */
1442 switch (ethcmd) { 1430 switch (ethcmd) {
1443 case ETHTOOL_GSET: 1431 case ETHTOOL_GSET:
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index ab7db83236c9..58a4ba27dfe3 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -402,7 +402,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
402 if (unresolved) 402 if (unresolved)
403 ops->unresolved_rules++; 403 ops->unresolved_rules++;
404 404
405 notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); 405 notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
406 flush_route_cache(ops); 406 flush_route_cache(ops);
407 rules_ops_put(ops); 407 rules_ops_put(ops);
408 return 0; 408 return 0;
@@ -500,7 +500,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
500 } 500 }
501 501
502 notify_rule_change(RTM_DELRULE, rule, ops, nlh, 502 notify_rule_change(RTM_DELRULE, rule, ops, nlh,
503 NETLINK_CB(skb).pid); 503 NETLINK_CB(skb).portid);
504 if (ops->delete) 504 if (ops->delete)
505 ops->delete(rule); 505 ops->delete(rule);
506 fib_rule_put(rule); 506 fib_rule_put(rule);
@@ -601,7 +601,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
601 if (idx < cb->args[1]) 601 if (idx < cb->args[1])
602 goto skip; 602 goto skip;
603 603
604 if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid, 604 if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
605 cb->nlh->nlmsg_seq, RTM_NEWRULE, 605 cb->nlh->nlmsg_seq, RTM_NEWRULE,
606 NLM_F_MULTI, ops) < 0) 606 NLM_F_MULTI, ops) < 0)
607 break; 607 break;
diff --git a/net/core/filter.c b/net/core/filter.c
index 907efd27ec77..3d92ebb7fbcf 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -167,6 +167,14 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
167 case BPF_S_ALU_DIV_K: 167 case BPF_S_ALU_DIV_K:
168 A = reciprocal_divide(A, K); 168 A = reciprocal_divide(A, K);
169 continue; 169 continue;
170 case BPF_S_ALU_MOD_X:
171 if (X == 0)
172 return 0;
173 A %= X;
174 continue;
175 case BPF_S_ALU_MOD_K:
176 A %= K;
177 continue;
170 case BPF_S_ALU_AND_X: 178 case BPF_S_ALU_AND_X:
171 A &= X; 179 A &= X;
172 continue; 180 continue;
@@ -179,6 +187,13 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
179 case BPF_S_ALU_OR_K: 187 case BPF_S_ALU_OR_K:
180 A |= K; 188 A |= K;
181 continue; 189 continue;
190 case BPF_S_ANC_ALU_XOR_X:
191 case BPF_S_ALU_XOR_X:
192 A ^= X;
193 continue;
194 case BPF_S_ALU_XOR_K:
195 A ^= K;
196 continue;
182 case BPF_S_ALU_LSH_X: 197 case BPF_S_ALU_LSH_X:
183 A <<= X; 198 A <<= X;
184 continue; 199 continue;
@@ -326,9 +341,6 @@ load_b:
326 case BPF_S_ANC_CPU: 341 case BPF_S_ANC_CPU:
327 A = raw_smp_processor_id(); 342 A = raw_smp_processor_id();
328 continue; 343 continue;
329 case BPF_S_ANC_ALU_XOR_X:
330 A ^= X;
331 continue;
332 case BPF_S_ANC_NLATTR: { 344 case BPF_S_ANC_NLATTR: {
333 struct nlattr *nla; 345 struct nlattr *nla;
334 346
@@ -469,10 +481,14 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
469 [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K, 481 [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
470 [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X, 482 [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
471 [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X, 483 [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
484 [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K,
485 [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X,
472 [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K, 486 [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
473 [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X, 487 [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
474 [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K, 488 [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
475 [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X, 489 [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
490 [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K,
491 [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X,
476 [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K, 492 [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
477 [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X, 493 [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
478 [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K, 494 [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
@@ -531,6 +547,11 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
531 return -EINVAL; 547 return -EINVAL;
532 ftest->k = reciprocal_value(ftest->k); 548 ftest->k = reciprocal_value(ftest->k);
533 break; 549 break;
550 case BPF_S_ALU_MOD_K:
551 /* check for division by zero */
552 if (ftest->k == 0)
553 return -EINVAL;
554 break;
534 case BPF_S_LD_MEM: 555 case BPF_S_LD_MEM:
535 case BPF_S_LDX_MEM: 556 case BPF_S_LDX_MEM:
536 case BPF_S_ST: 557 case BPF_S_ST:
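The hunks above add MOD and XOR ALU opcodes to the classic BPF interpreter and make sk_chk_filter() reject a constant modulus of zero. A hedged userspace sketch of a socket filter exercising the new BPF_MOD opcode, assuming a linux/filter.h new enough to define BPF_MOD (added alongside this change); the packet offset is purely illustrative:

#include <sys/socket.h>
#include <linux/filter.h>

/* Accept packets whose first byte is even, drop the rest -- only meant
 * to demonstrate the new BPF_ALU|BPF_MOD instruction. */
static struct sock_filter code[] = {
	BPF_STMT(BPF_LD  | BPF_B   | BPF_ABS, 0),	/* A = pkt[0]             */
	BPF_STMT(BPF_ALU | BPF_MOD | BPF_K, 2),		/* A = A % 2 (new opcode) */
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1),	/* A == 0 ?               */
	BPF_STMT(BPF_RET | BPF_K, 0xffff),		/* yes: accept            */
	BPF_STMT(BPF_RET | BPF_K, 0),			/* no: drop               */
};

static struct sock_fprog prog = {
	.len	= sizeof(code) / sizeof(code[0]),
	.filter	= code,
};

/* Attach with: setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)); */

Per the interpreter and checker hunks above, a filter using BPF_MOD with a constant K of zero is rejected at attach time, while a runtime modulus by a zero X register makes the filter return 0 (drop).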
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 8e397a69005a..8f82a5cc3851 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -76,6 +76,14 @@ static void rfc2863_policy(struct net_device *dev)
76} 76}
77 77
78 78
79void linkwatch_init_dev(struct net_device *dev)
80{
81 /* Handle pre-registration link state changes */
82 if (!netif_carrier_ok(dev) || netif_dormant(dev))
83 rfc2863_policy(dev);
84}
85
86
79static bool linkwatch_urgent_event(struct net_device *dev) 87static bool linkwatch_urgent_event(struct net_device *dev)
80{ 88{
81 if (!netif_running(dev)) 89 if (!netif_running(dev))
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 112c6e2266e9..baca771caae2 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2102,7 +2102,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2102 if (tidx < tbl_skip || (family && tbl->family != family)) 2102 if (tidx < tbl_skip || (family && tbl->family != family))
2103 continue; 2103 continue;
2104 2104
2105 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid, 2105 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2106 cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, 2106 cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2107 NLM_F_MULTI) <= 0) 2107 NLM_F_MULTI) <= 0)
2108 break; 2108 break;
@@ -2115,7 +2115,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2115 goto next; 2115 goto next;
2116 2116
2117 if (neightbl_fill_param_info(skb, tbl, p, 2117 if (neightbl_fill_param_info(skb, tbl, p,
2118 NETLINK_CB(cb->skb).pid, 2118 NETLINK_CB(cb->skb).portid,
2119 cb->nlh->nlmsg_seq, 2119 cb->nlh->nlmsg_seq,
2120 RTM_NEWNEIGHTBL, 2120 RTM_NEWNEIGHTBL,
2121 NLM_F_MULTI) <= 0) 2121 NLM_F_MULTI) <= 0)
@@ -2244,7 +2244,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2244 continue; 2244 continue;
2245 if (idx < s_idx) 2245 if (idx < s_idx)
2246 goto next; 2246 goto next;
2247 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, 2247 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2248 cb->nlh->nlmsg_seq, 2248 cb->nlh->nlmsg_seq,
2249 RTM_NEWNEIGH, 2249 RTM_NEWNEIGH,
2250 NLM_F_MULTI) <= 0) { 2250 NLM_F_MULTI) <= 0) {
@@ -2281,7 +2281,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2281 continue; 2281 continue;
2282 if (idx < s_idx) 2282 if (idx < s_idx)
2283 goto next; 2283 goto next;
2284 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, 2284 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2285 cb->nlh->nlmsg_seq, 2285 cb->nlh->nlmsg_seq,
2286 RTM_NEWNEIGH, 2286 RTM_NEWNEIGH,
2287 NLM_F_MULTI, tbl) <= 0) { 2287 NLM_F_MULTI, tbl) <= 0) {
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 72607174ea5a..bcf02f608cbf 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -166,9 +166,21 @@ static ssize_t show_duplex(struct device *dev,
166 166
167 if (netif_running(netdev)) { 167 if (netif_running(netdev)) {
168 struct ethtool_cmd cmd; 168 struct ethtool_cmd cmd;
169 if (!__ethtool_get_settings(netdev, &cmd)) 169 if (!__ethtool_get_settings(netdev, &cmd)) {
170 ret = sprintf(buf, "%s\n", 170 const char *duplex;
171 cmd.duplex ? "full" : "half"); 171 switch (cmd.duplex) {
172 case DUPLEX_HALF:
173 duplex = "half";
174 break;
175 case DUPLEX_FULL:
176 duplex = "full";
177 break;
178 default:
179 duplex = "unknown";
180 break;
181 }
182 ret = sprintf(buf, "%s\n", duplex);
183 }
172 } 184 }
173 rtnl_unlock(); 185 rtnl_unlock();
174 return ret; 186 return ret;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index e4ba3e70c174..77a0388fc3be 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -328,7 +328,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
328 if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { 328 if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
329 struct netdev_queue *txq; 329 struct netdev_queue *txq;
330 330
331 txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); 331 txq = netdev_pick_tx(dev, skb);
332 332
333 /* try until next clock tick */ 333 /* try until next clock tick */
334 for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; 334 for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
@@ -380,6 +380,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
380 struct udphdr *udph; 380 struct udphdr *udph;
381 struct iphdr *iph; 381 struct iphdr *iph;
382 struct ethhdr *eth; 382 struct ethhdr *eth;
383 static atomic_t ip_ident;
383 384
384 udp_len = len + sizeof(*udph); 385 udp_len = len + sizeof(*udph);
385 ip_len = udp_len + sizeof(*iph); 386 ip_len = udp_len + sizeof(*iph);
@@ -415,7 +416,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
415 put_unaligned(0x45, (unsigned char *)iph); 416 put_unaligned(0x45, (unsigned char *)iph);
416 iph->tos = 0; 417 iph->tos = 0;
417 put_unaligned(htons(ip_len), &(iph->tot_len)); 418 put_unaligned(htons(ip_len), &(iph->tot_len));
418 iph->id = 0; 419 iph->id = htons(atomic_inc_return(&ip_ident));
419 iph->frag_off = 0; 420 iph->frag_off = 0;
420 iph->ttl = 64; 421 iph->ttl = 64;
421 iph->protocol = IPPROTO_UDP; 422 iph->protocol = IPPROTO_UDP;
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 39e7e4d3cdb4..4a83fb3c8e87 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -73,7 +73,6 @@ static int extend_netdev_table(struct net_device *dev, u32 new_len)
73 ((sizeof(u32) * new_len)); 73 ((sizeof(u32) * new_len));
74 struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL); 74 struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
75 struct netprio_map *old_priomap; 75 struct netprio_map *old_priomap;
76 int i;
77 76
78 old_priomap = rtnl_dereference(dev->priomap); 77 old_priomap = rtnl_dereference(dev->priomap);
79 78
@@ -82,10 +81,10 @@ static int extend_netdev_table(struct net_device *dev, u32 new_len)
82 return -ENOMEM; 81 return -ENOMEM;
83 } 82 }
84 83
85 for (i = 0; 84 if (old_priomap)
86 old_priomap && (i < old_priomap->priomap_len); 85 memcpy(new_priomap->priomap, old_priomap->priomap,
87 i++) 86 old_priomap->priomap_len *
88 new_priomap->priomap[i] = old_priomap->priomap[i]; 87 sizeof(old_priomap->priomap[0]));
89 88
90 new_priomap->priomap_len = new_len; 89 new_priomap->priomap_len = new_len;
91 90
@@ -109,32 +108,6 @@ static int write_update_netdev_table(struct net_device *dev)
109 return ret; 108 return ret;
110} 109}
111 110
112static int update_netdev_tables(void)
113{
114 int ret = 0;
115 struct net_device *dev;
116 u32 max_len;
117 struct netprio_map *map;
118
119 rtnl_lock();
120 max_len = atomic_read(&max_prioidx) + 1;
121 for_each_netdev(&init_net, dev) {
122 map = rtnl_dereference(dev->priomap);
123 /*
124 * don't allocate priomap if we didn't
125 * change net_prio.ifpriomap (map == NULL),
126 * this will speed up skb_update_prio.
127 */
128 if (map && map->priomap_len < max_len) {
129 ret = extend_netdev_table(dev, max_len);
130 if (ret < 0)
131 break;
132 }
133 }
134 rtnl_unlock();
135 return ret;
136}
137
138static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) 111static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
139{ 112{
140 struct cgroup_netprio_state *cs; 113 struct cgroup_netprio_state *cs;
@@ -153,12 +126,6 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
153 goto out; 126 goto out;
154 } 127 }
155 128
156 ret = update_netdev_tables();
157 if (ret < 0) {
158 put_prioidx(cs->prioidx);
159 goto out;
160 }
161
162 return &cs->css; 129 return &cs->css;
163out: 130out:
164 kfree(cs); 131 kfree(cs);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 9b570a6a33c5..c31d9e8668c3 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -15,6 +15,7 @@
15#include <linux/random.h> 15#include <linux/random.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/string.h> 17#include <linux/string.h>
18#include <linux/tcp.h>
18#include <linux/vmalloc.h> 19#include <linux/vmalloc.h>
19 20
20#include <net/request_sock.h> 21#include <net/request_sock.h>
@@ -130,3 +131,97 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
130 kfree(lopt); 131 kfree(lopt);
131} 132}
132 133
134/*
135 * This function is called to set a Fast Open socket's "fastopen_rsk" field
136 * to NULL when a TFO socket no longer needs to access the request_sock.
137 * This happens only after 3WHS has been either completed or aborted (e.g.,
138 * RST is received).
139 *
140 * Before TFO, a child socket is created only after 3WHS is completed,
141 * hence it never needs to access the request_sock. things get a lot more
142 * complex with TFO. A child socket, accepted or not, has to access its
143 * request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts,
144 * until 3WHS is either completed or aborted. Afterwards the req will stay
145 * until either the child socket is accepted, or in the rare case when the
146 * listener is closed before the child is accepted.
147 *
148 * In short, a request socket is only freed after BOTH 3WHS has completed
149 * (or aborted) and the child socket has been accepted (or listener closed).
150 * When a child socket is accepted, its corresponding req->sk is set to
151 * NULL since it's no longer needed. More importantly, "req->sk == NULL"
152 * will be used by the code below to determine if a child socket has been
153 * accepted or not, and the check is protected by the fastopenq->lock
154 * described below.
155 *
156 * Note that fastopen_rsk is only accessed from the child socket's context
157 * with its socket lock held. But a request_sock (req) can be accessed by
158 * both its child socket through fastopen_rsk, and a listener socket through
159 * icsk_accept_queue.rskq_accept_head. To protect the access a simple spin
160 * lock per listener "icsk->icsk_accept_queue.fastopenq->lock" is created.
161 * only in the rare case when both the listener and the child locks are held,
162 * e.g., in inet_csk_listen_stop() do we not need to acquire the lock.
163 * The lock also protects other fields such as fastopenq->qlen, which is
164 * decremented by this function when fastopen_rsk is no longer needed.
165 *
166 * Note that another solution was to simply use the existing socket lock
167 * from the listener. But first socket lock is difficult to use. It is not
168 * a simple spin lock - one must consider sock_owned_by_user() and arrange
169 * to use sk_add_backlog() stuff. But what really makes it infeasible is the
170 * locking hierarchy violation. E.g., inet_csk_listen_stop() may try to
171 * acquire a child's lock while holding listener's socket lock. A corner
172 * case might also exist in tcp_v4_hnd_req() that will trigger this locking
173 * order.
174 *
175 * When a TFO req is created, it needs to sock_hold its listener to prevent
176 * the latter data structure from going away.
177 *
178 * This function also sets "treq->listener" to NULL and unreference listener
179 * socket. treq->listener is used by the listener so it is protected by the
180 * fastopenq->lock in this function.
181 */
182void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
183 bool reset)
184{
185 struct sock *lsk = tcp_rsk(req)->listener;
186 struct fastopen_queue *fastopenq =
187 inet_csk(lsk)->icsk_accept_queue.fastopenq;
188
189 BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));
190
191 tcp_sk(sk)->fastopen_rsk = NULL;
192 spin_lock_bh(&fastopenq->lock);
193 fastopenq->qlen--;
194 tcp_rsk(req)->listener = NULL;
195 if (req->sk) /* the child socket hasn't been accepted yet */
196 goto out;
197
198 if (!reset || lsk->sk_state != TCP_LISTEN) {
199 /* If the listener has been closed don't bother with the
200 * special RST handling below.
201 */
202 spin_unlock_bh(&fastopenq->lock);
203 sock_put(lsk);
204 reqsk_free(req);
205 return;
206 }
207 /* Wait for 60secs before removing a req that has triggered RST.
208 * This is a simple defense against TFO spoofing attack - by
209 * counting the req against fastopen.max_qlen, and disabling
210 * TFO when the qlen exceeds max_qlen.
211 *
212 * For more details see CoNext'11 "TCP Fast Open" paper.
213 */
214 req->expires = jiffies + 60*HZ;
215 if (fastopenq->rskq_rst_head == NULL)
216 fastopenq->rskq_rst_head = req;
217 else
218 fastopenq->rskq_rst_tail->dl_next = req;
219
220 req->dl_next = NULL;
221 fastopenq->rskq_rst_tail = req;
222 fastopenq->qlen++;
223out:
224 spin_unlock_bh(&fastopenq->lock);
225 sock_put(lsk);
226 return;
227}
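The comment above describes the kernel-internal life cycle of a Fast Open request_sock. For context, a hedged sketch of server-side enablement from userspace, assuming the TCP_FASTOPEN socket option introduced by this series and the net.ipv4.tcp_fastopen sysctl having its server-enable bit set:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

#ifndef TCP_FASTOPEN
#define TCP_FASTOPEN 23	/* assumed value, matching the uapi header added by this series */
#endif

static int make_tfo_listener(int fd, int backlog)
{
	/* Maximum length of the per-listener queue of pending Fast Open
	 * requests (the fastopenq described in the comment above). */
	int qlen = 16;

	if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0)
		return -1;
	return listen(fd, backlog);
}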
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2c5a0a06c4ce..76d4c2c3c89b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -618,7 +618,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
618 long expires, u32 error) 618 long expires, u32 error)
619{ 619{
620 struct rta_cacheinfo ci = { 620 struct rta_cacheinfo ci = {
621 .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse), 621 .rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse),
622 .rta_used = dst->__use, 622 .rta_used = dst->__use,
623 .rta_clntref = atomic_read(&(dst->__refcnt)), 623 .rta_clntref = atomic_read(&(dst->__refcnt)),
624 .rta_error = error, 624 .rta_error = error,
@@ -1081,7 +1081,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1081 if (idx < s_idx) 1081 if (idx < s_idx)
1082 goto cont; 1082 goto cont;
1083 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, 1083 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
1084 NETLINK_CB(cb->skb).pid, 1084 NETLINK_CB(cb->skb).portid,
1085 cb->nlh->nlmsg_seq, 0, 1085 cb->nlh->nlmsg_seq, 0,
1086 NLM_F_MULTI, 1086 NLM_F_MULTI,
1087 ext_filter_mask) <= 0) 1087 ext_filter_mask) <= 0)
@@ -1812,8 +1812,6 @@ replay:
1812 return -ENODEV; 1812 return -ENODEV;
1813 } 1813 }
1814 1814
1815 if (ifm->ifi_index)
1816 return -EOPNOTSUPP;
1817 if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) 1815 if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
1818 return -EOPNOTSUPP; 1816 return -EOPNOTSUPP;
1819 1817
@@ -1839,10 +1837,14 @@ replay:
1839 return PTR_ERR(dest_net); 1837 return PTR_ERR(dest_net);
1840 1838
1841 dev = rtnl_create_link(net, dest_net, ifname, ops, tb); 1839 dev = rtnl_create_link(net, dest_net, ifname, ops, tb);
1842 1840 if (IS_ERR(dev)) {
1843 if (IS_ERR(dev))
1844 err = PTR_ERR(dev); 1841 err = PTR_ERR(dev);
1845 else if (ops->newlink) 1842 goto out;
1843 }
1844
1845 dev->ifindex = ifm->ifi_index;
1846
1847 if (ops->newlink)
1846 err = ops->newlink(net, dev, tb, data); 1848 err = ops->newlink(net, dev, tb, data);
1847 else 1849 else
1848 err = register_netdevice(dev); 1850 err = register_netdevice(dev);
@@ -1897,14 +1899,14 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1897 if (nskb == NULL) 1899 if (nskb == NULL)
1898 return -ENOBUFS; 1900 return -ENOBUFS;
1899 1901
1900 err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, 1902 err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
1901 nlh->nlmsg_seq, 0, 0, ext_filter_mask); 1903 nlh->nlmsg_seq, 0, 0, ext_filter_mask);
1902 if (err < 0) { 1904 if (err < 0) {
1903 /* -EMSGSIZE implies BUG in if_nlmsg_size */ 1905 /* -EMSGSIZE implies BUG in if_nlmsg_size */
1904 WARN_ON(err == -EMSGSIZE); 1906 WARN_ON(err == -EMSGSIZE);
1905 kfree_skb(nskb); 1907 kfree_skb(nskb);
1906 } else 1908 } else
1907 err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); 1909 err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
1908 1910
1909 return err; 1911 return err;
1910} 1912}
@@ -2088,7 +2090,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2088 if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && 2090 if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
2089 (dev->priv_flags & IFF_BRIDGE_PORT)) { 2091 (dev->priv_flags & IFF_BRIDGE_PORT)) {
2090 master = dev->master; 2092 master = dev->master;
2091 err = master->netdev_ops->ndo_fdb_add(ndm, dev, addr, 2093 err = master->netdev_ops->ndo_fdb_add(ndm, tb,
2094 dev, addr,
2092 nlh->nlmsg_flags); 2095 nlh->nlmsg_flags);
2093 if (err) 2096 if (err)
2094 goto out; 2097 goto out;
@@ -2098,7 +2101,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2098 2101
2099 /* Embedded bridge, macvlan, and any other device support */ 2102 /* Embedded bridge, macvlan, and any other device support */
2100 if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) { 2103 if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) {
2101 err = dev->netdev_ops->ndo_fdb_add(ndm, dev, addr, 2104 err = dev->netdev_ops->ndo_fdb_add(ndm, tb,
2105 dev, addr,
2102 nlh->nlmsg_flags); 2106 nlh->nlmsg_flags);
2103 2107
2104 if (!err) { 2108 if (!err) {
@@ -2178,9 +2182,9 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
2178{ 2182{
2179 struct netdev_hw_addr *ha; 2183 struct netdev_hw_addr *ha;
2180 int err; 2184 int err;
2181 u32 pid, seq; 2185 u32 portid, seq;
2182 2186
2183 pid = NETLINK_CB(cb->skb).pid; 2187 portid = NETLINK_CB(cb->skb).portid;
2184 seq = cb->nlh->nlmsg_seq; 2188 seq = cb->nlh->nlmsg_seq;
2185 2189
2186 list_for_each_entry(ha, &list->list, list) { 2190 list_for_each_entry(ha, &list->list, list) {
@@ -2188,7 +2192,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
2188 goto skip; 2192 goto skip;
2189 2193
2190 err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 2194 err = nlmsg_populate_fdb_fill(skb, dev, ha->addr,
2191 pid, seq, 0, NTF_SELF); 2195 portid, seq, 0, NTF_SELF);
2192 if (err < 0) 2196 if (err < 0)
2193 return err; 2197 return err;
2194skip: 2198skip:
@@ -2356,7 +2360,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
2356 case NETDEV_PRE_TYPE_CHANGE: 2360 case NETDEV_PRE_TYPE_CHANGE:
2357 case NETDEV_GOING_DOWN: 2361 case NETDEV_GOING_DOWN:
2358 case NETDEV_UNREGISTER: 2362 case NETDEV_UNREGISTER:
2359 case NETDEV_UNREGISTER_BATCH: 2363 case NETDEV_UNREGISTER_FINAL:
2360 case NETDEV_RELEASE: 2364 case NETDEV_RELEASE:
2361 case NETDEV_JOIN: 2365 case NETDEV_JOIN:
2362 break; 2366 break;
@@ -2379,9 +2383,10 @@ static int __net_init rtnetlink_net_init(struct net *net)
2379 .groups = RTNLGRP_MAX, 2383 .groups = RTNLGRP_MAX,
2380 .input = rtnetlink_rcv, 2384 .input = rtnetlink_rcv,
2381 .cb_mutex = &rtnl_mutex, 2385 .cb_mutex = &rtnl_mutex,
2386 .flags = NL_CFG_F_NONROOT_RECV,
2382 }; 2387 };
2383 2388
2384 sk = netlink_kernel_create(net, NETLINK_ROUTE, THIS_MODULE, &cfg); 2389 sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg);
2385 if (!sk) 2390 if (!sk)
2386 return -ENOMEM; 2391 return -ENOMEM;
2387 net->rtnl = sk; 2392 net->rtnl = sk;
@@ -2414,7 +2419,6 @@ void __init rtnetlink_init(void)
2414 if (register_pernet_subsys(&rtnetlink_net_ops)) 2419 if (register_pernet_subsys(&rtnetlink_net_ops))
2415 panic("rtnetlink_init: cannot initialize rtnetlink\n"); 2420 panic("rtnetlink_init: cannot initialize rtnetlink\n");
2416 2421
2417 netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
2418 register_netdevice_notifier(&rtnetlink_dev_notifier); 2422 register_netdevice_notifier(&rtnetlink_dev_notifier);
2419 2423
2420 rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, 2424 rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
diff --git a/net/core/scm.c b/net/core/scm.c
index 6ab491d6c26f..9c1c63da3ca8 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -155,19 +155,21 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
155 break; 155 break;
156 case SCM_CREDENTIALS: 156 case SCM_CREDENTIALS:
157 { 157 {
158 struct ucred creds;
158 kuid_t uid; 159 kuid_t uid;
159 kgid_t gid; 160 kgid_t gid;
160 if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred))) 161 if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
161 goto error; 162 goto error;
162 memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred)); 163 memcpy(&creds, CMSG_DATA(cmsg), sizeof(struct ucred));
163 err = scm_check_creds(&p->creds); 164 err = scm_check_creds(&creds);
164 if (err) 165 if (err)
165 goto error; 166 goto error;
166 167
167 if (!p->pid || pid_vnr(p->pid) != p->creds.pid) { 168 p->creds.pid = creds.pid;
169 if (!p->pid || pid_vnr(p->pid) != creds.pid) {
168 struct pid *pid; 170 struct pid *pid;
169 err = -ESRCH; 171 err = -ESRCH;
170 pid = find_get_pid(p->creds.pid); 172 pid = find_get_pid(creds.pid);
171 if (!pid) 173 if (!pid)
172 goto error; 174 goto error;
173 put_pid(p->pid); 175 put_pid(p->pid);
@@ -175,11 +177,14 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
175 } 177 }
176 178
177 err = -EINVAL; 179 err = -EINVAL;
178 uid = make_kuid(current_user_ns(), p->creds.uid); 180 uid = make_kuid(current_user_ns(), creds.uid);
179 gid = make_kgid(current_user_ns(), p->creds.gid); 181 gid = make_kgid(current_user_ns(), creds.gid);
180 if (!uid_valid(uid) || !gid_valid(gid)) 182 if (!uid_valid(uid) || !gid_valid(gid))
181 goto error; 183 goto error;
182 184
185 p->creds.uid = uid;
186 p->creds.gid = gid;
187
183 if (!p->cred || 188 if (!p->cred ||
184 !uid_eq(p->cred->euid, uid) || 189 !uid_eq(p->cred->euid, uid) ||
185 !gid_eq(p->cred->egid, gid)) { 190 !gid_eq(p->cred->egid, gid)) {
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 99b2596531bb..e61a8bb7fce7 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -76,6 +76,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
76 76
77 return hash[0]; 77 return hash[0];
78} 78}
79EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
79#endif 80#endif
80 81
81#ifdef CONFIG_INET 82#ifdef CONFIG_INET
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e33ebae519c8..cdc28598f4ef 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -340,43 +340,57 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
340EXPORT_SYMBOL(build_skb); 340EXPORT_SYMBOL(build_skb);
341 341
342struct netdev_alloc_cache { 342struct netdev_alloc_cache {
343 struct page *page; 343 struct page_frag frag;
344 unsigned int offset; 344 /* we maintain a pagecount bias, so that we dont dirty cache line
345 unsigned int pagecnt_bias; 345 * containing page->_count every time we allocate a fragment.
346 */
347 unsigned int pagecnt_bias;
346}; 348};
347static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); 349static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
348 350
349#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES) 351#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
352#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
353#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE
350 354
351static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) 355static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
352{ 356{
353 struct netdev_alloc_cache *nc; 357 struct netdev_alloc_cache *nc;
354 void *data = NULL; 358 void *data = NULL;
359 int order;
355 unsigned long flags; 360 unsigned long flags;
356 361
357 local_irq_save(flags); 362 local_irq_save(flags);
358 nc = &__get_cpu_var(netdev_alloc_cache); 363 nc = &__get_cpu_var(netdev_alloc_cache);
359 if (unlikely(!nc->page)) { 364 if (unlikely(!nc->frag.page)) {
360refill: 365refill:
361 nc->page = alloc_page(gfp_mask); 366 for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
362 if (unlikely(!nc->page)) 367 gfp_t gfp = gfp_mask;
363 goto end; 368
369 if (order)
370 gfp |= __GFP_COMP | __GFP_NOWARN;
371 nc->frag.page = alloc_pages(gfp, order);
372 if (likely(nc->frag.page))
373 break;
374 if (--order < 0)
375 goto end;
376 }
377 nc->frag.size = PAGE_SIZE << order;
364recycle: 378recycle:
365 atomic_set(&nc->page->_count, NETDEV_PAGECNT_BIAS); 379 atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
366 nc->pagecnt_bias = NETDEV_PAGECNT_BIAS; 380 nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
367 nc->offset = 0; 381 nc->frag.offset = 0;
368 } 382 }
369 383
370 if (nc->offset + fragsz > PAGE_SIZE) { 384 if (nc->frag.offset + fragsz > nc->frag.size) {
371 /* avoid unnecessary locked operations if possible */ 385 /* avoid unnecessary locked operations if possible */
372 if ((atomic_read(&nc->page->_count) == nc->pagecnt_bias) || 386 if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
373 atomic_sub_and_test(nc->pagecnt_bias, &nc->page->_count)) 387 atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
374 goto recycle; 388 goto recycle;
375 goto refill; 389 goto refill;
376 } 390 }
377 391
378 data = page_address(nc->page) + nc->offset; 392 data = page_address(nc->frag.page) + nc->frag.offset;
379 nc->offset += fragsz; 393 nc->frag.offset += fragsz;
380 nc->pagecnt_bias--; 394 nc->pagecnt_bias--;
381end: 395end:
382 local_irq_restore(flags); 396 local_irq_restore(flags);
@@ -1655,38 +1669,19 @@ static struct page *linear_to_page(struct page *page, unsigned int *len,
1655 unsigned int *offset, 1669 unsigned int *offset,
1656 struct sk_buff *skb, struct sock *sk) 1670 struct sk_buff *skb, struct sock *sk)
1657{ 1671{
1658 struct page *p = sk->sk_sndmsg_page; 1672 struct page_frag *pfrag = sk_page_frag(sk);
1659 unsigned int off;
1660
1661 if (!p) {
1662new_page:
1663 p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0);
1664 if (!p)
1665 return NULL;
1666 1673
1667 off = sk->sk_sndmsg_off = 0; 1674 if (!sk_page_frag_refill(sk, pfrag))
1668 /* hold one ref to this page until it's full */ 1675 return NULL;
1669 } else {
1670 unsigned int mlen;
1671
1672 /* If we are the only user of the page, we can reset offset */
1673 if (page_count(p) == 1)
1674 sk->sk_sndmsg_off = 0;
1675 off = sk->sk_sndmsg_off;
1676 mlen = PAGE_SIZE - off;
1677 if (mlen < 64 && mlen < *len) {
1678 put_page(p);
1679 goto new_page;
1680 }
1681 1676
1682 *len = min_t(unsigned int, *len, mlen); 1677 *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset);
1683 }
1684 1678
1685 memcpy(page_address(p) + off, page_address(page) + *offset, *len); 1679 memcpy(page_address(pfrag->page) + pfrag->offset,
1686 sk->sk_sndmsg_off += *len; 1680 page_address(page) + *offset, *len);
1687 *offset = off; 1681 *offset = pfrag->offset;
1682 pfrag->offset += *len;
1688 1683
1689 return p; 1684 return pfrag->page;
1690} 1685}
1691 1686
1692static bool spd_can_coalesce(const struct splice_pipe_desc *spd, 1687static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
@@ -3488,8 +3483,7 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
3488 skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS) 3483 skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
3489 return false; 3484 return false;
3490 3485
3491 delta = from->truesize - 3486 delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
3492 SKB_TRUESIZE(skb_end_pointer(from) - from->head);
3493 } 3487 }
3494 3488
3495 WARN_ON_ONCE(delta < len); 3489 WARN_ON_ONCE(delta < len);
diff --git a/net/core/sock.c b/net/core/sock.c
index 12cddd037bce..8a146cfcc366 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1221,7 +1221,7 @@ void sock_update_classid(struct sock *sk)
1221 rcu_read_lock(); /* doing current task, which cannot vanish. */ 1221 rcu_read_lock(); /* doing current task, which cannot vanish. */
1222 classid = task_cls_classid(current); 1222 classid = task_cls_classid(current);
1223 rcu_read_unlock(); 1223 rcu_read_unlock();
1224 if (classid && classid != sk->sk_classid) 1224 if (classid != sk->sk_classid)
1225 sk->sk_classid = classid; 1225 sk->sk_classid = classid;
1226} 1226}
1227EXPORT_SYMBOL(sock_update_classid); 1227EXPORT_SYMBOL(sock_update_classid);
@@ -1458,19 +1458,6 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1458} 1458}
1459EXPORT_SYMBOL_GPL(sk_setup_caps); 1459EXPORT_SYMBOL_GPL(sk_setup_caps);
1460 1460
1461void __init sk_init(void)
1462{
1463 if (totalram_pages <= 4096) {
1464 sysctl_wmem_max = 32767;
1465 sysctl_rmem_max = 32767;
1466 sysctl_wmem_default = 32767;
1467 sysctl_rmem_default = 32767;
1468 } else if (totalram_pages >= 131072) {
1469 sysctl_wmem_max = 131071;
1470 sysctl_rmem_max = 131071;
1471 }
1472}
1473
1474/* 1461/*
1475 * Simple resource managers for sockets. 1462 * Simple resource managers for sockets.
1476 */ 1463 */
@@ -1738,6 +1725,45 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1738} 1725}
1739EXPORT_SYMBOL(sock_alloc_send_skb); 1726EXPORT_SYMBOL(sock_alloc_send_skb);
1740 1727
1728/* On 32bit arches, an skb frag is limited to 2^15 */
1729#define SKB_FRAG_PAGE_ORDER get_order(32768)
1730
1731bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
1732{
1733 int order;
1734
1735 if (pfrag->page) {
1736 if (atomic_read(&pfrag->page->_count) == 1) {
1737 pfrag->offset = 0;
1738 return true;
1739 }
1740 if (pfrag->offset < pfrag->size)
1741 return true;
1742 put_page(pfrag->page);
1743 }
1744
1745 /* We restrict high order allocations to users that can afford to wait */
1746 order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
1747
1748 do {
1749 gfp_t gfp = sk->sk_allocation;
1750
1751 if (order)
1752 gfp |= __GFP_COMP | __GFP_NOWARN;
1753 pfrag->page = alloc_pages(gfp, order);
1754 if (likely(pfrag->page)) {
1755 pfrag->offset = 0;
1756 pfrag->size = PAGE_SIZE << order;
1757 return true;
1758 }
1759 } while (--order >= 0);
1760
1761 sk_enter_memory_pressure(sk);
1762 sk_stream_moderate_sndbuf(sk);
1763 return false;
1764}
1765EXPORT_SYMBOL(sk_page_frag_refill);
1766
1741static void __lock_sock(struct sock *sk) 1767static void __lock_sock(struct sock *sk)
1742 __releases(&sk->sk_lock.slock) 1768 __releases(&sk->sk_lock.slock)
1743 __acquires(&sk->sk_lock.slock) 1769 __acquires(&sk->sk_lock.slock)
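sk_page_frag_refill() above is the refill side of the per-task/per-socket page-fragment allocator described in items 9 and 10 of the merge message; linear_to_page() in the skbuff.c hunk earlier in this diff shows the consumer side. A minimal sketch of that consumer pattern (hypothetical helper, error paths trimmed):

/* Hypothetical helper: copy "len" bytes from "from" into the socket's
 * current page fragment, the way linear_to_page() above now does. */
static int example_copy_to_frag(struct sock *sk, const void *from, int len)
{
	struct page_frag *pfrag = sk_page_frag(sk);
	int copy;

	if (!sk_page_frag_refill(sk, pfrag))
		return -ENOMEM;		/* memory pressure path */

	copy = min_t(int, len, pfrag->size - pfrag->offset);
	memcpy(page_address(pfrag->page) + pfrag->offset, from, copy);

	/* A real caller would also get_page(pfrag->page) and attach the
	 * just-filled range to an skb frag before moving the offset on. */
	pfrag->offset += copy;
	return copy;
}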
@@ -2167,8 +2193,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
2167 sk->sk_error_report = sock_def_error_report; 2193 sk->sk_error_report = sock_def_error_report;
2168 sk->sk_destruct = sock_def_destruct; 2194 sk->sk_destruct = sock_def_destruct;
2169 2195
2170 sk->sk_sndmsg_page = NULL; 2196 sk->sk_frag.page = NULL;
2171 sk->sk_sndmsg_off = 0; 2197 sk->sk_frag.offset = 0;
2172 sk->sk_peek_off = -1; 2198 sk->sk_peek_off = -1;
2173 2199
2174 sk->sk_peer_pid = NULL; 2200 sk->sk_peer_pid = NULL;
@@ -2411,6 +2437,12 @@ void sk_common_release(struct sock *sk)
2411 xfrm_sk_free_policy(sk); 2437 xfrm_sk_free_policy(sk);
2412 2438
2413 sk_refcnt_debug_release(sk); 2439 sk_refcnt_debug_release(sk);
2440
2441 if (sk->sk_frag.page) {
2442 put_page(sk->sk_frag.page);
2443 sk->sk_frag.page = NULL;
2444 }
2445
2414 sock_put(sk); 2446 sock_put(sk);
2415} 2447}
2416EXPORT_SYMBOL(sk_common_release); 2448EXPORT_SYMBOL(sk_common_release);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 9d8755e4a7a5..602cd637182e 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -172,8 +172,7 @@ static int __net_init diag_net_init(struct net *net)
172 .input = sock_diag_rcv, 172 .input = sock_diag_rcv,
173 }; 173 };
174 174
175 net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, 175 net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg);
176 THIS_MODULE, &cfg);
177 return net->diag_nlsk == NULL ? -ENOMEM : 0; 176 return net->diag_nlsk == NULL ? -ENOMEM : 0;
178} 177}
179 178
diff --git a/net/core/utils.c b/net/core/utils.c
index 39895a65e54a..f5613d569c23 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -294,6 +294,26 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
294} 294}
295EXPORT_SYMBOL(inet_proto_csum_replace4); 295EXPORT_SYMBOL(inet_proto_csum_replace4);
296 296
297void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
298 const __be32 *from, const __be32 *to,
299 int pseudohdr)
300{
301 __be32 diff[] = {
302 ~from[0], ~from[1], ~from[2], ~from[3],
303 to[0], to[1], to[2], to[3],
304 };
305 if (skb->ip_summed != CHECKSUM_PARTIAL) {
306 *sum = csum_fold(csum_partial(diff, sizeof(diff),
307 ~csum_unfold(*sum)));
308 if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
309 skb->csum = ~csum_partial(diff, sizeof(diff),
310 ~skb->csum);
311 } else if (pseudohdr)
312 *sum = ~csum_fold(csum_partial(diff, sizeof(diff),
313 csum_unfold(*sum)));
314}
315EXPORT_SYMBOL(inet_proto_csum_replace16);
316
297int mac_pton(const char *s, u8 *mac) 317int mac_pton(const char *s, u8 *mac)
298{ 318{
299 int i; 319 int i;
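inet_proto_csum_replace16() above is the 128-bit counterpart of inet_proto_csum_replace4(), added in support of the IPv6 NAT work mentioned in the merge message. A hedged sketch of the intended call pattern when a transport checksum covers an IPv6 address being rewritten (all names here are illustrative, not taken from this diff):

/* Hypothetical NAT-style rewrite of the IPv6 destination address. */
static void example_rewrite_daddr(struct sk_buff *skb,
				  const struct in6_addr *new_daddr)
{
	struct ipv6hdr *ip6h = ipv6_hdr(skb);
	struct tcphdr *tcph = tcp_hdr(skb);

	/* Fix the TCP checksum, which covers the address through the
	 * pseudo-header (hence pseudohdr = 1), then rewrite the header. */
	inet_proto_csum_replace16(&tcph->check, skb,
				  ip6h->daddr.s6_addr32,	/* old */
				  new_daddr->s6_addr32,		/* new */
				  1);
	ip6h->daddr = *new_daddr;
}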