author		Linus Torvalds <torvalds@linux-foundation.org>	2014-01-25 14:17:34 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-25 14:17:34 -0500
commit		4ba9920e5e9c0e16b5ed24292d45322907bb9035 (patch)
tree		7d023baea59ed0886ded1f0b6d1c6385690b88f7 /net/ipv4
parent		82c477669a4665eb4e52030792051e0559ee2a36 (diff)
parent		8b662fe70c68282f78482dc272df0c4f355e49f5 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) BPF debugger and asm tool by Daniel Borkmann.

 2) Speed up create/bind in AF_PACKET, also from Daniel Borkmann.

 3) Correct reciprocal_divide and update users, from Hannes Frederic
    Sowa and Daniel Borkmann.

 4) Currently we only have a "set" operation for the hw timestamp socket
    ioctl, add a "get" operation to match.  From Ben Hutchings.

 5) Add better trace events for debugging driver datapath problems, also
    from Ben Hutchings.

 6) Implement auto corking in TCP, from Eric Dumazet.  Basically, if we
    have a small send and a previous packet is already in the qdisc or
    device queue, defer until TX completion or we get more data.

 7) Allow userspace to manage ipv6 temporary addresses, from Jiri Pirko.

 8) Add a qdisc bypass option for AF_PACKET sockets, from Daniel
    Borkmann.

 9) Share IP header compression code between Bluetooth and IEEE802154
    layers, from Jukka Rissanen.

10) Fix ipv6 router reachability probing, from Jiri Benc.

11) Allow packets to be captured on macvtap devices, from Vlad Yasevich.

12) Support tunneling in GRO layer, from Jerry Chu.

13) Allow bonding to be configured fully using netlink, from Scott
    Feldman.

14) Allow AF_PACKET users to obtain the VLAN TPID, just like they can
    already get the TCI.  From Atzm Watanabe.

15) New "Heavy Hitter" qdisc, from Terry Lam.

16) Significantly improve the IPSEC support in pktgen, from Fan Du.

17) Allow ipv4 tunnels to cache routes, just like sockets.  From Tom
    Herbert.

18) Add Proportional Integral Enhanced packet scheduler, from Vijay
    Subramanian.

19) Allow openvswitch to use mmap'd netlink, from Thomas Graf.

20) Key TCP metrics blobs also by source address, not just destination
    address.  From Christoph Paasch.

21) Support 10G in generic phylib.  From Andy Fleming.

22) Try to short-circuit GRO flow compares using device provided RX
    hash, if provided.  From Tom Herbert.

The wireless and netfilter folks have been busy little bees too.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2064 commits)
  net/cxgb4: Fix referencing freed adapter
  ipv6: reallocate addrconf router for ipv6 address when lo device up
  fib_frontend: fix possible NULL pointer dereference
  rtnetlink: remove IFLA_BOND_SLAVE definition
  rtnetlink: remove check for fill_slave_info in rtnl_have_link_slave_info
  qlcnic: update version to 5.3.55
  qlcnic: Enhance logic to calculate msix vectors.
  qlcnic: Refactor interrupt coalescing code for all adapters.
  qlcnic: Update poll controller code path
  qlcnic: Interrupt code cleanup
  qlcnic: Enhance Tx timeout debugging.
  qlcnic: Use bool for rx_mac_learn.
  bonding: fix u64 division
  rtnetlink: add missing IFLA_BOND_AD_INFO_UNSPEC
  sfc: Use the correct maximum TX DMA ring size for SFC9100
  Add Shradha Shah as the sfc driver maintainer.
  net/vxlan: Share RX skb de-marking and checksum checks with ovs
  tulip: cleanup by using ARRAY_SIZE()
  ip_tunnel: clear IPCB in ip_tunnel_xmit() in case dst_link_failure() is called
  net/cxgb4: Don't retrieve stats during recovery
  ...
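The auto-corking change in item 6 boils down to one decision per small write. A minimal standalone sketch of that rule, with illustrative names that are not the actual net-next code:

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical sketch of the auto-corking test described in item 6;
 * names and signature are illustrative, not the kernel's tcp_push() path.
 */
bool autocork_sketch(size_t send_size, size_t mss,
		     unsigned int pkts_queued_below_stack)
{
	/* Only a sub-MSS send is worth deferring. */
	if (send_size >= mss)
		return false;
	/* A previous packet of ours is still in the qdisc or device
	 * queue: hold this one until TX completion (or more data) so
	 * the pieces coalesce into fewer, fuller packets.
	 */
	return pkts_queued_below_stack > 0;
}

Deferring only sub-MSS writes keeps full-sized sends unaffected while letting chatty small writes coalesce.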
Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/Makefile                               |    4
-rw-r--r--  net/ipv4/af_inet.c                              |   43
-rw-r--r--  net/ipv4/arp.c                                  |   53
-rw-r--r--  net/ipv4/cipso_ipv4.c                           |   12
-rw-r--r--  net/ipv4/datagram.c                             |    2
-rw-r--r--  net/ipv4/devinet.c                              |   76
-rw-r--r--  net/ipv4/fib_frontend.c                         |    2
-rw-r--r--  net/ipv4/fib_lookup.h                           |    2
-rw-r--r--  net/ipv4/fib_semantics.c                        |    5
-rw-r--r--  net/ipv4/gre_demux.c                            |    9
-rw-r--r--  net/ipv4/gre_offload.c                          |  185
-rw-r--r--  net/ipv4/icmp.c                                 |   26
-rw-r--r--  net/ipv4/igmp.c                                 |   84
-rw-r--r--  net/ipv4/inet_connection_sock.c                 |    2
-rw-r--r--  net/ipv4/inet_lro.c                             |  173
-rw-r--r--  net/ipv4/inetpeer.c                             |   11
-rw-r--r--  net/ipv4/ip_forward.c                           |    7
-rw-r--r--  net/ipv4/ip_fragment.c                          |    2
-rw-r--r--  net/ipv4/ip_gre.c                               |    4
-rw-r--r--  net/ipv4/ip_options.c                           |   42
-rw-r--r--  net/ipv4/ip_output.c                            |   12
-rw-r--r--  net/ipv4/ip_sockglue.c                          |   11
-rw-r--r--  net/ipv4/ip_tunnel.c                            |  150
-rw-r--r--  net/ipv4/ip_tunnel_core.c                       |    5
-rw-r--r--  net/ipv4/ip_vti.c                               |    4
-rw-r--r--  net/ipv4/ipip.c                                 |    2
-rw-r--r--  net/ipv4/ipmr.c                                 |    2
-rw-r--r--  net/ipv4/netfilter/Kconfig                      |   18
-rw-r--r--  net/ipv4/netfilter/Makefile                     |    1
-rw-r--r--  net/ipv4/netfilter/ipt_REJECT.c                 |  140
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c  |    6
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic.c          |   15
-rw-r--r--  net/ipv4/netfilter/nf_tables_arp.c              |   44
-rw-r--r--  net/ipv4/netfilter/nf_tables_ipv4.c             |   60
-rw-r--r--  net/ipv4/netfilter/nft_chain_nat_ipv4.c         |   10
-rw-r--r--  net/ipv4/netfilter/nft_chain_route_ipv4.c       |   10
-rw-r--r--  net/ipv4/netfilter/nft_reject_ipv4.c            |  123
-rw-r--r--  net/ipv4/ping.c                                 |   27
-rw-r--r--  net/ipv4/proc.c                                 |    9
-rw-r--r--  net/ipv4/raw.c                                  |    6
-rw-r--r--  net/ipv4/route.c                                |    3
-rw-r--r--  net/ipv4/syncookies.c                           |    2
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c                      |   32
-rw-r--r--  net/ipv4/tcp.c                                  |   71
-rw-r--r--  net/ipv4/tcp_input.c                            |    6
-rw-r--r--  net/ipv4/tcp_ipv4.c                             |    5
-rw-r--r--  net/ipv4/tcp_metrics.c                          |  194
-rw-r--r--  net/ipv4/tcp_minisocks.c                        |    3
-rw-r--r--  net/ipv4/tcp_offload.c                          |   18
-rw-r--r--  net/ipv4/tcp_output.c                           |  141
-rw-r--r--  net/ipv4/tcp_probe.c                            |    4
-rw-r--r--  net/ipv4/tcp_yeah.c                             |   20
-rw-r--r--  net/ipv4/udp.c                                  |    8
-rw-r--r--  net/ipv4/udp_offload.c                          |  143
-rw-r--r--  net/ipv4/xfrm4_mode_beet.c                      |    2
-rw-r--r--  net/ipv4/xfrm4_state.c                          |    2
56 files changed, 1120 insertions, 933 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4b81e91c80fe..f8c49ce5b283 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \
 	     tcp_offload.o datagram.o raw.o udp.o udplite.o \
 	     udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
 	     fib_frontend.o fib_semantics.o fib_trie.o \
-	     inet_fragment.o ping.o ip_tunnel_core.o
+	     inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o
 
 obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
 obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
@@ -19,7 +19,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
 obj-$(CONFIG_IP_MROUTE) += ipmr.o
 obj-$(CONFIG_NET_IPIP) += ipip.o
-gre-y := gre_demux.o gre_offload.o
+gre-y := gre_demux.o
 obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
 obj-$(CONFIG_NET_IPGRE) += ip_gre.o
 obj-$(CONFIG_NET_IPVTI) += ip_vti.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 70011e029ac1..ecd2c3f245ce 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -126,9 +126,6 @@
 static struct list_head inetsw[SOCK_MAX];
 static DEFINE_SPINLOCK(inetsw_lock);
 
-struct ipv4_config ipv4_config;
-EXPORT_SYMBOL(ipv4_config);
-
 /* New destruction routine */
 
 void inet_sock_destruct(struct sock *sk)
@@ -342,7 +339,7 @@ lookup_protocol:
 		inet->hdrincl = 1;
 	}
 
-	if (ipv4_config.no_pmtu_disc)
+	if (net->ipv4.sysctl_ip_no_pmtu_disc)
 		inet->pmtudisc = IP_PMTUDISC_DONT;
 	else
 		inet->pmtudisc = IP_PMTUDISC_WANT;
@@ -1133,7 +1130,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 	fl4 = &inet->cork.fl.u.ip4;
 	rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk),
 			      sk->sk_bound_dev_if, sk->sk_protocol,
-			      inet->inet_sport, inet->inet_dport, sk, false);
+			      inet->inet_sport, inet->inet_dport, sk);
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
 
@@ -1377,8 +1374,12 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
-		iph2 = ip_hdr(p);
-
+		iph2 = (struct iphdr *)(p->data + off);
+		/* The above works because, with the exception of the top
+		 * (inner most) layer, we only aggregate pkts with the same
+		 * hdr length so all the hdrs we'll need to verify will start
+		 * at the same offset.
+		 */
 		if ((iph->protocol ^ iph2->protocol) |
 		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
 		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
@@ -1390,13 +1391,24 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 		NAPI_GRO_CB(p)->flush |=
 			(iph->ttl ^ iph2->ttl) |
 			(iph->tos ^ iph2->tos) |
-			(__force int)((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) |
-			((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
+			((iph->frag_off ^ iph2->frag_off) & htons(IP_DF));
 
+		/* Save the IP ID check to be included later when we get to
+		 * the transport layer so only the inner most IP ID is checked.
+		 * This is because some GSO/TSO implementations do not
+		 * correctly increment the IP ID for the outer hdrs.
+		 */
+		NAPI_GRO_CB(p)->flush_id =
+			((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
 		NAPI_GRO_CB(p)->flush |= flush;
 	}
 
 	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_set_network_header(skb, off);
+	/* The above will be needed by the transport layer if there is one
+	 * immediately following this IP hdr.
+	 */
+
 	skb_gro_pull(skb, sizeof(*iph));
 	skb_set_transport_header(skb, skb_gro_offset(skb));
 
@@ -1411,10 +1423,10 @@ out:
 	return pp;
 }
 
-static int inet_gro_complete(struct sk_buff *skb)
+static int inet_gro_complete(struct sk_buff *skb, int nhoff)
 {
-	__be16 newlen = htons(skb->len - skb_network_offset(skb));
-	struct iphdr *iph = ip_hdr(skb);
+	__be16 newlen = htons(skb->len - nhoff);
+	struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
 	const struct net_offload *ops;
 	int proto = iph->protocol;
 	int err = -ENOSYS;
@@ -1427,7 +1439,11 @@ static int inet_gro_complete(struct sk_buff *skb)
 	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 		goto out_unlock;
 
-	err = ops->callbacks.gro_complete(skb);
+	/* Only need to add sizeof(*iph) to get to the next hdr below
+	 * because any hdr with option will have been flushed in
+	 * inet_gro_receive().
+	 */
+	err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph));
 
 out_unlock:
 	rcu_read_unlock();
@@ -1529,6 +1545,7 @@ static const struct net_protocol tcp_protocol = {
 	.err_handler	=	tcp_v4_err,
 	.no_policy	=	1,
 	.netns_ok	=	1,
+	.icmp_strict_tag_validation = 1,
 };
 
 static const struct net_protocol udp_protocol = {
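The flush_id value stashed in inet_gro_receive() above is an IP ID continuity test: a candidate segment can only extend a held GRO chain if its ID equals the held packet's ID plus the number of segments already merged. A standalone illustration of just that arithmetic (hypothetical helper, not kernel code):

#include <stdint.h>
#include <stdio.h>

/* Returns 0 when the new segment's IP ID is exactly "held ID + count
 * of merged segments"; any non-zero value flags a gap and forces a flush.
 */
static uint16_t flush_id(uint16_t held_id, uint16_t held_count,
			 uint16_t new_id)
{
	return (uint16_t)(held_id + held_count) ^ new_id;
}

int main(void)
{
	printf("%u\n", flush_id(1000, 3, 1003));	/* 0: mergeable */
	printf("%u\n", flush_id(1000, 3, 1005));	/* 6: must flush */
	return 0;
}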
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7808093cede6..1a9b99e04465 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -166,18 +166,20 @@ struct neigh_table arp_tbl = {
 	.id		= "arp_cache",
 	.parms		= {
 		.tbl			= &arp_tbl,
-		.base_reachable_time	= 30 * HZ,
-		.retrans_time		= 1 * HZ,
-		.gc_staletime		= 60 * HZ,
 		.reachable_time		= 30 * HZ,
-		.delay_probe_time	= 5 * HZ,
-		.queue_len_bytes	= 64*1024,
-		.ucast_probes		= 3,
-		.mcast_probes		= 3,
-		.anycast_delay		= 1 * HZ,
-		.proxy_delay		= (8 * HZ) / 10,
-		.proxy_qlen		= 64,
-		.locktime		= 1 * HZ,
+		.data	= {
+			[NEIGH_VAR_MCAST_PROBES] = 3,
+			[NEIGH_VAR_UCAST_PROBES] = 3,
+			[NEIGH_VAR_RETRANS_TIME] = 1 * HZ,
+			[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
+			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
+			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
+			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
+			[NEIGH_VAR_PROXY_QLEN] = 64,
+			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
+			[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
+			[NEIGH_VAR_LOCKTIME] = 1 * HZ,
+		},
 	},
 	.gc_interval	= 30 * HZ,
 	.gc_thresh1	= 128,
@@ -359,14 +361,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	if (!saddr)
 		saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
 
-	probes -= neigh->parms->ucast_probes;
+	probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
 	if (probes < 0) {
 		if (!(neigh->nud_state & NUD_VALID))
 			pr_debug("trying to ucast probe in NUD_INVALID\n");
 		neigh_ha_snapshot(dst_ha, neigh, dev);
 		dst_hw = dst_ha;
 	} else {
-		probes -= neigh->parms->app_probes;
+		probes -= NEIGH_VAR(neigh->parms, APP_PROBES);
 		if (probes < 0) {
 			neigh_app_ns(neigh);
 			return;
@@ -379,6 +381,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 
 static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
 {
+	struct net *net = dev_net(in_dev->dev);
 	int scope;
 
 	switch (IN_DEV_ARP_IGNORE(in_dev)) {
@@ -397,6 +400,7 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
 	case 3:	/* Do not reply for scope host addresses */
 		sip = 0;
 		scope = RT_SCOPE_LINK;
+		in_dev = NULL;
 		break;
 	case 4:	/* Reserved */
 	case 5:
@@ -408,7 +412,7 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
 	default:
 		return 0;
 	}
-	return !inet_confirm_addr(in_dev, sip, tip, scope);
+	return !inet_confirm_addr(net, in_dev, sip, tip, scope);
 }
 
 static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
@@ -728,6 +732,7 @@ static int arp_process(struct sk_buff *skb)
 	int addr_type;
 	struct neighbour *n;
 	struct net *net = dev_net(dev);
+	bool is_garp = false;
 
 	/* arp_rcv below verifies the ARP header and verifies the device
 	 * is ARP'able.
@@ -871,7 +876,7 @@ static int arp_process(struct sk_buff *skb)
 
 	if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
 	    skb->pkt_type == PACKET_HOST ||
-	    in_dev->arp_parms->proxy_delay == 0) {
+	    NEIGH_VAR(in_dev->arp_parms, PROXY_DELAY) == 0) {
 		arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
 			 dev, tip, sha, dev->dev_addr,
 			 sha);
@@ -894,10 +899,12 @@ static int arp_process(struct sk_buff *skb)
 	   It is possible, that this option should be enabled for some
 	   devices (strip is candidate)
 	 */
+	is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip &&
+		  inet_addr_type(net, sip) == RTN_UNICAST;
+
 	if (n == NULL &&
-	    (arp->ar_op == htons(ARPOP_REPLY) ||
-	     (arp->ar_op == htons(ARPOP_REQUEST) && tip == sip)) &&
-	    inet_addr_type(net, sip) == RTN_UNICAST)
+	    ((arp->ar_op == htons(ARPOP_REPLY) &&
+	      inet_addr_type(net, sip) == RTN_UNICAST) || is_garp))
 		n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
 	}
 
@@ -910,7 +917,10 @@ static int arp_process(struct sk_buff *skb)
 		   agents are active. Taking the first reply prevents
 		   arp trashing and chooses the fastest router.
 		 */
-		override = time_after(jiffies, n->updated + n->parms->locktime);
+		override = time_after(jiffies,
+				      n->updated +
+				      NEIGH_VAR(n->parms, LOCKTIME)) ||
+			   is_garp;
 
 		/* Broadcast replies and request packets
 		   do not assert neighbour reachability.
@@ -1107,7 +1117,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
 	return err;
 }
 
-int arp_invalidate(struct net_device *dev, __be32 ip)
+static int arp_invalidate(struct net_device *dev, __be32 ip)
 {
 	struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
 	int err = -ENXIO;
@@ -1122,7 +1132,6 @@ int arp_invalidate(struct net_device *dev, __be32 ip)
 
 	return err;
 }
-EXPORT_SYMBOL(arp_invalidate);
 
 static int arp_req_delete_public(struct net *net, struct arpreq *r,
 				 struct net_device *dev)
@@ -1284,7 +1293,7 @@ void __init arp_init(void)
 	dev_add_pack(&arp_packet_type);
 	arp_proc_init();
 #ifdef CONFIG_SYSCTL
-	neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL);
+	neigh_sysctl_register(NULL, &arp_tbl.parms, NULL);
 #endif
 	register_netdevice_notifier(&arp_netdev_notifier);
 }
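The NEIGH_VAR() conversion above replaces one named struct field per tunable with a single data[] array indexed by NEIGH_VAR_* constants, which is what lets the sysctl registration become generic (hence the dropped "ipv4" argument to neigh_sysctl_register()). A standalone mimic of the accessor pattern with made-up names; the kernel's real macro lives in include/net/neighbour.h:

#include <stdio.h>

/* Illustrative stand-ins for the NEIGH_VAR_* enum and parms struct. */
enum { VAR_UCAST_PROBES, VAR_MCAST_PROBES, VAR_LOCKTIME, VAR_MAX };

struct parms { int data[VAR_MAX]; };

/* Token-pasting accessor, same shape as the kernel's NEIGH_VAR(). */
#define PARM_VAR(p, attr) ((p)->data[VAR_##attr])

int main(void)
{
	struct parms p = { .data = { [VAR_UCAST_PROBES] = 3,
				     [VAR_LOCKTIME]     = 100 } };
	printf("%d\n", PARM_VAR(&p, UCAST_PROBES));	/* prints 3 */
	return 0;
}

One array plus one enum means generic loops can register, clone, and report every tunable without naming each field.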
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 667c1d4ca984..69e77c8ff285 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -31,8 +31,7 @@
  * the GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
  *
  */
 
@@ -1336,8 +1335,7 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
 	secattr->flags |= NETLBL_SECATTR_MLS_LVL;
 
 	if (tag_len > 4) {
-		secattr->attr.mls.cat =
-				       netlbl_secattr_catmap_alloc(GFP_ATOMIC);
+		secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC);
 		if (secattr->attr.mls.cat == NULL)
 			return -ENOMEM;
 
@@ -1432,8 +1430,7 @@ static int cipso_v4_parsetag_enum(const struct cipso_v4_doi *doi_def,
 	secattr->flags |= NETLBL_SECATTR_MLS_LVL;
 
 	if (tag_len > 4) {
-		secattr->attr.mls.cat =
-				       netlbl_secattr_catmap_alloc(GFP_ATOMIC);
+		secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC);
 		if (secattr->attr.mls.cat == NULL)
 			return -ENOMEM;
 
@@ -1527,8 +1524,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
 	secattr->flags |= NETLBL_SECATTR_MLS_LVL;
 
 	if (tag_len > 4) {
-		secattr->attr.mls.cat =
-				       netlbl_secattr_catmap_alloc(GFP_ATOMIC);
+		secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC);
 		if (secattr->attr.mls.cat == NULL)
 			return -ENOMEM;
 
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 19e36376d2a0..8b5134c582f1 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -53,7 +53,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
 			      RT_CONN_FLAGS(sk), oif,
 			      sk->sk_protocol,
-			      inet->inet_sport, usin->sin_port, sk, true);
+			      inet->inet_sport, usin->sin_port, sk);
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
 		if (err == -ENETUNREACH)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a1b5bcbd04ae..ac2dff3c2c1c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -99,6 +99,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_BROADCAST]		= { .type = NLA_U32 },
 	[IFA_LABEL]		= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
+	[IFA_FLAGS]		= { .type = NLA_U32 },
 };
 
 #define IN4_ADDR_HSIZE_SHIFT	8
@@ -463,7 +464,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 	}
 
 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
-		net_srandom(ifa->ifa_local);
+		prandom_seed((__force u32) ifa->ifa_local);
 		ifap = last_primary;
 	}
 
@@ -473,7 +474,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 	inet_hash_insert(dev_net(in_dev->dev), ifa);
 
 	cancel_delayed_work(&check_lifetime_work);
-	schedule_delayed_work(&check_lifetime_work, 0);
+	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
 
 	/* Send message first, then call notifier.
 	   Notifier will trigger FIB update, so that
@@ -500,6 +501,7 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 			return -ENOBUFS;
 	}
 	ipv4_devconf_setall(in_dev);
+	neigh_parms_data_state_setall(in_dev->arp_parms);
 	if (ifa->ifa_dev != in_dev) {
 		WARN_ON(ifa->ifa_dev);
 		in_dev_hold(in_dev);
@@ -682,7 +684,8 @@ static void check_lifetime(struct work_struct *work)
 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
 
-	schedule_delayed_work(&check_lifetime_work, next_sched - now);
+	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
+			next_sched - now);
 }
 
 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
@@ -747,6 +750,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
 		goto errout;
 
 	ipv4_devconf_setall(in_dev);
+	neigh_parms_data_state_setall(in_dev->arp_parms);
 	in_dev_hold(in_dev);
 
 	if (tb[IFA_ADDRESS] == NULL)
@@ -755,7 +759,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
 	INIT_HLIST_NODE(&ifa->hash);
 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
-	ifa->ifa_flags = ifm->ifa_flags;
+	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
+					 ifm->ifa_flags;
 	ifa->ifa_scope = ifm->ifa_scope;
 	ifa->ifa_dev = in_dev;
 
@@ -838,7 +843,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
 		ifa = ifa_existing;
 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 		cancel_delayed_work(&check_lifetime_work);
-		schedule_delayed_work(&check_lifetime_work, 0);
+		queue_delayed_work(system_power_efficient_wq,
+				&check_lifetime_work, 0);
 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
 		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 	}
@@ -1236,22 +1242,21 @@ static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
 
 /*
  * Confirm that local IP address exists using wildcards:
- * - in_dev: only on this interface, 0=any interface
+ * - net: netns to check, cannot be NULL
+ * - in_dev: only on this interface, NULL=any interface
  * - dst: only in the same subnet as dst, 0=any dst
  * - local: address, 0=autoselect the local address
  * - scope: maximum allowed scope value for the local address
  */
-__be32 inet_confirm_addr(struct in_device *in_dev,
+__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
 			 __be32 dst, __be32 local, int scope)
 {
 	__be32 addr = 0;
 	struct net_device *dev;
-	struct net *net;
 
-	if (scope != RT_SCOPE_LINK)
+	if (in_dev != NULL)
 		return confirm_addr_indev(in_dev, dst, local, scope);
 
-	net = dev_net(in_dev->dev);
 	rcu_read_lock();
 	for_each_netdev_rcu(net, dev) {
 		in_dev = __in_dev_get_rcu(dev);
@@ -1382,6 +1387,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 			memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 			set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
 					 INFINITY_LIFE_TIME);
+			ipv4_devconf_setall(in_dev);
+			neigh_parms_data_state_setall(in_dev->arp_parms);
 			inet_insert_ifa(ifa);
 		}
 	}
@@ -1435,7 +1442,8 @@ static size_t inet_nlmsg_size(void)
 	       + nla_total_size(4) /* IFA_ADDRESS */
 	       + nla_total_size(4) /* IFA_LOCAL */
 	       + nla_total_size(4) /* IFA_BROADCAST */
-	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
+	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
+	       + nla_total_size(4);  /* IFA_FLAGS */
 }
 
 static inline u32 cstamp_delta(unsigned long cstamp)
@@ -1503,6 +1511,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
 	    (ifa->ifa_label[0] &&
 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
+	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
 			  preferred, valid))
 		goto nla_put_failure;
@@ -1691,6 +1700,8 @@ static int inet_netconf_msgsize_devconf(int type)
 		size += nla_total_size(4);
 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
 		size += nla_total_size(4);
+	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
+		size += nla_total_size(4);
 
 	return size;
 }
@@ -1727,6 +1738,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
 		goto nla_put_failure;
+	if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
+	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
+			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
+		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
 
@@ -1764,6 +1779,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
+	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
 };
 
 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
@@ -1945,6 +1961,19 @@ static void inet_forward_change(struct net *net)
 	}
 }
 
+static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
+{
+	if (cnf == net->ipv4.devconf_dflt)
+		return NETCONFA_IFINDEX_DEFAULT;
+	else if (cnf == net->ipv4.devconf_all)
+		return NETCONFA_IFINDEX_ALL;
+	else {
+		struct in_device *idev
+			= container_of(cnf, struct in_device, cnf);
+		return idev->dev->ifindex;
+	}
+}
+
 static int devinet_conf_proc(struct ctl_table *ctl, int write,
 			     void __user *buffer,
 			     size_t *lenp, loff_t *ppos)
@@ -1957,6 +1986,7 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
 		struct ipv4_devconf *cnf = ctl->extra1;
 		struct net *net = ctl->extra2;
 		int i = (int *)ctl->data - cnf->data;
+		int ifindex;
 
 		set_bit(i, cnf->state);
 
@@ -1966,23 +1996,19 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
 			if ((new_value == 0) && (old_value != 0))
 				rt_cache_flush(net);
+
 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
 		    new_value != old_value) {
-			int ifindex;
-
-			if (cnf == net->ipv4.devconf_dflt)
-				ifindex = NETCONFA_IFINDEX_DEFAULT;
-			else if (cnf == net->ipv4.devconf_all)
-				ifindex = NETCONFA_IFINDEX_ALL;
-			else {
-				struct in_device *idev =
-					container_of(cnf, struct in_device,
-						     cnf);
-				ifindex = idev->dev->ifindex;
-			}
+			ifindex = devinet_conf_ifindex(net, cnf);
 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
 						    ifindex, cnf);
 		}
+		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
+		    new_value != old_value) {
+			ifindex = devinet_conf_ifindex(net, cnf);
+			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+						    ifindex, cnf);
+		}
 	}
 
 	return ret;
@@ -2160,7 +2186,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
 
 static void devinet_sysctl_register(struct in_device *idev)
 {
-	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
+	neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
 					&idev->cnf);
 }
@@ -2298,7 +2324,7 @@ void __init devinet_init(void)
 	register_gifconf(PF_INET, inet_gifconf);
 	register_netdevice_notifier(&ip_netdev_notifier);
 
-	schedule_delayed_work(&check_lifetime_work, 0);
+	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
 
 	rtnl_af_register(&inet_af_ops);
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d846304b7b89..c7539e22868b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1047,6 +1047,8 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 	}
 
 	in_dev = __in_dev_get_rtnl(dev);
+	if (!in_dev)
+		return NOTIFY_DONE;
 
 	switch (event) {
 	case NETDEV_UP:
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 388d113fd289..1e4f6600b31d 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -33,8 +33,6 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id,
 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len,
 	       u32 tb_id, const struct nl_info *info, unsigned int nlm_flags);
 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
-int fib_detect_death(struct fib_info *fi, int order,
-		     struct fib_info **last_resort, int *last_idx, int dflt);
 
 static inline void fib_result_assign(struct fib_result *res,
 				     struct fib_info *fi)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index e63f47a4e651..b53f0bf84dca 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -426,8 +426,9 @@ struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
 	return NULL;
 }
 
-int fib_detect_death(struct fib_info *fi, int order,
-		     struct fib_info **last_resort, int *last_idx, int dflt)
+static int fib_detect_death(struct fib_info *fi, int order,
+			    struct fib_info **last_resort, int *last_idx,
+			    int dflt)
 {
 	struct neighbour *n;
 	int state = NUD_NONE;
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 5893e99e8299..1863422fb7d5 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -355,14 +355,7 @@ static int __init gre_init(void)
 		goto err_gre;
 	}
 
-	if (gre_offload_init()) {
-		pr_err("can't add protocol offload\n");
-		goto err_gso;
-	}
-
 	return 0;
-err_gso:
-	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
 err_gre:
 	inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
 err:
@@ -371,8 +364,6 @@ err:
 
 static void __exit gre_exit(void)
 {
-	gre_offload_exit();
-
 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
 	inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
 }
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 2cd02f32f99f..f1d32280cb54 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/skbuff.h>
+#include <linux/init.h>
 #include <net/protocol.h>
 #include <net/gre.h>
 
@@ -26,7 +27,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	netdev_features_t enc_features;
-	int ghl = GRE_HEADER_SECTION;
+	int ghl;
 	struct gre_base_hdr *greh;
 	u16 mac_offset = skb->mac_header;
 	int mac_len = skb->mac_len;
@@ -49,15 +50,11 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 
 	greh = (struct gre_base_hdr *)skb_transport_header(skb);
 
-	if (greh->flags & GRE_KEY)
-		ghl += GRE_HEADER_SECTION;
-	if (greh->flags & GRE_SEQ)
-		ghl += GRE_HEADER_SECTION;
-	if (greh->flags & GRE_CSUM) {
-		ghl += GRE_HEADER_SECTION;
-		csum = true;
-	} else
-		csum = false;
+	ghl = skb_inner_network_header(skb) - skb_transport_header(skb);
+	if (unlikely(ghl < sizeof(*greh)))
+		goto out;
+
+	csum = !!(greh->flags & GRE_CSUM);
 
 	if (unlikely(!pskb_may_pull(skb, ghl)))
 		goto out;
@@ -116,19 +113,175 @@ out:
 	return segs;
 }
 
+/* Compute the whole skb csum in s/w and store it, then verify GRO csum
+ * starting from gro_offset.
+ */
+static __sum16 gro_skb_checksum(struct sk_buff *skb)
+{
+	__sum16 sum;
+
+	skb->csum = skb_checksum(skb, 0, skb->len, 0);
+	NAPI_GRO_CB(skb)->csum = csum_sub(skb->csum,
+		csum_partial(skb->data, skb_gro_offset(skb), 0));
+	sum = csum_fold(NAPI_GRO_CB(skb)->csum);
+	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) {
+		if (unlikely(!sum))
+			netdev_rx_csum_fault(skb->dev);
+	} else
+		skb->ip_summed = CHECKSUM_COMPLETE;
+
+	return sum;
+}
+
+static struct sk_buff **gre_gro_receive(struct sk_buff **head,
+					struct sk_buff *skb)
+{
+	struct sk_buff **pp = NULL;
+	struct sk_buff *p;
+	const struct gre_base_hdr *greh;
+	unsigned int hlen, grehlen;
+	unsigned int off;
+	int flush = 1;
+	struct packet_offload *ptype;
+	__be16 type;
+
+	off = skb_gro_offset(skb);
+	hlen = off + sizeof(*greh);
+	greh = skb_gro_header_fast(skb, off);
+	if (skb_gro_header_hard(skb, hlen)) {
+		greh = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!greh))
+			goto out;
+	}
+
+	/* Only support version 0 and K (key), C (csum) flags. Note that
+	 * although the support for the S (seq#) flag can be added easily
+	 * for GRO, this is problematic for GSO hence can not be enabled
+	 * here because a GRO pkt may end up in the forwarding path, thus
+	 * requiring GSO support to break it up correctly.
+	 */
+	if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0)
+		goto out;
+
+	type = greh->protocol;
+
+	rcu_read_lock();
+	ptype = gro_find_receive_by_type(type);
+	if (ptype == NULL)
+		goto out_unlock;
+
+	grehlen = GRE_HEADER_SECTION;
+
+	if (greh->flags & GRE_KEY)
+		grehlen += GRE_HEADER_SECTION;
+
+	if (greh->flags & GRE_CSUM)
+		grehlen += GRE_HEADER_SECTION;
+
+	hlen = off + grehlen;
+	if (skb_gro_header_hard(skb, hlen)) {
+		greh = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!greh))
+			goto out_unlock;
+	}
+	if (greh->flags & GRE_CSUM) { /* Need to verify GRE csum first */
+		__sum16 csum = 0;
+
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			csum = csum_fold(NAPI_GRO_CB(skb)->csum);
+		/* Don't trust csum error calculated/reported by h/w */
+		if (skb->ip_summed == CHECKSUM_NONE || csum != 0)
+			csum = gro_skb_checksum(skb);
+
+		/* GRE CSUM is the 1's complement of the 1's complement sum
+		 * of the GRE hdr plus payload so it should add up to 0xffff
+		 * (and 0 after csum_fold()) just like the IPv4 hdr csum.
+		 */
+		if (csum)
+			goto out_unlock;
+	}
+	flush = 0;
+
+	for (p = *head; p; p = p->next) {
+		const struct gre_base_hdr *greh2;
+
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		/* The following checks are needed to ensure only pkts
+		 * from the same tunnel are considered for aggregation.
+		 * The criteria for "the same tunnel" includes:
+		 * 1) same version (we only support version 0 here)
+		 * 2) same protocol (we only support ETH_P_IP for now)
+		 * 3) same set of flags
+		 * 4) same key if the key field is present.
+		 */
+		greh2 = (struct gre_base_hdr *)(p->data + off);
+
+		if (greh2->flags != greh->flags ||
+		    greh2->protocol != greh->protocol) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+		if (greh->flags & GRE_KEY) {
+			/* compare keys */
+			if (*(__be32 *)(greh2+1) != *(__be32 *)(greh+1)) {
+				NAPI_GRO_CB(p)->same_flow = 0;
+				continue;
+			}
+		}
+	}
+
+	skb_gro_pull(skb, grehlen);
+
+	/* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
+	skb_gro_postpull_rcsum(skb, greh, grehlen);
+
+	pp = ptype->callbacks.gro_receive(head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return pp;
+}
+
+static int gre_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	struct gre_base_hdr *greh = (struct gre_base_hdr *)(skb->data + nhoff);
+	struct packet_offload *ptype;
+	unsigned int grehlen = sizeof(*greh);
+	int err = -ENOENT;
+	__be16 type;
+
+	type = greh->protocol;
+	if (greh->flags & GRE_KEY)
+		grehlen += GRE_HEADER_SECTION;
+
+	if (greh->flags & GRE_CSUM)
+		grehlen += GRE_HEADER_SECTION;
+
+	rcu_read_lock();
+	ptype = gro_find_complete_by_type(type);
+	if (ptype != NULL)
+		err = ptype->callbacks.gro_complete(skb, nhoff + grehlen);
+
+	rcu_read_unlock();
+	return err;
+}
+
 static const struct net_offload gre_offload = {
 	.callbacks = {
 		.gso_send_check = gre_gso_send_check,
 		.gso_segment = gre_gso_segment,
+		.gro_receive = gre_gro_receive,
+		.gro_complete = gre_gro_complete,
 	},
 };
 
-int __init gre_offload_init(void)
+static int __init gre_offload_init(void)
 {
 	return inet_add_offload(&gre_offload, IPPROTO_GRE);
 }
-
-void __exit gre_offload_exit(void)
-{
-	inet_del_offload(&gre_offload, IPPROTO_GRE);
-}
+device_initcall(gre_offload_init);
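gre_gro_receive() above leans on a classic property of the Internet checksum: folding the 1's-complement sum over data that already contains its own checksum yields 0xffff, i.e. zero after complementing. A standalone demo of that property in plain C (not the kernel's csum helpers):

#include <stdint.h>
#include <stdio.h>

/* RFC 1071-style 16-bit 1's-complement checksum over n words. */
static uint16_t csum16(const uint16_t *w, int n)
{
	uint32_t sum = 0;

	while (n--)
		sum += *w++;
	sum = (sum & 0xffff) + (sum >> 16);	/* fold carries */
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;			/* complement */
}

int main(void)
{
	uint16_t buf[4] = { 0x4500, 0x0054, 0x0000, 0x0000 };

	buf[3] = csum16(buf, 4);	/* store checksum in the buffer */
	/* re-checksumming data + checksum must give 0, the "pass" value */
	printf("%u\n", csum16(buf, 4));
	return 0;
}

This is why the GRE path can just test `if (csum)` after folding: any non-zero residue means the header-plus-payload sum did not add up.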
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 5c0e8bc6e5ba..0134663fdbce 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -668,6 +668,16 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
 	rcu_read_unlock();
 }
 
+static bool icmp_tag_validation(int proto)
+{
+	bool ok;
+
+	rcu_read_lock();
+	ok = rcu_dereference(inet_protos[proto])->icmp_strict_tag_validation;
+	rcu_read_unlock();
+	return ok;
+}
+
 /*
  * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and
  * ICMP_PARAMETERPROB.
@@ -705,10 +715,22 @@ static void icmp_unreach(struct sk_buff *skb)
 		case ICMP_PORT_UNREACH:
 			break;
 		case ICMP_FRAG_NEEDED:
-			if (ipv4_config.no_pmtu_disc) {
+			/* for documentation of the ip_no_pmtu_disc
+			 * values please see
+			 * Documentation/networking/ip-sysctl.txt
+			 */
+			switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
+			default:
 				LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"),
 					       &iph->daddr);
-			} else {
+				break;
+			case 2:
+				goto out;
+			case 3:
+				if (!icmp_tag_validation(iph->protocol))
+					goto out;
+				/* fall through */
+			case 0:
 				info = ntohs(icmph->un.frag.mtu);
 				if (!info)
 					goto out;
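Condensing the ip_no_pmtu_disc switch above: the question each mode answers is whether an incoming FRAG_NEEDED may update the path MTU. A hypothetical summary function (mode meanings per Documentation/networking/ip-sysctl.txt; "strict" stands in for the new per-protocol icmp_strict_tag_validation bit):

#include <stdbool.h>

/* Illustrative condensation only, not the kernel's control flow. */
bool use_advertised_mtu(int no_pmtu_disc_mode, bool strict)
{
	switch (no_pmtu_disc_mode) {
	case 0:		/* PMTU discovery on: accept the advertised MTU */
		return true;
	case 2:		/* discard all FRAG_NEEDED messages */
		return false;
	case 3:		/* accept only for protocols doing strict
			 * connection-tag validation */
		return strict;
	default:	/* mode 1: PMTUD off; log, don't update */
		return false;
	}
}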
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 7defdc9ba167..97e4d1655d26 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -211,7 +211,7 @@ static void igmp_stop_timer(struct ip_mc_list *im)
 /* It must be called with locked im->lock */
 static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
 {
-	int tv = net_random() % max_delay;
+	int tv = prandom_u32() % max_delay;
 
 	im->tm_running = 1;
 	if (!mod_timer(&im->timer, jiffies+tv+2))
@@ -220,7 +220,7 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
 
 static void igmp_gq_start_timer(struct in_device *in_dev)
 {
-	int tv = net_random() % in_dev->mr_maxdelay;
+	int tv = prandom_u32() % in_dev->mr_maxdelay;
 
 	in_dev->mr_gq_running = 1;
 	if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2))
@@ -229,7 +229,7 @@ static void igmp_gq_start_timer(struct in_device *in_dev)
 
 static void igmp_ifc_start_timer(struct in_device *in_dev, int delay)
 {
-	int tv = net_random() % delay;
+	int tv = prandom_u32() % delay;
 
 	if (!mod_timer(&in_dev->mr_ifc_timer, jiffies+tv+2))
 		in_dev_hold(in_dev);
@@ -310,7 +310,7 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
 	struct ip_sf_list *psf;
 	int scount = 0;
 
-	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+	for (psf = pmc->sources; psf; psf = psf->sf_next) {
 		if (!is_in(pmc, psf, type, gdeleted, sdeleted))
 			continue;
 		scount++;
@@ -463,7 +463,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 	}
 	first = 1;
 	psf_prev = NULL;
-	for (psf=*psf_list; psf; psf=psf_next) {
+	for (psf = *psf_list; psf; psf = psf_next) {
 		__be32 *psrc;
 
 		psf_next = psf->sf_next;
@@ -520,7 +520,7 @@ empty_source:
 		return skb;
 	if (pmc->crcount || isquery) {
 		/* make sure we have room for group header */
-		if (skb && AVAILABLE(skb)<sizeof(struct igmpv3_grec)) {
+		if (skb && AVAILABLE(skb) < sizeof(struct igmpv3_grec)) {
 			igmpv3_sendpack(skb);
 			skb = NULL; /* add_grhead will get a new one */
 		}
@@ -576,7 +576,7 @@ static void igmpv3_clear_zeros(struct ip_sf_list **ppsf)
 	struct ip_sf_list *psf_prev, *psf_next, *psf;
 
 	psf_prev = NULL;
-	for (psf=*ppsf; psf; psf = psf_next) {
+	for (psf = *ppsf; psf; psf = psf_next) {
 		psf_next = psf->sf_next;
 		if (psf->sf_crcount == 0) {
 			if (psf_prev)
@@ -600,7 +600,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
 
 	/* deleted MCA's */
 	pmc_prev = NULL;
-	for (pmc=in_dev->mc_tomb; pmc; pmc=pmc_next) {
+	for (pmc = in_dev->mc_tomb; pmc; pmc = pmc_next) {
 		pmc_next = pmc->next;
 		if (pmc->sfmode == MCAST_INCLUDE) {
 			type = IGMPV3_BLOCK_OLD_SOURCES;
@@ -764,7 +764,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
 
 static void igmp_timer_expire(unsigned long data)
 {
-	struct ip_mc_list *im=(struct ip_mc_list *)data;
+	struct ip_mc_list *im = (struct ip_mc_list *)data;
 	struct in_device *in_dev = im->interface;
 
 	spin_lock(&im->lock);
@@ -794,10 +794,10 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
 	int i, scount;
 
 	scount = 0;
-	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+	for (psf = pmc->sources; psf; psf = psf->sf_next) {
 		if (scount == nsrcs)
 			break;
-		for (i=0; i<nsrcs; i++) {
+		for (i = 0; i < nsrcs; i++) {
 			/* skip inactive filters */
 			if (psf->sf_count[MCAST_INCLUDE] ||
 			    pmc->sfcount[MCAST_EXCLUDE] !=
@@ -825,10 +825,10 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
 
 	/* mark INCLUDE-mode sources */
 	scount = 0;
-	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+	for (psf = pmc->sources; psf; psf = psf->sf_next) {
 		if (scount == nsrcs)
 			break;
-		for (i=0; i<nsrcs; i++)
+		for (i = 0; i < nsrcs; i++)
 			if (srcs[i] == psf->sf_inaddr) {
 				psf->sf_gsresp = 1;
 				scount++;
@@ -1103,7 +1103,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 		pmc->tomb = im->tomb;
 		pmc->sources = im->sources;
 		im->tomb = im->sources = NULL;
-		for (psf=pmc->sources; psf; psf=psf->sf_next)
+		for (psf = pmc->sources; psf; psf = psf->sf_next)
 			psf->sf_crcount = pmc->crcount;
 	}
 	spin_unlock_bh(&im->lock);
@@ -1121,7 +1121,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr)
 
 	spin_lock_bh(&in_dev->mc_tomb_lock);
 	pmc_prev = NULL;
-	for (pmc=in_dev->mc_tomb; pmc; pmc=pmc->next) {
+	for (pmc = in_dev->mc_tomb; pmc; pmc = pmc->next) {
 		if (pmc->multiaddr == multiaddr)
 			break;
 		pmc_prev = pmc;
@@ -1134,7 +1134,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr)
 	}
 	spin_unlock_bh(&in_dev->mc_tomb_lock);
 	if (pmc) {
-		for (psf=pmc->tomb; psf; psf=psf_next) {
+		for (psf = pmc->tomb; psf; psf = psf_next) {
 			psf_next = psf->sf_next;
 			kfree(psf);
 		}
@@ -1167,7 +1167,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
 		psf = pmc->tomb;
 		pmc->tomb = NULL;
 		spin_unlock_bh(&pmc->lock);
-		for (; psf; psf=psf_next) {
+		for (; psf; psf = psf_next) {
 			psf_next = psf->sf_next;
 			kfree(psf);
 		}
@@ -1557,7 +1557,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
 	int rv = 0;
 
 	psf_prev = NULL;
-	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+	for (psf = pmc->sources; psf; psf = psf->sf_next) {
 		if (psf->sf_inaddr == *psfsrc)
 			break;
 		psf_prev = psf;
@@ -1630,7 +1630,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 		pmc->sfcount[sfmode]--;
 	}
 	err = 0;
-	for (i=0; i<sfcount; i++) {
+	for (i = 0; i < sfcount; i++) {
 		int rv = ip_mc_del1_src(pmc, sfmode, &psfsrc[i]);
 
 		changerec |= rv > 0;
@@ -1650,7 +1650,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 		pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
 			IGMP_Unsolicited_Report_Count;
 		in_dev->mr_ifc_count = pmc->crcount;
-		for (psf=pmc->sources; psf; psf = psf->sf_next)
+		for (psf = pmc->sources; psf; psf = psf->sf_next)
 			psf->sf_crcount = 0;
 		igmp_ifc_event(pmc->interface);
 	} else if (sf_setstate(pmc) || changerec) {
@@ -1671,7 +1671,7 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
 	struct ip_sf_list *psf, *psf_prev;
 
 	psf_prev = NULL;
-	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+	for (psf = pmc->sources; psf; psf = psf->sf_next) {
 		if (psf->sf_inaddr == *psfsrc)
 			break;
 		psf_prev = psf;
@@ -1699,7 +1699,7 @@ static void sf_markstate(struct ip_mc_list *pmc)
 	struct ip_sf_list *psf;
 	int mca_xcount = pmc->sfcount[MCAST_EXCLUDE];
 
-	for (psf=pmc->sources; psf; psf=psf->sf_next)
+	for (psf = pmc->sources; psf; psf = psf->sf_next)
 		if (pmc->sfcount[MCAST_EXCLUDE]) {
 			psf->sf_oldin = mca_xcount ==
 				psf->sf_count[MCAST_EXCLUDE] &&
@@ -1716,7 +1716,7 @@ static int sf_setstate(struct ip_mc_list *pmc)
 	int new_in, rv;
 
 	rv = 0;
-	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+	for (psf = pmc->sources; psf; psf = psf->sf_next) {
 		if (pmc->sfcount[MCAST_EXCLUDE]) {
 			new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] &&
 				!psf->sf_count[MCAST_INCLUDE];
@@ -1726,7 +1726,7 @@ static int sf_setstate(struct ip_mc_list *pmc)
 			if (!psf->sf_oldin) {
 				struct ip_sf_list *prev = NULL;
 
-				for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next) {
+				for (dpsf = pmc->tomb; dpsf; dpsf = dpsf->sf_next) {
 					if (dpsf->sf_inaddr == psf->sf_inaddr)
 						break;
 					prev = dpsf;
@@ -1748,7 +1748,7 @@ static int sf_setstate(struct ip_mc_list *pmc)
 			 * add or update "delete" records if an active filter
 			 * is now inactive
 			 */
-			for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next)
+			for (dpsf = pmc->tomb; dpsf; dpsf = dpsf->sf_next)
 				if (dpsf->sf_inaddr == psf->sf_inaddr)
 					break;
 			if (!dpsf) {
@@ -1800,7 +1800,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 	if (!delta)
 		pmc->sfcount[sfmode]++;
 	err = 0;
-	for (i=0; i<sfcount; i++) {
+	for (i = 0; i < sfcount; i++) {
 		err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i]);
 		if (err)
 			break;
@@ -1810,7 +1810,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 
 		if (!delta)
 			pmc->sfcount[sfmode]--;
-		for (j=0; j<i; j++)
+		for (j = 0; j < i; j++)
 			(void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]);
 	} else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) {
 #ifdef CONFIG_IP_MULTICAST
@@ -1829,7 +1829,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 		pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
 			IGMP_Unsolicited_Report_Count;
 		in_dev->mr_ifc_count = pmc->crcount;
-		for (psf=pmc->sources; psf; psf = psf->sf_next)
+		for (psf = pmc->sources; psf; psf = psf->sf_next)
 			psf->sf_crcount = 0;
 		igmp_ifc_event(in_dev);
 	} else if (sf_setstate(pmc)) {
@@ -1844,12 +1844,12 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
 {
 	struct ip_sf_list *psf, *nextpsf;
 
-	for (psf=pmc->tomb; psf; psf=nextpsf) {
+	for (psf = pmc->tomb; psf; psf = nextpsf) {
 		nextpsf = psf->sf_next;
1849 kfree(psf); 1849 kfree(psf);
1850 } 1850 }
1851 pmc->tomb = NULL; 1851 pmc->tomb = NULL;
1852 for (psf=pmc->sources; psf; psf=nextpsf) { 1852 for (psf = pmc->sources; psf; psf = nextpsf) {
1853 nextpsf = psf->sf_next; 1853 nextpsf = psf->sf_next;
1854 kfree(psf); 1854 kfree(psf);
1855 } 1855 }
@@ -2043,7 +2043,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
2043 if (!psl) 2043 if (!psl)
2044 goto done; /* err = -EADDRNOTAVAIL */ 2044 goto done; /* err = -EADDRNOTAVAIL */
2045 rv = !0; 2045 rv = !0;
2046 for (i=0; i<psl->sl_count; i++) { 2046 for (i = 0; i < psl->sl_count; i++) {
2047 rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, 2047 rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr,
2048 sizeof(__be32)); 2048 sizeof(__be32));
2049 if (rv == 0) 2049 if (rv == 0)
@@ -2062,7 +2062,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
2062 ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, omode, 1, 2062 ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, omode, 1,
2063 &mreqs->imr_sourceaddr, 1); 2063 &mreqs->imr_sourceaddr, 1);
2064 2064
2065 for (j=i+1; j<psl->sl_count; j++) 2065 for (j = i+1; j < psl->sl_count; j++)
2066 psl->sl_addr[j-1] = psl->sl_addr[j]; 2066 psl->sl_addr[j-1] = psl->sl_addr[j];
2067 psl->sl_count--; 2067 psl->sl_count--;
2068 err = 0; 2068 err = 0;
@@ -2088,7 +2088,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
2088 newpsl->sl_max = count; 2088 newpsl->sl_max = count;
2089 newpsl->sl_count = count - IP_SFBLOCK; 2089 newpsl->sl_count = count - IP_SFBLOCK;
2090 if (psl) { 2090 if (psl) {
2091 for (i=0; i<psl->sl_count; i++) 2091 for (i = 0; i < psl->sl_count; i++)
2092 newpsl->sl_addr[i] = psl->sl_addr[i]; 2092 newpsl->sl_addr[i] = psl->sl_addr[i];
2093 /* decrease mem now to avoid the memleak warning */ 2093 /* decrease mem now to avoid the memleak warning */
2094 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); 2094 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
@@ -2098,7 +2098,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
2098 psl = newpsl; 2098 psl = newpsl;
2099 } 2099 }
2100 rv = 1; /* > 0 for insert logic below if sl_count is 0 */ 2100 rv = 1; /* > 0 for insert logic below if sl_count is 0 */
2101 for (i=0; i<psl->sl_count; i++) { 2101 for (i = 0; i < psl->sl_count; i++) {
2102 rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, 2102 rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr,
2103 sizeof(__be32)); 2103 sizeof(__be32));
2104 if (rv == 0) 2104 if (rv == 0)
@@ -2106,7 +2106,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
2106 } 2106 }
2107 if (rv == 0) /* address already there is an error */ 2107 if (rv == 0) /* address already there is an error */
2108 goto done; 2108 goto done;
2109 for (j=psl->sl_count-1; j>=i; j--) 2109 for (j = psl->sl_count-1; j >= i; j--)
2110 psl->sl_addr[j+1] = psl->sl_addr[j]; 2110 psl->sl_addr[j+1] = psl->sl_addr[j];
2111 psl->sl_addr[i] = mreqs->imr_sourceaddr; 2111 psl->sl_addr[i] = mreqs->imr_sourceaddr;
2112 psl->sl_count++; 2112 psl->sl_count++;
@@ -2305,7 +2305,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
2305 copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { 2305 copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
2306 return -EFAULT; 2306 return -EFAULT;
2307 } 2307 }
2308 for (i=0; i<copycount; i++) { 2308 for (i = 0; i < copycount; i++) {
2309 struct sockaddr_storage ss; 2309 struct sockaddr_storage ss;
2310 2310
2311 psin = (struct sockaddr_in *)&ss; 2311 psin = (struct sockaddr_in *)&ss;
@@ -2350,7 +2350,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
2350 if (!psl) 2350 if (!psl)
2351 goto unlock; 2351 goto unlock;
2352 2352
2353 for (i=0; i<psl->sl_count; i++) { 2353 for (i = 0; i < psl->sl_count; i++) {
2354 if (psl->sl_addr[i] == rmt_addr) 2354 if (psl->sl_addr[i] == rmt_addr)
2355 break; 2355 break;
2356 } 2356 }
@@ -2423,7 +2423,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u
2423 rv = 1; 2423 rv = 1;
2424 } else if (im) { 2424 } else if (im) {
2425 if (src_addr) { 2425 if (src_addr) {
2426 for (psf=im->sources; psf; psf=psf->sf_next) { 2426 for (psf = im->sources; psf; psf = psf->sf_next) {
2427 if (psf->sf_inaddr == src_addr) 2427 if (psf->sf_inaddr == src_addr)
2428 break; 2428 break;
2429 } 2429 }
@@ -2762,6 +2762,7 @@ static struct pernet_operations igmp_net_ops = {
2762 .init = igmp_net_init, 2762 .init = igmp_net_init,
2763 .exit = igmp_net_exit, 2763 .exit = igmp_net_exit,
2764}; 2764};
2765#endif
2765 2766
2766static int igmp_netdev_event(struct notifier_block *this, 2767static int igmp_netdev_event(struct notifier_block *this,
2767 unsigned long event, void *ptr) 2768 unsigned long event, void *ptr)
@@ -2785,8 +2786,9 @@ static struct notifier_block igmp_notifier = {
2785 .notifier_call = igmp_netdev_event, 2786 .notifier_call = igmp_netdev_event,
2786}; 2787};
2787 2788
2788int __init igmp_mc_proc_init(void) 2789int __init igmp_mc_init(void)
2789{ 2790{
2791#if defined(CONFIG_PROC_FS)
2790 int err; 2792 int err;
2791 2793
2792 err = register_pernet_subsys(&igmp_net_ops); 2794 err = register_pernet_subsys(&igmp_net_ops);
@@ -2800,5 +2802,7 @@ int __init igmp_mc_proc_init(void)
2800reg_notif_fail: 2802reg_notif_fail:
2801 unregister_pernet_subsys(&igmp_net_ops); 2803 unregister_pernet_subsys(&igmp_net_ops);
2802 return err; 2804 return err;
2803} 2805#else
2806 return register_netdevice_notifier(&igmp_notifier);
2804#endif 2807#endif
2808}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index fc0e649cc002..0d1e2cb877ec 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -109,7 +109,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 again:
 	inet_get_local_port_range(net, &low, &high);
 	remaining = (high - low) + 1;
-	smallest_rover = rover = net_random() % remaining + low;
+	smallest_rover = rover = prandom_u32() % remaining + low;
 
 	smallest_size = -1;
 	do {
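[Aside: net_random() was an alias for prandom_u32(), and this series drops the alias in favour of the direct call; the range arithmetic is untouched. A minimal userspace sketch of that arithmetic, with rand() standing in for the kernel PRNG and pick_start_port() an illustrative name rather than a kernel symbol:]

#include <stdint.h>
#include <stdlib.h>

/* stand-in for the kernel's prandom_u32(); not the real generator */
static uint32_t prandom_u32(void)
{
	return (uint32_t)rand();
}

/* same arithmetic as the patched line: a starting rover in [low, high] */
static uint16_t pick_start_port(uint16_t low, uint16_t high)
{
	uint32_t remaining = (uint32_t)(high - low) + 1;

	return (uint16_t)(prandom_u32() % remaining + low);
}

[The modulo introduces a slight bias toward the low end of the range, which is acceptable here because the result only seeds the port search.]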
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 1975f52933c5..f17ea49b28fb 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -230,29 +230,6 @@ static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
 	lro_desc->last_skb = skb;
 }
 
-static void lro_add_frags(struct net_lro_desc *lro_desc,
-			  int len, int hlen, int truesize,
-			  struct skb_frag_struct *skb_frags,
-			  struct iphdr *iph, struct tcphdr *tcph)
-{
-	struct sk_buff *skb = lro_desc->parent;
-	int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
-
-	lro_add_common(lro_desc, iph, tcph, tcp_data_len);
-
-	skb->truesize += truesize;
-
-	skb_frags[0].page_offset += hlen;
-	skb_frag_size_sub(&skb_frags[0], hlen);
-
-	while (tcp_data_len > 0) {
-		*(lro_desc->next_frag) = *skb_frags;
-		tcp_data_len -= skb_frag_size(skb_frags);
-		lro_desc->next_frag++;
-		skb_frags++;
-		skb_shinfo(skb)->nr_frags++;
-	}
-}
 
 static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
 			      struct iphdr *iph,
@@ -371,128 +348,6 @@ out:
 	return 1;
 }
 
-
-static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
-				   struct skb_frag_struct *frags,
-				   int len, int true_size,
-				   void *mac_hdr,
-				   int hlen, __wsum sum,
-				   u32 ip_summed)
-{
-	struct sk_buff *skb;
-	struct skb_frag_struct *skb_frags;
-	int data_len = len;
-	int hdr_len = min(len, hlen);
-
-	skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
-	if (!skb)
-		return NULL;
-
-	skb_reserve(skb, lro_mgr->frag_align_pad);
-	skb->len = len;
-	skb->data_len = len - hdr_len;
-	skb->truesize += true_size;
-	skb->tail += hdr_len;
-
-	memcpy(skb->data, mac_hdr, hdr_len);
-
-	skb_frags = skb_shinfo(skb)->frags;
-	while (data_len > 0) {
-		*skb_frags = *frags;
-		data_len -= skb_frag_size(frags);
-		skb_frags++;
-		frags++;
-		skb_shinfo(skb)->nr_frags++;
-	}
-
-	skb_shinfo(skb)->frags[0].page_offset += hdr_len;
-	skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len);
-
-	skb->ip_summed = ip_summed;
-	skb->csum = sum;
-	skb->protocol = eth_type_trans(skb, lro_mgr->dev);
-	return skb;
-}
-
-static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
-					  struct skb_frag_struct *frags,
-					  int len, int true_size,
-					  void *priv, __wsum sum)
-{
-	struct net_lro_desc *lro_desc;
-	struct iphdr *iph;
-	struct tcphdr *tcph;
-	struct sk_buff *skb;
-	u64 flags;
-	void *mac_hdr;
-	int mac_hdr_len;
-	int hdr_len = LRO_MAX_PG_HLEN;
-	int vlan_hdr_len = 0;
-
-	if (!lro_mgr->get_frag_header ||
-	    lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
-				     (void *)&tcph, &flags, priv)) {
-		mac_hdr = skb_frag_address(frags);
-		goto out1;
-	}
-
-	if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
-		goto out1;
-
-	hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);
-	mac_hdr_len = (int)((void *)(iph) - mac_hdr);
-
-	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
-	if (!lro_desc)
-		goto out1;
-
-	if (!lro_desc->active) { /* start new lro session */
-		if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
-			goto out1;
-
-		skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
-				  hdr_len, 0, lro_mgr->ip_summed_aggr);
-		if (!skb)
-			goto out;
-
-		if ((skb->protocol == htons(ETH_P_8021Q)) &&
-		    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
-			vlan_hdr_len = VLAN_HLEN;
-
-		iph = (void *)(skb->data + vlan_hdr_len);
-		tcph = (void *)((u8 *)skb->data + vlan_hdr_len
-				+ IP_HDR_LEN(iph));
-
-		lro_init_desc(lro_desc, skb, iph, tcph);
-		LRO_INC_STATS(lro_mgr, aggregated);
-		return NULL;
-	}
-
-	if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
-		goto out2;
-
-	if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
-		goto out2;
-
-	lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
-	LRO_INC_STATS(lro_mgr, aggregated);
-
-	if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
-	    lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
-		lro_flush(lro_mgr, lro_desc);
-
-	return NULL;
-
-out2: /* send aggregated packets to the stack */
-	lro_flush(lro_mgr, lro_desc);
-
-out1: /* Original packet has to be posted to the stack */
-	skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
-			  hdr_len, sum, lro_mgr->ip_summed);
-out:
-	return skb;
-}
-
 void lro_receive_skb(struct net_lro_mgr *lro_mgr,
 		     struct sk_buff *skb,
 		     void *priv)
@@ -506,23 +361,6 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr,
 }
 EXPORT_SYMBOL(lro_receive_skb);
 
-void lro_receive_frags(struct net_lro_mgr *lro_mgr,
-		       struct skb_frag_struct *frags,
-		       int len, int true_size, void *priv, __wsum sum)
-{
-	struct sk_buff *skb;
-
-	skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
-	if (!skb)
-		return;
-
-	if (lro_mgr->features & LRO_F_NAPI)
-		netif_receive_skb(skb);
-	else
-		netif_rx(skb);
-}
-EXPORT_SYMBOL(lro_receive_frags);
-
 void lro_flush_all(struct net_lro_mgr *lro_mgr)
 {
 	int i;
@@ -534,14 +372,3 @@ void lro_flush_all(struct net_lro_mgr *lro_mgr)
 	}
 }
 EXPORT_SYMBOL(lro_flush_all);
-
-void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
-		   struct iphdr *iph, struct tcphdr *tcph)
-{
-	struct net_lro_desc *lro_desc;
-
-	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
-	if (lro_desc->active)
-		lro_flush(lro_mgr, lro_desc);
-}
-EXPORT_SYMBOL(lro_flush_pkt);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 33d5537881ed..48f424465112 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -109,13 +109,6 @@ static inline void flush_check(struct inet_peer_base *base, int family)
 	}
 }
 
-void inetpeer_invalidate_family(int family)
-{
-	atomic_t *fp = inetpeer_seq_ptr(family);
-
-	atomic_inc(fp);
-}
-
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
 
 /* Exported for sysctl_net_ipv4.  */
@@ -227,7 +220,7 @@ static int addr_compare(const struct inetpeer_addr *a,
 	stackptr = _stack;					\
 	*stackptr++ = &_base->root;				\
 	for (u = rcu_deref_locked(_base->root, _base);		\
-	     u != peer_avl_empty; ) {				\
+	     u != peer_avl_empty;) {				\
 		int cmp = addr_compare(_daddr, &u->daddr);	\
 		if (cmp == 0)					\
 			break;					\
@@ -282,7 +275,7 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
 	*stackptr++ = &start->avl_left;			\
 	v = &start->avl_left;				\
 	for (u = rcu_deref_locked(*v, base);		\
-	     u->avl_right != peer_avl_empty_rcu; ) {	\
+	     u->avl_right != peer_avl_empty_rcu;) {	\
 		v = &u->avl_right;			\
 		*stackptr++ = v;			\
 		u = rcu_deref_locked(*v, base);		\
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 694de3b7aebf..e9f1217a8afd 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -54,6 +54,7 @@ static int ip_forward_finish(struct sk_buff *skb)
 
 int ip_forward(struct sk_buff *skb)
 {
+	u32 mtu;
 	struct iphdr *iph;	/* Our header */
 	struct rtable *rt;	/* Route we use */
 	struct ip_options *opt	= &(IPCB(skb)->opt);
@@ -88,11 +89,13 @@ int ip_forward(struct sk_buff *skb)
 	if (opt->is_strictroute && rt->rt_uses_gateway)
 		goto sr_failed;
 
-	if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
+	IPCB(skb)->flags |= IPSKB_FORWARDED;
+	mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
+	if (unlikely(skb->len > mtu && !skb_is_gso(skb) &&
 		     (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
 		IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-			  htonl(dst_mtu(&rt->dst)));
+			  htonl(mtu));
 		goto drop;
 	}
 
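[Aside: the hunk above marks the skb IPSKB_FORWARDED before the size check and takes the MTU from ip_dst_mtu_maybe_forward() instead of raw dst_mtu(), so the ICMP_FRAG_NEEDED reply now advertises exactly the value that was tested. A simplified, self-contained sketch of the drop condition, using stand-in types rather than kernel structures:]

#include <stdbool.h>
#include <stdint.h>

struct fwd_pkt {
	uint32_t len;      /* total packet length */
	bool     df;       /* IP_DF set in the IP header */
	bool     gso;      /* GSO packets are segmented later, so exempt */
	bool     local_df; /* local override: fragmentation permitted */
};

/* true when the forwarded packet must be dropped and an
 * ICMP_FRAG_NEEDED quoting 'mtu' sent back to the source */
static bool must_send_frag_needed(const struct fwd_pkt *p, uint32_t mtu)
{
	return p->len > mtu && !p->gso && p->df && !p->local_df;
}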
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2481993a4970..c10a3ce5cbff 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -704,7 +704,7 @@ struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
 			memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
 			if (ip_defrag(skb, user))
 				return NULL;
-			skb->rxhash = 0;
+			skb_clear_hash(skb);
 		}
 	}
 	return skb;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index e560ef34cf4b..e7a92fdb36f6 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -278,7 +278,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 
 free_skb:
-	dev_kfree_skb(skb);
+	kfree_skb(skb);
 out:
 	dev->stats.tx_dropped++;
 	return NETDEV_TX_OK;
@@ -301,7 +301,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 
 free_skb:
-	dev_kfree_skb(skb);
+	kfree_skb(skb);
 out:
 	dev->stats.tx_dropped++;
 	return NETDEV_TX_OK;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ec7264514a82..f4ab72e19af9 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -167,7 +167,7 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
 		soffset -= 4;
 		if (soffset > 3) {
 			memcpy(&faddr, &start[soffset-1], 4);
-			for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4)
+			for (soffset -= 4, doffset = 4; soffset > 3; soffset -= 4, doffset += 4)
 				memcpy(&dptr[doffset-1], &start[soffset-1], 4);
 			/*
 			 * RFC1812 requires to fix illegal source routes.
@@ -227,7 +227,7 @@ void ip_options_fragment(struct sk_buff *skb)
 			continue;
 		}
 		optlen = optptr[1];
-		if (optlen<2 || optlen>l)
+		if (optlen < 2 || optlen > l)
 			return;
 		if (!IPOPT_COPIED(*optptr))
 			memset(optptr, IPOPT_NOOP, optlen);
@@ -275,27 +275,27 @@ int ip_options_compile(struct net *net,
 
 	for (l = opt->optlen; l > 0; ) {
 		switch (*optptr) {
-		      case IPOPT_END:
-			for (optptr++, l--; l>0; optptr++, l--) {
+		case IPOPT_END:
+			for (optptr++, l--; l > 0; optptr++, l--) {
 				if (*optptr != IPOPT_END) {
 					*optptr = IPOPT_END;
 					opt->is_changed = 1;
 				}
 			}
 			goto eol;
-		      case IPOPT_NOOP:
+		case IPOPT_NOOP:
 			l--;
 			optptr++;
 			continue;
 		}
 		optlen = optptr[1];
-		if (optlen<2 || optlen>l) {
+		if (optlen < 2 || optlen > l) {
 			pp_ptr = optptr;
 			goto error;
 		}
 		switch (*optptr) {
-		      case IPOPT_SSRR:
-		      case IPOPT_LSRR:
+		case IPOPT_SSRR:
+		case IPOPT_LSRR:
 			if (optlen < 3) {
 				pp_ptr = optptr + 1;
 				goto error;
@@ -321,7 +321,7 @@ int ip_options_compile(struct net *net,
 			opt->is_strictroute = (optptr[0] == IPOPT_SSRR);
 			opt->srr = optptr - iph;
 			break;
-		      case IPOPT_RR:
+		case IPOPT_RR:
 			if (opt->rr) {
 				pp_ptr = optptr;
 				goto error;
@@ -349,7 +349,7 @@ int ip_options_compile(struct net *net,
 			}
 			opt->rr = optptr - iph;
 			break;
-		      case IPOPT_TIMESTAMP:
+		case IPOPT_TIMESTAMP:
 			if (opt->ts) {
 				pp_ptr = optptr;
 				goto error;
@@ -369,13 +369,13 @@ int ip_options_compile(struct net *net,
 				goto error;
 			}
 			switch (optptr[3]&0xF) {
-			      case IPOPT_TS_TSONLY:
+			case IPOPT_TS_TSONLY:
 				if (skb)
 					timeptr = &optptr[optptr[2]-1];
 				opt->ts_needtime = 1;
 				optptr[2] += 4;
 				break;
-			      case IPOPT_TS_TSANDADDR:
+			case IPOPT_TS_TSANDADDR:
 				if (optptr[2]+7 > optptr[1]) {
 					pp_ptr = optptr + 2;
 					goto error;
@@ -389,7 +389,7 @@ int ip_options_compile(struct net *net,
 				opt->ts_needtime = 1;
 				optptr[2] += 8;
 				break;
-			      case IPOPT_TS_PRESPEC:
+			case IPOPT_TS_PRESPEC:
 				if (optptr[2]+7 > optptr[1]) {
 					pp_ptr = optptr + 2;
 					goto error;
@@ -405,7 +405,7 @@ int ip_options_compile(struct net *net,
 				opt->ts_needtime = 1;
 				optptr[2] += 8;
 				break;
-			      default:
+			default:
 				if (!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) {
 					pp_ptr = optptr + 3;
 					goto error;
@@ -433,7 +433,7 @@ int ip_options_compile(struct net *net,
 			}
 			opt->ts = optptr - iph;
 			break;
-		      case IPOPT_RA:
+		case IPOPT_RA:
 			if (optlen < 4) {
 				pp_ptr = optptr + 1;
 				goto error;
@@ -441,7 +441,7 @@ int ip_options_compile(struct net *net,
 			if (optptr[2] == 0 && optptr[3] == 0)
 				opt->router_alert = optptr - iph;
 			break;
-		      case IPOPT_CIPSO:
+		case IPOPT_CIPSO:
 			if ((!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) || opt->cipso) {
 				pp_ptr = optptr;
 				goto error;
@@ -452,9 +452,9 @@ int ip_options_compile(struct net *net,
 				goto error;
 			}
 			break;
-		      case IPOPT_SEC:
-		      case IPOPT_SID:
-		      default:
+		case IPOPT_SEC:
+		case IPOPT_SID:
+		default:
 			if (!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) {
 				pp_ptr = optptr;
 				goto error;
@@ -572,7 +572,7 @@ void ip_forward_options(struct sk_buff *skb)
 
 		optptr = raw + opt->srr;
 
-		for ( srrptr=optptr[2], srrspace = optptr[1];
+		for ( srrptr = optptr[2], srrspace = optptr[1];
 		     srrptr <= srrspace;
 		     srrptr += 4
 		     ) {
@@ -628,7 +628,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 	if (rt->rt_type != RTN_LOCAL)
 		return -EINVAL;
 
-	for (srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) {
+	for (srrptr = optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) {
 		if (srrptr + 3 > srrspace) {
 			icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((opt->srr+2)<<24));
 			return -EINVAL;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index df184616493f..8971780aec7c 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -449,6 +449,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	__be16 not_last_frag;
 	struct rtable *rt = skb_rtable(skb);
 	int err = 0;
+	bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
 
 	dev = rt->dst.dev;
 
@@ -458,12 +459,13 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 	iph = ip_hdr(skb);
 
+	mtu = ip_dst_mtu_maybe_forward(&rt->dst, forwarding);
 	if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) ||
 		     (IPCB(skb)->frag_max_size &&
-		      IPCB(skb)->frag_max_size > dst_mtu(&rt->dst)))) {
+		      IPCB(skb)->frag_max_size > mtu))) {
 		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-			  htonl(ip_skb_dst_mtu(skb)));
+			  htonl(mtu));
 		kfree_skb(skb);
 		return -EMSGSIZE;
 	}
@@ -473,7 +475,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	 */
 
 	hlen = iph->ihl * 4;
-	mtu = dst_mtu(&rt->dst) - hlen;	/* Size of data space */
+	mtu = mtu - hlen;	/* Size of data space */
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge)
 		mtu -= nf_bridge_mtu_reduction(skb);
@@ -1551,7 +1553,7 @@ void __init ip_init(void)
 	ip_rt_init();
 	inet_initpeers();
 
-#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
-	igmp_mc_proc_init();
+#if defined(CONFIG_IP_MULTICAST)
+	igmp_mc_init();
 #endif
 }
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ddf32a6bc415..580dd96666e0 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -56,7 +56,6 @@
 /*
  *	SOL_IP control messages.
  */
-#define PKTINFO_SKB_CB(__skb) ((struct in_pktinfo *)((__skb)->cb))
 
 static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
 {
@@ -390,7 +389,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 {
 	struct sock_exterr_skb *serr;
 	struct sk_buff *skb, *skb2;
-	struct sockaddr_in *sin;
+	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
 	struct {
 		struct sock_extended_err ee;
 		struct sockaddr_in	 offender;
@@ -416,7 +415,6 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 
 	serr = SKB_EXT_ERR(skb);
 
-	sin = (struct sockaddr_in *)msg->msg_name;
 	if (sin) {
 		sin->sin_family = AF_INET;
 		sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
@@ -1051,14 +1049,15 @@ e_inval:
 *
 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
 * destination in skb->cb[] before dst drop.
- * This way, receiver doesnt make cache line misses to read rtable.
+ * This way, receiver doesn't make cache line misses to read rtable.
 */
 void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
 {
 	struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
+	bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
+		       ipv6_sk_rxinfo(sk);
 
-	if ((inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) &&
-	    skb_rtable(skb)) {
+	if (prepare && skb_rtable(skb)) {
 		pktinfo->ipi_ifindex = inet_iif(skb);
 		pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
 	} else {
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 90ff9570d7d4..c0e3cb72ad70 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -61,13 +61,69 @@
 #include <net/ip6_route.h>
 #endif
 
-static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
-				   __be32 key, __be32 remote)
+static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
 {
 	return hash_32((__force u32)key ^ (__force u32)remote,
 			 IP_TNL_HASH_BITS);
 }
 
+static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
+			     struct dst_entry *dst)
+{
+	struct dst_entry *old_dst;
+
+	if (dst) {
+		if (dst->flags & DST_NOCACHE)
+			dst = NULL;
+		else
+			dst_clone(dst);
+	}
+	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
+	dst_release(old_dst);
+}
+
+static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
+{
+	__tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
+}
+
+static void tunnel_dst_reset(struct ip_tunnel *t)
+{
+	tunnel_dst_set(t, NULL);
+}
+
+static void tunnel_dst_reset_all(struct ip_tunnel *t)
+{
+	int i;
+
+	for_each_possible_cpu(i)
+		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
+}
+
+static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t)
+{
+	struct dst_entry *dst;
+
+	rcu_read_lock();
+	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
+	if (dst)
+		dst_hold(dst);
+	rcu_read_unlock();
+	return dst;
+}
+
+static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
+{
+	struct dst_entry *dst = tunnel_dst_get(t);
+
+	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
+		tunnel_dst_reset(t);
+		return NULL;
+	}
+
+	return dst;
+}
+
 /* Often modified stats are per cpu, other are shared (netdev->stats) */
 struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
 						struct rtnl_link_stats64 *tot)
@@ -75,7 +131,8 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
 	int i;
 
 	for_each_possible_cpu(i) {
-		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
+		const struct pcpu_sw_netstats *tstats =
+						   per_cpu_ptr(dev->tstats, i);
 		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
 		unsigned int start;
 
@@ -146,7 +203,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
 	struct ip_tunnel *t, *cand = NULL;
 	struct hlist_head *head;
 
-	hash = ip_tunnel_hash(itn, key, remote);
+	hash = ip_tunnel_hash(key, remote);
 	head = &itn->tunnels[hash];
 
 	hlist_for_each_entry_rcu(t, head, hash_node) {
@@ -178,7 +235,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
 			cand = t;
 	}
 
-	hash = ip_tunnel_hash(itn, key, 0);
+	hash = ip_tunnel_hash(key, 0);
 	head = &itn->tunnels[hash];
 
 	hlist_for_each_entry_rcu(t, head, hash_node) {
@@ -234,7 +291,7 @@ static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
 	else
 		remote = 0;
 
-	h = ip_tunnel_hash(itn, parms->i_key, remote);
+	h = ip_tunnel_hash(parms->i_key, remote);
 	return &itn->tunnels[h];
 }
 
@@ -318,11 +375,10 @@ failed:
 	return ERR_PTR(err);
 }
 
-static inline struct rtable *ip_route_output_tunnel(struct net *net,
-						    struct flowi4 *fl4,
-						    int proto,
-						    __be32 daddr, __be32 saddr,
-						    __be32 key, __u8 tos, int oif)
+static inline void init_tunnel_flow(struct flowi4 *fl4,
+				    int proto,
+				    __be32 daddr, __be32 saddr,
+				    __be32 key, __u8 tos, int oif)
 {
 	memset(fl4, 0, sizeof(*fl4));
 	fl4->flowi4_oif = oif;
@@ -331,7 +387,6 @@ static inline struct rtable *ip_route_output_tunnel(struct net *net,
 	fl4->flowi4_tos = tos;
 	fl4->flowi4_proto = proto;
 	fl4->fl4_gre_key = key;
-	return ip_route_output_key(net, fl4);
 }
 
 static int ip_tunnel_bind_dev(struct net_device *dev)
@@ -350,14 +405,14 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
 		struct flowi4 fl4;
 		struct rtable *rt;
 
-		rt = ip_route_output_tunnel(tunnel->net, &fl4,
-					    tunnel->parms.iph.protocol,
-					    iph->daddr, iph->saddr,
-					    tunnel->parms.o_key,
-					    RT_TOS(iph->tos),
-					    tunnel->parms.link);
+		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
+				 iph->saddr, tunnel->parms.o_key,
+				 RT_TOS(iph->tos), tunnel->parms.link);
+		rt = ip_route_output_key(tunnel->net, &fl4);
+
 		if (!IS_ERR(rt)) {
 			tdev = rt->dst.dev;
+			tunnel_dst_set(tunnel, &rt->dst);
 			ip_rt_put(rt);
 		}
 		if (dev->type != ARPHRD_ETHER)
@@ -405,7 +460,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
 {
-	struct pcpu_tstats *tstats;
+	struct pcpu_sw_netstats *tstats;
 	const struct iphdr *iph = ip_hdr(skb);
 	int err;
 
@@ -528,10 +583,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	struct flowi4 fl4;
 	u8     tos, ttl;
 	__be16 df;
-	struct rtable *rt;		/* Route to the other host */
+	struct rtable *rt = NULL;	/* Route to the other host */
 	unsigned int max_headroom;	/* The extra header space needed */
 	__be32 dst;
 	int err;
+	bool connected = true;
 
 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
 
@@ -581,27 +637,39 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 #endif
 		else
 			goto tx_error;
+
+		connected = false;
 	}
 
 	tos = tnl_params->tos;
 	if (tos & 0x1) {
 		tos &= ~0x1;
-		if (skb->protocol == htons(ETH_P_IP))
+		if (skb->protocol == htons(ETH_P_IP)) {
 			tos = inner_iph->tos;
-		else if (skb->protocol == htons(ETH_P_IPV6))
+			connected = false;
+		} else if (skb->protocol == htons(ETH_P_IPV6)) {
 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
+			connected = false;
+		}
 	}
 
-	rt = ip_route_output_tunnel(tunnel->net, &fl4,
-				    protocol,
-				    dst, tnl_params->saddr,
-				    tunnel->parms.o_key,
-				    RT_TOS(tos),
-				    tunnel->parms.link);
-	if (IS_ERR(rt)) {
-		dev->stats.tx_carrier_errors++;
-		goto tx_error;
+	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
+			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
+
+	if (connected)
+		rt = (struct rtable *)tunnel_dst_check(tunnel, 0);
+
+	if (!rt) {
+		rt = ip_route_output_key(tunnel->net, &fl4);
+
+		if (IS_ERR(rt)) {
+			dev->stats.tx_carrier_errors++;
+			goto tx_error;
+		}
+		if (connected)
+			tunnel_dst_set(tunnel, &rt->dst);
 	}
+
 	if (rt->dst.dev == dev) {
 		ip_rt_put(rt);
 		dev->stats.collisions++;
@@ -618,6 +686,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 				     tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 			tunnel->err_count--;
 
+			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 			dst_link_failure(skb);
 		} else
 			tunnel->err_count = 0;
@@ -647,7 +716,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 
 	if (skb_cow_head(skb, dev->needed_headroom)) {
 		dev->stats.tx_dropped++;
-		dev_kfree_skb(skb);
+		kfree_skb(skb);
 		return;
 	}
 
@@ -663,7 +732,7 @@ tx_error_icmp:
 #endif
 tx_error:
 	dev->stats.tx_errors++;
-	dev_kfree_skb(skb);
+	kfree_skb(skb);
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
 
@@ -696,6 +765,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
 		if (set_mtu)
 			dev->mtu = mtu;
 	}
+	tunnel_dst_reset_all(t);
 	netdev_state_change(dev);
 }
 
@@ -811,6 +881,7 @@ static void ip_tunnel_dev_free(struct net_device *dev)
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 
 	gro_cells_destroy(&tunnel->gro_cells);
+	free_percpu(tunnel->dst_cache);
 	free_percpu(dev->tstats);
 	free_netdev(dev);
 }
@@ -979,18 +1050,25 @@ int ip_tunnel_init(struct net_device *dev)
 	int i, err;
 
 	dev->destructor	= ip_tunnel_dev_free;
-	dev->tstats = alloc_percpu(struct pcpu_tstats);
+	dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
 	if (!dev->tstats)
 		return -ENOMEM;
 
 	for_each_possible_cpu(i) {
-		struct pcpu_tstats *ipt_stats;
+		struct pcpu_sw_netstats *ipt_stats;
 		ipt_stats = per_cpu_ptr(dev->tstats, i);
 		u64_stats_init(&ipt_stats->syncp);
 	}
 
+	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+	if (!tunnel->dst_cache) {
+		free_percpu(dev->tstats);
+		return -ENOMEM;
+	}
+
 	err = gro_cells_init(&tunnel->gro_cells, dev);
 	if (err) {
+		free_percpu(tunnel->dst_cache);
 		free_percpu(dev->tstats);
 		return err;
 	}
@@ -1015,6 +1093,8 @@ void ip_tunnel_uninit(struct net_device *dev)
 	/* fb_tunnel_dev will be unregisted in net-exit call. */
 	if (itn->fb_tunnel_dev != dev)
 		ip_tunnel_del(netdev_priv(dev));
+
+	tunnel_dst_reset_all(tunnel);
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
 
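[Aside: the new per-cpu dst cache above relies on one pattern in __tunnel_dst_set(): take a reference on the incoming entry, publish it with xchg(), then release whatever pointer was displaced, so an update never leaks or double-frees. A self-contained C11 sketch of that swap-and-release idiom with a generic refcounted entry; the names are illustrative, not the kernel API:]

#include <stdatomic.h>
#include <stdlib.h>

struct entry {
	atomic_int refcnt;		/* starts at 1 for the creator */
};

static void entry_get(struct entry *e)
{
	if (e)
		atomic_fetch_add(&e->refcnt, 1);
}

static void entry_put(struct entry *e)
{
	if (e && atomic_fetch_sub(&e->refcnt, 1) == 1)
		free(e);			/* last reference dropped */
}

struct slot {
	_Atomic(struct entry *) cur;	/* one such slot per cpu in the kernel */
};

/* publish 'e' (may be NULL) and drop the reference the slot held before */
static void slot_set(struct slot *s, struct entry *e)
{
	struct entry *old;

	entry_get(e);				/* reference now owned by the slot */
	old = atomic_exchange(&s->cur, e);	/* plays the role of xchg() */
	entry_put(old);				/* release the displaced entry */
}

[Passing NULL through the same path is what tunnel_dst_reset() depends on: the exchange installs NULL while the old reference is still dropped exactly once.]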
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 42ffbc8d65c6..6156f4ef5e91 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -56,7 +56,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
 
 	skb_scrub_packet(skb, xnet);
 
-	skb->rxhash = 0;
+	skb_clear_hash(skb);
 	skb_dst_set(skb, &rt->dst);
 	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 
@@ -107,8 +107,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
 
 	nf_reset(skb);
 	secpath_reset(skb);
-	if (!skb->l4_rxhash)
-		skb->rxhash = 0;
+	skb_clear_hash_if_not_l4(skb);
 	skb_dst_drop(skb);
 	skb->vlan_tci = 0;
 	skb_set_queue_mapping(skb, 0);
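[Aside: the conversion above replaces open-coded rxhash clearing with skb_clear_hash() and skb_clear_hash_if_not_l4(); a hash computed over the L4 port tuple survives decapsulation, so only weaker hashes are invalidated. A toy model of the two helpers with stand-in fields, not struct sk_buff:]

#include <stdbool.h>
#include <stdint.h>

struct flow_hash_state {
	uint32_t hash;	/* cached flow hash, 0 means none */
	bool     l4;	/* hash covered the L4 (port) tuple */
};

static void clear_hash(struct flow_hash_state *st)
{
	st->hash = 0;
	st->l4 = false;
}

/* keep an L4 hash across header rewrites; drop anything weaker */
static void clear_hash_if_not_l4(struct flow_hash_state *st)
{
	if (!st->l4)
		clear_hash(st);
}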
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 52b802a0cd8c..48eafae51769 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -60,7 +60,7 @@ static int vti_rcv(struct sk_buff *skb)
 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 				  iph->saddr, iph->daddr, 0);
 	if (tunnel != NULL) {
-		struct pcpu_tstats *tstats;
+		struct pcpu_sw_netstats *tstats;
 		u32 oldmark = skb->mark;
 		int ret;
 
@@ -162,7 +162,7 @@ tx_error_icmp:
 	dst_link_failure(skb);
 tx_error:
 	dev->stats.tx_errors++;
-	dev_kfree_skb(skb);
+	kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
 
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index fe3e9f7f1f0b..812b18351462 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -228,7 +228,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 
 tx_error:
-	dev_kfree_skb(skb);
+	kfree_skb(skb);
 out:
 	dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 1672409f5ba5..b9b3472975ba 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -428,6 +428,7 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
 			goto failure;
 
 		ipv4_devconf_setall(in_dev);
+		neigh_parms_data_state_setall(in_dev->arp_parms);
 		IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 
 		if (dev_open(dev))
@@ -520,6 +521,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
 	}
 
 	ipv4_devconf_setall(in_dev);
+	neigh_parms_data_state_setall(in_dev->arp_parms);
 	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 	rcu_read_unlock();
 
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 40d56073cd19..81c6910cfa92 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -39,23 +39,33 @@ config NF_CONNTRACK_PROC_COMPAT
 config NF_TABLES_IPV4
 	depends on NF_TABLES
 	tristate "IPv4 nf_tables support"
-
-config NFT_REJECT_IPV4
-	depends on NF_TABLES_IPV4
-	tristate "nf_tables IPv4 reject support"
+	help
+	  This option enables the IPv4 support for nf_tables.
 
 config NFT_CHAIN_ROUTE_IPV4
 	depends on NF_TABLES_IPV4
 	tristate "IPv4 nf_tables route chain support"
+	help
+	  This option enables the "route" chain for IPv4 in nf_tables. This
+	  chain type is used to force packet re-routing after mangling header
+	  fields such as the source, destination, type of service and
+	  the packet mark.
 
 config NFT_CHAIN_NAT_IPV4
 	depends on NF_TABLES_IPV4
 	depends on NF_NAT_IPV4 && NFT_NAT
 	tristate "IPv4 nf_tables nat chain support"
+	help
+	  This option enables the "nat" chain for IPv4 in nf_tables. This
+	  chain type is used to perform Network Address Translation (NAT)
+	  packet transformations such as the source, destination address and
+	  source and destination ports.
 
 config NF_TABLES_ARP
 	depends on NF_TABLES
 	tristate "ARP nf_tables support"
+	help
+	  This option enables the ARP support for nf_tables.
 
 config IP_NF_IPTABLES
 	tristate "IP tables support (required for filtering/masq/NAT)"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 19df72b7ba88..c16be9d58420 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -28,7 +28,6 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
 obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
 
 obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
-obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
 obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index b969131ad1c1..5b6e0df4ccff 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -17,10 +17,6 @@
 #include <linux/udp.h>
 #include <linux/icmp.h>
 #include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <net/route.h>
-#include <net/dst.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_REJECT.h>
@@ -28,128 +24,12 @@
 #include <linux/netfilter_bridge.h>
 #endif
 
+#include <net/netfilter/ipv4/nf_reject.h>
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
 
-/* Send RST reply */
-static void send_reset(struct sk_buff *oldskb, int hook)
-{
-	struct sk_buff *nskb;
-	const struct iphdr *oiph;
-	struct iphdr *niph;
-	const struct tcphdr *oth;
-	struct tcphdr _otcph, *tcph;
-
-	/* IP header checks: fragment. */
-	if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
-		return;
-
-	oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
-				 sizeof(_otcph), &_otcph);
-	if (oth == NULL)
-		return;
-
-	/* No RST for RST. */
-	if (oth->rst)
-		return;
-
-	if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
-		return;
-
-	/* Check checksum */
-	if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
-		return;
-	oiph = ip_hdr(oldskb);
-
-	nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
-			 LL_MAX_HEADER, GFP_ATOMIC);
-	if (!nskb)
-		return;
-
-	skb_reserve(nskb, LL_MAX_HEADER);
-
-	skb_reset_network_header(nskb);
-	niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
-	niph->version	= 4;
-	niph->ihl	= sizeof(struct iphdr) / 4;
-	niph->tos	= 0;
-	niph->id	= 0;
-	niph->frag_off	= htons(IP_DF);
-	niph->protocol	= IPPROTO_TCP;
-	niph->check	= 0;
-	niph->saddr	= oiph->daddr;
-	niph->daddr	= oiph->saddr;
-
-	skb_reset_transport_header(nskb);
-	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
-	memset(tcph, 0, sizeof(*tcph));
-	tcph->source	= oth->dest;
-	tcph->dest	= oth->source;
-	tcph->doff	= sizeof(struct tcphdr) / 4;
-
-	if (oth->ack)
-		tcph->seq = oth->ack_seq;
-	else {
-		tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
-				      oldskb->len - ip_hdrlen(oldskb) -
-				      (oth->doff << 2));
-		tcph->ack = 1;
-	}
-
-	tcph->rst	= 1;
-	tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
-				    niph->daddr, 0);
-	nskb->ip_summed = CHECKSUM_PARTIAL;
-	nskb->csum_start = (unsigned char *)tcph - nskb->head;
-	nskb->csum_offset = offsetof(struct tcphdr, check);
-
-	/* ip_route_me_harder expects skb->dst to be set */
-	skb_dst_set_noref(nskb, skb_dst(oldskb));
-
-	nskb->protocol = htons(ETH_P_IP);
-	if (ip_route_me_harder(nskb, RTN_UNSPEC))
-		goto free_nskb;
-
-	niph->ttl	= ip4_dst_hoplimit(skb_dst(nskb));
-
-	/* "Never happens" */
-	if (nskb->len > dst_mtu(skb_dst(nskb)))
-		goto free_nskb;
-
-	nf_ct_attach(nskb, oldskb);
-
-#ifdef CONFIG_BRIDGE_NETFILTER
-	/* If we use ip_local_out for bridged traffic, the MAC source on
-	 * the RST will be ours, instead of the destination's. This confuses
-	 * some routers/firewalls, and they drop the packet. So we need to
-	 * build the eth header using the original destination's MAC as the
-	 * source, and send the RST packet directly.
-	 */
-	if (oldskb->nf_bridge) {
-		struct ethhdr *oeth = eth_hdr(oldskb);
-		nskb->dev = oldskb->nf_bridge->physindev;
-		niph->tot_len = htons(nskb->len);
-		ip_send_check(niph);
-		if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
-				    oeth->h_source, oeth->h_dest, nskb->len) < 0)
-			goto free_nskb;
-		dev_queue_xmit(nskb);
-	} else
-#endif
-		ip_local_out(nskb);
-
-	return;
-
- free_nskb:
-	kfree_skb(nskb);
-}
-
-static inline void send_unreach(struct sk_buff *skb_in, int code)
-{
-	icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
-}
-
 static unsigned int
 reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -157,28 +37,28 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 
 	switch (reject->with) {
 	case IPT_ICMP_NET_UNREACHABLE:
-		send_unreach(skb, ICMP_NET_UNREACH);
+		nf_send_unreach(skb, ICMP_NET_UNREACH);
 		break;
 	case IPT_ICMP_HOST_UNREACHABLE:
-		send_unreach(skb, ICMP_HOST_UNREACH);
+		nf_send_unreach(skb, ICMP_HOST_UNREACH);
 		break;
 	case IPT_ICMP_PROT_UNREACHABLE:
-		send_unreach(skb, ICMP_PROT_UNREACH);
+		nf_send_unreach(skb, ICMP_PROT_UNREACH);
 		break;
 	case IPT_ICMP_PORT_UNREACHABLE:
-		send_unreach(skb, ICMP_PORT_UNREACH);
+		nf_send_unreach(skb, ICMP_PORT_UNREACH);
 		break;
 	case IPT_ICMP_NET_PROHIBITED:
-		send_unreach(skb, ICMP_NET_ANO);
+		nf_send_unreach(skb, ICMP_NET_ANO);
 		break;
 	case IPT_ICMP_HOST_PROHIBITED:
-		send_unreach(skb, ICMP_HOST_ANO);
+		nf_send_unreach(skb, ICMP_HOST_ANO);
 		break;
 	case IPT_ICMP_ADMIN_PROHIBITED:
-		send_unreach(skb, ICMP_PKT_FILTERED);
+		nf_send_unreach(skb, ICMP_PKT_FILTERED);
 		break;
 	case IPT_TCP_RESET:
-		send_reset(skb, par->hooknum);
+		nf_send_reset(skb, par->hooknum);
 	case IPT_ICMP_ECHOREPLY:
 		/* Doesn't happen. */
 		break;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index ecd8bec411c9..8127dc802865 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -548,9 +548,3 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
 
 module_init(nf_conntrack_l3proto_ipv4_init);
 module_exit(nf_conntrack_l3proto_ipv4_fini);
-
-void need_ipv4_conntrack(void)
-{
-	return;
-}
-EXPORT_SYMBOL_GPL(need_ipv4_conntrack);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 5f011cc89cd9..d551e31b416e 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -34,8 +34,7 @@
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Author: James Morris <jmorris@intercode.com.au>
 *
@@ -462,14 +461,14 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
 	}
 
 	if (subid < 40) {
-		optr [0] = 0;
-		optr [1] = subid;
+		optr[0] = 0;
+		optr[1] = subid;
 	} else if (subid < 80) {
-		optr [0] = 1;
-		optr [1] = subid - 40;
+		optr[0] = 1;
+		optr[1] = subid - 40;
 	} else {
-		optr [0] = 2;
-		optr [1] = subid - 80;
+		optr[0] = 2;
+		optr[1] = subid - 80;
 	}
 
 	*len = 2;
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index 3e67ef1c676f..19412a4063fb 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -14,10 +14,30 @@
14#include <linux/netfilter_arp.h> 14#include <linux/netfilter_arp.h>
15#include <net/netfilter/nf_tables.h> 15#include <net/netfilter/nf_tables.h>
16 16
17static unsigned int
18nft_do_chain_arp(const struct nf_hook_ops *ops,
19 struct sk_buff *skb,
20 const struct net_device *in,
21 const struct net_device *out,
22 int (*okfn)(struct sk_buff *))
23{
24 struct nft_pktinfo pkt;
25
26 nft_set_pktinfo(&pkt, ops, skb, in, out);
27
28 return nft_do_chain(&pkt, ops);
29}
30
17static struct nft_af_info nft_af_arp __read_mostly = { 31static struct nft_af_info nft_af_arp __read_mostly = {
18 .family = NFPROTO_ARP, 32 .family = NFPROTO_ARP,
19 .nhooks = NF_ARP_NUMHOOKS, 33 .nhooks = NF_ARP_NUMHOOKS,
20 .owner = THIS_MODULE, 34 .owner = THIS_MODULE,
35 .nops = 1,
36 .hooks = {
37 [NF_ARP_IN] = nft_do_chain_arp,
38 [NF_ARP_OUT] = nft_do_chain_arp,
39 [NF_ARP_FORWARD] = nft_do_chain_arp,
40 },
21}; 41};
22 42
23static int nf_tables_arp_init_net(struct net *net) 43static int nf_tables_arp_init_net(struct net *net)
@@ -48,32 +68,14 @@ static struct pernet_operations nf_tables_arp_net_ops = {
48 .exit = nf_tables_arp_exit_net, 68 .exit = nf_tables_arp_exit_net,
49}; 69};
50 70
51static unsigned int 71static const struct nf_chain_type filter_arp = {
52nft_do_chain_arp(const struct nf_hook_ops *ops,
53 struct sk_buff *skb,
54 const struct net_device *in,
55 const struct net_device *out,
56 int (*okfn)(struct sk_buff *))
57{
58 struct nft_pktinfo pkt;
59
60 nft_set_pktinfo(&pkt, ops, skb, in, out);
61
62 return nft_do_chain_pktinfo(&pkt, ops);
63}
64
65static struct nf_chain_type filter_arp = {
66 .family = NFPROTO_ARP,
67 .name = "filter", 72 .name = "filter",
68 .type = NFT_CHAIN_T_DEFAULT, 73 .type = NFT_CHAIN_T_DEFAULT,
74 .family = NFPROTO_ARP,
75 .owner = THIS_MODULE,
69 .hook_mask = (1 << NF_ARP_IN) | 76 .hook_mask = (1 << NF_ARP_IN) |
70 (1 << NF_ARP_OUT) | 77 (1 << NF_ARP_OUT) |
71 (1 << NF_ARP_FORWARD), 78 (1 << NF_ARP_FORWARD),
72 .fn = {
73 [NF_ARP_IN] = nft_do_chain_arp,
74 [NF_ARP_OUT] = nft_do_chain_arp,
75 [NF_ARP_FORWARD] = nft_do_chain_arp,
76 },
77}; 79};
78 80
79static int __init nf_tables_arp_init(void) 81static int __init nf_tables_arp_init(void)
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index 0f4cbfeb19bd..6820c8c40842 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -18,14 +18,25 @@
18#include <net/ip.h> 18#include <net/ip.h>
19#include <net/netfilter/nf_tables_ipv4.h> 19#include <net/netfilter/nf_tables_ipv4.h>
20 20
21static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops,
22 struct sk_buff *skb,
23 const struct net_device *in,
24 const struct net_device *out,
25 int (*okfn)(struct sk_buff *))
26{
27 struct nft_pktinfo pkt;
28
29 nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
30
31 return nft_do_chain(&pkt, ops);
32}
33
21static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, 34static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
22 struct sk_buff *skb, 35 struct sk_buff *skb,
23 const struct net_device *in, 36 const struct net_device *in,
24 const struct net_device *out, 37 const struct net_device *out,
25 int (*okfn)(struct sk_buff *)) 38 int (*okfn)(struct sk_buff *))
26{ 39{
27 struct nft_pktinfo pkt;
28
29 if (unlikely(skb->len < sizeof(struct iphdr) || 40 if (unlikely(skb->len < sizeof(struct iphdr) ||
30 ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { 41 ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
31 if (net_ratelimit()) 42 if (net_ratelimit())
@@ -33,19 +44,24 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
33 "packet\n"); 44 "packet\n");
34 return NF_ACCEPT; 45 return NF_ACCEPT;
35 } 46 }
36 nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
37 47
38 return nft_do_chain_pktinfo(&pkt, ops); 48 return nft_do_chain_ipv4(ops, skb, in, out, okfn);
39} 49}
40 50
41static struct nft_af_info nft_af_ipv4 __read_mostly = { 51struct nft_af_info nft_af_ipv4 __read_mostly = {
42 .family = NFPROTO_IPV4, 52 .family = NFPROTO_IPV4,
43 .nhooks = NF_INET_NUMHOOKS, 53 .nhooks = NF_INET_NUMHOOKS,
44 .owner = THIS_MODULE, 54 .owner = THIS_MODULE,
55 .nops = 1,
45 .hooks = { 56 .hooks = {
57 [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
46 [NF_INET_LOCAL_OUT] = nft_ipv4_output, 58 [NF_INET_LOCAL_OUT] = nft_ipv4_output,
59 [NF_INET_FORWARD] = nft_do_chain_ipv4,
60 [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
61 [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
47 }, 62 },
48}; 63};
64EXPORT_SYMBOL_GPL(nft_af_ipv4);
49 65
50static int nf_tables_ipv4_init_net(struct net *net) 66static int nf_tables_ipv4_init_net(struct net *net)
51{ 67{
@@ -75,42 +91,28 @@ static struct pernet_operations nf_tables_ipv4_net_ops = {
75 .exit = nf_tables_ipv4_exit_net, 91 .exit = nf_tables_ipv4_exit_net,
76}; 92};
77 93
78static unsigned int 94static const struct nf_chain_type filter_ipv4 = {
79nft_do_chain_ipv4(const struct nf_hook_ops *ops,
80 struct sk_buff *skb,
81 const struct net_device *in,
82 const struct net_device *out,
83 int (*okfn)(struct sk_buff *))
84{
85 struct nft_pktinfo pkt;
86
87 nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
88
89 return nft_do_chain_pktinfo(&pkt, ops);
90}
91
92static struct nf_chain_type filter_ipv4 = {
93 .family = NFPROTO_IPV4,
94 .name = "filter", 95 .name = "filter",
95 .type = NFT_CHAIN_T_DEFAULT, 96 .type = NFT_CHAIN_T_DEFAULT,
97 .family = NFPROTO_IPV4,
98 .owner = THIS_MODULE,
96 .hook_mask = (1 << NF_INET_LOCAL_IN) | 99 .hook_mask = (1 << NF_INET_LOCAL_IN) |
97 (1 << NF_INET_LOCAL_OUT) | 100 (1 << NF_INET_LOCAL_OUT) |
98 (1 << NF_INET_FORWARD) | 101 (1 << NF_INET_FORWARD) |
99 (1 << NF_INET_PRE_ROUTING) | 102 (1 << NF_INET_PRE_ROUTING) |
100 (1 << NF_INET_POST_ROUTING), 103 (1 << NF_INET_POST_ROUTING),
101 .fn = {
102 [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
103 [NF_INET_LOCAL_OUT] = nft_ipv4_output,
104 [NF_INET_FORWARD] = nft_do_chain_ipv4,
105 [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
106 [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
107 },
108}; 104};
109 105
110static int __init nf_tables_ipv4_init(void) 106static int __init nf_tables_ipv4_init(void)
111{ 107{
108 int ret;
109
112 nft_register_chain_type(&filter_ipv4); 110 nft_register_chain_type(&filter_ipv4);
113 return register_pernet_subsys(&nf_tables_ipv4_net_ops); 111 ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
112 if (ret < 0)
113 nft_unregister_chain_type(&filter_ipv4);
114
115 return ret;
114} 116}
115 117
116static void __exit nf_tables_ipv4_exit(void) 118static void __exit nf_tables_ipv4_exit(void)
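
The init path above gains rollback handling: nft_register_chain_type() runs first, and if register_pernet_subsys() then fails, the chain type is unregistered again so a failed module load leaves nothing registered. A minimal stand-alone sketch of that register-then-roll-back pattern, with hypothetical names in place of the kernel APIs:

#include <stdio.h>

static int register_first(void)    { return 0; }	/* pretend this succeeds */
static void unregister_first(void) { }
static int register_second(void)   { return -1; }	/* pretend this fails */

static int init(void)
{
	int ret;

	register_first();
	ret = register_second();
	if (ret < 0)
		unregister_first();	/* undo step one on failure */

	return ret;
}

int main(void)
{
	printf("init() = %d\n", init());	/* prints init() = -1 */
	return 0;
}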
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index cf2c792cd971..b5b256d45e67 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -75,7 +75,7 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
75 75
76 nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); 76 nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
77 77
78 ret = nft_do_chain_pktinfo(&pkt, ops); 78 ret = nft_do_chain(&pkt, ops);
79 if (ret != NF_ACCEPT) 79 if (ret != NF_ACCEPT)
80 return ret; 80 return ret;
81 if (!nf_nat_initialized(ct, maniptype)) { 81 if (!nf_nat_initialized(ct, maniptype)) {
@@ -164,21 +164,21 @@ static unsigned int nf_nat_output(const struct nf_hook_ops *ops,
164 return ret; 164 return ret;
165} 165}
166 166
167static struct nf_chain_type nft_chain_nat_ipv4 = { 167static const struct nf_chain_type nft_chain_nat_ipv4 = {
168 .family = NFPROTO_IPV4,
169 .name = "nat", 168 .name = "nat",
170 .type = NFT_CHAIN_T_NAT, 169 .type = NFT_CHAIN_T_NAT,
170 .family = NFPROTO_IPV4,
171 .owner = THIS_MODULE,
171 .hook_mask = (1 << NF_INET_PRE_ROUTING) | 172 .hook_mask = (1 << NF_INET_PRE_ROUTING) |
172 (1 << NF_INET_POST_ROUTING) | 173 (1 << NF_INET_POST_ROUTING) |
173 (1 << NF_INET_LOCAL_OUT) | 174 (1 << NF_INET_LOCAL_OUT) |
174 (1 << NF_INET_LOCAL_IN), 175 (1 << NF_INET_LOCAL_IN),
175 .fn = { 176 .hooks = {
176 [NF_INET_PRE_ROUTING] = nf_nat_prerouting, 177 [NF_INET_PRE_ROUTING] = nf_nat_prerouting,
177 [NF_INET_POST_ROUTING] = nf_nat_postrouting, 178 [NF_INET_POST_ROUTING] = nf_nat_postrouting,
178 [NF_INET_LOCAL_OUT] = nf_nat_output, 179 [NF_INET_LOCAL_OUT] = nf_nat_output,
179 [NF_INET_LOCAL_IN] = nf_nat_fn, 180 [NF_INET_LOCAL_IN] = nf_nat_fn,
180 }, 181 },
181 .me = THIS_MODULE,
182}; 182};
183 183
184static int __init nft_chain_nat_init(void) 184static int __init nft_chain_nat_init(void)
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index 4e6bf9a3d7aa..125b66766c0a 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -47,7 +47,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
47 daddr = iph->daddr; 47 daddr = iph->daddr;
48 tos = iph->tos; 48 tos = iph->tos;
49 49
50 ret = nft_do_chain_pktinfo(&pkt, ops); 50 ret = nft_do_chain(&pkt, ops);
51 if (ret != NF_DROP && ret != NF_QUEUE) { 51 if (ret != NF_DROP && ret != NF_QUEUE) {
52 iph = ip_hdr(skb); 52 iph = ip_hdr(skb);
53 53
@@ -61,15 +61,15 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
61 return ret; 61 return ret;
62} 62}
63 63
64static struct nf_chain_type nft_chain_route_ipv4 = { 64static const struct nf_chain_type nft_chain_route_ipv4 = {
65 .family = NFPROTO_IPV4,
66 .name = "route", 65 .name = "route",
67 .type = NFT_CHAIN_T_ROUTE, 66 .type = NFT_CHAIN_T_ROUTE,
67 .family = NFPROTO_IPV4,
68 .owner = THIS_MODULE,
68 .hook_mask = (1 << NF_INET_LOCAL_OUT), 69 .hook_mask = (1 << NF_INET_LOCAL_OUT),
69 .fn = { 70 .hooks = {
70 [NF_INET_LOCAL_OUT] = nf_route_table_hook, 71 [NF_INET_LOCAL_OUT] = nf_route_table_hook,
71 }, 72 },
72 .me = THIS_MODULE,
73}; 73};
74 74
75static int __init nft_chain_route_init(void) 75static int __init nft_chain_route_init(void)
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
deleted file mode 100644
index 4a5e94ac314a..000000000000
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ /dev/null
@@ -1,123 +0,0 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/netlink.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter/nf_tables.h>
17#include <net/netfilter/nf_tables.h>
18#include <net/icmp.h>
19
20struct nft_reject {
21 enum nft_reject_types type:8;
22 u8 icmp_code;
23};
24
25static void nft_reject_eval(const struct nft_expr *expr,
26 struct nft_data data[NFT_REG_MAX + 1],
27 const struct nft_pktinfo *pkt)
28{
29 struct nft_reject *priv = nft_expr_priv(expr);
30
31 switch (priv->type) {
32 case NFT_REJECT_ICMP_UNREACH:
33 icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0);
34 break;
35 case NFT_REJECT_TCP_RST:
36 break;
37 }
38
39 data[NFT_REG_VERDICT].verdict = NF_DROP;
40}
41
42static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
43 [NFTA_REJECT_TYPE] = { .type = NLA_U32 },
44 [NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 },
45};
46
47static int nft_reject_init(const struct nft_ctx *ctx,
48 const struct nft_expr *expr,
49 const struct nlattr * const tb[])
50{
51 struct nft_reject *priv = nft_expr_priv(expr);
52
53 if (tb[NFTA_REJECT_TYPE] == NULL)
54 return -EINVAL;
55
56 priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
57 switch (priv->type) {
58 case NFT_REJECT_ICMP_UNREACH:
59 if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
60 return -EINVAL;
61 priv->icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
62 case NFT_REJECT_TCP_RST:
63 break;
64 default:
65 return -EINVAL;
66 }
67
68 return 0;
69}
70
71static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
72{
73 const struct nft_reject *priv = nft_expr_priv(expr);
74
75 if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type)))
76 goto nla_put_failure;
77
78 switch (priv->type) {
79 case NFT_REJECT_ICMP_UNREACH:
80 if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
81 goto nla_put_failure;
82 break;
83 }
84
85 return 0;
86
87nla_put_failure:
88 return -1;
89}
90
91static struct nft_expr_type nft_reject_type;
92static const struct nft_expr_ops nft_reject_ops = {
93 .type = &nft_reject_type,
94 .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
95 .eval = nft_reject_eval,
96 .init = nft_reject_init,
97 .dump = nft_reject_dump,
98};
99
100static struct nft_expr_type nft_reject_type __read_mostly = {
101 .name = "reject",
102 .ops = &nft_reject_ops,
103 .policy = nft_reject_policy,
104 .maxattr = NFTA_REJECT_MAX,
105 .owner = THIS_MODULE,
106};
107
108static int __init nft_reject_module_init(void)
109{
110 return nft_register_expr(&nft_reject_type);
111}
112
113static void __exit nft_reject_module_exit(void)
114{
115 nft_unregister_expr(&nft_reject_type);
116}
117
118module_init(nft_reject_module_init);
119module_exit(nft_reject_module_exit);
120
121MODULE_LICENSE("GPL");
122MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
123MODULE_ALIAS_NFT_EXPR("reject");
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 242e7f4ed6f4..2d11c094296e 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -53,8 +53,12 @@
53#include <net/transp_v6.h> 53#include <net/transp_v6.h>
54#endif 54#endif
55 55
56struct ping_table {
57 struct hlist_nulls_head hash[PING_HTABLE_SIZE];
58 rwlock_t lock;
59};
56 60
57struct ping_table ping_table; 61static struct ping_table ping_table;
58struct pingv6_ops pingv6_ops; 62struct pingv6_ops pingv6_ops;
59EXPORT_SYMBOL_GPL(pingv6_ops); 63EXPORT_SYMBOL_GPL(pingv6_ops);
60 64
@@ -316,6 +320,9 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
316 if (addr_len < sizeof(*addr)) 320 if (addr_len < sizeof(*addr))
317 return -EINVAL; 321 return -EINVAL;
318 322
323 if (addr->sin6_family != AF_INET6)
324 return -EINVAL;
325
319 pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", 326 pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n",
320 sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); 327 sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port));
321 328
@@ -668,8 +675,8 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
668} 675}
669EXPORT_SYMBOL_GPL(ping_common_sendmsg); 676EXPORT_SYMBOL_GPL(ping_common_sendmsg);
670 677
671int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 678static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
672 size_t len) 679 size_t len)
673{ 680{
674 struct net *net = sock_net(sk); 681 struct net *net = sock_net(sk);
675 struct flowi4 fl4; 682 struct flowi4 fl4;
@@ -696,7 +703,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
696 */ 703 */
697 704
698 if (msg->msg_name) { 705 if (msg->msg_name) {
699 struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; 706 DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
700 if (msg->msg_namelen < sizeof(*usin)) 707 if (msg->msg_namelen < sizeof(*usin))
701 return -EINVAL; 708 return -EINVAL;
702 if (usin->sin_family != AF_INET) 709 if (usin->sin_family != AF_INET)
@@ -869,7 +876,7 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
869 876
870 /* Copy the address and add cmsg data. */ 877 /* Copy the address and add cmsg data. */
871 if (family == AF_INET) { 878 if (family == AF_INET) {
872 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; 879 DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
873 880
874 if (sin) { 881 if (sin) {
875 sin->sin_family = AF_INET; 882 sin->sin_family = AF_INET;
@@ -886,8 +893,7 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
886 } else if (family == AF_INET6) { 893 } else if (family == AF_INET6) {
887 struct ipv6_pinfo *np = inet6_sk(sk); 894 struct ipv6_pinfo *np = inet6_sk(sk);
888 struct ipv6hdr *ip6 = ipv6_hdr(skb); 895 struct ipv6hdr *ip6 = ipv6_hdr(skb);
889 struct sockaddr_in6 *sin6 = 896 DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
890 (struct sockaddr_in6 *)msg->msg_name;
891 897
892 if (sin6) { 898 if (sin6) {
893 sin6->sin6_family = AF_INET6; 899 sin6->sin6_family = AF_INET6;
@@ -903,7 +909,12 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
903 } 909 }
904 910
905 if (inet6_sk(sk)->rxopt.all) 911 if (inet6_sk(sk)->rxopt.all)
906 pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb); 912 pingv6_ops.ip6_datagram_recv_common_ctl(sk, msg, skb);
913 if (skb->protocol == htons(ETH_P_IPV6) &&
914 inet6_sk(sk)->rxopt.all)
915 pingv6_ops.ip6_datagram_recv_specific_ctl(sk, msg, skb);
916 else if (skb->protocol == htons(ETH_P_IP) && isk->cmsg_flags)
917 ip_cmsg_recv(msg, skb);
907#endif 918#endif
908 } else { 919 } else {
909 BUG(); 920 BUG();
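
The repeated msg_name conversions above (here and in raw.c below) switch from a bare cast to DECLARE_SOCKADDR(). Conceptually the macro is just a typed declaration plus cast; a simplified stand-in follows (the real kernel macro additionally wraps the cast in a compile-time size check, which this sketch omits):

/* Simplified stand-in for the kernel macro; illustration only. */
#define DECLARE_SOCKADDR(type, dst, src) \
	type dst = (type)(src)

/* Used exactly like the call sites in the hunks above, e.g.
 *	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
 * which expands to
 *	struct sockaddr_in *sin = (struct sockaddr_in *)(msg->msg_name);
 */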
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4a0335854b89..a6c8a80ec9d6 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -279,6 +279,7 @@ static const struct snmp_mib snmp4_net_list[] = {
279 SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), 279 SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
280 SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), 280 SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
281 SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS), 281 SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS),
282 SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING),
282 SNMP_MIB_SENTINEL 283 SNMP_MIB_SENTINEL
283}; 284};
284 285
@@ -332,22 +333,22 @@ static void icmp_put(struct seq_file *seq)
332 atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; 333 atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs;
333 334
334 seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); 335 seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");
335 for (i=0; icmpmibmap[i].name != NULL; i++) 336 for (i = 0; icmpmibmap[i].name != NULL; i++)
336 seq_printf(seq, " In%s", icmpmibmap[i].name); 337 seq_printf(seq, " In%s", icmpmibmap[i].name);
337 seq_printf(seq, " OutMsgs OutErrors"); 338 seq_printf(seq, " OutMsgs OutErrors");
338 for (i=0; icmpmibmap[i].name != NULL; i++) 339 for (i = 0; icmpmibmap[i].name != NULL; i++)
339 seq_printf(seq, " Out%s", icmpmibmap[i].name); 340 seq_printf(seq, " Out%s", icmpmibmap[i].name);
340 seq_printf(seq, "\nIcmp: %lu %lu %lu", 341 seq_printf(seq, "\nIcmp: %lu %lu %lu",
341 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), 342 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS),
342 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS), 343 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS),
343 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); 344 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
344 for (i=0; icmpmibmap[i].name != NULL; i++) 345 for (i = 0; icmpmibmap[i].name != NULL; i++)
345 seq_printf(seq, " %lu", 346 seq_printf(seq, " %lu",
346 atomic_long_read(ptr + icmpmibmap[i].index)); 347 atomic_long_read(ptr + icmpmibmap[i].index));
347 seq_printf(seq, " %lu %lu", 348 seq_printf(seq, " %lu %lu",
348 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), 349 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
349 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); 350 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
350 for (i=0; icmpmibmap[i].name != NULL; i++) 351 for (i = 0; icmpmibmap[i].name != NULL; i++)
351 seq_printf(seq, " %lu", 352 seq_printf(seq, " %lu",
352 atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); 353 atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
353} 354}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 23c3e5b5bb53..c04518f4850a 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -493,7 +493,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
493 */ 493 */
494 494
495 if (msg->msg_namelen) { 495 if (msg->msg_namelen) {
496 struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; 496 DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
497 err = -EINVAL; 497 err = -EINVAL;
498 if (msg->msg_namelen < sizeof(*usin)) 498 if (msg->msg_namelen < sizeof(*usin))
499 goto out; 499 goto out;
@@ -575,7 +575,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
575 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, 575 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
576 RT_SCOPE_UNIVERSE, 576 RT_SCOPE_UNIVERSE,
577 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, 577 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
578 inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP | 578 inet_sk_flowi_flags(sk) |
579 (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), 579 (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
580 daddr, saddr, 0, 0); 580 daddr, saddr, 0, 0);
581 581
@@ -690,7 +690,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
690 struct inet_sock *inet = inet_sk(sk); 690 struct inet_sock *inet = inet_sk(sk);
691 size_t copied = 0; 691 size_t copied = 0;
692 int err = -EOPNOTSUPP; 692 int err = -EOPNOTSUPP;
693 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; 693 DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
694 struct sk_buff *skb; 694 struct sk_buff *skb;
695 695
696 if (flags & MSG_OOB) 696 if (flags & MSG_OOB)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f8da28278014..25071b48921c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,9 +112,6 @@
112#define RT_FL_TOS(oldflp4) \ 112#define RT_FL_TOS(oldflp4) \
113 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) 113 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
114 114
115/* IPv4 datagram length is stored into 16bit field (tot_len) */
116#define IP_MAX_MTU 0xFFFF
117
118#define RT_GC_TIMEOUT (300*HZ) 115#define RT_GC_TIMEOUT (300*HZ)
119 116
120static int ip_rt_max_size; 117static int ip_rt_max_size;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b95331e6c077..f2ed13c2125f 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -121,7 +121,7 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr,
121 cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; 121 cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq;
122 122
123 /* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */ 123 /* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */
124 diff = (count - (cookie >> COOKIEBITS)) & ((__u32) - 1 >> COOKIEBITS); 124 diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS);
125 if (diff >= MAX_SYNCOOKIE_AGE) 125 if (diff >= MAX_SYNCOOKIE_AGE)
126 return (__u32)-1; 126 return (__u32)-1;
127 127
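
The whitespace fix above sits in the cookie age check. (__u32) -1 is an all-ones word, so shifting it right by COOKIEBITS (24 here, per the "count * 2^24" comment) leaves a mask over the 8-bit count field, and the subtraction is therefore evaluated modulo 256. A small stand-alone demonstration with made-up values:

#include <stdint.h>
#include <stdio.h>

#define COOKIEBITS 24	/* as in the kernel source */

int main(void)
{
	uint32_t mask = (uint32_t)-1 >> COOKIEBITS;	/* 0xff */
	uint32_t cookie = 0x05abcdefu;	/* hypothetical: count 5 in the top byte */
	uint32_t count = 9;		/* hypothetical current counter value */
	uint32_t diff = (count - (cookie >> COOKIEBITS)) & mask;

	printf("mask=0x%x diff=%u\n", mask, diff);	/* mask=0xff diff=4 */
	return 0;
}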
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 3d69ec8dac57..44eba052b43d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -286,13 +286,6 @@ static struct ctl_table ipv4_table[] = {
286 .extra2 = &ip_ttl_max, 286 .extra2 = &ip_ttl_max,
287 }, 287 },
288 { 288 {
289 .procname = "ip_no_pmtu_disc",
290 .data = &ipv4_config.no_pmtu_disc,
291 .maxlen = sizeof(int),
292 .mode = 0644,
293 .proc_handler = proc_dointvec
294 },
295 {
296 .procname = "ip_nonlocal_bind", 289 .procname = "ip_nonlocal_bind",
297 .data = &sysctl_ip_nonlocal_bind, 290 .data = &sysctl_ip_nonlocal_bind,
298 .maxlen = sizeof(int), 291 .maxlen = sizeof(int),
@@ -707,7 +700,7 @@ static struct ctl_table ipv4_table[] = {
707 .mode = 0644, 700 .mode = 0644,
708 .proc_handler = proc_dointvec 701 .proc_handler = proc_dointvec
709 }, 702 },
710 { 703 {
711 .procname = "tcp_thin_dupack", 704 .procname = "tcp_thin_dupack",
712 .data = &sysctl_tcp_thin_dupack, 705 .data = &sysctl_tcp_thin_dupack,
713 .maxlen = sizeof(int), 706 .maxlen = sizeof(int),
@@ -733,6 +726,15 @@ static struct ctl_table ipv4_table[] = {
733 .extra2 = &gso_max_segs, 726 .extra2 = &gso_max_segs,
734 }, 727 },
735 { 728 {
729 .procname = "tcp_autocorking",
730 .data = &sysctl_tcp_autocorking,
731 .maxlen = sizeof(int),
732 .mode = 0644,
733 .proc_handler = proc_dointvec_minmax,
734 .extra1 = &zero,
735 .extra2 = &one,
736 },
737 {
736 .procname = "udp_mem", 738 .procname = "udp_mem",
737 .data = &sysctl_udp_mem, 739 .data = &sysctl_udp_mem,
738 .maxlen = sizeof(sysctl_udp_mem), 740 .maxlen = sizeof(sysctl_udp_mem),
@@ -822,6 +824,20 @@ static struct ctl_table ipv4_net_table[] = {
822 .mode = 0644, 824 .mode = 0644,
823 .proc_handler = ipv4_local_port_range, 825 .proc_handler = ipv4_local_port_range,
824 }, 826 },
827 {
828 .procname = "ip_no_pmtu_disc",
829 .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc,
830 .maxlen = sizeof(int),
831 .mode = 0644,
832 .proc_handler = proc_dointvec
833 },
834 {
835 .procname = "ip_forward_use_pmtu",
836 .data = &init_net.ipv4.sysctl_ip_fwd_use_pmtu,
837 .maxlen = sizeof(int),
838 .mode = 0644,
839 .proc_handler = proc_dointvec,
840 },
825 { } 841 { }
826}; 842};
827 843
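
With the entry added above, the autocorking knob surfaces as /proc/sys/net/ipv4/tcp_autocorking (ipv4_table entries live under net.ipv4), clamped to 0..1 by proc_dointvec_minmax. A trivial user-space reader, assuming that path:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_autocorking", "r");
	int val;

	if (f && fscanf(f, "%d", &val) == 1)
		printf("tcp_autocorking = %d\n", val);	/* 1 by default, per tcp.c below */
	if (f)
		fclose(f);
	return 0;
}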
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 82de78603686..4475b3bb494d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -285,6 +285,8 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
285 285
286int sysctl_tcp_min_tso_segs __read_mostly = 2; 286int sysctl_tcp_min_tso_segs __read_mostly = 2;
287 287
288int sysctl_tcp_autocorking __read_mostly = 1;
289
288struct percpu_counter tcp_orphan_count; 290struct percpu_counter tcp_orphan_count;
289EXPORT_SYMBOL_GPL(tcp_orphan_count); 291EXPORT_SYMBOL_GPL(tcp_orphan_count);
290 292
@@ -379,7 +381,7 @@ void tcp_init_sock(struct sock *sk)
379 struct inet_connection_sock *icsk = inet_csk(sk); 381 struct inet_connection_sock *icsk = inet_csk(sk);
380 struct tcp_sock *tp = tcp_sk(sk); 382 struct tcp_sock *tp = tcp_sk(sk);
381 383
382 skb_queue_head_init(&tp->out_of_order_queue); 384 __skb_queue_head_init(&tp->out_of_order_queue);
383 tcp_init_xmit_timers(sk); 385 tcp_init_xmit_timers(sk);
384 tcp_prequeue_init(tp); 386 tcp_prequeue_init(tp);
385 INIT_LIST_HEAD(&tp->tsq_node); 387 INIT_LIST_HEAD(&tp->tsq_node);
@@ -619,19 +621,58 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
619 tp->snd_up = tp->write_seq; 621 tp->snd_up = tp->write_seq;
620} 622}
621 623
622static inline void tcp_push(struct sock *sk, int flags, int mss_now, 624/* If a not yet filled skb is pushed, do not send it if
623 int nonagle) 625 * we have data packets in Qdisc or NIC queues :
626 * Because TX completion will happen shortly, it gives a chance
627 * to coalesce future sendmsg() payload into this skb, without
628 * need for a timer, and with no latency trade off.
629 * As packets containing data payload have a bigger truesize
630 * than pure acks (dataless) packets, the last checks prevent
631 * autocorking if we only have an ACK in Qdisc/NIC queues,
632 * or if TX completion was delayed after we processed ACK packet.
633 */
634static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
635 int size_goal)
624{ 636{
625 if (tcp_send_head(sk)) { 637 return skb->len < size_goal &&
626 struct tcp_sock *tp = tcp_sk(sk); 638 sysctl_tcp_autocorking &&
639 skb != tcp_write_queue_head(sk) &&
640 atomic_read(&sk->sk_wmem_alloc) > skb->truesize;
641}
642
643static void tcp_push(struct sock *sk, int flags, int mss_now,
644 int nonagle, int size_goal)
645{
646 struct tcp_sock *tp = tcp_sk(sk);
647 struct sk_buff *skb;
627 648
628 if (!(flags & MSG_MORE) || forced_push(tp)) 649 if (!tcp_send_head(sk))
629 tcp_mark_push(tp, tcp_write_queue_tail(sk)); 650 return;
651
652 skb = tcp_write_queue_tail(sk);
653 if (!(flags & MSG_MORE) || forced_push(tp))
654 tcp_mark_push(tp, skb);
655
656 tcp_mark_urg(tp, flags);
657
658 if (tcp_should_autocork(sk, skb, size_goal)) {
630 659
631 tcp_mark_urg(tp, flags); 660 /* avoid atomic op if TSQ_THROTTLED bit is already set */
632 __tcp_push_pending_frames(sk, mss_now, 661 if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) {
633 (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); 662 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
663 set_bit(TSQ_THROTTLED, &tp->tsq_flags);
664 }
665 /* It is possible TX completion already happened
666 * before we set TSQ_THROTTLED.
667 */
668 if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize)
669 return;
634 } 670 }
671
672 if (flags & MSG_MORE)
673 nonagle = TCP_NAGLE_CORK;
674
675 __tcp_push_pending_frames(sk, mss_now, nonagle);
635} 676}
636 677
637static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, 678static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
@@ -934,7 +975,8 @@ new_segment:
934wait_for_sndbuf: 975wait_for_sndbuf:
935 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 976 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
936wait_for_memory: 977wait_for_memory:
937 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); 978 tcp_push(sk, flags & ~MSG_MORE, mss_now,
979 TCP_NAGLE_PUSH, size_goal);
938 980
939 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) 981 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
940 goto do_error; 982 goto do_error;
@@ -944,7 +986,7 @@ wait_for_memory:
944 986
945out: 987out:
946 if (copied && !(flags & MSG_SENDPAGE_NOTLAST)) 988 if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
947 tcp_push(sk, flags, mss_now, tp->nonagle); 989 tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
948 return copied; 990 return copied;
949 991
950do_error: 992do_error:
@@ -1225,7 +1267,8 @@ wait_for_sndbuf:
1225 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1267 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1226wait_for_memory: 1268wait_for_memory:
1227 if (copied) 1269 if (copied)
1228 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); 1270 tcp_push(sk, flags & ~MSG_MORE, mss_now,
1271 TCP_NAGLE_PUSH, size_goal);
1229 1272
1230 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) 1273 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
1231 goto do_error; 1274 goto do_error;
@@ -1236,7 +1279,7 @@ wait_for_memory:
1236 1279
1237out: 1280out:
1238 if (copied) 1281 if (copied)
1239 tcp_push(sk, flags, mss_now, tp->nonagle); 1282 tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
1240 release_sock(sk); 1283 release_sock(sk);
1241 return copied + copied_syn; 1284 return copied + copied_syn;
1242 1285
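
The heart of the tcp.c change is the tcp_should_autocork() predicate added above. Stripped of socket types, the decision reduces to four plain conditions; here is a user-space paraphrase, where the parameter names are stand-ins for the kernel fields the real function reads:

#include <stdbool.h>

/* Defer the push only when: the skb is not yet at its size goal, the
 * sysctl is enabled, the skb is not the first one on the write queue
 * (so earlier data is queued ahead of it), and more bytes than this
 * skb's own truesize are still sitting in Qdisc/NIC queues awaiting
 * TX completion.
 */
static bool should_autocork(int skb_len, int size_goal, int autocorking,
			    bool skb_is_queue_head,
			    long wmem_alloc, long skb_truesize)
{
	return skb_len < size_goal &&
	       autocorking &&
	       !skb_is_queue_head &&
	       wmem_alloc > skb_truesize;
}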
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c53b7f35c51d..65cf90e063d5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -766,7 +766,7 @@ static void tcp_update_pacing_rate(struct sock *sk)
766/* Calculate rto without backoff. This is the second half of Van Jacobson's 766/* Calculate rto without backoff. This is the second half of Van Jacobson's
767 * routine referred to above. 767 * routine referred to above.
768 */ 768 */
769void tcp_set_rto(struct sock *sk) 769static void tcp_set_rto(struct sock *sk)
770{ 770{
771 const struct tcp_sock *tp = tcp_sk(sk); 771 const struct tcp_sock *tp = tcp_sk(sk);
772 /* Old crap is replaced with new one. 8) 772 /* Old crap is replaced with new one. 8)
@@ -3686,7 +3686,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3686 int opcode = *ptr++; 3686 int opcode = *ptr++;
3687 int opsize; 3687 int opsize;
3688 3688
3689 switch(opcode) { 3689 switch (opcode) {
3690 case TCPOPT_EOL: 3690 case TCPOPT_EOL:
3691 return NULL; 3691 return NULL;
3692 case TCPOPT_NOP: 3692 case TCPOPT_NOP:
@@ -4046,7 +4046,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
4046 WARN_ON(before(tp->rcv_nxt, sp->end_seq)); 4046 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4047 4047
4048 /* Zap this SACK, by moving forward any other SACKS. */ 4048 /* Zap this SACK, by moving forward any other SACKS. */
4049 for (i=this_sack+1; i < num_sacks; i++) 4049 for (i = this_sack+1; i < num_sacks; i++)
4050 tp->selective_acks[i-1] = tp->selective_acks[i]; 4050 tp->selective_acks[i-1] = tp->selective_acks[i];
4051 num_sacks--; 4051 num_sacks--;
4052 continue; 4052 continue;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 067213924751..3cf976510497 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -173,7 +173,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
173 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, 173 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
174 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 174 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
175 IPPROTO_TCP, 175 IPPROTO_TCP,
176 orig_sport, orig_dport, sk, true); 176 orig_sport, orig_dport, sk);
177 if (IS_ERR(rt)) { 177 if (IS_ERR(rt)) {
178 err = PTR_ERR(rt); 178 err = PTR_ERR(rt);
179 if (err == -ENETUNREACH) 179 if (err == -ENETUNREACH)
@@ -827,7 +827,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
827 const struct inet_request_sock *ireq = inet_rsk(req); 827 const struct inet_request_sock *ireq = inet_rsk(req);
828 struct flowi4 fl4; 828 struct flowi4 fl4;
829 int err = -1; 829 int err = -1;
830 struct sk_buff * skb; 830 struct sk_buff *skb;
831 831
832 /* First, grab a route. */ 832 /* First, grab a route. */
833 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 833 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
@@ -1668,7 +1668,6 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1668 } 1668 }
1669 sk_setup_caps(newsk, dst); 1669 sk_setup_caps(newsk, dst);
1670 1670
1671 tcp_mtup_init(newsk);
1672 tcp_sync_mss(newsk, dst_mtu(dst)); 1671 tcp_sync_mss(newsk, dst_mtu(dst));
1673 newtp->advmss = dst_metric_advmss(dst); 1672 newtp->advmss = dst_metric_advmss(dst);
1674 if (tcp_sk(sk)->rx_opt.user_mss && 1673 if (tcp_sk(sk)->rx_opt.user_mss &&
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 098b3a29f6f3..d547075d8300 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -22,7 +22,8 @@
22 22
23int sysctl_tcp_nometrics_save __read_mostly; 23int sysctl_tcp_nometrics_save __read_mostly;
24 24
25static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr, 25static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr,
26 const struct inetpeer_addr *daddr,
26 struct net *net, unsigned int hash); 27 struct net *net, unsigned int hash);
27 28
28struct tcp_fastopen_metrics { 29struct tcp_fastopen_metrics {
@@ -34,7 +35,8 @@ struct tcp_fastopen_metrics {
34 35
35struct tcp_metrics_block { 36struct tcp_metrics_block {
36 struct tcp_metrics_block __rcu *tcpm_next; 37 struct tcp_metrics_block __rcu *tcpm_next;
37 struct inetpeer_addr tcpm_addr; 38 struct inetpeer_addr tcpm_saddr;
39 struct inetpeer_addr tcpm_daddr;
38 unsigned long tcpm_stamp; 40 unsigned long tcpm_stamp;
39 u32 tcpm_ts; 41 u32 tcpm_ts;
40 u32 tcpm_ts_stamp; 42 u32 tcpm_ts_stamp;
@@ -145,7 +147,8 @@ static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst
145#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL 147#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL
146 148
147static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, 149static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
148 struct inetpeer_addr *addr, 150 struct inetpeer_addr *saddr,
151 struct inetpeer_addr *daddr,
149 unsigned int hash) 152 unsigned int hash)
150{ 153{
151 struct tcp_metrics_block *tm; 154 struct tcp_metrics_block *tm;
@@ -158,7 +161,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
158 /* While waiting for the spin-lock the cache might have been populated 161 /* While waiting for the spin-lock the cache might have been populated
159 * with this entry and so we have to check again. 162 * with this entry and so we have to check again.
160 */ 163 */
161 tm = __tcp_get_metrics(addr, net, hash); 164 tm = __tcp_get_metrics(saddr, daddr, net, hash);
162 if (tm == TCP_METRICS_RECLAIM_PTR) { 165 if (tm == TCP_METRICS_RECLAIM_PTR) {
163 reclaim = true; 166 reclaim = true;
164 tm = NULL; 167 tm = NULL;
@@ -183,7 +186,8 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
183 if (!tm) 186 if (!tm)
184 goto out_unlock; 187 goto out_unlock;
185 } 188 }
186 tm->tcpm_addr = *addr; 189 tm->tcpm_saddr = *saddr;
190 tm->tcpm_daddr = *daddr;
187 191
188 tcpm_suck_dst(tm, dst, true); 192 tcpm_suck_dst(tm, dst, true);
189 193
@@ -206,7 +210,8 @@ static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, in
206 return NULL; 210 return NULL;
207} 211}
208 212
209static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr, 213static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr,
214 const struct inetpeer_addr *daddr,
210 struct net *net, unsigned int hash) 215 struct net *net, unsigned int hash)
211{ 216{
212 struct tcp_metrics_block *tm; 217 struct tcp_metrics_block *tm;
@@ -214,7 +219,8 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *a
214 219
215 for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; 220 for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
216 tm = rcu_dereference(tm->tcpm_next)) { 221 tm = rcu_dereference(tm->tcpm_next)) {
217 if (addr_same(&tm->tcpm_addr, addr)) 222 if (addr_same(&tm->tcpm_saddr, saddr) &&
223 addr_same(&tm->tcpm_daddr, daddr))
218 break; 224 break;
219 depth++; 225 depth++;
220 } 226 }
@@ -225,19 +231,22 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
225 struct dst_entry *dst) 231 struct dst_entry *dst)
226{ 232{
227 struct tcp_metrics_block *tm; 233 struct tcp_metrics_block *tm;
228 struct inetpeer_addr addr; 234 struct inetpeer_addr saddr, daddr;
229 unsigned int hash; 235 unsigned int hash;
230 struct net *net; 236 struct net *net;
231 237
232 addr.family = req->rsk_ops->family; 238 saddr.family = req->rsk_ops->family;
233 switch (addr.family) { 239 daddr.family = req->rsk_ops->family;
240 switch (daddr.family) {
234 case AF_INET: 241 case AF_INET:
235 addr.addr.a4 = inet_rsk(req)->ir_rmt_addr; 242 saddr.addr.a4 = inet_rsk(req)->ir_loc_addr;
236 hash = (__force unsigned int) addr.addr.a4; 243 daddr.addr.a4 = inet_rsk(req)->ir_rmt_addr;
244 hash = (__force unsigned int) daddr.addr.a4;
237 break; 245 break;
238#if IS_ENABLED(CONFIG_IPV6) 246#if IS_ENABLED(CONFIG_IPV6)
239 case AF_INET6: 247 case AF_INET6:
240 *(struct in6_addr *)addr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; 248 *(struct in6_addr *)saddr.addr.a6 = inet_rsk(req)->ir_v6_loc_addr;
249 *(struct in6_addr *)daddr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr;
241 hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr); 250 hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr);
242 break; 251 break;
243#endif 252#endif
@@ -250,7 +259,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
250 259
251 for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; 260 for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
252 tm = rcu_dereference(tm->tcpm_next)) { 261 tm = rcu_dereference(tm->tcpm_next)) {
253 if (addr_same(&tm->tcpm_addr, &addr)) 262 if (addr_same(&tm->tcpm_saddr, &saddr) &&
263 addr_same(&tm->tcpm_daddr, &daddr))
254 break; 264 break;
255 } 265 }
256 tcpm_check_stamp(tm, dst); 266 tcpm_check_stamp(tm, dst);
@@ -260,32 +270,44 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
260static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw) 270static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
261{ 271{
262 struct tcp_metrics_block *tm; 272 struct tcp_metrics_block *tm;
263 struct inetpeer_addr addr; 273 struct inetpeer_addr saddr, daddr;
264 unsigned int hash; 274 unsigned int hash;
265 struct net *net; 275 struct net *net;
266 276
267 addr.family = tw->tw_family; 277 if (tw->tw_family == AF_INET) {
268 switch (addr.family) { 278 saddr.family = AF_INET;
269 case AF_INET: 279 saddr.addr.a4 = tw->tw_rcv_saddr;
270 addr.addr.a4 = tw->tw_daddr; 280 daddr.family = AF_INET;
271 hash = (__force unsigned int) addr.addr.a4; 281 daddr.addr.a4 = tw->tw_daddr;
272 break; 282 hash = (__force unsigned int) daddr.addr.a4;
283 }
273#if IS_ENABLED(CONFIG_IPV6) 284#if IS_ENABLED(CONFIG_IPV6)
274 case AF_INET6: 285 else if (tw->tw_family == AF_INET6) {
275 *(struct in6_addr *)addr.addr.a6 = tw->tw_v6_daddr; 286 if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) {
276 hash = ipv6_addr_hash(&tw->tw_v6_daddr); 287 saddr.family = AF_INET;
277 break; 288 saddr.addr.a4 = tw->tw_rcv_saddr;
289 daddr.family = AF_INET;
290 daddr.addr.a4 = tw->tw_daddr;
291 hash = (__force unsigned int) daddr.addr.a4;
292 } else {
293 saddr.family = AF_INET6;
294 *(struct in6_addr *)saddr.addr.a6 = tw->tw_v6_rcv_saddr;
295 daddr.family = AF_INET6;
296 *(struct in6_addr *)daddr.addr.a6 = tw->tw_v6_daddr;
297 hash = ipv6_addr_hash(&tw->tw_v6_daddr);
298 }
299 }
278#endif 300#endif
279 default: 301 else
280 return NULL; 302 return NULL;
281 }
282 303
283 net = twsk_net(tw); 304 net = twsk_net(tw);
284 hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); 305 hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
285 306
286 for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; 307 for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
287 tm = rcu_dereference(tm->tcpm_next)) { 308 tm = rcu_dereference(tm->tcpm_next)) {
288 if (addr_same(&tm->tcpm_addr, &addr)) 309 if (addr_same(&tm->tcpm_saddr, &saddr) &&
310 addr_same(&tm->tcpm_daddr, &daddr))
289 break; 311 break;
290 } 312 }
291 return tm; 313 return tm;
@@ -296,34 +318,45 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
296 bool create) 318 bool create)
297{ 319{
298 struct tcp_metrics_block *tm; 320 struct tcp_metrics_block *tm;
299 struct inetpeer_addr addr; 321 struct inetpeer_addr saddr, daddr;
300 unsigned int hash; 322 unsigned int hash;
301 struct net *net; 323 struct net *net;
302 324
303 addr.family = sk->sk_family; 325 if (sk->sk_family == AF_INET) {
304 switch (addr.family) { 326 saddr.family = AF_INET;
305 case AF_INET: 327 saddr.addr.a4 = inet_sk(sk)->inet_saddr;
306 addr.addr.a4 = inet_sk(sk)->inet_daddr; 328 daddr.family = AF_INET;
307 hash = (__force unsigned int) addr.addr.a4; 329 daddr.addr.a4 = inet_sk(sk)->inet_daddr;
308 break; 330 hash = (__force unsigned int) daddr.addr.a4;
331 }
309#if IS_ENABLED(CONFIG_IPV6) 332#if IS_ENABLED(CONFIG_IPV6)
310 case AF_INET6: 333 else if (sk->sk_family == AF_INET6) {
311 *(struct in6_addr *)addr.addr.a6 = sk->sk_v6_daddr; 334 if (ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
312 hash = ipv6_addr_hash(&sk->sk_v6_daddr); 335 saddr.family = AF_INET;
313 break; 336 saddr.addr.a4 = inet_sk(sk)->inet_saddr;
337 daddr.family = AF_INET;
338 daddr.addr.a4 = inet_sk(sk)->inet_daddr;
339 hash = (__force unsigned int) daddr.addr.a4;
340 } else {
341 saddr.family = AF_INET6;
342 *(struct in6_addr *)saddr.addr.a6 = sk->sk_v6_rcv_saddr;
343 daddr.family = AF_INET6;
344 *(struct in6_addr *)daddr.addr.a6 = sk->sk_v6_daddr;
345 hash = ipv6_addr_hash(&sk->sk_v6_daddr);
346 }
347 }
314#endif 348#endif
315 default: 349 else
316 return NULL; 350 return NULL;
317 }
318 351
319 net = dev_net(dst->dev); 352 net = dev_net(dst->dev);
320 hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); 353 hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
321 354
322 tm = __tcp_get_metrics(&addr, net, hash); 355 tm = __tcp_get_metrics(&saddr, &daddr, net, hash);
323 if (tm == TCP_METRICS_RECLAIM_PTR) 356 if (tm == TCP_METRICS_RECLAIM_PTR)
324 tm = NULL; 357 tm = NULL;
325 if (!tm && create) 358 if (!tm && create)
326 tm = tcpm_new(dst, &addr, hash); 359 tm = tcpm_new(dst, &saddr, &daddr, hash);
327 else 360 else
328 tcpm_check_stamp(tm, dst); 361 tcpm_check_stamp(tm, dst);
329 362
@@ -737,15 +770,21 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
737 struct nlattr *nest; 770 struct nlattr *nest;
738 int i; 771 int i;
739 772
740 switch (tm->tcpm_addr.family) { 773 switch (tm->tcpm_daddr.family) {
741 case AF_INET: 774 case AF_INET:
742 if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, 775 if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4,
743 tm->tcpm_addr.addr.a4) < 0) 776 tm->tcpm_daddr.addr.a4) < 0)
777 goto nla_put_failure;
778 if (nla_put_be32(msg, TCP_METRICS_ATTR_SADDR_IPV4,
779 tm->tcpm_saddr.addr.a4) < 0)
744 goto nla_put_failure; 780 goto nla_put_failure;
745 break; 781 break;
746 case AF_INET6: 782 case AF_INET6:
747 if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, 783 if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16,
748 tm->tcpm_addr.addr.a6) < 0) 784 tm->tcpm_daddr.addr.a6) < 0)
785 goto nla_put_failure;
786 if (nla_put(msg, TCP_METRICS_ATTR_SADDR_IPV6, 16,
787 tm->tcpm_saddr.addr.a6) < 0)
749 goto nla_put_failure; 788 goto nla_put_failure;
750 break; 789 break;
751 default: 790 default:
@@ -868,44 +907,66 @@ done:
868 return skb->len; 907 return skb->len;
869} 908}
870 909
871static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, 910static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
872 unsigned int *hash, int optional) 911 unsigned int *hash, int optional, int v4, int v6)
873{ 912{
874 struct nlattr *a; 913 struct nlattr *a;
875 914
876 a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV4]; 915 a = info->attrs[v4];
877 if (a) { 916 if (a) {
878 addr->family = AF_INET; 917 addr->family = AF_INET;
879 addr->addr.a4 = nla_get_be32(a); 918 addr->addr.a4 = nla_get_be32(a);
880 *hash = (__force unsigned int) addr->addr.a4; 919 if (hash)
920 *hash = (__force unsigned int) addr->addr.a4;
881 return 0; 921 return 0;
882 } 922 }
883 a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6]; 923 a = info->attrs[v6];
884 if (a) { 924 if (a) {
885 if (nla_len(a) != sizeof(struct in6_addr)) 925 if (nla_len(a) != sizeof(struct in6_addr))
886 return -EINVAL; 926 return -EINVAL;
887 addr->family = AF_INET6; 927 addr->family = AF_INET6;
888 memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); 928 memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6));
889 *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); 929 if (hash)
930 *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6);
890 return 0; 931 return 0;
891 } 932 }
892 return optional ? 1 : -EAFNOSUPPORT; 933 return optional ? 1 : -EAFNOSUPPORT;
893} 934}
894 935
936static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
937 unsigned int *hash, int optional)
938{
939 return __parse_nl_addr(info, addr, hash, optional,
940 TCP_METRICS_ATTR_ADDR_IPV4,
941 TCP_METRICS_ATTR_ADDR_IPV6);
942}
943
944static int parse_nl_saddr(struct genl_info *info, struct inetpeer_addr *addr)
945{
946 return __parse_nl_addr(info, addr, NULL, 0,
947 TCP_METRICS_ATTR_SADDR_IPV4,
948 TCP_METRICS_ATTR_SADDR_IPV6);
949}
950
895static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) 951static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info)
896{ 952{
897 struct tcp_metrics_block *tm; 953 struct tcp_metrics_block *tm;
898 struct inetpeer_addr addr; 954 struct inetpeer_addr saddr, daddr;
899 unsigned int hash; 955 unsigned int hash;
900 struct sk_buff *msg; 956 struct sk_buff *msg;
901 struct net *net = genl_info_net(info); 957 struct net *net = genl_info_net(info);
902 void *reply; 958 void *reply;
903 int ret; 959 int ret;
960 bool src = true;
904 961
905 ret = parse_nl_addr(info, &addr, &hash, 0); 962 ret = parse_nl_addr(info, &daddr, &hash, 0);
906 if (ret < 0) 963 if (ret < 0)
907 return ret; 964 return ret;
908 965
966 ret = parse_nl_saddr(info, &saddr);
967 if (ret < 0)
968 src = false;
969
909 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 970 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
910 if (!msg) 971 if (!msg)
911 return -ENOMEM; 972 return -ENOMEM;
@@ -920,7 +981,8 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info)
920 rcu_read_lock(); 981 rcu_read_lock();
921 for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; 982 for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
922 tm = rcu_dereference(tm->tcpm_next)) { 983 tm = rcu_dereference(tm->tcpm_next)) {
923 if (addr_same(&tm->tcpm_addr, &addr)) { 984 if (addr_same(&tm->tcpm_daddr, &daddr) &&
985 (!src || addr_same(&tm->tcpm_saddr, &saddr))) {
924 ret = tcp_metrics_fill_info(msg, tm); 986 ret = tcp_metrics_fill_info(msg, tm);
925 break; 987 break;
926 } 988 }
@@ -975,32 +1037,38 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
975 struct tcpm_hash_bucket *hb; 1037 struct tcpm_hash_bucket *hb;
976 struct tcp_metrics_block *tm; 1038 struct tcp_metrics_block *tm;
977 struct tcp_metrics_block __rcu **pp; 1039 struct tcp_metrics_block __rcu **pp;
978 struct inetpeer_addr addr; 1040 struct inetpeer_addr saddr, daddr;
979 unsigned int hash; 1041 unsigned int hash;
980 struct net *net = genl_info_net(info); 1042 struct net *net = genl_info_net(info);
981 int ret; 1043 int ret;
1044 bool src = true, found = false;
982 1045
983 ret = parse_nl_addr(info, &addr, &hash, 1); 1046 ret = parse_nl_addr(info, &daddr, &hash, 1);
984 if (ret < 0) 1047 if (ret < 0)
985 return ret; 1048 return ret;
986 if (ret > 0) 1049 if (ret > 0)
987 return tcp_metrics_flush_all(net); 1050 return tcp_metrics_flush_all(net);
1051 ret = parse_nl_saddr(info, &saddr);
1052 if (ret < 0)
1053 src = false;
988 1054
989 hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); 1055 hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
990 hb = net->ipv4.tcp_metrics_hash + hash; 1056 hb = net->ipv4.tcp_metrics_hash + hash;
991 pp = &hb->chain; 1057 pp = &hb->chain;
992 spin_lock_bh(&tcp_metrics_lock); 1058 spin_lock_bh(&tcp_metrics_lock);
993 for (tm = deref_locked_genl(*pp); tm; 1059 for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) {
994 pp = &tm->tcpm_next, tm = deref_locked_genl(*pp)) { 1060 if (addr_same(&tm->tcpm_daddr, &daddr) &&
995 if (addr_same(&tm->tcpm_addr, &addr)) { 1061 (!src || addr_same(&tm->tcpm_saddr, &saddr))) {
996 *pp = tm->tcpm_next; 1062 *pp = tm->tcpm_next;
997 break; 1063 kfree_rcu(tm, rcu_head);
1064 found = true;
1065 } else {
1066 pp = &tm->tcpm_next;
998 } 1067 }
999 } 1068 }
1000 spin_unlock_bh(&tcp_metrics_lock); 1069 spin_unlock_bh(&tcp_metrics_lock);
1001 if (!tm) 1070 if (!found)
1002 return -ESRCH; 1071 return -ESRCH;
1003 kfree_rcu(tm, rcu_head);
1004 return 0; 1072 return 0;
1005} 1073}
1006 1074
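
The tcp_metrics rework above threads a source address through every lookup: entries are now keyed by the (saddr, daddr) pair rather than by the destination alone, and get/del requests may optionally constrain the source. A reduced sketch of the two-key chain walk, with simplified stand-in types (IPv4 only):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct addr { uint32_t a4; };

struct metrics {
	struct metrics *next;
	struct addr saddr;	/* new half of the key */
	struct addr daddr;	/* previously the sole key */
};

static bool addr_same(const struct addr *a, const struct addr *b)
{
	return a->a4 == b->a4;
}

static struct metrics *lookup(struct metrics *chain,
			      const struct addr *saddr,
			      const struct addr *daddr)
{
	struct metrics *m;

	for (m = chain; m; m = m->next)
		if (addr_same(&m->saddr, saddr) &&
		    addr_same(&m->daddr, daddr))
			return m;	/* both halves of the key must match */
	return NULL;
}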
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 97b684159861..7a436c517e44 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -297,6 +297,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
297 tw->tw_v6_daddr = sk->sk_v6_daddr; 297 tw->tw_v6_daddr = sk->sk_v6_daddr;
298 tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; 298 tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
299 tw->tw_tclass = np->tclass; 299 tw->tw_tclass = np->tclass;
300 tw->tw_flowlabel = np->flow_label >> 12;
300 tw->tw_ipv6only = np->ipv6only; 301 tw->tw_ipv6only = np->ipv6only;
301 } 302 }
302#endif 303#endif
@@ -425,7 +426,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
425 426
426 tcp_set_ca_state(newsk, TCP_CA_Open); 427 tcp_set_ca_state(newsk, TCP_CA_Open);
427 tcp_init_xmit_timers(newsk); 428 tcp_init_xmit_timers(newsk);
428 skb_queue_head_init(&newtp->out_of_order_queue); 429 __skb_queue_head_init(&newtp->out_of_order_queue);
429 newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; 430 newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
430 431
431 newtp->rx_opt.saw_tstamp = 0; 432 newtp->rx_opt.saw_tstamp = 0;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 05606353c7e7..b92b81718ca4 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -138,7 +138,6 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
138out: 138out:
139 return segs; 139 return segs;
140} 140}
141EXPORT_SYMBOL(tcp_gso_segment);
142 141
143struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) 142struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
144{ 143{
@@ -197,7 +196,8 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
197 goto out_check_final; 196 goto out_check_final;
198 197
199found: 198found:
200 flush = NAPI_GRO_CB(p)->flush; 199 /* Include the IP ID check below from the inner most IP hdr */
200 flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id;
201 flush |= (__force int)(flags & TCP_FLAG_CWR); 201 flush |= (__force int)(flags & TCP_FLAG_CWR);
202 flush |= (__force int)((flags ^ tcp_flag_word(th2)) & 202 flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
203 ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); 203 ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
@@ -230,17 +230,16 @@ out_check_final:
230 pp = head; 230 pp = head;
231 231
232out: 232out:
233 NAPI_GRO_CB(skb)->flush |= flush; 233 NAPI_GRO_CB(skb)->flush |= (flush != 0);
234 234
235 return pp; 235 return pp;
236} 236}
237EXPORT_SYMBOL(tcp_gro_receive);
238 237
239int tcp_gro_complete(struct sk_buff *skb) 238int tcp_gro_complete(struct sk_buff *skb)
240{ 239{
241 struct tcphdr *th = tcp_hdr(skb); 240 struct tcphdr *th = tcp_hdr(skb);
242 241
243 skb->csum_start = skb_transport_header(skb) - skb->head; 242 skb->csum_start = (unsigned char *)th - skb->head;
244 skb->csum_offset = offsetof(struct tcphdr, check); 243 skb->csum_offset = offsetof(struct tcphdr, check);
245 skb->ip_summed = CHECKSUM_PARTIAL; 244 skb->ip_summed = CHECKSUM_PARTIAL;
246 245
@@ -272,6 +271,7 @@ static int tcp_v4_gso_send_check(struct sk_buff *skb)
272 271
273static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) 272static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
274{ 273{
274 /* Use the IP hdr immediately proceeding for this transport */
275 const struct iphdr *iph = skb_gro_network_header(skb); 275 const struct iphdr *iph = skb_gro_network_header(skb);
276 __wsum wsum; 276 __wsum wsum;
277 277
@@ -279,7 +279,7 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *
279 if (NAPI_GRO_CB(skb)->flush) 279 if (NAPI_GRO_CB(skb)->flush)
280 goto skip_csum; 280 goto skip_csum;
281 281
282 wsum = skb->csum; 282 wsum = NAPI_GRO_CB(skb)->csum;
283 283
284 switch (skb->ip_summed) { 284 switch (skb->ip_summed) {
285 case CHECKSUM_NONE: 285 case CHECKSUM_NONE:
@@ -303,13 +303,13 @@ skip_csum:
303 return tcp_gro_receive(head, skb); 303 return tcp_gro_receive(head, skb);
304} 304}
305 305
306static int tcp4_gro_complete(struct sk_buff *skb) 306static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
307{ 307{
308 const struct iphdr *iph = ip_hdr(skb); 308 const struct iphdr *iph = ip_hdr(skb);
309 struct tcphdr *th = tcp_hdr(skb); 309 struct tcphdr *th = tcp_hdr(skb);
310 310
311 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), 311 th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
312 iph->saddr, iph->daddr, 0); 312 iph->daddr, 0);
313 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 313 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
314 314
315 return tcp_gro_complete(skb); 315 return tcp_gro_complete(skb);
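
Two small GRO details above are easy to miss: the receive path now folds NAPI_GRO_CB(p)->flush_id into the flush word, and at the out: label the accumulated multi-bit flush value is collapsed to 0/1 before being OR-ed into the control block. A toy illustration of that normalization:

#include <stdio.h>

int main(void)
{
	unsigned int flush = 0;
	int cb_flush = 0;

	flush |= 0x40;			/* some header comparison set a bit */
	cb_flush |= (flush != 0);	/* store a clean boolean, not the raw bits */
	printf("cb_flush = %d\n", cb_flush);	/* prints 1 */
	return 0;
}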
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7820f3a7dd70..03d26b85eab8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -363,15 +363,17 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
363 */ 363 */
364static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) 364static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
365{ 365{
366 struct skb_shared_info *shinfo = skb_shinfo(skb);
367
366 skb->ip_summed = CHECKSUM_PARTIAL; 368 skb->ip_summed = CHECKSUM_PARTIAL;
367 skb->csum = 0; 369 skb->csum = 0;
368 370
369 TCP_SKB_CB(skb)->tcp_flags = flags; 371 TCP_SKB_CB(skb)->tcp_flags = flags;
370 TCP_SKB_CB(skb)->sacked = 0; 372 TCP_SKB_CB(skb)->sacked = 0;
371 373
372 skb_shinfo(skb)->gso_segs = 1; 374 shinfo->gso_segs = 1;
373 skb_shinfo(skb)->gso_size = 0; 375 shinfo->gso_size = 0;
374 skb_shinfo(skb)->gso_type = 0; 376 shinfo->gso_type = 0;
375 377
376 TCP_SKB_CB(skb)->seq = seq; 378 TCP_SKB_CB(skb)->seq = seq;
377 if (flags & (TCPHDR_SYN | TCPHDR_FIN)) 379 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
@@ -406,7 +408,7 @@ struct tcp_out_options {
406 * Beware: Something in the Internet is very sensitive to the ordering of 408 * Beware: Something in the Internet is very sensitive to the ordering of
407 * TCP options, we learned this through the hard way, so be careful here. 409 * TCP options, we learned this through the hard way, so be careful here.
408 * Luckily we can at least blame others for their non-compliance but from 410 * Luckily we can at least blame others for their non-compliance but from
409 * inter-operatibility perspective it seems that we're somewhat stuck with 411 * inter-operability perspective it seems that we're somewhat stuck with
410 * the ordering which we have been using if we want to keep working with 412 * the ordering which we have been using if we want to keep working with
411 * those broken things (not that it currently hurts anybody as there isn't 413 * those broken things (not that it currently hurts anybody as there isn't
412 * particular reason why the ordering would need to be changed). 414 * particular reason why the ordering would need to be changed).
@@ -679,7 +681,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
679 * 681 *
680 * Its important tcp_wfree() can be replaced by sock_wfree() in the event skb 682 * Its important tcp_wfree() can be replaced by sock_wfree() in the event skb
681 * needs to be reallocated in a driver. 683 * needs to be reallocated in a driver.
682 * The invariant being skb->truesize substracted from sk->sk_wmem_alloc 684 * The invariant being skb->truesize subtracted from sk->sk_wmem_alloc
683 * 685 *
684 * Since transmit from skb destructor is forbidden, we use a tasklet 686 * Since transmit from skb destructor is forbidden, we use a tasklet
685 * to process all sockets that eventually need to send more skbs. 687 * to process all sockets that eventually need to send more skbs.
@@ -699,9 +701,9 @@ static void tcp_tsq_handler(struct sock *sk)
699 tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC); 701 tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
700} 702}
701/* 703/*
702 * One tasklest per cpu tries to send more skbs. 704 * One tasklet per cpu tries to send more skbs.
703 * We run in tasklet context but need to disable irqs when 705 * We run in tasklet context but need to disable irqs when
704 * transfering tsq->head because tcp_wfree() might 706 * transferring tsq->head because tcp_wfree() might
705 * interrupt us (non NAPI drivers) 707 * interrupt us (non NAPI drivers)
706 */ 708 */
707static void tcp_tasklet_func(unsigned long data) 709static void tcp_tasklet_func(unsigned long data)
@@ -795,7 +797,7 @@ void __init tcp_tasklet_init(void)
795 797
796/* 798/*
797 * Write buffer destructor automatically called from kfree_skb. 799 * Write buffer destructor automatically called from kfree_skb.
798 * We cant xmit new skbs from this context, as we might already 800 * We can't xmit new skbs from this context, as we might already
799 * hold qdisc lock. 801 * hold qdisc lock.
800 */ 802 */
801void tcp_wfree(struct sk_buff *skb) 803void tcp_wfree(struct sk_buff *skb)
@@ -986,6 +988,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
986static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, 988static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
987 unsigned int mss_now) 989 unsigned int mss_now)
988{ 990{
991 struct skb_shared_info *shinfo = skb_shinfo(skb);
992
989 /* Make sure we own this skb before messing gso_size/gso_segs */ 993 /* Make sure we own this skb before messing gso_size/gso_segs */
990 WARN_ON_ONCE(skb_cloned(skb)); 994 WARN_ON_ONCE(skb_cloned(skb));
991 995
@@ -993,13 +997,13 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
993 /* Avoid the costly divide in the normal 997 /* Avoid the costly divide in the normal
994 * non-TSO case. 998 * non-TSO case.
995 */ 999 */
996 skb_shinfo(skb)->gso_segs = 1; 1000 shinfo->gso_segs = 1;
997 skb_shinfo(skb)->gso_size = 0; 1001 shinfo->gso_size = 0;
998 skb_shinfo(skb)->gso_type = 0; 1002 shinfo->gso_type = 0;
999 } else { 1003 } else {
1000 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now); 1004 shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
1001 skb_shinfo(skb)->gso_size = mss_now; 1005 shinfo->gso_size = mss_now;
1002 skb_shinfo(skb)->gso_type = sk->sk_gso_type; 1006 shinfo->gso_type = sk->sk_gso_type;
1003 } 1007 }
1004} 1008}
1005 1009
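
Both tcp_output.c hunks above cache skb_shinfo(skb) in a local `shinfo` instead of re-evaluating the macro, and the TSO branch sizes gso_segs with a ceiling division. A standalone sketch of that rounding, with made-up values (DIV_ROUND_UP is restated here so the snippet compiles on its own):

#include <stdio.h>

/* Same rounding rule as the kernel's DIV_ROUND_UP() macro. */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int mss_now = 1448;	/* illustrative MSS */
	unsigned int len = 4000;	/* illustrative skb->len */

	/* 4000 bytes at MSS 1448 -> 2 full segments + 1 partial = 3 */
	printf("gso_segs = %u\n", DIV_ROUND_UP(len, mss_now));
	return 0;
}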
@@ -1146,6 +1150,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1146 */ 1150 */
1147static void __pskb_trim_head(struct sk_buff *skb, int len) 1151static void __pskb_trim_head(struct sk_buff *skb, int len)
1148{ 1152{
1153 struct skb_shared_info *shinfo;
1149 int i, k, eat; 1154 int i, k, eat;
1150 1155
1151 eat = min_t(int, len, skb_headlen(skb)); 1156 eat = min_t(int, len, skb_headlen(skb));
@@ -1157,23 +1162,24 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
1157 } 1162 }
1158 eat = len; 1163 eat = len;
1159 k = 0; 1164 k = 0;
1160 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1165 shinfo = skb_shinfo(skb);
1161 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); 1166 for (i = 0; i < shinfo->nr_frags; i++) {
1167 int size = skb_frag_size(&shinfo->frags[i]);
1162 1168
1163 if (size <= eat) { 1169 if (size <= eat) {
1164 skb_frag_unref(skb, i); 1170 skb_frag_unref(skb, i);
1165 eat -= size; 1171 eat -= size;
1166 } else { 1172 } else {
1167 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; 1173 shinfo->frags[k] = shinfo->frags[i];
1168 if (eat) { 1174 if (eat) {
1169 skb_shinfo(skb)->frags[k].page_offset += eat; 1175 shinfo->frags[k].page_offset += eat;
1170 skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); 1176 skb_frag_size_sub(&shinfo->frags[k], eat);
1171 eat = 0; 1177 eat = 0;
1172 } 1178 }
1173 k++; 1179 k++;
1174 } 1180 }
1175 } 1181 }
1176 skb_shinfo(skb)->nr_frags = k; 1182 shinfo->nr_frags = k;
1177 1183
1178 skb_reset_tail_pointer(skb); 1184 skb_reset_tail_pointer(skb);
1179 skb->data_len -= len; 1185 skb->data_len -= len;
@@ -1378,23 +1384,51 @@ static void tcp_cwnd_validate(struct sock *sk)
1378 } 1384 }
1379} 1385}
1380 1386
1381/* Returns the portion of skb which can be sent right away without 1387/* Minshall's variant of the Nagle send check. */
1382 * introducing MSS oddities to segment boundaries. In rare cases where 1388static bool tcp_minshall_check(const struct tcp_sock *tp)
1383 * mss_now != mss_cache, we will request caller to create a small skb 1389{
1384 * per input skb which could be mostly avoided here (if desired). 1390 return after(tp->snd_sml, tp->snd_una) &&
1385 * 1391 !after(tp->snd_sml, tp->snd_nxt);
1386 * We explicitly want to create a request for splitting write queue tail 1392}
1387 * to a small skb for Nagle purposes while avoiding unnecessary modulos, 1393
1388 * thus all the complexity (cwnd_len is always MSS multiple which we 1394/* Update snd_sml if this skb is under mss
1389 * return whenever allowed by the other factors). Basically we need the 1395 * Note that a TSO packet might end with a sub-mss segment
1390 * modulo only when the receiver window alone is the limiting factor or 1396 * The test is really :
1391 * when we would be allowed to send the split-due-to-Nagle skb fully. 1397 * if ((skb->len % mss) != 0)
1398 * tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1399 * But we can avoid doing the divide again given we already have
1400 * skb_pcount = skb->len / mss_now
1401 */
1402static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
1403 const struct sk_buff *skb)
1404{
1405 if (skb->len < tcp_skb_pcount(skb) * mss_now)
1406 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1407}
1408
1409/* Return false if packet can be sent now without violating Nagle's rules:
1410 * 1. It is full sized. (provided by caller in %partial bool)
1411 * 2. Or it contains FIN. (already checked by caller)
1412 * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
1413 * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
1414 * With Minshall's modification: all sent small packets are ACKed.
1392 */ 1415 */
1393static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, 1416static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
1394 unsigned int mss_now, unsigned int max_segs) 1417 unsigned int mss_now, int nonagle)
1418{
1419 return partial &&
1420 ((nonagle & TCP_NAGLE_CORK) ||
1421 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1422}
1423/* Returns the portion of skb which can be sent right away */
1424static unsigned int tcp_mss_split_point(const struct sock *sk,
1425 const struct sk_buff *skb,
1426 unsigned int mss_now,
1427 unsigned int max_segs,
1428 int nonagle)
1395{ 1429{
1396 const struct tcp_sock *tp = tcp_sk(sk); 1430 const struct tcp_sock *tp = tcp_sk(sk);
1397 u32 needed, window, max_len; 1431 u32 partial, needed, window, max_len;
1398 1432
1399 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; 1433 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1400 max_len = mss_now * max_segs; 1434 max_len = mss_now * max_segs;
@@ -1407,7 +1441,15 @@ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_b
1407 if (max_len <= needed) 1441 if (max_len <= needed)
1408 return max_len; 1442 return max_len;
1409 1443
1410 return needed - needed % mss_now; 1444 partial = needed % mss_now;
1445	/* If the last segment is not a full MSS, check if Nagle rules allow us
1446	 * to include this last segment in this skb.
1447	 * Otherwise, we'll split the skb at the last MSS boundary.
1448 */
1449 if (tcp_nagle_check(partial != 0, tp, mss_now, nonagle))
1450 return needed - partial;
1451
1452 return needed;
1411} 1453}
1412 1454
1413/* Can at least one segment of SKB be sent right now, according to the 1455/* Can at least one segment of SKB be sent right now, according to the
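
The rework above hoists the `skb->len < mss` test out of tcp_nagle_check() into a `partial` argument, so tcp_mss_split_point() can reuse the same predicate for its sub-MSS tail. A simplified userspace model of the resulting split decision; the helper names and boolean arguments are stand-ins, not the kernel API:

#include <stdbool.h>
#include <stdio.h>

/* Defer a sub-MSS tail if corked, or if Nagle applies and a small
 * packet is still unacked (Minshall's variant). */
static bool nagle_check(bool partial, bool cork, bool nodelay, bool small_unacked)
{
	return partial && (cork || (!nodelay && small_unacked));
}

static unsigned int mss_split_point(unsigned int needed, unsigned int mss,
				    bool cork, bool nodelay, bool small_unacked)
{
	unsigned int partial = needed % mss;

	if (nagle_check(partial != 0, cork, nodelay, small_unacked))
		return needed - partial;	/* split at the last MSS boundary */
	return needed;				/* send the sub-MSS tail too */
}

int main(void)
{
	/* 5000 bytes sendable at MSS 1448: 3 full segments + 656-byte tail */
	printf("%u\n", mss_split_point(5000, 1448, false, false, true));	/* 4344 */
	printf("%u\n", mss_split_point(5000, 1448, false, true,  true));	/* 5000 */
	return 0;
}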
@@ -1447,28 +1489,6 @@ static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
1447 return tso_segs; 1489 return tso_segs;
1448} 1490}
1449 1491
1450/* Minshall's variant of the Nagle send check. */
1451static inline bool tcp_minshall_check(const struct tcp_sock *tp)
1452{
1453 return after(tp->snd_sml, tp->snd_una) &&
1454 !after(tp->snd_sml, tp->snd_nxt);
1455}
1456
1457/* Return false, if packet can be sent now without violation Nagle's rules:
1458 * 1. It is full sized.
1459 * 2. Or it contains FIN. (already checked by caller)
1460 * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
1461 * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
1462 * With Minshall's modification: all sent small packets are ACKed.
1463 */
1464static inline bool tcp_nagle_check(const struct tcp_sock *tp,
1465 const struct sk_buff *skb,
1466 unsigned int mss_now, int nonagle)
1467{
1468 return skb->len < mss_now &&
1469 ((nonagle & TCP_NAGLE_CORK) ||
1470 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1471}
1472 1492
1473/* Return true if the Nagle test allows this packet to be 1493/* Return true if the Nagle test allows this packet to be
1474 * sent now. 1494 * sent now.
@@ -1489,7 +1509,7 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
1489 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) 1509 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1490 return true; 1510 return true;
1491 1511
1492 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) 1512 if (!tcp_nagle_check(skb->len < cur_mss, tp, cur_mss, nonagle))
1493 return true; 1513 return true;
1494 1514
1495 return false; 1515 return false;
@@ -1892,7 +1912,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1892 limit = tcp_mss_split_point(sk, skb, mss_now, 1912 limit = tcp_mss_split_point(sk, skb, mss_now,
1893 min_t(unsigned int, 1913 min_t(unsigned int,
1894 cwnd_quota, 1914 cwnd_quota,
1895 sk->sk_gso_max_segs)); 1915 sk->sk_gso_max_segs),
1916 nonagle);
1896 1917
1897 if (skb->len > limit && 1918 if (skb->len > limit &&
1898 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) 1919 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
@@ -2756,7 +2777,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2756EXPORT_SYMBOL(tcp_make_synack); 2777EXPORT_SYMBOL(tcp_make_synack);
2757 2778
2758/* Do all connect socket setups that can be done AF independent. */ 2779/* Do all connect socket setups that can be done AF independent. */
2759void tcp_connect_init(struct sock *sk) 2780static void tcp_connect_init(struct sock *sk)
2760{ 2781{
2761 const struct dst_entry *dst = __sk_dst_get(sk); 2782 const struct dst_entry *dst = __sk_dst_get(sk);
2762 struct tcp_sock *tp = tcp_sk(sk); 2783 struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 8b97d71e193b..1f2d37613c9e 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -38,7 +38,7 @@ MODULE_DESCRIPTION("TCP cwnd snooper");
38MODULE_LICENSE("GPL"); 38MODULE_LICENSE("GPL");
39MODULE_VERSION("1.1"); 39MODULE_VERSION("1.1");
40 40
41static int port __read_mostly = 0; 41static int port __read_mostly;
42MODULE_PARM_DESC(port, "Port to match (0=all)"); 42MODULE_PARM_DESC(port, "Port to match (0=all)");
43module_param(port, int, 0); 43module_param(port, int, 0);
44 44
@@ -46,7 +46,7 @@ static unsigned int bufsize __read_mostly = 4096;
46MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); 46MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)");
47module_param(bufsize, uint, 0); 47module_param(bufsize, uint, 0);
48 48
49static unsigned int fwmark __read_mostly = 0; 49static unsigned int fwmark __read_mostly;
50MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)"); 50MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
51module_param(fwmark, uint, 0); 51module_param(fwmark, uint, 0);
52 52
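
The tcp_probe.c changes are pure style: objects with static storage duration are zero-initialized by the C language, so checkpatch flags explicit `= 0` initializers on statics as redundant. A trivial demonstration:

#include <stdio.h>

static int port;		/* zero-initialized by the C standard */
static unsigned int fwmark;	/* likewise; an explicit "= 0" adds nothing */

int main(void)
{
	printf("port=%d fwmark=%u\n", port, fwmark);	/* prints 0 and 0 */
	return 0;
}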
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index a347a078ee07..1a8d271f994d 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -3,7 +3,7 @@
3 * YeAH TCP 3 * YeAH TCP
4 * 4 *
5 * For further details look at: 5 * For further details look at:
6 * http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf 6 * https://web.archive.org/web/20080316215752/http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
7 * 7 *
8 */ 8 */
9#include <linux/mm.h> 9#include <linux/mm.h>
@@ -15,13 +15,13 @@
15 15
16#include "tcp_vegas.h" 16#include "tcp_vegas.h"
17 17
18#define TCP_YEAH_ALPHA 80 //lin number of packets queued at the bottleneck 18#define TCP_YEAH_ALPHA 80 /* number of packets queued at the bottleneck */
19#define TCP_YEAH_GAMMA 1 //lin fraction of queue to be removed per rtt 19#define TCP_YEAH_GAMMA 1 /* fraction of queue to be removed per rtt */
20#define TCP_YEAH_DELTA 3 //log minimum fraction of cwnd to be removed on loss 20#define TCP_YEAH_DELTA 3 /* log minimum fraction of cwnd to be removed on loss */
21#define TCP_YEAH_EPSILON 1 //log maximum fraction to be removed on early decongestion 21#define TCP_YEAH_EPSILON 1 /* log maximum fraction to be removed on early decongestion */
22#define TCP_YEAH_PHY 8 //lin maximum delta from base 22#define TCP_YEAH_PHY 8 /* maximum delta from base */
23#define TCP_YEAH_RHO 16 //lin minimum number of consecutive rtt to consider competition on loss 23#define TCP_YEAH_RHO 16 /* minimum number of consecutive rtt to consider competition on loss */
24#define TCP_YEAH_ZETA 50 //lin minimum number of state switchs to reset reno_count 24#define TCP_YEAH_ZETA 50 /* minimum number of state switches to reset reno_count */
25 25
26#define TCP_SCALABLE_AI_CNT 100U 26#define TCP_SCALABLE_AI_CNT 100U
27 27
@@ -214,9 +214,9 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
214 if (yeah->doing_reno_now < TCP_YEAH_RHO) { 214 if (yeah->doing_reno_now < TCP_YEAH_RHO) {
215 reduction = yeah->lastQ; 215 reduction = yeah->lastQ;
216 216
217 reduction = min( reduction, max(tp->snd_cwnd>>1, 2U) ); 217 reduction = min(reduction, max(tp->snd_cwnd>>1, 2U));
218 218
219 reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); 219 reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
220 } else 220 } else
221 reduction = max(tp->snd_cwnd>>1, 2U); 221 reduction = max(tp->snd_cwnd>>1, 2U);
222 222
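
For the YeAH ssthresh path above, a worked example of the clamping with illustrative numbers (a measured queue of 3 packets against a cwnd of 40 is bumped up to the cwnd >> TCP_YEAH_DELTA floor):

#include <stdio.h>

#define TCP_YEAH_DELTA	3	/* as defined in the file above */

static unsigned int min_u(unsigned int a, unsigned int b) { return a < b ? a : b; }
static unsigned int max_u(unsigned int a, unsigned int b) { return a > b ? a : b; }

int main(void)
{
	unsigned int snd_cwnd = 40, lastQ = 3;	/* illustrative values */
	unsigned int reduction = lastQ;

	/* Shed the measured queue, clamped as in tcp_yeah_ssthresh(). */
	reduction = min_u(reduction, max_u(snd_cwnd >> 1, 2U));
	reduction = max_u(reduction, snd_cwnd >> TCP_YEAH_DELTA);

	printf("reduction = %u\n", reduction);	/* 5, i.e. 40 >> 3 */
	return 0;
}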
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index a7e4729e974b..77bd16fa9f34 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -223,7 +223,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
223 inet_get_local_port_range(net, &low, &high); 223 inet_get_local_port_range(net, &low, &high);
224 remaining = (high - low) + 1; 224 remaining = (high - low) + 1;
225 225
226 rand = net_random(); 226 rand = prandom_u32();
227 first = (((u64)rand * remaining) >> 32) + low; 227 first = (((u64)rand * remaining) >> 32) + low;
228 /* 228 /*
229 * force rand to be an odd multiple of UDP_HTABLE_SIZE 229 * force rand to be an odd multiple of UDP_HTABLE_SIZE
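
Beyond swapping net_random() for prandom_u32(), the hunk shows the multiply-shift trick for scaling a random 32-bit value into a port range: the 64-bit product rand * remaining lies in [0, remaining * 2^32), so its top 32 bits are uniform over [0, remaining). A userspace sketch (32768..61000 is used here as the traditional default local port range; any range works):

#include <stdint.h>
#include <stdio.h>

/* Scale a uniform 32-bit random value into [low, low + remaining - 1]
 * with one multiply and shift, as in the hunk above. */
static uint32_t pick_port(uint32_t rand, uint32_t low, uint32_t remaining)
{
	return (uint32_t)(((uint64_t)rand * remaining) >> 32) + low;
}

int main(void)
{
	uint32_t low = 32768, high = 61000;	/* illustrative port range */
	uint32_t remaining = high - low + 1;

	printf("%u\n", pick_port(0xDEADBEEFu, low, remaining));
	return 0;
}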
@@ -902,7 +902,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
902 * Get and verify the address. 902 * Get and verify the address.
903 */ 903 */
904 if (msg->msg_name) { 904 if (msg->msg_name) {
905 struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; 905 DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
906 if (msg->msg_namelen < sizeof(*usin)) 906 if (msg->msg_namelen < sizeof(*usin))
907 return -EINVAL; 907 return -EINVAL;
908 if (usin->sin_family != AF_INET) { 908 if (usin->sin_family != AF_INET) {
@@ -986,7 +986,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
986 fl4 = &fl4_stack; 986 fl4 = &fl4_stack;
987 flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, 987 flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
988 RT_SCOPE_UNIVERSE, sk->sk_protocol, 988 RT_SCOPE_UNIVERSE, sk->sk_protocol,
989 inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, 989 inet_sk_flowi_flags(sk),
990 faddr, saddr, dport, inet->inet_sport); 990 faddr, saddr, dport, inet->inet_sport);
991 991
992 security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); 992 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
@@ -1226,7 +1226,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1226 size_t len, int noblock, int flags, int *addr_len) 1226 size_t len, int noblock, int flags, int *addr_len)
1227{ 1227{
1228 struct inet_sock *inet = inet_sk(sk); 1228 struct inet_sock *inet = inet_sk(sk);
1229 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; 1229 DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
1230 struct sk_buff *skb; 1230 struct sk_buff *skb;
1231 unsigned int ulen, copied; 1231 unsigned int ulen, copied;
1232 int peeked, off = 0; 1232 int peeked, off = 0;
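
DECLARE_SOCKADDR replaces the bare casts of msg->msg_name; the macro (from <linux/socket.h>) declares the typed pointer and adds a compile-time check that the type fits the storage reserved for socket addresses. A simplified userspace analogue, assuming only that behavior (this is not the kernel macro's actual expansion):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

/* Simplified model: declare a typed sockaddr pointer from an untyped
 * msg_name, with a compile-time size check. Not the kernel macro. */
#define DECLARE_SOCKADDR_MODEL(type, dst, src)				     \
	_Static_assert(sizeof(*(type)0) <= sizeof(struct sockaddr_storage), \
		       "sockaddr type larger than sockaddr_storage");	     \
	type dst = (type)(src)

static unsigned short peek_port(void *msg_name)
{
	DECLARE_SOCKADDR_MODEL(struct sockaddr_in *, sin, msg_name);
	return ntohs(sin->sin_port);
}

int main(void)
{
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_port = htons(53),	/* illustrative port */
	};
	return peek_port(&sin) == 53 ? 0 : 1;
}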
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 79c62bdcd3c5..25f5cee3a08a 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -14,6 +14,15 @@
14#include <net/udp.h> 14#include <net/udp.h>
15#include <net/protocol.h> 15#include <net/protocol.h>
16 16
17static DEFINE_SPINLOCK(udp_offload_lock);
18static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;
19
20struct udp_offload_priv {
21 struct udp_offload *offload;
22 struct rcu_head rcu;
23 struct udp_offload_priv __rcu *next;
24};
25
17static int udp4_ufo_send_check(struct sk_buff *skb) 26static int udp4_ufo_send_check(struct sk_buff *skb)
18{ 27{
19 if (!pskb_may_pull(skb, sizeof(struct udphdr))) 28 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
@@ -89,10 +98,144 @@ out:
89 return segs; 98 return segs;
90} 99}
91 100
101int udp_add_offload(struct udp_offload *uo)
102{
103 struct udp_offload_priv __rcu **head = &udp_offload_base;
104 struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL);
105
106 if (!new_offload)
107 return -ENOMEM;
108
109 new_offload->offload = uo;
110
111 spin_lock(&udp_offload_lock);
112 rcu_assign_pointer(new_offload->next, rcu_dereference(*head));
113 rcu_assign_pointer(*head, new_offload);
114 spin_unlock(&udp_offload_lock);
115
116 return 0;
117}
118EXPORT_SYMBOL(udp_add_offload);
119
120static void udp_offload_free_routine(struct rcu_head *head)
121{
122 struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu);
123 kfree(ou_priv);
124}
125
126void udp_del_offload(struct udp_offload *uo)
127{
128 struct udp_offload_priv __rcu **head = &udp_offload_base;
129 struct udp_offload_priv *uo_priv;
130
131 spin_lock(&udp_offload_lock);
132
133 uo_priv = rcu_dereference(*head);
134 for (; uo_priv != NULL;
135 uo_priv = rcu_dereference(*head)) {
136
137 if (uo_priv->offload == uo) {
138 rcu_assign_pointer(*head, rcu_dereference(uo_priv->next));
139 goto unlock;
140 }
141 head = &uo_priv->next;
142 }
143 pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port));
144unlock:
145 spin_unlock(&udp_offload_lock);
146 if (uo_priv != NULL)
147 call_rcu(&uo_priv->rcu, udp_offload_free_routine);
148}
149EXPORT_SYMBOL(udp_del_offload);
150
151static struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
152{
153 struct udp_offload_priv *uo_priv;
154 struct sk_buff *p, **pp = NULL;
155 struct udphdr *uh, *uh2;
156 unsigned int hlen, off;
157 int flush = 1;
158
159 if (NAPI_GRO_CB(skb)->udp_mark ||
160 (!skb->encapsulation && skb->ip_summed != CHECKSUM_COMPLETE))
161 goto out;
162
163 /* mark that this skb passed once through the udp gro layer */
164 NAPI_GRO_CB(skb)->udp_mark = 1;
165
166 off = skb_gro_offset(skb);
167 hlen = off + sizeof(*uh);
168 uh = skb_gro_header_fast(skb, off);
169 if (skb_gro_header_hard(skb, hlen)) {
170 uh = skb_gro_header_slow(skb, hlen, off);
171 if (unlikely(!uh))
172 goto out;
173 }
174
175 rcu_read_lock();
176 uo_priv = rcu_dereference(udp_offload_base);
177 for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
178 if (uo_priv->offload->port == uh->dest &&
179 uo_priv->offload->callbacks.gro_receive)
180 goto unflush;
181 }
182 goto out_unlock;
183
184unflush:
185 flush = 0;
186
187 for (p = *head; p; p = p->next) {
188 if (!NAPI_GRO_CB(p)->same_flow)
189 continue;
190
191 uh2 = (struct udphdr *)(p->data + off);
192 if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
193 NAPI_GRO_CB(p)->same_flow = 0;
194 continue;
195 }
196 }
197
198 skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
199 pp = uo_priv->offload->callbacks.gro_receive(head, skb);
200
201out_unlock:
202 rcu_read_unlock();
203out:
204 NAPI_GRO_CB(skb)->flush |= flush;
205 return pp;
206}
207
208static int udp_gro_complete(struct sk_buff *skb, int nhoff)
209{
210 struct udp_offload_priv *uo_priv;
211 __be16 newlen = htons(skb->len - nhoff);
212 struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
213 int err = -ENOSYS;
214
215 uh->len = newlen;
216
217 rcu_read_lock();
218
219 uo_priv = rcu_dereference(udp_offload_base);
220 for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
221 if (uo_priv->offload->port == uh->dest &&
222 uo_priv->offload->callbacks.gro_complete)
223 break;
224 }
225
226 if (uo_priv != NULL)
227 err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr));
228
229 rcu_read_unlock();
230 return err;
231}
232
92static const struct net_offload udpv4_offload = { 233static const struct net_offload udpv4_offload = {
93 .callbacks = { 234 .callbacks = {
94 .gso_send_check = udp4_ufo_send_check, 235 .gso_send_check = udp4_ufo_send_check,
95 .gso_segment = udp4_ufo_fragment, 236 .gso_segment = udp4_ufo_fragment,
237 .gro_receive = udp_gro_receive,
238 .gro_complete = udp_gro_complete,
96 }, 239 },
97}; 240};
98 241
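
The new udp_add_offload()/udp_del_offload() pair gives UDP-encapsulating protocols a GRO hook keyed by destination port. A hedged sketch of how a tunnel module might register, with placeholder callback bodies; the struct udp_offload layout is inferred from the fields this diff dereferences (->port and ->callbacks.gro_receive/gro_complete):

#include <linux/module.h>
#include <net/protocol.h>
#include <net/udp.h>

static struct sk_buff **my_tun_gro_receive(struct sk_buff **head,
					   struct sk_buff *skb)
{
	/* Placeholder: a real tunnel would match flows on its inner
	 * header here and hand merge candidates back to GRO. */
	NAPI_GRO_CB(skb)->flush = 1;
	return NULL;
}

static int my_tun_gro_complete(struct sk_buff *skb, int nhoff)
{
	/* Placeholder: fix up inner headers of the merged skb here;
	 * nhoff already points past the outer UDP header. */
	return 0;
}

static struct udp_offload my_tun_offload = {
	.port = htons(4789),	/* e.g. the IANA-assigned VXLAN port */
	.callbacks = {
		.gro_receive	= my_tun_gro_receive,
		.gro_complete	= my_tun_gro_complete,
	},
};

static int __init my_tun_init(void)
{
	return udp_add_offload(&my_tun_offload);
}

static void __exit my_tun_exit(void)
{
	udp_del_offload(&my_tun_offload);
}

module_init(my_tun_init);
module_exit(my_tun_exit);
MODULE_LICENSE("GPL");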
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index e3db3f915114..71acd0014f2d 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -48,7 +48,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
48 hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); 48 hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
49 49
50 skb_set_network_header(skb, -x->props.header_len - 50 skb_set_network_header(skb, -x->props.header_len -
51 hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); 51 hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
52 if (x->sel.family != AF_INET6) 52 if (x->sel.family != AF_INET6)
53 skb->network_header += IPV4_BEET_PHMAXLEN; 53 skb->network_header += IPV4_BEET_PHMAXLEN;
54 skb->mac_header = skb->network_header + 54 skb->mac_header = skb->network_header +
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 0b2a0641526a..542074c00c78 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -16,7 +16,7 @@
16 16
17static int xfrm4_init_flags(struct xfrm_state *x) 17static int xfrm4_init_flags(struct xfrm_state *x)
18{ 18{
19 if (ipv4_config.no_pmtu_disc) 19 if (xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
20 x->props.flags |= XFRM_STATE_NOPMTUDISC; 20 x->props.flags |= XFRM_STATE_NOPMTUDISC;
21 return 0; 21 return 0;
22} 22}