path: root/net/ipv4
Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/af_inet.c                      |  32
-rw-r--r--  net/ipv4/arp.c                          |  42
-rw-r--r--  net/ipv4/cipso_ipv4.c                   |  12
-rw-r--r--  net/ipv4/datagram.c                     |   2
-rw-r--r--  net/ipv4/devinet.c                      |  62
-rw-r--r--  net/ipv4/fib_lookup.h                   |   2
-rw-r--r--  net/ipv4/fib_rules.c                    |   5
-rw-r--r--  net/ipv4/fib_semantics.c                |   5
-rw-r--r--  net/ipv4/icmp.c                         |   4
-rw-r--r--  net/ipv4/igmp.c                         |  70
-rw-r--r--  net/ipv4/inet_lro.c                     | 173
-rw-r--r--  net/ipv4/inetpeer.c                     |  11
-rw-r--r--  net/ipv4/ip_fragment.c                  |   2
-rw-r--r--  net/ipv4/ip_options.c                   |  12
-rw-r--r--  net/ipv4/ip_sockglue.c                  |   5
-rw-r--r--  net/ipv4/ip_tunnel_core.c               |   5
-rw-r--r--  net/ipv4/ipmr.c                         |   2
-rw-r--r--  net/ipv4/netfilter/ipt_SYNPROXY.c       |   1
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic.c  |  15
-rw-r--r--  net/ipv4/netfilter/nft_reject_ipv4.c    |   2
-rw-r--r--  net/ipv4/ping.c                         |  17
-rw-r--r--  net/ipv4/proc.c                         |   1
-rw-r--r--  net/ipv4/protocol.c                     |   8
-rw-r--r--  net/ipv4/raw.c                          |   4
-rw-r--r--  net/ipv4/syncookies.c                   |   2
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c              |  25
-rw-r--r--  net/ipv4/tcp.c                          |  73
-rw-r--r--  net/ipv4/tcp_input.c                    |   6
-rw-r--r--  net/ipv4/tcp_ipv4.c                     |   6
-rw-r--r--  net/ipv4/tcp_memcontrol.c               |   9
-rw-r--r--  net/ipv4/tcp_offload.c                  |  40
-rw-r--r--  net/ipv4/tcp_output.c                   | 141
-rw-r--r--  net/ipv4/tcp_probe.c                    |   4
-rw-r--r--  net/ipv4/tcp_yeah.c                     |  20
-rw-r--r--  net/ipv4/udp.c                          |  49
-rw-r--r--  net/ipv4/xfrm4_mode_beet.c              |   2
-rw-r--r--  net/ipv4/xfrm4_state.c                  |   2
37 files changed, 400 insertions, 473 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 70011e029ac1..b8bc1a3d5cf1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -126,9 +126,6 @@
 static struct list_head inetsw[SOCK_MAX];
 static DEFINE_SPINLOCK(inetsw_lock);
 
-struct ipv4_config ipv4_config;
-EXPORT_SYMBOL(ipv4_config);
-
 /* New destruction routine */
 
 void inet_sock_destruct(struct sock *sk)
@@ -342,7 +339,7 @@ lookup_protocol:
 		inet->hdrincl = 1;
 	}
 
-	if (ipv4_config.no_pmtu_disc)
+	if (net->ipv4.sysctl_ip_no_pmtu_disc)
 		inet->pmtudisc = IP_PMTUDISC_DONT;
 	else
 		inet->pmtudisc = IP_PMTUDISC_WANT;
@@ -1133,7 +1130,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 	fl4 = &inet->cork.fl.u.ip4;
 	rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk),
 			      sk->sk_bound_dev_if, sk->sk_protocol,
-			      inet->inet_sport, inet->inet_dport, sk, false);
+			      inet->inet_sport, inet->inet_dport, sk);
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
 
@@ -1377,8 +1374,12 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
-		iph2 = ip_hdr(p);
-
+		iph2 = (struct iphdr *)(p->data + off);
+		/* The above works because, with the exception of the top
+		 * (inner most) layer, we only aggregate pkts with the same
+		 * hdr length so all the hdrs we'll need to verify will start
+		 * at the same offset.
+		 */
 		if ((iph->protocol ^ iph2->protocol) |
 		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
 		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
@@ -1397,6 +1398,11 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 	}
 
 	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_set_network_header(skb, off);
+	/* The above will be needed by the transport layer if there is one
+	 * immediately following this IP hdr.
+	 */
+
 	skb_gro_pull(skb, sizeof(*iph));
 	skb_set_transport_header(skb, skb_gro_offset(skb));
 
@@ -1411,10 +1417,10 @@ out:
 	return pp;
 }
 
-static int inet_gro_complete(struct sk_buff *skb)
+static int inet_gro_complete(struct sk_buff *skb, int nhoff)
 {
-	__be16 newlen = htons(skb->len - skb_network_offset(skb));
-	struct iphdr *iph = ip_hdr(skb);
+	__be16 newlen = htons(skb->len - nhoff);
+	struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
 	const struct net_offload *ops;
 	int proto = iph->protocol;
 	int err = -ENOSYS;
@@ -1427,7 +1433,11 @@ static int inet_gro_complete(struct sk_buff *skb)
 	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 		goto out_unlock;
 
-	err = ops->callbacks.gro_complete(skb);
+	/* Only need to add sizeof(*iph) to get to the next hdr below
+	 * because any hdr with option will have been flushed in
+	 * inet_gro_receive().
+	 */
+	err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph));
 
 out_unlock:
 	rcu_read_unlock();
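Note: the inet_gro_complete() change above is part of moving GRO completion from
implicit skb state to an explicit header offset, so the callback chain can be
stacked for encapsulated packets. Below is a minimal userspace model of that
calling convention; the names and the two-layer setup are hypothetical, only
the offset-threading pattern is taken from the diff:

	#include <stdio.h>

	struct pkt { unsigned char data[128]; int len; };

	/* Mimics ops->callbacks.gro_complete(skb, nhoff): each layer is
	 * told where its own header starts. */
	static int tcp_complete(struct pkt *p, int thoff)
	{
		printf("TCP header at offset %d\n", thoff);
		return 0;
	}

	static int ipv4_complete(struct pkt *p, int nhoff)
	{
		int ihl = 20; /* no options: option-bearing pkts were flushed in gro_receive */

		printf("IPv4 header at offset %d\n", nhoff);
		/* Pass the next header's offset down, exactly as the hunk
		 * does with nhoff + sizeof(*iph). */
		return tcp_complete(p, nhoff + ihl);
	}

	int main(void)
	{
		struct pkt p = { .len = 54 };

		return ipv4_complete(&p, 14); /* 14 = Ethernet header length */
	}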
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7808093cede6..5bf408b466b1 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -166,18 +166,20 @@ struct neigh_table arp_tbl = {
 	.id		= "arp_cache",
 	.parms		= {
 		.tbl			= &arp_tbl,
-		.base_reachable_time	= 30 * HZ,
-		.retrans_time		= 1 * HZ,
-		.gc_staletime		= 60 * HZ,
 		.reachable_time		= 30 * HZ,
-		.delay_probe_time	= 5 * HZ,
-		.queue_len_bytes	= 64*1024,
-		.ucast_probes		= 3,
-		.mcast_probes		= 3,
-		.anycast_delay		= 1 * HZ,
-		.proxy_delay		= (8 * HZ) / 10,
-		.proxy_qlen		= 64,
-		.locktime		= 1 * HZ,
+		.data	= {
+			[NEIGH_VAR_MCAST_PROBES] = 3,
+			[NEIGH_VAR_UCAST_PROBES] = 3,
+			[NEIGH_VAR_RETRANS_TIME] = 1 * HZ,
+			[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
+			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
+			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
+			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
+			[NEIGH_VAR_PROXY_QLEN] = 64,
+			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
+			[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
+			[NEIGH_VAR_LOCKTIME] = 1 * HZ,
+		},
 	},
 	.gc_interval	= 30 * HZ,
 	.gc_thresh1	= 128,
@@ -359,14 +361,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	if (!saddr)
 		saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
 
-	probes -= neigh->parms->ucast_probes;
+	probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
 	if (probes < 0) {
 		if (!(neigh->nud_state & NUD_VALID))
 			pr_debug("trying to ucast probe in NUD_INVALID\n");
 		neigh_ha_snapshot(dst_ha, neigh, dev);
 		dst_hw = dst_ha;
 	} else {
-		probes -= neigh->parms->app_probes;
+		probes -= NEIGH_VAR(neigh->parms, APP_PROBES);
 		if (probes < 0) {
 			neigh_app_ns(neigh);
 			return;
@@ -379,6 +381,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 
 static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
 {
+	struct net *net = dev_net(in_dev->dev);
 	int scope;
 
 	switch (IN_DEV_ARP_IGNORE(in_dev)) {
@@ -397,6 +400,7 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
 	case 3:	/* Do not reply for scope host addresses */
 		sip = 0;
 		scope = RT_SCOPE_LINK;
+		in_dev = NULL;
 		break;
 	case 4:	/* Reserved */
 	case 5:
@@ -408,7 +412,7 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
 	default:
 		return 0;
 	}
-	return !inet_confirm_addr(in_dev, sip, tip, scope);
+	return !inet_confirm_addr(net, in_dev, sip, tip, scope);
 }
 
 static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
@@ -871,7 +875,7 @@ static int arp_process(struct sk_buff *skb)
 
 	if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
 	    skb->pkt_type == PACKET_HOST ||
-	    in_dev->arp_parms->proxy_delay == 0) {
+	    NEIGH_VAR(in_dev->arp_parms, PROXY_DELAY) == 0) {
 		arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
 			 dev, tip, sha, dev->dev_addr,
 			 sha);
@@ -910,7 +914,8 @@
 		   agents are active. Taking the first reply prevents
 		   arp trashing and chooses the fastest router.
 		 */
-		override = time_after(jiffies, n->updated + n->parms->locktime);
+		override = time_after(jiffies, n->updated +
+				      NEIGH_VAR(n->parms, LOCKTIME));
 
 		/* Broadcast replies and request packets
 		   do not assert neighbour reachability.
@@ -1107,7 +1112,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
 	return err;
 }
 
-int arp_invalidate(struct net_device *dev, __be32 ip)
+static int arp_invalidate(struct net_device *dev, __be32 ip)
 {
 	struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
 	int err = -ENXIO;
@@ -1122,7 +1127,6 @@ int arp_invalidate(struct net_device *dev, __be32 ip)
 
 	return err;
 }
-EXPORT_SYMBOL(arp_invalidate);
 
 static int arp_req_delete_public(struct net *net, struct arpreq *r,
 				 struct net_device *dev)
@@ -1284,7 +1288,7 @@ void __init arp_init(void)
 	dev_add_pack(&arp_packet_type);
 	arp_proc_init();
 #ifdef CONFIG_SYSCTL
-	neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL);
+	neigh_sysctl_register(NULL, &arp_tbl.parms, NULL);
 #endif
 	register_netdevice_notifier(&arp_netdev_notifier);
 }
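Note: the arp_tbl conversion above follows the generic neighbour-table rework
in which per-parameter struct fields become one array indexed by NEIGH_VAR_*
and read through the NEIGH_VAR() macro. A self-contained sketch of that
pattern, with illustrative stand-ins rather than the kernel's types:

	#include <stdio.h>

	enum {
		NEIGH_VAR_MCAST_PROBES,
		NEIGH_VAR_UCAST_PROBES,
		NEIGH_VAR_RETRANS_TIME,
		NEIGH_VAR_LOCKTIME,
		NEIGH_VAR_DATA_MAX,
	};

	struct neigh_parms { int data[NEIGH_VAR_DATA_MAX]; };

	/* Same shape as the kernel macro: expands to an array access, so
	 * sysctl code can address every tunable generically. */
	#define NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr])

	int main(void)
	{
		struct neigh_parms parms = {
			.data = {
				[NEIGH_VAR_MCAST_PROBES] = 3,
				[NEIGH_VAR_UCAST_PROBES] = 3,
				[NEIGH_VAR_RETRANS_TIME] = 100,
				[NEIGH_VAR_LOCKTIME]     = 100,
			},
		};

		/* What used to be parms->ucast_probes is now: */
		printf("ucast probes: %d\n", NEIGH_VAR(&parms, UCAST_PROBES));
		return 0;
	}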
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 667c1d4ca984..69e77c8ff285 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -31,8 +31,7 @@
  * the GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
  *
  */
 
@@ -1336,8 +1335,7 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
 	secattr->flags |= NETLBL_SECATTR_MLS_LVL;
 
 	if (tag_len > 4) {
-		secattr->attr.mls.cat =
-			netlbl_secattr_catmap_alloc(GFP_ATOMIC);
+		secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC);
 		if (secattr->attr.mls.cat == NULL)
 			return -ENOMEM;
 
@@ -1432,8 +1430,7 @@ static int cipso_v4_parsetag_enum(const struct cipso_v4_doi *doi_def,
 	secattr->flags |= NETLBL_SECATTR_MLS_LVL;
 
 	if (tag_len > 4) {
-		secattr->attr.mls.cat =
-			netlbl_secattr_catmap_alloc(GFP_ATOMIC);
+		secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC);
 		if (secattr->attr.mls.cat == NULL)
 			return -ENOMEM;
 
@@ -1527,8 +1524,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
 	secattr->flags |= NETLBL_SECATTR_MLS_LVL;
 
 	if (tag_len > 4) {
-		secattr->attr.mls.cat =
-			netlbl_secattr_catmap_alloc(GFP_ATOMIC);
+		secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC);
 		if (secattr->attr.mls.cat == NULL)
 			return -ENOMEM;
 
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 19e36376d2a0..8b5134c582f1 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -53,7 +53,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
 			      RT_CONN_FLAGS(sk), oif,
 			      sk->sk_protocol,
-			      inet->inet_sport, usin->sin_port, sk, true);
+			      inet->inet_sport, usin->sin_port, sk);
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
 		if (err == -ENETUNREACH)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a1b5bcbd04ae..0feebd5de295 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -99,6 +99,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_BROADCAST]		= { .type = NLA_U32 },
 	[IFA_LABEL]		= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
+	[IFA_FLAGS]		= { .type = NLA_U32 },
 };
 
 #define IN4_ADDR_HSIZE_SHIFT	8
@@ -500,6 +501,7 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 			return -ENOBUFS;
 	}
 	ipv4_devconf_setall(in_dev);
+	neigh_parms_data_state_setall(in_dev->arp_parms);
 	if (ifa->ifa_dev != in_dev) {
 		WARN_ON(ifa->ifa_dev);
 		in_dev_hold(in_dev);
@@ -747,6 +749,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
 		goto errout;
 
 	ipv4_devconf_setall(in_dev);
+	neigh_parms_data_state_setall(in_dev->arp_parms);
 	in_dev_hold(in_dev);
 
 	if (tb[IFA_ADDRESS] == NULL)
@@ -755,7 +758,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
 	INIT_HLIST_NODE(&ifa->hash);
 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
-	ifa->ifa_flags = ifm->ifa_flags;
+	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
+					 ifm->ifa_flags;
 	ifa->ifa_scope = ifm->ifa_scope;
 	ifa->ifa_dev = in_dev;
 
@@ -1236,22 +1240,21 @@ static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
 
 /*
  * Confirm that local IP address exists using wildcards:
- * - in_dev: only on this interface, 0=any interface
+ * - net: netns to check, cannot be NULL
+ * - in_dev: only on this interface, NULL=any interface
  * - dst: only in the same subnet as dst, 0=any dst
  * - local: address, 0=autoselect the local address
  * - scope: maximum allowed scope value for the local address
 */
-__be32 inet_confirm_addr(struct in_device *in_dev,
+__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
			 __be32 dst, __be32 local, int scope)
 {
 	__be32 addr = 0;
 	struct net_device *dev;
-	struct net *net;
 
-	if (scope != RT_SCOPE_LINK)
+	if (in_dev != NULL)
 		return confirm_addr_indev(in_dev, dst, local, scope);
 
-	net = dev_net(in_dev->dev);
 	rcu_read_lock();
 	for_each_netdev_rcu(net, dev) {
 		in_dev = __in_dev_get_rcu(dev);
@@ -1435,7 +1438,8 @@ static size_t inet_nlmsg_size(void)
 	       + nla_total_size(4) /* IFA_ADDRESS */
 	       + nla_total_size(4) /* IFA_LOCAL */
 	       + nla_total_size(4) /* IFA_BROADCAST */
-	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
+	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
+	       + nla_total_size(4);  /* IFA_FLAGS */
 }
 
 static inline u32 cstamp_delta(unsigned long cstamp)
@@ -1503,6 +1507,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
 	    (ifa->ifa_label[0] &&
 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
+	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
 			  preferred, valid))
 		goto nla_put_failure;
@@ -1691,6 +1696,8 @@ static int inet_netconf_msgsize_devconf(int type)
 		size += nla_total_size(4);
 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
 		size += nla_total_size(4);
+	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
+		size += nla_total_size(4);
 
 	return size;
 }
@@ -1727,6 +1734,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
 		goto nla_put_failure;
+	if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
+	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
+			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
+		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
 
@@ -1764,6 +1775,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
+	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
 };
 
 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
@@ -1945,6 +1957,19 @@ static void inet_forward_change(struct net *net)
 	}
 }
 
+static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
+{
+	if (cnf == net->ipv4.devconf_dflt)
+		return NETCONFA_IFINDEX_DEFAULT;
+	else if (cnf == net->ipv4.devconf_all)
+		return NETCONFA_IFINDEX_ALL;
+	else {
+		struct in_device *idev
+			= container_of(cnf, struct in_device, cnf);
+		return idev->dev->ifindex;
+	}
+}
+
 static int devinet_conf_proc(struct ctl_table *ctl, int write,
			     void __user *buffer,
			     size_t *lenp, loff_t *ppos)
@@ -1957,6 +1982,7 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
 		struct ipv4_devconf *cnf = ctl->extra1;
 		struct net *net = ctl->extra2;
 		int i = (int *)ctl->data - cnf->data;
+		int ifindex;
 
 		set_bit(i, cnf->state);
 
@@ -1966,23 +1992,19 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
			if ((new_value == 0) && (old_value != 0))
				rt_cache_flush(net);
+
 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
 		    new_value != old_value) {
-			int ifindex;
-
-			if (cnf == net->ipv4.devconf_dflt)
-				ifindex = NETCONFA_IFINDEX_DEFAULT;
-			else if (cnf == net->ipv4.devconf_all)
-				ifindex = NETCONFA_IFINDEX_ALL;
-			else {
-				struct in_device *idev =
-					container_of(cnf, struct in_device,
-						     cnf);
-				ifindex = idev->dev->ifindex;
-			}
+			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
						    ifindex, cnf);
 		}
+		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
+		    new_value != old_value) {
+			ifindex = devinet_conf_ifindex(net, cnf);
+			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+						    ifindex, cnf);
+		}
 	}
 
 	return ret;
@@ -2160,7 +2182,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
 
 static void devinet_sysctl_register(struct in_device *idev)
 {
-	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
+	neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
				  &idev->cnf);
 }
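Note: rtm_to_ifaddr() above prefers the new 32-bit IFA_FLAGS netlink attribute
and falls back to the legacy 8-bit ifa_flags field of struct ifaddrmsg. A
minimal sketch of that fallback, using simplified stand-in types rather than
the kernel's:

	#include <stdio.h>
	#include <stdint.h>

	struct ifaddrmsg_lite { uint8_t ifa_flags; };       /* legacy: 8 bits only */
	struct nlattr_u32 { int present; uint32_t value; }; /* stand-in for tb[IFA_FLAGS] */

	static uint32_t resolve_ifa_flags(const struct ifaddrmsg_lite *ifm,
					  const struct nlattr_u32 *attr)
	{
		/* tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags */
		return attr->present ? attr->value : ifm->ifa_flags;
	}

	int main(void)
	{
		struct ifaddrmsg_lite ifm = { .ifa_flags = 0x80 };
		struct nlattr_u32 with = { 1, 0x100 }, without = { 0, 0 };

		printf("attribute wins: 0x%x\n", (unsigned)resolve_ifa_flags(&ifm, &with));
		printf("legacy field:   0x%x\n", (unsigned)resolve_ifa_flags(&ifm, &without));
		return 0;
	}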
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 388d113fd289..1e4f6600b31d 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -33,8 +33,6 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id,
 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len,
	       u32 tb_id, const struct nl_info *info, unsigned int nlm_flags);
 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
-int fib_detect_death(struct fib_info *fi, int order,
-		     struct fib_info **last_resort, int *last_idx, int dflt);
 
 static inline void fib_result_assign(struct fib_result *res,
				     struct fib_info *fi)
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 523be38e37de..f2e15738534d 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -104,7 +104,10 @@ errout:
 static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
 {
 	struct fib_result *result = (struct fib_result *) arg->result;
-	struct net_device *dev = result->fi->fib_dev;
+	struct net_device *dev = NULL;
+
+	if (result->fi)
+		dev = result->fi->fib_dev;
 
 	/* do not accept result if the route does
	 * not meet the required prefix length
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index e63f47a4e651..b53f0bf84dca 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -426,8 +426,9 @@ struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
 	return NULL;
 }
 
-int fib_detect_death(struct fib_info *fi, int order,
-		     struct fib_info **last_resort, int *last_idx, int dflt)
+static int fib_detect_death(struct fib_info *fi, int order,
+			    struct fib_info **last_resort, int *last_idx,
+			    int dflt)
 {
 	struct neighbour *n;
 	int state = NUD_NONE;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 5c0e8bc6e5ba..fb3c5637199d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -705,7 +705,9 @@ static void icmp_unreach(struct sk_buff *skb)
 	case ICMP_PORT_UNREACH:
 		break;
 	case ICMP_FRAG_NEEDED:
-		if (ipv4_config.no_pmtu_disc) {
+		if (net->ipv4.sysctl_ip_no_pmtu_disc == 2) {
+			goto out;
+		} else if (net->ipv4.sysctl_ip_no_pmtu_disc) {
 			LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"),
				       &iph->daddr);
 		} else {
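Note: judging only from this hunk, the per-netns sysctl now distinguishes three
modes for incoming ICMP_FRAG_NEEDED: 0 performs the normal PMTU update, 1
merely logs the message, and 2 discards it outright (the goto out). A small
model of that dispatch, with assumed semantics and hypothetical names:

	#include <stdio.h>

	enum pmtu_action { PMTU_UPDATE, PMTU_LOG_ONLY, PMTU_DISCARD };

	static enum pmtu_action frag_needed_action(int sysctl_ip_no_pmtu_disc)
	{
		if (sysctl_ip_no_pmtu_disc == 2)
			return PMTU_DISCARD;	/* goto out: message ignored */
		else if (sysctl_ip_no_pmtu_disc)
			return PMTU_LOG_ONLY;	/* LIMIT_NETDEBUG branch */
		return PMTU_UPDATE;		/* normal PMTU update path */
	}

	int main(void)
	{
		int mode;

		for (mode = 0; mode <= 2; mode++)
			printf("mode %d -> action %d\n", mode,
			       frag_needed_action(mode));
		return 0;
	}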
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 7defdc9ba167..84c4329cbd30 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -310,7 +310,7 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
 	struct ip_sf_list *psf;
 	int scount = 0;
 
-	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+	for (psf = pmc->sources; psf; psf = psf->sf_next) {
 		if (!is_in(pmc, psf, type, gdeleted, sdeleted))
 			continue;
 		scount++;
@@ -463,7 +463,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 	}
 	first = 1;
 	psf_prev = NULL;
-	for (psf=*psf_list; psf; psf=psf_next) {
+	for (psf = *psf_list; psf; psf = psf_next) {
 		__be32 *psrc;
 
 		psf_next = psf->sf_next;
@@ -520,7 +520,7 @@ empty_source:
 		return skb;
 	if (pmc->crcount || isquery) {
 		/* make sure we have room for group header */
-		if (skb && AVAILABLE(skb)<sizeof(struct igmpv3_grec)) {
+		if (skb && AVAILABLE(skb) < sizeof(struct igmpv3_grec)) {
 			igmpv3_sendpack(skb);
 			skb = NULL; /* add_grhead will get a new one */
 		}
@@ -576,7 +576,7 @@ static void igmpv3_clear_zeros(struct ip_sf_list **ppsf)
 	struct ip_sf_list *psf_prev, *psf_next, *psf;
 
 	psf_prev = NULL;
-	for (psf=*ppsf; psf; psf = psf_next) {
+	for (psf = *ppsf; psf; psf = psf_next) {
 		psf_next = psf->sf_next;
 		if (psf->sf_crcount == 0) {
 			if (psf_prev)
@@ -600,7 +600,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
 
 	/* deleted MCA's */
 	pmc_prev = NULL;
-	for (pmc=in_dev->mc_tomb; pmc; pmc=pmc_next) {
+	for (pmc = in_dev->mc_tomb; pmc; pmc = pmc_next) {
 		pmc_next = pmc->next;
 		if (pmc->sfmode == MCAST_INCLUDE) {
 			type = IGMPV3_BLOCK_OLD_SOURCES;
@@ -764,7 +764,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
 
 static void igmp_timer_expire(unsigned long data)
 {
-	struct ip_mc_list *im=(struct ip_mc_list *)data;
+	struct ip_mc_list *im = (struct ip_mc_list *)data;
 	struct in_device *in_dev = im->interface;
 
 	spin_lock(&im->lock);
@@ -794,10 +794,10 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
 	int i, scount;
 
 	scount = 0;
-	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+	for (psf = pmc->sources; psf; psf = psf->sf_next) {
 		if (scount == nsrcs)
 			break;
-		for (i=0; i<nsrcs; i++) {
+		for (i = 0; i < nsrcs; i++) {
 			/* skip inactive filters */
 			if (psf->sf_count[MCAST_INCLUDE] ||
			    pmc->sfcount[MCAST_EXCLUDE] !=
@@ -825,10 +825,10 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
 
 	/* mark INCLUDE-mode sources */
 	scount = 0;
-	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+	for (psf = pmc->sources; psf; psf = psf->sf_next) {
 		if (scount == nsrcs)
 			break;
-		for (i=0; i<nsrcs; i++)
+		for (i = 0; i < nsrcs; i++)
			if (srcs[i] == psf->sf_inaddr) {
				psf->sf_gsresp = 1;
				scount++;
@@ -1103,7 +1103,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 		pmc->tomb = im->tomb;
 		pmc->sources = im->sources;
 		im->tomb = im->sources = NULL;
-		for (psf=pmc->sources; psf; psf=psf->sf_next)
+		for (psf = pmc->sources; psf; psf = psf->sf_next)
			psf->sf_crcount = pmc->crcount;
 	}
 	spin_unlock_bh(&im->lock);
@@ -1121,7 +1121,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr)
 
 	spin_lock_bh(&in_dev->mc_tomb_lock);
 	pmc_prev = NULL;
-	for (pmc=in_dev->mc_tomb; pmc; pmc=pmc->next) {
+	for (pmc = in_dev->mc_tomb; pmc; pmc = pmc->next) {
 		if (pmc->multiaddr == multiaddr)
 			break;
 		pmc_prev = pmc;
@@ -1134,7 +1134,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr)
 	}
 	spin_unlock_bh(&in_dev->mc_tomb_lock);
 	if (pmc) {
-		for (psf=pmc->tomb; psf; psf=psf_next) {
+		for (psf = pmc->tomb; psf; psf = psf_next) {
			psf_next = psf->sf_next;
			kfree(psf);
		}
@@ -1167,7 +1167,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
 		psf = pmc->tomb;
 		pmc->tomb = NULL;
 		spin_unlock_bh(&pmc->lock);
-		for (; psf; psf=psf_next) {
+		for (; psf; psf = psf_next) {
			psf_next = psf->sf_next;
			kfree(psf);
		}
@@ -1557,7 +1557,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
 	int rv = 0;
 
 	psf_prev = NULL;
-	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+	for (psf = pmc->sources; psf; psf = psf->sf_next) {
 		if (psf->sf_inaddr == *psfsrc)
 			break;
 		psf_prev = psf;
@@ -1630,7 +1630,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 		pmc->sfcount[sfmode]--;
 	}
 	err = 0;
-	for (i=0; i<sfcount; i++) {
+	for (i = 0; i < sfcount; i++) {
 		int rv = ip_mc_del1_src(pmc, sfmode, &psfsrc[i]);
 
 		changerec |= rv > 0;
@@ -1650,7 +1650,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 		pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
			IGMP_Unsolicited_Report_Count;
 		in_dev->mr_ifc_count = pmc->crcount;
-		for (psf=pmc->sources; psf; psf = psf->sf_next)
+		for (psf = pmc->sources; psf; psf = psf->sf_next)
			psf->sf_crcount = 0;
 		igmp_ifc_event(pmc->interface);
 	} else if (sf_setstate(pmc) || changerec) {
@@ -1671,7 +1671,7 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
 	struct ip_sf_list *psf, *psf_prev;
 
 	psf_prev = NULL;
-	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+	for (psf = pmc->sources; psf; psf = psf->sf_next) {
 		if (psf->sf_inaddr == *psfsrc)
 			break;
 		psf_prev = psf;
@@ -1699,7 +1699,7 @@ static void sf_markstate(struct ip_mc_list *pmc)
 	struct ip_sf_list *psf;
 	int mca_xcount = pmc->sfcount[MCAST_EXCLUDE];
 
-	for (psf=pmc->sources; psf; psf=psf->sf_next)
+	for (psf = pmc->sources; psf; psf = psf->sf_next)
 		if (pmc->sfcount[MCAST_EXCLUDE]) {
			psf->sf_oldin = mca_xcount ==
				psf->sf_count[MCAST_EXCLUDE] &&
@@ -1716,7 +1716,7 @@ static int sf_setstate(struct ip_mc_list *pmc)
 	int new_in, rv;
 
 	rv = 0;
-	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+	for (psf = pmc->sources; psf; psf = psf->sf_next) {
 		if (pmc->sfcount[MCAST_EXCLUDE]) {
			new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] &&
				!psf->sf_count[MCAST_INCLUDE];
@@ -1726,7 +1726,7 @@ static int sf_setstate(struct ip_mc_list *pmc)
 			if (!psf->sf_oldin) {
				struct ip_sf_list *prev = NULL;

-				for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next) {
+				for (dpsf = pmc->tomb; dpsf; dpsf = dpsf->sf_next) {
					if (dpsf->sf_inaddr == psf->sf_inaddr)
						break;
					prev = dpsf;
@@ -1748,7 +1748,7 @@ static int sf_setstate(struct ip_mc_list *pmc)
			 * add or update "delete" records if an active filter
			 * is now inactive
			 */
-			for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next)
+			for (dpsf = pmc->tomb; dpsf; dpsf = dpsf->sf_next)
				if (dpsf->sf_inaddr == psf->sf_inaddr)
					break;
			if (!dpsf) {
@@ -1800,7 +1800,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 		if (!delta)
			pmc->sfcount[sfmode]++;
 		err = 0;
-		for (i=0; i<sfcount; i++) {
+		for (i = 0; i < sfcount; i++) {
			err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i]);
			if (err)
				break;
@@ -1810,7 +1810,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 
 		if (!delta)
			pmc->sfcount[sfmode]--;
-		for (j=0; j<i; j++)
+		for (j = 0; j < i; j++)
			(void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]);
 	} else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) {
 #ifdef CONFIG_IP_MULTICAST
@@ -1829,7 +1829,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 		pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
			IGMP_Unsolicited_Report_Count;
 		in_dev->mr_ifc_count = pmc->crcount;
-		for (psf=pmc->sources; psf; psf = psf->sf_next)
+		for (psf = pmc->sources; psf; psf = psf->sf_next)
			psf->sf_crcount = 0;
 		igmp_ifc_event(in_dev);
 	} else if (sf_setstate(pmc)) {
@@ -1844,12 +1844,12 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
 {
 	struct ip_sf_list *psf, *nextpsf;
 
-	for (psf=pmc->tomb; psf; psf=nextpsf) {
+	for (psf = pmc->tomb; psf; psf = nextpsf) {
 		nextpsf = psf->sf_next;
 		kfree(psf);
 	}
 	pmc->tomb = NULL;
-	for (psf=pmc->sources; psf; psf=nextpsf) {
+	for (psf = pmc->sources; psf; psf = nextpsf) {
 		nextpsf = psf->sf_next;
 		kfree(psf);
 	}
@@ -2043,7 +2043,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 		if (!psl)
			goto done;	/* err = -EADDRNOTAVAIL */
 		rv = !0;
-		for (i=0; i<psl->sl_count; i++) {
+		for (i = 0; i < psl->sl_count; i++) {
			rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr,
				sizeof(__be32));
			if (rv == 0)
@@ -2062,7 +2062,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 		ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, omode, 1,
			&mreqs->imr_sourceaddr, 1);
 
-		for (j=i+1; j<psl->sl_count; j++)
+		for (j = i+1; j < psl->sl_count; j++)
			psl->sl_addr[j-1] = psl->sl_addr[j];
 		psl->sl_count--;
 		err = 0;
@@ -2088,7 +2088,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 		newpsl->sl_max = count;
 		newpsl->sl_count = count - IP_SFBLOCK;
 		if (psl) {
-			for (i=0; i<psl->sl_count; i++)
+			for (i = 0; i < psl->sl_count; i++)
				newpsl->sl_addr[i] = psl->sl_addr[i];
			/* decrease mem now to avoid the memleak warning */
			atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
@@ -2098,7 +2098,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 		psl = newpsl;
 	}
 	rv = 1;	/* > 0 for insert logic below if sl_count is 0 */
-	for (i=0; i<psl->sl_count; i++) {
+	for (i = 0; i < psl->sl_count; i++) {
 		rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr,
			sizeof(__be32));
 		if (rv == 0)
@@ -2106,7 +2106,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 	}
 	if (rv == 0)	/* address already there is an error */
 		goto done;
-	for (j=psl->sl_count-1; j>=i; j--)
+	for (j = psl->sl_count-1; j >= i; j--)
 		psl->sl_addr[j+1] = psl->sl_addr[j];
 	psl->sl_addr[i] = mreqs->imr_sourceaddr;
 	psl->sl_count++;
@@ -2305,7 +2305,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
	    copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
 		return -EFAULT;
 	}
-	for (i=0; i<copycount; i++) {
+	for (i = 0; i < copycount; i++) {
 		struct sockaddr_storage ss;
 
 		psin = (struct sockaddr_in *)&ss;
@@ -2350,7 +2350,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
 	if (!psl)
 		goto unlock;
 
-	for (i=0; i<psl->sl_count; i++) {
+	for (i = 0; i < psl->sl_count; i++) {
 		if (psl->sl_addr[i] == rmt_addr)
 			break;
 	}
@@ -2423,7 +2423,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u
 		rv = 1;
 	} else if (im) {
 		if (src_addr) {
-			for (psf=im->sources; psf; psf=psf->sf_next) {
+			for (psf = im->sources; psf; psf = psf->sf_next) {
				if (psf->sf_inaddr == src_addr)
					break;
			}
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 1975f52933c5..f17ea49b28fb 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -230,29 +230,6 @@ static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
 	lro_desc->last_skb = skb;
 }
 
-static void lro_add_frags(struct net_lro_desc *lro_desc,
-			  int len, int hlen, int truesize,
-			  struct skb_frag_struct *skb_frags,
-			  struct iphdr *iph, struct tcphdr *tcph)
-{
-	struct sk_buff *skb = lro_desc->parent;
-	int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
-
-	lro_add_common(lro_desc, iph, tcph, tcp_data_len);
-
-	skb->truesize += truesize;
-
-	skb_frags[0].page_offset += hlen;
-	skb_frag_size_sub(&skb_frags[0], hlen);
-
-	while (tcp_data_len > 0) {
-		*(lro_desc->next_frag) = *skb_frags;
-		tcp_data_len -= skb_frag_size(skb_frags);
-		lro_desc->next_frag++;
-		skb_frags++;
-		skb_shinfo(skb)->nr_frags++;
-	}
-}
 
 static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
			      struct iphdr *iph,
@@ -371,128 +348,6 @@ out:
 	return 1;
 }
 
-
-static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
-				   struct skb_frag_struct *frags,
-				   int len, int true_size,
-				   void *mac_hdr,
-				   int hlen, __wsum sum,
-				   u32 ip_summed)
-{
-	struct sk_buff *skb;
-	struct skb_frag_struct *skb_frags;
-	int data_len = len;
-	int hdr_len = min(len, hlen);
-
-	skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
-	if (!skb)
-		return NULL;
-
-	skb_reserve(skb, lro_mgr->frag_align_pad);
-	skb->len = len;
-	skb->data_len = len - hdr_len;
-	skb->truesize += true_size;
-	skb->tail += hdr_len;
-
-	memcpy(skb->data, mac_hdr, hdr_len);
-
-	skb_frags = skb_shinfo(skb)->frags;
-	while (data_len > 0) {
-		*skb_frags = *frags;
-		data_len -= skb_frag_size(frags);
-		skb_frags++;
-		frags++;
-		skb_shinfo(skb)->nr_frags++;
-	}
-
-	skb_shinfo(skb)->frags[0].page_offset += hdr_len;
-	skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len);
-
-	skb->ip_summed = ip_summed;
-	skb->csum = sum;
-	skb->protocol = eth_type_trans(skb, lro_mgr->dev);
-	return skb;
-}
-
-static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
-					  struct skb_frag_struct *frags,
-					  int len, int true_size,
-					  void *priv, __wsum sum)
-{
-	struct net_lro_desc *lro_desc;
-	struct iphdr *iph;
-	struct tcphdr *tcph;
-	struct sk_buff *skb;
-	u64 flags;
-	void *mac_hdr;
-	int mac_hdr_len;
-	int hdr_len = LRO_MAX_PG_HLEN;
-	int vlan_hdr_len = 0;
-
-	if (!lro_mgr->get_frag_header ||
-	    lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
-				     (void *)&tcph, &flags, priv)) {
-		mac_hdr = skb_frag_address(frags);
-		goto out1;
-	}
-
-	if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
-		goto out1;
-
-	hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);
-	mac_hdr_len = (int)((void *)(iph) - mac_hdr);
-
-	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
-	if (!lro_desc)
-		goto out1;
-
-	if (!lro_desc->active) { /* start new lro session */
-		if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
-			goto out1;
-
-		skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
-				  hdr_len, 0, lro_mgr->ip_summed_aggr);
-		if (!skb)
-			goto out;
-
-		if ((skb->protocol == htons(ETH_P_8021Q)) &&
-		    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
-			vlan_hdr_len = VLAN_HLEN;
-
-		iph = (void *)(skb->data + vlan_hdr_len);
-		tcph = (void *)((u8 *)skb->data + vlan_hdr_len
-				+ IP_HDR_LEN(iph));
-
-		lro_init_desc(lro_desc, skb, iph, tcph);
-		LRO_INC_STATS(lro_mgr, aggregated);
-		return NULL;
-	}
-
-	if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
-		goto out2;
-
-	if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
-		goto out2;
-
-	lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
-	LRO_INC_STATS(lro_mgr, aggregated);
-
-	if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
-	    lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
-		lro_flush(lro_mgr, lro_desc);
-
-	return NULL;
-
-out2: /* send aggregated packets to the stack */
-	lro_flush(lro_mgr, lro_desc);
-
-out1: /* Original packet has to be posted to the stack */
-	skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
-			  hdr_len, sum, lro_mgr->ip_summed);
-out:
-	return skb;
-}
-
 void lro_receive_skb(struct net_lro_mgr *lro_mgr,
		     struct sk_buff *skb,
		     void *priv)
@@ -506,23 +361,6 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr,
 }
 EXPORT_SYMBOL(lro_receive_skb);
 
-void lro_receive_frags(struct net_lro_mgr *lro_mgr,
-		       struct skb_frag_struct *frags,
-		       int len, int true_size, void *priv, __wsum sum)
-{
-	struct sk_buff *skb;
-
-	skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
-	if (!skb)
-		return;
-
-	if (lro_mgr->features & LRO_F_NAPI)
-		netif_receive_skb(skb);
-	else
-		netif_rx(skb);
-}
-EXPORT_SYMBOL(lro_receive_frags);
-
 void lro_flush_all(struct net_lro_mgr *lro_mgr)
 {
 	int i;
@@ -534,14 +372,3 @@ void lro_flush_all(struct net_lro_mgr *lro_mgr)
 	}
 }
 EXPORT_SYMBOL(lro_flush_all);
-
-void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
-		   struct iphdr *iph, struct tcphdr *tcph)
-{
-	struct net_lro_desc *lro_desc;
-
-	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
-	if (lro_desc->active)
-		lro_flush(lro_mgr, lro_desc);
-}
-EXPORT_SYMBOL(lro_flush_pkt);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 33d5537881ed..48f424465112 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -109,13 +109,6 @@ static inline void flush_check(struct inet_peer_base *base, int family)
 	}
 }
 
-void inetpeer_invalidate_family(int family)
-{
-	atomic_t *fp = inetpeer_seq_ptr(family);
-
-	atomic_inc(fp);
-}
-
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
 
 /* Exported for sysctl_net_ipv4. */
@@ -227,7 +220,7 @@ static int addr_compare(const struct inetpeer_addr *a,
 	stackptr = _stack;					\
 	*stackptr++ = &_base->root;				\
 	for (u = rcu_deref_locked(_base->root, _base);		\
-	     u != peer_avl_empty; ) {				\
+	     u != peer_avl_empty;) {				\
 		int cmp = addr_compare(_daddr, &u->daddr);	\
 		if (cmp == 0)					\
 			break;					\
@@ -282,7 +275,7 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
 	*stackptr++ = &start->avl_left;				\
 	v = &start->avl_left;					\
 	for (u = rcu_deref_locked(*v, base);			\
-	     u->avl_right != peer_avl_empty_rcu; ) {		\
+	     u->avl_right != peer_avl_empty_rcu;) {		\
 		v = &u->avl_right;				\
 		*stackptr++ = v;				\
 		u = rcu_deref_locked(*v, base);			\
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2481993a4970..c10a3ce5cbff 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -704,7 +704,7 @@ struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
			memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
			if (ip_defrag(skb, user))
				return NULL;
-			skb->rxhash = 0;
+			skb_clear_hash(skb);
		}
 	}
 	return skb;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ec7264514a82..dd9d90b44f97 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -167,7 +167,7 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
				soffset -= 4;
				if (soffset > 3) {
					memcpy(&faddr, &start[soffset-1], 4);
-					for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4)
+					for (soffset -= 4, doffset = 4; soffset > 3; soffset -= 4, doffset += 4)
						memcpy(&dptr[doffset-1], &start[soffset-1], 4);
					/*
					 * RFC1812 requires to fix illegal source routes.
@@ -227,7 +227,7 @@ void ip_options_fragment(struct sk_buff *skb)
			continue;
		}
		optlen = optptr[1];
-		if (optlen<2 || optlen>l)
+		if (optlen < 2 || optlen > l)
			return;
		if (!IPOPT_COPIED(*optptr))
			memset(optptr, IPOPT_NOOP, optlen);
@@ -276,7 +276,7 @@ int ip_options_compile(struct net *net,
 	for (l = opt->optlen; l > 0; ) {
		switch (*optptr) {
		case IPOPT_END:
-			for (optptr++, l--; l>0; optptr++, l--) {
+			for (optptr++, l--; l > 0; optptr++, l--) {
				if (*optptr != IPOPT_END) {
					*optptr = IPOPT_END;
					opt->is_changed = 1;
@@ -289,7 +289,7 @@ int ip_options_compile(struct net *net,
			continue;
		}
		optlen = optptr[1];
-		if (optlen<2 || optlen>l) {
+		if (optlen < 2 || optlen > l) {
			pp_ptr = optptr;
			goto error;
		}
@@ -572,7 +572,7 @@ void ip_forward_options(struct sk_buff *skb)
 
 	optptr = raw + opt->srr;
 
-	for ( srrptr=optptr[2], srrspace = optptr[1];
+	for ( srrptr = optptr[2], srrspace = optptr[1];
	      srrptr <= srrspace;
	      srrptr += 4
	     ) {
@@ -628,7 +628,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 	if (rt->rt_type != RTN_LOCAL)
 		return -EINVAL;
 
-	for (srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) {
+	for (srrptr = optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) {
		if (srrptr + 3 > srrspace) {
			icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((opt->srr+2)<<24));
			return -EINVAL;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 3f858266fa7e..a9fc435dc89f 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -386,7 +386,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
 /*
  *	Handle MSG_ERRQUEUE
  */
-int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
+int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 {
 	struct sock_exterr_skb *serr;
 	struct sk_buff *skb, *skb2;
@@ -423,6 +423,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
						   serr->addr_offset);
 		sin->sin_port = serr->port;
 		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+		*addr_len = sizeof(*sin);
 	}
 
 	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
@@ -1050,7 +1051,7 @@ e_inval:
 *
 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
 * destination in skb->cb[] before dst drop.
- * This way, receiver doesnt make cache line misses to read rtable.
+ * This way, receiver doesn't make cache line misses to read rtable.
 */
 void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
 {
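Note: with the hunks above, ip_recv_error() reports through *addr_len how many
bytes of msg_name it actually filled, instead of callers guessing a length up
front. A simplified userspace sketch of that contract (stand-in function, not
the kernel's):

	#include <stdio.h>
	#include <string.h>
	#include <stdint.h>
	#include <arpa/inet.h>

	static int recv_error_sketch(struct sockaddr_in *name, int *addr_len,
				     uint32_t offender, uint16_t port)
	{
		if (name) {	/* only filled when the caller supplied msg_name */
			memset(name, 0, sizeof(*name));
			name->sin_family = AF_INET;
			name->sin_addr.s_addr = offender;
			name->sin_port = port;
			*addr_len = sizeof(*name);	/* the new out-parameter */
		}
		return 0;
	}

	int main(void)
	{
		struct sockaddr_in sin;
		int addr_len = 0;

		recv_error_sketch(&sin, &addr_len, htonl(INADDR_LOOPBACK), htons(4242));
		printf("reported address length: %d\n", addr_len);
		return 0;
	}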
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 42ffbc8d65c6..6156f4ef5e91 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -56,7 +56,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
56 56
57 skb_scrub_packet(skb, xnet); 57 skb_scrub_packet(skb, xnet);
58 58
59 skb->rxhash = 0; 59 skb_clear_hash(skb);
60 skb_dst_set(skb, &rt->dst); 60 skb_dst_set(skb, &rt->dst);
61 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 61 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
62 62
@@ -107,8 +107,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
107 107
108 nf_reset(skb); 108 nf_reset(skb);
109 secpath_reset(skb); 109 secpath_reset(skb);
110 if (!skb->l4_rxhash) 110 skb_clear_hash_if_not_l4(skb);
111 skb->rxhash = 0;
112 skb_dst_drop(skb); 111 skb_dst_drop(skb);
113 skb->vlan_tci = 0; 112 skb->vlan_tci = 0;
114 skb_set_queue_mapping(skb, 0); 113 skb_set_queue_mapping(skb, 0);
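
The open-coded rxhash stores are replaced by helpers: skb_clear_hash() always invalidates the cached flow hash, while skb_clear_hash_if_not_l4() keeps a hash that was computed over the L4 tuple, which decapsulation does not change. A reduced sketch of that semantics (fake_skb and its fields are stand-ins, not the real sk_buff layout):

	#include <stdbool.h>
	#include <stdio.h>

	/* Reduced stand-in for the skb fields the helpers touch. */
	struct fake_skb {
		unsigned int hash;
		bool l4_hash;	/* hash was computed over the 4-tuple */
	};

	static void skb_clear_hash(struct fake_skb *skb)
	{
		skb->hash = 0;
		skb->l4_hash = false;
	}

	static void skb_clear_hash_if_not_l4(struct fake_skb *skb)
	{
		if (!skb->l4_hash)
			skb_clear_hash(skb);
	}

	int main(void)
	{
		struct fake_skb skb = { .hash = 0xabcd, .l4_hash = true };

		skb_clear_hash_if_not_l4(&skb);	/* decap: a tuple hash survives */
		printf("hash after decap: 0x%x\n", skb.hash);
		skb_clear_hash(&skb);		/* encap: always invalidate */
		printf("hash after encap: 0x%x\n", skb.hash);
		return 0;
	}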
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 62212c772a4b..421a24934ffd 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -425,6 +425,7 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
425 goto failure; 425 goto failure;
426 426
427 ipv4_devconf_setall(in_dev); 427 ipv4_devconf_setall(in_dev);
428 neigh_parms_data_state_setall(in_dev->arp_parms);
428 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; 429 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
429 430
430 if (dev_open(dev)) 431 if (dev_open(dev))
@@ -517,6 +518,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
517 } 518 }
518 519
519 ipv4_devconf_setall(in_dev); 520 ipv4_devconf_setall(in_dev);
521 neigh_parms_data_state_setall(in_dev->arp_parms);
520 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; 522 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
521 rcu_read_unlock(); 523 rcu_read_unlock();
522 524
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index f13bd91d9a56..a313c3fbeb46 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -423,6 +423,7 @@ static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
423static struct xt_target synproxy_tg4_reg __read_mostly = { 423static struct xt_target synproxy_tg4_reg __read_mostly = {
424 .name = "SYNPROXY", 424 .name = "SYNPROXY",
425 .family = NFPROTO_IPV4, 425 .family = NFPROTO_IPV4,
426 .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD),
426 .target = synproxy_tg4, 427 .target = synproxy_tg4,
427 .targetsize = sizeof(struct xt_synproxy_info), 428 .targetsize = sizeof(struct xt_synproxy_info),
428 .checkentry = synproxy_tg4_check, 429 .checkentry = synproxy_tg4_check,
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 5f011cc89cd9..d551e31b416e 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -34,8 +34,7 @@
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details. 35 * GNU General Public License for more details.
36 * You should have received a copy of the GNU General Public License 36 * You should have received a copy of the GNU General Public License
37 * along with this program; if not, write to the Free Software 37 * along with this program; if not, see <http://www.gnu.org/licenses/>.
38 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
39 * 38 *
40 * Author: James Morris <jmorris@intercode.com.au> 39 * Author: James Morris <jmorris@intercode.com.au>
41 * 40 *
@@ -462,14 +461,14 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
462 } 461 }
463 462
464 if (subid < 40) { 463 if (subid < 40) {
465 optr [0] = 0; 464 optr[0] = 0;
466 optr [1] = subid; 465 optr[1] = subid;
467 } else if (subid < 80) { 466 } else if (subid < 80) {
468 optr [0] = 1; 467 optr[0] = 1;
469 optr [1] = subid - 40; 468 optr[1] = subid - 40;
470 } else { 469 } else {
471 optr [0] = 2; 470 optr[0] = 2;
472 optr [1] = subid - 80; 471 optr[1] = subid - 80;
473 } 472 }
474 473
475 *len = 2; 474 *len = 2;
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index fff5ba1a33b7..4a5e94ac314a 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -72,7 +72,7 @@ static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
72{ 72{
73 const struct nft_reject *priv = nft_expr_priv(expr); 73 const struct nft_reject *priv = nft_expr_priv(expr);
74 74
75 if (nla_put_be32(skb, NFTA_REJECT_TYPE, priv->type)) 75 if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type)))
76 goto nla_put_failure; 76 goto nla_put_failure;
77 77
78 switch (priv->type) { 78 switch (priv->type) {
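
The fix matters because nla_put_be32() emits its argument verbatim as a big-endian attribute, while priv->type is kept in host order, so it must pass through htonl() first. A minimal demonstration of the difference on a little-endian host:

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t type = 1;	/* an enum value kept in host order */

		/* On little-endian hosts the raw and wire values differ:
		 * 0x00000001 vs 0x01000000 when viewed as one word.
		 */
		printf("host order:    0x%08x\n", type);
		printf("network order: 0x%08x\n", htonl(type));
		return 0;
	}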
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 876c6ca2d8f9..cae5262a337c 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -53,8 +53,12 @@
53#include <net/transp_v6.h> 53#include <net/transp_v6.h>
54#endif 54#endif
55 55
56struct ping_table {
57 struct hlist_nulls_head hash[PING_HTABLE_SIZE];
58 rwlock_t lock;
59};
56 60
57struct ping_table ping_table; 61static struct ping_table ping_table;
58struct pingv6_ops pingv6_ops; 62struct pingv6_ops pingv6_ops;
59EXPORT_SYMBOL_GPL(pingv6_ops); 63EXPORT_SYMBOL_GPL(pingv6_ops);
60 64
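
Moving the struct definition here and making the table static keeps the hash private to ping.c. Bucket selection for such a table is a simple mask of the ICMP echo identifier; a sketch assuming the kernel's PING_HTABLE_SIZE of 64 (the real hash also mixes in a per-netns value, and the kernel uses its own rwlock type):

	#include <pthread.h>
	#include <stdio.h>

	#define PING_HTABLE_SIZE 64	/* power of two, as in the kernel */

	struct ping_bucket { int dummy; };

	static struct ping_bucket table[PING_HTABLE_SIZE];
	static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;

	static unsigned int ping_hashfn(unsigned int ident)
	{
		return ident & (PING_HTABLE_SIZE - 1);
	}

	int main(void)
	{
		unsigned int b;

		pthread_rwlock_rdlock(&table_lock);	/* lookup side is read-mostly */
		b = ping_hashfn(1234);
		pthread_rwlock_unlock(&table_lock);
		printf("ident 1234 -> bucket %u (%p)\n", b, (void *)&table[b]);
		return 0;
	}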
@@ -668,8 +672,8 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
668} 672}
669EXPORT_SYMBOL_GPL(ping_common_sendmsg); 673EXPORT_SYMBOL_GPL(ping_common_sendmsg);
670 674
671int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 675static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
672 size_t len) 676 size_t len)
673{ 677{
674 struct net *net = sock_net(sk); 678 struct net *net = sock_net(sk);
675 struct flowi4 fl4; 679 struct flowi4 fl4;
@@ -772,7 +776,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
772 err = PTR_ERR(rt); 776 err = PTR_ERR(rt);
773 rt = NULL; 777 rt = NULL;
774 if (err == -ENETUNREACH) 778 if (err == -ENETUNREACH)
775 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 779 IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
776 goto out; 780 goto out;
777 } 781 }
778 782
@@ -841,10 +845,11 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
841 845
842 if (flags & MSG_ERRQUEUE) { 846 if (flags & MSG_ERRQUEUE) {
843 if (family == AF_INET) { 847 if (family == AF_INET) {
844 return ip_recv_error(sk, msg, len); 848 return ip_recv_error(sk, msg, len, addr_len);
845#if IS_ENABLED(CONFIG_IPV6) 849#if IS_ENABLED(CONFIG_IPV6)
846 } else if (family == AF_INET6) { 850 } else if (family == AF_INET6) {
847 return pingv6_ops.ipv6_recv_error(sk, msg, len); 851 return pingv6_ops.ipv6_recv_error(sk, msg, len,
852 addr_len);
848#endif 853#endif
849 } 854 }
850 } 855 }
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4a0335854b89..8ecd7ad959b4 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -279,6 +279,7 @@ static const struct snmp_mib snmp4_net_list[] = {
279 SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), 279 SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
280 SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), 280 SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
281 SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS), 281 SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS),
282 SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING),
282 SNMP_MIB_SENTINEL 283 SNMP_MIB_SENTINEL
283}; 284};
284 285
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index ce848461acbb..46d6a1c923a8 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -31,10 +31,6 @@
31const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; 31const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
32const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; 32const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
33 33
34/*
35 * Add a protocol handler to the hash tables
36 */
37
38int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) 34int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
39{ 35{
40 if (!prot->netns_ok) { 36 if (!prot->netns_ok) {
@@ -55,10 +51,6 @@ int inet_add_offload(const struct net_offload *prot, unsigned char protocol)
55} 51}
56EXPORT_SYMBOL(inet_add_offload); 52EXPORT_SYMBOL(inet_add_offload);
57 53
58/*
59 * Remove a protocol from the hash tables.
60 */
61
62int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) 54int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
63{ 55{
64 int ret; 56 int ret;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 5cb8ddb505ee..81e6cfd5a365 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -575,7 +575,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
575 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, 575 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
576 RT_SCOPE_UNIVERSE, 576 RT_SCOPE_UNIVERSE,
577 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, 577 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
578 inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP | 578 inet_sk_flowi_flags(sk) |
579 (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), 579 (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
580 daddr, saddr, 0, 0); 580 daddr, saddr, 0, 0);
581 581
@@ -697,7 +697,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
697 goto out; 697 goto out;
698 698
699 if (flags & MSG_ERRQUEUE) { 699 if (flags & MSG_ERRQUEUE) {
700 err = ip_recv_error(sk, msg, len); 700 err = ip_recv_error(sk, msg, len, addr_len);
701 goto out; 701 goto out;
702 } 702 }
703 703
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b95331e6c077..f2ed13c2125f 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -121,7 +121,7 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr,
121 cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; 121 cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq;
122 122
123 /* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */ 123 /* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */
124 diff = (count - (cookie >> COOKIEBITS)) & ((__u32) - 1 >> COOKIEBITS); 124 diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS);
125 if (diff >= MAX_SYNCOOKIE_AGE) 125 if (diff >= MAX_SYNCOOKIE_AGE)
126 return (__u32)-1; 126 return (__u32)-1;
127 127
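
The corrected expression builds an 8-bit mask from the 32-bit all-ones value: with COOKIEBITS == 24, (__u32)-1 >> COOKIEBITS is 0xff, so subtracting the truncated minute counter wraps modulo 256. A small worked example under those kernel constants:

	#include <stdint.h>
	#include <stdio.h>

	#define COOKIEBITS 24
	#define MAX_SYNCOOKIE_AGE 2

	int main(void)
	{
		uint32_t count = 260;	/* current minute counter */
		/* counter value 259, truncated to the cookie's top 8 bits */
		uint32_t cookie = 259u << COOKIEBITS;
		uint32_t diff;

		/* ((uint32_t)-1 >> COOKIEBITS) == 0xff: keep only the 8
		 * counter bits so the subtraction wraps like the field did.
		 */
		diff = (count - (cookie >> COOKIEBITS)) & ((uint32_t)-1 >> COOKIEBITS);
		printf("age = %u minute(s): %s\n", diff,
		       diff >= MAX_SYNCOOKIE_AGE ? "reject" : "accept");
		return 0;
	}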
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 3d69ec8dac57..1d2480ac2bb6 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -286,13 +286,6 @@ static struct ctl_table ipv4_table[] = {
286 .extra2 = &ip_ttl_max, 286 .extra2 = &ip_ttl_max,
287 }, 287 },
288 { 288 {
289 .procname = "ip_no_pmtu_disc",
290 .data = &ipv4_config.no_pmtu_disc,
291 .maxlen = sizeof(int),
292 .mode = 0644,
293 .proc_handler = proc_dointvec
294 },
295 {
296 .procname = "ip_nonlocal_bind", 289 .procname = "ip_nonlocal_bind",
297 .data = &sysctl_ip_nonlocal_bind, 290 .data = &sysctl_ip_nonlocal_bind,
298 .maxlen = sizeof(int), 291 .maxlen = sizeof(int),
@@ -707,7 +700,7 @@ static struct ctl_table ipv4_table[] = {
707 .mode = 0644, 700 .mode = 0644,
708 .proc_handler = proc_dointvec 701 .proc_handler = proc_dointvec
709 }, 702 },
710 { 703 {
711 .procname = "tcp_thin_dupack", 704 .procname = "tcp_thin_dupack",
712 .data = &sysctl_tcp_thin_dupack, 705 .data = &sysctl_tcp_thin_dupack,
713 .maxlen = sizeof(int), 706 .maxlen = sizeof(int),
@@ -733,6 +726,15 @@ static struct ctl_table ipv4_table[] = {
733 .extra2 = &gso_max_segs, 726 .extra2 = &gso_max_segs,
734 }, 727 },
735 { 728 {
729 .procname = "tcp_autocorking",
730 .data = &sysctl_tcp_autocorking,
731 .maxlen = sizeof(int),
732 .mode = 0644,
733 .proc_handler = proc_dointvec_minmax,
734 .extra1 = &zero,
735 .extra2 = &one,
736 },
737 {
736 .procname = "udp_mem", 738 .procname = "udp_mem",
737 .data = &sysctl_udp_mem, 739 .data = &sysctl_udp_mem,
738 .maxlen = sizeof(sysctl_udp_mem), 740 .maxlen = sizeof(sysctl_udp_mem),
@@ -822,6 +824,13 @@ static struct ctl_table ipv4_net_table[] = {
822 .mode = 0644, 824 .mode = 0644,
823 .proc_handler = ipv4_local_port_range, 825 .proc_handler = ipv4_local_port_range,
824 }, 826 },
827 {
828 .procname = "ip_no_pmtu_disc",
829 .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc,
830 .maxlen = sizeof(int),
831 .mode = 0644,
832 .proc_handler = proc_dointvec
833 },
825 { } 834 { }
826}; 835};
827 836
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3dc0c6cf02a8..d099f9a055c6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -285,6 +285,8 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
285 285
286int sysctl_tcp_min_tso_segs __read_mostly = 2; 286int sysctl_tcp_min_tso_segs __read_mostly = 2;
287 287
288int sysctl_tcp_autocorking __read_mostly = 1;
289
288struct percpu_counter tcp_orphan_count; 290struct percpu_counter tcp_orphan_count;
289EXPORT_SYMBOL_GPL(tcp_orphan_count); 291EXPORT_SYMBOL_GPL(tcp_orphan_count);
290 292
@@ -619,19 +621,58 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
619 tp->snd_up = tp->write_seq; 621 tp->snd_up = tp->write_seq;
620} 622}
621 623
622static inline void tcp_push(struct sock *sk, int flags, int mss_now, 624/* If a not yet filled skb is pushed, do not send it if
623 int nonagle) 625 * we have data packets in Qdisc or NIC queues:
626 * Because TX completion will happen shortly, it gives a chance
627 * to coalesce future sendmsg() payload into this skb, without
628 * need for a timer, and with no latency trade off.
629 * As packets containing data payload have a bigger truesize
630 * than pure acks (dataless) packets, the last checks prevent
631 * autocorking if we only have an ACK in Qdisc/NIC queues,
632 * or if TX completion was delayed after we processed ACK packet.
633 */
634static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
635 int size_goal)
624{ 636{
625 if (tcp_send_head(sk)) { 637 return skb->len < size_goal &&
626 struct tcp_sock *tp = tcp_sk(sk); 638 sysctl_tcp_autocorking &&
639 skb != tcp_write_queue_head(sk) &&
640 atomic_read(&sk->sk_wmem_alloc) > skb->truesize;
641}
642
643static void tcp_push(struct sock *sk, int flags, int mss_now,
644 int nonagle, int size_goal)
645{
646 struct tcp_sock *tp = tcp_sk(sk);
647 struct sk_buff *skb;
627 648
628 if (!(flags & MSG_MORE) || forced_push(tp)) 649 if (!tcp_send_head(sk))
629 tcp_mark_push(tp, tcp_write_queue_tail(sk)); 650 return;
651
652 skb = tcp_write_queue_tail(sk);
653 if (!(flags & MSG_MORE) || forced_push(tp))
654 tcp_mark_push(tp, skb);
655
656 tcp_mark_urg(tp, flags);
657
658 if (tcp_should_autocork(sk, skb, size_goal)) {
630 659
631 tcp_mark_urg(tp, flags); 660 /* avoid atomic op if TSQ_THROTTLED bit is already set */
632 __tcp_push_pending_frames(sk, mss_now, 661 if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) {
633 (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); 662 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
663 set_bit(TSQ_THROTTLED, &tp->tsq_flags);
664 }
665 /* It is possible TX completion already happened
666 * before we set TSQ_THROTTLED.
667 */
668 if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize)
669 return;
634 } 670 }
671
672 if (flags & MSG_MORE)
673 nonagle = TCP_NAGLE_CORK;
674
675 __tcp_push_pending_frames(sk, mss_now, nonagle);
635} 676}
636 677
637static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, 678static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
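
The new tcp_should_autocork() defers transmission of a partially filled skb whenever older payload is still queued below the socket (sk_wmem_alloc larger than this skb's truesize) and the skb is not the head of the write queue. A reduced standalone sketch of the same predicate (fake_sk/fake_skb are illustrative stand-ins for the kernel structures):

	#include <stdbool.h>
	#include <stdio.h>

	struct fake_sk {
		unsigned int wmem_alloc;	/* bytes not yet freed by TX completion */
		bool skb_is_queue_head;		/* skb == tcp_write_queue_head(sk)? */
	};

	struct fake_skb {
		unsigned int len;
		unsigned int truesize;
	};

	static int sysctl_tcp_autocorking = 1;

	static bool should_autocork(const struct fake_sk *sk,
				    const struct fake_skb *skb,
				    unsigned int size_goal)
	{
		return skb->len < size_goal &&		/* skb not yet full */
		       sysctl_tcp_autocorking &&
		       !sk->skb_is_queue_head &&	/* data already in flight */
		       sk->wmem_alloc > skb->truesize;	/* TX completion pending */
	}

	int main(void)
	{
		struct fake_skb skb = { .len = 1000, .truesize = 2048 };
		struct fake_sk sk = { .wmem_alloc = 8192, .skb_is_queue_head = false };

		printf("autocork: %s\n",
		       should_autocork(&sk, &skb, 64 * 1024) ? "yes" : "no");
		return 0;
	}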
@@ -934,7 +975,8 @@ new_segment:
934wait_for_sndbuf: 975wait_for_sndbuf:
935 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 976 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
936wait_for_memory: 977wait_for_memory:
937 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); 978 tcp_push(sk, flags & ~MSG_MORE, mss_now,
979 TCP_NAGLE_PUSH, size_goal);
938 980
939 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) 981 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
940 goto do_error; 982 goto do_error;
@@ -944,7 +986,7 @@ wait_for_memory:
944 986
945out: 987out:
946 if (copied && !(flags & MSG_SENDPAGE_NOTLAST)) 988 if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
947 tcp_push(sk, flags, mss_now, tp->nonagle); 989 tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
948 return copied; 990 return copied;
949 991
950do_error: 992do_error:
@@ -1225,7 +1267,8 @@ wait_for_sndbuf:
1225 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1267 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1226wait_for_memory: 1268wait_for_memory:
1227 if (copied) 1269 if (copied)
1228 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); 1270 tcp_push(sk, flags & ~MSG_MORE, mss_now,
1271 TCP_NAGLE_PUSH, size_goal);
1229 1272
1230 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) 1273 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
1231 goto do_error; 1274 goto do_error;
@@ -1236,7 +1279,7 @@ wait_for_memory:
1236 1279
1237out: 1280out:
1238 if (copied) 1281 if (copied)
1239 tcp_push(sk, flags, mss_now, tp->nonagle); 1282 tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
1240 release_sock(sk); 1283 release_sock(sk);
1241 return copied + copied_syn; 1284 return copied + copied_syn;
1242 1285
@@ -1425,7 +1468,7 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
1425 do { 1468 do {
1426 if (dma_async_is_tx_complete(tp->ucopy.dma_chan, 1469 if (dma_async_is_tx_complete(tp->ucopy.dma_chan,
1427 last_issued, &done, 1470 last_issued, &done,
1428 &used) == DMA_SUCCESS) { 1471 &used) == DMA_COMPLETE) {
1429 /* Safe to free early-copied skbs now */ 1472 /* Safe to free early-copied skbs now */
1430 __skb_queue_purge(&sk->sk_async_wait_queue); 1473 __skb_queue_purge(&sk->sk_async_wait_queue);
1431 break; 1474 break;
@@ -1433,7 +1476,7 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
1433 struct sk_buff *skb; 1476 struct sk_buff *skb;
1434 while ((skb = skb_peek(&sk->sk_async_wait_queue)) && 1477 while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
1435 (dma_async_is_complete(skb->dma_cookie, done, 1478 (dma_async_is_complete(skb->dma_cookie, done,
1436 used) == DMA_SUCCESS)) { 1479 used) == DMA_COMPLETE)) {
1437 __skb_dequeue(&sk->sk_async_wait_queue); 1480 __skb_dequeue(&sk->sk_async_wait_queue);
1438 kfree_skb(skb); 1481 kfree_skb(skb);
1439 } 1482 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c53b7f35c51d..65cf90e063d5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -766,7 +766,7 @@ static void tcp_update_pacing_rate(struct sock *sk)
766/* Calculate rto without backoff. This is the second half of Van Jacobson's 766/* Calculate rto without backoff. This is the second half of Van Jacobson's
767 * routine referred to above. 767 * routine referred to above.
768 */ 768 */
769void tcp_set_rto(struct sock *sk) 769static void tcp_set_rto(struct sock *sk)
770{ 770{
771 const struct tcp_sock *tp = tcp_sk(sk); 771 const struct tcp_sock *tp = tcp_sk(sk);
772 /* Old crap is replaced with new one. 8) 772 /* Old crap is replaced with new one. 8)
@@ -3686,7 +3686,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3686 int opcode = *ptr++; 3686 int opcode = *ptr++;
3687 int opsize; 3687 int opsize;
3688 3688
3689 switch(opcode) { 3689 switch (opcode) {
3690 case TCPOPT_EOL: 3690 case TCPOPT_EOL:
3691 return NULL; 3691 return NULL;
3692 case TCPOPT_NOP: 3692 case TCPOPT_NOP:
@@ -4046,7 +4046,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
4046 WARN_ON(before(tp->rcv_nxt, sp->end_seq)); 4046 WARN_ON(before(tp->rcv_nxt, sp->end_seq));
4047 4047
4048 /* Zap this SACK, by moving forward any other SACKS. */ 4048 /* Zap this SACK, by moving forward any other SACKS. */
4049 for (i=this_sack+1; i < num_sacks; i++) 4049 for (i = this_sack+1; i < num_sacks; i++)
4050 tp->selective_acks[i-1] = tp->selective_acks[i]; 4050 tp->selective_acks[i-1] = tp->selective_acks[i];
4051 num_sacks--; 4051 num_sacks--;
4052 continue; 4052 continue;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 59a6f8b90cd9..7297b56c28c7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -173,11 +173,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
173 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, 173 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
174 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 174 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
175 IPPROTO_TCP, 175 IPPROTO_TCP,
176 orig_sport, orig_dport, sk, true); 176 orig_sport, orig_dport, sk);
177 if (IS_ERR(rt)) { 177 if (IS_ERR(rt)) {
178 err = PTR_ERR(rt); 178 err = PTR_ERR(rt);
179 if (err == -ENETUNREACH) 179 if (err == -ENETUNREACH)
180 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 180 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
181 return err; 181 return err;
182 } 182 }
183 183
@@ -827,7 +827,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
827 const struct inet_request_sock *ireq = inet_rsk(req); 827 const struct inet_request_sock *ireq = inet_rsk(req);
828 struct flowi4 fl4; 828 struct flowi4 fl4;
829 int err = -1; 829 int err = -1;
830 struct sk_buff * skb; 830 struct sk_buff *skb;
831 831
832 /* First, grab a route. */ 832 /* First, grab a route. */
833 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 833 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 03e9154f7e68..f7e522c558ba 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -6,13 +6,6 @@
6#include <linux/memcontrol.h> 6#include <linux/memcontrol.h>
7#include <linux/module.h> 7#include <linux/module.h>
8 8
9static void memcg_tcp_enter_memory_pressure(struct sock *sk)
10{
11 if (sk->sk_cgrp->memory_pressure)
12 sk->sk_cgrp->memory_pressure = 1;
13}
14EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
15
16int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) 9int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
17{ 10{
18 /* 11 /*
@@ -60,7 +53,6 @@ EXPORT_SYMBOL(tcp_destroy_cgroup);
60static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) 53static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
61{ 54{
62 struct cg_proto *cg_proto; 55 struct cg_proto *cg_proto;
63 u64 old_lim;
64 int i; 56 int i;
65 int ret; 57 int ret;
66 58
@@ -71,7 +63,6 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
71 if (val > RES_COUNTER_MAX) 63 if (val > RES_COUNTER_MAX)
72 val = RES_COUNTER_MAX; 64 val = RES_COUNTER_MAX;
73 65
74 old_lim = res_counter_read_u64(&cg_proto->memory_allocated, RES_LIMIT);
75 ret = res_counter_set_limit(&cg_proto->memory_allocated, val); 66 ret = res_counter_set_limit(&cg_proto->memory_allocated, val);
76 if (ret) 67 if (ret)
77 return ret; 68 return ret;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index a2b68a108eae..2658a27f540d 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -240,7 +240,7 @@ int tcp_gro_complete(struct sk_buff *skb)
240{ 240{
241 struct tcphdr *th = tcp_hdr(skb); 241 struct tcphdr *th = tcp_hdr(skb);
242 242
243 skb->csum_start = skb_transport_header(skb) - skb->head; 243 skb->csum_start = (unsigned char *)th - skb->head;
244 skb->csum_offset = offsetof(struct tcphdr, check); 244 skb->csum_offset = offsetof(struct tcphdr, check);
245 skb->ip_summed = CHECKSUM_PARTIAL; 245 skb->ip_summed = CHECKSUM_PARTIAL;
246 246
@@ -272,45 +272,45 @@ static int tcp_v4_gso_send_check(struct sk_buff *skb)
272 272
273static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) 273static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
274{ 274{
275 /* Use the IP hdr immediately preceding this transport */
275 const struct iphdr *iph = skb_gro_network_header(skb); 276 const struct iphdr *iph = skb_gro_network_header(skb);
276 __wsum wsum; 277 __wsum wsum;
277 __sum16 sum; 278
279 /* Don't bother verifying checksum if we're going to flush anyway. */
280 if (NAPI_GRO_CB(skb)->flush)
281 goto skip_csum;
282
283 wsum = skb->csum;
278 284
279 switch (skb->ip_summed) { 285 switch (skb->ip_summed) {
286 case CHECKSUM_NONE:
287 wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb),
288 0);
289
290 /* fall through */
291
280 case CHECKSUM_COMPLETE: 292 case CHECKSUM_COMPLETE:
281 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, 293 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
282 skb->csum)) { 294 wsum)) {
283 skb->ip_summed = CHECKSUM_UNNECESSARY; 295 skb->ip_summed = CHECKSUM_UNNECESSARY;
284 break; 296 break;
285 } 297 }
286flush: 298
287 NAPI_GRO_CB(skb)->flush = 1; 299 NAPI_GRO_CB(skb)->flush = 1;
288 return NULL; 300 return NULL;
289
290 case CHECKSUM_NONE:
291 wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
292 skb_gro_len(skb), IPPROTO_TCP, 0);
293 sum = csum_fold(skb_checksum(skb,
294 skb_gro_offset(skb),
295 skb_gro_len(skb),
296 wsum));
297 if (sum)
298 goto flush;
299
300 skb->ip_summed = CHECKSUM_UNNECESSARY;
301 break;
302 } 301 }
303 302
303skip_csum:
304 return tcp_gro_receive(head, skb); 304 return tcp_gro_receive(head, skb);
305} 305}
306 306
307static int tcp4_gro_complete(struct sk_buff *skb) 307static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
308{ 308{
309 const struct iphdr *iph = ip_hdr(skb); 309 const struct iphdr *iph = ip_hdr(skb);
310 struct tcphdr *th = tcp_hdr(skb); 310 struct tcphdr *th = tcp_hdr(skb);
311 311
312 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), 312 th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
313 iph->saddr, iph->daddr, 0); 313 iph->daddr, 0);
314 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 314 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
315 315
316 return tcp_gro_complete(skb); 316 return tcp_gro_complete(skb);
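
With this rework, CHECKSUM_NONE packets get a full software sum over the GRO span and then fall through to the same pseudo-header validation as CHECKSUM_COMPLETE. The underlying arithmetic is ordinary 16-bit one's-complement summing with carry folding; a flat-buffer sketch of it (helper names are ours, not the kernel's):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Fold a 32-bit running sum to 16 bits and complement it,
	 * mirroring what csum_fold() does with skb_checksum()'s result.
	 */
	static uint16_t csum_fold32(uint32_t sum)
	{
		sum = (sum & 0xffff) + (sum >> 16);
		sum = (sum & 0xffff) + (sum >> 16);
		return (uint16_t)~sum;
	}

	static uint32_t csum_partial_buf(const uint8_t *buf, size_t len, uint32_t sum)
	{
		while (len > 1) {
			sum += (uint32_t)buf[0] << 8 | buf[1];
			buf += 2;
			len -= 2;
		}
		if (len)	/* odd trailing byte */
			sum += (uint32_t)buf[0] << 8;
		return sum;
	}

	int main(void)
	{
		uint8_t seg[] = { 0x45, 0x00, 0x00, 0x1c };	/* arbitrary bytes */

		/* A receiver sums the data including the checksum field and
		 * expects the folded, complemented result to be zero.
		 */
		printf("folded csum: 0x%04x\n",
		       csum_fold32(csum_partial_buf(seg, sizeof(seg), 0)));
		return 0;
	}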
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7820f3a7dd70..03d26b85eab8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -363,15 +363,17 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
363 */ 363 */
364static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) 364static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
365{ 365{
366 struct skb_shared_info *shinfo = skb_shinfo(skb);
367
366 skb->ip_summed = CHECKSUM_PARTIAL; 368 skb->ip_summed = CHECKSUM_PARTIAL;
367 skb->csum = 0; 369 skb->csum = 0;
368 370
369 TCP_SKB_CB(skb)->tcp_flags = flags; 371 TCP_SKB_CB(skb)->tcp_flags = flags;
370 TCP_SKB_CB(skb)->sacked = 0; 372 TCP_SKB_CB(skb)->sacked = 0;
371 373
372 skb_shinfo(skb)->gso_segs = 1; 374 shinfo->gso_segs = 1;
373 skb_shinfo(skb)->gso_size = 0; 375 shinfo->gso_size = 0;
374 skb_shinfo(skb)->gso_type = 0; 376 shinfo->gso_type = 0;
375 377
376 TCP_SKB_CB(skb)->seq = seq; 378 TCP_SKB_CB(skb)->seq = seq;
377 if (flags & (TCPHDR_SYN | TCPHDR_FIN)) 379 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
@@ -406,7 +408,7 @@ struct tcp_out_options {
406 * Beware: Something in the Internet is very sensitive to the ordering of 408 * Beware: Something in the Internet is very sensitive to the ordering of
407 * TCP options, we learned this through the hard way, so be careful here. 409 * TCP options, we learned this through the hard way, so be careful here.
408 * Luckily we can at least blame others for their non-compliance but from 410 * Luckily we can at least blame others for their non-compliance but from
409 * inter-operatibility perspective it seems that we're somewhat stuck with 411 * inter-operability perspective it seems that we're somewhat stuck with
410 * the ordering which we have been using if we want to keep working with 412 * the ordering which we have been using if we want to keep working with
411 * those broken things (not that it currently hurts anybody as there isn't 413 * those broken things (not that it currently hurts anybody as there isn't
412 * particular reason why the ordering would need to be changed). 414 * particular reason why the ordering would need to be changed).
@@ -679,7 +681,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
679 * 681 *
680 * It's important tcp_wfree() can be replaced by sock_wfree() in the event skb 682 * It's important tcp_wfree() can be replaced by sock_wfree() in the event skb
681 * needs to be reallocated in a driver. 683 * needs to be reallocated in a driver.
682 * The invariant being skb->truesize substracted from sk->sk_wmem_alloc 684 * The invariant being skb->truesize subtracted from sk->sk_wmem_alloc
683 * 685 *
684 * Since transmit from skb destructor is forbidden, we use a tasklet 686 * Since transmit from skb destructor is forbidden, we use a tasklet
685 * to process all sockets that eventually need to send more skbs. 687 * to process all sockets that eventually need to send more skbs.
@@ -699,9 +701,9 @@ static void tcp_tsq_handler(struct sock *sk)
699 tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC); 701 tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
700} 702}
701/* 703/*
702 * One tasklest per cpu tries to send more skbs. 704 * One tasklet per cpu tries to send more skbs.
703 * We run in tasklet context but need to disable irqs when 705 * We run in tasklet context but need to disable irqs when
704 * transfering tsq->head because tcp_wfree() might 706 * transferring tsq->head because tcp_wfree() might
705 * interrupt us (non NAPI drivers) 707 * interrupt us (non NAPI drivers)
706 */ 708 */
707static void tcp_tasklet_func(unsigned long data) 709static void tcp_tasklet_func(unsigned long data)
@@ -795,7 +797,7 @@ void __init tcp_tasklet_init(void)
795 797
796/* 798/*
797 * Write buffer destructor automatically called from kfree_skb. 799 * Write buffer destructor automatically called from kfree_skb.
798 * We cant xmit new skbs from this context, as we might already 800 * We can't xmit new skbs from this context, as we might already
799 * hold qdisc lock. 801 * hold qdisc lock.
800 */ 802 */
801void tcp_wfree(struct sk_buff *skb) 803void tcp_wfree(struct sk_buff *skb)
@@ -986,6 +988,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
986static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, 988static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
987 unsigned int mss_now) 989 unsigned int mss_now)
988{ 990{
991 struct skb_shared_info *shinfo = skb_shinfo(skb);
992
989 /* Make sure we own this skb before messing gso_size/gso_segs */ 993 /* Make sure we own this skb before messing gso_size/gso_segs */
990 WARN_ON_ONCE(skb_cloned(skb)); 994 WARN_ON_ONCE(skb_cloned(skb));
991 995
@@ -993,13 +997,13 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
993 /* Avoid the costly divide in the normal 997 /* Avoid the costly divide in the normal
994 * non-TSO case. 998 * non-TSO case.
995 */ 999 */
996 skb_shinfo(skb)->gso_segs = 1; 1000 shinfo->gso_segs = 1;
997 skb_shinfo(skb)->gso_size = 0; 1001 shinfo->gso_size = 0;
998 skb_shinfo(skb)->gso_type = 0; 1002 shinfo->gso_type = 0;
999 } else { 1003 } else {
1000 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now); 1004 shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
1001 skb_shinfo(skb)->gso_size = mss_now; 1005 shinfo->gso_size = mss_now;
1002 skb_shinfo(skb)->gso_type = sk->sk_gso_type; 1006 shinfo->gso_type = sk->sk_gso_type;
1003 } 1007 }
1004} 1008}
1005 1009
@@ -1146,6 +1150,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1146 */ 1150 */
1147static void __pskb_trim_head(struct sk_buff *skb, int len) 1151static void __pskb_trim_head(struct sk_buff *skb, int len)
1148{ 1152{
1153 struct skb_shared_info *shinfo;
1149 int i, k, eat; 1154 int i, k, eat;
1150 1155
1151 eat = min_t(int, len, skb_headlen(skb)); 1156 eat = min_t(int, len, skb_headlen(skb));
@@ -1157,23 +1162,24 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
1157 } 1162 }
1158 eat = len; 1163 eat = len;
1159 k = 0; 1164 k = 0;
1160 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1165 shinfo = skb_shinfo(skb);
1161 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); 1166 for (i = 0; i < shinfo->nr_frags; i++) {
1167 int size = skb_frag_size(&shinfo->frags[i]);
1162 1168
1163 if (size <= eat) { 1169 if (size <= eat) {
1164 skb_frag_unref(skb, i); 1170 skb_frag_unref(skb, i);
1165 eat -= size; 1171 eat -= size;
1166 } else { 1172 } else {
1167 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; 1173 shinfo->frags[k] = shinfo->frags[i];
1168 if (eat) { 1174 if (eat) {
1169 skb_shinfo(skb)->frags[k].page_offset += eat; 1175 shinfo->frags[k].page_offset += eat;
1170 skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); 1176 skb_frag_size_sub(&shinfo->frags[k], eat);
1171 eat = 0; 1177 eat = 0;
1172 } 1178 }
1173 k++; 1179 k++;
1174 } 1180 }
1175 } 1181 }
1176 skb_shinfo(skb)->nr_frags = k; 1182 shinfo->nr_frags = k;
1177 1183
1178 skb_reset_tail_pointer(skb); 1184 skb_reset_tail_pointer(skb);
1179 skb->data_len -= len; 1185 skb->data_len -= len;
@@ -1378,23 +1384,51 @@ static void tcp_cwnd_validate(struct sock *sk)
1378 } 1384 }
1379} 1385}
1380 1386
1381/* Returns the portion of skb which can be sent right away without 1387/* Minshall's variant of the Nagle send check. */
1382 * introducing MSS oddities to segment boundaries. In rare cases where 1388static bool tcp_minshall_check(const struct tcp_sock *tp)
1383 * mss_now != mss_cache, we will request caller to create a small skb 1389{
1384 * per input skb which could be mostly avoided here (if desired). 1390 return after(tp->snd_sml, tp->snd_una) &&
1385 * 1391 !after(tp->snd_sml, tp->snd_nxt);
1386 * We explicitly want to create a request for splitting write queue tail 1392}
1387 * to a small skb for Nagle purposes while avoiding unnecessary modulos, 1393
1388 * thus all the complexity (cwnd_len is always MSS multiple which we 1394/* Update snd_sml if this skb is under mss
1389 * return whenever allowed by the other factors). Basically we need the 1395 * Note that a TSO packet might end with a sub-mss segment
1390 * modulo only when the receiver window alone is the limiting factor or 1396 * The test is really :
1391 * when we would be allowed to send the split-due-to-Nagle skb fully. 1397 * if ((skb->len % mss) != 0)
1398 * tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1399 * But we can avoid doing the divide again given we already have
1400 * skb_pcount = skb->len / mss_now
1401 */
1402static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
1403 const struct sk_buff *skb)
1404{
1405 if (skb->len < tcp_skb_pcount(skb) * mss_now)
1406 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1407}
1408
1409/* Return false if packet can be sent now without violating Nagle's rules:
1410 * 1. It is full sized. (provided by caller in %partial bool)
1411 * 2. Or it contains FIN. (already checked by caller)
1412 * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
1413 * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
1414 * With Minshall's modification: all sent small packets are ACKed.
1392 */ 1415 */
1393static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, 1416static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
1394 unsigned int mss_now, unsigned int max_segs) 1417 unsigned int mss_now, int nonagle)
1418{
1419 return partial &&
1420 ((nonagle & TCP_NAGLE_CORK) ||
1421 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1422}
1423/* Returns the portion of skb which can be sent right away */
1424static unsigned int tcp_mss_split_point(const struct sock *sk,
1425 const struct sk_buff *skb,
1426 unsigned int mss_now,
1427 unsigned int max_segs,
1428 int nonagle)
1395{ 1429{
1396 const struct tcp_sock *tp = tcp_sk(sk); 1430 const struct tcp_sock *tp = tcp_sk(sk);
1397 u32 needed, window, max_len; 1431 u32 partial, needed, window, max_len;
1398 1432
1399 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; 1433 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1400 max_len = mss_now * max_segs; 1434 max_len = mss_now * max_segs;
@@ -1407,7 +1441,15 @@ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_b
1407 if (max_len <= needed) 1441 if (max_len <= needed)
1408 return max_len; 1442 return max_len;
1409 1443
1410 return needed - needed % mss_now; 1444 partial = needed % mss_now;
1445 /* If last segment is not a full MSS, check if Nagle rules allow us
1446 * to include this last segment in this skb.
1447 * Otherwise, we'll split the skb at last MSS boundary
1448 */
1449 if (tcp_nagle_check(partial != 0, tp, mss_now, nonagle))
1450 return needed - partial;
1451
1452 return needed;
1411} 1453}
1412 1454
1413/* Can at least one segment of SKB be sent right now, according to the 1455/* Can at least one segment of SKB be sent right now, according to the
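
tcp_nagle_check() now receives the "tail segment is partial" decision as a bool from the caller, which lets tcp_mss_split_point() trim the sub-MSS tail only when corking, or Nagle with small packets still unacknowledged, forbids sending it. A condensed sketch of the split decision under those assumptions (small_pkt_unacked stands in for tp->packets_out && tcp_minshall_check(tp)):

	#include <stdbool.h>
	#include <stdio.h>

	#define TCP_NAGLE_CORK 2

	static bool nagle_forbids_partial(bool partial, int nonagle,
					  bool small_pkt_unacked)
	{
		return partial &&
		       ((nonagle & TCP_NAGLE_CORK) ||
			(!nonagle && small_pkt_unacked));
	}

	static unsigned int mss_split_point(unsigned int needed, unsigned int mss,
					    int nonagle, bool small_pkt_unacked)
	{
		unsigned int partial = needed % mss;

		if (nagle_forbids_partial(partial != 0, nonagle, small_pkt_unacked))
			return needed - partial;	/* stop at the last MSS boundary */
		return needed;				/* include the sub-MSS tail */
	}

	int main(void)
	{
		/* 4000 sendable bytes, 1448-byte MSS, corked: trim to 2896 */
		printf("%u\n", mss_split_point(4000, 1448, TCP_NAGLE_CORK, false));
		/* same, no cork and nothing small in flight: send all 4000 */
		printf("%u\n", mss_split_point(4000, 1448, 0, false));
		return 0;
	}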
@@ -1447,28 +1489,6 @@ static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
1447 return tso_segs; 1489 return tso_segs;
1448} 1490}
1449 1491
1450/* Minshall's variant of the Nagle send check. */
1451static inline bool tcp_minshall_check(const struct tcp_sock *tp)
1452{
1453 return after(tp->snd_sml, tp->snd_una) &&
1454 !after(tp->snd_sml, tp->snd_nxt);
1455}
1456
1457/* Return false, if packet can be sent now without violation Nagle's rules:
1458 * 1. It is full sized.
1459 * 2. Or it contains FIN. (already checked by caller)
1460 * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
1461 * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
1462 * With Minshall's modification: all sent small packets are ACKed.
1463 */
1464static inline bool tcp_nagle_check(const struct tcp_sock *tp,
1465 const struct sk_buff *skb,
1466 unsigned int mss_now, int nonagle)
1467{
1468 return skb->len < mss_now &&
1469 ((nonagle & TCP_NAGLE_CORK) ||
1470 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1471}
1472 1492
1473/* Return true if the Nagle test allows this packet to be 1493/* Return true if the Nagle test allows this packet to be
1474 * sent now. 1494 * sent now.
@@ -1489,7 +1509,7 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
1489 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) 1509 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1490 return true; 1510 return true;
1491 1511
1492 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) 1512 if (!tcp_nagle_check(skb->len < cur_mss, tp, cur_mss, nonagle))
1493 return true; 1513 return true;
1494 1514
1495 return false; 1515 return false;
@@ -1892,7 +1912,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1892 limit = tcp_mss_split_point(sk, skb, mss_now, 1912 limit = tcp_mss_split_point(sk, skb, mss_now,
1893 min_t(unsigned int, 1913 min_t(unsigned int,
1894 cwnd_quota, 1914 cwnd_quota,
1895 sk->sk_gso_max_segs)); 1915 sk->sk_gso_max_segs),
1916 nonagle);
1896 1917
1897 if (skb->len > limit && 1918 if (skb->len > limit &&
1898 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) 1919 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
@@ -2756,7 +2777,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2756EXPORT_SYMBOL(tcp_make_synack); 2777EXPORT_SYMBOL(tcp_make_synack);
2757 2778
2758/* Do all connect socket setups that can be done AF independent. */ 2779/* Do all connect socket setups that can be done AF independent. */
2759void tcp_connect_init(struct sock *sk) 2780static void tcp_connect_init(struct sock *sk)
2760{ 2781{
2761 const struct dst_entry *dst = __sk_dst_get(sk); 2782 const struct dst_entry *dst = __sk_dst_get(sk);
2762 struct tcp_sock *tp = tcp_sk(sk); 2783 struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 8b97d71e193b..1f2d37613c9e 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -38,7 +38,7 @@ MODULE_DESCRIPTION("TCP cwnd snooper");
38MODULE_LICENSE("GPL"); 38MODULE_LICENSE("GPL");
39MODULE_VERSION("1.1"); 39MODULE_VERSION("1.1");
40 40
41static int port __read_mostly = 0; 41static int port __read_mostly;
42MODULE_PARM_DESC(port, "Port to match (0=all)"); 42MODULE_PARM_DESC(port, "Port to match (0=all)");
43module_param(port, int, 0); 43module_param(port, int, 0);
44 44
@@ -46,7 +46,7 @@ static unsigned int bufsize __read_mostly = 4096;
46MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); 46MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)");
47module_param(bufsize, uint, 0); 47module_param(bufsize, uint, 0);
48 48
49static unsigned int fwmark __read_mostly = 0; 49static unsigned int fwmark __read_mostly;
50MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)"); 50MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
51module_param(fwmark, uint, 0); 51module_param(fwmark, uint, 0);
52 52
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index a347a078ee07..1a8d271f994d 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -3,7 +3,7 @@
3 * YeAH TCP 3 * YeAH TCP
4 * 4 *
5 * For further details look at: 5 * For further details look at:
6 * http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf 6 * https://web.archive.org/web/20080316215752/http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
7 * 7 *
8 */ 8 */
9#include <linux/mm.h> 9#include <linux/mm.h>
@@ -15,13 +15,13 @@
15 15
16#include "tcp_vegas.h" 16#include "tcp_vegas.h"
17 17
18#define TCP_YEAH_ALPHA 80 //lin number of packets queued at the bottleneck 18#define TCP_YEAH_ALPHA 80 /* number of packets queued at the bottleneck */
19#define TCP_YEAH_GAMMA 1 //lin fraction of queue to be removed per rtt 19#define TCP_YEAH_GAMMA 1 /* fraction of queue to be removed per rtt */
20#define TCP_YEAH_DELTA 3 //log minimum fraction of cwnd to be removed on loss 20#define TCP_YEAH_DELTA 3 /* log minimum fraction of cwnd to be removed on loss */
21#define TCP_YEAH_EPSILON 1 //log maximum fraction to be removed on early decongestion 21#define TCP_YEAH_EPSILON 1 /* log maximum fraction to be removed on early decongestion */
22#define TCP_YEAH_PHY 8 //lin maximum delta from base 22#define TCP_YEAH_PHY 8 /* maximum delta from base */
23#define TCP_YEAH_RHO 16 //lin minimum number of consecutive rtt to consider competition on loss 23#define TCP_YEAH_RHO 16 /* minimum number of consecutive rtt to consider competition on loss */
24#define TCP_YEAH_ZETA 50 //lin minimum number of state switchs to reset reno_count 24#define TCP_YEAH_ZETA 50 /* minimum number of state switches to reset reno_count */
25 25
26#define TCP_SCALABLE_AI_CNT 100U 26#define TCP_SCALABLE_AI_CNT 100U
27 27
@@ -214,9 +214,9 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
214 if (yeah->doing_reno_now < TCP_YEAH_RHO) { 214 if (yeah->doing_reno_now < TCP_YEAH_RHO) {
215 reduction = yeah->lastQ; 215 reduction = yeah->lastQ;
216 216
217 reduction = min( reduction, max(tp->snd_cwnd>>1, 2U) ); 217 reduction = min(reduction, max(tp->snd_cwnd>>1, 2U));
218 218
219 reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); 219 reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
220 } else 220 } else
221 reduction = max(tp->snd_cwnd>>1, 2U); 221 reduction = max(tp->snd_cwnd>>1, 2U);
222 222
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5944d7d668dd..d5d24ecde6a5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -560,15 +560,11 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
560 __be16 sport, __be16 dport, 560 __be16 sport, __be16 dport,
561 struct udp_table *udptable) 561 struct udp_table *udptable)
562{ 562{
563 struct sock *sk;
564 const struct iphdr *iph = ip_hdr(skb); 563 const struct iphdr *iph = ip_hdr(skb);
565 564
566 if (unlikely(sk = skb_steal_sock(skb))) 565 return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport,
567 return sk; 566 iph->daddr, dport, inet_iif(skb),
568 else 567 udptable);
569 return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport,
570 iph->daddr, dport, inet_iif(skb),
571 udptable);
572} 568}
573 569
574struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, 570struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
@@ -990,7 +986,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
990 fl4 = &fl4_stack; 986 fl4 = &fl4_stack;
991 flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, 987 flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
992 RT_SCOPE_UNIVERSE, sk->sk_protocol, 988 RT_SCOPE_UNIVERSE, sk->sk_protocol,
993 inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, 989 inet_sk_flowi_flags(sk),
994 faddr, saddr, dport, inet->inet_sport); 990 faddr, saddr, dport, inet->inet_sport);
995 991
996 security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); 992 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
@@ -999,7 +995,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
999 err = PTR_ERR(rt); 995 err = PTR_ERR(rt);
1000 rt = NULL; 996 rt = NULL;
1001 if (err == -ENETUNREACH) 997 if (err == -ENETUNREACH)
1002 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 998 IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
1003 goto out; 999 goto out;
1004 } 1000 }
1005 1001
@@ -1098,6 +1094,9 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
1098 struct udp_sock *up = udp_sk(sk); 1094 struct udp_sock *up = udp_sk(sk);
1099 int ret; 1095 int ret;
1100 1096
1097 if (flags & MSG_SENDPAGE_NOTLAST)
1098 flags |= MSG_MORE;
1099
1101 if (!up->pending) { 1100 if (!up->pending) {
1102 struct msghdr msg = { .msg_flags = flags|MSG_MORE }; 1101 struct msghdr msg = { .msg_flags = flags|MSG_MORE };
1103 1102
@@ -1236,7 +1235,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1236 bool slow; 1235 bool slow;
1237 1236
1238 if (flags & MSG_ERRQUEUE) 1237 if (flags & MSG_ERRQUEUE)
1239 return ip_recv_error(sk, msg, len); 1238 return ip_recv_error(sk, msg, len, addr_len);
1240 1239
1241try_again: 1240try_again:
1242 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), 1241 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
@@ -1600,12 +1599,16 @@ static void flush_stack(struct sock **stack, unsigned int count,
1600 kfree_skb(skb1); 1599 kfree_skb(skb1);
1601} 1600}
1602 1601
1603static void udp_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 1602/* For TCP sockets, sk_rx_dst is protected by socket lock
1603 * For UDP, we use xchg() to guard against concurrent changes.
1604 */
1605static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
1604{ 1606{
1605 struct dst_entry *dst = skb_dst(skb); 1607 struct dst_entry *old;
1606 1608
1607 dst_hold(dst); 1609 dst_hold(dst);
1608 sk->sk_rx_dst = dst; 1610 old = xchg(&sk->sk_rx_dst, dst);
1611 dst_release(old);
1609} 1612}
1610 1613
1611/* 1614/*
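
Because UDP receive is not serialized by the socket lock here, the cached route is replaced with xchg() so that concurrent updaters each release exactly one old reference. The same publish-and-release pattern in portable C11 atomics, with a hypothetical refcounted object standing in for dst_entry:

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct obj {
		atomic_int refcnt;
	};

	static struct obj *obj_hold(struct obj *o)
	{
		if (o)
			atomic_fetch_add(&o->refcnt, 1);
		return o;
	}

	static void obj_put(struct obj *o)
	{
		if (o && atomic_fetch_sub(&o->refcnt, 1) == 1)
			free(o);
	}

	/* Atomically publish the new object and drop exactly one reference
	 * to whatever was cached before, mirroring dst_hold()/xchg()/
	 * dst_release() above.
	 */
	static void cache_set(struct obj *_Atomic *slot, struct obj *newo)
	{
		struct obj *old;

		obj_hold(newo);
		old = atomic_exchange(slot, newo);
		obj_put(old);
	}

	int main(void)
	{
		struct obj *_Atomic slot = NULL;
		struct obj *a = calloc(1, sizeof(*a));

		atomic_init(&a->refcnt, 1);
		cache_set(&slot, a);	/* slot now holds its own reference */
		obj_put(a);		/* drop the caller's reference */
		cache_set(&slot, NULL);	/* releases the cached reference, frees a */
		return 0;
	}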
@@ -1736,15 +1739,16 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
1736 if (udp4_csum_init(skb, uh, proto)) 1739 if (udp4_csum_init(skb, uh, proto))
1737 goto csum_error; 1740 goto csum_error;
1738 1741
1739 if (skb->sk) { 1742 sk = skb_steal_sock(skb);
1743 if (sk) {
1744 struct dst_entry *dst = skb_dst(skb);
1740 int ret; 1745 int ret;
1741 sk = skb->sk;
1742 1746
1743 if (unlikely(sk->sk_rx_dst == NULL)) 1747 if (unlikely(sk->sk_rx_dst != dst))
1744 udp_sk_rx_dst_set(sk, skb); 1748 udp_sk_rx_dst_set(sk, dst);
1745 1749
1746 ret = udp_queue_rcv_skb(sk, skb); 1750 ret = udp_queue_rcv_skb(sk, skb);
1747 1751 sock_put(sk);
1748 /* a return value > 0 means to resubmit the input, but 1752 /* a return value > 0 means to resubmit the input, but
1749 * it wants the return to be -protocol, or 0 1753 * it wants the return to be -protocol, or 0
1750 */ 1754 */
@@ -1910,17 +1914,20 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
1910 1914
1911void udp_v4_early_demux(struct sk_buff *skb) 1915void udp_v4_early_demux(struct sk_buff *skb)
1912{ 1916{
1913 const struct iphdr *iph = ip_hdr(skb); 1917 struct net *net = dev_net(skb->dev);
1914 const struct udphdr *uh = udp_hdr(skb); 1918 const struct iphdr *iph;
1919 const struct udphdr *uh;
1915 struct sock *sk; 1920 struct sock *sk;
1916 struct dst_entry *dst; 1921 struct dst_entry *dst;
1917 struct net *net = dev_net(skb->dev);
1918 int dif = skb->dev->ifindex; 1922 int dif = skb->dev->ifindex;
1919 1923
1920 /* validate the packet */ 1924 /* validate the packet */
1921 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) 1925 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
1922 return; 1926 return;
1923 1927
1928 iph = ip_hdr(skb);
1929 uh = udp_hdr(skb);
1930
1924 if (skb->pkt_type == PACKET_BROADCAST || 1931 if (skb->pkt_type == PACKET_BROADCAST ||
1925 skb->pkt_type == PACKET_MULTICAST) 1932 skb->pkt_type == PACKET_MULTICAST)
1926 sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, 1933 sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index e3db3f915114..71acd0014f2d 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -48,7 +48,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
48 hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); 48 hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
49 49
50 skb_set_network_header(skb, -x->props.header_len - 50 skb_set_network_header(skb, -x->props.header_len -
51 hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); 51 hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
52 if (x->sel.family != AF_INET6) 52 if (x->sel.family != AF_INET6)
53 skb->network_header += IPV4_BEET_PHMAXLEN; 53 skb->network_header += IPV4_BEET_PHMAXLEN;
54 skb->mac_header = skb->network_header + 54 skb->mac_header = skb->network_header +
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 0b2a0641526a..542074c00c78 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -16,7 +16,7 @@
16 16
17static int xfrm4_init_flags(struct xfrm_state *x) 17static int xfrm4_init_flags(struct xfrm_state *x)
18{ 18{
19 if (ipv4_config.no_pmtu_disc) 19 if (xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
20 x->props.flags |= XFRM_STATE_NOPMTUDISC; 20 x->props.flags |= XFRM_STATE_NOPMTUDISC;
21 return 0; 21 return 0;
22} 22}