Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/af_inet.c        |  93
-rw-r--r--  net/ipv4/devinet.c        |   7
-rw-r--r--  net/ipv4/gre_demux.c      |  24
-rw-r--r--  net/ipv4/icmp.c           |  12
-rw-r--r--  net/ipv4/igmp.c           |  12
-rw-r--r--  net/ipv4/inetpeer.c       |   2
-rw-r--r--  net/ipv4/ip_gre.c         |   6
-rw-r--r--  net/ipv4/ip_options.c     |   6
-rw-r--r--  net/ipv4/proc.c           |  24
-rw-r--r--  net/ipv4/tcp.c            |   8
-rw-r--r--  net/ipv4/tcp_bic.c        |   5
-rw-r--r--  net/ipv4/tcp_cong.c       |  24
-rw-r--r--  net/ipv4/tcp_cubic.c      |   5
-rw-r--r--  net/ipv4/tcp_highspeed.c  |   4
-rw-r--r--  net/ipv4/tcp_htcp.c       |   4
-rw-r--r--  net/ipv4/tcp_hybla.c      |   7
-rw-r--r--  net/ipv4/tcp_illinois.c   |   5
-rw-r--r--  net/ipv4/tcp_input.c      |  31
-rw-r--r--  net/ipv4/tcp_ipv4.c       |  25
-rw-r--r--  net/ipv4/tcp_lp.c         |   5
-rw-r--r--  net/ipv4/tcp_output.c     |  77
-rw-r--r--  net/ipv4/tcp_scalable.c   |   5
-rw-r--r--  net/ipv4/tcp_vegas.c      |   7
-rw-r--r--  net/ipv4/tcp_veno.c       |   9
-rw-r--r--  net/ipv4/tcp_yeah.c       |   5
-rw-r--r--  net/ipv4/udp.c            |  23
26 files changed, 167 insertions(+), 268 deletions(-)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6d6dd345bc4d..211c0cc6c3d3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1476,22 +1476,20 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
 }
 EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
 
-unsigned long snmp_fold_field(void __percpu *mib[], int offt)
+unsigned long snmp_fold_field(void __percpu *mib, int offt)
 {
 	unsigned long res = 0;
-	int i, j;
+	int i;
 
-	for_each_possible_cpu(i) {
-		for (j = 0; j < SNMP_ARRAY_SZ; j++)
-			res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt);
-	}
+	for_each_possible_cpu(i)
+		res += *(((unsigned long *) per_cpu_ptr(mib, i)) + offt);
 	return res;
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
 #if BITS_PER_LONG==32
 
-u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
+u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
 {
 	u64 res = 0;
 	int cpu;
@@ -1502,7 +1500,7 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
 		u64 v;
 		unsigned int start;
 
-		bhptr = per_cpu_ptr(mib[0], cpu);
+		bhptr = per_cpu_ptr(mib, cpu);
 		syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
 		do {
 			start = u64_stats_fetch_begin_irq(syncp);
@@ -1516,25 +1514,6 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
 EXPORT_SYMBOL_GPL(snmp_fold_field64);
 #endif
 
-int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
-{
-	BUG_ON(ptr == NULL);
-	ptr[0] = __alloc_percpu(mibsize, align);
-	if (!ptr[0])
-		return -ENOMEM;
-
-#if SNMP_ARRAY_SZ == 2
-	ptr[1] = __alloc_percpu(mibsize, align);
-	if (!ptr[1]) {
-		free_percpu(ptr[0]);
-		ptr[0] = NULL;
-		return -ENOMEM;
-	}
-#endif
-	return 0;
-}
-EXPORT_SYMBOL_GPL(snmp_mib_init);
-
 #ifdef CONFIG_IP_MULTICAST
 static const struct net_protocol igmp_protocol = {
 	.handler =	igmp_rcv,
@@ -1570,40 +1549,30 @@ static __net_init int ipv4_mib_init_net(struct net *net)
 {
 	int i;
 
-	if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
-			  sizeof(struct tcp_mib),
-			  __alignof__(struct tcp_mib)) < 0)
+	net->mib.tcp_statistics = alloc_percpu(struct tcp_mib);
+	if (!net->mib.tcp_statistics)
 		goto err_tcp_mib;
-	if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
-			  sizeof(struct ipstats_mib),
-			  __alignof__(struct ipstats_mib)) < 0)
+	net->mib.ip_statistics = alloc_percpu(struct ipstats_mib);
+	if (!net->mib.ip_statistics)
 		goto err_ip_mib;
 
 	for_each_possible_cpu(i) {
 		struct ipstats_mib *af_inet_stats;
-		af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i);
+		af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i);
 		u64_stats_init(&af_inet_stats->syncp);
-#if SNMP_ARRAY_SZ == 2
-		af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i);
-		u64_stats_init(&af_inet_stats->syncp);
-#endif
 	}
 
-	if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
-			  sizeof(struct linux_mib),
-			  __alignof__(struct linux_mib)) < 0)
+	net->mib.net_statistics = alloc_percpu(struct linux_mib);
+	if (!net->mib.net_statistics)
 		goto err_net_mib;
-	if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
-			  sizeof(struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+	net->mib.udp_statistics = alloc_percpu(struct udp_mib);
+	if (!net->mib.udp_statistics)
 		goto err_udp_mib;
-	if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
-			  sizeof(struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+	net->mib.udplite_statistics = alloc_percpu(struct udp_mib);
+	if (!net->mib.udplite_statistics)
 		goto err_udplite_mib;
-	if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
-			  sizeof(struct icmp_mib),
-			  __alignof__(struct icmp_mib)) < 0)
+	net->mib.icmp_statistics = alloc_percpu(struct icmp_mib);
+	if (!net->mib.icmp_statistics)
 		goto err_icmp_mib;
 	net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
 					      GFP_KERNEL);
@@ -1614,17 +1583,17 @@ static __net_init int ipv4_mib_init_net(struct net *net)
 	return 0;
 
 err_icmpmsg_mib:
-	snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
+	free_percpu(net->mib.icmp_statistics);
 err_icmp_mib:
-	snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
+	free_percpu(net->mib.udplite_statistics);
 err_udplite_mib:
-	snmp_mib_free((void __percpu **)net->mib.udp_statistics);
+	free_percpu(net->mib.udp_statistics);
 err_udp_mib:
-	snmp_mib_free((void __percpu **)net->mib.net_statistics);
+	free_percpu(net->mib.net_statistics);
 err_net_mib:
-	snmp_mib_free((void __percpu **)net->mib.ip_statistics);
+	free_percpu(net->mib.ip_statistics);
 err_ip_mib:
-	snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
+	free_percpu(net->mib.tcp_statistics);
 err_tcp_mib:
 	return -ENOMEM;
 }
@@ -1632,12 +1601,12 @@ err_tcp_mib:
 static __net_exit void ipv4_mib_exit_net(struct net *net)
 {
 	kfree(net->mib.icmpmsg_statistics);
-	snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
-	snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
-	snmp_mib_free((void __percpu **)net->mib.udp_statistics);
-	snmp_mib_free((void __percpu **)net->mib.net_statistics);
-	snmp_mib_free((void __percpu **)net->mib.ip_statistics);
-	snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
+	free_percpu(net->mib.icmp_statistics);
+	free_percpu(net->mib.udplite_statistics);
+	free_percpu(net->mib.udp_statistics);
+	free_percpu(net->mib.net_statistics);
+	free_percpu(net->mib.ip_statistics);
+	free_percpu(net->mib.tcp_statistics);
 }
 
 static __net_initdata struct pernet_operations ipv4_mib_ops = {
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index bdbf68bb2e2d..78692e46b64f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -106,7 +106,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
 
 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
-static DEFINE_SPINLOCK(inet_addr_hash_lock);
 
 static u32 inet_addr_hash(struct net *net, __be32 addr)
 {
@@ -119,16 +118,14 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
 {
 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
 
-	spin_lock(&inet_addr_hash_lock);
+	ASSERT_RTNL();
 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
-	spin_unlock(&inet_addr_hash_lock);
 }
 
 static void inet_hash_remove(struct in_ifaddr *ifa)
 {
-	spin_lock(&inet_addr_hash_lock);
+	ASSERT_RTNL();
 	hlist_del_init_rcu(&ifa->hash);
-	spin_unlock(&inet_addr_hash_lock);
 }
 
 /**
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 250be7421ab3..fbfd829f4049 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -93,28 +93,6 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 }
 EXPORT_SYMBOL_GPL(gre_build_header);
 
-static __sum16 check_checksum(struct sk_buff *skb)
-{
-	__sum16 csum = 0;
-
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		csum = csum_fold(skb->csum);
-
-		if (!csum)
-			break;
-		/* Fall through. */
-
-	case CHECKSUM_NONE:
-		skb->csum = 0;
-		csum = __skb_checksum_complete(skb);
-		skb->ip_summed = CHECKSUM_COMPLETE;
-		break;
-	}
-
-	return csum;
-}
-
 static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 			    bool *csum_err)
 {
@@ -141,7 +119,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 
 	options = (__be32 *)(greh + 1);
 	if (greh->flags & GRE_CSUM) {
-		if (check_checksum(skb)) {
+		if (skb_checksum_simple_validate(skb)) {
 			*csum_err = true;
 			return -EINVAL;
 		}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 0134663fdbce..fe52666dc43c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -908,16 +908,8 @@ int icmp_rcv(struct sk_buff *skb)
 
 	ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS);
 
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		if (!csum_fold(skb->csum))
-			break;
-		/* fall through */
-	case CHECKSUM_NONE:
-		skb->csum = 0;
-		if (__skb_checksum_complete(skb))
-			goto csum_error;
-	}
+	if (skb_checksum_simple_validate(skb))
+		goto csum_error;
 
 	if (!pskb_pull(skb, sizeof(*icmph)))
 		goto error;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 97e4d1655d26..17d34e3c2ac3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -988,16 +988,8 @@ int igmp_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
 		goto drop;
 
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		if (!csum_fold(skb->csum))
-			break;
-		/* fall through */
-	case CHECKSUM_NONE:
-		skb->csum = 0;
-		if (__skb_checksum_complete(skb))
-			goto drop;
-	}
+	if (skb_checksum_simple_validate(skb))
+		goto drop;
 
 	ih = igmp_hdr(skb);
 	switch (ih->type) {
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 48f424465112..c98cf141f4ed 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -120,7 +120,7 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min
 static void inetpeer_gc_worker(struct work_struct *work)
 {
 	struct inet_peer *p, *n, *c;
-	LIST_HEAD(list);
+	struct list_head list;
 
 	spin_lock_bh(&gc_lock);
 	list_replace_init(&gc_list, &list);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 94213c891565..c5a557a06a31 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -410,7 +410,7 @@ static int ipgre_open(struct net_device *dev)
 		struct flowi4 fl4;
 		struct rtable *rt;
 
-		rt = ip_route_output_gre(dev_net(dev), &fl4,
+		rt = ip_route_output_gre(t->net, &fl4,
 					 t->parms.iph.daddr,
 					 t->parms.iph.saddr,
 					 t->parms.o_key,
@@ -434,7 +434,7 @@ static int ipgre_close(struct net_device *dev)
 
 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
 		struct in_device *in_dev;
-		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
+		in_dev = inetdev_by_index(t->net, t->mlink);
 		if (in_dev)
 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
 	}
@@ -478,7 +478,7 @@ static void __gre_tunnel_init(struct net_device *dev)
 	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
 
-	dev->features		|= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
+	dev->features		|= GRE_FEATURES;
 	dev->hw_features	|= GRE_FEATURES;
 
 	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index f4ab72e19af9..5e7aecea05cd 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -364,7 +364,7 @@ int ip_options_compile(struct net *net,
 			}
 			if (optptr[2] <= optlen) {
 				unsigned char *timeptr = NULL;
-				if (optptr[2]+3 > optptr[1]) {
+				if (optptr[2]+3 > optlen) {
 					pp_ptr = optptr + 2;
 					goto error;
 				}
@@ -376,7 +376,7 @@ int ip_options_compile(struct net *net,
 					optptr[2] += 4;
 					break;
 				case IPOPT_TS_TSANDADDR:
-					if (optptr[2]+7 > optptr[1]) {
+					if (optptr[2]+7 > optlen) {
 						pp_ptr = optptr + 2;
 						goto error;
 					}
@@ -390,7 +390,7 @@ int ip_options_compile(struct net *net,
 					optptr[2] += 8;
 					break;
 				case IPOPT_TS_PRESPEC:
-					if (optptr[2]+7 > optptr[1]) {
+					if (optptr[2]+7 > optlen) {
 						pp_ptr = optptr + 2;
 						goto error;
 					}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ad737fad6d8b..ae0af9386f7c 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -345,15 +345,15 @@ static void icmp_put(struct seq_file *seq)
 	for (i = 0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " Out%s", icmpmibmap[i].name);
 	seq_printf(seq, "\nIcmp: %lu %lu %lu",
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS),
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS),
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS),
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS),
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
 	for (i = 0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
 			   atomic_long_read(ptr + icmpmibmap[i].index));
 	seq_printf(seq, " %lu %lu",
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
 	for (i = 0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
 			   atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
@@ -379,7 +379,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %llu",
-			   snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+			   snmp_fold_field64(net->mib.ip_statistics,
 					     snmp4_ipstats_list[i].entry,
 					     offsetof(struct ipstats_mib, syncp)));
 
@@ -395,11 +395,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		/* MaxConn field is signed, RFC 2012 */
 		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
 			seq_printf(seq, " %ld",
-				   snmp_fold_field((void __percpu **)net->mib.tcp_statistics,
+				   snmp_fold_field(net->mib.tcp_statistics,
 						   snmp4_tcp_list[i].entry));
 		else
 			seq_printf(seq, " %lu",
-				   snmp_fold_field((void __percpu **)net->mib.tcp_statistics,
+				   snmp_fold_field(net->mib.tcp_statistics,
 						   snmp4_tcp_list[i].entry));
 	}
 
@@ -410,7 +410,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nUdp:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.udp_statistics,
+			   snmp_fold_field(net->mib.udp_statistics,
 					   snmp4_udp_list[i].entry));
 
 	/* the UDP and UDP-Lite MIBs are the same */
@@ -421,7 +421,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nUdpLite:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.udplite_statistics,
+			   snmp_fold_field(net->mib.udplite_statistics,
 					   snmp4_udp_list[i].entry));
 
 	seq_putc(seq, '\n');
@@ -458,7 +458,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nTcpExt:");
 	for (i = 0; snmp4_net_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.net_statistics,
+			   snmp_fold_field(net->mib.net_statistics,
 					   snmp4_net_list[i].entry));
 
 	seq_puts(seq, "\nIpExt:");
@@ -468,7 +468,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nIpExt:");
 	for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %llu",
-			   snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+			   snmp_fold_field64(net->mib.ip_statistics,
 					     snmp4_ipextstats_list[i].entry,
 					     offsetof(struct ipstats_mib, syncp)));
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4bd6d52eeffb..eb1dde37e678 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2916,6 +2916,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 	case TCP_USER_TIMEOUT:
 		val = jiffies_to_msecs(icsk->icsk_user_timeout);
 		break;
+
+	case TCP_FASTOPEN:
+		if (icsk->icsk_accept_queue.fastopenq != NULL)
+			val = icsk->icsk_accept_queue.fastopenq->max_qlen;
+		else
+			val = 0;
+		break;
+
 	case TCP_TIMESTAMP:
 		val = tcp_time_stamp + tp->tsoffset;
 		break;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 821846fb0a7e..d5de69bc04f5 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -140,13 +140,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 		ca->cnt = 1;
 }
 
-static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-			      u32 in_flight)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 2b9464c93b88..7b09d8b49fa5 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -276,26 +276,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 	return err;
 }
 
-/* RFC2861 Check whether we are limited by application or congestion window
- * This is the inverse of cwnd check in tcp_tso_should_defer
- */
-bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	u32 left;
-
-	if (in_flight >= tp->snd_cwnd)
-		return true;
-
-	left = tp->snd_cwnd - in_flight;
-	if (sk_can_gso(sk) &&
-	    left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
-	    left < tp->xmit_size_goal_segs)
-		return true;
-	return left <= tcp_max_tso_deferred_mss(tp);
-}
-EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
-
 /* Slow start is used when congestion window is no greater than the slow start
  * threshold. We base on RFC2581 and also handle stretch ACKs properly.
  * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but
@@ -337,11 +317,11 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
 /* This is Jacobson's slow start and congestion avoidance.
  * SIGCOMM '88, p. 328.
  */
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	/* In "safe" area, increase. */
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index b4f1b29b08bd..a9bd8a4828a9 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -304,13 +304,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 		ca->cnt = 1;
 }
 
-static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-			      u32 in_flight)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh) {
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 8b9e7bad77c0..1c4908280d92 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -109,12 +109,12 @@ static void hstcp_init(struct sock *sk)
 	tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
 }
 
-static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct hstcp *ca = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 4a194acfd923..031361311a8b 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -227,12 +227,12 @@ static u32 htcp_recalc_ssthresh(struct sock *sk)
 	return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
 }
 
-static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index a15a799bf768..d8f8f05a4951 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -87,8 +87,7 @@ static inline u32 hybla_fraction(u32 odds)
  *     o Give cwnd a new value based on the model proposed
  *     o remember increments <1
  */
-static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-			     u32 in_flight)
+static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct hybla *ca = inet_csk_ca(sk);
@@ -101,11 +100,11 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
 		ca->minrtt_us = tp->srtt_us;
 	}
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (!ca->hybla_en) {
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 		return;
 	}
 
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 863d105e3015..5999b3972e64 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -255,8 +255,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state)
 /*
  * Increase window in response to successful acknowledgment.
  */
-static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				    u32 in_flight)
+static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct illinois *ca = inet_csk_ca(sk);
@@ -265,7 +264,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked,
 		update_params(sk);
 
 	/* RFC2861 only increase cwnd if fully utilized */
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	/* In slow start */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d6b46eb2f94c..350b2072f0ab 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2938,10 +2938,11 @@ static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
 	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
 }
 
-static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	icsk->icsk_ca_ops->cong_avoid(sk, ack, acked, in_flight);
+
+	icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
 	tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
 }
 
@@ -3364,7 +3365,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	bool is_dupack = false;
-	u32 prior_in_flight;
 	u32 prior_fackets;
 	int prior_packets = tp->packets_out;
 	const int prior_unsacked = tp->packets_out - tp->sacked_out;
@@ -3397,7 +3397,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		flag |= FLAG_SND_UNA_ADVANCED;
 
 	prior_fackets = tp->fackets_out;
-	prior_in_flight = tcp_packets_in_flight(tp);
 
 	/* ts_recent update must be made after we are sure that the packet
 	 * is in window.
@@ -3452,7 +3451,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	/* Advance cwnd if state allows */
 	if (tcp_may_raise_cwnd(sk, flag))
-		tcp_cong_avoid(sk, ack, acked, prior_in_flight);
+		tcp_cong_avoid(sk, ack, acked);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
@@ -4703,28 +4702,6 @@ static int tcp_prune_queue(struct sock *sk)
 	return -1;
 }
 
-/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
- * As additional protections, we do not touch cwnd in retransmission phases,
- * and if application hit its sndbuf limit recently.
- */
-void tcp_cwnd_application_limited(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
-	    sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
-		/* Limited by application or receiver window. */
-		u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
-		u32 win_used = max(tp->snd_cwnd_used, init_win);
-		if (win_used < tp->snd_cwnd) {
-			tp->snd_ssthresh = tcp_current_ssthresh(sk);
-			tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
-		}
-		tp->snd_cwnd_used = 0;
-	}
-	tp->snd_cwnd_stamp = tcp_time_stamp;
-}
-
 static bool tcp_should_expand_sndbuf(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 438f3b95143d..ad166dcc278f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1744,28 +1744,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	return sk;
 }
 
-static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
-{
-	const struct iphdr *iph = ip_hdr(skb);
-
-	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!tcp_v4_check(skb->len, iph->saddr,
-				  iph->daddr, skb->csum)) {
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			return 0;
-		}
-	}
-
-	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
-				       skb->len, IPPROTO_TCP, 0);
-
-	if (skb->len <= 76) {
-		return __skb_checksum_complete(skb);
-	}
-	return 0;
-}
-
-
 /* The socket must have it's spinlock held when we get
  * here.
  *
@@ -1960,7 +1938,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	 * Packet length and doff are validated by header prediction,
 	 * provided case of th->doff==0 is eliminated.
 	 * So, we defer the checks. */
-	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
+
+	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
 		goto csum_error;
 
 	th = tcp_hdr(skb);
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index c9aecae31327..1e70fa8fa793 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -115,13 +115,12 @@ static void tcp_lp_init(struct sock *sk)
  * Will only call newReno CA when away from inference.
  * From TCP-LP's paper, this will be handled in additive increasement.
  */
-static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-			      u32 in_flight)
+static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct lp *lp = inet_csk_ca(sk);
 
 	if (!(lp->flag & LP_WITHIN_INF))
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 }
 
 /**
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 12d6016bdd9a..694711a140d4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -878,15 +878,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
 
 	if (clone_it) {
-		const struct sk_buff *fclone = skb + 1;
-
 		skb_mstamp_get(&skb->skb_mstamp);
 
-		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
-			     fclone->fclone == SKB_FCLONE_CLONE))
-			NET_INC_STATS(sock_net(sk),
-				      LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
-
 		if (unlikely(skb_cloned(skb)))
 			skb = pskb_copy(skb, gfp_mask);
 		else
@@ -1387,12 +1380,35 @@ unsigned int tcp_current_mss(struct sock *sk)
 	return mss_now;
 }
 
-/* Congestion window validation. (RFC2861) */
-static void tcp_cwnd_validate(struct sock *sk)
+/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
+ * As additional protections, we do not touch cwnd in retransmission phases,
+ * and if application hit its sndbuf limit recently.
+ */
+static void tcp_cwnd_application_limited(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tp->packets_out >= tp->snd_cwnd) {
+	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
+	    sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+		/* Limited by application or receiver window. */
+		u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
+		u32 win_used = max(tp->snd_cwnd_used, init_win);
+		if (win_used < tp->snd_cwnd) {
+			tp->snd_ssthresh = tcp_current_ssthresh(sk);
+			tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
+		}
+		tp->snd_cwnd_used = 0;
+	}
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->lsnd_pending = tp->packets_out + unsent_segs;
+
+	if (tcp_is_cwnd_limited(sk)) {
 		/* Network is feed fully. */
 		tp->snd_cwnd_used = 0;
 		tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -1865,7 +1881,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	unsigned int tso_segs, sent_pkts;
+	unsigned int tso_segs, sent_pkts, unsent_segs = 0;
 	int cwnd_quota;
 	int result;
 
@@ -1909,7 +1925,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 				break;
 		} else {
 			if (!push_one && tcp_tso_should_defer(sk, skb))
-				break;
+				goto compute_unsent_segs;
 		}
 
 		/* TCP Small Queues :
@@ -1934,8 +1950,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		 * there is no smp_mb__after_set_bit() yet
 		 */
 		smp_mb__after_clear_bit();
-		if (atomic_read(&sk->sk_wmem_alloc) > limit)
+		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+			u32 unsent_bytes;
+
+compute_unsent_segs:
+			unsent_bytes = tp->write_seq - tp->snd_nxt;
+			unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now);
 			break;
+		}
 	}
 
 		limit = mss_now;
@@ -1975,7 +1997,7 @@ repair:
 		/* Send one loss probe per tail loss episode. */
 		if (push_one != 2)
 			tcp_schedule_loss_probe(sk);
-		tcp_cwnd_validate(sk);
+		tcp_cwnd_validate(sk, unsent_segs);
 		return false;
 	}
 	return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
@@ -2039,6 +2061,25 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	return true;
 }
 
+/* Thanks to skb fast clones, we can detect if a prior transmit of
+ * a packet is still in a qdisc or driver queue.
+ * In this case, there is very little point doing a retransmit !
+ * Note: This is called from BH context only.
+ */
+static bool skb_still_in_host_queue(const struct sock *sk,
+				    const struct sk_buff *skb)
+{
+	const struct sk_buff *fclone = skb + 1;
+
+	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
+		     fclone->fclone == SKB_FCLONE_CLONE)) {
+		NET_INC_STATS_BH(sock_net(sk),
+				 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+		return true;
+	}
+	return false;
+}
+
 /* When probe timeout (PTO) fires, send a new segment if one exists, else
  * retransmit the last segment.
  */
@@ -2064,6 +2105,9 @@ void tcp_send_loss_probe(struct sock *sk)
 	if (WARN_ON(!skb))
 		goto rearm_timer;
 
+	if (skb_still_in_host_queue(sk, skb))
+		goto rearm_timer;
+
 	pcount = tcp_skb_pcount(skb);
 	if (WARN_ON(!pcount))
 		goto rearm_timer;
@@ -2385,6 +2429,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
 		return -EAGAIN;
 
+	if (skb_still_in_host_queue(sk, skb))
+		return -EBUSY;
+
 	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
 		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
 			BUG();
@@ -2478,7 +2525,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		 * see tcp_input.c tcp_sacktag_write_queue().
 		 */
 		TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
-	} else {
+	} else if (err != -EBUSY) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
 	}
 	return err;
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 0ac50836da4d..8250949b8853 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,12 +15,11 @@
 #define TCP_SCALABLE_AI_CNT	50U
 #define TCP_SCALABLE_MD_SCALE	3
 
-static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				    u32 in_flight)
+static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 48539fff6357..9a5e05f27f4f 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -163,14 +163,13 @@ static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
 	return  min(tp->snd_ssthresh, tp->snd_cwnd-1);
 }
 
-static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				 u32 in_flight)
+static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct vegas *vegas = inet_csk_ca(sk);
 
 	if (!vegas->doing_vegas_now) {
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 		return;
 	}
 
@@ -195,7 +194,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked,
 			/* We don't have enough RTT samples to do the Vegas
 			 * calculation, so we'll behave like Reno.
 			 */
-			tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+			tcp_reno_cong_avoid(sk, ack, acked);
 		} else {
 			u32 rtt, diff;
 			u64 target_cwnd;
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 1b8e28fcd7e1..27b9825753d1 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -114,19 +114,18 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 		tcp_veno_init(sk);
 }
 
-static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				u32 in_flight)
+static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct veno *veno = inet_csk_ca(sk);
 
 	if (!veno->doing_veno_now) {
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 		return;
 	}
 
 	/* limited by applications */
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	/* We do the Veno calculations only if we got enough rtt samples */
@@ -134,7 +133,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked,
 		/* We don't have enough rtt samples to do the Veno
 		 * calculation, so we'll behave like Reno.
 		 */
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 	} else {
 		u64 target_cwnd;
 		u32 rtt;
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 5ede0e727945..599b79b8eac0 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -69,13 +69,12 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us)
 	tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us);
 }
 
-static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				u32 in_flight)
+static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct yeah *yeah = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4468e1adc094..54ea0a3a48f1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1495,6 +1495,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {
 		int ret;
 
+		/* Verify checksum before giving to encap */
+		if (udp_lib_checksum_complete(skb))
+			goto csum_error;
+
 		ret = encap_rcv(sk, skb);
 		if (ret <= 0) {
 			UDP_INC_STATS_BH(sock_net(sk),
@@ -1672,7 +1676,6 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
 				 int proto)
 {
-	const struct iphdr *iph;
 	int err;
 
 	UDP_SKB_CB(skb)->partial_cov = 0;
@@ -1684,22 +1687,8 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
 		return err;
 	}
 
-	iph = ip_hdr(skb);
-	if (uh->check == 0) {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
-				       proto, skb->csum))
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-	}
-	if (!skb_csum_unnecessary(skb))
-		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
-					       skb->len, proto, 0);
-	/* Probably, we should checksum udp header (it should be in cache
-	 * in any case) and data in tiny packets (< rx copybreak).
-	 */
-
-	return 0;
+	return skb_checksum_init_zero_check(skb, proto, uh->check,
+					    inet_compute_pseudo);
 }
 
 /*