diff options
Diffstat (limited to 'net/ipv4')
52 files changed, 715 insertions, 756 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6d6dd345bc4d..d5e6836cf772 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
| @@ -254,7 +254,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, | |||
| 254 | struct inet_sock *inet; | 254 | struct inet_sock *inet; |
| 255 | struct proto *answer_prot; | 255 | struct proto *answer_prot; |
| 256 | unsigned char answer_flags; | 256 | unsigned char answer_flags; |
| 257 | char answer_no_check; | ||
| 258 | int try_loading_module = 0; | 257 | int try_loading_module = 0; |
| 259 | int err; | 258 | int err; |
| 260 | 259 | ||
| @@ -312,7 +311,6 @@ lookup_protocol: | |||
| 312 | 311 | ||
| 313 | sock->ops = answer->ops; | 312 | sock->ops = answer->ops; |
| 314 | answer_prot = answer->prot; | 313 | answer_prot = answer->prot; |
| 315 | answer_no_check = answer->no_check; | ||
| 316 | answer_flags = answer->flags; | 314 | answer_flags = answer->flags; |
| 317 | rcu_read_unlock(); | 315 | rcu_read_unlock(); |
| 318 | 316 | ||
| @@ -324,7 +322,6 @@ lookup_protocol: | |||
| 324 | goto out; | 322 | goto out; |
| 325 | 323 | ||
| 326 | err = 0; | 324 | err = 0; |
| 327 | sk->sk_no_check = answer_no_check; | ||
| 328 | if (INET_PROTOSW_REUSE & answer_flags) | 325 | if (INET_PROTOSW_REUSE & answer_flags) |
| 329 | sk->sk_reuse = SK_CAN_REUSE; | 326 | sk->sk_reuse = SK_CAN_REUSE; |
| 330 | 327 | ||
| @@ -1002,7 +999,6 @@ static struct inet_protosw inetsw_array[] = | |||
| 1002 | .protocol = IPPROTO_TCP, | 999 | .protocol = IPPROTO_TCP, |
| 1003 | .prot = &tcp_prot, | 1000 | .prot = &tcp_prot, |
| 1004 | .ops = &inet_stream_ops, | 1001 | .ops = &inet_stream_ops, |
| 1005 | .no_check = 0, | ||
| 1006 | .flags = INET_PROTOSW_PERMANENT | | 1002 | .flags = INET_PROTOSW_PERMANENT | |
| 1007 | INET_PROTOSW_ICSK, | 1003 | INET_PROTOSW_ICSK, |
| 1008 | }, | 1004 | }, |
| @@ -1012,7 +1008,6 @@ static struct inet_protosw inetsw_array[] = | |||
| 1012 | .protocol = IPPROTO_UDP, | 1008 | .protocol = IPPROTO_UDP, |
| 1013 | .prot = &udp_prot, | 1009 | .prot = &udp_prot, |
| 1014 | .ops = &inet_dgram_ops, | 1010 | .ops = &inet_dgram_ops, |
| 1015 | .no_check = UDP_CSUM_DEFAULT, | ||
| 1016 | .flags = INET_PROTOSW_PERMANENT, | 1011 | .flags = INET_PROTOSW_PERMANENT, |
| 1017 | }, | 1012 | }, |
| 1018 | 1013 | ||
| @@ -1021,7 +1016,6 @@ static struct inet_protosw inetsw_array[] = | |||
| 1021 | .protocol = IPPROTO_ICMP, | 1016 | .protocol = IPPROTO_ICMP, |
| 1022 | .prot = &ping_prot, | 1017 | .prot = &ping_prot, |
| 1023 | .ops = &inet_dgram_ops, | 1018 | .ops = &inet_dgram_ops, |
| 1024 | .no_check = UDP_CSUM_DEFAULT, | ||
| 1025 | .flags = INET_PROTOSW_REUSE, | 1019 | .flags = INET_PROTOSW_REUSE, |
| 1026 | }, | 1020 | }, |
| 1027 | 1021 | ||
| @@ -1030,7 +1024,6 @@ static struct inet_protosw inetsw_array[] = | |||
| 1030 | .protocol = IPPROTO_IP, /* wild card */ | 1024 | .protocol = IPPROTO_IP, /* wild card */ |
| 1031 | .prot = &raw_prot, | 1025 | .prot = &raw_prot, |
| 1032 | .ops = &inet_sockraw_ops, | 1026 | .ops = &inet_sockraw_ops, |
| 1033 | .no_check = UDP_CSUM_DEFAULT, | ||
| 1034 | .flags = INET_PROTOSW_REUSE, | 1027 | .flags = INET_PROTOSW_REUSE, |
| 1035 | } | 1028 | } |
| 1036 | }; | 1029 | }; |
| @@ -1261,10 +1254,12 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, | |||
| 1261 | SKB_GSO_DODGY | | 1254 | SKB_GSO_DODGY | |
| 1262 | SKB_GSO_TCP_ECN | | 1255 | SKB_GSO_TCP_ECN | |
| 1263 | SKB_GSO_GRE | | 1256 | SKB_GSO_GRE | |
| 1257 | SKB_GSO_GRE_CSUM | | ||
| 1264 | SKB_GSO_IPIP | | 1258 | SKB_GSO_IPIP | |
| 1265 | SKB_GSO_SIT | | 1259 | SKB_GSO_SIT | |
| 1266 | SKB_GSO_TCPV6 | | 1260 | SKB_GSO_TCPV6 | |
| 1267 | SKB_GSO_UDP_TUNNEL | | 1261 | SKB_GSO_UDP_TUNNEL | |
| 1262 | SKB_GSO_UDP_TUNNEL_CSUM | | ||
| 1268 | SKB_GSO_MPLS | | 1263 | SKB_GSO_MPLS | |
| 1269 | 0))) | 1264 | 0))) |
| 1270 | goto out; | 1265 | goto out; |
| @@ -1476,22 +1471,20 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, | |||
| 1476 | } | 1471 | } |
| 1477 | EXPORT_SYMBOL_GPL(inet_ctl_sock_create); | 1472 | EXPORT_SYMBOL_GPL(inet_ctl_sock_create); |
| 1478 | 1473 | ||
| 1479 | unsigned long snmp_fold_field(void __percpu *mib[], int offt) | 1474 | unsigned long snmp_fold_field(void __percpu *mib, int offt) |
| 1480 | { | 1475 | { |
| 1481 | unsigned long res = 0; | 1476 | unsigned long res = 0; |
| 1482 | int i, j; | 1477 | int i; |
| 1483 | 1478 | ||
| 1484 | for_each_possible_cpu(i) { | 1479 | for_each_possible_cpu(i) |
| 1485 | for (j = 0; j < SNMP_ARRAY_SZ; j++) | 1480 | res += *(((unsigned long *) per_cpu_ptr(mib, i)) + offt); |
| 1486 | res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt); | ||
| 1487 | } | ||
| 1488 | return res; | 1481 | return res; |
| 1489 | } | 1482 | } |
| 1490 | EXPORT_SYMBOL_GPL(snmp_fold_field); | 1483 | EXPORT_SYMBOL_GPL(snmp_fold_field); |
| 1491 | 1484 | ||
| 1492 | #if BITS_PER_LONG==32 | 1485 | #if BITS_PER_LONG==32 |
| 1493 | 1486 | ||
| 1494 | u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) | 1487 | u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset) |
| 1495 | { | 1488 | { |
| 1496 | u64 res = 0; | 1489 | u64 res = 0; |
| 1497 | int cpu; | 1490 | int cpu; |
| @@ -1502,7 +1495,7 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) | |||
| 1502 | u64 v; | 1495 | u64 v; |
| 1503 | unsigned int start; | 1496 | unsigned int start; |
| 1504 | 1497 | ||
| 1505 | bhptr = per_cpu_ptr(mib[0], cpu); | 1498 | bhptr = per_cpu_ptr(mib, cpu); |
| 1506 | syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); | 1499 | syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); |
| 1507 | do { | 1500 | do { |
| 1508 | start = u64_stats_fetch_begin_irq(syncp); | 1501 | start = u64_stats_fetch_begin_irq(syncp); |
| @@ -1516,25 +1509,6 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) | |||
| 1516 | EXPORT_SYMBOL_GPL(snmp_fold_field64); | 1509 | EXPORT_SYMBOL_GPL(snmp_fold_field64); |
| 1517 | #endif | 1510 | #endif |
| 1518 | 1511 | ||
| 1519 | int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) | ||
| 1520 | { | ||
| 1521 | BUG_ON(ptr == NULL); | ||
| 1522 | ptr[0] = __alloc_percpu(mibsize, align); | ||
| 1523 | if (!ptr[0]) | ||
| 1524 | return -ENOMEM; | ||
| 1525 | |||
| 1526 | #if SNMP_ARRAY_SZ == 2 | ||
| 1527 | ptr[1] = __alloc_percpu(mibsize, align); | ||
| 1528 | if (!ptr[1]) { | ||
| 1529 | free_percpu(ptr[0]); | ||
| 1530 | ptr[0] = NULL; | ||
| 1531 | return -ENOMEM; | ||
| 1532 | } | ||
| 1533 | #endif | ||
| 1534 | return 0; | ||
| 1535 | } | ||
| 1536 | EXPORT_SYMBOL_GPL(snmp_mib_init); | ||
| 1537 | |||
| 1538 | #ifdef CONFIG_IP_MULTICAST | 1512 | #ifdef CONFIG_IP_MULTICAST |
| 1539 | static const struct net_protocol igmp_protocol = { | 1513 | static const struct net_protocol igmp_protocol = { |
| 1540 | .handler = igmp_rcv, | 1514 | .handler = igmp_rcv, |
| @@ -1570,40 +1544,30 @@ static __net_init int ipv4_mib_init_net(struct net *net) | |||
| 1570 | { | 1544 | { |
| 1571 | int i; | 1545 | int i; |
| 1572 | 1546 | ||
| 1573 | if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, | 1547 | net->mib.tcp_statistics = alloc_percpu(struct tcp_mib); |
| 1574 | sizeof(struct tcp_mib), | 1548 | if (!net->mib.tcp_statistics) |
| 1575 | __alignof__(struct tcp_mib)) < 0) | ||
| 1576 | goto err_tcp_mib; | 1549 | goto err_tcp_mib; |
| 1577 | if (snmp_mib_init((void __percpu **)net->mib.ip_statistics, | 1550 | net->mib.ip_statistics = alloc_percpu(struct ipstats_mib); |
| 1578 | sizeof(struct ipstats_mib), | 1551 | if (!net->mib.ip_statistics) |
| 1579 | __alignof__(struct ipstats_mib)) < 0) | ||
| 1580 | goto err_ip_mib; | 1552 | goto err_ip_mib; |
| 1581 | 1553 | ||
| 1582 | for_each_possible_cpu(i) { | 1554 | for_each_possible_cpu(i) { |
| 1583 | struct ipstats_mib *af_inet_stats; | 1555 | struct ipstats_mib *af_inet_stats; |
| 1584 | af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i); | 1556 | af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i); |
| 1585 | u64_stats_init(&af_inet_stats->syncp); | 1557 | u64_stats_init(&af_inet_stats->syncp); |
| 1586 | #if SNMP_ARRAY_SZ == 2 | ||
| 1587 | af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i); | ||
| 1588 | u64_stats_init(&af_inet_stats->syncp); | ||
| 1589 | #endif | ||
| 1590 | } | 1558 | } |
| 1591 | 1559 | ||
| 1592 | if (snmp_mib_init((void __percpu **)net->mib.net_statistics, | 1560 | net->mib.net_statistics = alloc_percpu(struct linux_mib); |
| 1593 | sizeof(struct linux_mib), | 1561 | if (!net->mib.net_statistics) |
| 1594 | __alignof__(struct linux_mib)) < 0) | ||
| 1595 | goto err_net_mib; | 1562 | goto err_net_mib; |
| 1596 | if (snmp_mib_init((void __percpu **)net->mib.udp_statistics, | 1563 | net->mib.udp_statistics = alloc_percpu(struct udp_mib); |
| 1597 | sizeof(struct udp_mib), | 1564 | if (!net->mib.udp_statistics) |
| 1598 | __alignof__(struct udp_mib)) < 0) | ||
| 1599 | goto err_udp_mib; | 1565 | goto err_udp_mib; |
| 1600 | if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics, | 1566 | net->mib.udplite_statistics = alloc_percpu(struct udp_mib); |
| 1601 | sizeof(struct udp_mib), | 1567 | if (!net->mib.udplite_statistics) |
| 1602 | __alignof__(struct udp_mib)) < 0) | ||
| 1603 | goto err_udplite_mib; | 1568 | goto err_udplite_mib; |
| 1604 | if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics, | 1569 | net->mib.icmp_statistics = alloc_percpu(struct icmp_mib); |
| 1605 | sizeof(struct icmp_mib), | 1570 | if (!net->mib.icmp_statistics) |
| 1606 | __alignof__(struct icmp_mib)) < 0) | ||
| 1607 | goto err_icmp_mib; | 1571 | goto err_icmp_mib; |
| 1608 | net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib), | 1572 | net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib), |
| 1609 | GFP_KERNEL); | 1573 | GFP_KERNEL); |
| @@ -1614,17 +1578,17 @@ static __net_init int ipv4_mib_init_net(struct net *net) | |||
| 1614 | return 0; | 1578 | return 0; |
| 1615 | 1579 | ||
| 1616 | err_icmpmsg_mib: | 1580 | err_icmpmsg_mib: |
| 1617 | snmp_mib_free((void __percpu **)net->mib.icmp_statistics); | 1581 | free_percpu(net->mib.icmp_statistics); |
| 1618 | err_icmp_mib: | 1582 | err_icmp_mib: |
| 1619 | snmp_mib_free((void __percpu **)net->mib.udplite_statistics); | 1583 | free_percpu(net->mib.udplite_statistics); |
| 1620 | err_udplite_mib: | 1584 | err_udplite_mib: |
| 1621 | snmp_mib_free((void __percpu **)net->mib.udp_statistics); | 1585 | free_percpu(net->mib.udp_statistics); |
| 1622 | err_udp_mib: | 1586 | err_udp_mib: |
| 1623 | snmp_mib_free((void __percpu **)net->mib.net_statistics); | 1587 | free_percpu(net->mib.net_statistics); |
| 1624 | err_net_mib: | 1588 | err_net_mib: |
| 1625 | snmp_mib_free((void __percpu **)net->mib.ip_statistics); | 1589 | free_percpu(net->mib.ip_statistics); |
| 1626 | err_ip_mib: | 1590 | err_ip_mib: |
| 1627 | snmp_mib_free((void __percpu **)net->mib.tcp_statistics); | 1591 | free_percpu(net->mib.tcp_statistics); |
| 1628 | err_tcp_mib: | 1592 | err_tcp_mib: |
| 1629 | return -ENOMEM; | 1593 | return -ENOMEM; |
| 1630 | } | 1594 | } |
| @@ -1632,12 +1596,12 @@ err_tcp_mib: | |||
| 1632 | static __net_exit void ipv4_mib_exit_net(struct net *net) | 1596 | static __net_exit void ipv4_mib_exit_net(struct net *net) |
| 1633 | { | 1597 | { |
| 1634 | kfree(net->mib.icmpmsg_statistics); | 1598 | kfree(net->mib.icmpmsg_statistics); |
| 1635 | snmp_mib_free((void __percpu **)net->mib.icmp_statistics); | 1599 | free_percpu(net->mib.icmp_statistics); |
| 1636 | snmp_mib_free((void __percpu **)net->mib.udplite_statistics); | 1600 | free_percpu(net->mib.udplite_statistics); |
| 1637 | snmp_mib_free((void __percpu **)net->mib.udp_statistics); | 1601 | free_percpu(net->mib.udp_statistics); |
| 1638 | snmp_mib_free((void __percpu **)net->mib.net_statistics); | 1602 | free_percpu(net->mib.net_statistics); |
| 1639 | snmp_mib_free((void __percpu **)net->mib.ip_statistics); | 1603 | free_percpu(net->mib.ip_statistics); |
| 1640 | snmp_mib_free((void __percpu **)net->mib.tcp_statistics); | 1604 | free_percpu(net->mib.tcp_statistics); |
| 1641 | } | 1605 | } |
| 1642 | 1606 | ||
| 1643 | static __net_initdata struct pernet_operations ipv4_mib_ops = { | 1607 | static __net_initdata struct pernet_operations ipv4_mib_ops = { |
| @@ -1736,13 +1700,9 @@ static int __init inet_init(void) | |||
| 1736 | 1700 | ||
| 1737 | BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); | 1701 | BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); |
| 1738 | 1702 | ||
| 1739 | sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); | ||
| 1740 | if (!sysctl_local_reserved_ports) | ||
| 1741 | goto out; | ||
| 1742 | |||
| 1743 | rc = proto_register(&tcp_prot, 1); | 1703 | rc = proto_register(&tcp_prot, 1); |
| 1744 | if (rc) | 1704 | if (rc) |
| 1745 | goto out_free_reserved_ports; | 1705 | goto out; |
| 1746 | 1706 | ||
| 1747 | rc = proto_register(&udp_prot, 1); | 1707 | rc = proto_register(&udp_prot, 1); |
| 1748 | if (rc) | 1708 | if (rc) |
| @@ -1852,8 +1812,6 @@ out_unregister_udp_proto: | |||
| 1852 | proto_unregister(&udp_prot); | 1812 | proto_unregister(&udp_prot); |
| 1853 | out_unregister_tcp_proto: | 1813 | out_unregister_tcp_proto: |
| 1854 | proto_unregister(&tcp_prot); | 1814 | proto_unregister(&tcp_prot); |
| 1855 | out_free_reserved_ports: | ||
| 1856 | kfree(sysctl_local_reserved_ports); | ||
| 1857 | goto out; | 1815 | goto out; |
| 1858 | } | 1816 | } |
| 1859 | 1817 | ||
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 8b5134c582f1..a3095fdefbed 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c | |||
| @@ -86,18 +86,26 @@ out: | |||
| 86 | } | 86 | } |
| 87 | EXPORT_SYMBOL(ip4_datagram_connect); | 87 | EXPORT_SYMBOL(ip4_datagram_connect); |
| 88 | 88 | ||
| 89 | /* Because UDP xmit path can manipulate sk_dst_cache without holding | ||
| 90 | * socket lock, we need to use sk_dst_set() here, | ||
| 91 | * even if we own the socket lock. | ||
| 92 | */ | ||
| 89 | void ip4_datagram_release_cb(struct sock *sk) | 93 | void ip4_datagram_release_cb(struct sock *sk) |
| 90 | { | 94 | { |
| 91 | const struct inet_sock *inet = inet_sk(sk); | 95 | const struct inet_sock *inet = inet_sk(sk); |
| 92 | const struct ip_options_rcu *inet_opt; | 96 | const struct ip_options_rcu *inet_opt; |
| 93 | __be32 daddr = inet->inet_daddr; | 97 | __be32 daddr = inet->inet_daddr; |
| 98 | struct dst_entry *dst; | ||
| 94 | struct flowi4 fl4; | 99 | struct flowi4 fl4; |
| 95 | struct rtable *rt; | 100 | struct rtable *rt; |
| 96 | 101 | ||
| 97 | if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0)) | ||
| 98 | return; | ||
| 99 | |||
| 100 | rcu_read_lock(); | 102 | rcu_read_lock(); |
| 103 | |||
| 104 | dst = __sk_dst_get(sk); | ||
| 105 | if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) { | ||
| 106 | rcu_read_unlock(); | ||
| 107 | return; | ||
| 108 | } | ||
| 101 | inet_opt = rcu_dereference(inet->inet_opt); | 109 | inet_opt = rcu_dereference(inet->inet_opt); |
| 102 | if (inet_opt && inet_opt->opt.srr) | 110 | if (inet_opt && inet_opt->opt.srr) |
| 103 | daddr = inet_opt->opt.faddr; | 111 | daddr = inet_opt->opt.faddr; |
| @@ -105,8 +113,10 @@ void ip4_datagram_release_cb(struct sock *sk) | |||
| 105 | inet->inet_saddr, inet->inet_dport, | 113 | inet->inet_saddr, inet->inet_dport, |
| 106 | inet->inet_sport, sk->sk_protocol, | 114 | inet->inet_sport, sk->sk_protocol, |
| 107 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); | 115 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); |
| 108 | if (!IS_ERR(rt)) | 116 | |
| 109 | __sk_dst_set(sk, &rt->dst); | 117 | dst = !IS_ERR(rt) ? &rt->dst : NULL; |
| 118 | sk_dst_set(sk, dst); | ||
| 119 | |||
| 110 | rcu_read_unlock(); | 120 | rcu_read_unlock(); |
| 111 | } | 121 | } |
| 112 | EXPORT_SYMBOL_GPL(ip4_datagram_release_cb); | 122 | EXPORT_SYMBOL_GPL(ip4_datagram_release_cb); |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index bdbf68bb2e2d..e9449376b58e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
| @@ -106,7 +106,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { | |||
| 106 | #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) | 106 | #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) |
| 107 | 107 | ||
| 108 | static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; | 108 | static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; |
| 109 | static DEFINE_SPINLOCK(inet_addr_hash_lock); | ||
| 110 | 109 | ||
| 111 | static u32 inet_addr_hash(struct net *net, __be32 addr) | 110 | static u32 inet_addr_hash(struct net *net, __be32 addr) |
| 112 | { | 111 | { |
| @@ -119,16 +118,14 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) | |||
| 119 | { | 118 | { |
| 120 | u32 hash = inet_addr_hash(net, ifa->ifa_local); | 119 | u32 hash = inet_addr_hash(net, ifa->ifa_local); |
| 121 | 120 | ||
| 122 | spin_lock(&inet_addr_hash_lock); | 121 | ASSERT_RTNL(); |
| 123 | hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); | 122 | hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); |
| 124 | spin_unlock(&inet_addr_hash_lock); | ||
| 125 | } | 123 | } |
| 126 | 124 | ||
| 127 | static void inet_hash_remove(struct in_ifaddr *ifa) | 125 | static void inet_hash_remove(struct in_ifaddr *ifa) |
| 128 | { | 126 | { |
| 129 | spin_lock(&inet_addr_hash_lock); | 127 | ASSERT_RTNL(); |
| 130 | hlist_del_init_rcu(&ifa->hash); | 128 | hlist_del_init_rcu(&ifa->hash); |
| 131 | spin_unlock(&inet_addr_hash_lock); | ||
| 132 | } | 129 | } |
| 133 | 130 | ||
| 134 | /** | 131 | /** |
| @@ -830,7 +827,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
| 830 | ifa_existing = find_matching_ifa(ifa); | 827 | ifa_existing = find_matching_ifa(ifa); |
| 831 | if (!ifa_existing) { | 828 | if (!ifa_existing) { |
| 832 | /* It would be best to check for !NLM_F_CREATE here but | 829 | /* It would be best to check for !NLM_F_CREATE here but |
| 833 | * userspace alreay relies on not having to provide this. | 830 | * userspace already relies on not having to provide this. |
| 834 | */ | 831 | */ |
| 835 | set_ifa_lifetime(ifa, valid_lft, prefered_lft); | 832 | set_ifa_lifetime(ifa, valid_lft, prefered_lft); |
| 836 | return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); | 833 | return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); |
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 250be7421ab3..4e9619bca732 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c | |||
| @@ -84,7 +84,8 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, | |||
| 84 | ptr--; | 84 | ptr--; |
| 85 | } | 85 | } |
| 86 | if (tpi->flags&TUNNEL_CSUM && | 86 | if (tpi->flags&TUNNEL_CSUM && |
| 87 | !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { | 87 | !(skb_shinfo(skb)->gso_type & |
| 88 | (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) { | ||
| 88 | *ptr = 0; | 89 | *ptr = 0; |
| 89 | *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, | 90 | *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, |
| 90 | skb->len, 0)); | 91 | skb->len, 0)); |
| @@ -93,28 +94,6 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, | |||
| 93 | } | 94 | } |
| 94 | EXPORT_SYMBOL_GPL(gre_build_header); | 95 | EXPORT_SYMBOL_GPL(gre_build_header); |
| 95 | 96 | ||
| 96 | static __sum16 check_checksum(struct sk_buff *skb) | ||
| 97 | { | ||
| 98 | __sum16 csum = 0; | ||
| 99 | |||
| 100 | switch (skb->ip_summed) { | ||
| 101 | case CHECKSUM_COMPLETE: | ||
| 102 | csum = csum_fold(skb->csum); | ||
| 103 | |||
| 104 | if (!csum) | ||
| 105 | break; | ||
| 106 | /* Fall through. */ | ||
| 107 | |||
| 108 | case CHECKSUM_NONE: | ||
| 109 | skb->csum = 0; | ||
| 110 | csum = __skb_checksum_complete(skb); | ||
| 111 | skb->ip_summed = CHECKSUM_COMPLETE; | ||
| 112 | break; | ||
| 113 | } | ||
| 114 | |||
| 115 | return csum; | ||
| 116 | } | ||
| 117 | |||
| 118 | static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, | 97 | static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, |
| 119 | bool *csum_err) | 98 | bool *csum_err) |
| 120 | { | 99 | { |
| @@ -141,7 +120,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, | |||
| 141 | 120 | ||
| 142 | options = (__be32 *)(greh + 1); | 121 | options = (__be32 *)(greh + 1); |
| 143 | if (greh->flags & GRE_CSUM) { | 122 | if (greh->flags & GRE_CSUM) { |
| 144 | if (check_checksum(skb)) { | 123 | if (skb_checksum_simple_validate(skb)) { |
| 145 | *csum_err = true; | 124 | *csum_err = true; |
| 146 | return -EINVAL; | 125 | return -EINVAL; |
| 147 | } | 126 | } |
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index f1d32280cb54..eb92deb12666 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c | |||
| @@ -42,6 +42,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, | |||
| 42 | SKB_GSO_DODGY | | 42 | SKB_GSO_DODGY | |
| 43 | SKB_GSO_TCP_ECN | | 43 | SKB_GSO_TCP_ECN | |
| 44 | SKB_GSO_GRE | | 44 | SKB_GSO_GRE | |
| 45 | SKB_GSO_GRE_CSUM | | ||
| 45 | SKB_GSO_IPIP))) | 46 | SKB_GSO_IPIP))) |
| 46 | goto out; | 47 | goto out; |
| 47 | 48 | ||
| @@ -55,6 +56,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, | |||
| 55 | goto out; | 56 | goto out; |
| 56 | 57 | ||
| 57 | csum = !!(greh->flags & GRE_CSUM); | 58 | csum = !!(greh->flags & GRE_CSUM); |
| 59 | if (csum) | ||
| 60 | skb->encap_hdr_csum = 1; | ||
| 58 | 61 | ||
| 59 | if (unlikely(!pskb_may_pull(skb, ghl))) | 62 | if (unlikely(!pskb_may_pull(skb, ghl))) |
| 60 | goto out; | 63 | goto out; |
| @@ -94,10 +97,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, | |||
| 94 | } | 97 | } |
| 95 | } | 98 | } |
| 96 | 99 | ||
| 97 | greh = (struct gre_base_hdr *)(skb->data); | 100 | skb_reset_transport_header(skb); |
| 101 | |||
| 102 | greh = (struct gre_base_hdr *) | ||
| 103 | skb_transport_header(skb); | ||
| 98 | pcsum = (__be32 *)(greh + 1); | 104 | pcsum = (__be32 *)(greh + 1); |
| 99 | *pcsum = 0; | 105 | *pcsum = 0; |
| 100 | *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); | 106 | *(__sum16 *)pcsum = gso_make_checksum(skb, 0); |
| 101 | } | 107 | } |
| 102 | __skb_push(skb, tnl_hlen - ghl); | 108 | __skb_push(skb, tnl_hlen - ghl); |
| 103 | 109 | ||
| @@ -125,10 +131,12 @@ static __sum16 gro_skb_checksum(struct sk_buff *skb) | |||
| 125 | csum_partial(skb->data, skb_gro_offset(skb), 0)); | 131 | csum_partial(skb->data, skb_gro_offset(skb), 0)); |
| 126 | sum = csum_fold(NAPI_GRO_CB(skb)->csum); | 132 | sum = csum_fold(NAPI_GRO_CB(skb)->csum); |
| 127 | if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) { | 133 | if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) { |
| 128 | if (unlikely(!sum)) | 134 | if (unlikely(!sum) && !skb->csum_complete_sw) |
| 129 | netdev_rx_csum_fault(skb->dev); | 135 | netdev_rx_csum_fault(skb->dev); |
| 130 | } else | 136 | } else { |
| 131 | skb->ip_summed = CHECKSUM_COMPLETE; | 137 | skb->ip_summed = CHECKSUM_COMPLETE; |
| 138 | skb->csum_complete_sw = 1; | ||
| 139 | } | ||
| 132 | 140 | ||
| 133 | return sum; | 141 | return sum; |
| 134 | } | 142 | } |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 0134663fdbce..79c3d947a481 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
| @@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
| 337 | struct sock *sk; | 337 | struct sock *sk; |
| 338 | struct inet_sock *inet; | 338 | struct inet_sock *inet; |
| 339 | __be32 daddr, saddr; | 339 | __be32 daddr, saddr; |
| 340 | u32 mark = IP4_REPLY_MARK(net, skb->mark); | ||
| 340 | 341 | ||
| 341 | if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) | 342 | if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) |
| 342 | return; | 343 | return; |
| @@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
| 349 | icmp_param->data.icmph.checksum = 0; | 350 | icmp_param->data.icmph.checksum = 0; |
| 350 | 351 | ||
| 351 | inet->tos = ip_hdr(skb)->tos; | 352 | inet->tos = ip_hdr(skb)->tos; |
| 353 | sk->sk_mark = mark; | ||
| 352 | daddr = ipc.addr = ip_hdr(skb)->saddr; | 354 | daddr = ipc.addr = ip_hdr(skb)->saddr; |
| 353 | saddr = fib_compute_spec_dst(skb); | 355 | saddr = fib_compute_spec_dst(skb); |
| 354 | ipc.opt = NULL; | 356 | ipc.opt = NULL; |
| @@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
| 364 | memset(&fl4, 0, sizeof(fl4)); | 366 | memset(&fl4, 0, sizeof(fl4)); |
| 365 | fl4.daddr = daddr; | 367 | fl4.daddr = daddr; |
| 366 | fl4.saddr = saddr; | 368 | fl4.saddr = saddr; |
| 369 | fl4.flowi4_mark = mark; | ||
| 367 | fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); | 370 | fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); |
| 368 | fl4.flowi4_proto = IPPROTO_ICMP; | 371 | fl4.flowi4_proto = IPPROTO_ICMP; |
| 369 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); | 372 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); |
| @@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net, | |||
| 382 | struct flowi4 *fl4, | 385 | struct flowi4 *fl4, |
| 383 | struct sk_buff *skb_in, | 386 | struct sk_buff *skb_in, |
| 384 | const struct iphdr *iph, | 387 | const struct iphdr *iph, |
| 385 | __be32 saddr, u8 tos, | 388 | __be32 saddr, u8 tos, u32 mark, |
| 386 | int type, int code, | 389 | int type, int code, |
| 387 | struct icmp_bxm *param) | 390 | struct icmp_bxm *param) |
| 388 | { | 391 | { |
| @@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net, | |||
| 394 | fl4->daddr = (param->replyopts.opt.opt.srr ? | 397 | fl4->daddr = (param->replyopts.opt.opt.srr ? |
| 395 | param->replyopts.opt.opt.faddr : iph->saddr); | 398 | param->replyopts.opt.opt.faddr : iph->saddr); |
| 396 | fl4->saddr = saddr; | 399 | fl4->saddr = saddr; |
| 400 | fl4->flowi4_mark = mark; | ||
| 397 | fl4->flowi4_tos = RT_TOS(tos); | 401 | fl4->flowi4_tos = RT_TOS(tos); |
| 398 | fl4->flowi4_proto = IPPROTO_ICMP; | 402 | fl4->flowi4_proto = IPPROTO_ICMP; |
| 399 | fl4->fl4_icmp_type = type; | 403 | fl4->fl4_icmp_type = type; |
| @@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
| 491 | struct flowi4 fl4; | 495 | struct flowi4 fl4; |
| 492 | __be32 saddr; | 496 | __be32 saddr; |
| 493 | u8 tos; | 497 | u8 tos; |
| 498 | u32 mark; | ||
| 494 | struct net *net; | 499 | struct net *net; |
| 495 | struct sock *sk; | 500 | struct sock *sk; |
| 496 | 501 | ||
| @@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
| 592 | tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | | 597 | tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | |
| 593 | IPTOS_PREC_INTERNETCONTROL) : | 598 | IPTOS_PREC_INTERNETCONTROL) : |
| 594 | iph->tos; | 599 | iph->tos; |
| 600 | mark = IP4_REPLY_MARK(net, skb_in->mark); | ||
| 595 | 601 | ||
| 596 | if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) | 602 | if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) |
| 597 | goto out_unlock; | 603 | goto out_unlock; |
| @@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
| 608 | icmp_param->skb = skb_in; | 614 | icmp_param->skb = skb_in; |
| 609 | icmp_param->offset = skb_network_offset(skb_in); | 615 | icmp_param->offset = skb_network_offset(skb_in); |
| 610 | inet_sk(sk)->tos = tos; | 616 | inet_sk(sk)->tos = tos; |
| 617 | sk->sk_mark = mark; | ||
| 611 | ipc.addr = iph->saddr; | 618 | ipc.addr = iph->saddr; |
| 612 | ipc.opt = &icmp_param->replyopts.opt; | 619 | ipc.opt = &icmp_param->replyopts.opt; |
| 613 | ipc.tx_flags = 0; | 620 | ipc.tx_flags = 0; |
| 614 | ipc.ttl = 0; | 621 | ipc.ttl = 0; |
| 615 | ipc.tos = -1; | 622 | ipc.tos = -1; |
| 616 | 623 | ||
| 617 | rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, | 624 | rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, |
| 618 | type, code, icmp_param); | 625 | type, code, icmp_param); |
| 619 | if (IS_ERR(rt)) | 626 | if (IS_ERR(rt)) |
| 620 | goto out_unlock; | 627 | goto out_unlock; |
| @@ -908,16 +915,8 @@ int icmp_rcv(struct sk_buff *skb) | |||
| 908 | 915 | ||
| 909 | ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS); | 916 | ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS); |
| 910 | 917 | ||
| 911 | switch (skb->ip_summed) { | 918 | if (skb_checksum_simple_validate(skb)) |
| 912 | case CHECKSUM_COMPLETE: | 919 | goto csum_error; |
| 913 | if (!csum_fold(skb->csum)) | ||
| 914 | break; | ||
| 915 | /* fall through */ | ||
| 916 | case CHECKSUM_NONE: | ||
| 917 | skb->csum = 0; | ||
| 918 | if (__skb_checksum_complete(skb)) | ||
| 919 | goto csum_error; | ||
| 920 | } | ||
| 921 | 920 | ||
| 922 | if (!pskb_pull(skb, sizeof(*icmph))) | 921 | if (!pskb_pull(skb, sizeof(*icmph))) |
| 923 | goto error; | 922 | goto error; |
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 97e4d1655d26..6748d420f714 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
| @@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
| 369 | pip->saddr = fl4.saddr; | 369 | pip->saddr = fl4.saddr; |
| 370 | pip->protocol = IPPROTO_IGMP; | 370 | pip->protocol = IPPROTO_IGMP; |
| 371 | pip->tot_len = 0; /* filled in later */ | 371 | pip->tot_len = 0; /* filled in later */ |
| 372 | ip_select_ident(skb, &rt->dst, NULL); | 372 | ip_select_ident(skb, NULL); |
| 373 | ((u8 *)&pip[1])[0] = IPOPT_RA; | 373 | ((u8 *)&pip[1])[0] = IPOPT_RA; |
| 374 | ((u8 *)&pip[1])[1] = 4; | 374 | ((u8 *)&pip[1])[1] = 4; |
| 375 | ((u8 *)&pip[1])[2] = 0; | 375 | ((u8 *)&pip[1])[2] = 0; |
| @@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
| 714 | iph->daddr = dst; | 714 | iph->daddr = dst; |
| 715 | iph->saddr = fl4.saddr; | 715 | iph->saddr = fl4.saddr; |
| 716 | iph->protocol = IPPROTO_IGMP; | 716 | iph->protocol = IPPROTO_IGMP; |
| 717 | ip_select_ident(skb, &rt->dst, NULL); | 717 | ip_select_ident(skb, NULL); |
| 718 | ((u8 *)&iph[1])[0] = IPOPT_RA; | 718 | ((u8 *)&iph[1])[0] = IPOPT_RA; |
| 719 | ((u8 *)&iph[1])[1] = 4; | 719 | ((u8 *)&iph[1])[1] = 4; |
| 720 | ((u8 *)&iph[1])[2] = 0; | 720 | ((u8 *)&iph[1])[2] = 0; |
| @@ -988,16 +988,8 @@ int igmp_rcv(struct sk_buff *skb) | |||
| 988 | if (!pskb_may_pull(skb, sizeof(struct igmphdr))) | 988 | if (!pskb_may_pull(skb, sizeof(struct igmphdr))) |
| 989 | goto drop; | 989 | goto drop; |
| 990 | 990 | ||
| 991 | switch (skb->ip_summed) { | 991 | if (skb_checksum_simple_validate(skb)) |
| 992 | case CHECKSUM_COMPLETE: | 992 | goto drop; |
| 993 | if (!csum_fold(skb->csum)) | ||
| 994 | break; | ||
| 995 | /* fall through */ | ||
| 996 | case CHECKSUM_NONE: | ||
| 997 | skb->csum = 0; | ||
| 998 | if (__skb_checksum_complete(skb)) | ||
| 999 | goto drop; | ||
| 1000 | } | ||
| 1001 | 993 | ||
| 1002 | ih = igmp_hdr(skb); | 994 | ih = igmp_hdr(skb); |
| 1003 | switch (ih->type) { | 995 | switch (ih->type) { |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a56b8e6e866a..14d02ea905b6 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
| @@ -29,9 +29,6 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; | |||
| 29 | EXPORT_SYMBOL(inet_csk_timer_bug_msg); | 29 | EXPORT_SYMBOL(inet_csk_timer_bug_msg); |
| 30 | #endif | 30 | #endif |
| 31 | 31 | ||
| 32 | unsigned long *sysctl_local_reserved_ports; | ||
| 33 | EXPORT_SYMBOL(sysctl_local_reserved_ports); | ||
| 34 | |||
| 35 | void inet_get_local_port_range(struct net *net, int *low, int *high) | 32 | void inet_get_local_port_range(struct net *net, int *low, int *high) |
| 36 | { | 33 | { |
| 37 | unsigned int seq; | 34 | unsigned int seq; |
| @@ -113,7 +110,7 @@ again: | |||
| 113 | 110 | ||
| 114 | smallest_size = -1; | 111 | smallest_size = -1; |
| 115 | do { | 112 | do { |
| 116 | if (inet_is_reserved_local_port(rover)) | 113 | if (inet_is_local_reserved_port(net, rover)) |
| 117 | goto next_nolock; | 114 | goto next_nolock; |
| 118 | head = &hashinfo->bhash[inet_bhashfn(net, rover, | 115 | head = &hashinfo->bhash[inet_bhashfn(net, rover, |
| 119 | hashinfo->bhash_size)]; | 116 | hashinfo->bhash_size)]; |
| @@ -408,7 +405,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, | |||
| 408 | struct net *net = sock_net(sk); | 405 | struct net *net = sock_net(sk); |
| 409 | int flags = inet_sk_flowi_flags(sk); | 406 | int flags = inet_sk_flowi_flags(sk); |
| 410 | 407 | ||
| 411 | flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, | 408 | flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, |
| 412 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, | 409 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, |
| 413 | sk->sk_protocol, | 410 | sk->sk_protocol, |
| 414 | flags, | 411 | flags, |
| @@ -445,7 +442,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, | |||
| 445 | 442 | ||
| 446 | rcu_read_lock(); | 443 | rcu_read_lock(); |
| 447 | opt = rcu_dereference(newinet->inet_opt); | 444 | opt = rcu_dereference(newinet->inet_opt); |
| 448 | flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, | 445 | flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark, |
| 449 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, | 446 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, |
| 450 | sk->sk_protocol, inet_sk_flowi_flags(sk), | 447 | sk->sk_protocol, inet_sk_flowi_flags(sk), |
| 451 | (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, | 448 | (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, |
| @@ -680,6 +677,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, | |||
| 680 | inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); | 677 | inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); |
| 681 | newsk->sk_write_space = sk_stream_write_space; | 678 | newsk->sk_write_space = sk_stream_write_space; |
| 682 | 679 | ||
| 680 | newsk->sk_mark = inet_rsk(req)->ir_mark; | ||
| 681 | |||
| 683 | newicsk->icsk_retransmits = 0; | 682 | newicsk->icsk_retransmits = 0; |
| 684 | newicsk->icsk_backoff = 0; | 683 | newicsk->icsk_backoff = 0; |
| 685 | newicsk->icsk_probes_out = 0; | 684 | newicsk->icsk_probes_out = 0; |
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 8b9cf279450d..43116e8c8e13 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
| @@ -274,7 +274,7 @@ struct sock *__inet_lookup_established(struct net *net, | |||
| 274 | const __be32 daddr, const u16 hnum, | 274 | const __be32 daddr, const u16 hnum, |
| 275 | const int dif) | 275 | const int dif) |
| 276 | { | 276 | { |
| 277 | INET_ADDR_COOKIE(acookie, saddr, daddr) | 277 | INET_ADDR_COOKIE(acookie, saddr, daddr); |
| 278 | const __portpair ports = INET_COMBINED_PORTS(sport, hnum); | 278 | const __portpair ports = INET_COMBINED_PORTS(sport, hnum); |
| 279 | struct sock *sk; | 279 | struct sock *sk; |
| 280 | const struct hlist_nulls_node *node; | 280 | const struct hlist_nulls_node *node; |
| @@ -327,7 +327,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, | |||
| 327 | __be32 daddr = inet->inet_rcv_saddr; | 327 | __be32 daddr = inet->inet_rcv_saddr; |
| 328 | __be32 saddr = inet->inet_daddr; | 328 | __be32 saddr = inet->inet_daddr; |
| 329 | int dif = sk->sk_bound_dev_if; | 329 | int dif = sk->sk_bound_dev_if; |
| 330 | INET_ADDR_COOKIE(acookie, saddr, daddr) | 330 | INET_ADDR_COOKIE(acookie, saddr, daddr); |
| 331 | const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); | 331 | const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); |
| 332 | struct net *net = sock_net(sk); | 332 | struct net *net = sock_net(sk); |
| 333 | unsigned int hash = inet_ehashfn(net, daddr, lport, | 333 | unsigned int hash = inet_ehashfn(net, daddr, lport, |
| @@ -500,7 +500,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
| 500 | local_bh_disable(); | 500 | local_bh_disable(); |
| 501 | for (i = 1; i <= remaining; i++) { | 501 | for (i = 1; i <= remaining; i++) { |
| 502 | port = low + (i + offset) % remaining; | 502 | port = low + (i + offset) % remaining; |
| 503 | if (inet_is_reserved_local_port(port)) | 503 | if (inet_is_local_reserved_port(net, port)) |
| 504 | continue; | 504 | continue; |
| 505 | head = &hinfo->bhash[inet_bhashfn(net, port, | 505 | head = &hinfo->bhash[inet_bhashfn(net, port, |
| 506 | hinfo->bhash_size)]; | 506 | hinfo->bhash_size)]; |
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 56cd458a1b8c..bd5f5928167d 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
| @@ -26,20 +26,7 @@ | |||
| 26 | * Theory of operations. | 26 | * Theory of operations. |
| 27 | * We keep one entry for each peer IP address. The nodes contains long-living | 27 | * We keep one entry for each peer IP address. The nodes contains long-living |
| 28 | * information about the peer which doesn't depend on routes. | 28 | * information about the peer which doesn't depend on routes. |
| 29 | * At this moment this information consists only of ID field for the next | ||
| 30 | * outgoing IP packet. This field is incremented with each packet as encoded | ||
| 31 | * in inet_getid() function (include/net/inetpeer.h). | ||
| 32 | * At the moment of writing this notes identifier of IP packets is generated | ||
| 33 | * to be unpredictable using this code only for packets subjected | ||
| 34 | * (actually or potentially) to defragmentation. I.e. DF packets less than | ||
| 35 | * PMTU in size when local fragmentation is disabled use a constant ID and do | ||
| 36 | * not use this code (see ip_select_ident() in include/net/ip.h). | ||
| 37 | * | 29 | * |
| 38 | * Route cache entries hold references to our nodes. | ||
| 39 | * New cache entries get references via lookup by destination IP address in | ||
| 40 | * the avl tree. The reference is grabbed only when it's needed i.e. only | ||
| 41 | * when we try to output IP packet which needs an unpredictable ID (see | ||
| 42 | * __ip_select_ident() in net/ipv4/route.c). | ||
| 43 | * Nodes are removed only when reference counter goes to 0. | 30 | * Nodes are removed only when reference counter goes to 0. |
| 44 | * When it's happened the node may be removed when a sufficient amount of | 31 | * When it's happened the node may be removed when a sufficient amount of |
| 45 | * time has been passed since its last use. The less-recently-used entry can | 32 | * time has been passed since its last use. The less-recently-used entry can |
| @@ -62,7 +49,6 @@ | |||
| 62 | * refcnt: atomically against modifications on other CPU; | 49 | * refcnt: atomically against modifications on other CPU; |
| 63 | * usually under some other lock to prevent node disappearing | 50 | * usually under some other lock to prevent node disappearing |
| 64 | * daddr: unchangeable | 51 | * daddr: unchangeable |
| 65 | * ip_id_count: atomic value (no lock needed) | ||
| 66 | */ | 52 | */ |
| 67 | 53 | ||
| 68 | static struct kmem_cache *peer_cachep __read_mostly; | 54 | static struct kmem_cache *peer_cachep __read_mostly; |
| @@ -120,7 +106,7 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min | |||
| 120 | static void inetpeer_gc_worker(struct work_struct *work) | 106 | static void inetpeer_gc_worker(struct work_struct *work) |
| 121 | { | 107 | { |
| 122 | struct inet_peer *p, *n, *c; | 108 | struct inet_peer *p, *n, *c; |
| 123 | LIST_HEAD(list); | 109 | struct list_head list; |
| 124 | 110 | ||
| 125 | spin_lock_bh(&gc_lock); | 111 | spin_lock_bh(&gc_lock); |
| 126 | list_replace_init(&gc_list, &list); | 112 | list_replace_init(&gc_list, &list); |
| @@ -497,10 +483,6 @@ relookup: | |||
| 497 | p->daddr = *daddr; | 483 | p->daddr = *daddr; |
| 498 | atomic_set(&p->refcnt, 1); | 484 | atomic_set(&p->refcnt, 1); |
| 499 | atomic_set(&p->rid, 0); | 485 | atomic_set(&p->rid, 0); |
| 500 | atomic_set(&p->ip_id_count, | ||
| 501 | (daddr->family == AF_INET) ? | ||
| 502 | secure_ip_id(daddr->addr.a4) : | ||
| 503 | secure_ipv6_id(daddr->addr.a6)); | ||
| 504 | p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; | 486 | p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; |
| 505 | p->rate_tokens = 0; | 487 | p->rate_tokens = 0; |
| 506 | /* 60*HZ is arbitrary, but chosen enough high so that the first | 488 | /* 60*HZ is arbitrary, but chosen enough high so that the first |
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 6f111e48e11c..3a83ce5efa80 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
| @@ -42,7 +42,7 @@ | |||
| 42 | static bool ip_may_fragment(const struct sk_buff *skb) | 42 | static bool ip_may_fragment(const struct sk_buff *skb) |
| 43 | { | 43 | { |
| 44 | return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || | 44 | return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || |
| 45 | skb->local_df; | 45 | skb->ignore_df; |
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) | 48 | static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 94213c891565..9b842544aea3 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
| @@ -410,7 +410,7 @@ static int ipgre_open(struct net_device *dev) | |||
| 410 | struct flowi4 fl4; | 410 | struct flowi4 fl4; |
| 411 | struct rtable *rt; | 411 | struct rtable *rt; |
| 412 | 412 | ||
| 413 | rt = ip_route_output_gre(dev_net(dev), &fl4, | 413 | rt = ip_route_output_gre(t->net, &fl4, |
| 414 | t->parms.iph.daddr, | 414 | t->parms.iph.daddr, |
| 415 | t->parms.iph.saddr, | 415 | t->parms.iph.saddr, |
| 416 | t->parms.o_key, | 416 | t->parms.o_key, |
| @@ -434,7 +434,7 @@ static int ipgre_close(struct net_device *dev) | |||
| 434 | 434 | ||
| 435 | if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { | 435 | if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { |
| 436 | struct in_device *in_dev; | 436 | struct in_device *in_dev; |
| 437 | in_dev = inetdev_by_index(dev_net(dev), t->mlink); | 437 | in_dev = inetdev_by_index(t->net, t->mlink); |
| 438 | if (in_dev) | 438 | if (in_dev) |
| 439 | ip_mc_dec_group(in_dev, t->parms.iph.daddr); | 439 | ip_mc_dec_group(in_dev, t->parms.iph.daddr); |
| 440 | } | 440 | } |
| @@ -478,7 +478,7 @@ static void __gre_tunnel_init(struct net_device *dev) | |||
| 478 | dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; | 478 | dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; |
| 479 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; | 479 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; |
| 480 | 480 | ||
| 481 | dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES; | 481 | dev->features |= GRE_FEATURES; |
| 482 | dev->hw_features |= GRE_FEATURES; | 482 | dev->hw_features |= GRE_FEATURES; |
| 483 | 483 | ||
| 484 | if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { | 484 | if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { |
| @@ -649,6 +649,7 @@ static void ipgre_tap_setup(struct net_device *dev) | |||
| 649 | { | 649 | { |
| 650 | ether_setup(dev); | 650 | ether_setup(dev); |
| 651 | dev->netdev_ops = &gre_tap_netdev_ops; | 651 | dev->netdev_ops = &gre_tap_netdev_ops; |
| 652 | dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; | ||
| 652 | ip_tunnel_setup(dev, gre_tap_net_id); | 653 | ip_tunnel_setup(dev, gre_tap_net_id); |
| 653 | } | 654 | } |
| 654 | 655 | ||
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index f4ab72e19af9..5e7aecea05cd 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
| @@ -364,7 +364,7 @@ int ip_options_compile(struct net *net, | |||
| 364 | } | 364 | } |
| 365 | if (optptr[2] <= optlen) { | 365 | if (optptr[2] <= optlen) { |
| 366 | unsigned char *timeptr = NULL; | 366 | unsigned char *timeptr = NULL; |
| 367 | if (optptr[2]+3 > optptr[1]) { | 367 | if (optptr[2]+3 > optlen) { |
| 368 | pp_ptr = optptr + 2; | 368 | pp_ptr = optptr + 2; |
| 369 | goto error; | 369 | goto error; |
| 370 | } | 370 | } |
| @@ -376,7 +376,7 @@ int ip_options_compile(struct net *net, | |||
| 376 | optptr[2] += 4; | 376 | optptr[2] += 4; |
| 377 | break; | 377 | break; |
| 378 | case IPOPT_TS_TSANDADDR: | 378 | case IPOPT_TS_TSANDADDR: |
| 379 | if (optptr[2]+7 > optptr[1]) { | 379 | if (optptr[2]+7 > optlen) { |
| 380 | pp_ptr = optptr + 2; | 380 | pp_ptr = optptr + 2; |
| 381 | goto error; | 381 | goto error; |
| 382 | } | 382 | } |
| @@ -390,7 +390,7 @@ int ip_options_compile(struct net *net, | |||
| 390 | optptr[2] += 8; | 390 | optptr[2] += 8; |
| 391 | break; | 391 | break; |
| 392 | case IPOPT_TS_PRESPEC: | 392 | case IPOPT_TS_PRESPEC: |
| 393 | if (optptr[2]+7 > optptr[1]) { | 393 | if (optptr[2]+7 > optlen) { |
| 394 | pp_ptr = optptr + 2; | 394 | pp_ptr = optptr + 2; |
| 395 | goto error; | 395 | goto error; |
| 396 | } | 396 | } |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a52f50187b54..8d3b6b0e9857 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
| @@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
| 148 | iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); | 148 | iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); |
| 149 | iph->saddr = saddr; | 149 | iph->saddr = saddr; |
| 150 | iph->protocol = sk->sk_protocol; | 150 | iph->protocol = sk->sk_protocol; |
| 151 | ip_select_ident(skb, &rt->dst, sk); | 151 | ip_select_ident(skb, sk); |
| 152 | 152 | ||
| 153 | if (opt && opt->opt.optlen) { | 153 | if (opt && opt->opt.optlen) { |
| 154 | iph->ihl += opt->opt.optlen>>2; | 154 | iph->ihl += opt->opt.optlen>>2; |
| @@ -415,7 +415,7 @@ packet_routed: | |||
| 415 | skb_reset_network_header(skb); | 415 | skb_reset_network_header(skb); |
| 416 | iph = ip_hdr(skb); | 416 | iph = ip_hdr(skb); |
| 417 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); | 417 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); |
| 418 | if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df) | 418 | if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df) |
| 419 | iph->frag_off = htons(IP_DF); | 419 | iph->frag_off = htons(IP_DF); |
| 420 | else | 420 | else |
| 421 | iph->frag_off = 0; | 421 | iph->frag_off = 0; |
| @@ -430,8 +430,7 @@ packet_routed: | |||
| 430 | ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); | 430 | ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); |
| 431 | } | 431 | } |
| 432 | 432 | ||
| 433 | ip_select_ident_more(skb, &rt->dst, sk, | 433 | ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1); |
| 434 | (skb_shinfo(skb)->gso_segs ?: 1) - 1); | ||
| 435 | 434 | ||
| 436 | /* TODO : should we use skb->sk here instead of sk ? */ | 435 | /* TODO : should we use skb->sk here instead of sk ? */ |
| 437 | skb->priority = sk->sk_priority; | 436 | skb->priority = sk->sk_priority; |
| @@ -501,7 +500,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
| 501 | iph = ip_hdr(skb); | 500 | iph = ip_hdr(skb); |
| 502 | 501 | ||
| 503 | mtu = ip_skb_dst_mtu(skb); | 502 | mtu = ip_skb_dst_mtu(skb); |
| 504 | if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || | 503 | if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || |
| 505 | (IPCB(skb)->frag_max_size && | 504 | (IPCB(skb)->frag_max_size && |
| 506 | IPCB(skb)->frag_max_size > mtu))) { | 505 | IPCB(skb)->frag_max_size > mtu))) { |
| 507 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); | 506 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); |
| @@ -866,7 +865,7 @@ static int __ip_append_data(struct sock *sk, | |||
| 866 | 865 | ||
| 867 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); | 866 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); |
| 868 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; | 867 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; |
| 869 | maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; | 868 | maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu; |
| 870 | 869 | ||
| 871 | if (cork->length + length > maxnonfragsize - fragheaderlen) { | 870 | if (cork->length + length > maxnonfragsize - fragheaderlen) { |
| 872 | ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, | 871 | ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, |
| @@ -1189,7 +1188,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, | |||
| 1189 | 1188 | ||
| 1190 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); | 1189 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); |
| 1191 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; | 1190 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; |
| 1192 | maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; | 1191 | maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu; |
| 1193 | 1192 | ||
| 1194 | if (cork->length + size > maxnonfragsize - fragheaderlen) { | 1193 | if (cork->length + size > maxnonfragsize - fragheaderlen) { |
| 1195 | ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, | 1194 | ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, |
| @@ -1350,10 +1349,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, | |||
| 1350 | * to fragment the frame generated here. No matter, what transforms | 1349 | * to fragment the frame generated here. No matter, what transforms |
| 1351 | * how transforms change size of the packet, it will come out. | 1350 | * how transforms change size of the packet, it will come out. |
| 1352 | */ | 1351 | */ |
| 1353 | skb->local_df = ip_sk_local_df(sk); | 1352 | skb->ignore_df = ip_sk_ignore_df(sk); |
| 1354 | 1353 | ||
| 1355 | /* DF bit is set when we want to see DF on outgoing frames. | 1354 | /* DF bit is set when we want to see DF on outgoing frames. |
| 1356 | * If local_df is set too, we still allow to fragment this frame | 1355 | * If ignore_df is set too, we still allow to fragment this frame |
| 1357 | * locally. */ | 1356 | * locally. */ |
| 1358 | if (inet->pmtudisc == IP_PMTUDISC_DO || | 1357 | if (inet->pmtudisc == IP_PMTUDISC_DO || |
| 1359 | inet->pmtudisc == IP_PMTUDISC_PROBE || | 1358 | inet->pmtudisc == IP_PMTUDISC_PROBE || |
| @@ -1379,7 +1378,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, | |||
| 1379 | iph->ttl = ttl; | 1378 | iph->ttl = ttl; |
| 1380 | iph->protocol = sk->sk_protocol; | 1379 | iph->protocol = sk->sk_protocol; |
| 1381 | ip_copy_addrs(iph, fl4); | 1380 | ip_copy_addrs(iph, fl4); |
| 1382 | ip_select_ident(skb, &rt->dst, sk); | 1381 | ip_select_ident(skb, sk); |
| 1383 | 1382 | ||
| 1384 | if (opt) { | 1383 | if (opt) { |
| 1385 | iph->ihl += opt->optlen>>2; | 1384 | iph->ihl += opt->optlen>>2; |
| @@ -1546,7 +1545,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, | |||
| 1546 | daddr = replyopts.opt.opt.faddr; | 1545 | daddr = replyopts.opt.opt.faddr; |
| 1547 | } | 1546 | } |
| 1548 | 1547 | ||
| 1549 | flowi4_init_output(&fl4, arg->bound_dev_if, 0, | 1548 | flowi4_init_output(&fl4, arg->bound_dev_if, |
| 1549 | IP4_REPLY_MARK(net, skb->mark), | ||
| 1550 | RT_TOS(arg->tos), | 1550 | RT_TOS(arg->tos), |
| 1551 | RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, | 1551 | RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, |
| 1552 | ip_reply_arg_flowi_flags(arg), | 1552 | ip_reply_arg_flowi_flags(arg), |
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 2acc2337d38b..097b3e7c1e8f 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c | |||
| @@ -268,6 +268,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, | |||
| 268 | __be32 remote = parms->iph.daddr; | 268 | __be32 remote = parms->iph.daddr; |
| 269 | __be32 local = parms->iph.saddr; | 269 | __be32 local = parms->iph.saddr; |
| 270 | __be32 key = parms->i_key; | 270 | __be32 key = parms->i_key; |
| 271 | __be16 flags = parms->i_flags; | ||
| 271 | int link = parms->link; | 272 | int link = parms->link; |
| 272 | struct ip_tunnel *t = NULL; | 273 | struct ip_tunnel *t = NULL; |
| 273 | struct hlist_head *head = ip_bucket(itn, parms); | 274 | struct hlist_head *head = ip_bucket(itn, parms); |
| @@ -275,9 +276,9 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, | |||
| 275 | hlist_for_each_entry_rcu(t, head, hash_node) { | 276 | hlist_for_each_entry_rcu(t, head, hash_node) { |
| 276 | if (local == t->parms.iph.saddr && | 277 | if (local == t->parms.iph.saddr && |
| 277 | remote == t->parms.iph.daddr && | 278 | remote == t->parms.iph.daddr && |
| 278 | key == t->parms.i_key && | ||
| 279 | link == t->parms.link && | 279 | link == t->parms.link && |
| 280 | type == t->dev->type) | 280 | type == t->dev->type && |
| 281 | ip_tunnel_key_match(&t->parms, flags, key)) | ||
| 281 | break; | 282 | break; |
| 282 | } | 283 | } |
| 283 | return t; | 284 | return t; |
| @@ -395,11 +396,10 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, | |||
| 395 | struct ip_tunnel_net *itn, | 396 | struct ip_tunnel_net *itn, |
| 396 | struct ip_tunnel_parm *parms) | 397 | struct ip_tunnel_parm *parms) |
| 397 | { | 398 | { |
| 398 | struct ip_tunnel *nt, *fbt; | 399 | struct ip_tunnel *nt; |
| 399 | struct net_device *dev; | 400 | struct net_device *dev; |
| 400 | 401 | ||
| 401 | BUG_ON(!itn->fb_tunnel_dev); | 402 | BUG_ON(!itn->fb_tunnel_dev); |
| 402 | fbt = netdev_priv(itn->fb_tunnel_dev); | ||
| 403 | dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); | 403 | dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); |
| 404 | if (IS_ERR(dev)) | 404 | if (IS_ERR(dev)) |
| 405 | return ERR_CAST(dev); | 405 | return ERR_CAST(dev); |
| @@ -668,6 +668,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, | |||
| 668 | dev->needed_headroom = max_headroom; | 668 | dev->needed_headroom = max_headroom; |
| 669 | 669 | ||
| 670 | if (skb_cow_head(skb, dev->needed_headroom)) { | 670 | if (skb_cow_head(skb, dev->needed_headroom)) { |
| 671 | ip_rt_put(rt); | ||
| 671 | dev->stats.tx_dropped++; | 672 | dev->stats.tx_dropped++; |
| 672 | kfree_skb(skb); | 673 | kfree_skb(skb); |
| 673 | return; | 674 | return; |
| @@ -747,19 +748,19 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) | |||
| 747 | goto done; | 748 | goto done; |
| 748 | if (p->iph.ttl) | 749 | if (p->iph.ttl) |
| 749 | p->iph.frag_off |= htons(IP_DF); | 750 | p->iph.frag_off |= htons(IP_DF); |
| 750 | if (!(p->i_flags&TUNNEL_KEY)) | 751 | if (!(p->i_flags & VTI_ISVTI)) { |
| 751 | p->i_key = 0; | 752 | if (!(p->i_flags & TUNNEL_KEY)) |
| 752 | if (!(p->o_flags&TUNNEL_KEY)) | 753 | p->i_key = 0; |
| 753 | p->o_key = 0; | 754 | if (!(p->o_flags & TUNNEL_KEY)) |
| 755 | p->o_key = 0; | ||
| 756 | } | ||
| 754 | 757 | ||
| 755 | t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); | 758 | t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); |
| 756 | 759 | ||
| 757 | if (!t && (cmd == SIOCADDTUNNEL)) { | 760 | if (!t && (cmd == SIOCADDTUNNEL)) { |
| 758 | t = ip_tunnel_create(net, itn, p); | 761 | t = ip_tunnel_create(net, itn, p); |
| 759 | if (IS_ERR(t)) { | 762 | err = PTR_ERR_OR_ZERO(t); |
| 760 | err = PTR_ERR(t); | 763 | break; |
| 761 | break; | ||
| 762 | } | ||
| 763 | } | 764 | } |
| 764 | if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { | 765 | if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { |
| 765 | if (t != NULL) { | 766 | if (t != NULL) { |
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index bcf206c79005..f4c987bb7e94 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c | |||
| @@ -74,7 +74,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, | |||
| 74 | iph->daddr = dst; | 74 | iph->daddr = dst; |
| 75 | iph->saddr = src; | 75 | iph->saddr = src; |
| 76 | iph->ttl = ttl; | 76 | iph->ttl = ttl; |
| 77 | __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); | 77 | __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1); |
| 78 | 78 | ||
| 79 | err = ip_local_out_sk(sk, skb); | 79 | err = ip_local_out_sk(sk, skb); |
| 80 | if (unlikely(net_xmit_eval(err))) | 80 | if (unlikely(net_xmit_eval(err))) |
| @@ -135,6 +135,14 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, | |||
| 135 | return skb; | 135 | return skb; |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | /* If packet is not gso and we are resolving any partial checksum, | ||
| 139 | * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL | ||
| 140 | * on the outer header without confusing devices that implement | ||
| 141 | * NETIF_F_IP_CSUM with encapsulation. | ||
| 142 | */ | ||
| 143 | if (csum_help) | ||
| 144 | skb->encapsulation = 0; | ||
| 145 | |||
| 138 | if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) { | 146 | if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) { |
| 139 | err = skb_checksum_help(skb); | 147 | err = skb_checksum_help(skb); |
| 140 | if (unlikely(err)) | 148 | if (unlikely(err)) |
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 13ef00f1e17b..b8960f3527f3 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c | |||
| @@ -313,7 +313,13 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) | |||
| 313 | return -EINVAL; | 313 | return -EINVAL; |
| 314 | } | 314 | } |
| 315 | 315 | ||
| 316 | p.i_flags |= VTI_ISVTI; | 316 | if (!(p.i_flags & GRE_KEY)) |
| 317 | p.i_key = 0; | ||
| 318 | if (!(p.o_flags & GRE_KEY)) | ||
| 319 | p.o_key = 0; | ||
| 320 | |||
| 321 | p.i_flags = VTI_ISVTI; | ||
| 322 | |||
| 317 | err = ip_tunnel_ioctl(dev, &p, cmd); | 323 | err = ip_tunnel_ioctl(dev, &p, cmd); |
| 318 | if (err) | 324 | if (err) |
| 319 | return err; | 325 | return err; |
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 812b18351462..62eaa005e146 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
| @@ -149,13 +149,13 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
| 149 | 149 | ||
| 150 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { | 150 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { |
| 151 | ipv4_update_pmtu(skb, dev_net(skb->dev), info, | 151 | ipv4_update_pmtu(skb, dev_net(skb->dev), info, |
| 152 | t->dev->ifindex, 0, IPPROTO_IPIP, 0); | 152 | t->parms.link, 0, IPPROTO_IPIP, 0); |
| 153 | err = 0; | 153 | err = 0; |
| 154 | goto out; | 154 | goto out; |
| 155 | } | 155 | } |
| 156 | 156 | ||
| 157 | if (type == ICMP_REDIRECT) { | 157 | if (type == ICMP_REDIRECT) { |
| 158 | ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, | 158 | ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, |
| 159 | IPPROTO_IPIP, 0); | 159 | IPPROTO_IPIP, 0); |
| 160 | err = 0; | 160 | err = 0; |
| 161 | goto out; | 161 | goto out; |
| @@ -486,4 +486,5 @@ static void __exit ipip_fini(void) | |||
| 486 | module_init(ipip_init); | 486 | module_init(ipip_init); |
| 487 | module_exit(ipip_fini); | 487 | module_exit(ipip_fini); |
| 488 | MODULE_LICENSE("GPL"); | 488 | MODULE_LICENSE("GPL"); |
| 489 | MODULE_ALIAS_RTNL_LINK("ipip"); | ||
| 489 | MODULE_ALIAS_NETDEV("tunl0"); | 490 | MODULE_ALIAS_NETDEV("tunl0"); |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d84dc8d4c916..65bcaa789043 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
| @@ -484,7 +484,7 @@ static void reg_vif_setup(struct net_device *dev) | |||
| 484 | dev->type = ARPHRD_PIMREG; | 484 | dev->type = ARPHRD_PIMREG; |
| 485 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; | 485 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; |
| 486 | dev->flags = IFF_NOARP; | 486 | dev->flags = IFF_NOARP; |
| 487 | dev->netdev_ops = &reg_vif_netdev_ops, | 487 | dev->netdev_ops = &reg_vif_netdev_ops; |
| 488 | dev->destructor = free_netdev; | 488 | dev->destructor = free_netdev; |
| 489 | dev->features |= NETIF_F_NETNS_LOCAL; | 489 | dev->features |= NETIF_F_NETNS_LOCAL; |
| 490 | } | 490 | } |
| @@ -1663,7 +1663,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) | |||
| 1663 | iph->protocol = IPPROTO_IPIP; | 1663 | iph->protocol = IPPROTO_IPIP; |
| 1664 | iph->ihl = 5; | 1664 | iph->ihl = 5; |
| 1665 | iph->tot_len = htons(skb->len); | 1665 | iph->tot_len = htons(skb->len); |
| 1666 | ip_select_ident(skb, skb_dst(skb), NULL); | 1666 | ip_select_ident(skb, NULL); |
| 1667 | ip_send_check(iph); | 1667 | ip_send_check(iph); |
| 1668 | 1668 | ||
| 1669 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 1669 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index ee2886126e3d..f1787c04a4dd 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c | |||
| @@ -91,17 +91,9 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, | |||
| 91 | if (nf_ct_is_untracked(ct)) | 91 | if (nf_ct_is_untracked(ct)) |
| 92 | return NF_ACCEPT; | 92 | return NF_ACCEPT; |
| 93 | 93 | ||
| 94 | nat = nfct_nat(ct); | 94 | nat = nf_ct_nat_ext_add(ct); |
| 95 | if (!nat) { | 95 | if (nat == NULL) |
| 96 | /* NAT module was loaded late. */ | 96 | return NF_ACCEPT; |
| 97 | if (nf_ct_is_confirmed(ct)) | ||
| 98 | return NF_ACCEPT; | ||
| 99 | nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); | ||
| 100 | if (nat == NULL) { | ||
| 101 | pr_debug("failed to add NAT extension\n"); | ||
| 102 | return NF_ACCEPT; | ||
| 103 | } | ||
| 104 | } | ||
| 105 | 97 | ||
| 106 | switch (ctinfo) { | 98 | switch (ctinfo) { |
| 107 | case IP_CT_RELATED: | 99 | case IP_CT_RELATED: |
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index f40f321b41fc..b8f6381c7d0b 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c | |||
| @@ -34,7 +34,7 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) | |||
| 34 | 34 | ||
| 35 | if (!err) { | 35 | if (!err) { |
| 36 | ip_send_check(ip_hdr(skb)); | 36 | ip_send_check(ip_hdr(skb)); |
| 37 | skb->local_df = 1; | 37 | skb->ignore_df = 1; |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | return err; | 40 | return err; |
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index b5b256d45e67..3964157d826c 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c | |||
| @@ -48,15 +48,9 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, | |||
| 48 | 48 | ||
| 49 | NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); | 49 | NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); |
| 50 | 50 | ||
| 51 | nat = nfct_nat(ct); | 51 | nat = nf_ct_nat_ext_add(ct); |
| 52 | if (nat == NULL) { | 52 | if (nat == NULL) |
| 53 | /* Conntrack module was loaded late, can't add extension. */ | 53 | return NF_ACCEPT; |
| 54 | if (nf_ct_is_confirmed(ct)) | ||
| 55 | return NF_ACCEPT; | ||
| 56 | nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); | ||
| 57 | if (nat == NULL) | ||
| 58 | return NF_ACCEPT; | ||
| 59 | } | ||
| 60 | 54 | ||
| 61 | switch (ctinfo) { | 55 | switch (ctinfo) { |
| 62 | case IP_CT_RELATED: | 56 | case IP_CT_RELATED: |
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index ad737fad6d8b..ae0af9386f7c 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
| @@ -345,15 +345,15 @@ static void icmp_put(struct seq_file *seq) | |||
| 345 | for (i = 0; icmpmibmap[i].name != NULL; i++) | 345 | for (i = 0; icmpmibmap[i].name != NULL; i++) |
| 346 | seq_printf(seq, " Out%s", icmpmibmap[i].name); | 346 | seq_printf(seq, " Out%s", icmpmibmap[i].name); |
| 347 | seq_printf(seq, "\nIcmp: %lu %lu %lu", | 347 | seq_printf(seq, "\nIcmp: %lu %lu %lu", |
| 348 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), | 348 | snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), |
| 349 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS), | 349 | snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), |
| 350 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); | 350 | snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); |
| 351 | for (i = 0; icmpmibmap[i].name != NULL; i++) | 351 | for (i = 0; icmpmibmap[i].name != NULL; i++) |
| 352 | seq_printf(seq, " %lu", | 352 | seq_printf(seq, " %lu", |
| 353 | atomic_long_read(ptr + icmpmibmap[i].index)); | 353 | atomic_long_read(ptr + icmpmibmap[i].index)); |
| 354 | seq_printf(seq, " %lu %lu", | 354 | seq_printf(seq, " %lu %lu", |
| 355 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), | 355 | snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), |
| 356 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); | 356 | snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); |
| 357 | for (i = 0; icmpmibmap[i].name != NULL; i++) | 357 | for (i = 0; icmpmibmap[i].name != NULL; i++) |
| 358 | seq_printf(seq, " %lu", | 358 | seq_printf(seq, " %lu", |
| 359 | atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); | 359 | atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); |
| @@ -379,7 +379,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
| 379 | BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); | 379 | BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); |
| 380 | for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) | 380 | for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) |
| 381 | seq_printf(seq, " %llu", | 381 | seq_printf(seq, " %llu", |
| 382 | snmp_fold_field64((void __percpu **)net->mib.ip_statistics, | 382 | snmp_fold_field64(net->mib.ip_statistics, |
| 383 | snmp4_ipstats_list[i].entry, | 383 | snmp4_ipstats_list[i].entry, |
| 384 | offsetof(struct ipstats_mib, syncp))); | 384 | offsetof(struct ipstats_mib, syncp))); |
| 385 | 385 | ||
| @@ -395,11 +395,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
| 395 | /* MaxConn field is signed, RFC 2012 */ | 395 | /* MaxConn field is signed, RFC 2012 */ |
| 396 | if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) | 396 | if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) |
| 397 | seq_printf(seq, " %ld", | 397 | seq_printf(seq, " %ld", |
| 398 | snmp_fold_field((void __percpu **)net->mib.tcp_statistics, | 398 | snmp_fold_field(net->mib.tcp_statistics, |
| 399 | snmp4_tcp_list[i].entry)); | 399 | snmp4_tcp_list[i].entry)); |
| 400 | else | 400 | else |
| 401 | seq_printf(seq, " %lu", | 401 | seq_printf(seq, " %lu", |
| 402 | snmp_fold_field((void __percpu **)net->mib.tcp_statistics, | 402 | snmp_fold_field(net->mib.tcp_statistics, |
| 403 | snmp4_tcp_list[i].entry)); | 403 | snmp4_tcp_list[i].entry)); |
| 404 | } | 404 | } |
| 405 | 405 | ||
| @@ -410,7 +410,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
| 410 | seq_puts(seq, "\nUdp:"); | 410 | seq_puts(seq, "\nUdp:"); |
| 411 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) | 411 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) |
| 412 | seq_printf(seq, " %lu", | 412 | seq_printf(seq, " %lu", |
| 413 | snmp_fold_field((void __percpu **)net->mib.udp_statistics, | 413 | snmp_fold_field(net->mib.udp_statistics, |
| 414 | snmp4_udp_list[i].entry)); | 414 | snmp4_udp_list[i].entry)); |
| 415 | 415 | ||
| 416 | /* the UDP and UDP-Lite MIBs are the same */ | 416 | /* the UDP and UDP-Lite MIBs are the same */ |
| @@ -421,7 +421,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
| 421 | seq_puts(seq, "\nUdpLite:"); | 421 | seq_puts(seq, "\nUdpLite:"); |
| 422 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) | 422 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) |
| 423 | seq_printf(seq, " %lu", | 423 | seq_printf(seq, " %lu", |
| 424 | snmp_fold_field((void __percpu **)net->mib.udplite_statistics, | 424 | snmp_fold_field(net->mib.udplite_statistics, |
| 425 | snmp4_udp_list[i].entry)); | 425 | snmp4_udp_list[i].entry)); |
| 426 | 426 | ||
| 427 | seq_putc(seq, '\n'); | 427 | seq_putc(seq, '\n'); |
| @@ -458,7 +458,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) | |||
| 458 | seq_puts(seq, "\nTcpExt:"); | 458 | seq_puts(seq, "\nTcpExt:"); |
| 459 | for (i = 0; snmp4_net_list[i].name != NULL; i++) | 459 | for (i = 0; snmp4_net_list[i].name != NULL; i++) |
| 460 | seq_printf(seq, " %lu", | 460 | seq_printf(seq, " %lu", |
| 461 | snmp_fold_field((void __percpu **)net->mib.net_statistics, | 461 | snmp_fold_field(net->mib.net_statistics, |
| 462 | snmp4_net_list[i].entry)); | 462 | snmp4_net_list[i].entry)); |
| 463 | 463 | ||
| 464 | seq_puts(seq, "\nIpExt:"); | 464 | seq_puts(seq, "\nIpExt:"); |
| @@ -468,7 +468,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) | |||
| 468 | seq_puts(seq, "\nIpExt:"); | 468 | seq_puts(seq, "\nIpExt:"); |
| 469 | for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) | 469 | for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) |
| 470 | seq_printf(seq, " %llu", | 470 | seq_printf(seq, " %llu", |
| 471 | snmp_fold_field64((void __percpu **)net->mib.ip_statistics, | 471 | snmp_fold_field64(net->mib.ip_statistics, |
| 472 | snmp4_ipextstats_list[i].entry, | 472 | snmp4_ipextstats_list[i].entry, |
| 473 | offsetof(struct ipstats_mib, syncp))); | 473 | offsetof(struct ipstats_mib, syncp))); |
| 474 | 474 | ||
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a9dbe58bdfe7..2c65160565e1 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
| @@ -389,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, | |||
| 389 | iph->check = 0; | 389 | iph->check = 0; |
| 390 | iph->tot_len = htons(length); | 390 | iph->tot_len = htons(length); |
| 391 | if (!iph->id) | 391 | if (!iph->id) |
| 392 | ip_select_ident(skb, &rt->dst, NULL); | 392 | ip_select_ident(skb, NULL); |
| 393 | 393 | ||
| 394 | iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); | 394 | iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); |
| 395 | } | 395 | } |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5e676be3daeb..082239ffe34a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
| @@ -89,6 +89,7 @@ | |||
| 89 | #include <linux/rcupdate.h> | 89 | #include <linux/rcupdate.h> |
| 90 | #include <linux/times.h> | 90 | #include <linux/times.h> |
| 91 | #include <linux/slab.h> | 91 | #include <linux/slab.h> |
| 92 | #include <linux/jhash.h> | ||
| 92 | #include <net/dst.h> | 93 | #include <net/dst.h> |
| 93 | #include <net/net_namespace.h> | 94 | #include <net/net_namespace.h> |
| 94 | #include <net/protocol.h> | 95 | #include <net/protocol.h> |
| @@ -456,39 +457,19 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, | |||
| 456 | return neigh_create(&arp_tbl, pkey, dev); | 457 | return neigh_create(&arp_tbl, pkey, dev); |
| 457 | } | 458 | } |
| 458 | 459 | ||
| 459 | /* | 460 | atomic_t *ip_idents __read_mostly; |
| 460 | * Peer allocation may fail only in serious out-of-memory conditions. However | 461 | EXPORT_SYMBOL(ip_idents); |
| 461 | * we still can generate some output. | ||
| 462 | * Random ID selection looks a bit dangerous because we have no chances to | ||
| 463 | * select ID being unique in a reasonable period of time. | ||
| 464 | * But broken packet identifier may be better than no packet at all. | ||
| 465 | */ | ||
| 466 | static void ip_select_fb_ident(struct iphdr *iph) | ||
| 467 | { | ||
| 468 | static DEFINE_SPINLOCK(ip_fb_id_lock); | ||
| 469 | static u32 ip_fallback_id; | ||
| 470 | u32 salt; | ||
| 471 | |||
| 472 | spin_lock_bh(&ip_fb_id_lock); | ||
| 473 | salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr); | ||
| 474 | iph->id = htons(salt & 0xFFFF); | ||
| 475 | ip_fallback_id = salt; | ||
| 476 | spin_unlock_bh(&ip_fb_id_lock); | ||
| 477 | } | ||
| 478 | 462 | ||
| 479 | void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | 463 | void __ip_select_ident(struct iphdr *iph, int segs) |
| 480 | { | 464 | { |
| 481 | struct net *net = dev_net(dst->dev); | 465 | static u32 ip_idents_hashrnd __read_mostly; |
| 482 | struct inet_peer *peer; | 466 | u32 hash, id; |
| 483 | 467 | ||
| 484 | peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1); | 468 | net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); |
| 485 | if (peer) { | ||
| 486 | iph->id = htons(inet_getid(peer, more)); | ||
| 487 | inet_putpeer(peer); | ||
| 488 | return; | ||
| 489 | } | ||
| 490 | 469 | ||
| 491 | ip_select_fb_ident(iph); | 470 | hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd); |
| 471 | id = ip_idents_reserve(hash, segs); | ||
| 472 | iph->id = htons(id); | ||
| 492 | } | 473 | } |
| 493 | EXPORT_SYMBOL(__ip_select_ident); | 474 | EXPORT_SYMBOL(__ip_select_ident); |
| 494 | 475 | ||
| @@ -993,6 +974,9 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, | |||
| 993 | struct flowi4 fl4; | 974 | struct flowi4 fl4; |
| 994 | struct rtable *rt; | 975 | struct rtable *rt; |
| 995 | 976 | ||
| 977 | if (!mark) | ||
| 978 | mark = IP4_REPLY_MARK(net, skb->mark); | ||
| 979 | |||
| 996 | __build_flow_key(&fl4, NULL, iph, oif, | 980 | __build_flow_key(&fl4, NULL, iph, oif, |
| 997 | RT_TOS(iph->tos), protocol, mark, flow_flags); | 981 | RT_TOS(iph->tos), protocol, mark, flow_flags); |
| 998 | rt = __ip_route_output_key(net, &fl4); | 982 | rt = __ip_route_output_key(net, &fl4); |
| @@ -1010,6 +994,10 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) | |||
| 1010 | struct rtable *rt; | 994 | struct rtable *rt; |
| 1011 | 995 | ||
| 1012 | __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); | 996 | __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); |
| 997 | |||
| 998 | if (!fl4.flowi4_mark) | ||
| 999 | fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); | ||
| 1000 | |||
| 1013 | rt = __ip_route_output_key(sock_net(sk), &fl4); | 1001 | rt = __ip_route_output_key(sock_net(sk), &fl4); |
| 1014 | if (!IS_ERR(rt)) { | 1002 | if (!IS_ERR(rt)) { |
| 1015 | __ip_rt_update_pmtu(rt, &fl4, mtu); | 1003 | __ip_rt_update_pmtu(rt, &fl4, mtu); |
| @@ -2704,6 +2692,12 @@ int __init ip_rt_init(void) | |||
| 2704 | { | 2692 | { |
| 2705 | int rc = 0; | 2693 | int rc = 0; |
| 2706 | 2694 | ||
| 2695 | ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); | ||
| 2696 | if (!ip_idents) | ||
| 2697 | panic("IP: failed to allocate ip_idents\n"); | ||
| 2698 | |||
| 2699 | prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); | ||
| 2700 | |||
| 2707 | #ifdef CONFIG_IP_ROUTE_CLASSID | 2701 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 2708 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); | 2702 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); |
| 2709 | if (!ip_rt_acct) | 2703 | if (!ip_rt_acct) |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f2ed13c2125f..c86624b36a62 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
| @@ -303,6 +303,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
| 303 | ireq->ir_rmt_port = th->source; | 303 | ireq->ir_rmt_port = th->source; |
| 304 | ireq->ir_loc_addr = ip_hdr(skb)->daddr; | 304 | ireq->ir_loc_addr = ip_hdr(skb)->daddr; |
| 305 | ireq->ir_rmt_addr = ip_hdr(skb)->saddr; | 305 | ireq->ir_rmt_addr = ip_hdr(skb)->saddr; |
| 306 | ireq->ir_mark = inet_request_mark(sk, skb); | ||
| 306 | ireq->ecn_ok = ecn_ok; | 307 | ireq->ecn_ok = ecn_ok; |
| 307 | ireq->snd_wscale = tcp_opt.snd_wscale; | 308 | ireq->snd_wscale = tcp_opt.snd_wscale; |
| 308 | ireq->sack_ok = tcp_opt.sack_ok; | 309 | ireq->sack_ok = tcp_opt.sack_ok; |
| @@ -339,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
| 339 | * hasn't changed since we received the original syn, but I see | 340 | * hasn't changed since we received the original syn, but I see |
| 340 | * no easy way to do this. | 341 | * no easy way to do this. |
| 341 | */ | 342 | */ |
| 342 | flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, | 343 | flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark, |
| 343 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, | 344 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, |
| 344 | inet_sk_flowi_flags(sk), | 345 | inet_sk_flowi_flags(sk), |
| 345 | (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr, | 346 | (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr, |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5cde8f263d40..79a007c52558 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
| @@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = { | |||
| 437 | .proc_handler = proc_dointvec | 437 | .proc_handler = proc_dointvec |
| 438 | }, | 438 | }, |
| 439 | { | 439 | { |
| 440 | .procname = "ip_local_reserved_ports", | ||
| 441 | .data = NULL, /* initialized in sysctl_ipv4_init */ | ||
| 442 | .maxlen = 65536, | ||
| 443 | .mode = 0644, | ||
| 444 | .proc_handler = proc_do_large_bitmap, | ||
| 445 | }, | ||
| 446 | { | ||
| 447 | .procname = "igmp_max_memberships", | 440 | .procname = "igmp_max_memberships", |
| 448 | .data = &sysctl_igmp_max_memberships, | 441 | .data = &sysctl_igmp_max_memberships, |
| 449 | .maxlen = sizeof(int), | 442 | .maxlen = sizeof(int), |
| @@ -825,6 +818,13 @@ static struct ctl_table ipv4_net_table[] = { | |||
| 825 | .proc_handler = ipv4_local_port_range, | 818 | .proc_handler = ipv4_local_port_range, |
| 826 | }, | 819 | }, |
| 827 | { | 820 | { |
| 821 | .procname = "ip_local_reserved_ports", | ||
| 822 | .data = &init_net.ipv4.sysctl_local_reserved_ports, | ||
| 823 | .maxlen = 65536, | ||
| 824 | .mode = 0644, | ||
| 825 | .proc_handler = proc_do_large_bitmap, | ||
| 826 | }, | ||
| 827 | { | ||
| 828 | .procname = "ip_no_pmtu_disc", | 828 | .procname = "ip_no_pmtu_disc", |
| 829 | .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc, | 829 | .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc, |
| 830 | .maxlen = sizeof(int), | 830 | .maxlen = sizeof(int), |
| @@ -838,6 +838,20 @@ static struct ctl_table ipv4_net_table[] = { | |||
| 838 | .mode = 0644, | 838 | .mode = 0644, |
| 839 | .proc_handler = proc_dointvec, | 839 | .proc_handler = proc_dointvec, |
| 840 | }, | 840 | }, |
| 841 | { | ||
| 842 | .procname = "fwmark_reflect", | ||
| 843 | .data = &init_net.ipv4.sysctl_fwmark_reflect, | ||
| 844 | .maxlen = sizeof(int), | ||
| 845 | .mode = 0644, | ||
| 846 | .proc_handler = proc_dointvec, | ||
| 847 | }, | ||
| 848 | { | ||
| 849 | .procname = "tcp_fwmark_accept", | ||
| 850 | .data = &init_net.ipv4.sysctl_tcp_fwmark_accept, | ||
| 851 | .maxlen = sizeof(int), | ||
| 852 | .mode = 0644, | ||
| 853 | .proc_handler = proc_dointvec, | ||
| 854 | }, | ||
| 841 | { } | 855 | { } |
| 842 | }; | 856 | }; |
| 843 | 857 | ||
| @@ -862,8 +876,14 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) | |||
| 862 | if (net->ipv4.ipv4_hdr == NULL) | 876 | if (net->ipv4.ipv4_hdr == NULL) |
| 863 | goto err_reg; | 877 | goto err_reg; |
| 864 | 878 | ||
| 879 | net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); | ||
| 880 | if (!net->ipv4.sysctl_local_reserved_ports) | ||
| 881 | goto err_ports; | ||
| 882 | |||
| 865 | return 0; | 883 | return 0; |
| 866 | 884 | ||
| 885 | err_ports: | ||
| 886 | unregister_net_sysctl_table(net->ipv4.ipv4_hdr); | ||
| 867 | err_reg: | 887 | err_reg: |
| 868 | if (!net_eq(net, &init_net)) | 888 | if (!net_eq(net, &init_net)) |
| 869 | kfree(table); | 889 | kfree(table); |
| @@ -875,6 +895,7 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net) | |||
| 875 | { | 895 | { |
| 876 | struct ctl_table *table; | 896 | struct ctl_table *table; |
| 877 | 897 | ||
| 898 | kfree(net->ipv4.sysctl_local_reserved_ports); | ||
| 878 | table = net->ipv4.ipv4_hdr->ctl_table_arg; | 899 | table = net->ipv4.ipv4_hdr->ctl_table_arg; |
| 879 | unregister_net_sysctl_table(net->ipv4.ipv4_hdr); | 900 | unregister_net_sysctl_table(net->ipv4.ipv4_hdr); |
| 880 | kfree(table); | 901 | kfree(table); |
| @@ -888,16 +909,6 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = { | |||
| 888 | static __init int sysctl_ipv4_init(void) | 909 | static __init int sysctl_ipv4_init(void) |
| 889 | { | 910 | { |
| 890 | struct ctl_table_header *hdr; | 911 | struct ctl_table_header *hdr; |
| 891 | struct ctl_table *i; | ||
| 892 | |||
| 893 | for (i = ipv4_table; i->procname; i++) { | ||
| 894 | if (strcmp(i->procname, "ip_local_reserved_ports") == 0) { | ||
| 895 | i->data = sysctl_local_reserved_ports; | ||
| 896 | break; | ||
| 897 | } | ||
| 898 | } | ||
| 899 | if (!i->procname) | ||
| 900 | return -EINVAL; | ||
| 901 | 912 | ||
| 902 | hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table); | 913 | hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table); |
| 903 | if (hdr == NULL) | 914 | if (hdr == NULL) |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4bd6d52eeffb..eb1dde37e678 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
| @@ -2916,6 +2916,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
| 2916 | case TCP_USER_TIMEOUT: | 2916 | case TCP_USER_TIMEOUT: |
| 2917 | val = jiffies_to_msecs(icsk->icsk_user_timeout); | 2917 | val = jiffies_to_msecs(icsk->icsk_user_timeout); |
| 2918 | break; | 2918 | break; |
| 2919 | |||
| 2920 | case TCP_FASTOPEN: | ||
| 2921 | if (icsk->icsk_accept_queue.fastopenq != NULL) | ||
| 2922 | val = icsk->icsk_accept_queue.fastopenq->max_qlen; | ||
| 2923 | else | ||
| 2924 | val = 0; | ||
| 2925 | break; | ||
| 2926 | |||
| 2919 | case TCP_TIMESTAMP: | 2927 | case TCP_TIMESTAMP: |
| 2920 | val = tcp_time_stamp + tp->tsoffset; | 2928 | val = tcp_time_stamp + tp->tsoffset; |
| 2921 | break; | 2929 | break; |
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 821846fb0a7e..d5de69bc04f5 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c | |||
| @@ -140,13 +140,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
| 140 | ca->cnt = 1; | 140 | ca->cnt = 1; |
| 141 | } | 141 | } |
| 142 | 142 | ||
| 143 | static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, | 143 | static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
| 144 | u32 in_flight) | ||
| 145 | { | 144 | { |
| 146 | struct tcp_sock *tp = tcp_sk(sk); | 145 | struct tcp_sock *tp = tcp_sk(sk); |
| 147 | struct bictcp *ca = inet_csk_ca(sk); | 146 | struct bictcp *ca = inet_csk_ca(sk); |
| 148 | 147 | ||
| 149 | if (!tcp_is_cwnd_limited(sk, in_flight)) | 148 | if (!tcp_is_cwnd_limited(sk)) |
| 150 | return; | 149 | return; |
| 151 | 150 | ||
| 152 | if (tp->snd_cwnd <= tp->snd_ssthresh) | 151 | if (tp->snd_cwnd <= tp->snd_ssthresh) |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 2b9464c93b88..7b09d8b49fa5 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
| @@ -276,26 +276,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) | |||
| 276 | return err; | 276 | return err; |
| 277 | } | 277 | } |
| 278 | 278 | ||
| 279 | /* RFC2861 Check whether we are limited by application or congestion window | ||
| 280 | * This is the inverse of cwnd check in tcp_tso_should_defer | ||
| 281 | */ | ||
| 282 | bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) | ||
| 283 | { | ||
| 284 | const struct tcp_sock *tp = tcp_sk(sk); | ||
| 285 | u32 left; | ||
| 286 | |||
| 287 | if (in_flight >= tp->snd_cwnd) | ||
| 288 | return true; | ||
| 289 | |||
| 290 | left = tp->snd_cwnd - in_flight; | ||
| 291 | if (sk_can_gso(sk) && | ||
| 292 | left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && | ||
| 293 | left < tp->xmit_size_goal_segs) | ||
| 294 | return true; | ||
| 295 | return left <= tcp_max_tso_deferred_mss(tp); | ||
| 296 | } | ||
| 297 | EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); | ||
| 298 | |||
| 299 | /* Slow start is used when congestion window is no greater than the slow start | 279 | /* Slow start is used when congestion window is no greater than the slow start |
| 300 | * threshold. We base on RFC2581 and also handle stretch ACKs properly. | 280 | * threshold. We base on RFC2581 and also handle stretch ACKs properly. |
| 301 | * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but | 281 | * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but |
| @@ -337,11 +317,11 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); | |||
| 337 | /* This is Jacobson's slow start and congestion avoidance. | 317 | /* This is Jacobson's slow start and congestion avoidance. |
| 338 | * SIGCOMM '88, p. 328. | 318 | * SIGCOMM '88, p. 328. |
| 339 | */ | 319 | */ |
| 340 | void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) | 320 | void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
| 341 | { | 321 | { |
| 342 | struct tcp_sock *tp = tcp_sk(sk); | 322 | struct tcp_sock *tp = tcp_sk(sk); |
| 343 | 323 | ||
| 344 | if (!tcp_is_cwnd_limited(sk, in_flight)) | 324 | if (!tcp_is_cwnd_limited(sk)) |
| 345 | return; | 325 | return; |
| 346 | 326 | ||
| 347 | /* In "safe" area, increase. */ | 327 | /* In "safe" area, increase. */ |
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index b4f1b29b08bd..a9bd8a4828a9 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
| @@ -304,13 +304,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
| 304 | ca->cnt = 1; | 304 | ca->cnt = 1; |
| 305 | } | 305 | } |
| 306 | 306 | ||
| 307 | static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, | 307 | static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
| 308 | u32 in_flight) | ||
| 309 | { | 308 | { |
| 310 | struct tcp_sock *tp = tcp_sk(sk); | 309 | struct tcp_sock *tp = tcp_sk(sk); |
| 311 | struct bictcp *ca = inet_csk_ca(sk); | 310 | struct bictcp *ca = inet_csk_ca(sk); |
| 312 | 311 | ||
| 313 | if (!tcp_is_cwnd_limited(sk, in_flight)) | 312 | if (!tcp_is_cwnd_limited(sk)) |
| 314 | return; | 313 | return; |
| 315 | 314 | ||
| 316 | if (tp->snd_cwnd <= tp->snd_ssthresh) { | 315 | if (tp->snd_cwnd <= tp->snd_ssthresh) { |
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f195d9316e55..62e48cf84e60 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c | |||
| @@ -72,25 +72,224 @@ error: kfree(ctx); | |||
| 72 | return err; | 72 | return err; |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | /* Computes the fastopen cookie for the IP path. | 75 | static bool __tcp_fastopen_cookie_gen(const void *path, |
| 76 | * The path is a 128 bits long (pad with zeros for IPv4). | 76 | struct tcp_fastopen_cookie *foc) |
| 77 | * | ||
| 78 | * The caller must check foc->len to determine if a valid cookie | ||
| 79 | * has been generated successfully. | ||
| 80 | */ | ||
| 81 | void tcp_fastopen_cookie_gen(__be32 src, __be32 dst, | ||
| 82 | struct tcp_fastopen_cookie *foc) | ||
| 83 | { | 77 | { |
| 84 | __be32 path[4] = { src, dst, 0, 0 }; | ||
| 85 | struct tcp_fastopen_context *ctx; | 78 | struct tcp_fastopen_context *ctx; |
| 79 | bool ok = false; | ||
| 86 | 80 | ||
| 87 | tcp_fastopen_init_key_once(true); | 81 | tcp_fastopen_init_key_once(true); |
| 88 | 82 | ||
| 89 | rcu_read_lock(); | 83 | rcu_read_lock(); |
| 90 | ctx = rcu_dereference(tcp_fastopen_ctx); | 84 | ctx = rcu_dereference(tcp_fastopen_ctx); |
| 91 | if (ctx) { | 85 | if (ctx) { |
| 92 | crypto_cipher_encrypt_one(ctx->tfm, foc->val, (__u8 *)path); | 86 | crypto_cipher_encrypt_one(ctx->tfm, foc->val, path); |
| 93 | foc->len = TCP_FASTOPEN_COOKIE_SIZE; | 87 | foc->len = TCP_FASTOPEN_COOKIE_SIZE; |
| 88 | ok = true; | ||
| 94 | } | 89 | } |
| 95 | rcu_read_unlock(); | 90 | rcu_read_unlock(); |
| 91 | return ok; | ||
| 92 | } | ||
| 93 | |||
| 94 | /* Generate the fastopen cookie by doing aes128 encryption on both | ||
| 95 | * the source and destination addresses. Pad 0s for IPv4 or IPv4-mapped-IPv6 | ||
| 96 | * addresses. For the longer IPv6 addresses use CBC-MAC. | ||
| 97 | * | ||
| 98 | * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE. | ||
| 99 | */ | ||
| 100 | static bool tcp_fastopen_cookie_gen(struct request_sock *req, | ||
| 101 | struct sk_buff *syn, | ||
| 102 | struct tcp_fastopen_cookie *foc) | ||
| 103 | { | ||
| 104 | if (req->rsk_ops->family == AF_INET) { | ||
| 105 | const struct iphdr *iph = ip_hdr(syn); | ||
| 106 | |||
| 107 | __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 }; | ||
| 108 | return __tcp_fastopen_cookie_gen(path, foc); | ||
| 109 | } | ||
| 110 | |||
| 111 | #if IS_ENABLED(CONFIG_IPV6) | ||
| 112 | if (req->rsk_ops->family == AF_INET6) { | ||
| 113 | const struct ipv6hdr *ip6h = ipv6_hdr(syn); | ||
| 114 | struct tcp_fastopen_cookie tmp; | ||
| 115 | |||
| 116 | if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { | ||
| 117 | struct in6_addr *buf = (struct in6_addr *) tmp.val; | ||
| 118 | int i = 4; | ||
| 119 | |||
| 120 | for (i = 0; i < 4; i++) | ||
| 121 | buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i]; | ||
| 122 | return __tcp_fastopen_cookie_gen(buf, foc); | ||
| 123 | } | ||
| 124 | } | ||
| 125 | #endif | ||
| 126 | return false; | ||
| 127 | } | ||
| 128 | |||
| 129 | static bool tcp_fastopen_create_child(struct sock *sk, | ||
| 130 | struct sk_buff *skb, | ||
| 131 | struct dst_entry *dst, | ||
| 132 | struct request_sock *req) | ||
| 133 | { | ||
| 134 | struct tcp_sock *tp = tcp_sk(sk); | ||
| 135 | struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; | ||
| 136 | struct sock *child; | ||
| 137 | |||
| 138 | req->num_retrans = 0; | ||
| 139 | req->num_timeout = 0; | ||
| 140 | req->sk = NULL; | ||
| 141 | |||
| 142 | child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); | ||
| 143 | if (child == NULL) | ||
| 144 | return false; | ||
| 145 | |||
| 146 | spin_lock(&queue->fastopenq->lock); | ||
| 147 | queue->fastopenq->qlen++; | ||
| 148 | spin_unlock(&queue->fastopenq->lock); | ||
| 149 | |||
| 150 | /* Initialize the child socket. Have to fix some values to take | ||
| 151 | * into account the child is a Fast Open socket and is created | ||
| 152 | * only out of the bits carried in the SYN packet. | ||
| 153 | */ | ||
| 154 | tp = tcp_sk(child); | ||
| 155 | |||
| 156 | tp->fastopen_rsk = req; | ||
| 157 | /* Do a hold on the listner sk so that if the listener is being | ||
| 158 | * closed, the child that has been accepted can live on and still | ||
| 159 | * access listen_lock. | ||
| 160 | */ | ||
| 161 | sock_hold(sk); | ||
| 162 | tcp_rsk(req)->listener = sk; | ||
| 163 | |||
| 164 | /* RFC1323: The window in SYN & SYN/ACK segments is never | ||
| 165 | * scaled. So correct it appropriately. | ||
| 166 | */ | ||
| 167 | tp->snd_wnd = ntohs(tcp_hdr(skb)->window); | ||
| 168 | |||
| 169 | /* Activate the retrans timer so that SYNACK can be retransmitted. | ||
| 170 | * The request socket is not added to the SYN table of the parent | ||
| 171 | * because it's been added to the accept queue directly. | ||
| 172 | */ | ||
| 173 | inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, | ||
| 174 | TCP_TIMEOUT_INIT, TCP_RTO_MAX); | ||
| 175 | |||
| 176 | /* Add the child socket directly into the accept queue */ | ||
| 177 | inet_csk_reqsk_queue_add(sk, req, child); | ||
| 178 | |||
| 179 | /* Now finish processing the fastopen child socket. */ | ||
| 180 | inet_csk(child)->icsk_af_ops->rebuild_header(child); | ||
| 181 | tcp_init_congestion_control(child); | ||
| 182 | tcp_mtup_init(child); | ||
| 183 | tcp_init_metrics(child); | ||
| 184 | tcp_init_buffer_space(child); | ||
| 185 | |||
| 186 | /* Queue the data carried in the SYN packet. We need to first | ||
| 187 | * bump skb's refcnt because the caller will attempt to free it. | ||
| 188 | * | ||
| 189 | * XXX (TFO) - we honor a zero-payload TFO request for now, | ||
| 190 | * (any reason not to?) but no need to queue the skb since | ||
| 191 | * there is no data. How about SYN+FIN? | ||
| 192 | */ | ||
| 193 | if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) { | ||
| 194 | skb = skb_get(skb); | ||
| 195 | skb_dst_drop(skb); | ||
| 196 | __skb_pull(skb, tcp_hdr(skb)->doff * 4); | ||
| 197 | skb_set_owner_r(skb, child); | ||
| 198 | __skb_queue_tail(&child->sk_receive_queue, skb); | ||
| 199 | tp->syn_data_acked = 1; | ||
| 200 | } | ||
| 201 | tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
| 202 | sk->sk_data_ready(sk); | ||
| 203 | bh_unlock_sock(child); | ||
| 204 | sock_put(child); | ||
| 205 | WARN_ON(req->sk == NULL); | ||
| 206 | return true; | ||
| 207 | } | ||
| 208 | EXPORT_SYMBOL(tcp_fastopen_create_child); | ||
| 209 | |||
| 210 | static bool tcp_fastopen_queue_check(struct sock *sk) | ||
| 211 | { | ||
| 212 | struct fastopen_queue *fastopenq; | ||
| 213 | |||
| 214 | /* Make sure the listener has enabled fastopen, and we don't | ||
| 215 | * exceed the max # of pending TFO requests allowed before trying | ||
| 216 | * to validating the cookie in order to avoid burning CPU cycles | ||
| 217 | * unnecessarily. | ||
| 218 | * | ||
| 219 | * XXX (TFO) - The implication of checking the max_qlen before | ||
| 220 | * processing a cookie request is that clients can't differentiate | ||
| 221 | * between qlen overflow causing Fast Open to be disabled | ||
| 222 | * temporarily vs a server not supporting Fast Open at all. | ||
| 223 | */ | ||
| 224 | fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; | ||
| 225 | if (fastopenq == NULL || fastopenq->max_qlen == 0) | ||
| 226 | return false; | ||
| 227 | |||
| 228 | if (fastopenq->qlen >= fastopenq->max_qlen) { | ||
| 229 | struct request_sock *req1; | ||
| 230 | spin_lock(&fastopenq->lock); | ||
| 231 | req1 = fastopenq->rskq_rst_head; | ||
| 232 | if ((req1 == NULL) || time_after(req1->expires, jiffies)) { | ||
| 233 | spin_unlock(&fastopenq->lock); | ||
| 234 | NET_INC_STATS_BH(sock_net(sk), | ||
| 235 | LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); | ||
| 236 | return false; | ||
| 237 | } | ||
| 238 | fastopenq->rskq_rst_head = req1->dl_next; | ||
| 239 | fastopenq->qlen--; | ||
| 240 | spin_unlock(&fastopenq->lock); | ||
| 241 | reqsk_free(req1); | ||
| 242 | } | ||
| 243 | return true; | ||
| 244 | } | ||
| 245 | |||
| 246 | /* Returns true if we should perform Fast Open on the SYN. The cookie (foc) | ||
| 247 | * may be updated and return the client in the SYN-ACK later. E.g., Fast Open | ||
| 248 | * cookie request (foc->len == 0). | ||
| 249 | */ | ||
| 250 | bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, | ||
| 251 | struct request_sock *req, | ||
| 252 | struct tcp_fastopen_cookie *foc, | ||
| 253 | struct dst_entry *dst) | ||
| 254 | { | ||
| 255 | struct tcp_fastopen_cookie valid_foc = { .len = -1 }; | ||
| 256 | bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; | ||
| 257 | |||
| 258 | if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && | ||
| 259 | (syn_data || foc->len >= 0) && | ||
| 260 | tcp_fastopen_queue_check(sk))) { | ||
| 261 | foc->len = -1; | ||
| 262 | return false; | ||
| 263 | } | ||
| 264 | |||
| 265 | if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) | ||
| 266 | goto fastopen; | ||
| 267 | |||
| 268 | if (tcp_fastopen_cookie_gen(req, skb, &valid_foc) && | ||
| 269 | foc->len == TCP_FASTOPEN_COOKIE_SIZE && | ||
| 270 | foc->len == valid_foc.len && | ||
| 271 | !memcmp(foc->val, valid_foc.val, foc->len)) { | ||
| 272 | /* Cookie is valid. Create a (full) child socket to accept | ||
| 273 | * the data in SYN before returning a SYN-ACK to ack the | ||
| 274 | * data. If we fail to create the socket, fall back and | ||
| 275 | * ack the ISN only but includes the same cookie. | ||
| 276 | * | ||
| 277 | * Note: Data-less SYN with valid cookie is allowed to send | ||
| 278 | * data in SYN_RECV state. | ||
| 279 | */ | ||
| 280 | fastopen: | ||
| 281 | if (tcp_fastopen_create_child(sk, skb, dst, req)) { | ||
| 282 | foc->len = -1; | ||
| 283 | NET_INC_STATS_BH(sock_net(sk), | ||
| 284 | LINUX_MIB_TCPFASTOPENPASSIVE); | ||
| 285 | return true; | ||
| 286 | } | ||
| 287 | } | ||
| 288 | |||
| 289 | NET_INC_STATS_BH(sock_net(sk), foc->len ? | ||
| 290 | LINUX_MIB_TCPFASTOPENPASSIVEFAIL : | ||
| 291 | LINUX_MIB_TCPFASTOPENCOOKIEREQD); | ||
| 292 | *foc = valid_foc; | ||
| 293 | return false; | ||
| 96 | } | 294 | } |
| 295 | EXPORT_SYMBOL(tcp_try_fastopen); | ||
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 8b9e7bad77c0..1c4908280d92 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c | |||
| @@ -109,12 +109,12 @@ static void hstcp_init(struct sock *sk) | |||
| 109 | tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); | 109 | tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); |
| 110 | } | 110 | } |
| 111 | 111 | ||
| 112 | static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) | 112 | static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
| 113 | { | 113 | { |
| 114 | struct tcp_sock *tp = tcp_sk(sk); | 114 | struct tcp_sock *tp = tcp_sk(sk); |
| 115 | struct hstcp *ca = inet_csk_ca(sk); | 115 | struct hstcp *ca = inet_csk_ca(sk); |
| 116 | 116 | ||
| 117 | if (!tcp_is_cwnd_limited(sk, in_flight)) | 117 | if (!tcp_is_cwnd_limited(sk)) |
| 118 | return; | 118 | return; |
| 119 | 119 | ||
| 120 | if (tp->snd_cwnd <= tp->snd_ssthresh) | 120 | if (tp->snd_cwnd <= tp->snd_ssthresh) |
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 4a194acfd923..031361311a8b 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c | |||
| @@ -227,12 +227,12 @@ static u32 htcp_recalc_ssthresh(struct sock *sk) | |||
| 227 | return max((tp->snd_cwnd * ca->beta) >> 7, 2U); | 227 | return max((tp->snd_cwnd * ca->beta) >> 7, 2U); |
| 228 | } | 228 | } |
| 229 | 229 | ||
| 230 | static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) | 230 | static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
| 231 | { | 231 | { |
| 232 | struct tcp_sock *tp = tcp_sk(sk); | 232 | struct tcp_sock *tp = tcp_sk(sk); |
| 233 | struct htcp *ca = inet_csk_ca(sk); | 233 | struct htcp *ca = inet_csk_ca(sk); |
| 234 | 234 | ||
| 235 | if (!tcp_is_cwnd_limited(sk, in_flight)) | 235 | if (!tcp_is_cwnd_limited(sk)) |
| 236 | return; | 236 | return; |
| 237 | 237 | ||
| 238 | if (tp->snd_cwnd <= tp->snd_ssthresh) | 238 | if (tp->snd_cwnd <= tp->snd_ssthresh) |
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index a15a799bf768..d8f8f05a4951 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c | |||
| @@ -87,8 +87,7 @@ static inline u32 hybla_fraction(u32 odds) | |||
| 87 | * o Give cwnd a new value based on the model proposed | 87 | * o Give cwnd a new value based on the model proposed |
| 88 | * o remember increments <1 | 88 | * o remember increments <1 |
| 89 | */ | 89 | */ |
| 90 | static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked, | 90 | static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
| 91 | u32 in_flight) | ||
| 92 | { | 91 | { |
| 93 | struct tcp_sock *tp = tcp_sk(sk); | 92 | struct tcp_sock *tp = tcp_sk(sk); |
| 94 | struct hybla *ca = inet_csk_ca(sk); | 93 | struct hybla *ca = inet_csk_ca(sk); |
| @@ -101,11 +100,11 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked, | |||
| 101 | ca->minrtt_us = tp->srtt_us; | 100 | ca->minrtt_us = tp->srtt_us; |
| 102 | } | 101 | } |
| 103 | 102 | ||
| 104 | if (!tcp_is_cwnd_limited(sk, in_flight)) | 103 | if (!tcp_is_cwnd_limited(sk)) |
| 105 | return; | 104 | return; |
| 106 | 105 | ||
| 107 | if (!ca->hybla_en) { | 106 | if (!ca->hybla_en) { |
| 108 | tcp_reno_cong_avoid(sk, ack, acked, in_flight); | 107 | tcp_reno_cong_avoid(sk, ack, acked); |
| 109 | return; | 108 | return; |
| 110 | } | 109 | } |
| 111 | 110 | ||
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 863d105e3015..5999b3972e64 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c | |||
| @@ -255,8 +255,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state) | |||
| 255 | /* | 255 | /* |
| 256 | * Increase window in response to successful acknowledgment. | 256 | * Increase window in response to successful acknowledgment. |
| 257 | */ | 257 | */ |
| 258 | static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked, | 258 | static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
| 259 | u32 in_flight) | ||
| 260 | { | 259 | { |
| 261 | struct tcp_sock *tp = tcp_sk(sk); | 260 | struct tcp_sock *tp = tcp_sk(sk); |
| 262 | struct illinois *ca = inet_csk_ca(sk); | 261 | struct illinois *ca = inet_csk_ca(sk); |
| @@ -265,7 +264,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked, | |||
| 265 | update_params(sk); | 264 | update_params(sk); |
| 266 | 265 | ||
| 267 | /* RFC2861 only increase cwnd if fully utilized */ | 266 | /* RFC2861 only increase cwnd if fully utilized */ |
| 268 | if (!tcp_is_cwnd_limited(sk, in_flight)) | 267 | if (!tcp_is_cwnd_limited(sk)) |
| 269 | return; | 268 | return; |
| 270 | 269 | ||
| 271 | /* In slow start */ | 270 | /* In slow start */ |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3a26b3b23f16..40661fc1e233 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
| @@ -1167,7 +1167,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, | |||
| 1167 | } | 1167 | } |
| 1168 | pkt_len = new_len; | 1168 | pkt_len = new_len; |
| 1169 | } | 1169 | } |
| 1170 | err = tcp_fragment(sk, skb, pkt_len, mss); | 1170 | err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC); |
| 1171 | if (err < 0) | 1171 | if (err < 0) |
| 1172 | return err; | 1172 | return err; |
| 1173 | } | 1173 | } |
| @@ -2241,7 +2241,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |||
| 2241 | break; | 2241 | break; |
| 2242 | 2242 | ||
| 2243 | mss = skb_shinfo(skb)->gso_size; | 2243 | mss = skb_shinfo(skb)->gso_size; |
| 2244 | err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss); | 2244 | err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, |
| 2245 | mss, GFP_ATOMIC); | ||
| 2245 | if (err < 0) | 2246 | if (err < 0) |
| 2246 | break; | 2247 | break; |
| 2247 | cnt = packets; | 2248 | cnt = packets; |
| @@ -2937,10 +2938,11 @@ static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) | |||
| 2937 | tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L); | 2938 | tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L); |
| 2938 | } | 2939 | } |
| 2939 | 2940 | ||
| 2940 | static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) | 2941 | static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
| 2941 | { | 2942 | { |
| 2942 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2943 | const struct inet_connection_sock *icsk = inet_csk(sk); |
| 2943 | icsk->icsk_ca_ops->cong_avoid(sk, ack, acked, in_flight); | 2944 | |
| 2945 | icsk->icsk_ca_ops->cong_avoid(sk, ack, acked); | ||
| 2944 | tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; | 2946 | tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; |
| 2945 | } | 2947 | } |
| 2946 | 2948 | ||
| @@ -3363,7 +3365,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
| 3363 | u32 ack_seq = TCP_SKB_CB(skb)->seq; | 3365 | u32 ack_seq = TCP_SKB_CB(skb)->seq; |
| 3364 | u32 ack = TCP_SKB_CB(skb)->ack_seq; | 3366 | u32 ack = TCP_SKB_CB(skb)->ack_seq; |
| 3365 | bool is_dupack = false; | 3367 | bool is_dupack = false; |
| 3366 | u32 prior_in_flight; | ||
| 3367 | u32 prior_fackets; | 3368 | u32 prior_fackets; |
| 3368 | int prior_packets = tp->packets_out; | 3369 | int prior_packets = tp->packets_out; |
| 3369 | const int prior_unsacked = tp->packets_out - tp->sacked_out; | 3370 | const int prior_unsacked = tp->packets_out - tp->sacked_out; |
| @@ -3396,7 +3397,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
| 3396 | flag |= FLAG_SND_UNA_ADVANCED; | 3397 | flag |= FLAG_SND_UNA_ADVANCED; |
| 3397 | 3398 | ||
| 3398 | prior_fackets = tp->fackets_out; | 3399 | prior_fackets = tp->fackets_out; |
| 3399 | prior_in_flight = tcp_packets_in_flight(tp); | ||
| 3400 | 3400 | ||
| 3401 | /* ts_recent update must be made after we are sure that the packet | 3401 | /* ts_recent update must be made after we are sure that the packet |
| 3402 | * is in window. | 3402 | * is in window. |
| @@ -3451,7 +3451,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
| 3451 | 3451 | ||
| 3452 | /* Advance cwnd if state allows */ | 3452 | /* Advance cwnd if state allows */ |
| 3453 | if (tcp_may_raise_cwnd(sk, flag)) | 3453 | if (tcp_may_raise_cwnd(sk, flag)) |
| 3454 | tcp_cong_avoid(sk, ack, acked, prior_in_flight); | 3454 | tcp_cong_avoid(sk, ack, acked); |
| 3455 | 3455 | ||
| 3456 | if (tcp_ack_is_dubious(sk, flag)) { | 3456 | if (tcp_ack_is_dubious(sk, flag)) { |
| 3457 | is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); | 3457 | is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); |
| @@ -4702,28 +4702,6 @@ static int tcp_prune_queue(struct sock *sk) | |||
| 4702 | return -1; | 4702 | return -1; |
| 4703 | } | 4703 | } |
| 4704 | 4704 | ||
| 4705 | /* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. | ||
| 4706 | * As additional protections, we do not touch cwnd in retransmission phases, | ||
| 4707 | * and if application hit its sndbuf limit recently. | ||
| 4708 | */ | ||
| 4709 | void tcp_cwnd_application_limited(struct sock *sk) | ||
| 4710 | { | ||
| 4711 | struct tcp_sock *tp = tcp_sk(sk); | ||
| 4712 | |||
| 4713 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && | ||
| 4714 | sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { | ||
| 4715 | /* Limited by application or receiver window. */ | ||
| 4716 | u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); | ||
| 4717 | u32 win_used = max(tp->snd_cwnd_used, init_win); | ||
| 4718 | if (win_used < tp->snd_cwnd) { | ||
| 4719 | tp->snd_ssthresh = tcp_current_ssthresh(sk); | ||
| 4720 | tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; | ||
| 4721 | } | ||
| 4722 | tp->snd_cwnd_used = 0; | ||
| 4723 | } | ||
| 4724 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
| 4725 | } | ||
| 4726 | |||
| 4727 | static bool tcp_should_expand_sndbuf(const struct sock *sk) | 4705 | static bool tcp_should_expand_sndbuf(const struct sock *sk) |
| 4728 | { | 4706 | { |
| 4729 | const struct tcp_sock *tp = tcp_sk(sk); | 4707 | const struct tcp_sock *tp = tcp_sk(sk); |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 438f3b95143d..77cccda1ad0c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
| @@ -336,8 +336,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
| 336 | const int code = icmp_hdr(icmp_skb)->code; | 336 | const int code = icmp_hdr(icmp_skb)->code; |
| 337 | struct sock *sk; | 337 | struct sock *sk; |
| 338 | struct sk_buff *skb; | 338 | struct sk_buff *skb; |
| 339 | struct request_sock *req; | 339 | struct request_sock *fastopen; |
| 340 | __u32 seq; | 340 | __u32 seq, snd_una; |
| 341 | __u32 remaining; | 341 | __u32 remaining; |
| 342 | int err; | 342 | int err; |
| 343 | struct net *net = dev_net(icmp_skb->dev); | 343 | struct net *net = dev_net(icmp_skb->dev); |
| @@ -378,12 +378,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
| 378 | 378 | ||
| 379 | icsk = inet_csk(sk); | 379 | icsk = inet_csk(sk); |
| 380 | tp = tcp_sk(sk); | 380 | tp = tcp_sk(sk); |
| 381 | req = tp->fastopen_rsk; | ||
| 382 | seq = ntohl(th->seq); | 381 | seq = ntohl(th->seq); |
| 382 | /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */ | ||
| 383 | fastopen = tp->fastopen_rsk; | ||
| 384 | snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; | ||
| 383 | if (sk->sk_state != TCP_LISTEN && | 385 | if (sk->sk_state != TCP_LISTEN && |
| 384 | !between(seq, tp->snd_una, tp->snd_nxt) && | 386 | !between(seq, snd_una, tp->snd_nxt)) { |
| 385 | (req == NULL || seq != tcp_rsk(req)->snt_isn)) { | ||
| 386 | /* For a Fast Open socket, allow seq to be snt_isn. */ | ||
| 387 | NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); | 387 | NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); |
| 388 | goto out; | 388 | goto out; |
| 389 | } | 389 | } |
| @@ -426,11 +426,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
| 426 | if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) | 426 | if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) |
| 427 | break; | 427 | break; |
| 428 | if (seq != tp->snd_una || !icsk->icsk_retransmits || | 428 | if (seq != tp->snd_una || !icsk->icsk_retransmits || |
| 429 | !icsk->icsk_backoff) | 429 | !icsk->icsk_backoff || fastopen) |
| 430 | break; | 430 | break; |
| 431 | 431 | ||
| 432 | /* XXX (TFO) - revisit the following logic for TFO */ | ||
| 433 | |||
| 434 | if (sock_owned_by_user(sk)) | 432 | if (sock_owned_by_user(sk)) |
| 435 | break; | 433 | break; |
| 436 | 434 | ||
| @@ -462,14 +460,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
| 462 | goto out; | 460 | goto out; |
| 463 | } | 461 | } |
| 464 | 462 | ||
| 465 | /* XXX (TFO) - if it's a TFO socket and has been accepted, rather | ||
| 466 | * than following the TCP_SYN_RECV case and closing the socket, | ||
| 467 | * we ignore the ICMP error and keep trying like a fully established | ||
| 468 | * socket. Is this the right thing to do? | ||
| 469 | */ | ||
| 470 | if (req && req->sk == NULL) | ||
| 471 | goto out; | ||
| 472 | |||
| 473 | switch (sk->sk_state) { | 463 | switch (sk->sk_state) { |
| 474 | struct request_sock *req, **prev; | 464 | struct request_sock *req, **prev; |
| 475 | case TCP_LISTEN: | 465 | case TCP_LISTEN: |
| @@ -502,10 +492,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
| 502 | goto out; | 492 | goto out; |
| 503 | 493 | ||
| 504 | case TCP_SYN_SENT: | 494 | case TCP_SYN_SENT: |
| 505 | case TCP_SYN_RECV: /* Cannot happen. | 495 | case TCP_SYN_RECV: |
| 506 | It can f.e. if SYNs crossed, | 496 | /* Only in fast or simultaneous open. If a fast open socket is |
| 507 | or Fast Open. | 497 | * already accepted it is treated as a connected one below. |
| 508 | */ | 498 | */ |
| 499 | if (fastopen && fastopen->sk == NULL) | ||
| 500 | break; | ||
| 501 | |||
| 509 | if (!sock_owned_by_user(sk)) { | 502 | if (!sock_owned_by_user(sk)) { |
| 510 | sk->sk_err = err; | 503 | sk->sk_err = err; |
| 511 | 504 | ||
| @@ -822,7 +815,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, | |||
| 822 | */ | 815 | */ |
| 823 | static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | 816 | static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, |
| 824 | struct request_sock *req, | 817 | struct request_sock *req, |
| 825 | u16 queue_mapping) | 818 | u16 queue_mapping, |
| 819 | struct tcp_fastopen_cookie *foc) | ||
| 826 | { | 820 | { |
| 827 | const struct inet_request_sock *ireq = inet_rsk(req); | 821 | const struct inet_request_sock *ireq = inet_rsk(req); |
| 828 | struct flowi4 fl4; | 822 | struct flowi4 fl4; |
| @@ -833,7 +827,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | |||
| 833 | if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) | 827 | if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) |
| 834 | return -1; | 828 | return -1; |
| 835 | 829 | ||
| 836 | skb = tcp_make_synack(sk, dst, req, NULL); | 830 | skb = tcp_make_synack(sk, dst, req, foc); |
| 837 | 831 | ||
| 838 | if (skb) { | 832 | if (skb) { |
| 839 | __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); | 833 | __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); |
| @@ -852,7 +846,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | |||
| 852 | 846 | ||
| 853 | static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) | 847 | static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) |
| 854 | { | 848 | { |
| 855 | int res = tcp_v4_send_synack(sk, NULL, req, 0); | 849 | int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL); |
| 856 | 850 | ||
| 857 | if (!res) { | 851 | if (!res) { |
| 858 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); | 852 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); |
| @@ -1260,187 +1254,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { | |||
| 1260 | }; | 1254 | }; |
| 1261 | #endif | 1255 | #endif |
| 1262 | 1256 | ||
| 1263 | static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, | ||
| 1264 | struct request_sock *req, | ||
| 1265 | struct tcp_fastopen_cookie *foc, | ||
| 1266 | struct tcp_fastopen_cookie *valid_foc) | ||
| 1267 | { | ||
| 1268 | bool skip_cookie = false; | ||
| 1269 | struct fastopen_queue *fastopenq; | ||
| 1270 | |||
| 1271 | if (likely(!fastopen_cookie_present(foc))) { | ||
| 1272 | /* See include/net/tcp.h for the meaning of these knobs */ | ||
| 1273 | if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) || | ||
| 1274 | ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) && | ||
| 1275 | (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1))) | ||
| 1276 | skip_cookie = true; /* no cookie to validate */ | ||
| 1277 | else | ||
| 1278 | return false; | ||
| 1279 | } | ||
| 1280 | fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; | ||
| 1281 | /* A FO option is present; bump the counter. */ | ||
| 1282 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); | ||
| 1283 | |||
| 1284 | /* Make sure the listener has enabled fastopen, and we don't | ||
| 1285 | * exceed the max # of pending TFO requests allowed before trying | ||
| 1286 | * to validating the cookie in order to avoid burning CPU cycles | ||
| 1287 | * unnecessarily. | ||
| 1288 | * | ||
| 1289 | * XXX (TFO) - The implication of checking the max_qlen before | ||
| 1290 | * processing a cookie request is that clients can't differentiate | ||
| 1291 | * between qlen overflow causing Fast Open to be disabled | ||
| 1292 | * temporarily vs a server not supporting Fast Open at all. | ||
| 1293 | */ | ||
| 1294 | if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 || | ||
| 1295 | fastopenq == NULL || fastopenq->max_qlen == 0) | ||
| 1296 | return false; | ||
| 1297 | |||
| 1298 | if (fastopenq->qlen >= fastopenq->max_qlen) { | ||
| 1299 | struct request_sock *req1; | ||
| 1300 | spin_lock(&fastopenq->lock); | ||
| 1301 | req1 = fastopenq->rskq_rst_head; | ||
| 1302 | if ((req1 == NULL) || time_after(req1->expires, jiffies)) { | ||
| 1303 | spin_unlock(&fastopenq->lock); | ||
| 1304 | NET_INC_STATS_BH(sock_net(sk), | ||
| 1305 | LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); | ||
| 1306 | /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/ | ||
| 1307 | foc->len = -1; | ||
| 1308 | return false; | ||
| 1309 | } | ||
| 1310 | fastopenq->rskq_rst_head = req1->dl_next; | ||
| 1311 | fastopenq->qlen--; | ||
| 1312 | spin_unlock(&fastopenq->lock); | ||
| 1313 | reqsk_free(req1); | ||
| 1314 | } | ||
| 1315 | if (skip_cookie) { | ||
| 1316 | tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
| 1317 | return true; | ||
| 1318 | } | ||
| 1319 | |||
| 1320 | if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) { | ||
| 1321 | if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) { | ||
| 1322 | tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, | ||
| 1323 | ip_hdr(skb)->daddr, valid_foc); | ||
| 1324 | if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || | ||
| 1325 | memcmp(&foc->val[0], &valid_foc->val[0], | ||
| 1326 | TCP_FASTOPEN_COOKIE_SIZE) != 0) | ||
| 1327 | return false; | ||
| 1328 | valid_foc->len = -1; | ||
| 1329 | } | ||
| 1330 | /* Acknowledge the data received from the peer. */ | ||
| 1331 | tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
| 1332 | return true; | ||
| 1333 | } else if (foc->len == 0) { /* Client requesting a cookie */ | ||
| 1334 | tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, | ||
| 1335 | ip_hdr(skb)->daddr, valid_foc); | ||
| 1336 | NET_INC_STATS_BH(sock_net(sk), | ||
| 1337 | LINUX_MIB_TCPFASTOPENCOOKIEREQD); | ||
| 1338 | } else { | ||
| 1339 | /* Client sent a cookie with wrong size. Treat it | ||
| 1340 | * the same as invalid and return a valid one. | ||
| 1341 | */ | ||
| 1342 | tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, | ||
| 1343 | ip_hdr(skb)->daddr, valid_foc); | ||
| 1344 | } | ||
| 1345 | return false; | ||
| 1346 | } | ||
| 1347 | |||
| 1348 | static int tcp_v4_conn_req_fastopen(struct sock *sk, | ||
| 1349 | struct sk_buff *skb, | ||
| 1350 | struct sk_buff *skb_synack, | ||
| 1351 | struct request_sock *req) | ||
| 1352 | { | ||
| 1353 | struct tcp_sock *tp = tcp_sk(sk); | ||
| 1354 | struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; | ||
| 1355 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
| 1356 | struct sock *child; | ||
| 1357 | int err; | ||
| 1358 | |||
| 1359 | req->num_retrans = 0; | ||
| 1360 | req->num_timeout = 0; | ||
| 1361 | req->sk = NULL; | ||
| 1362 | |||
| 1363 | child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); | ||
| 1364 | if (child == NULL) { | ||
| 1365 | NET_INC_STATS_BH(sock_net(sk), | ||
| 1366 | LINUX_MIB_TCPFASTOPENPASSIVEFAIL); | ||
| 1367 | kfree_skb(skb_synack); | ||
| 1368 | return -1; | ||
| 1369 | } | ||
| 1370 | err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, | ||
| 1371 | ireq->ir_rmt_addr, ireq->opt); | ||
| 1372 | err = net_xmit_eval(err); | ||
| 1373 | if (!err) | ||
| 1374 | tcp_rsk(req)->snt_synack = tcp_time_stamp; | ||
| 1375 | /* XXX (TFO) - is it ok to ignore error and continue? */ | ||
| 1376 | |||
| 1377 | spin_lock(&queue->fastopenq->lock); | ||
| 1378 | queue->fastopenq->qlen++; | ||
| 1379 | spin_unlock(&queue->fastopenq->lock); | ||
| 1380 | |||
| 1381 | /* Initialize the child socket. Have to fix some values to take | ||
| 1382 | * into account the child is a Fast Open socket and is created | ||
| 1383 | * only out of the bits carried in the SYN packet. | ||
| 1384 | */ | ||
| 1385 | tp = tcp_sk(child); | ||
| 1386 | |||
| 1387 | tp->fastopen_rsk = req; | ||
| 1388 | /* Do a hold on the listner sk so that if the listener is being | ||
| 1389 | * closed, the child that has been accepted can live on and still | ||
| 1390 | * access listen_lock. | ||
| 1391 | */ | ||
| 1392 | sock_hold(sk); | ||
| 1393 | tcp_rsk(req)->listener = sk; | ||
| 1394 | |||
| 1395 | /* RFC1323: The window in SYN & SYN/ACK segments is never | ||
| 1396 | * scaled. So correct it appropriately. | ||
| 1397 | */ | ||
| 1398 | tp->snd_wnd = ntohs(tcp_hdr(skb)->window); | ||
| 1399 | |||
| 1400 | /* Activate the retrans timer so that SYNACK can be retransmitted. | ||
| 1401 | * The request socket is not added to the SYN table of the parent | ||
| 1402 | * because it's been added to the accept queue directly. | ||
| 1403 | */ | ||
| 1404 | inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, | ||
| 1405 | TCP_TIMEOUT_INIT, TCP_RTO_MAX); | ||
| 1406 | |||
| 1407 | /* Add the child socket directly into the accept queue */ | ||
| 1408 | inet_csk_reqsk_queue_add(sk, req, child); | ||
| 1409 | |||
| 1410 | /* Now finish processing the fastopen child socket. */ | ||
| 1411 | inet_csk(child)->icsk_af_ops->rebuild_header(child); | ||
| 1412 | tcp_init_congestion_control(child); | ||
| 1413 | tcp_mtup_init(child); | ||
| 1414 | tcp_init_metrics(child); | ||
| 1415 | tcp_init_buffer_space(child); | ||
| 1416 | |||
| 1417 | /* Queue the data carried in the SYN packet. We need to first | ||
| 1418 | * bump skb's refcnt because the caller will attempt to free it. | ||
| 1419 | * | ||
| 1420 | * XXX (TFO) - we honor a zero-payload TFO request for now. | ||
| 1421 | * (Any reason not to?) | ||
| 1422 | */ | ||
| 1423 | if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) { | ||
| 1424 | /* Don't queue the skb if there is no payload in SYN. | ||
| 1425 | * XXX (TFO) - How about SYN+FIN? | ||
| 1426 | */ | ||
| 1427 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
| 1428 | } else { | ||
| 1429 | skb = skb_get(skb); | ||
| 1430 | skb_dst_drop(skb); | ||
| 1431 | __skb_pull(skb, tcp_hdr(skb)->doff * 4); | ||
| 1432 | skb_set_owner_r(skb, child); | ||
| 1433 | __skb_queue_tail(&child->sk_receive_queue, skb); | ||
| 1434 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
| 1435 | tp->syn_data_acked = 1; | ||
| 1436 | } | ||
| 1437 | sk->sk_data_ready(sk); | ||
| 1438 | bh_unlock_sock(child); | ||
| 1439 | sock_put(child); | ||
| 1440 | WARN_ON(req->sk == NULL); | ||
| 1441 | return 0; | ||
| 1442 | } | ||
| 1443 | |||
| 1444 | int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | 1257 | int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) |
| 1445 | { | 1258 | { |
| 1446 | struct tcp_options_received tmp_opt; | 1259 | struct tcp_options_received tmp_opt; |
| @@ -1451,12 +1264,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
| 1451 | __be32 saddr = ip_hdr(skb)->saddr; | 1264 | __be32 saddr = ip_hdr(skb)->saddr; |
| 1452 | __be32 daddr = ip_hdr(skb)->daddr; | 1265 | __be32 daddr = ip_hdr(skb)->daddr; |
| 1453 | __u32 isn = TCP_SKB_CB(skb)->when; | 1266 | __u32 isn = TCP_SKB_CB(skb)->when; |
| 1454 | bool want_cookie = false; | 1267 | bool want_cookie = false, fastopen; |
| 1455 | struct flowi4 fl4; | 1268 | struct flowi4 fl4; |
| 1456 | struct tcp_fastopen_cookie foc = { .len = -1 }; | 1269 | struct tcp_fastopen_cookie foc = { .len = -1 }; |
| 1457 | struct tcp_fastopen_cookie valid_foc = { .len = -1 }; | 1270 | int err; |
| 1458 | struct sk_buff *skb_synack; | ||
| 1459 | int do_fastopen; | ||
| 1460 | 1271 | ||
| 1461 | /* Never answer to SYNs send to broadcast or multicast */ | 1272 | /* Never answer to SYNs send to broadcast or multicast */ |
| 1462 | if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) | 1273 | if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) |
| @@ -1507,6 +1318,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
| 1507 | ireq->ir_rmt_addr = saddr; | 1318 | ireq->ir_rmt_addr = saddr; |
| 1508 | ireq->no_srccheck = inet_sk(sk)->transparent; | 1319 | ireq->no_srccheck = inet_sk(sk)->transparent; |
| 1509 | ireq->opt = tcp_v4_save_options(skb); | 1320 | ireq->opt = tcp_v4_save_options(skb); |
| 1321 | ireq->ir_mark = inet_request_mark(sk, skb); | ||
| 1510 | 1322 | ||
| 1511 | if (security_inet_conn_request(sk, skb, req)) | 1323 | if (security_inet_conn_request(sk, skb, req)) |
| 1512 | goto drop_and_free; | 1324 | goto drop_and_free; |
| @@ -1555,52 +1367,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
| 1555 | 1367 | ||
| 1556 | isn = tcp_v4_init_sequence(skb); | 1368 | isn = tcp_v4_init_sequence(skb); |
| 1557 | } | 1369 | } |
| 1558 | tcp_rsk(req)->snt_isn = isn; | 1370 | if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) |
| 1559 | |||
| 1560 | if (dst == NULL) { | ||
| 1561 | dst = inet_csk_route_req(sk, &fl4, req); | ||
| 1562 | if (dst == NULL) | ||
| 1563 | goto drop_and_free; | ||
| 1564 | } | ||
| 1565 | do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc); | ||
| 1566 | |||
| 1567 | /* We don't call tcp_v4_send_synack() directly because we need | ||
| 1568 | * to make sure a child socket can be created successfully before | ||
| 1569 | * sending back synack! | ||
| 1570 | * | ||
| 1571 | * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack() | ||
| 1572 | * (or better yet, call tcp_send_synack() in the child context | ||
| 1573 | * directly, but will have to fix bunch of other code first) | ||
| 1574 | * after syn_recv_sock() except one will need to first fix the | ||
| 1575 | * latter to remove its dependency on the current implementation | ||
| 1576 | * of tcp_v4_send_synack()->tcp_select_initial_window(). | ||
| 1577 | */ | ||
| 1578 | skb_synack = tcp_make_synack(sk, dst, req, | ||
| 1579 | fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); | ||
| 1580 | |||
| 1581 | if (skb_synack) { | ||
| 1582 | __tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr); | ||
| 1583 | skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb)); | ||
| 1584 | } else | ||
| 1585 | goto drop_and_free; | 1371 | goto drop_and_free; |
| 1586 | 1372 | ||
| 1587 | if (likely(!do_fastopen)) { | 1373 | tcp_rsk(req)->snt_isn = isn; |
| 1588 | int err; | 1374 | tcp_rsk(req)->snt_synack = tcp_time_stamp; |
| 1589 | err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, | 1375 | tcp_openreq_init_rwin(req, sk, dst); |
| 1590 | ireq->ir_rmt_addr, ireq->opt); | 1376 | fastopen = !want_cookie && |
| 1591 | err = net_xmit_eval(err); | 1377 | tcp_try_fastopen(sk, skb, req, &foc, dst); |
| 1378 | err = tcp_v4_send_synack(sk, dst, req, | ||
| 1379 | skb_get_queue_mapping(skb), &foc); | ||
| 1380 | if (!fastopen) { | ||
| 1592 | if (err || want_cookie) | 1381 | if (err || want_cookie) |
| 1593 | goto drop_and_free; | 1382 | goto drop_and_free; |
| 1594 | 1383 | ||
| 1595 | tcp_rsk(req)->snt_synack = tcp_time_stamp; | 1384 | tcp_rsk(req)->snt_synack = tcp_time_stamp; |
| 1596 | tcp_rsk(req)->listener = NULL; | 1385 | tcp_rsk(req)->listener = NULL; |
| 1597 | /* Add the request_sock to the SYN table */ | ||
| 1598 | inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); | 1386 | inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); |
| 1599 | if (fastopen_cookie_present(&foc) && foc.len != 0) | 1387 | } |
| 1600 | NET_INC_STATS_BH(sock_net(sk), | ||
| 1601 | LINUX_MIB_TCPFASTOPENPASSIVEFAIL); | ||
| 1602 | } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req)) | ||
| 1603 | goto drop_and_free; | ||
| 1604 | 1388 | ||
| 1605 | return 0; | 1389 | return 0; |
| 1606 | 1390 | ||
| @@ -1744,28 +1528,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
| 1744 | return sk; | 1528 | return sk; |
| 1745 | } | 1529 | } |
| 1746 | 1530 | ||
| 1747 | static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) | ||
| 1748 | { | ||
| 1749 | const struct iphdr *iph = ip_hdr(skb); | ||
| 1750 | |||
| 1751 | if (skb->ip_summed == CHECKSUM_COMPLETE) { | ||
| 1752 | if (!tcp_v4_check(skb->len, iph->saddr, | ||
| 1753 | iph->daddr, skb->csum)) { | ||
| 1754 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
| 1755 | return 0; | ||
| 1756 | } | ||
| 1757 | } | ||
| 1758 | |||
| 1759 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, | ||
| 1760 | skb->len, IPPROTO_TCP, 0); | ||
| 1761 | |||
| 1762 | if (skb->len <= 76) { | ||
| 1763 | return __skb_checksum_complete(skb); | ||
| 1764 | } | ||
| 1765 | return 0; | ||
| 1766 | } | ||
| 1767 | |||
| 1768 | |||
| 1769 | /* The socket must have it's spinlock held when we get | 1531 | /* The socket must have it's spinlock held when we get |
| 1770 | * here. | 1532 | * here. |
| 1771 | * | 1533 | * |
| @@ -1960,7 +1722,8 @@ int tcp_v4_rcv(struct sk_buff *skb) | |||
| 1960 | * Packet length and doff are validated by header prediction, | 1722 | * Packet length and doff are validated by header prediction, |
| 1961 | * provided case of th->doff==0 is eliminated. | 1723 | * provided case of th->doff==0 is eliminated. |
| 1962 | * So, we defer the checks. */ | 1724 | * So, we defer the checks. */ |
| 1963 | if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) | 1725 | |
| 1726 | if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo)) | ||
| 1964 | goto csum_error; | 1727 | goto csum_error; |
| 1965 | 1728 | ||
| 1966 | th = tcp_hdr(skb); | 1729 | th = tcp_hdr(skb); |
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index c9aecae31327..1e70fa8fa793 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c | |||
| @@ -115,13 +115,12 @@ static void tcp_lp_init(struct sock *sk) | |||
| 115 | * Will only call newReno CA when away from inference. | 115 | * Will only call newReno CA when away from inference. |
| 116 | * From TCP-LP's paper, this will be handled in additive increasement. | 116 | * From TCP-LP's paper, this will be handled in additive increasement. |
| 117 | */ | 117 | */ |
| 118 | static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked, | 118 | static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
| 119 | u32 in_flight) | ||
| 120 | { | 119 | { |
| 121 | struct lp *lp = inet_csk_ca(sk); | 120 | struct lp *lp = inet_csk_ca(sk); |
| 122 | 121 | ||
| 123 | if (!(lp->flag & LP_WITHIN_INF)) | 122 | if (!(lp->flag & LP_WITHIN_INF)) |
| 124 | tcp_reno_cong_avoid(sk, ack, acked, in_flight); | 123 | tcp_reno_cong_avoid(sk, ack, acked); |
| 125 | } | 124 | } |
| 126 | 125 | ||
| 127 | /** | 126 | /** |
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index dcaf72f10216..4fe041805989 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c | |||
| @@ -1159,10 +1159,7 @@ static void __net_exit tcp_net_metrics_exit(struct net *net) | |||
| 1159 | tm = next; | 1159 | tm = next; |
| 1160 | } | 1160 | } |
| 1161 | } | 1161 | } |
| 1162 | if (is_vmalloc_addr(net->ipv4.tcp_metrics_hash)) | 1162 | kvfree(net->ipv4.tcp_metrics_hash); |
| 1163 | vfree(net->ipv4.tcp_metrics_hash); | ||
| 1164 | else | ||
| 1165 | kfree(net->ipv4.tcp_metrics_hash); | ||
| 1166 | } | 1163 | } |
| 1167 | 1164 | ||
| 1168 | static __net_initdata struct pernet_operations tcp_net_metrics_ops = { | 1165 | static __net_initdata struct pernet_operations tcp_net_metrics_ops = { |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 05c1b155251d..e68e0d4af6c9 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
| @@ -362,6 +362,37 @@ void tcp_twsk_destructor(struct sock *sk) | |||
| 362 | } | 362 | } |
| 363 | EXPORT_SYMBOL_GPL(tcp_twsk_destructor); | 363 | EXPORT_SYMBOL_GPL(tcp_twsk_destructor); |
| 364 | 364 | ||
| 365 | void tcp_openreq_init_rwin(struct request_sock *req, | ||
| 366 | struct sock *sk, struct dst_entry *dst) | ||
| 367 | { | ||
| 368 | struct inet_request_sock *ireq = inet_rsk(req); | ||
| 369 | struct tcp_sock *tp = tcp_sk(sk); | ||
| 370 | __u8 rcv_wscale; | ||
| 371 | int mss = dst_metric_advmss(dst); | ||
| 372 | |||
| 373 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) | ||
| 374 | mss = tp->rx_opt.user_mss; | ||
| 375 | |||
| 376 | /* Set this up on the first call only */ | ||
| 377 | req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); | ||
| 378 | |||
| 379 | /* limit the window selection if the user enforce a smaller rx buffer */ | ||
| 380 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && | ||
| 381 | (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0)) | ||
| 382 | req->window_clamp = tcp_full_space(sk); | ||
| 383 | |||
| 384 | /* tcp_full_space because it is guaranteed to be the first packet */ | ||
| 385 | tcp_select_initial_window(tcp_full_space(sk), | ||
| 386 | mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), | ||
| 387 | &req->rcv_wnd, | ||
| 388 | &req->window_clamp, | ||
| 389 | ireq->wscale_ok, | ||
| 390 | &rcv_wscale, | ||
| 391 | dst_metric(dst, RTAX_INITRWND)); | ||
| 392 | ireq->rcv_wscale = rcv_wscale; | ||
| 393 | } | ||
| 394 | EXPORT_SYMBOL(tcp_openreq_init_rwin); | ||
| 395 | |||
| 365 | static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, | 396 | static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, |
| 366 | struct request_sock *req) | 397 | struct request_sock *req) |
| 367 | { | 398 | { |
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index b92b81718ca4..4e86c59ec7f7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c | |||
| @@ -57,10 +57,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, | |||
| 57 | SKB_GSO_TCP_ECN | | 57 | SKB_GSO_TCP_ECN | |
| 58 | SKB_GSO_TCPV6 | | 58 | SKB_GSO_TCPV6 | |
| 59 | SKB_GSO_GRE | | 59 | SKB_GSO_GRE | |
| 60 | SKB_GSO_GRE_CSUM | | ||
| 60 | SKB_GSO_IPIP | | 61 | SKB_GSO_IPIP | |
| 61 | SKB_GSO_SIT | | 62 | SKB_GSO_SIT | |
| 62 | SKB_GSO_MPLS | | 63 | SKB_GSO_MPLS | |
| 63 | SKB_GSO_UDP_TUNNEL | | 64 | SKB_GSO_UDP_TUNNEL | |
| 65 | SKB_GSO_UDP_TUNNEL_CSUM | | ||
| 64 | 0) || | 66 | 0) || |
| 65 | !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) | 67 | !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) |
| 66 | goto out; | 68 | goto out; |
| @@ -97,9 +99,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, | |||
| 97 | th->check = newcheck; | 99 | th->check = newcheck; |
| 98 | 100 | ||
| 99 | if (skb->ip_summed != CHECKSUM_PARTIAL) | 101 | if (skb->ip_summed != CHECKSUM_PARTIAL) |
| 100 | th->check = | 102 | th->check = gso_make_checksum(skb, ~th->check); |
| 101 | csum_fold(csum_partial(skb_transport_header(skb), | ||
| 102 | thlen, skb->csum)); | ||
| 103 | 103 | ||
| 104 | seq += mss; | 104 | seq += mss; |
| 105 | if (copy_destructor) { | 105 | if (copy_destructor) { |
| @@ -133,8 +133,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, | |||
| 133 | th->check = ~csum_fold((__force __wsum)((__force u32)th->check + | 133 | th->check = ~csum_fold((__force __wsum)((__force u32)th->check + |
| 134 | (__force u32)delta)); | 134 | (__force u32)delta)); |
| 135 | if (skb->ip_summed != CHECKSUM_PARTIAL) | 135 | if (skb->ip_summed != CHECKSUM_PARTIAL) |
| 136 | th->check = csum_fold(csum_partial(skb_transport_header(skb), | 136 | th->check = gso_make_checksum(skb, ~th->check); |
| 137 | thlen, skb->csum)); | ||
| 138 | out: | 137 | out: |
| 139 | return segs; | 138 | return segs; |
| 140 | } | 139 | } |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2d340bd2cd3d..d92bce0ea24e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
| @@ -627,7 +627,7 @@ static unsigned int tcp_synack_options(struct sock *sk, | |||
| 627 | if (unlikely(!ireq->tstamp_ok)) | 627 | if (unlikely(!ireq->tstamp_ok)) |
| 628 | remaining -= TCPOLEN_SACKPERM_ALIGNED; | 628 | remaining -= TCPOLEN_SACKPERM_ALIGNED; |
| 629 | } | 629 | } |
| 630 | if (foc != NULL) { | 630 | if (foc != NULL && foc->len >= 0) { |
| 631 | u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; | 631 | u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; |
| 632 | need = (need + 3) & ~3U; /* Align to 32 bits */ | 632 | need = (need + 3) & ~3U; /* Align to 32 bits */ |
| 633 | if (remaining >= need) { | 633 | if (remaining >= need) { |
| @@ -878,15 +878,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
| 878 | BUG_ON(!skb || !tcp_skb_pcount(skb)); | 878 | BUG_ON(!skb || !tcp_skb_pcount(skb)); |
| 879 | 879 | ||
| 880 | if (clone_it) { | 880 | if (clone_it) { |
| 881 | const struct sk_buff *fclone = skb + 1; | ||
| 882 | |||
| 883 | skb_mstamp_get(&skb->skb_mstamp); | 881 | skb_mstamp_get(&skb->skb_mstamp); |
| 884 | 882 | ||
| 885 | if (unlikely(skb->fclone == SKB_FCLONE_ORIG && | ||
| 886 | fclone->fclone == SKB_FCLONE_CLONE)) | ||
| 887 | NET_INC_STATS(sock_net(sk), | ||
| 888 | LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); | ||
| 889 | |||
| 890 | if (unlikely(skb_cloned(skb))) | 883 | if (unlikely(skb_cloned(skb))) |
| 891 | skb = pskb_copy(skb, gfp_mask); | 884 | skb = pskb_copy(skb, gfp_mask); |
| 892 | else | 885 | else |
| @@ -1081,7 +1074,7 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de | |||
| 1081 | * Remember, these are still headerless SKBs at this point. | 1074 | * Remember, these are still headerless SKBs at this point. |
| 1082 | */ | 1075 | */ |
| 1083 | int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, | 1076 | int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, |
| 1084 | unsigned int mss_now) | 1077 | unsigned int mss_now, gfp_t gfp) |
| 1085 | { | 1078 | { |
| 1086 | struct tcp_sock *tp = tcp_sk(sk); | 1079 | struct tcp_sock *tp = tcp_sk(sk); |
| 1087 | struct sk_buff *buff; | 1080 | struct sk_buff *buff; |
| @@ -1096,11 +1089,11 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, | |||
| 1096 | if (nsize < 0) | 1089 | if (nsize < 0) |
| 1097 | nsize = 0; | 1090 | nsize = 0; |
| 1098 | 1091 | ||
| 1099 | if (skb_unclone(skb, GFP_ATOMIC)) | 1092 | if (skb_unclone(skb, gfp)) |
| 1100 | return -ENOMEM; | 1093 | return -ENOMEM; |
| 1101 | 1094 | ||
| 1102 | /* Get a new skb... force flag on. */ | 1095 | /* Get a new skb... force flag on. */ |
| 1103 | buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC); | 1096 | buff = sk_stream_alloc_skb(sk, nsize, gfp); |
| 1104 | if (buff == NULL) | 1097 | if (buff == NULL) |
| 1105 | return -ENOMEM; /* We'll just try again later. */ | 1098 | return -ENOMEM; /* We'll just try again later. */ |
| 1106 | 1099 | ||
| @@ -1387,12 +1380,43 @@ unsigned int tcp_current_mss(struct sock *sk) | |||
| 1387 | return mss_now; | 1380 | return mss_now; |
| 1388 | } | 1381 | } |
| 1389 | 1382 | ||
| 1390 | /* Congestion window validation. (RFC2861) */ | 1383 | /* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. |
| 1391 | static void tcp_cwnd_validate(struct sock *sk) | 1384 | * As additional protections, we do not touch cwnd in retransmission phases, |
| 1385 | * and if application hit its sndbuf limit recently. | ||
| 1386 | */ | ||
| 1387 | static void tcp_cwnd_application_limited(struct sock *sk) | ||
| 1388 | { | ||
| 1389 | struct tcp_sock *tp = tcp_sk(sk); | ||
| 1390 | |||
| 1391 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && | ||
| 1392 | sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { | ||
| 1393 | /* Limited by application or receiver window. */ | ||
| 1394 | u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); | ||
| 1395 | u32 win_used = max(tp->snd_cwnd_used, init_win); | ||
| 1396 | if (win_used < tp->snd_cwnd) { | ||
| 1397 | tp->snd_ssthresh = tcp_current_ssthresh(sk); | ||
| 1398 | tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; | ||
| 1399 | } | ||
| 1400 | tp->snd_cwnd_used = 0; | ||
| 1401 | } | ||
| 1402 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
| 1403 | } | ||
| 1404 | |||
| 1405 | static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) | ||
| 1392 | { | 1406 | { |
| 1393 | struct tcp_sock *tp = tcp_sk(sk); | 1407 | struct tcp_sock *tp = tcp_sk(sk); |
| 1394 | 1408 | ||
| 1395 | if (tp->packets_out >= tp->snd_cwnd) { | 1409 | /* Track the maximum number of outstanding packets in each |
| 1410 | * window, and remember whether we were cwnd-limited then. | ||
| 1411 | */ | ||
| 1412 | if (!before(tp->snd_una, tp->max_packets_seq) || | ||
| 1413 | tp->packets_out > tp->max_packets_out) { | ||
| 1414 | tp->max_packets_out = tp->packets_out; | ||
| 1415 | tp->max_packets_seq = tp->snd_nxt; | ||
| 1416 | tp->is_cwnd_limited = is_cwnd_limited; | ||
| 1417 | } | ||
| 1418 | |||
| 1419 | if (tcp_is_cwnd_limited(sk)) { | ||
| 1396 | /* Network is feed fully. */ | 1420 | /* Network is feed fully. */ |
| 1397 | tp->snd_cwnd_used = 0; | 1421 | tp->snd_cwnd_used = 0; |
| 1398 | tp->snd_cwnd_stamp = tcp_time_stamp; | 1422 | tp->snd_cwnd_stamp = tcp_time_stamp; |
| @@ -1601,7 +1625,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
| 1601 | 1625 | ||
| 1602 | /* All of a TSO frame must be composed of paged data. */ | 1626 | /* All of a TSO frame must be composed of paged data. */ |
| 1603 | if (skb->len != skb->data_len) | 1627 | if (skb->len != skb->data_len) |
| 1604 | return tcp_fragment(sk, skb, len, mss_now); | 1628 | return tcp_fragment(sk, skb, len, mss_now, gfp); |
| 1605 | 1629 | ||
| 1606 | buff = sk_stream_alloc_skb(sk, 0, gfp); | 1630 | buff = sk_stream_alloc_skb(sk, 0, gfp); |
| 1607 | if (unlikely(buff == NULL)) | 1631 | if (unlikely(buff == NULL)) |
| @@ -1644,7 +1668,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
| 1644 | * | 1668 | * |
| 1645 | * This algorithm is from John Heffner. | 1669 | * This algorithm is from John Heffner. |
| 1646 | */ | 1670 | */ |
| 1647 | static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | 1671 | static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, |
| 1672 | bool *is_cwnd_limited) | ||
| 1648 | { | 1673 | { |
| 1649 | struct tcp_sock *tp = tcp_sk(sk); | 1674 | struct tcp_sock *tp = tcp_sk(sk); |
| 1650 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1675 | const struct inet_connection_sock *icsk = inet_csk(sk); |
| @@ -1708,6 +1733,9 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
| 1708 | if (!tp->tso_deferred) | 1733 | if (!tp->tso_deferred) |
| 1709 | tp->tso_deferred = 1 | (jiffies << 1); | 1734 | tp->tso_deferred = 1 | (jiffies << 1); |
| 1710 | 1735 | ||
| 1736 | if (cong_win < send_win && cong_win < skb->len) | ||
| 1737 | *is_cwnd_limited = true; | ||
| 1738 | |||
| 1711 | return true; | 1739 | return true; |
| 1712 | 1740 | ||
| 1713 | send_now: | 1741 | send_now: |
| @@ -1868,6 +1896,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
| 1868 | unsigned int tso_segs, sent_pkts; | 1896 | unsigned int tso_segs, sent_pkts; |
| 1869 | int cwnd_quota; | 1897 | int cwnd_quota; |
| 1870 | int result; | 1898 | int result; |
| 1899 | bool is_cwnd_limited = false; | ||
| 1871 | 1900 | ||
| 1872 | sent_pkts = 0; | 1901 | sent_pkts = 0; |
| 1873 | 1902 | ||
| @@ -1892,6 +1921,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
| 1892 | 1921 | ||
| 1893 | cwnd_quota = tcp_cwnd_test(tp, skb); | 1922 | cwnd_quota = tcp_cwnd_test(tp, skb); |
| 1894 | if (!cwnd_quota) { | 1923 | if (!cwnd_quota) { |
| 1924 | is_cwnd_limited = true; | ||
| 1895 | if (push_one == 2) | 1925 | if (push_one == 2) |
| 1896 | /* Force out a loss probe pkt. */ | 1926 | /* Force out a loss probe pkt. */ |
| 1897 | cwnd_quota = 1; | 1927 | cwnd_quota = 1; |
| @@ -1908,7 +1938,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
| 1908 | nonagle : TCP_NAGLE_PUSH)))) | 1938 | nonagle : TCP_NAGLE_PUSH)))) |
| 1909 | break; | 1939 | break; |
| 1910 | } else { | 1940 | } else { |
| 1911 | if (!push_one && tcp_tso_should_defer(sk, skb)) | 1941 | if (!push_one && |
| 1942 | tcp_tso_should_defer(sk, skb, &is_cwnd_limited)) | ||
| 1912 | break; | 1943 | break; |
| 1913 | } | 1944 | } |
| 1914 | 1945 | ||
| @@ -1973,7 +2004,7 @@ repair: | |||
| 1973 | /* Send one loss probe per tail loss episode. */ | 2004 | /* Send one loss probe per tail loss episode. */ |
| 1974 | if (push_one != 2) | 2005 | if (push_one != 2) |
| 1975 | tcp_schedule_loss_probe(sk); | 2006 | tcp_schedule_loss_probe(sk); |
| 1976 | tcp_cwnd_validate(sk); | 2007 | tcp_cwnd_validate(sk, is_cwnd_limited); |
| 1977 | return false; | 2008 | return false; |
| 1978 | } | 2009 | } |
| 1979 | return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); | 2010 | return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); |
| @@ -2037,6 +2068,25 @@ bool tcp_schedule_loss_probe(struct sock *sk) | |||
| 2037 | return true; | 2068 | return true; |
| 2038 | } | 2069 | } |
| 2039 | 2070 | ||
| 2071 | /* Thanks to skb fast clones, we can detect if a prior transmit of | ||
| 2072 | * a packet is still in a qdisc or driver queue. | ||
| 2073 | * In this case, there is very little point doing a retransmit ! | ||
| 2074 | * Note: This is called from BH context only. | ||
| 2075 | */ | ||
| 2076 | static bool skb_still_in_host_queue(const struct sock *sk, | ||
| 2077 | const struct sk_buff *skb) | ||
| 2078 | { | ||
| 2079 | const struct sk_buff *fclone = skb + 1; | ||
| 2080 | |||
| 2081 | if (unlikely(skb->fclone == SKB_FCLONE_ORIG && | ||
| 2082 | fclone->fclone == SKB_FCLONE_CLONE)) { | ||
| 2083 | NET_INC_STATS_BH(sock_net(sk), | ||
| 2084 | LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); | ||
| 2085 | return true; | ||
| 2086 | } | ||
| 2087 | return false; | ||
| 2088 | } | ||
| 2089 | |||
| 2040 | /* When probe timeout (PTO) fires, send a new segment if one exists, else | 2090 | /* When probe timeout (PTO) fires, send a new segment if one exists, else |
| 2041 | * retransmit the last segment. | 2091 | * retransmit the last segment. |
| 2042 | */ | 2092 | */ |
| @@ -2062,12 +2112,16 @@ void tcp_send_loss_probe(struct sock *sk) | |||
| 2062 | if (WARN_ON(!skb)) | 2112 | if (WARN_ON(!skb)) |
| 2063 | goto rearm_timer; | 2113 | goto rearm_timer; |
| 2064 | 2114 | ||
| 2115 | if (skb_still_in_host_queue(sk, skb)) | ||
| 2116 | goto rearm_timer; | ||
| 2117 | |||
| 2065 | pcount = tcp_skb_pcount(skb); | 2118 | pcount = tcp_skb_pcount(skb); |
| 2066 | if (WARN_ON(!pcount)) | 2119 | if (WARN_ON(!pcount)) |
| 2067 | goto rearm_timer; | 2120 | goto rearm_timer; |
| 2068 | 2121 | ||
| 2069 | if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { | 2122 | if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { |
| 2070 | if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss))) | 2123 | if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss, |
| 2124 | GFP_ATOMIC))) | ||
| 2071 | goto rearm_timer; | 2125 | goto rearm_timer; |
| 2072 | skb = tcp_write_queue_tail(sk); | 2126 | skb = tcp_write_queue_tail(sk); |
| 2073 | } | 2127 | } |
| @@ -2075,9 +2129,7 @@ void tcp_send_loss_probe(struct sock *sk) | |||
| 2075 | if (WARN_ON(!skb || !tcp_skb_pcount(skb))) | 2129 | if (WARN_ON(!skb || !tcp_skb_pcount(skb))) |
| 2076 | goto rearm_timer; | 2130 | goto rearm_timer; |
| 2077 | 2131 | ||
| 2078 | /* Probe with zero data doesn't trigger fast recovery. */ | 2132 | err = __tcp_retransmit_skb(sk, skb); |
| 2079 | if (skb->len > 0) | ||
| 2080 | err = __tcp_retransmit_skb(sk, skb); | ||
| 2081 | 2133 | ||
| 2082 | /* Record snd_nxt for loss detection. */ | 2134 | /* Record snd_nxt for loss detection. */ |
| 2083 | if (likely(!err)) | 2135 | if (likely(!err)) |
| @@ -2383,6 +2435,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
| 2383 | min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) | 2435 | min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) |
| 2384 | return -EAGAIN; | 2436 | return -EAGAIN; |
| 2385 | 2437 | ||
| 2438 | if (skb_still_in_host_queue(sk, skb)) | ||
| 2439 | return -EBUSY; | ||
| 2440 | |||
| 2386 | if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { | 2441 | if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { |
| 2387 | if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) | 2442 | if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) |
| 2388 | BUG(); | 2443 | BUG(); |
| @@ -2405,7 +2460,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
| 2405 | return -EAGAIN; | 2460 | return -EAGAIN; |
| 2406 | 2461 | ||
| 2407 | if (skb->len > cur_mss) { | 2462 | if (skb->len > cur_mss) { |
| 2408 | if (tcp_fragment(sk, skb, cur_mss, cur_mss)) | 2463 | if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC)) |
| 2409 | return -ENOMEM; /* We'll try again later. */ | 2464 | return -ENOMEM; /* We'll try again later. */ |
| 2410 | } else { | 2465 | } else { |
| 2411 | int oldpcount = tcp_skb_pcount(skb); | 2466 | int oldpcount = tcp_skb_pcount(skb); |
| @@ -2476,7 +2531,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
| 2476 | * see tcp_input.c tcp_sacktag_write_queue(). | 2531 | * see tcp_input.c tcp_sacktag_write_queue(). |
| 2477 | */ | 2532 | */ |
| 2478 | TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; | 2533 | TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; |
| 2479 | } else { | 2534 | } else if (err != -EBUSY) { |
| 2480 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); | 2535 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); |
| 2481 | } | 2536 | } |
| 2482 | return err; | 2537 | return err; |
| @@ -2754,27 +2809,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
| 2754 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) | 2809 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) |
| 2755 | mss = tp->rx_opt.user_mss; | 2810 | mss = tp->rx_opt.user_mss; |
| 2756 | 2811 | ||
| 2757 | if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ | ||
| 2758 | __u8 rcv_wscale; | ||
| 2759 | /* Set this up on the first call only */ | ||
| 2760 | req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); | ||
| 2761 | |||
| 2762 | /* limit the window selection if the user enforce a smaller rx buffer */ | ||
| 2763 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && | ||
| 2764 | (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0)) | ||
| 2765 | req->window_clamp = tcp_full_space(sk); | ||
| 2766 | |||
| 2767 | /* tcp_full_space because it is guaranteed to be the first packet */ | ||
| 2768 | tcp_select_initial_window(tcp_full_space(sk), | ||
| 2769 | mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), | ||
| 2770 | &req->rcv_wnd, | ||
| 2771 | &req->window_clamp, | ||
| 2772 | ireq->wscale_ok, | ||
| 2773 | &rcv_wscale, | ||
| 2774 | dst_metric(dst, RTAX_INITRWND)); | ||
| 2775 | ireq->rcv_wscale = rcv_wscale; | ||
| 2776 | } | ||
| 2777 | |||
| 2778 | memset(&opts, 0, sizeof(opts)); | 2812 | memset(&opts, 0, sizeof(opts)); |
| 2779 | #ifdef CONFIG_SYN_COOKIES | 2813 | #ifdef CONFIG_SYN_COOKIES |
| 2780 | if (unlikely(req->cookie_ts)) | 2814 | if (unlikely(req->cookie_ts)) |
| @@ -3207,7 +3241,7 @@ int tcp_write_wakeup(struct sock *sk) | |||
| 3207 | skb->len > mss) { | 3241 | skb->len > mss) { |
| 3208 | seg_size = min(seg_size, mss); | 3242 | seg_size = min(seg_size, mss); |
| 3209 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; | 3243 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; |
| 3210 | if (tcp_fragment(sk, skb, seg_size, mss)) | 3244 | if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC)) |
| 3211 | return -1; | 3245 | return -1; |
| 3212 | } else if (!tcp_skb_pcount(skb)) | 3246 | } else if (!tcp_skb_pcount(skb)) |
| 3213 | tcp_set_skb_tso_segs(sk, skb, mss); | 3247 | tcp_set_skb_tso_segs(sk, skb, mss); |
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 0ac50836da4d..8250949b8853 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c | |||
| @@ -15,12 +15,11 @@ | |||
| 15 | #define TCP_SCALABLE_AI_CNT 50U | 15 | #define TCP_SCALABLE_AI_CNT 50U |
| 16 | #define TCP_SCALABLE_MD_SCALE 3 | 16 | #define TCP_SCALABLE_MD_SCALE 3 |
| 17 | 17 | ||
| 18 | static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked, | 18 | static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
| 19 | u32 in_flight) | ||
| 20 | { | 19 | { |
| 21 | struct tcp_sock *tp = tcp_sk(sk); | 20 | struct tcp_sock *tp = tcp_sk(sk); |
| 22 | 21 | ||
| 23 | if (!tcp_is_cwnd_limited(sk, in_flight)) | 22 | if (!tcp_is_cwnd_limited(sk)) |
| 24 | return; | 23 | return; |
| 25 | 24 | ||
| 26 | if (tp->snd_cwnd <= tp->snd_ssthresh) | 25 | if (tp->snd_cwnd <= tp->snd_ssthresh) |
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 48539fff6357..9a5e05f27f4f 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c | |||
| @@ -163,14 +163,13 @@ static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) | |||
| 163 | return min(tp->snd_ssthresh, tp->snd_cwnd-1); | 163 | return min(tp->snd_ssthresh, tp->snd_cwnd-1); |
| 164 | } | 164 | } |
| 165 | 165 | ||
| 166 | static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked, | 166 | static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
| 167 | u32 in_flight) | ||
| 168 | { | 167 | { |
| 169 | struct tcp_sock *tp = tcp_sk(sk); | 168 | struct tcp_sock *tp = tcp_sk(sk); |
| 170 | struct vegas *vegas = inet_csk_ca(sk); | 169 | struct vegas *vegas = inet_csk_ca(sk); |
| 171 | 170 | ||
| 172 | if (!vegas->doing_vegas_now) { | 171 | if (!vegas->doing_vegas_now) { |
| 173 | tcp_reno_cong_avoid(sk, ack, acked, in_flight); | 172 | tcp_reno_cong_avoid(sk, ack, acked); |
| 174 | return; | 173 | return; |
| 175 | } | 174 | } |
| 176 | 175 | ||
| @@ -195,7 +194,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked, | |||
| 195 | /* We don't have enough RTT samples to do the Vegas | 194 | /* We don't have enough RTT samples to do the Vegas |
| 196 | * calculation, so we'll behave like Reno. | 195 | * calculation, so we'll behave like Reno. |
| 197 | */ | 196 | */ |
| 198 | tcp_reno_cong_avoid(sk, ack, acked, in_flight); | 197 | tcp_reno_cong_avoid(sk, ack, acked); |
| 199 | } else { | 198 | } else { |
| 200 | u32 rtt, diff; | 199 | u32 rtt, diff; |
| 201 | u64 target_cwnd; | 200 | u64 target_cwnd; |
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 1b8e28fcd7e1..27b9825753d1 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c | |||
| @@ -114,19 +114,18 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event) | |||
| 114 | tcp_veno_init(sk); | 114 | tcp_veno_init(sk); |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked, | 117 | static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
| 118 | u32 in_flight) | ||
| 119 | { | 118 | { |
| 120 | struct tcp_sock *tp = tcp_sk(sk); | 119 | struct tcp_sock *tp = tcp_sk(sk); |
| 121 | struct veno *veno = inet_csk_ca(sk); | 120 | struct veno *veno = inet_csk_ca(sk); |
| 122 | 121 | ||
| 123 | if (!veno->doing_veno_now) { | 122 | if (!veno->doing_veno_now) { |
| 124 | tcp_reno_cong_avoid(sk, ack, acked, in_flight); | 123 | tcp_reno_cong_avoid(sk, ack, acked); |
| 125 | return; | 124 | return; |
| 126 | } | 125 | } |
| 127 | 126 | ||
| 128 | /* limited by applications */ | 127 | /* limited by applications */ |
| 129 | if (!tcp_is_cwnd_limited(sk, in_flight)) | 128 | if (!tcp_is_cwnd_limited(sk)) |
| 130 | return; | 129 | return; |
| 131 | 130 | ||
| 132 | /* We do the Veno calculations only if we got enough rtt samples */ | 131 | /* We do the Veno calculations only if we got enough rtt samples */ |
| @@ -134,7 +133,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked, | |||
| 134 | /* We don't have enough rtt samples to do the Veno | 133 | /* We don't have enough rtt samples to do the Veno |
| 135 | * calculation, so we'll behave like Reno. | 134 | * calculation, so we'll behave like Reno. |
| 136 | */ | 135 | */ |
| 137 | tcp_reno_cong_avoid(sk, ack, acked, in_flight); | 136 | tcp_reno_cong_avoid(sk, ack, acked); |
| 138 | } else { | 137 | } else { |
| 139 | u64 target_cwnd; | 138 | u64 target_cwnd; |
| 140 | u32 rtt; | 139 | u32 rtt; |
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 5ede0e727945..599b79b8eac0 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c | |||
| @@ -69,13 +69,12 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us) | |||
| 69 | tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us); | 69 | tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us); |
| 70 | } | 70 | } |
| 71 | 71 | ||
| 72 | static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked, | 72 | static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
| 73 | u32 in_flight) | ||
| 74 | { | 73 | { |
| 75 | struct tcp_sock *tp = tcp_sk(sk); | 74 | struct tcp_sock *tp = tcp_sk(sk); |
| 76 | struct yeah *yeah = inet_csk_ca(sk); | 75 | struct yeah *yeah = inet_csk_ca(sk); |
| 77 | 76 | ||
| 78 | if (!tcp_is_cwnd_limited(sk, in_flight)) | 77 | if (!tcp_is_cwnd_limited(sk)) |
| 79 | return; | 78 | return; |
| 80 | 79 | ||
| 81 | if (tp->snd_cwnd <= tp->snd_ssthresh) | 80 | if (tp->snd_cwnd <= tp->snd_ssthresh) |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4468e1adc094..185ed3e59802 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
| @@ -246,7 +246,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
| 246 | do { | 246 | do { |
| 247 | if (low <= snum && snum <= high && | 247 | if (low <= snum && snum <= high && |
| 248 | !test_bit(snum >> udptable->log, bitmap) && | 248 | !test_bit(snum >> udptable->log, bitmap) && |
| 249 | !inet_is_reserved_local_port(snum)) | 249 | !inet_is_local_reserved_port(net, snum)) |
| 250 | goto found; | 250 | goto found; |
| 251 | snum += rand; | 251 | snum += rand; |
| 252 | } while (snum != first); | 252 | } while (snum != first); |
| @@ -727,13 +727,12 @@ EXPORT_SYMBOL(udp_flush_pending_frames); | |||
| 727 | void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) | 727 | void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) |
| 728 | { | 728 | { |
| 729 | struct udphdr *uh = udp_hdr(skb); | 729 | struct udphdr *uh = udp_hdr(skb); |
| 730 | struct sk_buff *frags = skb_shinfo(skb)->frag_list; | ||
| 731 | int offset = skb_transport_offset(skb); | 730 | int offset = skb_transport_offset(skb); |
| 732 | int len = skb->len - offset; | 731 | int len = skb->len - offset; |
| 733 | int hlen = len; | 732 | int hlen = len; |
| 734 | __wsum csum = 0; | 733 | __wsum csum = 0; |
| 735 | 734 | ||
| 736 | if (!frags) { | 735 | if (!skb_has_frag_list(skb)) { |
| 737 | /* | 736 | /* |
| 738 | * Only one fragment on the socket. | 737 | * Only one fragment on the socket. |
| 739 | */ | 738 | */ |
| @@ -742,15 +741,17 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) | |||
| 742 | uh->check = ~csum_tcpudp_magic(src, dst, len, | 741 | uh->check = ~csum_tcpudp_magic(src, dst, len, |
| 743 | IPPROTO_UDP, 0); | 742 | IPPROTO_UDP, 0); |
| 744 | } else { | 743 | } else { |
| 744 | struct sk_buff *frags; | ||
| 745 | |||
| 745 | /* | 746 | /* |
| 746 | * HW-checksum won't work as there are two or more | 747 | * HW-checksum won't work as there are two or more |
| 747 | * fragments on the socket so that all csums of sk_buffs | 748 | * fragments on the socket so that all csums of sk_buffs |
| 748 | * should be together | 749 | * should be together |
| 749 | */ | 750 | */ |
| 750 | do { | 751 | skb_walk_frags(skb, frags) { |
| 751 | csum = csum_add(csum, frags->csum); | 752 | csum = csum_add(csum, frags->csum); |
| 752 | hlen -= frags->len; | 753 | hlen -= frags->len; |
| 753 | } while ((frags = frags->next)); | 754 | } |
| 754 | 755 | ||
| 755 | csum = skb_checksum(skb, offset, hlen, csum); | 756 | csum = skb_checksum(skb, offset, hlen, csum); |
| 756 | skb->ip_summed = CHECKSUM_NONE; | 757 | skb->ip_summed = CHECKSUM_NONE; |
| @@ -762,6 +763,43 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) | |||
| 762 | } | 763 | } |
| 763 | EXPORT_SYMBOL_GPL(udp4_hwcsum); | 764 | EXPORT_SYMBOL_GPL(udp4_hwcsum); |
| 764 | 765 | ||
| 766 | /* Function to set UDP checksum for an IPv4 UDP packet. This is intended | ||
| 767 | * for the simple case like when setting the checksum for a UDP tunnel. | ||
| 768 | */ | ||
| 769 | void udp_set_csum(bool nocheck, struct sk_buff *skb, | ||
| 770 | __be32 saddr, __be32 daddr, int len) | ||
| 771 | { | ||
| 772 | struct udphdr *uh = udp_hdr(skb); | ||
| 773 | |||
| 774 | if (nocheck) | ||
| 775 | uh->check = 0; | ||
| 776 | else if (skb_is_gso(skb)) | ||
| 777 | uh->check = ~udp_v4_check(len, saddr, daddr, 0); | ||
| 778 | else if (skb_dst(skb) && skb_dst(skb)->dev && | ||
| 779 | (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { | ||
| 780 | |||
| 781 | BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); | ||
| 782 | |||
| 783 | skb->ip_summed = CHECKSUM_PARTIAL; | ||
| 784 | skb->csum_start = skb_transport_header(skb) - skb->head; | ||
| 785 | skb->csum_offset = offsetof(struct udphdr, check); | ||
| 786 | uh->check = ~udp_v4_check(len, saddr, daddr, 0); | ||
| 787 | } else { | ||
| 788 | __wsum csum; | ||
| 789 | |||
| 790 | BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); | ||
| 791 | |||
| 792 | uh->check = 0; | ||
| 793 | csum = skb_checksum(skb, 0, len, 0); | ||
| 794 | uh->check = udp_v4_check(len, saddr, daddr, csum); | ||
| 795 | if (uh->check == 0) | ||
| 796 | uh->check = CSUM_MANGLED_0; | ||
| 797 | |||
| 798 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
| 799 | } | ||
| 800 | } | ||
| 801 | EXPORT_SYMBOL(udp_set_csum); | ||
| 802 | |||
| 765 | static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) | 803 | static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) |
| 766 | { | 804 | { |
| 767 | struct sock *sk = skb->sk; | 805 | struct sock *sk = skb->sk; |
| @@ -785,7 +823,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) | |||
| 785 | if (is_udplite) /* UDP-Lite */ | 823 | if (is_udplite) /* UDP-Lite */ |
| 786 | csum = udplite_csum(skb); | 824 | csum = udplite_csum(skb); |
| 787 | 825 | ||
| 788 | else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ | 826 | else if (sk->sk_no_check_tx) { /* UDP csum disabled */ |
| 789 | 827 | ||
| 790 | skb->ip_summed = CHECKSUM_NONE; | 828 | skb->ip_summed = CHECKSUM_NONE; |
| 791 | goto send; | 829 | goto send; |
| @@ -1495,6 +1533,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
| 1495 | if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { | 1533 | if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { |
| 1496 | int ret; | 1534 | int ret; |
| 1497 | 1535 | ||
| 1536 | /* Verify checksum before giving to encap */ | ||
| 1537 | if (udp_lib_checksum_complete(skb)) | ||
| 1538 | goto csum_error; | ||
| 1539 | |||
| 1498 | ret = encap_rcv(sk, skb); | 1540 | ret = encap_rcv(sk, skb); |
| 1499 | if (ret <= 0) { | 1541 | if (ret <= 0) { |
| 1500 | UDP_INC_STATS_BH(sock_net(sk), | 1542 | UDP_INC_STATS_BH(sock_net(sk), |
| @@ -1672,7 +1714,6 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | |||
| 1672 | static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, | 1714 | static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, |
| 1673 | int proto) | 1715 | int proto) |
| 1674 | { | 1716 | { |
| 1675 | const struct iphdr *iph; | ||
| 1676 | int err; | 1717 | int err; |
| 1677 | 1718 | ||
| 1678 | UDP_SKB_CB(skb)->partial_cov = 0; | 1719 | UDP_SKB_CB(skb)->partial_cov = 0; |
| @@ -1684,22 +1725,8 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, | |||
| 1684 | return err; | 1725 | return err; |
| 1685 | } | 1726 | } |
| 1686 | 1727 | ||
| 1687 | iph = ip_hdr(skb); | 1728 | return skb_checksum_init_zero_check(skb, proto, uh->check, |
| 1688 | if (uh->check == 0) { | 1729 | inet_compute_pseudo); |
| 1689 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
| 1690 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { | ||
| 1691 | if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, | ||
| 1692 | proto, skb->csum)) | ||
| 1693 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
| 1694 | } | ||
| 1695 | if (!skb_csum_unnecessary(skb)) | ||
| 1696 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, | ||
| 1697 | skb->len, proto, 0); | ||
| 1698 | /* Probably, we should checksum udp header (it should be in cache | ||
| 1699 | * in any case) and data in tiny packets (< rx copybreak). | ||
| 1700 | */ | ||
| 1701 | |||
| 1702 | return 0; | ||
| 1703 | } | 1730 | } |
| 1704 | 1731 | ||
| 1705 | /* | 1732 | /* |
| @@ -1886,7 +1913,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, | |||
| 1886 | unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); | 1913 | unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); |
| 1887 | unsigned int slot2 = hash2 & udp_table.mask; | 1914 | unsigned int slot2 = hash2 & udp_table.mask; |
| 1888 | struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; | 1915 | struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; |
| 1889 | INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr) | 1916 | INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); |
| 1890 | const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); | 1917 | const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); |
| 1891 | 1918 | ||
| 1892 | rcu_read_lock(); | 1919 | rcu_read_lock(); |
| @@ -1979,7 +2006,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
| 1979 | int (*push_pending_frames)(struct sock *)) | 2006 | int (*push_pending_frames)(struct sock *)) |
| 1980 | { | 2007 | { |
| 1981 | struct udp_sock *up = udp_sk(sk); | 2008 | struct udp_sock *up = udp_sk(sk); |
| 1982 | int val; | 2009 | int val, valbool; |
| 1983 | int err = 0; | 2010 | int err = 0; |
| 1984 | int is_udplite = IS_UDPLITE(sk); | 2011 | int is_udplite = IS_UDPLITE(sk); |
| 1985 | 2012 | ||
| @@ -1989,6 +2016,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
| 1989 | if (get_user(val, (int __user *)optval)) | 2016 | if (get_user(val, (int __user *)optval)) |
| 1990 | return -EFAULT; | 2017 | return -EFAULT; |
| 1991 | 2018 | ||
| 2019 | valbool = val ? 1 : 0; | ||
| 2020 | |||
| 1992 | switch (optname) { | 2021 | switch (optname) { |
| 1993 | case UDP_CORK: | 2022 | case UDP_CORK: |
| 1994 | if (val != 0) { | 2023 | if (val != 0) { |
| @@ -2018,6 +2047,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
| 2018 | } | 2047 | } |
| 2019 | break; | 2048 | break; |
| 2020 | 2049 | ||
| 2050 | case UDP_NO_CHECK6_TX: | ||
| 2051 | up->no_check6_tx = valbool; | ||
| 2052 | break; | ||
| 2053 | |||
| 2054 | case UDP_NO_CHECK6_RX: | ||
| 2055 | up->no_check6_rx = valbool; | ||
| 2056 | break; | ||
| 2057 | |||
| 2021 | /* | 2058 | /* |
| 2022 | * UDP-Lite's partial checksum coverage (RFC 3828). | 2059 | * UDP-Lite's partial checksum coverage (RFC 3828). |
| 2023 | */ | 2060 | */ |
| @@ -2100,6 +2137,14 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, | |||
| 2100 | val = up->encap_type; | 2137 | val = up->encap_type; |
| 2101 | break; | 2138 | break; |
| 2102 | 2139 | ||
| 2140 | case UDP_NO_CHECK6_TX: | ||
| 2141 | val = up->no_check6_tx; | ||
| 2142 | break; | ||
| 2143 | |||
| 2144 | case UDP_NO_CHECK6_RX: | ||
| 2145 | val = up->no_check6_rx; | ||
| 2146 | break; | ||
| 2147 | |||
| 2103 | /* The following two cannot be changed on UDP sockets, the return is | 2148 | /* The following two cannot be changed on UDP sockets, the return is |
| 2104 | * always 0 (which corresponds to the full checksum coverage of UDP). */ | 2149 | * always 0 (which corresponds to the full checksum coverage of UDP). */ |
| 2105 | case UDPLITE_SEND_CSCOV: | 2150 | case UDPLITE_SEND_CSCOV: |
| @@ -2484,7 +2529,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, | |||
| 2484 | int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); | 2529 | int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); |
| 2485 | __be16 protocol = skb->protocol; | 2530 | __be16 protocol = skb->protocol; |
| 2486 | netdev_features_t enc_features; | 2531 | netdev_features_t enc_features; |
| 2487 | int outer_hlen; | 2532 | int udp_offset, outer_hlen; |
| 2533 | unsigned int oldlen; | ||
| 2534 | bool need_csum; | ||
| 2535 | |||
| 2536 | oldlen = (u16)~skb->len; | ||
| 2488 | 2537 | ||
| 2489 | if (unlikely(!pskb_may_pull(skb, tnl_hlen))) | 2538 | if (unlikely(!pskb_may_pull(skb, tnl_hlen))) |
| 2490 | goto out; | 2539 | goto out; |
| @@ -2496,6 +2545,10 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, | |||
| 2496 | skb->mac_len = skb_inner_network_offset(skb); | 2545 | skb->mac_len = skb_inner_network_offset(skb); |
| 2497 | skb->protocol = htons(ETH_P_TEB); | 2546 | skb->protocol = htons(ETH_P_TEB); |
| 2498 | 2547 | ||
| 2548 | need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); | ||
| 2549 | if (need_csum) | ||
| 2550 | skb->encap_hdr_csum = 1; | ||
| 2551 | |||
| 2499 | /* segment inner packet. */ | 2552 | /* segment inner packet. */ |
| 2500 | enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); | 2553 | enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); |
| 2501 | segs = skb_mac_gso_segment(skb, enc_features); | 2554 | segs = skb_mac_gso_segment(skb, enc_features); |
| @@ -2506,10 +2559,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, | |||
| 2506 | } | 2559 | } |
| 2507 | 2560 | ||
| 2508 | outer_hlen = skb_tnl_header_len(skb); | 2561 | outer_hlen = skb_tnl_header_len(skb); |
| 2562 | udp_offset = outer_hlen - tnl_hlen; | ||
| 2509 | skb = segs; | 2563 | skb = segs; |
| 2510 | do { | 2564 | do { |
| 2511 | struct udphdr *uh; | 2565 | struct udphdr *uh; |
| 2512 | int udp_offset = outer_hlen - tnl_hlen; | 2566 | int len; |
| 2513 | 2567 | ||
| 2514 | skb_reset_inner_headers(skb); | 2568 | skb_reset_inner_headers(skb); |
| 2515 | skb->encapsulation = 1; | 2569 | skb->encapsulation = 1; |
| @@ -2520,31 +2574,20 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, | |||
| 2520 | skb_reset_mac_header(skb); | 2574 | skb_reset_mac_header(skb); |
| 2521 | skb_set_network_header(skb, mac_len); | 2575 | skb_set_network_header(skb, mac_len); |
| 2522 | skb_set_transport_header(skb, udp_offset); | 2576 | skb_set_transport_header(skb, udp_offset); |
| 2577 | len = skb->len - udp_offset; | ||
| 2523 | uh = udp_hdr(skb); | 2578 | uh = udp_hdr(skb); |
| 2524 | uh->len = htons(skb->len - udp_offset); | 2579 | uh->len = htons(len); |
| 2525 | |||
| 2526 | /* csum segment if tunnel sets skb with csum. */ | ||
| 2527 | if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) { | ||
| 2528 | struct iphdr *iph = ip_hdr(skb); | ||
| 2529 | 2580 | ||
| 2530 | uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, | 2581 | if (need_csum) { |
| 2531 | skb->len - udp_offset, | 2582 | __be32 delta = htonl(oldlen + len); |
| 2532 | IPPROTO_UDP, 0); | ||
| 2533 | uh->check = csum_fold(skb_checksum(skb, udp_offset, | ||
| 2534 | skb->len - udp_offset, 0)); | ||
| 2535 | if (uh->check == 0) | ||
| 2536 | uh->check = CSUM_MANGLED_0; | ||
| 2537 | 2583 | ||
| 2538 | } else if (protocol == htons(ETH_P_IPV6)) { | 2584 | uh->check = ~csum_fold((__force __wsum) |
| 2539 | struct ipv6hdr *ipv6h = ipv6_hdr(skb); | 2585 | ((__force u32)uh->check + |
| 2540 | u32 len = skb->len - udp_offset; | 2586 | (__force u32)delta)); |
| 2587 | uh->check = gso_make_checksum(skb, ~uh->check); | ||
| 2541 | 2588 | ||
| 2542 | uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, | ||
| 2543 | len, IPPROTO_UDP, 0); | ||
| 2544 | uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0)); | ||
| 2545 | if (uh->check == 0) | 2589 | if (uh->check == 0) |
| 2546 | uh->check = CSUM_MANGLED_0; | 2590 | uh->check = CSUM_MANGLED_0; |
| 2547 | skb->ip_summed = CHECKSUM_NONE; | ||
| 2548 | } | 2591 | } |
| 2549 | 2592 | ||
| 2550 | skb->protocol = protocol; | 2593 | skb->protocol = protocol; |
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 88b4023ecfcf..546d2d439dda 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c | |||
| @@ -56,7 +56,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, | |||
| 56 | __wsum csum; | 56 | __wsum csum; |
| 57 | 57 | ||
| 58 | if (skb->encapsulation && | 58 | if (skb->encapsulation && |
| 59 | skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) { | 59 | (skb_shinfo(skb)->gso_type & |
| 60 | (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { | ||
| 60 | segs = skb_udp_tunnel_segment(skb, features); | 61 | segs = skb_udp_tunnel_segment(skb, features); |
| 61 | goto out; | 62 | goto out; |
| 62 | } | 63 | } |
| @@ -71,8 +72,10 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, | |||
| 71 | 72 | ||
| 72 | if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | | 73 | if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | |
| 73 | SKB_GSO_UDP_TUNNEL | | 74 | SKB_GSO_UDP_TUNNEL | |
| 75 | SKB_GSO_UDP_TUNNEL_CSUM | | ||
| 74 | SKB_GSO_IPIP | | 76 | SKB_GSO_IPIP | |
| 75 | SKB_GSO_GRE | SKB_GSO_MPLS) || | 77 | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | |
| 78 | SKB_GSO_MPLS) || | ||
| 76 | !(type & (SKB_GSO_UDP)))) | 79 | !(type & (SKB_GSO_UDP)))) |
| 77 | goto out; | 80 | goto out; |
| 78 | 81 | ||
| @@ -197,6 +200,7 @@ unflush: | |||
| 197 | } | 200 | } |
| 198 | 201 | ||
| 199 | skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ | 202 | skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ |
| 203 | skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); | ||
| 200 | pp = uo_priv->offload->callbacks.gro_receive(head, skb); | 204 | pp = uo_priv->offload->callbacks.gro_receive(head, skb); |
| 201 | 205 | ||
| 202 | out_unlock: | 206 | out_unlock: |
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 2c46acd4cc36..3b3efbda48e1 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c | |||
| @@ -70,7 +70,6 @@ static struct inet_protosw udplite4_protosw = { | |||
| 70 | .protocol = IPPROTO_UDPLITE, | 70 | .protocol = IPPROTO_UDPLITE, |
| 71 | .prot = &udplite_prot, | 71 | .prot = &udplite_prot, |
| 72 | .ops = &inet_dgram_ops, | 72 | .ops = &inet_dgram_ops, |
| 73 | .no_check = 0, /* must checksum (RFC 3828) */ | ||
| 74 | .flags = INET_PROTOSW_PERMANENT, | 73 | .flags = INET_PROTOSW_PERMANENT, |
| 75 | }; | 74 | }; |
| 76 | 75 | ||
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 05f2b484954f..91771a7c802f 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c | |||
| @@ -58,12 +58,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) | |||
| 58 | 58 | ||
| 59 | top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? | 59 | top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? |
| 60 | 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); | 60 | 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); |
| 61 | ip_select_ident(skb, dst->child, NULL); | ||
| 62 | 61 | ||
| 63 | top_iph->ttl = ip4_dst_hoplimit(dst->child); | 62 | top_iph->ttl = ip4_dst_hoplimit(dst->child); |
| 64 | 63 | ||
| 65 | top_iph->saddr = x->props.saddr.a4; | 64 | top_iph->saddr = x->props.saddr.a4; |
| 66 | top_iph->daddr = x->id.daddr.a4; | 65 | top_iph->daddr = x->id.daddr.a4; |
| 66 | ip_select_ident(skb, NULL); | ||
| 67 | 67 | ||
| 68 | return 0; | 68 | return 0; |
| 69 | } | 69 | } |
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 186a8ecf92fa..d5f6bd9a210a 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c | |||
| @@ -25,7 +25,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) | |||
| 25 | if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) | 25 | if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) |
| 26 | goto out; | 26 | goto out; |
| 27 | 27 | ||
| 28 | if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) | 28 | if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) |
| 29 | goto out; | 29 | goto out; |
| 30 | 30 | ||
| 31 | mtu = dst_mtu(skb_dst(skb)); | 31 | mtu = dst_mtu(skb_dst(skb)); |
