diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2015-02-20 14:54:53 -0500 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2015-02-20 14:54:53 -0500 |
| commit | 4c971aa78314253cce914ed29e3d90df3326d646 (patch) | |
| tree | a9dcf0b1fdc9e1aacff90afb5b3ab79983115dcc /net/ipv4 | |
| parent | 4ba24fef3eb3b142197135223b90ced2f319cd53 (diff) | |
| parent | 290b799c390d77d27effee3ce312203aaa32ee74 (diff) | |
Merge branch 'next' into for-linus
Second round of updates for 3.20.
Diffstat (limited to 'net/ipv4')
| -rw-r--r-- | net/ipv4/ip_forward.c | 3 | ||||
| -rw-r--r-- | net/ipv4/ip_output.c | 29 | ||||
| -rw-r--r-- | net/ipv4/ip_sockglue.c | 8 | ||||
| -rw-r--r-- | net/ipv4/netfilter/nft_redir_ipv4.c | 8 | ||||
| -rw-r--r-- | net/ipv4/ping.c | 5 | ||||
| -rw-r--r-- | net/ipv4/route.c | 12 | ||||
| -rw-r--r-- | net/ipv4/tcp_bic.c | 2 | ||||
| -rw-r--r-- | net/ipv4/tcp_cong.c | 32 | ||||
| -rw-r--r-- | net/ipv4/tcp_cubic.c | 39 | ||||
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 37 | ||||
| -rw-r--r-- | net/ipv4/tcp_scalable.c | 3 | ||||
| -rw-r--r-- | net/ipv4/tcp_veno.c | 2 | ||||
| -rw-r--r-- | net/ipv4/tcp_yeah.c | 2 | ||||
| -rw-r--r-- | net/ipv4/udp_diag.c | 4 |
14 files changed, 100 insertions, 86 deletions
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 3a83ce5efa80..787b3c294ce6 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
| @@ -129,7 +129,8 @@ int ip_forward(struct sk_buff *skb) | |||
| 129 | * We now generate an ICMP HOST REDIRECT giving the route | 129 | * We now generate an ICMP HOST REDIRECT giving the route |
| 130 | * we calculated. | 130 | * we calculated. |
| 131 | */ | 131 | */ |
| 132 | if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) | 132 | if (IPCB(skb)->flags & IPSKB_DOREDIRECT && !opt->srr && |
| 133 | !skb_sec_path(skb)) | ||
| 133 | ip_rt_send_redirect(skb); | 134 | ip_rt_send_redirect(skb); |
| 134 | 135 | ||
| 135 | skb->priority = rt_tos2priority(iph->tos); | 136 | skb->priority = rt_tos2priority(iph->tos); |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b50861b22b6b..c373c0708d97 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
| @@ -1506,23 +1506,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, | |||
| 1506 | /* | 1506 | /* |
| 1507 | * Generic function to send a packet as reply to another packet. | 1507 | * Generic function to send a packet as reply to another packet. |
| 1508 | * Used to send some TCP resets/acks so far. | 1508 | * Used to send some TCP resets/acks so far. |
| 1509 | * | ||
| 1510 | * Use a fake percpu inet socket to avoid false sharing and contention. | ||
| 1511 | */ | 1509 | */ |
| 1512 | static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = { | 1510 | void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, |
| 1513 | .sk = { | ||
| 1514 | .__sk_common = { | ||
| 1515 | .skc_refcnt = ATOMIC_INIT(1), | ||
| 1516 | }, | ||
| 1517 | .sk_wmem_alloc = ATOMIC_INIT(1), | ||
| 1518 | .sk_allocation = GFP_ATOMIC, | ||
| 1519 | .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE), | ||
| 1520 | }, | ||
| 1521 | .pmtudisc = IP_PMTUDISC_WANT, | ||
| 1522 | .uc_ttl = -1, | ||
| 1523 | }; | ||
| 1524 | |||
| 1525 | void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, | ||
| 1526 | const struct ip_options *sopt, | 1511 | const struct ip_options *sopt, |
| 1527 | __be32 daddr, __be32 saddr, | 1512 | __be32 daddr, __be32 saddr, |
| 1528 | const struct ip_reply_arg *arg, | 1513 | const struct ip_reply_arg *arg, |
| @@ -1532,9 +1517,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, | |||
| 1532 | struct ipcm_cookie ipc; | 1517 | struct ipcm_cookie ipc; |
| 1533 | struct flowi4 fl4; | 1518 | struct flowi4 fl4; |
| 1534 | struct rtable *rt = skb_rtable(skb); | 1519 | struct rtable *rt = skb_rtable(skb); |
| 1520 | struct net *net = sock_net(sk); | ||
| 1535 | struct sk_buff *nskb; | 1521 | struct sk_buff *nskb; |
| 1536 | struct sock *sk; | ||
| 1537 | struct inet_sock *inet; | ||
| 1538 | int err; | 1522 | int err; |
| 1539 | 1523 | ||
| 1540 | if (__ip_options_echo(&replyopts.opt.opt, skb, sopt)) | 1524 | if (__ip_options_echo(&replyopts.opt.opt, skb, sopt)) |
| @@ -1565,15 +1549,11 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, | |||
| 1565 | if (IS_ERR(rt)) | 1549 | if (IS_ERR(rt)) |
| 1566 | return; | 1550 | return; |
| 1567 | 1551 | ||
| 1568 | inet = &get_cpu_var(unicast_sock); | 1552 | inet_sk(sk)->tos = arg->tos; |
| 1569 | 1553 | ||
| 1570 | inet->tos = arg->tos; | ||
| 1571 | sk = &inet->sk; | ||
| 1572 | sk->sk_priority = skb->priority; | 1554 | sk->sk_priority = skb->priority; |
| 1573 | sk->sk_protocol = ip_hdr(skb)->protocol; | 1555 | sk->sk_protocol = ip_hdr(skb)->protocol; |
| 1574 | sk->sk_bound_dev_if = arg->bound_dev_if; | 1556 | sk->sk_bound_dev_if = arg->bound_dev_if; |
| 1575 | sock_net_set(sk, net); | ||
| 1576 | __skb_queue_head_init(&sk->sk_write_queue); | ||
| 1577 | sk->sk_sndbuf = sysctl_wmem_default; | 1557 | sk->sk_sndbuf = sysctl_wmem_default; |
| 1578 | err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, | 1558 | err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, |
| 1579 | len, 0, &ipc, &rt, MSG_DONTWAIT); | 1559 | len, 0, &ipc, &rt, MSG_DONTWAIT); |
| @@ -1589,13 +1569,10 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, | |||
| 1589 | arg->csumoffset) = csum_fold(csum_add(nskb->csum, | 1569 | arg->csumoffset) = csum_fold(csum_add(nskb->csum, |
| 1590 | arg->csum)); | 1570 | arg->csum)); |
| 1591 | nskb->ip_summed = CHECKSUM_NONE; | 1571 | nskb->ip_summed = CHECKSUM_NONE; |
| 1592 | skb_orphan(nskb); | ||
| 1593 | skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); | 1572 | skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); |
| 1594 | ip_push_pending_frames(sk, &fl4); | 1573 | ip_push_pending_frames(sk, &fl4); |
| 1595 | } | 1574 | } |
| 1596 | out: | 1575 | out: |
| 1597 | put_cpu_var(unicast_sock); | ||
| 1598 | |||
| 1599 | ip_rt_put(rt); | 1576 | ip_rt_put(rt); |
| 1600 | } | 1577 | } |
| 1601 | 1578 | ||
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8a89c738b7a3..6b85adb05003 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
| @@ -461,17 +461,13 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) | |||
| 461 | 461 | ||
| 462 | memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); | 462 | memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); |
| 463 | sin = &errhdr.offender; | 463 | sin = &errhdr.offender; |
| 464 | sin->sin_family = AF_UNSPEC; | 464 | memset(sin, 0, sizeof(*sin)); |
| 465 | 465 | ||
| 466 | if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || | 466 | if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || |
| 467 | ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { | 467 | ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { |
| 468 | struct inet_sock *inet = inet_sk(sk); | ||
| 469 | |||
| 470 | sin->sin_family = AF_INET; | 468 | sin->sin_family = AF_INET; |
| 471 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; | 469 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; |
| 472 | sin->sin_port = 0; | 470 | if (inet_sk(sk)->cmsg_flags) |
| 473 | memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); | ||
| 474 | if (inet->cmsg_flags) | ||
| 475 | ip_cmsg_recv(msg, skb); | 471 | ip_cmsg_recv(msg, skb); |
| 476 | } | 472 | } |
| 477 | 473 | ||
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index ff2d23d8c87a..6ecfce63201a 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c | |||
| @@ -27,10 +27,10 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, | |||
| 27 | 27 | ||
| 28 | memset(&mr, 0, sizeof(mr)); | 28 | memset(&mr, 0, sizeof(mr)); |
| 29 | if (priv->sreg_proto_min) { | 29 | if (priv->sreg_proto_min) { |
| 30 | mr.range[0].min.all = (__force __be16) | 30 | mr.range[0].min.all = |
| 31 | data[priv->sreg_proto_min].data[0]; | 31 | *(__be16 *)&data[priv->sreg_proto_min].data[0]; |
| 32 | mr.range[0].max.all = (__force __be16) | 32 | mr.range[0].max.all = |
| 33 | data[priv->sreg_proto_max].data[0]; | 33 | *(__be16 *)&data[priv->sreg_proto_max].data[0]; |
| 34 | mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; | 34 | mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; |
| 35 | } | 35 | } |
| 36 | 36 | ||
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index c0d82f78d364..2a3720fb5a5f 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c | |||
| @@ -966,8 +966,11 @@ bool ping_rcv(struct sk_buff *skb) | |||
| 966 | 966 | ||
| 967 | sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); | 967 | sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); |
| 968 | if (sk != NULL) { | 968 | if (sk != NULL) { |
| 969 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | ||
| 970 | |||
| 969 | pr_debug("rcv on socket %p\n", sk); | 971 | pr_debug("rcv on socket %p\n", sk); |
| 970 | ping_queue_rcv_skb(sk, skb_get(skb)); | 972 | if (skb2) |
| 973 | ping_queue_rcv_skb(sk, skb2); | ||
| 971 | sock_put(sk); | 974 | sock_put(sk); |
| 972 | return true; | 975 | return true; |
| 973 | } | 976 | } |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6a2155b02602..52e1f2bf0ca2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
| @@ -966,6 +966,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) | |||
| 966 | if (dst->dev->mtu < mtu) | 966 | if (dst->dev->mtu < mtu) |
| 967 | return; | 967 | return; |
| 968 | 968 | ||
| 969 | if (rt->rt_pmtu && rt->rt_pmtu < mtu) | ||
| 970 | return; | ||
| 971 | |||
| 969 | if (mtu < ip_rt_min_pmtu) | 972 | if (mtu < ip_rt_min_pmtu) |
| 970 | mtu = ip_rt_min_pmtu; | 973 | mtu = ip_rt_min_pmtu; |
| 971 | 974 | ||
| @@ -1554,11 +1557,10 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 1554 | 1557 | ||
| 1555 | do_cache = res->fi && !itag; | 1558 | do_cache = res->fi && !itag; |
| 1556 | if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && | 1559 | if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && |
| 1560 | skb->protocol == htons(ETH_P_IP) && | ||
| 1557 | (IN_DEV_SHARED_MEDIA(out_dev) || | 1561 | (IN_DEV_SHARED_MEDIA(out_dev) || |
| 1558 | inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) { | 1562 | inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) |
| 1559 | flags |= RTCF_DOREDIRECT; | 1563 | IPCB(skb)->flags |= IPSKB_DOREDIRECT; |
| 1560 | do_cache = false; | ||
| 1561 | } | ||
| 1562 | 1564 | ||
| 1563 | if (skb->protocol != htons(ETH_P_IP)) { | 1565 | if (skb->protocol != htons(ETH_P_IP)) { |
| 1564 | /* Not IP (i.e. ARP). Do not create route, if it is | 1566 | /* Not IP (i.e. ARP). Do not create route, if it is |
| @@ -2303,6 +2305,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, | |||
| 2303 | r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; | 2305 | r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; |
| 2304 | if (rt->rt_flags & RTCF_NOTIFY) | 2306 | if (rt->rt_flags & RTCF_NOTIFY) |
| 2305 | r->rtm_flags |= RTM_F_NOTIFY; | 2307 | r->rtm_flags |= RTM_F_NOTIFY; |
| 2308 | if (IPCB(skb)->flags & IPSKB_DOREDIRECT) | ||
| 2309 | r->rtm_flags |= RTCF_DOREDIRECT; | ||
| 2306 | 2310 | ||
| 2307 | if (nla_put_be32(skb, RTA_DST, dst)) | 2311 | if (nla_put_be32(skb, RTA_DST, dst)) |
| 2308 | goto nla_put_failure; | 2312 | goto nla_put_failure; |
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index bb395d46a389..c037644eafb7 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c | |||
| @@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) | |||
| 150 | tcp_slow_start(tp, acked); | 150 | tcp_slow_start(tp, acked); |
| 151 | else { | 151 | else { |
| 152 | bictcp_update(ca, tp->snd_cwnd); | 152 | bictcp_update(ca, tp->snd_cwnd); |
| 153 | tcp_cong_avoid_ai(tp, ca->cnt); | 153 | tcp_cong_avoid_ai(tp, ca->cnt, 1); |
| 154 | } | 154 | } |
| 155 | } | 155 | } |
| 156 | 156 | ||
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 27ead0dd16bc..8670e68e2ce6 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
| @@ -291,26 +291,32 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) | |||
| 291 | * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and | 291 | * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and |
| 292 | * returns the leftover acks to adjust cwnd in congestion avoidance mode. | 292 | * returns the leftover acks to adjust cwnd in congestion avoidance mode. |
| 293 | */ | 293 | */ |
| 294 | void tcp_slow_start(struct tcp_sock *tp, u32 acked) | 294 | u32 tcp_slow_start(struct tcp_sock *tp, u32 acked) |
| 295 | { | 295 | { |
| 296 | u32 cwnd = tp->snd_cwnd + acked; | 296 | u32 cwnd = tp->snd_cwnd + acked; |
| 297 | 297 | ||
| 298 | if (cwnd > tp->snd_ssthresh) | 298 | if (cwnd > tp->snd_ssthresh) |
| 299 | cwnd = tp->snd_ssthresh + 1; | 299 | cwnd = tp->snd_ssthresh + 1; |
| 300 | acked -= cwnd - tp->snd_cwnd; | ||
| 300 | tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); | 301 | tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); |
| 302 | |||
| 303 | return acked; | ||
| 301 | } | 304 | } |
| 302 | EXPORT_SYMBOL_GPL(tcp_slow_start); | 305 | EXPORT_SYMBOL_GPL(tcp_slow_start); |
| 303 | 306 | ||
| 304 | /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w) */ | 307 | /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w), |
| 305 | void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w) | 308 | * for every packet that was ACKed. |
| 309 | */ | ||
| 310 | void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) | ||
| 306 | { | 311 | { |
| 312 | tp->snd_cwnd_cnt += acked; | ||
| 307 | if (tp->snd_cwnd_cnt >= w) { | 313 | if (tp->snd_cwnd_cnt >= w) { |
| 308 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | 314 | u32 delta = tp->snd_cwnd_cnt / w; |
| 309 | tp->snd_cwnd++; | 315 | |
| 310 | tp->snd_cwnd_cnt = 0; | 316 | tp->snd_cwnd_cnt -= delta * w; |
| 311 | } else { | 317 | tp->snd_cwnd += delta; |
| 312 | tp->snd_cwnd_cnt++; | ||
| 313 | } | 318 | } |
| 319 | tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp); | ||
| 314 | } | 320 | } |
| 315 | EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); | 321 | EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); |
| 316 | 322 | ||
| @@ -329,11 +335,13 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) | |||
| 329 | return; | 335 | return; |
| 330 | 336 | ||
| 331 | /* In "safe" area, increase. */ | 337 | /* In "safe" area, increase. */ |
| 332 | if (tp->snd_cwnd <= tp->snd_ssthresh) | 338 | if (tp->snd_cwnd <= tp->snd_ssthresh) { |
| 333 | tcp_slow_start(tp, acked); | 339 | acked = tcp_slow_start(tp, acked); |
| 340 | if (!acked) | ||
| 341 | return; | ||
| 342 | } | ||
| 334 | /* In dangerous area, increase slowly. */ | 343 | /* In dangerous area, increase slowly. */ |
| 335 | else | 344 | tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); |
| 336 | tcp_cong_avoid_ai(tp, tp->snd_cwnd); | ||
| 337 | } | 345 | } |
| 338 | EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); | 346 | EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); |
| 339 | 347 | ||
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 6b6002416a73..4b276d1ed980 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
| @@ -93,9 +93,7 @@ struct bictcp { | |||
| 93 | u32 epoch_start; /* beginning of an epoch */ | 93 | u32 epoch_start; /* beginning of an epoch */ |
| 94 | u32 ack_cnt; /* number of acks */ | 94 | u32 ack_cnt; /* number of acks */ |
| 95 | u32 tcp_cwnd; /* estimated tcp cwnd */ | 95 | u32 tcp_cwnd; /* estimated tcp cwnd */ |
| 96 | #define ACK_RATIO_SHIFT 4 | 96 | u16 unused; |
| 97 | #define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT) | ||
| 98 | u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ | ||
| 99 | u8 sample_cnt; /* number of samples to decide curr_rtt */ | 97 | u8 sample_cnt; /* number of samples to decide curr_rtt */ |
| 100 | u8 found; /* the exit point is found? */ | 98 | u8 found; /* the exit point is found? */ |
| 101 | u32 round_start; /* beginning of each round */ | 99 | u32 round_start; /* beginning of each round */ |
| @@ -114,7 +112,6 @@ static inline void bictcp_reset(struct bictcp *ca) | |||
| 114 | ca->bic_K = 0; | 112 | ca->bic_K = 0; |
| 115 | ca->delay_min = 0; | 113 | ca->delay_min = 0; |
| 116 | ca->epoch_start = 0; | 114 | ca->epoch_start = 0; |
| 117 | ca->delayed_ack = 2 << ACK_RATIO_SHIFT; | ||
| 118 | ca->ack_cnt = 0; | 115 | ca->ack_cnt = 0; |
| 119 | ca->tcp_cwnd = 0; | 116 | ca->tcp_cwnd = 0; |
| 120 | ca->found = 0; | 117 | ca->found = 0; |
| @@ -205,23 +202,30 @@ static u32 cubic_root(u64 a) | |||
| 205 | /* | 202 | /* |
| 206 | * Compute congestion window to use. | 203 | * Compute congestion window to use. |
| 207 | */ | 204 | */ |
| 208 | static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | 205 | static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) |
| 209 | { | 206 | { |
| 210 | u32 delta, bic_target, max_cnt; | 207 | u32 delta, bic_target, max_cnt; |
| 211 | u64 offs, t; | 208 | u64 offs, t; |
| 212 | 209 | ||
| 213 | ca->ack_cnt++; /* count the number of ACKs */ | 210 | ca->ack_cnt += acked; /* count the number of ACKed packets */ |
| 214 | 211 | ||
| 215 | if (ca->last_cwnd == cwnd && | 212 | if (ca->last_cwnd == cwnd && |
| 216 | (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) | 213 | (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) |
| 217 | return; | 214 | return; |
| 218 | 215 | ||
| 216 | /* The CUBIC function can update ca->cnt at most once per jiffy. | ||
| 217 | * On all cwnd reduction events, ca->epoch_start is set to 0, | ||
| 218 | * which will force a recalculation of ca->cnt. | ||
| 219 | */ | ||
| 220 | if (ca->epoch_start && tcp_time_stamp == ca->last_time) | ||
| 221 | goto tcp_friendliness; | ||
| 222 | |||
| 219 | ca->last_cwnd = cwnd; | 223 | ca->last_cwnd = cwnd; |
| 220 | ca->last_time = tcp_time_stamp; | 224 | ca->last_time = tcp_time_stamp; |
| 221 | 225 | ||
| 222 | if (ca->epoch_start == 0) { | 226 | if (ca->epoch_start == 0) { |
| 223 | ca->epoch_start = tcp_time_stamp; /* record beginning */ | 227 | ca->epoch_start = tcp_time_stamp; /* record beginning */ |
| 224 | ca->ack_cnt = 1; /* start counting */ | 228 | ca->ack_cnt = acked; /* start counting */ |
| 225 | ca->tcp_cwnd = cwnd; /* syn with cubic */ | 229 | ca->tcp_cwnd = cwnd; /* syn with cubic */ |
| 226 | 230 | ||
| 227 | if (ca->last_max_cwnd <= cwnd) { | 231 | if (ca->last_max_cwnd <= cwnd) { |
| @@ -283,6 +287,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
| 283 | if (ca->last_max_cwnd == 0 && ca->cnt > 20) | 287 | if (ca->last_max_cwnd == 0 && ca->cnt > 20) |
| 284 | ca->cnt = 20; /* increase cwnd 5% per RTT */ | 288 | ca->cnt = 20; /* increase cwnd 5% per RTT */ |
| 285 | 289 | ||
| 290 | tcp_friendliness: | ||
| 286 | /* TCP Friendly */ | 291 | /* TCP Friendly */ |
| 287 | if (tcp_friendliness) { | 292 | if (tcp_friendliness) { |
| 288 | u32 scale = beta_scale; | 293 | u32 scale = beta_scale; |
| @@ -301,7 +306,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
| 301 | } | 306 | } |
| 302 | } | 307 | } |
| 303 | 308 | ||
| 304 | ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack; | ||
| 305 | if (ca->cnt == 0) /* cannot be zero */ | 309 | if (ca->cnt == 0) /* cannot be zero */ |
| 306 | ca->cnt = 1; | 310 | ca->cnt = 1; |
| 307 | } | 311 | } |
| @@ -317,11 +321,12 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) | |||
| 317 | if (tp->snd_cwnd <= tp->snd_ssthresh) { | 321 | if (tp->snd_cwnd <= tp->snd_ssthresh) { |
| 318 | if (hystart && after(ack, ca->end_seq)) | 322 | if (hystart && after(ack, ca->end_seq)) |
| 319 | bictcp_hystart_reset(sk); | 323 | bictcp_hystart_reset(sk); |
| 320 | tcp_slow_start(tp, acked); | 324 | acked = tcp_slow_start(tp, acked); |
| 321 | } else { | 325 | if (!acked) |
| 322 | bictcp_update(ca, tp->snd_cwnd); | 326 | return; |
| 323 | tcp_cong_avoid_ai(tp, ca->cnt); | ||
| 324 | } | 327 | } |
| 328 | bictcp_update(ca, tp->snd_cwnd, acked); | ||
| 329 | tcp_cong_avoid_ai(tp, ca->cnt, acked); | ||
| 325 | } | 330 | } |
| 326 | 331 | ||
| 327 | static u32 bictcp_recalc_ssthresh(struct sock *sk) | 332 | static u32 bictcp_recalc_ssthresh(struct sock *sk) |
| @@ -411,20 +416,10 @@ static void hystart_update(struct sock *sk, u32 delay) | |||
| 411 | */ | 416 | */ |
| 412 | static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) | 417 | static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) |
| 413 | { | 418 | { |
| 414 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
| 415 | const struct tcp_sock *tp = tcp_sk(sk); | 419 | const struct tcp_sock *tp = tcp_sk(sk); |
| 416 | struct bictcp *ca = inet_csk_ca(sk); | 420 | struct bictcp *ca = inet_csk_ca(sk); |
| 417 | u32 delay; | 421 | u32 delay; |
| 418 | 422 | ||
| 419 | if (icsk->icsk_ca_state == TCP_CA_Open) { | ||
| 420 | u32 ratio = ca->delayed_ack; | ||
| 421 | |||
| 422 | ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; | ||
| 423 | ratio += cnt; | ||
| 424 | |||
| 425 | ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT); | ||
| 426 | } | ||
| 427 | |||
| 428 | /* Some calls are for duplicates without timetamps */ | 423 | /* Some calls are for duplicates without timetamps */ |
| 429 | if (rtt_us < 0) | 424 | if (rtt_us < 0) |
| 430 | return; | 425 | return; |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a3f72d7fc06c..d22f54482bab 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
| @@ -683,7 +683,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
| 683 | arg.bound_dev_if = sk->sk_bound_dev_if; | 683 | arg.bound_dev_if = sk->sk_bound_dev_if; |
| 684 | 684 | ||
| 685 | arg.tos = ip_hdr(skb)->tos; | 685 | arg.tos = ip_hdr(skb)->tos; |
| 686 | ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt, | 686 | ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), |
| 687 | skb, &TCP_SKB_CB(skb)->header.h4.opt, | ||
| 687 | ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, | 688 | ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, |
| 688 | &arg, arg.iov[0].iov_len); | 689 | &arg, arg.iov[0].iov_len); |
| 689 | 690 | ||
| @@ -767,7 +768,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, | |||
| 767 | if (oif) | 768 | if (oif) |
| 768 | arg.bound_dev_if = oif; | 769 | arg.bound_dev_if = oif; |
| 769 | arg.tos = tos; | 770 | arg.tos = tos; |
| 770 | ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt, | 771 | ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), |
| 772 | skb, &TCP_SKB_CB(skb)->header.h4.opt, | ||
| 771 | ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, | 773 | ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, |
| 772 | &arg, arg.iov[0].iov_len); | 774 | &arg, arg.iov[0].iov_len); |
| 773 | 775 | ||
| @@ -2428,14 +2430,39 @@ struct proto tcp_prot = { | |||
| 2428 | }; | 2430 | }; |
| 2429 | EXPORT_SYMBOL(tcp_prot); | 2431 | EXPORT_SYMBOL(tcp_prot); |
| 2430 | 2432 | ||
| 2433 | static void __net_exit tcp_sk_exit(struct net *net) | ||
| 2434 | { | ||
| 2435 | int cpu; | ||
| 2436 | |||
| 2437 | for_each_possible_cpu(cpu) | ||
| 2438 | inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); | ||
| 2439 | free_percpu(net->ipv4.tcp_sk); | ||
| 2440 | } | ||
| 2441 | |||
| 2431 | static int __net_init tcp_sk_init(struct net *net) | 2442 | static int __net_init tcp_sk_init(struct net *net) |
| 2432 | { | 2443 | { |
| 2444 | int res, cpu; | ||
| 2445 | |||
| 2446 | net->ipv4.tcp_sk = alloc_percpu(struct sock *); | ||
| 2447 | if (!net->ipv4.tcp_sk) | ||
| 2448 | return -ENOMEM; | ||
| 2449 | |||
| 2450 | for_each_possible_cpu(cpu) { | ||
| 2451 | struct sock *sk; | ||
| 2452 | |||
| 2453 | res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, | ||
| 2454 | IPPROTO_TCP, net); | ||
| 2455 | if (res) | ||
| 2456 | goto fail; | ||
| 2457 | *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; | ||
| 2458 | } | ||
| 2433 | net->ipv4.sysctl_tcp_ecn = 2; | 2459 | net->ipv4.sysctl_tcp_ecn = 2; |
| 2434 | return 0; | 2460 | return 0; |
| 2435 | } | ||
| 2436 | 2461 | ||
| 2437 | static void __net_exit tcp_sk_exit(struct net *net) | 2462 | fail: |
| 2438 | { | 2463 | tcp_sk_exit(net); |
| 2464 | |||
| 2465 | return res; | ||
| 2439 | } | 2466 | } |
| 2440 | 2467 | ||
| 2441 | static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) | 2468 | static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) |
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 6824afb65d93..333bcb2415ff 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c | |||
| @@ -25,7 +25,8 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) | |||
| 25 | if (tp->snd_cwnd <= tp->snd_ssthresh) | 25 | if (tp->snd_cwnd <= tp->snd_ssthresh) |
| 26 | tcp_slow_start(tp, acked); | 26 | tcp_slow_start(tp, acked); |
| 27 | else | 27 | else |
| 28 | tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)); | 28 | tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), |
| 29 | 1); | ||
| 29 | } | 30 | } |
| 30 | 31 | ||
| 31 | static u32 tcp_scalable_ssthresh(struct sock *sk) | 32 | static u32 tcp_scalable_ssthresh(struct sock *sk) |
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index a4d2d2d88dca..112151eeee45 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c | |||
| @@ -159,7 +159,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) | |||
| 159 | /* In the "non-congestive state", increase cwnd | 159 | /* In the "non-congestive state", increase cwnd |
| 160 | * every rtt. | 160 | * every rtt. |
| 161 | */ | 161 | */ |
| 162 | tcp_cong_avoid_ai(tp, tp->snd_cwnd); | 162 | tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); |
| 163 | } else { | 163 | } else { |
| 164 | /* In the "congestive state", increase cwnd | 164 | /* In the "congestive state", increase cwnd |
| 165 | * every other rtt. | 165 | * every other rtt. |
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index cd7273218598..17d35662930d 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c | |||
| @@ -92,7 +92,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) | |||
| 92 | 92 | ||
| 93 | } else { | 93 | } else { |
| 94 | /* Reno */ | 94 | /* Reno */ |
| 95 | tcp_cong_avoid_ai(tp, tp->snd_cwnd); | 95 | tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); |
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. | 98 | /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. |
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 7927db0a9279..4a000f1dd757 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c | |||
| @@ -99,11 +99,13 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlin | |||
| 99 | s_slot = cb->args[0]; | 99 | s_slot = cb->args[0]; |
| 100 | num = s_num = cb->args[1]; | 100 | num = s_num = cb->args[1]; |
| 101 | 101 | ||
| 102 | for (slot = s_slot; slot <= table->mask; num = s_num = 0, slot++) { | 102 | for (slot = s_slot; slot <= table->mask; s_num = 0, slot++) { |
| 103 | struct sock *sk; | 103 | struct sock *sk; |
| 104 | struct hlist_nulls_node *node; | 104 | struct hlist_nulls_node *node; |
| 105 | struct udp_hslot *hslot = &table->hash[slot]; | 105 | struct udp_hslot *hslot = &table->hash[slot]; |
| 106 | 106 | ||
| 107 | num = 0; | ||
| 108 | |||
| 107 | if (hlist_nulls_empty(&hslot->head)) | 109 | if (hlist_nulls_empty(&hslot->head)) |
| 108 | continue; | 110 | continue; |
| 109 | 111 | ||
