aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2015-02-20 14:54:53 -0500
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2015-02-20 14:54:53 -0500
commit4c971aa78314253cce914ed29e3d90df3326d646 (patch)
treea9dcf0b1fdc9e1aacff90afb5b3ab79983115dcc /net/ipv4
parent4ba24fef3eb3b142197135223b90ced2f319cd53 (diff)
parent290b799c390d77d27effee3ce312203aaa32ee74 (diff)
Merge branch 'next' into for-linus
Second round of updates for 3.20.
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/ip_forward.c3
-rw-r--r--net/ipv4/ip_output.c29
-rw-r--r--net/ipv4/ip_sockglue.c8
-rw-r--r--net/ipv4/netfilter/nft_redir_ipv4.c8
-rw-r--r--net/ipv4/ping.c5
-rw-r--r--net/ipv4/route.c12
-rw-r--r--net/ipv4/tcp_bic.c2
-rw-r--r--net/ipv4/tcp_cong.c32
-rw-r--r--net/ipv4/tcp_cubic.c39
-rw-r--r--net/ipv4/tcp_ipv4.c37
-rw-r--r--net/ipv4/tcp_scalable.c3
-rw-r--r--net/ipv4/tcp_veno.c2
-rw-r--r--net/ipv4/tcp_yeah.c2
-rw-r--r--net/ipv4/udp_diag.c4
14 files changed, 100 insertions, 86 deletions
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 3a83ce5efa80..787b3c294ce6 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -129,7 +129,8 @@ int ip_forward(struct sk_buff *skb)
129 * We now generate an ICMP HOST REDIRECT giving the route 129 * We now generate an ICMP HOST REDIRECT giving the route
130 * we calculated. 130 * we calculated.
131 */ 131 */
132 if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) 132 if (IPCB(skb)->flags & IPSKB_DOREDIRECT && !opt->srr &&
133 !skb_sec_path(skb))
133 ip_rt_send_redirect(skb); 134 ip_rt_send_redirect(skb);
134 135
135 skb->priority = rt_tos2priority(iph->tos); 136 skb->priority = rt_tos2priority(iph->tos);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b50861b22b6b..c373c0708d97 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1506,23 +1506,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1506/* 1506/*
1507 * Generic function to send a packet as reply to another packet. 1507 * Generic function to send a packet as reply to another packet.
1508 * Used to send some TCP resets/acks so far. 1508 * Used to send some TCP resets/acks so far.
1509 *
1510 * Use a fake percpu inet socket to avoid false sharing and contention.
1511 */ 1509 */
1512static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = { 1510void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
1513 .sk = {
1514 .__sk_common = {
1515 .skc_refcnt = ATOMIC_INIT(1),
1516 },
1517 .sk_wmem_alloc = ATOMIC_INIT(1),
1518 .sk_allocation = GFP_ATOMIC,
1519 .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE),
1520 },
1521 .pmtudisc = IP_PMTUDISC_WANT,
1522 .uc_ttl = -1,
1523};
1524
1525void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
1526 const struct ip_options *sopt, 1511 const struct ip_options *sopt,
1527 __be32 daddr, __be32 saddr, 1512 __be32 daddr, __be32 saddr,
1528 const struct ip_reply_arg *arg, 1513 const struct ip_reply_arg *arg,
@@ -1532,9 +1517,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
1532 struct ipcm_cookie ipc; 1517 struct ipcm_cookie ipc;
1533 struct flowi4 fl4; 1518 struct flowi4 fl4;
1534 struct rtable *rt = skb_rtable(skb); 1519 struct rtable *rt = skb_rtable(skb);
1520 struct net *net = sock_net(sk);
1535 struct sk_buff *nskb; 1521 struct sk_buff *nskb;
1536 struct sock *sk;
1537 struct inet_sock *inet;
1538 int err; 1522 int err;
1539 1523
1540 if (__ip_options_echo(&replyopts.opt.opt, skb, sopt)) 1524 if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
@@ -1565,15 +1549,11 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
1565 if (IS_ERR(rt)) 1549 if (IS_ERR(rt))
1566 return; 1550 return;
1567 1551
1568 inet = &get_cpu_var(unicast_sock); 1552 inet_sk(sk)->tos = arg->tos;
1569 1553
1570 inet->tos = arg->tos;
1571 sk = &inet->sk;
1572 sk->sk_priority = skb->priority; 1554 sk->sk_priority = skb->priority;
1573 sk->sk_protocol = ip_hdr(skb)->protocol; 1555 sk->sk_protocol = ip_hdr(skb)->protocol;
1574 sk->sk_bound_dev_if = arg->bound_dev_if; 1556 sk->sk_bound_dev_if = arg->bound_dev_if;
1575 sock_net_set(sk, net);
1576 __skb_queue_head_init(&sk->sk_write_queue);
1577 sk->sk_sndbuf = sysctl_wmem_default; 1557 sk->sk_sndbuf = sysctl_wmem_default;
1578 err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, 1558 err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
1579 len, 0, &ipc, &rt, MSG_DONTWAIT); 1559 len, 0, &ipc, &rt, MSG_DONTWAIT);
@@ -1589,13 +1569,10 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
1589 arg->csumoffset) = csum_fold(csum_add(nskb->csum, 1569 arg->csumoffset) = csum_fold(csum_add(nskb->csum,
1590 arg->csum)); 1570 arg->csum));
1591 nskb->ip_summed = CHECKSUM_NONE; 1571 nskb->ip_summed = CHECKSUM_NONE;
1592 skb_orphan(nskb);
1593 skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); 1572 skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
1594 ip_push_pending_frames(sk, &fl4); 1573 ip_push_pending_frames(sk, &fl4);
1595 } 1574 }
1596out: 1575out:
1597 put_cpu_var(unicast_sock);
1598
1599 ip_rt_put(rt); 1576 ip_rt_put(rt);
1600} 1577}
1601 1578
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8a89c738b7a3..6b85adb05003 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -461,17 +461,13 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
461 461
462 memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); 462 memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
463 sin = &errhdr.offender; 463 sin = &errhdr.offender;
464 sin->sin_family = AF_UNSPEC; 464 memset(sin, 0, sizeof(*sin));
465 465
466 if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || 466 if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
467 ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { 467 ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) {
468 struct inet_sock *inet = inet_sk(sk);
469
470 sin->sin_family = AF_INET; 468 sin->sin_family = AF_INET;
471 sin->sin_addr.s_addr = ip_hdr(skb)->saddr; 469 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
472 sin->sin_port = 0; 470 if (inet_sk(sk)->cmsg_flags)
473 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
474 if (inet->cmsg_flags)
475 ip_cmsg_recv(msg, skb); 471 ip_cmsg_recv(msg, skb);
476 } 472 }
477 473
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
index ff2d23d8c87a..6ecfce63201a 100644
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -27,10 +27,10 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr,
27 27
28 memset(&mr, 0, sizeof(mr)); 28 memset(&mr, 0, sizeof(mr));
29 if (priv->sreg_proto_min) { 29 if (priv->sreg_proto_min) {
30 mr.range[0].min.all = (__force __be16) 30 mr.range[0].min.all =
31 data[priv->sreg_proto_min].data[0]; 31 *(__be16 *)&data[priv->sreg_proto_min].data[0];
32 mr.range[0].max.all = (__force __be16) 32 mr.range[0].max.all =
33 data[priv->sreg_proto_max].data[0]; 33 *(__be16 *)&data[priv->sreg_proto_max].data[0];
34 mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 34 mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
35 } 35 }
36 36
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index c0d82f78d364..2a3720fb5a5f 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -966,8 +966,11 @@ bool ping_rcv(struct sk_buff *skb)
966 966
967 sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); 967 sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id));
968 if (sk != NULL) { 968 if (sk != NULL) {
969 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
970
969 pr_debug("rcv on socket %p\n", sk); 971 pr_debug("rcv on socket %p\n", sk);
970 ping_queue_rcv_skb(sk, skb_get(skb)); 972 if (skb2)
973 ping_queue_rcv_skb(sk, skb2);
971 sock_put(sk); 974 sock_put(sk);
972 return true; 975 return true;
973 } 976 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6a2155b02602..52e1f2bf0ca2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -966,6 +966,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
966 if (dst->dev->mtu < mtu) 966 if (dst->dev->mtu < mtu)
967 return; 967 return;
968 968
969 if (rt->rt_pmtu && rt->rt_pmtu < mtu)
970 return;
971
969 if (mtu < ip_rt_min_pmtu) 972 if (mtu < ip_rt_min_pmtu)
970 mtu = ip_rt_min_pmtu; 973 mtu = ip_rt_min_pmtu;
971 974
@@ -1554,11 +1557,10 @@ static int __mkroute_input(struct sk_buff *skb,
1554 1557
1555 do_cache = res->fi && !itag; 1558 do_cache = res->fi && !itag;
1556 if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && 1559 if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
1560 skb->protocol == htons(ETH_P_IP) &&
1557 (IN_DEV_SHARED_MEDIA(out_dev) || 1561 (IN_DEV_SHARED_MEDIA(out_dev) ||
1558 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) { 1562 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1559 flags |= RTCF_DOREDIRECT; 1563 IPCB(skb)->flags |= IPSKB_DOREDIRECT;
1560 do_cache = false;
1561 }
1562 1564
1563 if (skb->protocol != htons(ETH_P_IP)) { 1565 if (skb->protocol != htons(ETH_P_IP)) {
1564 /* Not IP (i.e. ARP). Do not create route, if it is 1566 /* Not IP (i.e. ARP). Do not create route, if it is
@@ -2303,6 +2305,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
2303 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; 2305 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2304 if (rt->rt_flags & RTCF_NOTIFY) 2306 if (rt->rt_flags & RTCF_NOTIFY)
2305 r->rtm_flags |= RTM_F_NOTIFY; 2307 r->rtm_flags |= RTM_F_NOTIFY;
2308 if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
2309 r->rtm_flags |= RTCF_DOREDIRECT;
2306 2310
2307 if (nla_put_be32(skb, RTA_DST, dst)) 2311 if (nla_put_be32(skb, RTA_DST, dst))
2308 goto nla_put_failure; 2312 goto nla_put_failure;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index bb395d46a389..c037644eafb7 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
150 tcp_slow_start(tp, acked); 150 tcp_slow_start(tp, acked);
151 else { 151 else {
152 bictcp_update(ca, tp->snd_cwnd); 152 bictcp_update(ca, tp->snd_cwnd);
153 tcp_cong_avoid_ai(tp, ca->cnt); 153 tcp_cong_avoid_ai(tp, ca->cnt, 1);
154 } 154 }
155} 155}
156 156
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 27ead0dd16bc..8670e68e2ce6 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -291,26 +291,32 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
291 * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and 291 * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and
292 * returns the leftover acks to adjust cwnd in congestion avoidance mode. 292 * returns the leftover acks to adjust cwnd in congestion avoidance mode.
293 */ 293 */
294void tcp_slow_start(struct tcp_sock *tp, u32 acked) 294u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
295{ 295{
296 u32 cwnd = tp->snd_cwnd + acked; 296 u32 cwnd = tp->snd_cwnd + acked;
297 297
298 if (cwnd > tp->snd_ssthresh) 298 if (cwnd > tp->snd_ssthresh)
299 cwnd = tp->snd_ssthresh + 1; 299 cwnd = tp->snd_ssthresh + 1;
300 acked -= cwnd - tp->snd_cwnd;
300 tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); 301 tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
302
303 return acked;
301} 304}
302EXPORT_SYMBOL_GPL(tcp_slow_start); 305EXPORT_SYMBOL_GPL(tcp_slow_start);
303 306
304/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w) */ 307/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w),
305void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w) 308 * for every packet that was ACKed.
309 */
310void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
306{ 311{
312 tp->snd_cwnd_cnt += acked;
307 if (tp->snd_cwnd_cnt >= w) { 313 if (tp->snd_cwnd_cnt >= w) {
308 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 314 u32 delta = tp->snd_cwnd_cnt / w;
309 tp->snd_cwnd++; 315
310 tp->snd_cwnd_cnt = 0; 316 tp->snd_cwnd_cnt -= delta * w;
311 } else { 317 tp->snd_cwnd += delta;
312 tp->snd_cwnd_cnt++;
313 } 318 }
319 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
314} 320}
315EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); 321EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
316 322
@@ -329,11 +335,13 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
329 return; 335 return;
330 336
331 /* In "safe" area, increase. */ 337 /* In "safe" area, increase. */
332 if (tp->snd_cwnd <= tp->snd_ssthresh) 338 if (tp->snd_cwnd <= tp->snd_ssthresh) {
333 tcp_slow_start(tp, acked); 339 acked = tcp_slow_start(tp, acked);
340 if (!acked)
341 return;
342 }
334 /* In dangerous area, increase slowly. */ 343 /* In dangerous area, increase slowly. */
335 else 344 tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
336 tcp_cong_avoid_ai(tp, tp->snd_cwnd);
337} 345}
338EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); 346EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
339 347
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 6b6002416a73..4b276d1ed980 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -93,9 +93,7 @@ struct bictcp {
93 u32 epoch_start; /* beginning of an epoch */ 93 u32 epoch_start; /* beginning of an epoch */
94 u32 ack_cnt; /* number of acks */ 94 u32 ack_cnt; /* number of acks */
95 u32 tcp_cwnd; /* estimated tcp cwnd */ 95 u32 tcp_cwnd; /* estimated tcp cwnd */
96#define ACK_RATIO_SHIFT 4 96 u16 unused;
97#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT)
98 u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */
99 u8 sample_cnt; /* number of samples to decide curr_rtt */ 97 u8 sample_cnt; /* number of samples to decide curr_rtt */
100 u8 found; /* the exit point is found? */ 98 u8 found; /* the exit point is found? */
101 u32 round_start; /* beginning of each round */ 99 u32 round_start; /* beginning of each round */
@@ -114,7 +112,6 @@ static inline void bictcp_reset(struct bictcp *ca)
114 ca->bic_K = 0; 112 ca->bic_K = 0;
115 ca->delay_min = 0; 113 ca->delay_min = 0;
116 ca->epoch_start = 0; 114 ca->epoch_start = 0;
117 ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
118 ca->ack_cnt = 0; 115 ca->ack_cnt = 0;
119 ca->tcp_cwnd = 0; 116 ca->tcp_cwnd = 0;
120 ca->found = 0; 117 ca->found = 0;
@@ -205,23 +202,30 @@ static u32 cubic_root(u64 a)
205/* 202/*
206 * Compute congestion window to use. 203 * Compute congestion window to use.
207 */ 204 */
208static inline void bictcp_update(struct bictcp *ca, u32 cwnd) 205static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
209{ 206{
210 u32 delta, bic_target, max_cnt; 207 u32 delta, bic_target, max_cnt;
211 u64 offs, t; 208 u64 offs, t;
212 209
213 ca->ack_cnt++; /* count the number of ACKs */ 210 ca->ack_cnt += acked; /* count the number of ACKed packets */
214 211
215 if (ca->last_cwnd == cwnd && 212 if (ca->last_cwnd == cwnd &&
216 (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) 213 (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
217 return; 214 return;
218 215
216 /* The CUBIC function can update ca->cnt at most once per jiffy.
217 * On all cwnd reduction events, ca->epoch_start is set to 0,
218 * which will force a recalculation of ca->cnt.
219 */
220 if (ca->epoch_start && tcp_time_stamp == ca->last_time)
221 goto tcp_friendliness;
222
219 ca->last_cwnd = cwnd; 223 ca->last_cwnd = cwnd;
220 ca->last_time = tcp_time_stamp; 224 ca->last_time = tcp_time_stamp;
221 225
222 if (ca->epoch_start == 0) { 226 if (ca->epoch_start == 0) {
223 ca->epoch_start = tcp_time_stamp; /* record beginning */ 227 ca->epoch_start = tcp_time_stamp; /* record beginning */
224 ca->ack_cnt = 1; /* start counting */ 228 ca->ack_cnt = acked; /* start counting */
225 ca->tcp_cwnd = cwnd; /* syn with cubic */ 229 ca->tcp_cwnd = cwnd; /* syn with cubic */
226 230
227 if (ca->last_max_cwnd <= cwnd) { 231 if (ca->last_max_cwnd <= cwnd) {
@@ -283,6 +287,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
283 if (ca->last_max_cwnd == 0 && ca->cnt > 20) 287 if (ca->last_max_cwnd == 0 && ca->cnt > 20)
284 ca->cnt = 20; /* increase cwnd 5% per RTT */ 288 ca->cnt = 20; /* increase cwnd 5% per RTT */
285 289
290tcp_friendliness:
286 /* TCP Friendly */ 291 /* TCP Friendly */
287 if (tcp_friendliness) { 292 if (tcp_friendliness) {
288 u32 scale = beta_scale; 293 u32 scale = beta_scale;
@@ -301,7 +306,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
301 } 306 }
302 } 307 }
303 308
304 ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack;
305 if (ca->cnt == 0) /* cannot be zero */ 309 if (ca->cnt == 0) /* cannot be zero */
306 ca->cnt = 1; 310 ca->cnt = 1;
307} 311}
@@ -317,11 +321,12 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
317 if (tp->snd_cwnd <= tp->snd_ssthresh) { 321 if (tp->snd_cwnd <= tp->snd_ssthresh) {
318 if (hystart && after(ack, ca->end_seq)) 322 if (hystart && after(ack, ca->end_seq))
319 bictcp_hystart_reset(sk); 323 bictcp_hystart_reset(sk);
320 tcp_slow_start(tp, acked); 324 acked = tcp_slow_start(tp, acked);
321 } else { 325 if (!acked)
322 bictcp_update(ca, tp->snd_cwnd); 326 return;
323 tcp_cong_avoid_ai(tp, ca->cnt);
324 } 327 }
328 bictcp_update(ca, tp->snd_cwnd, acked);
329 tcp_cong_avoid_ai(tp, ca->cnt, acked);
325} 330}
326 331
327static u32 bictcp_recalc_ssthresh(struct sock *sk) 332static u32 bictcp_recalc_ssthresh(struct sock *sk)
@@ -411,20 +416,10 @@ static void hystart_update(struct sock *sk, u32 delay)
411 */ 416 */
412static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) 417static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
413{ 418{
414 const struct inet_connection_sock *icsk = inet_csk(sk);
415 const struct tcp_sock *tp = tcp_sk(sk); 419 const struct tcp_sock *tp = tcp_sk(sk);
416 struct bictcp *ca = inet_csk_ca(sk); 420 struct bictcp *ca = inet_csk_ca(sk);
417 u32 delay; 421 u32 delay;
418 422
419 if (icsk->icsk_ca_state == TCP_CA_Open) {
420 u32 ratio = ca->delayed_ack;
421
422 ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
423 ratio += cnt;
424
425 ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT);
426 }
427
428 /* Some calls are for duplicates without timetamps */ 423 /* Some calls are for duplicates without timetamps */
429 if (rtt_us < 0) 424 if (rtt_us < 0)
430 return; 425 return;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a3f72d7fc06c..d22f54482bab 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -683,7 +683,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
683 arg.bound_dev_if = sk->sk_bound_dev_if; 683 arg.bound_dev_if = sk->sk_bound_dev_if;
684 684
685 arg.tos = ip_hdr(skb)->tos; 685 arg.tos = ip_hdr(skb)->tos;
686 ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt, 686 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
687 skb, &TCP_SKB_CB(skb)->header.h4.opt,
687 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 688 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
688 &arg, arg.iov[0].iov_len); 689 &arg, arg.iov[0].iov_len);
689 690
@@ -767,7 +768,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
767 if (oif) 768 if (oif)
768 arg.bound_dev_if = oif; 769 arg.bound_dev_if = oif;
769 arg.tos = tos; 770 arg.tos = tos;
770 ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt, 771 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
772 skb, &TCP_SKB_CB(skb)->header.h4.opt,
771 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 773 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
772 &arg, arg.iov[0].iov_len); 774 &arg, arg.iov[0].iov_len);
773 775
@@ -2428,14 +2430,39 @@ struct proto tcp_prot = {
2428}; 2430};
2429EXPORT_SYMBOL(tcp_prot); 2431EXPORT_SYMBOL(tcp_prot);
2430 2432
2433static void __net_exit tcp_sk_exit(struct net *net)
2434{
2435 int cpu;
2436
2437 for_each_possible_cpu(cpu)
2438 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2439 free_percpu(net->ipv4.tcp_sk);
2440}
2441
2431static int __net_init tcp_sk_init(struct net *net) 2442static int __net_init tcp_sk_init(struct net *net)
2432{ 2443{
2444 int res, cpu;
2445
2446 net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2447 if (!net->ipv4.tcp_sk)
2448 return -ENOMEM;
2449
2450 for_each_possible_cpu(cpu) {
2451 struct sock *sk;
2452
2453 res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2454 IPPROTO_TCP, net);
2455 if (res)
2456 goto fail;
2457 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2458 }
2433 net->ipv4.sysctl_tcp_ecn = 2; 2459 net->ipv4.sysctl_tcp_ecn = 2;
2434 return 0; 2460 return 0;
2435}
2436 2461
2437static void __net_exit tcp_sk_exit(struct net *net) 2462fail:
2438{ 2463 tcp_sk_exit(net);
2464
2465 return res;
2439} 2466}
2440 2467
2441static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) 2468static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 6824afb65d93..333bcb2415ff 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -25,7 +25,8 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
25 if (tp->snd_cwnd <= tp->snd_ssthresh) 25 if (tp->snd_cwnd <= tp->snd_ssthresh)
26 tcp_slow_start(tp, acked); 26 tcp_slow_start(tp, acked);
27 else 27 else
28 tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)); 28 tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
29 1);
29} 30}
30 31
31static u32 tcp_scalable_ssthresh(struct sock *sk) 32static u32 tcp_scalable_ssthresh(struct sock *sk)
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index a4d2d2d88dca..112151eeee45 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -159,7 +159,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
159 /* In the "non-congestive state", increase cwnd 159 /* In the "non-congestive state", increase cwnd
160 * every rtt. 160 * every rtt.
161 */ 161 */
162 tcp_cong_avoid_ai(tp, tp->snd_cwnd); 162 tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1);
163 } else { 163 } else {
164 /* In the "congestive state", increase cwnd 164 /* In the "congestive state", increase cwnd
165 * every other rtt. 165 * every other rtt.
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index cd7273218598..17d35662930d 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -92,7 +92,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
92 92
93 } else { 93 } else {
94 /* Reno */ 94 /* Reno */
95 tcp_cong_avoid_ai(tp, tp->snd_cwnd); 95 tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1);
96 } 96 }
97 97
98 /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. 98 /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 7927db0a9279..4a000f1dd757 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -99,11 +99,13 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlin
99 s_slot = cb->args[0]; 99 s_slot = cb->args[0];
100 num = s_num = cb->args[1]; 100 num = s_num = cb->args[1];
101 101
102 for (slot = s_slot; slot <= table->mask; num = s_num = 0, slot++) { 102 for (slot = s_slot; slot <= table->mask; s_num = 0, slot++) {
103 struct sock *sk; 103 struct sock *sk;
104 struct hlist_nulls_node *node; 104 struct hlist_nulls_node *node;
105 struct udp_hslot *hslot = &table->hash[slot]; 105 struct udp_hslot *hslot = &table->hash[slot];
106 106
107 num = 0;
108
107 if (hlist_nulls_empty(&hslot->head)) 109 if (hlist_nulls_empty(&hslot->head))
108 continue; 110 continue;
109 111