diff options
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 231 |
1 files changed, 108 insertions, 123 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 020766292bb0..708dc203b034 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -146,13 +146,15 @@ EXPORT_SYMBOL_GPL(tcp_twsk_unique); | |||
146 | /* This will initiate an outgoing connection. */ | 146 | /* This will initiate an outgoing connection. */ |
147 | int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | 147 | int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
148 | { | 148 | { |
149 | struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; | ||
149 | struct inet_sock *inet = inet_sk(sk); | 150 | struct inet_sock *inet = inet_sk(sk); |
150 | struct tcp_sock *tp = tcp_sk(sk); | 151 | struct tcp_sock *tp = tcp_sk(sk); |
151 | struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; | 152 | __be16 orig_sport, orig_dport; |
152 | struct rtable *rt; | ||
153 | __be32 daddr, nexthop; | 153 | __be32 daddr, nexthop; |
154 | int tmp; | 154 | struct flowi4 *fl4; |
155 | struct rtable *rt; | ||
155 | int err; | 156 | int err; |
157 | struct ip_options_rcu *inet_opt; | ||
156 | 158 | ||
157 | if (addr_len < sizeof(struct sockaddr_in)) | 159 | if (addr_len < sizeof(struct sockaddr_in)) |
158 | return -EINVAL; | 160 | return -EINVAL; |
@@ -161,20 +163,26 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
161 | return -EAFNOSUPPORT; | 163 | return -EAFNOSUPPORT; |
162 | 164 | ||
163 | nexthop = daddr = usin->sin_addr.s_addr; | 165 | nexthop = daddr = usin->sin_addr.s_addr; |
164 | if (inet->opt && inet->opt->srr) { | 166 | inet_opt = rcu_dereference_protected(inet->inet_opt, |
167 | sock_owned_by_user(sk)); | ||
168 | if (inet_opt && inet_opt->opt.srr) { | ||
165 | if (!daddr) | 169 | if (!daddr) |
166 | return -EINVAL; | 170 | return -EINVAL; |
167 | nexthop = inet->opt->faddr; | 171 | nexthop = inet_opt->opt.faddr; |
168 | } | 172 | } |
169 | 173 | ||
170 | tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, | 174 | orig_sport = inet->inet_sport; |
171 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, | 175 | orig_dport = usin->sin_port; |
172 | IPPROTO_TCP, | 176 | fl4 = &inet->cork.fl.u.ip4; |
173 | inet->inet_sport, usin->sin_port, sk, 1); | 177 | rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, |
174 | if (tmp < 0) { | 178 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, |
175 | if (tmp == -ENETUNREACH) | 179 | IPPROTO_TCP, |
180 | orig_sport, orig_dport, sk, true); | ||
181 | if (IS_ERR(rt)) { | ||
182 | err = PTR_ERR(rt); | ||
183 | if (err == -ENETUNREACH) | ||
176 | IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); | 184 | IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); |
177 | return tmp; | 185 | return err; |
178 | } | 186 | } |
179 | 187 | ||
180 | if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { | 188 | if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { |
@@ -182,11 +190,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
182 | return -ENETUNREACH; | 190 | return -ENETUNREACH; |
183 | } | 191 | } |
184 | 192 | ||
185 | if (!inet->opt || !inet->opt->srr) | 193 | if (!inet_opt || !inet_opt->opt.srr) |
186 | daddr = rt->rt_dst; | 194 | daddr = fl4->daddr; |
187 | 195 | ||
188 | if (!inet->inet_saddr) | 196 | if (!inet->inet_saddr) |
189 | inet->inet_saddr = rt->rt_src; | 197 | inet->inet_saddr = fl4->saddr; |
190 | inet->inet_rcv_saddr = inet->inet_saddr; | 198 | inet->inet_rcv_saddr = inet->inet_saddr; |
191 | 199 | ||
192 | if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { | 200 | if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { |
@@ -197,8 +205,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
197 | } | 205 | } |
198 | 206 | ||
199 | if (tcp_death_row.sysctl_tw_recycle && | 207 | if (tcp_death_row.sysctl_tw_recycle && |
200 | !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { | 208 | !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) { |
201 | struct inet_peer *peer = rt_get_peer(rt); | 209 | struct inet_peer *peer = rt_get_peer(rt, fl4->daddr); |
202 | /* | 210 | /* |
203 | * VJ's idea. We save last timestamp seen from | 211 | * VJ's idea. We save last timestamp seen from |
204 | * the destination in peer table, when entering state | 212 | * the destination in peer table, when entering state |
@@ -218,8 +226,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
218 | inet->inet_daddr = daddr; | 226 | inet->inet_daddr = daddr; |
219 | 227 | ||
220 | inet_csk(sk)->icsk_ext_hdr_len = 0; | 228 | inet_csk(sk)->icsk_ext_hdr_len = 0; |
221 | if (inet->opt) | 229 | if (inet_opt) |
222 | inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; | 230 | inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; |
223 | 231 | ||
224 | tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; | 232 | tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; |
225 | 233 | ||
@@ -233,11 +241,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
233 | if (err) | 241 | if (err) |
234 | goto failure; | 242 | goto failure; |
235 | 243 | ||
236 | err = ip_route_newports(&rt, IPPROTO_TCP, | 244 | rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, |
237 | inet->inet_sport, inet->inet_dport, sk); | 245 | inet->inet_sport, inet->inet_dport, sk); |
238 | if (err) | 246 | if (IS_ERR(rt)) { |
247 | err = PTR_ERR(rt); | ||
248 | rt = NULL; | ||
239 | goto failure; | 249 | goto failure; |
240 | 250 | } | |
241 | /* OK, now commit destination to socket. */ | 251 | /* OK, now commit destination to socket. */ |
242 | sk->sk_gso_type = SKB_GSO_TCPV4; | 252 | sk->sk_gso_type = SKB_GSO_TCPV4; |
243 | sk_setup_caps(sk, &rt->dst); | 253 | sk_setup_caps(sk, &rt->dst); |
@@ -273,7 +283,7 @@ EXPORT_SYMBOL(tcp_v4_connect); | |||
273 | /* | 283 | /* |
274 | * This routine does path mtu discovery as defined in RFC1191. | 284 | * This routine does path mtu discovery as defined in RFC1191. |
275 | */ | 285 | */ |
276 | static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) | 286 | static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) |
277 | { | 287 | { |
278 | struct dst_entry *dst; | 288 | struct dst_entry *dst; |
279 | struct inet_sock *inet = inet_sk(sk); | 289 | struct inet_sock *inet = inet_sk(sk); |
@@ -335,7 +345,7 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) | |||
335 | 345 | ||
336 | void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | 346 | void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) |
337 | { | 347 | { |
338 | struct iphdr *iph = (struct iphdr *)icmp_skb->data; | 348 | const struct iphdr *iph = (const struct iphdr *)icmp_skb->data; |
339 | struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); | 349 | struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); |
340 | struct inet_connection_sock *icsk; | 350 | struct inet_connection_sock *icsk; |
341 | struct tcp_sock *tp; | 351 | struct tcp_sock *tp; |
@@ -415,6 +425,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
415 | !icsk->icsk_backoff) | 425 | !icsk->icsk_backoff) |
416 | break; | 426 | break; |
417 | 427 | ||
428 | if (sock_owned_by_user(sk)) | ||
429 | break; | ||
430 | |||
418 | icsk->icsk_backoff--; | 431 | icsk->icsk_backoff--; |
419 | inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << | 432 | inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << |
420 | icsk->icsk_backoff; | 433 | icsk->icsk_backoff; |
@@ -429,11 +442,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
429 | if (remaining) { | 442 | if (remaining) { |
430 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 443 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
431 | remaining, TCP_RTO_MAX); | 444 | remaining, TCP_RTO_MAX); |
432 | } else if (sock_owned_by_user(sk)) { | ||
433 | /* RTO revert clocked out retransmission, | ||
434 | * but socket is locked. Will defer. */ | ||
435 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
436 | HZ/20, TCP_RTO_MAX); | ||
437 | } else { | 445 | } else { |
438 | /* RTO revert clocked out retransmission. | 446 | /* RTO revert clocked out retransmission. |
439 | * Will retransmit now */ | 447 | * Will retransmit now */ |
@@ -643,7 +651,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
643 | arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; | 651 | arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; |
644 | 652 | ||
645 | net = dev_net(skb_dst(skb)->dev); | 653 | net = dev_net(skb_dst(skb)->dev); |
646 | ip_send_reply(net->ipv4.tcp_sock, skb, | 654 | ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, |
647 | &arg, arg.iov[0].iov_len); | 655 | &arg, arg.iov[0].iov_len); |
648 | 656 | ||
649 | TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); | 657 | TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); |
@@ -718,7 +726,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, | |||
718 | if (oif) | 726 | if (oif) |
719 | arg.bound_dev_if = oif; | 727 | arg.bound_dev_if = oif; |
720 | 728 | ||
721 | ip_send_reply(net->ipv4.tcp_sock, skb, | 729 | ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, |
722 | &arg, arg.iov[0].iov_len); | 730 | &arg, arg.iov[0].iov_len); |
723 | 731 | ||
724 | TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); | 732 | TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); |
@@ -761,11 +769,12 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | |||
761 | struct request_values *rvp) | 769 | struct request_values *rvp) |
762 | { | 770 | { |
763 | const struct inet_request_sock *ireq = inet_rsk(req); | 771 | const struct inet_request_sock *ireq = inet_rsk(req); |
772 | struct flowi4 fl4; | ||
764 | int err = -1; | 773 | int err = -1; |
765 | struct sk_buff * skb; | 774 | struct sk_buff * skb; |
766 | 775 | ||
767 | /* First, grab a route. */ | 776 | /* First, grab a route. */ |
768 | if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) | 777 | if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) |
769 | return -1; | 778 | return -1; |
770 | 779 | ||
771 | skb = tcp_make_synack(sk, dst, req, rvp); | 780 | skb = tcp_make_synack(sk, dst, req, rvp); |
@@ -816,17 +825,18 @@ static void syn_flood_warning(const struct sk_buff *skb) | |||
816 | /* | 825 | /* |
817 | * Save and compile IPv4 options into the request_sock if needed. | 826 | * Save and compile IPv4 options into the request_sock if needed. |
818 | */ | 827 | */ |
819 | static struct ip_options *tcp_v4_save_options(struct sock *sk, | 828 | static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk, |
820 | struct sk_buff *skb) | 829 | struct sk_buff *skb) |
821 | { | 830 | { |
822 | struct ip_options *opt = &(IPCB(skb)->opt); | 831 | const struct ip_options *opt = &(IPCB(skb)->opt); |
823 | struct ip_options *dopt = NULL; | 832 | struct ip_options_rcu *dopt = NULL; |
824 | 833 | ||
825 | if (opt && opt->optlen) { | 834 | if (opt && opt->optlen) { |
826 | int opt_size = optlength(opt); | 835 | int opt_size = sizeof(*dopt) + opt->optlen; |
836 | |||
827 | dopt = kmalloc(opt_size, GFP_ATOMIC); | 837 | dopt = kmalloc(opt_size, GFP_ATOMIC); |
828 | if (dopt) { | 838 | if (dopt) { |
829 | if (ip_options_echo(dopt, skb)) { | 839 | if (ip_options_echo(&dopt->opt, skb)) { |
830 | kfree(dopt); | 840 | kfree(dopt); |
831 | dopt = NULL; | 841 | dopt = NULL; |
832 | } | 842 | } |
@@ -1212,12 +1222,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { | |||
1212 | }; | 1222 | }; |
1213 | #endif | 1223 | #endif |
1214 | 1224 | ||
1215 | static struct timewait_sock_ops tcp_timewait_sock_ops = { | ||
1216 | .twsk_obj_size = sizeof(struct tcp_timewait_sock), | ||
1217 | .twsk_unique = tcp_twsk_unique, | ||
1218 | .twsk_destructor= tcp_twsk_destructor, | ||
1219 | }; | ||
1220 | |||
1221 | int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | 1225 | int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) |
1222 | { | 1226 | { |
1223 | struct tcp_extend_values tmp_ext; | 1227 | struct tcp_extend_values tmp_ext; |
@@ -1335,6 +1339,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1335 | req->cookie_ts = tmp_opt.tstamp_ok; | 1339 | req->cookie_ts = tmp_opt.tstamp_ok; |
1336 | } else if (!isn) { | 1340 | } else if (!isn) { |
1337 | struct inet_peer *peer = NULL; | 1341 | struct inet_peer *peer = NULL; |
1342 | struct flowi4 fl4; | ||
1338 | 1343 | ||
1339 | /* VJ's idea. We save last timestamp seen | 1344 | /* VJ's idea. We save last timestamp seen |
1340 | * from the destination in peer table, when entering | 1345 | * from the destination in peer table, when entering |
@@ -1347,9 +1352,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1347 | */ | 1352 | */ |
1348 | if (tmp_opt.saw_tstamp && | 1353 | if (tmp_opt.saw_tstamp && |
1349 | tcp_death_row.sysctl_tw_recycle && | 1354 | tcp_death_row.sysctl_tw_recycle && |
1350 | (dst = inet_csk_route_req(sk, req)) != NULL && | 1355 | (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && |
1351 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && | 1356 | fl4.daddr == saddr && |
1352 | peer->v4daddr == saddr) { | 1357 | (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) { |
1353 | inet_peer_refcheck(peer); | 1358 | inet_peer_refcheck(peer); |
1354 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && | 1359 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && |
1355 | (s32)(peer->tcp_ts - req->ts_recent) > | 1360 | (s32)(peer->tcp_ts - req->ts_recent) > |
@@ -1413,19 +1418,16 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1413 | #ifdef CONFIG_TCP_MD5SIG | 1418 | #ifdef CONFIG_TCP_MD5SIG |
1414 | struct tcp_md5sig_key *key; | 1419 | struct tcp_md5sig_key *key; |
1415 | #endif | 1420 | #endif |
1421 | struct ip_options_rcu *inet_opt; | ||
1416 | 1422 | ||
1417 | if (sk_acceptq_is_full(sk)) | 1423 | if (sk_acceptq_is_full(sk)) |
1418 | goto exit_overflow; | 1424 | goto exit_overflow; |
1419 | 1425 | ||
1420 | if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) | ||
1421 | goto exit; | ||
1422 | |||
1423 | newsk = tcp_create_openreq_child(sk, req, skb); | 1426 | newsk = tcp_create_openreq_child(sk, req, skb); |
1424 | if (!newsk) | 1427 | if (!newsk) |
1425 | goto exit; | 1428 | goto exit_nonewsk; |
1426 | 1429 | ||
1427 | newsk->sk_gso_type = SKB_GSO_TCPV4; | 1430 | newsk->sk_gso_type = SKB_GSO_TCPV4; |
1428 | sk_setup_caps(newsk, dst); | ||
1429 | 1431 | ||
1430 | newtp = tcp_sk(newsk); | 1432 | newtp = tcp_sk(newsk); |
1431 | newinet = inet_sk(newsk); | 1433 | newinet = inet_sk(newsk); |
@@ -1433,18 +1435,24 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1433 | newinet->inet_daddr = ireq->rmt_addr; | 1435 | newinet->inet_daddr = ireq->rmt_addr; |
1434 | newinet->inet_rcv_saddr = ireq->loc_addr; | 1436 | newinet->inet_rcv_saddr = ireq->loc_addr; |
1435 | newinet->inet_saddr = ireq->loc_addr; | 1437 | newinet->inet_saddr = ireq->loc_addr; |
1436 | newinet->opt = ireq->opt; | 1438 | inet_opt = ireq->opt; |
1439 | rcu_assign_pointer(newinet->inet_opt, inet_opt); | ||
1437 | ireq->opt = NULL; | 1440 | ireq->opt = NULL; |
1438 | newinet->mc_index = inet_iif(skb); | 1441 | newinet->mc_index = inet_iif(skb); |
1439 | newinet->mc_ttl = ip_hdr(skb)->ttl; | 1442 | newinet->mc_ttl = ip_hdr(skb)->ttl; |
1440 | inet_csk(newsk)->icsk_ext_hdr_len = 0; | 1443 | inet_csk(newsk)->icsk_ext_hdr_len = 0; |
1441 | if (newinet->opt) | 1444 | if (inet_opt) |
1442 | inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; | 1445 | inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; |
1443 | newinet->inet_id = newtp->write_seq ^ jiffies; | 1446 | newinet->inet_id = newtp->write_seq ^ jiffies; |
1444 | 1447 | ||
1448 | if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) | ||
1449 | goto put_and_exit; | ||
1450 | |||
1451 | sk_setup_caps(newsk, dst); | ||
1452 | |||
1445 | tcp_mtup_init(newsk); | 1453 | tcp_mtup_init(newsk); |
1446 | tcp_sync_mss(newsk, dst_mtu(dst)); | 1454 | tcp_sync_mss(newsk, dst_mtu(dst)); |
1447 | newtp->advmss = dst_metric(dst, RTAX_ADVMSS); | 1455 | newtp->advmss = dst_metric_advmss(dst); |
1448 | if (tcp_sk(sk)->rx_opt.user_mss && | 1456 | if (tcp_sk(sk)->rx_opt.user_mss && |
1449 | tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) | 1457 | tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) |
1450 | newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; | 1458 | newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; |
@@ -1469,17 +1477,22 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1469 | } | 1477 | } |
1470 | #endif | 1478 | #endif |
1471 | 1479 | ||
1480 | if (__inet_inherit_port(sk, newsk) < 0) | ||
1481 | goto put_and_exit; | ||
1472 | __inet_hash_nolisten(newsk, NULL); | 1482 | __inet_hash_nolisten(newsk, NULL); |
1473 | __inet_inherit_port(sk, newsk); | ||
1474 | 1483 | ||
1475 | return newsk; | 1484 | return newsk; |
1476 | 1485 | ||
1477 | exit_overflow: | 1486 | exit_overflow: |
1478 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); | 1487 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); |
1488 | exit_nonewsk: | ||
1489 | dst_release(dst); | ||
1479 | exit: | 1490 | exit: |
1480 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); | 1491 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); |
1481 | dst_release(dst); | ||
1482 | return NULL; | 1492 | return NULL; |
1493 | put_and_exit: | ||
1494 | sock_put(newsk); | ||
1495 | goto exit; | ||
1483 | } | 1496 | } |
1484 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); | 1497 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); |
1485 | 1498 | ||
@@ -1560,12 +1573,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
1560 | 1573 | ||
1561 | if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ | 1574 | if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ |
1562 | sock_rps_save_rxhash(sk, skb->rxhash); | 1575 | sock_rps_save_rxhash(sk, skb->rxhash); |
1563 | TCP_CHECK_TIMER(sk); | ||
1564 | if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { | 1576 | if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { |
1565 | rsk = sk; | 1577 | rsk = sk; |
1566 | goto reset; | 1578 | goto reset; |
1567 | } | 1579 | } |
1568 | TCP_CHECK_TIMER(sk); | ||
1569 | return 0; | 1580 | return 0; |
1570 | } | 1581 | } |
1571 | 1582 | ||
@@ -1578,6 +1589,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
1578 | goto discard; | 1589 | goto discard; |
1579 | 1590 | ||
1580 | if (nsk != sk) { | 1591 | if (nsk != sk) { |
1592 | sock_rps_save_rxhash(nsk, skb->rxhash); | ||
1581 | if (tcp_child_process(sk, nsk, skb)) { | 1593 | if (tcp_child_process(sk, nsk, skb)) { |
1582 | rsk = nsk; | 1594 | rsk = nsk; |
1583 | goto reset; | 1595 | goto reset; |
@@ -1587,13 +1599,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
1587 | } else | 1599 | } else |
1588 | sock_rps_save_rxhash(sk, skb->rxhash); | 1600 | sock_rps_save_rxhash(sk, skb->rxhash); |
1589 | 1601 | ||
1590 | |||
1591 | TCP_CHECK_TIMER(sk); | ||
1592 | if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { | 1602 | if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { |
1593 | rsk = sk; | 1603 | rsk = sk; |
1594 | goto reset; | 1604 | goto reset; |
1595 | } | 1605 | } |
1596 | TCP_CHECK_TIMER(sk); | ||
1597 | return 0; | 1606 | return 0; |
1598 | 1607 | ||
1599 | reset: | 1608 | reset: |
@@ -1761,64 +1770,41 @@ do_time_wait: | |||
1761 | goto discard_it; | 1770 | goto discard_it; |
1762 | } | 1771 | } |
1763 | 1772 | ||
1764 | /* VJ's idea. Save last timestamp seen from this destination | 1773 | struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) |
1765 | * and hold it at least for normal timewait interval to use for duplicate | ||
1766 | * segment detection in subsequent connections, before they enter synchronized | ||
1767 | * state. | ||
1768 | */ | ||
1769 | |||
1770 | int tcp_v4_remember_stamp(struct sock *sk) | ||
1771 | { | 1774 | { |
1775 | struct rtable *rt = (struct rtable *) __sk_dst_get(sk); | ||
1772 | struct inet_sock *inet = inet_sk(sk); | 1776 | struct inet_sock *inet = inet_sk(sk); |
1773 | struct tcp_sock *tp = tcp_sk(sk); | 1777 | struct inet_peer *peer; |
1774 | struct rtable *rt = (struct rtable *)__sk_dst_get(sk); | ||
1775 | struct inet_peer *peer = NULL; | ||
1776 | int release_it = 0; | ||
1777 | 1778 | ||
1778 | if (!rt || rt->rt_dst != inet->inet_daddr) { | 1779 | if (!rt || |
1779 | peer = inet_getpeer(inet->inet_daddr, 1); | 1780 | inet->cork.fl.u.ip4.daddr != inet->inet_daddr) { |
1780 | release_it = 1; | 1781 | peer = inet_getpeer_v4(inet->inet_daddr, 1); |
1782 | *release_it = true; | ||
1781 | } else { | 1783 | } else { |
1782 | if (!rt->peer) | 1784 | if (!rt->peer) |
1783 | rt_bind_peer(rt, 1); | 1785 | rt_bind_peer(rt, inet->inet_daddr, 1); |
1784 | peer = rt->peer; | 1786 | peer = rt->peer; |
1787 | *release_it = false; | ||
1785 | } | 1788 | } |
1786 | 1789 | ||
1787 | if (peer) { | 1790 | return peer; |
1788 | if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || | ||
1789 | ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && | ||
1790 | peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { | ||
1791 | peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; | ||
1792 | peer->tcp_ts = tp->rx_opt.ts_recent; | ||
1793 | } | ||
1794 | if (release_it) | ||
1795 | inet_putpeer(peer); | ||
1796 | return 1; | ||
1797 | } | ||
1798 | |||
1799 | return 0; | ||
1800 | } | 1791 | } |
1801 | EXPORT_SYMBOL(tcp_v4_remember_stamp); | 1792 | EXPORT_SYMBOL(tcp_v4_get_peer); |
1802 | 1793 | ||
1803 | int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) | 1794 | void *tcp_v4_tw_get_peer(struct sock *sk) |
1804 | { | 1795 | { |
1805 | struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); | 1796 | struct inet_timewait_sock *tw = inet_twsk(sk); |
1806 | |||
1807 | if (peer) { | ||
1808 | const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | ||
1809 | |||
1810 | if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || | ||
1811 | ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && | ||
1812 | peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { | ||
1813 | peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; | ||
1814 | peer->tcp_ts = tcptw->tw_ts_recent; | ||
1815 | } | ||
1816 | inet_putpeer(peer); | ||
1817 | return 1; | ||
1818 | } | ||
1819 | 1797 | ||
1820 | return 0; | 1798 | return inet_getpeer_v4(tw->tw_daddr, 1); |
1821 | } | 1799 | } |
1800 | EXPORT_SYMBOL(tcp_v4_tw_get_peer); | ||
1801 | |||
1802 | static struct timewait_sock_ops tcp_timewait_sock_ops = { | ||
1803 | .twsk_obj_size = sizeof(struct tcp_timewait_sock), | ||
1804 | .twsk_unique = tcp_twsk_unique, | ||
1805 | .twsk_destructor= tcp_twsk_destructor, | ||
1806 | .twsk_getpeer = tcp_v4_tw_get_peer, | ||
1807 | }; | ||
1822 | 1808 | ||
1823 | const struct inet_connection_sock_af_ops ipv4_specific = { | 1809 | const struct inet_connection_sock_af_ops ipv4_specific = { |
1824 | .queue_xmit = ip_queue_xmit, | 1810 | .queue_xmit = ip_queue_xmit, |
@@ -1826,7 +1812,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = { | |||
1826 | .rebuild_header = inet_sk_rebuild_header, | 1812 | .rebuild_header = inet_sk_rebuild_header, |
1827 | .conn_request = tcp_v4_conn_request, | 1813 | .conn_request = tcp_v4_conn_request, |
1828 | .syn_recv_sock = tcp_v4_syn_recv_sock, | 1814 | .syn_recv_sock = tcp_v4_syn_recv_sock, |
1829 | .remember_stamp = tcp_v4_remember_stamp, | 1815 | .get_peer = tcp_v4_get_peer, |
1830 | .net_header_len = sizeof(struct iphdr), | 1816 | .net_header_len = sizeof(struct iphdr), |
1831 | .setsockopt = ip_setsockopt, | 1817 | .setsockopt = ip_setsockopt, |
1832 | .getsockopt = ip_getsockopt, | 1818 | .getsockopt = ip_getsockopt, |
@@ -2022,13 +2008,12 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
2022 | } | 2008 | } |
2023 | req = req->dl_next; | 2009 | req = req->dl_next; |
2024 | } | 2010 | } |
2025 | st->offset = 0; | ||
2026 | if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) | 2011 | if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) |
2027 | break; | 2012 | break; |
2028 | get_req: | 2013 | get_req: |
2029 | req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; | 2014 | req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; |
2030 | } | 2015 | } |
2031 | sk = sk_next(st->syn_wait_sk); | 2016 | sk = sk_nulls_next(st->syn_wait_sk); |
2032 | st->state = TCP_SEQ_STATE_LISTENING; | 2017 | st->state = TCP_SEQ_STATE_LISTENING; |
2033 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); | 2018 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2034 | } else { | 2019 | } else { |
@@ -2037,11 +2022,13 @@ get_req: | |||
2037 | if (reqsk_queue_len(&icsk->icsk_accept_queue)) | 2022 | if (reqsk_queue_len(&icsk->icsk_accept_queue)) |
2038 | goto start_req; | 2023 | goto start_req; |
2039 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); | 2024 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2040 | sk = sk_next(sk); | 2025 | sk = sk_nulls_next(sk); |
2041 | } | 2026 | } |
2042 | get_sk: | 2027 | get_sk: |
2043 | sk_nulls_for_each_from(sk, node) { | 2028 | sk_nulls_for_each_from(sk, node) { |
2044 | if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { | 2029 | if (!net_eq(sock_net(sk), net)) |
2030 | continue; | ||
2031 | if (sk->sk_family == st->family) { | ||
2045 | cur = sk; | 2032 | cur = sk; |
2046 | goto out; | 2033 | goto out; |
2047 | } | 2034 | } |
@@ -2385,7 +2372,7 @@ static void get_openreq4(struct sock *sk, struct request_sock *req, | |||
2385 | int ttd = req->expires - jiffies; | 2372 | int ttd = req->expires - jiffies; |
2386 | 2373 | ||
2387 | seq_printf(f, "%4d: %08X:%04X %08X:%04X" | 2374 | seq_printf(f, "%4d: %08X:%04X %08X:%04X" |
2388 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n", | 2375 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n", |
2389 | i, | 2376 | i, |
2390 | ireq->loc_addr, | 2377 | ireq->loc_addr, |
2391 | ntohs(inet_sk(sk)->inet_sport), | 2378 | ntohs(inet_sk(sk)->inet_sport), |
@@ -2440,7 +2427,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) | |||
2440 | rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); | 2427 | rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); |
2441 | 2428 | ||
2442 | seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " | 2429 | seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " |
2443 | "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n", | 2430 | "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n", |
2444 | i, src, srcp, dest, destp, sk->sk_state, | 2431 | i, src, srcp, dest, destp, sk->sk_state, |
2445 | tp->write_seq - tp->snd_una, | 2432 | tp->write_seq - tp->snd_una, |
2446 | rx_queue, | 2433 | rx_queue, |
@@ -2475,7 +2462,7 @@ static void get_timewait4_sock(struct inet_timewait_sock *tw, | |||
2475 | srcp = ntohs(tw->tw_sport); | 2462 | srcp = ntohs(tw->tw_sport); |
2476 | 2463 | ||
2477 | seq_printf(f, "%4d: %08X:%04X %08X:%04X" | 2464 | seq_printf(f, "%4d: %08X:%04X %08X:%04X" |
2478 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n", | 2465 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n", |
2479 | i, src, srcp, dest, destp, tw->tw_substate, 0, 0, | 2466 | i, src, srcp, dest, destp, tw->tw_substate, 0, 0, |
2480 | 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, | 2467 | 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, |
2481 | atomic_read(&tw->tw_refcnt), tw, len); | 2468 | atomic_read(&tw->tw_refcnt), tw, len); |
@@ -2553,7 +2540,7 @@ void tcp4_proc_exit(void) | |||
2553 | 2540 | ||
2554 | struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) | 2541 | struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) |
2555 | { | 2542 | { |
2556 | struct iphdr *iph = skb_gro_network_header(skb); | 2543 | const struct iphdr *iph = skb_gro_network_header(skb); |
2557 | 2544 | ||
2558 | switch (skb->ip_summed) { | 2545 | switch (skb->ip_summed) { |
2559 | case CHECKSUM_COMPLETE: | 2546 | case CHECKSUM_COMPLETE: |
@@ -2571,11 +2558,10 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
2571 | 2558 | ||
2572 | return tcp_gro_receive(head, skb); | 2559 | return tcp_gro_receive(head, skb); |
2573 | } | 2560 | } |
2574 | EXPORT_SYMBOL(tcp4_gro_receive); | ||
2575 | 2561 | ||
2576 | int tcp4_gro_complete(struct sk_buff *skb) | 2562 | int tcp4_gro_complete(struct sk_buff *skb) |
2577 | { | 2563 | { |
2578 | struct iphdr *iph = ip_hdr(skb); | 2564 | const struct iphdr *iph = ip_hdr(skb); |
2579 | struct tcphdr *th = tcp_hdr(skb); | 2565 | struct tcphdr *th = tcp_hdr(skb); |
2580 | 2566 | ||
2581 | th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), | 2567 | th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), |
@@ -2584,7 +2570,6 @@ int tcp4_gro_complete(struct sk_buff *skb) | |||
2584 | 2570 | ||
2585 | return tcp_gro_complete(skb); | 2571 | return tcp_gro_complete(skb); |
2586 | } | 2572 | } |
2587 | EXPORT_SYMBOL(tcp4_gro_complete); | ||
2588 | 2573 | ||
2589 | struct proto tcp_prot = { | 2574 | struct proto tcp_prot = { |
2590 | .name = "TCP", | 2575 | .name = "TCP", |