Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--	net/ipv4/tcp_ipv4.c	| 340
1 file changed, 116 insertions(+), 224 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 77cccda1ad0c..94d1a7757ff7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -72,7 +72,6 @@
 #include <net/inet_common.h>
 #include <net/timewait_sock.h>
 #include <net/xfrm.h>
-#include <net/netdma.h>
 #include <net/secure_seq.h>
 #include <net/tcp_memcontrol.h>
 #include <net/busy_poll.h>
@@ -90,7 +89,6 @@ int sysctl_tcp_tw_reuse __read_mostly;
 int sysctl_tcp_low_latency __read_mostly;
 EXPORT_SYMBOL(sysctl_tcp_low_latency);
 
-
 #ifdef CONFIG_TCP_MD5SIG
 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
@@ -99,7 +97,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
-static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
+static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
 {
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
@@ -208,6 +206,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;
 
+	inet_set_txhash(sk);
+
	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
@@ -269,7 +269,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
  * It can be called through tcp_release_cb() if socket was owned by user
  * at the time tcp_v4_err() was called to handle ICMP message.
  */
-static void tcp_v4_mtu_reduced(struct sock *sk)
+void tcp_v4_mtu_reduced(struct sock *sk)
 {
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);
@@ -300,6 +300,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
 }
+EXPORT_SYMBOL(tcp_v4_mtu_reduced);
 
 static void do_redirect(struct sk_buff *skb, struct sock *sk)
 {
@@ -342,11 +343,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
	int err;
	struct net *net = dev_net(icmp_skb->dev);
 
-	if (icmp_skb->len < (iph->ihl << 2) + 8) {
-		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
-		return;
-	}
-
	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
@@ -433,15 +429,16 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
			break;
 
		icsk->icsk_backoff--;
-		inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
-			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
-		tcp_bound_rto(sk);
+		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
+					       TCP_TIMEOUT_INIT;
+		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
 
		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);
 
-		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
-				tcp_time_stamp - TCP_SKB_CB(skb)->when);
+		remaining = icsk->icsk_rto -
+			    min(icsk->icsk_rto,
+				tcp_time_stamp - tcp_skb_timestamp(skb));
 
		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
@@ -683,8 +680,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 
	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
-	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
-			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
+	ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+			      &arg, arg.iov[0].iov_len);
 
	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
@@ -766,8 +764,9 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
-	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
-			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
+	ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+			      &arg, arg.iov[0].iov_len);
 
	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 }
@@ -814,6 +813,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  *	socket.
  */
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
+			      struct flowi *fl,
			      struct request_sock *req,
			      u16 queue_mapping,
			      struct tcp_fastopen_cookie *foc)
@@ -837,24 +837,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
					    ireq->ir_rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
-		if (!tcp_rsk(req)->snt_synack && !err)
-			tcp_rsk(req)->snt_synack = tcp_time_stamp;
	}
 
	return err;
 }
 
-static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
-{
-	int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
-
-	if (!res) {
-		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
-	}
-	return res;
-}
-
 /*
  *	IPv4 request_sock destructor.
  */
@@ -893,28 +880,6 @@ bool tcp_syn_flood_action(struct sock *sk,
 }
 EXPORT_SYMBOL(tcp_syn_flood_action);
 
-/*
- * Save and compile IPv4 options into the request_sock if needed.
- */
-static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
-{
-	const struct ip_options *opt = &(IPCB(skb)->opt);
-	struct ip_options_rcu *dopt = NULL;
-
-	if (opt && opt->optlen) {
-		int opt_size = sizeof(*dopt) + opt->optlen;
-
-		dopt = kmalloc(opt_size, GFP_ATOMIC);
-		if (dopt) {
-			if (ip_options_echo(&dopt->opt, skb)) {
-				kfree(dopt);
-				dopt = NULL;
-			}
-		}
-	}
-	return dopt;
-}
-
 #ifdef CONFIG_TCP_MD5SIG
 /*
  * RFC2385 MD5 checksumming requires a mapping of
@@ -1064,7 +1029,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
	if (sin->sin_family != AF_INET)
		return -EINVAL;
 
-	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
+	if (!cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);
 
@@ -1182,7 +1147,8 @@ clear_hash_noput:
 }
 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
 
-static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+static bool __tcp_v4_inbound_md5_hash(struct sock *sk,
+				      const struct sk_buff *skb)
 {
	/*
	 * This gets called for each TCP segment that arrives
@@ -1235,163 +1201,81 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
	return false;
 }
 
+static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+{
+	bool ret;
+
+	rcu_read_lock();
+	ret = __tcp_v4_inbound_md5_hash(sk, skb);
+	rcu_read_unlock();
+
+	return ret;
+}
+
 #endif
 
+static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
+			    struct sk_buff *skb)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+
+	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
+	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
+	ireq->no_srccheck = inet_sk(sk)->transparent;
+	ireq->opt = tcp_v4_save_options(skb);
+}
+
+static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
+					  const struct request_sock *req,
+					  bool *strict)
+{
+	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
+
+	if (strict) {
+		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
+			*strict = true;
+		else
+			*strict = false;
+	}
+
+	return dst;
+}
+
 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
-	.rtx_syn_ack	=	tcp_v4_rtx_synack,
+	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
 };
 
-#ifdef CONFIG_TCP_MD5SIG
 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
+	.mss_clamp	=	TCP_MSS_DEFAULT,
+#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
-};
 #endif
+	.init_req	=	tcp_v4_init_req,
+#ifdef CONFIG_SYN_COOKIES
+	.cookie_init_seq =	cookie_v4_init_sequence,
+#endif
+	.route_req	=	tcp_v4_route_req,
+	.init_seq	=	tcp_v4_init_sequence,
+	.send_synack	=	tcp_v4_send_synack,
+	.queue_hash_add =	inet_csk_reqsk_queue_hash_add,
+};
 
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcp_options_received tmp_opt;
-	struct request_sock *req;
-	struct inet_request_sock *ireq;
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = NULL;
-	__be32 saddr = ip_hdr(skb)->saddr;
-	__be32 daddr = ip_hdr(skb)->daddr;
-	__u32 isn = TCP_SKB_CB(skb)->when;
-	bool want_cookie = false, fastopen;
-	struct flowi4 fl4;
-	struct tcp_fastopen_cookie foc = { .len = -1 };
-	int err;
-
	/* Never answer to SYNs send to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;
 
-	/* TW buckets are converted to open requests without
-	 * limitations, they conserve resources and peer is
-	 * evidently real one.
-	 */
-	if ((sysctl_tcp_syncookies == 2 ||
-	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
-		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
-		if (!want_cookie)
-			goto drop;
-	}
-
-	/* Accept backlog is full. If we have already queued enough
-	 * of warm entries in syn queue, drop request. It is better than
-	 * clogging syn queue with openreqs with exponentially increasing
-	 * timeout.
-	 */
-	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
-		goto drop;
-	}
-
-	req = inet_reqsk_alloc(&tcp_request_sock_ops);
-	if (!req)
-		goto drop;
-
-#ifdef CONFIG_TCP_MD5SIG
-	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
-#endif
-
-	tcp_clear_options(&tmp_opt);
-	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
-	tmp_opt.user_mss = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
-
-	if (want_cookie && !tmp_opt.saw_tstamp)
-		tcp_clear_options(&tmp_opt);
+	return tcp_conn_request(&tcp_request_sock_ops,
+				&tcp_request_sock_ipv4_ops, sk, skb);
 
-	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
-	tcp_openreq_init(req, &tmp_opt, skb);
-
-	ireq = inet_rsk(req);
-	ireq->ir_loc_addr = daddr;
-	ireq->ir_rmt_addr = saddr;
-	ireq->no_srccheck = inet_sk(sk)->transparent;
-	ireq->opt = tcp_v4_save_options(skb);
-	ireq->ir_mark = inet_request_mark(sk, skb);
-
-	if (security_inet_conn_request(sk, skb, req))
-		goto drop_and_free;
-
-	if (!want_cookie || tmp_opt.tstamp_ok)
-		TCP_ECN_create_request(req, skb, sock_net(sk));
-
-	if (want_cookie) {
-		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
-		req->cookie_ts = tmp_opt.tstamp_ok;
-	} else if (!isn) {
-		/* VJ's idea. We save last timestamp seen
-		 * from the destination in peer table, when entering
-		 * state TIME-WAIT, and check against it before
-		 * accepting new connection request.
-		 *
-		 * If "isn" is not zero, this request hit alive
-		 * timewait bucket, so that all the necessary checks
-		 * are made in the function processing timewait state.
-		 */
-		if (tmp_opt.saw_tstamp &&
-		    tcp_death_row.sysctl_tw_recycle &&
-		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
-		    fl4.daddr == saddr) {
-			if (!tcp_peer_is_proven(req, dst, true)) {
-				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
-				goto drop_and_release;
-			}
-		}
-		/* Kill the following clause, if you dislike this way. */
-		else if (!sysctl_tcp_syncookies &&
-			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
-			  (sysctl_max_syn_backlog >> 2)) &&
-			 !tcp_peer_is_proven(req, dst, false)) {
-			/* Without syncookies last quarter of
-			 * backlog is filled with destinations,
-			 * proven to be alive.
-			 * It means that we continue to communicate
-			 * to destinations, already remembered
-			 * to the moment of synflood.
-			 */
-			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
-				       &saddr, ntohs(tcp_hdr(skb)->source));
-			goto drop_and_release;
-		}
-
-		isn = tcp_v4_init_sequence(skb);
-	}
-	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
-		goto drop_and_free;
-
-	tcp_rsk(req)->snt_isn = isn;
-	tcp_rsk(req)->snt_synack = tcp_time_stamp;
-	tcp_openreq_init_rwin(req, sk, dst);
-	fastopen = !want_cookie &&
-		   tcp_try_fastopen(sk, skb, req, &foc, dst);
-	err = tcp_v4_send_synack(sk, dst, req,
-				 skb_get_queue_mapping(skb), &foc);
-	if (!fastopen) {
-		if (err || want_cookie)
-			goto drop_and_free;
-
-		tcp_rsk(req)->snt_synack = tcp_time_stamp;
-		tcp_rsk(req)->listener = NULL;
-		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-	}
-
-	return 0;
-
-drop_and_release:
-	dst_release(dst);
-drop_and_free:
-	reqsk_free(req);
 drop:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return 0;
@@ -1439,6 +1323,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
	newinet->mc_ttl = ip_hdr(skb)->ttl;
	newinet->rcv_tos = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
+	inet_set_txhash(newsk);
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;
@@ -1523,7 +1408,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 
 #ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
-		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
+		sk = cookie_v4_check(sk, skb);
 #endif
	return sk;
 }
@@ -1539,16 +1424,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
	struct sock *rsk;
-#ifdef CONFIG_TCP_MD5SIG
-	/*
-	 * We really want to reject the packet as early as possible
-	 * if:
-	 *   o We're expecting an MD5'd packet and this is no MD5 tcp option
-	 *   o There is an MD5 option and we're not expecting one
-	 */
-	if (tcp_v4_inbound_md5_hash(sk, skb))
-		goto discard;
-#endif
 
	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;
@@ -1663,7 +1538,17 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
	    skb_queue_len(&tp->ucopy.prequeue) == 0)
		return false;
 
-	skb_dst_force(skb);
+	/* Before escaping RCU protected region, we need to take care of skb
+	 * dst. Prequeue is only enabled for established sockets.
+	 * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
+	 * Instead of doing full sk_rx_dst validity here, let's perform
+	 * an optimistic check.
+	 */
+	if (likely(sk->sk_rx_dst))
+		skb_dst_drop(skb);
+	else
+		skb_dst_force(skb);
+
	__skb_queue_tail(&tp->ucopy.prequeue, skb);
	tp->ucopy.memory += skb->truesize;
	if (tp->ucopy.memory > sk->sk_rcvbuf) {
@@ -1728,11 +1613,19 @@ int tcp_v4_rcv(struct sk_buff *skb)
 
	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
+	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+	 * barrier() makes sure compiler wont play fool^Waliasing games.
+	 */
+	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
+		sizeof(struct inet_skb_parm));
+	barrier();
+
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
-	TCP_SKB_CB(skb)->when = 0;
+	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked = 0;
 
@@ -1751,6 +1644,18 @@ process:
 
	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
+
+#ifdef CONFIG_TCP_MD5SIG
+	/*
+	 * We really want to reject the packet as early as possible
+	 * if:
+	 *   o We're expecting an MD5'd packet and this is no MD5 tcp option
+	 *   o There is an MD5 option and we're not expecting one
+	 */
+	if (tcp_v4_inbound_md5_hash(sk, skb))
+		goto discard_and_relse;
+#endif
+
	nf_reset(skb);
 
	if (sk_filter(sk, skb))
@@ -1762,18 +1667,8 @@ process:
	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
-#ifdef CONFIG_NET_DMA
-		struct tcp_sock *tp = tcp_sk(sk);
-		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
-			tp->ucopy.dma_chan = net_dma_find_channel();
-		if (tp->ucopy.dma_chan)
+		if (!tcp_prequeue(sk, skb))
			ret = tcp_v4_do_rcv(sk, skb);
-		else
-#endif
-		{
-			if (!tcp_prequeue(sk, skb))
-				ret = tcp_v4_do_rcv(sk, skb);
-		}
	} else if (unlikely(sk_add_backlog(sk, skb,
					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
		bh_unlock_sock(sk);
@@ -1857,9 +1752,11 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 {
	struct dst_entry *dst = skb_dst(skb);
 
-	dst_hold(dst);
-	sk->sk_rx_dst = dst;
-	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+	if (dst) {
+		dst_hold(dst);
+		sk->sk_rx_dst = dst;
+		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+	}
 }
 EXPORT_SYMBOL(inet_sk_rx_dst_set);
 
@@ -1880,6 +1777,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
 #endif
+	.mtu_reduced	   = tcp_v4_mtu_reduced,
 };
 EXPORT_SYMBOL(ipv4_specific);
 
@@ -1932,11 +1830,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
	}
 #endif
 
-#ifdef CONFIG_NET_DMA
-	/* Cleans up our sk_async_wait_queue */
-	__skb_queue_purge(&sk->sk_async_wait_queue);
-#endif
-
	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);
 
@@ -2274,7 +2167,7 @@ int tcp_seq_open(struct inode *inode, struct file *file)
 
	s = ((struct seq_file *)file->private_data)->private;
	s->family = afinfo->family;
-	s->last_pos = 0;
+	s->last_pos = 0;
	return 0;
 }
 EXPORT_SYMBOL(tcp_seq_open);
@@ -2499,7 +2392,6 @@ struct proto tcp_prot = {
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
-	.mtu_reduced		= tcp_v4_mtu_reduced,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,