path: root/net/ipv4/tcp_ipv4.c
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--  net/ipv4/tcp_ipv4.c | 340
1 file changed, 116 insertions(+), 224 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 77cccda1ad0c..94d1a7757ff7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -72,7 +72,6 @@
 #include <net/inet_common.h>
 #include <net/timewait_sock.h>
 #include <net/xfrm.h>
-#include <net/netdma.h>
 #include <net/secure_seq.h>
 #include <net/tcp_memcontrol.h>
 #include <net/busy_poll.h>
@@ -90,7 +89,6 @@ int sysctl_tcp_tw_reuse __read_mostly;
 int sysctl_tcp_low_latency __read_mostly;
 EXPORT_SYMBOL(sysctl_tcp_low_latency);
 
-
 #ifdef CONFIG_TCP_MD5SIG
 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                                __be32 daddr, __be32 saddr, const struct tcphdr *th);
@@ -99,7 +97,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
-static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
+static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
 {
         return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
                                           ip_hdr(skb)->saddr,
@@ -208,6 +206,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
         inet->inet_dport = usin->sin_port;
         inet->inet_daddr = daddr;
 
+        inet_set_txhash(sk);
+
         inet_csk(sk)->icsk_ext_hdr_len = 0;
         if (inet_opt)
                 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
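
For context, a minimal sketch of what inet_set_txhash() is being asked to do for the freshly connected socket (assumption: modeled on the 3.17-era inline helper; exact field names may differ). It seeds sk->sk_txhash from the connection 4-tuple so transmit-queue and ECMP hashing stay stable for the life of the flow.

#include <net/flow_keys.h>
#include <net/inet_sock.h>

/* Sketch (assumption): derive sk_txhash from the IPv4/port 4-tuple. */
static inline void inet_set_txhash_sketch(struct sock *sk)
{
        struct inet_sock *inet = inet_sk(sk);
        struct flow_keys keys;

        keys.src = inet->inet_saddr;
        keys.dst = inet->inet_daddr;
        keys.port16[0] = inet->inet_sport;
        keys.port16[1] = inet->inet_dport;

        sk->sk_txhash = flow_hash_from_keys(&keys);
}
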
@@ -269,7 +269,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
  * It can be called through tcp_release_cb() if socket was owned by user
  * at the time tcp_v4_err() was called to handle ICMP message.
  */
-static void tcp_v4_mtu_reduced(struct sock *sk)
+void tcp_v4_mtu_reduced(struct sock *sk)
 {
         struct dst_entry *dst;
         struct inet_sock *inet = inet_sk(sk);
@@ -300,6 +300,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
                 tcp_simple_retransmit(sk);
         } /* else let the usual retransmit timer handle it */
 }
+EXPORT_SYMBOL(tcp_v4_mtu_reduced);
 
 static void do_redirect(struct sk_buff *skb, struct sock *sk)
 {
@@ -342,11 +343,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
         int err;
         struct net *net = dev_net(icmp_skb->dev);
 
-        if (icmp_skb->len < (iph->ihl << 2) + 8) {
-                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
-                return;
-        }
-
         sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
                          iph->saddr, th->source, inet_iif(icmp_skb));
         if (!sk) {
@@ -433,15 +429,16 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
                         break;
 
                 icsk->icsk_backoff--;
-                inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
-                        TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
-                tcp_bound_rto(sk);
+                icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
+                                               TCP_TIMEOUT_INIT;
+                icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
 
                 skb = tcp_write_queue_head(sk);
                 BUG_ON(!skb);
 
-                remaining = icsk->icsk_rto - min(icsk->icsk_rto,
-                                tcp_time_stamp - TCP_SKB_CB(skb)->when);
+                remaining = icsk->icsk_rto -
+                            min(icsk->icsk_rto,
+                                tcp_time_stamp - tcp_skb_timestamp(skb));
 
                 if (remaining) {
                         inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
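
The RTO recomputation above replaces the open-coded shift plus tcp_bound_rto() with inet_csk_rto_backoff(). A sketch of that helper's semantics (assumption: mirrors the inet_connection_sock.h definition of this era):

#include <net/inet_connection_sock.h>

/* Sketch (assumption): exponential backoff of the base RTO, clamped to
 * max_when (TCP_RTO_MAX at this call site), computed in 64 bits so the
 * shift cannot overflow on large backoff values.
 */
static inline unsigned long
inet_csk_rto_backoff_sketch(const struct inet_connection_sock *icsk,
                            unsigned long max_when)
{
        u64 when = (u64)icsk->icsk_rto << icsk->icsk_backoff;

        return (unsigned long)min_t(u64, when, max_when);
}
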
@@ -683,8 +680,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 
         net = dev_net(skb_dst(skb)->dev);
         arg.tos = ip_hdr(skb)->tos;
-        ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
-                              ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
+        ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+                              ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+                              &arg, arg.iov[0].iov_len);
 
         TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
         TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
@@ -766,8 +764,9 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
         if (oif)
                 arg.bound_dev_if = oif;
         arg.tos = tos;
-        ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
-                              ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
+        ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+                              ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+                              &arg, arg.iov[0].iov_len);
 
         TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 }
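
Both reply paths (the RST above and this ACK) now pass the received segment's IP options explicitly, because later in this patch tcp_v4_rcv() overwrites IPCB(skb) with TCP_SKB_CB(skb), so ip_send_unicast_reply() can no longer read IPCB(skb)->opt on its own. A hedged sketch of what the callee is expected to do with the new argument (assumption: simplified; only the option-echo step is shown, routing and transmit are elided):

#include <net/ip.h>

/* Sketch (assumption): echo the caller-supplied options into the reply
 * instead of reading IPCB(skb)->opt, which TCP has repurposed.
 */
static void unicast_reply_opt_sketch(struct sk_buff *skb,
                                     const struct ip_options *sopt)
{
        struct ip_options_data replyopts;

        if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
                return;         /* options could not be echoed; drop reply */
        /* ... build the route and send the reply as before ... */
}
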
@@ -814,6 +813,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  *      socket.
  */
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
+                              struct flowi *fl,
                               struct request_sock *req,
                               u16 queue_mapping,
                               struct tcp_fastopen_cookie *foc)
@@ -837,24 +837,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
                                             ireq->ir_rmt_addr,
                                             ireq->opt);
                 err = net_xmit_eval(err);
-                if (!tcp_rsk(req)->snt_synack && !err)
-                        tcp_rsk(req)->snt_synack = tcp_time_stamp;
         }
 
         return err;
 }
 
-static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
-{
-        int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
-
-        if (!res) {
-                TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
-                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
-        }
-        return res;
-}
-
 /*
  *      IPv4 request_sock destructor.
  */
@@ -893,28 +880,6 @@ bool tcp_syn_flood_action(struct sock *sk,
 }
 EXPORT_SYMBOL(tcp_syn_flood_action);
 
-/*
- * Save and compile IPv4 options into the request_sock if needed.
- */
-static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
-{
-        const struct ip_options *opt = &(IPCB(skb)->opt);
-        struct ip_options_rcu *dopt = NULL;
-
-        if (opt && opt->optlen) {
-                int opt_size = sizeof(*dopt) + opt->optlen;
-
-                dopt = kmalloc(opt_size, GFP_ATOMIC);
-                if (dopt) {
-                        if (ip_options_echo(&dopt->opt, skb)) {
-                                kfree(dopt);
-                                dopt = NULL;
-                        }
-                }
-        }
-        return dopt;
-}
-
 #ifdef CONFIG_TCP_MD5SIG
 /*
  * RFC2385 MD5 checksumming requires a mapping of
@@ -1064,7 +1029,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
         if (sin->sin_family != AF_INET)
                 return -EINVAL;
 
-        if (!cmd.tcpm_key || !cmd.tcpm_keylen)
+        if (!cmd.tcpm_keylen)
                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
                                       AF_INET);
 
@@ -1182,7 +1147,8 @@ clear_hash_noput:
 }
 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
 
-static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+static bool __tcp_v4_inbound_md5_hash(struct sock *sk,
+                                      const struct sk_buff *skb)
 {
         /*
          * This gets called for each TCP segment that arrives
@@ -1235,163 +1201,81 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
         return false;
 }
 
+static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+{
+        bool ret;
+
+        rcu_read_lock();
+        ret = __tcp_v4_inbound_md5_hash(sk, skb);
+        rcu_read_unlock();
+
+        return ret;
+}
+
 #endif
 
+static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
+                            struct sk_buff *skb)
+{
+        struct inet_request_sock *ireq = inet_rsk(req);
+
+        ireq->ir_loc_addr = ip_hdr(skb)->daddr;
+        ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
+        ireq->no_srccheck = inet_sk(sk)->transparent;
+        ireq->opt = tcp_v4_save_options(skb);
+}
+
+static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
+                                          const struct request_sock *req,
+                                          bool *strict)
+{
+        struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
+
+        if (strict) {
+                if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
+                        *strict = true;
+                else
+                        *strict = false;
+        }
+
+        return dst;
+}
+
 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
         .family = PF_INET,
         .obj_size = sizeof(struct tcp_request_sock),
-        .rtx_syn_ack = tcp_v4_rtx_synack,
+        .rtx_syn_ack = tcp_rtx_synack,
         .send_ack = tcp_v4_reqsk_send_ack,
         .destructor = tcp_v4_reqsk_destructor,
         .send_reset = tcp_v4_send_reset,
         .syn_ack_timeout = tcp_syn_ack_timeout,
 };
 
-#ifdef CONFIG_TCP_MD5SIG
 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
+        .mss_clamp = TCP_MSS_DEFAULT,
+#ifdef CONFIG_TCP_MD5SIG
         .md5_lookup = tcp_v4_reqsk_md5_lookup,
         .calc_md5_hash = tcp_v4_md5_hash_skb,
-};
 #endif
+        .init_req = tcp_v4_init_req,
+#ifdef CONFIG_SYN_COOKIES
+        .cookie_init_seq = cookie_v4_init_sequence,
+#endif
+        .route_req = tcp_v4_route_req,
+        .init_seq = tcp_v4_init_sequence,
+        .send_synack = tcp_v4_send_synack,
+        .queue_hash_add = inet_csk_reqsk_queue_hash_add,
+};
 
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
-        struct tcp_options_received tmp_opt;
-        struct request_sock *req;
-        struct inet_request_sock *ireq;
-        struct tcp_sock *tp = tcp_sk(sk);
-        struct dst_entry *dst = NULL;
-        __be32 saddr = ip_hdr(skb)->saddr;
-        __be32 daddr = ip_hdr(skb)->daddr;
-        __u32 isn = TCP_SKB_CB(skb)->when;
-        bool want_cookie = false, fastopen;
-        struct flowi4 fl4;
-        struct tcp_fastopen_cookie foc = { .len = -1 };
-        int err;
-
         /* Never answer to SYNs send to broadcast or multicast */
         if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
                 goto drop;
 
-        /* TW buckets are converted to open requests without
-         * limitations, they conserve resources and peer is
-         * evidently real one.
-         */
-        if ((sysctl_tcp_syncookies == 2 ||
-             inet_csk_reqsk_queue_is_full(sk)) && !isn) {
-                want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
-                if (!want_cookie)
-                        goto drop;
-        }
-
-        /* Accept backlog is full. If we have already queued enough
-         * of warm entries in syn queue, drop request. It is better than
-         * clogging syn queue with openreqs with exponentially increasing
-         * timeout.
-         */
-        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
-                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
-                goto drop;
-        }
-
-        req = inet_reqsk_alloc(&tcp_request_sock_ops);
-        if (!req)
-                goto drop;
-
-#ifdef CONFIG_TCP_MD5SIG
-        tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
-#endif
-
-        tcp_clear_options(&tmp_opt);
-        tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
-        tmp_opt.user_mss = tp->rx_opt.user_mss;
-        tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
-
-        if (want_cookie && !tmp_opt.saw_tstamp)
-                tcp_clear_options(&tmp_opt);
+        return tcp_conn_request(&tcp_request_sock_ops,
+                                &tcp_request_sock_ipv4_ops, sk, skb);
 
-        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
-        tcp_openreq_init(req, &tmp_opt, skb);
-
-        ireq = inet_rsk(req);
-        ireq->ir_loc_addr = daddr;
-        ireq->ir_rmt_addr = saddr;
-        ireq->no_srccheck = inet_sk(sk)->transparent;
-        ireq->opt = tcp_v4_save_options(skb);
-        ireq->ir_mark = inet_request_mark(sk, skb);
-
-        if (security_inet_conn_request(sk, skb, req))
-                goto drop_and_free;
-
-        if (!want_cookie || tmp_opt.tstamp_ok)
-                TCP_ECN_create_request(req, skb, sock_net(sk));
-
-        if (want_cookie) {
-                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
-                req->cookie_ts = tmp_opt.tstamp_ok;
-        } else if (!isn) {
-                /* VJ's idea. We save last timestamp seen
-                 * from the destination in peer table, when entering
-                 * state TIME-WAIT, and check against it before
-                 * accepting new connection request.
-                 *
-                 * If "isn" is not zero, this request hit alive
-                 * timewait bucket, so that all the necessary checks
-                 * are made in the function processing timewait state.
-                 */
-                if (tmp_opt.saw_tstamp &&
-                    tcp_death_row.sysctl_tw_recycle &&
-                    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
-                    fl4.daddr == saddr) {
-                        if (!tcp_peer_is_proven(req, dst, true)) {
-                                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
-                                goto drop_and_release;
-                        }
-                }
-                /* Kill the following clause, if you dislike this way. */
-                else if (!sysctl_tcp_syncookies &&
-                         (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
-                          (sysctl_max_syn_backlog >> 2)) &&
-                         !tcp_peer_is_proven(req, dst, false)) {
-                        /* Without syncookies last quarter of
-                         * backlog is filled with destinations,
-                         * proven to be alive.
-                         * It means that we continue to communicate
-                         * to destinations, already remembered
-                         * to the moment of synflood.
-                         */
-                        LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
-                                       &saddr, ntohs(tcp_hdr(skb)->source));
-                        goto drop_and_release;
-                }
-
-                isn = tcp_v4_init_sequence(skb);
-        }
-        if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
-                goto drop_and_free;
-
-        tcp_rsk(req)->snt_isn = isn;
-        tcp_rsk(req)->snt_synack = tcp_time_stamp;
-        tcp_openreq_init_rwin(req, sk, dst);
-        fastopen = !want_cookie &&
-                   tcp_try_fastopen(sk, skb, req, &foc, dst);
-        err = tcp_v4_send_synack(sk, dst, req,
-                                 skb_get_queue_mapping(skb), &foc);
-        if (!fastopen) {
-                if (err || want_cookie)
-                        goto drop_and_free;
-
-                tcp_rsk(req)->snt_synack = tcp_time_stamp;
-                tcp_rsk(req)->listener = NULL;
-                inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-        }
-
-        return 0;
-
-drop_and_release:
-        dst_release(dst);
-drop_and_free:
-        reqsk_free(req);
 drop:
         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
         return 0;
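
The body of tcp_v4_conn_request() collapses into a call to the protocol-independent tcp_conn_request(), which drives the hooks registered in tcp_request_sock_ipv4_ops above. A much-simplified sketch of the control flow that ops table implies (assumption: illustrative only; syncookies, Fast Open, tw_recycle and accept-queue pressure handling are elided, and dst refcounting is glossed over):

#include <net/tcp.h>

/* Sketch (assumption): roughly how the generic request path is expected to
 * use the af_ops table; each hook maps to one of the IPv4 callbacks above.
 */
static int conn_request_sketch(struct request_sock_ops *rsk_ops,
                               const struct tcp_request_sock_ops *af_ops,
                               struct sock *sk, struct sk_buff *skb)
{
        struct request_sock *req = inet_reqsk_alloc(rsk_ops);
        struct dst_entry *dst;
        struct flowi fl;

        if (!req)
                return 0;

        tcp_rsk(req)->af_specific = af_ops;

        af_ops->init_req(req, sk, skb);                 /* tcp_v4_init_req() */
        dst = af_ops->route_req(sk, &fl, req, NULL);    /* tcp_v4_route_req() */
        if (!dst)
                goto drop;

        tcp_rsk(req)->snt_isn = af_ops->init_seq(skb);  /* tcp_v4_init_sequence() */

        if (af_ops->send_synack(sk, dst, &fl, req,      /* tcp_v4_send_synack() */
                                skb_get_queue_mapping(skb), NULL))
                goto drop;

        af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
        return 0;
drop:
        reqsk_free(req);
        return 0;
}
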
@@ -1439,6 +1323,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
         newinet->mc_ttl = ip_hdr(skb)->ttl;
         newinet->rcv_tos = ip_hdr(skb)->tos;
         inet_csk(newsk)->icsk_ext_hdr_len = 0;
+        inet_set_txhash(newsk);
         if (inet_opt)
                 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
         newinet->inet_id = newtp->write_seq ^ jiffies;
@@ -1523,7 +1408,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 
 #ifdef CONFIG_SYN_COOKIES
         if (!th->syn)
-                sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
+                sk = cookie_v4_check(sk, skb);
 #endif
         return sk;
 }
@@ -1539,16 +1424,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
         struct sock *rsk;
-#ifdef CONFIG_TCP_MD5SIG
-        /*
-         * We really want to reject the packet as early as possible
-         * if:
-         *  o We're expecting an MD5'd packet and this is no MD5 tcp option
-         *  o There is an MD5 option and we're not expecting one
-         */
-        if (tcp_v4_inbound_md5_hash(sk, skb))
-                goto discard;
-#endif
 
         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
                 struct dst_entry *dst = sk->sk_rx_dst;
@@ -1663,7 +1538,17 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
             skb_queue_len(&tp->ucopy.prequeue) == 0)
                 return false;
 
-        skb_dst_force(skb);
+        /* Before escaping RCU protected region, we need to take care of skb
+         * dst. Prequeue is only enabled for established sockets.
+         * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
+         * Instead of doing full sk_rx_dst validity here, let's perform
+         * an optimistic check.
+         */
+        if (likely(sk->sk_rx_dst))
+                skb_dst_drop(skb);
+        else
+                skb_dst_force(skb);
+
         __skb_queue_tail(&tp->ucopy.prequeue, skb);
         tp->ucopy.memory += skb->truesize;
         if (tp->ucopy.memory > sk->sk_rcvbuf) {
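
A short sketch of why the branch matters (assumption: condensed explanation): input skbs normally carry a noref dst that is only valid inside the RCU section, so parking an skb on the prequeue requires either taking a real reference or discarding the dst; the new code discards it whenever sk->sk_rx_dst is already cached, skipping a refcount round-trip in the common case.

#include <net/dst.h>
#include <net/sock.h>

/* Sketch (assumption): the only later consumer of the skb's dst on this
 * path is the lazy sk_rx_dst setup, so the dst can be dropped early when
 * sk->sk_rx_dst is already populated.
 */
static void prequeue_dst_sketch(struct sock *sk, struct sk_buff *skb)
{
        if (likely(sk->sk_rx_dst))      /* dst would go unused anyway */
                skb_dst_drop(skb);
        else                            /* keep it so sk_rx_dst can be set later */
                skb_dst_force(skb);
}
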
@@ -1728,11 +1613,19 @@ int tcp_v4_rcv(struct sk_buff *skb)
 
         th = tcp_hdr(skb);
         iph = ip_hdr(skb);
+        /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+         * barrier() makes sure compiler wont play fool^Waliasing games.
+         */
+        memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
+                sizeof(struct inet_skb_parm));
+        barrier();
+
         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
                                     skb->len - th->doff * 4);
         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
-        TCP_SKB_CB(skb)->when = 0;
+        TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+        TCP_SKB_CB(skb)->tcp_tw_isn = 0;
         TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
         TCP_SKB_CB(skb)->sacked = 0;
 
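
tcp_flag_byte() simply reads the flags octet at offset 13 of the TCP header, which is why a single cached u8 can stand in for per-field bookkeeping here. A small sketch of how the cached value is meant to be consumed (assumption: helper name is illustrative):

#include <net/tcp.h>

/* Sketch (assumption): with tcp_flags cached by tcp_v4_rcv(), later code
 * can test header flags without dereferencing the TCP header again.
 */
static bool cb_has_syn(const struct sk_buff *skb)
{
        return TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN;
}
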
@@ -1751,6 +1644,18 @@ process:
 
         if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
                 goto discard_and_relse;
+
+#ifdef CONFIG_TCP_MD5SIG
+        /*
+         * We really want to reject the packet as early as possible
+         * if:
+         *  o We're expecting an MD5'd packet and this is no MD5 tcp option
+         *  o There is an MD5 option and we're not expecting one
+         */
+        if (tcp_v4_inbound_md5_hash(sk, skb))
+                goto discard_and_relse;
+#endif
+
         nf_reset(skb);
 
         if (sk_filter(sk, skb))
@@ -1762,18 +1667,8 @@ process:
         bh_lock_sock_nested(sk);
         ret = 0;
         if (!sock_owned_by_user(sk)) {
-#ifdef CONFIG_NET_DMA
-                struct tcp_sock *tp = tcp_sk(sk);
-                if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
-                        tp->ucopy.dma_chan = net_dma_find_channel();
-                if (tp->ucopy.dma_chan)
+                if (!tcp_prequeue(sk, skb))
                         ret = tcp_v4_do_rcv(sk, skb);
-                else
-#endif
-                {
-                        if (!tcp_prequeue(sk, skb))
-                                ret = tcp_v4_do_rcv(sk, skb);
-                }
         } else if (unlikely(sk_add_backlog(sk, skb,
                                            sk->sk_rcvbuf + sk->sk_sndbuf))) {
                 bh_unlock_sock(sk);
@@ -1857,9 +1752,11 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 {
         struct dst_entry *dst = skb_dst(skb);
 
-        dst_hold(dst);
-        sk->sk_rx_dst = dst;
-        inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+        if (dst) {
+                dst_hold(dst);
+                sk->sk_rx_dst = dst;
+                inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+        }
 }
 EXPORT_SYMBOL(inet_sk_rx_dst_set);
 
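
The NULL check pairs with the prequeue change earlier in this patch: an skb that sat on the prequeue may have had its dst dropped, so the helper must tolerate skb_dst(skb) == NULL instead of unconditionally taking a reference. A minimal sketch of a caller that can now hit that case (assumption: condensed from the established-state receive path; helper name is illustrative):

#include <net/tcp.h>

/* Sketch (assumption): lazily cache the input route; harmless when the
 * skb carries no dst, since inet_sk_rx_dst_set() now checks for NULL and
 * simply leaves sk->sk_rx_dst unset until an skb with a dst arrives.
 */
static void maybe_cache_rx_dst(struct sock *sk, const struct sk_buff *skb)
{
        if (unlikely(!sk->sk_rx_dst))
                inet_sk_rx_dst_set(sk, skb);
}
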
@@ -1880,6 +1777,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
         .compat_setsockopt = compat_ip_setsockopt,
         .compat_getsockopt = compat_ip_getsockopt,
 #endif
+        .mtu_reduced = tcp_v4_mtu_reduced,
 };
 EXPORT_SYMBOL(ipv4_specific);
 
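
With .mtu_reduced added to the af_ops table here (and dropped from struct proto at the end of this patch), deferred MTU-reduction work has to be dispatched through the address family. A sketch of the kind of call site this enables (assumption: condensed from the tcp_release_cb() deferred-work pattern):

#include <net/tcp.h>

/* Sketch (assumption): the deferred MTU-reduction flag is now handled by
 * calling through icsk_af_ops, reaching tcp_v4_mtu_reduced() for IPv4
 * sockets and the IPv6 counterpart otherwise.
 */
static void handle_deferred_mtu_reduce(struct sock *sk, unsigned long flags)
{
        if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
                inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
                __sock_put(sk);
        }
}
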
@@ -1932,11 +1830,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
         }
 #endif
 
-#ifdef CONFIG_NET_DMA
-        /* Cleans up our sk_async_wait_queue */
-        __skb_queue_purge(&sk->sk_async_wait_queue);
-#endif
-
         /* Clean prequeue, it must be empty really */
         __skb_queue_purge(&tp->ucopy.prequeue);
 
@@ -2274,7 +2167,7 @@ int tcp_seq_open(struct inode *inode, struct file *file)
 
         s = ((struct seq_file *)file->private_data)->private;
         s->family = afinfo->family;
-        s->last_pos  = 0;
+        s->last_pos = 0;
         return 0;
 }
 EXPORT_SYMBOL(tcp_seq_open);
@@ -2499,7 +2392,6 @@ struct proto tcp_prot = {
         .sendpage = tcp_sendpage,
         .backlog_rcv = tcp_v4_do_rcv,
         .release_cb = tcp_release_cb,
-        .mtu_reduced = tcp_v4_mtu_reduced,
         .hash = inet_hash,
         .unhash = inet_unhash,
         .get_port = inet_csk_get_port,