author     James Morris <james.l.morris@oracle.com>   2014-11-19 05:32:12 -0500
committer  James Morris <james.l.morris@oracle.com>   2014-11-19 05:32:12 -0500
commit     b10778a00d40b3d9fdaaf5891e802794781ff71c (patch)
tree       6ba4cbac86eecedc3f30650e7f764ecf00c83898 /net/ipv4/tcp_ipv4.c
parent     594081ee7145cc30a3977cb4e218f81213b63dc5 (diff)
parent     bfe01a5ba2490f299e1d2d5508cbbbadd897bbe9 (diff)
Merge commit 'v3.17' into next
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--   net/ipv4/tcp_ipv4.c   242
1 file changed, 75 insertions(+), 167 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 77cccda1ad0c..cd17f009aede 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -99,7 +99,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
-static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
+static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
 {
         return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
                                           ip_hdr(skb)->saddr,
@@ -208,6 +208,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
         inet->inet_dport = usin->sin_port;
         inet->inet_daddr = daddr;
 
+        inet_set_txhash(sk);
+
         inet_csk(sk)->icsk_ext_hdr_len = 0;
         if (inet_opt)
                 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
@@ -269,7 +271,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
  * It can be called through tcp_release_cb() if socket was owned by user
  * at the time tcp_v4_err() was called to handle ICMP message.
  */
-static void tcp_v4_mtu_reduced(struct sock *sk)
+void tcp_v4_mtu_reduced(struct sock *sk)
 {
         struct dst_entry *dst;
         struct inet_sock *inet = inet_sk(sk);
@@ -300,6 +302,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
                 tcp_simple_retransmit(sk);
         } /* else let the usual retransmit timer handle it */
 }
+EXPORT_SYMBOL(tcp_v4_mtu_reduced);
 
 static void do_redirect(struct sk_buff *skb, struct sock *sk)
 {
@@ -342,11 +345,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
         int err;
         struct net *net = dev_net(icmp_skb->dev);
 
-        if (icmp_skb->len < (iph->ihl << 2) + 8) {
-                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
-                return;
-        }
-
         sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
                          iph->saddr, th->source, inet_iif(icmp_skb));
         if (!sk) {
@@ -814,6 +812,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  * socket.
  */
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
+                              struct flowi *fl,
                               struct request_sock *req,
                               u16 queue_mapping,
                               struct tcp_fastopen_cookie *foc)
@@ -837,24 +836,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
                                             ireq->ir_rmt_addr,
                                             ireq->opt);
                 err = net_xmit_eval(err);
-                if (!tcp_rsk(req)->snt_synack && !err)
-                        tcp_rsk(req)->snt_synack = tcp_time_stamp;
         }
 
         return err;
 }
 
-static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
-{
-        int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
-
-        if (!res) {
-                TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
-                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
-        }
-        return res;
-}
-
 /*
  * IPv4 request_sock destructor.
  */
@@ -1064,7 +1050,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
         if (sin->sin_family != AF_INET)
                 return -EINVAL;
 
-        if (!cmd.tcpm_key || !cmd.tcpm_keylen)
+        if (!cmd.tcpm_keylen)
                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
                                       AF_INET);
 
@@ -1182,7 +1168,8 @@ clear_hash_noput:
 }
 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
 
-static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+static bool __tcp_v4_inbound_md5_hash(struct sock *sk,
+                                      const struct sk_buff *skb)
 {
         /*
          * This gets called for each TCP segment that arrives
@@ -1235,163 +1222,81 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
         return false;
 }
 
+static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+{
+        bool ret;
+
+        rcu_read_lock();
+        ret = __tcp_v4_inbound_md5_hash(sk, skb);
+        rcu_read_unlock();
+
+        return ret;
+}
+
 #endif
 
+static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
+                            struct sk_buff *skb)
+{
+        struct inet_request_sock *ireq = inet_rsk(req);
+
+        ireq->ir_loc_addr = ip_hdr(skb)->daddr;
+        ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
+        ireq->no_srccheck = inet_sk(sk)->transparent;
+        ireq->opt = tcp_v4_save_options(skb);
+}
+
+static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
+                                          const struct request_sock *req,
+                                          bool *strict)
+{
+        struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
+
+        if (strict) {
+                if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
+                        *strict = true;
+                else
+                        *strict = false;
+        }
+
+        return dst;
+}
+
 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
         .family          =      PF_INET,
         .obj_size        =      sizeof(struct tcp_request_sock),
-        .rtx_syn_ack     =      tcp_v4_rtx_synack,
+        .rtx_syn_ack     =      tcp_rtx_synack,
         .send_ack        =      tcp_v4_reqsk_send_ack,
         .destructor      =      tcp_v4_reqsk_destructor,
         .send_reset      =      tcp_v4_send_reset,
         .syn_ack_timeout =      tcp_syn_ack_timeout,
 };
 
-#ifdef CONFIG_TCP_MD5SIG
 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
+        .mss_clamp       =      TCP_MSS_DEFAULT,
+#ifdef CONFIG_TCP_MD5SIG
         .md5_lookup      =      tcp_v4_reqsk_md5_lookup,
         .calc_md5_hash   =      tcp_v4_md5_hash_skb,
-};
 #endif
+        .init_req        =      tcp_v4_init_req,
+#ifdef CONFIG_SYN_COOKIES
+        .cookie_init_seq =      cookie_v4_init_sequence,
+#endif
+        .route_req       =      tcp_v4_route_req,
+        .init_seq        =      tcp_v4_init_sequence,
+        .send_synack     =      tcp_v4_send_synack,
+        .queue_hash_add  =      inet_csk_reqsk_queue_hash_add,
+};
 
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
-        struct tcp_options_received tmp_opt;
-        struct request_sock *req;
-        struct inet_request_sock *ireq;
-        struct tcp_sock *tp = tcp_sk(sk);
-        struct dst_entry *dst = NULL;
-        __be32 saddr = ip_hdr(skb)->saddr;
-        __be32 daddr = ip_hdr(skb)->daddr;
-        __u32 isn = TCP_SKB_CB(skb)->when;
-        bool want_cookie = false, fastopen;
-        struct flowi4 fl4;
-        struct tcp_fastopen_cookie foc = { .len = -1 };
-        int err;
-
         /* Never answer to SYNs send to broadcast or multicast */
         if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
                 goto drop;
 
-        /* TW buckets are converted to open requests without
-         * limitations, they conserve resources and peer is
-         * evidently real one.
-         */
-        if ((sysctl_tcp_syncookies == 2 ||
-             inet_csk_reqsk_queue_is_full(sk)) && !isn) {
-                want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
-                if (!want_cookie)
-                        goto drop;
-        }
-
-        /* Accept backlog is full. If we have already queued enough
-         * of warm entries in syn queue, drop request. It is better than
-         * clogging syn queue with openreqs with exponentially increasing
-         * timeout.
-         */
-        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
-                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
-                goto drop;
-        }
-
-        req = inet_reqsk_alloc(&tcp_request_sock_ops);
-        if (!req)
-                goto drop;
-
-#ifdef CONFIG_TCP_MD5SIG
-        tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
-#endif
-
-        tcp_clear_options(&tmp_opt);
-        tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
-        tmp_opt.user_mss = tp->rx_opt.user_mss;
-        tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
-
-        if (want_cookie && !tmp_opt.saw_tstamp)
-                tcp_clear_options(&tmp_opt);
-
-        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
-        tcp_openreq_init(req, &tmp_opt, skb);
-
-        ireq = inet_rsk(req);
-        ireq->ir_loc_addr = daddr;
-        ireq->ir_rmt_addr = saddr;
-        ireq->no_srccheck = inet_sk(sk)->transparent;
-        ireq->opt = tcp_v4_save_options(skb);
-        ireq->ir_mark = inet_request_mark(sk, skb);
-
-        if (security_inet_conn_request(sk, skb, req))
-                goto drop_and_free;
-
-        if (!want_cookie || tmp_opt.tstamp_ok)
-                TCP_ECN_create_request(req, skb, sock_net(sk));
-
-        if (want_cookie) {
-                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
-                req->cookie_ts = tmp_opt.tstamp_ok;
-        } else if (!isn) {
-                /* VJ's idea. We save last timestamp seen
-                 * from the destination in peer table, when entering
-                 * state TIME-WAIT, and check against it before
-                 * accepting new connection request.
-                 *
-                 * If "isn" is not zero, this request hit alive
-                 * timewait bucket, so that all the necessary checks
-                 * are made in the function processing timewait state.
-                 */
-                if (tmp_opt.saw_tstamp &&
-                    tcp_death_row.sysctl_tw_recycle &&
-                    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
-                    fl4.daddr == saddr) {
-                        if (!tcp_peer_is_proven(req, dst, true)) {
-                                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
-                                goto drop_and_release;
-                        }
-                }
-                /* Kill the following clause, if you dislike this way. */
-                else if (!sysctl_tcp_syncookies &&
-                         (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
-                          (sysctl_max_syn_backlog >> 2)) &&
-                         !tcp_peer_is_proven(req, dst, false)) {
-                        /* Without syncookies last quarter of
-                         * backlog is filled with destinations,
-                         * proven to be alive.
-                         * It means that we continue to communicate
-                         * to destinations, already remembered
-                         * to the moment of synflood.
-                         */
-                        LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
-                                       &saddr, ntohs(tcp_hdr(skb)->source));
-                        goto drop_and_release;
-                }
-
-                isn = tcp_v4_init_sequence(skb);
-        }
-        if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
-                goto drop_and_free;
-
-        tcp_rsk(req)->snt_isn = isn;
-        tcp_rsk(req)->snt_synack = tcp_time_stamp;
-        tcp_openreq_init_rwin(req, sk, dst);
-        fastopen = !want_cookie &&
-                   tcp_try_fastopen(sk, skb, req, &foc, dst);
-        err = tcp_v4_send_synack(sk, dst, req,
-                                 skb_get_queue_mapping(skb), &foc);
-        if (!fastopen) {
-                if (err || want_cookie)
-                        goto drop_and_free;
+        return tcp_conn_request(&tcp_request_sock_ops,
+                                &tcp_request_sock_ipv4_ops, sk, skb);
 
-                tcp_rsk(req)->snt_synack = tcp_time_stamp;
-                tcp_rsk(req)->listener = NULL;
-                inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-        }
-
-        return 0;
-
-drop_and_release:
-        dst_release(dst);
-drop_and_free:
-        reqsk_free(req);
 drop:
         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
         return 0;
@@ -1439,6 +1344,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
         newinet->mc_ttl       = ip_hdr(skb)->ttl;
         newinet->rcv_tos      = ip_hdr(skb)->tos;
         inet_csk(newsk)->icsk_ext_hdr_len = 0;
+        inet_set_txhash(newsk);
         if (inet_opt)
                 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
         newinet->inet_id = newtp->write_seq ^ jiffies;
@@ -1539,16 +1445,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
         struct sock *rsk;
-#ifdef CONFIG_TCP_MD5SIG
-        /*
-         * We really want to reject the packet as early as possible
-         * if:
-         *   o We're expecting an MD5'd packet and this is no MD5 tcp option
-         *   o There is an MD5 option and we're not expecting one
-         */
-        if (tcp_v4_inbound_md5_hash(sk, skb))
-                goto discard;
-#endif
 
         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
                 struct dst_entry *dst = sk->sk_rx_dst;
@@ -1751,6 +1647,18 @@ process:
 
         if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
                 goto discard_and_relse;
+
+#ifdef CONFIG_TCP_MD5SIG
+        /*
+         * We really want to reject the packet as early as possible
+         * if:
+         *   o We're expecting an MD5'd packet and this is no MD5 tcp option
+         *   o There is an MD5 option and we're not expecting one
+         */
+        if (tcp_v4_inbound_md5_hash(sk, skb))
+                goto discard_and_relse;
+#endif
+
         nf_reset(skb);
 
         if (sk_filter(sk, skb))
@@ -1880,6 +1788,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
         .compat_setsockopt = compat_ip_setsockopt,
         .compat_getsockopt = compat_ip_getsockopt,
 #endif
+        .mtu_reduced       = tcp_v4_mtu_reduced,
 };
 EXPORT_SYMBOL(ipv4_specific);
 
@@ -2499,7 +2408,6 @@ struct proto tcp_prot = {
         .sendpage               = tcp_sendpage,
         .backlog_rcv            = tcp_v4_do_rcv,
         .release_cb             = tcp_release_cb,
-        .mtu_reduced            = tcp_v4_mtu_reduced,
         .hash                   = inet_hash,
         .unhash                 = inet_unhash,
         .get_port               = inet_csk_get_port,