diff options
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 86 |
1 files changed, 26 insertions, 60 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a037bafcba3c..674bbd8cfd36 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,7 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; | |||
271 | 271 | ||
272 | DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); | 272 | DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); |
273 | 273 | ||
274 | kmem_cache_t *tcp_openreq_cachep; | ||
275 | kmem_cache_t *tcp_bucket_cachep; | 274 | kmem_cache_t *tcp_bucket_cachep; |
276 | kmem_cache_t *tcp_timewait_cachep; | 275 | kmem_cache_t *tcp_timewait_cachep; |
277 | 276 | ||
@@ -317,7 +316,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure); | |||
317 | static __inline__ unsigned int tcp_listen_poll(struct sock *sk, | 316 | static __inline__ unsigned int tcp_listen_poll(struct sock *sk, |
318 | poll_table *wait) | 317 | poll_table *wait) |
319 | { | 318 | { |
320 | return tcp_sk(sk)->accept_queue ? (POLLIN | POLLRDNORM) : 0; | 319 | return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0; |
321 | } | 320 | } |
322 | 321 | ||
323 | /* | 322 | /* |
@@ -463,28 +462,15 @@ int tcp_listen_start(struct sock *sk) | |||
463 | { | 462 | { |
464 | struct inet_sock *inet = inet_sk(sk); | 463 | struct inet_sock *inet = inet_sk(sk); |
465 | struct tcp_sock *tp = tcp_sk(sk); | 464 | struct tcp_sock *tp = tcp_sk(sk); |
466 | struct tcp_listen_opt *lopt; | 465 | int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE); |
466 | |||
467 | if (rc != 0) | ||
468 | return rc; | ||
467 | 469 | ||
468 | sk->sk_max_ack_backlog = 0; | 470 | sk->sk_max_ack_backlog = 0; |
469 | sk->sk_ack_backlog = 0; | 471 | sk->sk_ack_backlog = 0; |
470 | tp->accept_queue = tp->accept_queue_tail = NULL; | ||
471 | rwlock_init(&tp->syn_wait_lock); | ||
472 | tcp_delack_init(tp); | 472 | tcp_delack_init(tp); |
473 | 473 | ||
474 | lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL); | ||
475 | if (!lopt) | ||
476 | return -ENOMEM; | ||
477 | |||
478 | memset(lopt, 0, sizeof(struct tcp_listen_opt)); | ||
479 | for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++) | ||
480 | if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog) | ||
481 | break; | ||
482 | get_random_bytes(&lopt->hash_rnd, 4); | ||
483 | |||
484 | write_lock_bh(&tp->syn_wait_lock); | ||
485 | tp->listen_opt = lopt; | ||
486 | write_unlock_bh(&tp->syn_wait_lock); | ||
487 | |||
488 | /* There is race window here: we announce ourselves listening, | 474 | /* There is race window here: we announce ourselves listening, |
489 | * but this transition is still not validated by get_port(). | 475 | * but this transition is still not validated by get_port(). |
490 | * It is OK, because this socket enters to hash table only | 476 | * It is OK, because this socket enters to hash table only |
@@ -501,10 +487,7 @@ int tcp_listen_start(struct sock *sk) | |||
501 | } | 487 | } |
502 | 488 | ||
503 | sk->sk_state = TCP_CLOSE; | 489 | sk->sk_state = TCP_CLOSE; |
504 | write_lock_bh(&tp->syn_wait_lock); | 490 | reqsk_queue_destroy(&tp->accept_queue); |
505 | tp->listen_opt = NULL; | ||
506 | write_unlock_bh(&tp->syn_wait_lock); | ||
507 | kfree(lopt); | ||
508 | return -EADDRINUSE; | 491 | return -EADDRINUSE; |
509 | } | 492 | } |
510 | 493 | ||
@@ -516,25 +499,23 @@ int tcp_listen_start(struct sock *sk) | |||
516 | static void tcp_listen_stop (struct sock *sk) | 499 | static void tcp_listen_stop (struct sock *sk) |
517 | { | 500 | { |
518 | struct tcp_sock *tp = tcp_sk(sk); | 501 | struct tcp_sock *tp = tcp_sk(sk); |
519 | struct tcp_listen_opt *lopt = tp->listen_opt; | 502 | struct listen_sock *lopt; |
520 | struct open_request *acc_req = tp->accept_queue; | 503 | struct request_sock *acc_req; |
521 | struct open_request *req; | 504 | struct request_sock *req; |
522 | int i; | 505 | int i; |
523 | 506 | ||
524 | tcp_delete_keepalive_timer(sk); | 507 | tcp_delete_keepalive_timer(sk); |
525 | 508 | ||
526 | /* make all the listen_opt local to us */ | 509 | /* make all the listen_opt local to us */ |
527 | write_lock_bh(&tp->syn_wait_lock); | 510 | lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue); |
528 | tp->listen_opt = NULL; | 511 | acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue); |
529 | write_unlock_bh(&tp->syn_wait_lock); | ||
530 | tp->accept_queue = tp->accept_queue_tail = NULL; | ||
531 | 512 | ||
532 | if (lopt->qlen) { | 513 | if (lopt->qlen) { |
533 | for (i = 0; i < TCP_SYNQ_HSIZE; i++) { | 514 | for (i = 0; i < TCP_SYNQ_HSIZE; i++) { |
534 | while ((req = lopt->syn_table[i]) != NULL) { | 515 | while ((req = lopt->syn_table[i]) != NULL) { |
535 | lopt->syn_table[i] = req->dl_next; | 516 | lopt->syn_table[i] = req->dl_next; |
536 | lopt->qlen--; | 517 | lopt->qlen--; |
537 | tcp_openreq_free(req); | 518 | reqsk_free(req); |
538 | 519 | ||
539 | /* Following specs, it would be better either to send FIN | 520 | /* Following specs, it would be better either to send FIN |
540 | * (and enter FIN-WAIT-1, it is normal close) | 521 | * (and enter FIN-WAIT-1, it is normal close) |
@@ -574,7 +555,7 @@ static void tcp_listen_stop (struct sock *sk) | |||
574 | sock_put(child); | 555 | sock_put(child); |
575 | 556 | ||
576 | sk_acceptq_removed(sk); | 557 | sk_acceptq_removed(sk); |
577 | tcp_openreq_fastfree(req); | 558 | __reqsk_free(req); |
578 | } | 559 | } |
579 | BUG_TRAP(!sk->sk_ack_backlog); | 560 | BUG_TRAP(!sk->sk_ack_backlog); |
580 | } | 561 | } |
@@ -1345,7 +1326,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1345 | 1326 | ||
1346 | cleanup_rbuf(sk, copied); | 1327 | cleanup_rbuf(sk, copied); |
1347 | 1328 | ||
1348 | if (tp->ucopy.task == user_recv) { | 1329 | if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) { |
1349 | /* Install new reader */ | 1330 | /* Install new reader */ |
1350 | if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) { | 1331 | if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) { |
1351 | user_recv = current; | 1332 | user_recv = current; |
@@ -1868,11 +1849,11 @@ static int wait_for_connect(struct sock *sk, long timeo) | |||
1868 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, | 1849 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, |
1869 | TASK_INTERRUPTIBLE); | 1850 | TASK_INTERRUPTIBLE); |
1870 | release_sock(sk); | 1851 | release_sock(sk); |
1871 | if (!tp->accept_queue) | 1852 | if (reqsk_queue_empty(&tp->accept_queue)) |
1872 | timeo = schedule_timeout(timeo); | 1853 | timeo = schedule_timeout(timeo); |
1873 | lock_sock(sk); | 1854 | lock_sock(sk); |
1874 | err = 0; | 1855 | err = 0; |
1875 | if (tp->accept_queue) | 1856 | if (!reqsk_queue_empty(&tp->accept_queue)) |
1876 | break; | 1857 | break; |
1877 | err = -EINVAL; | 1858 | err = -EINVAL; |
1878 | if (sk->sk_state != TCP_LISTEN) | 1859 | if (sk->sk_state != TCP_LISTEN) |
@@ -1895,7 +1876,6 @@ static int wait_for_connect(struct sock *sk, long timeo) | |||
1895 | struct sock *tcp_accept(struct sock *sk, int flags, int *err) | 1876 | struct sock *tcp_accept(struct sock *sk, int flags, int *err) |
1896 | { | 1877 | { |
1897 | struct tcp_sock *tp = tcp_sk(sk); | 1878 | struct tcp_sock *tp = tcp_sk(sk); |
1898 | struct open_request *req; | ||
1899 | struct sock *newsk; | 1879 | struct sock *newsk; |
1900 | int error; | 1880 | int error; |
1901 | 1881 | ||
@@ -1906,37 +1886,31 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) | |||
1906 | */ | 1886 | */ |
1907 | error = -EINVAL; | 1887 | error = -EINVAL; |
1908 | if (sk->sk_state != TCP_LISTEN) | 1888 | if (sk->sk_state != TCP_LISTEN) |
1909 | goto out; | 1889 | goto out_err; |
1910 | 1890 | ||
1911 | /* Find already established connection */ | 1891 | /* Find already established connection */ |
1912 | if (!tp->accept_queue) { | 1892 | if (reqsk_queue_empty(&tp->accept_queue)) { |
1913 | long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); | 1893 | long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); |
1914 | 1894 | ||
1915 | /* If this is a non blocking socket don't sleep */ | 1895 | /* If this is a non blocking socket don't sleep */ |
1916 | error = -EAGAIN; | 1896 | error = -EAGAIN; |
1917 | if (!timeo) | 1897 | if (!timeo) |
1918 | goto out; | 1898 | goto out_err; |
1919 | 1899 | ||
1920 | error = wait_for_connect(sk, timeo); | 1900 | error = wait_for_connect(sk, timeo); |
1921 | if (error) | 1901 | if (error) |
1922 | goto out; | 1902 | goto out_err; |
1923 | } | 1903 | } |
1924 | 1904 | ||
1925 | req = tp->accept_queue; | 1905 | newsk = reqsk_queue_get_child(&tp->accept_queue, sk); |
1926 | if ((tp->accept_queue = req->dl_next) == NULL) | ||
1927 | tp->accept_queue_tail = NULL; | ||
1928 | |||
1929 | newsk = req->sk; | ||
1930 | sk_acceptq_removed(sk); | ||
1931 | tcp_openreq_fastfree(req); | ||
1932 | BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); | 1906 | BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); |
1933 | release_sock(sk); | ||
1934 | return newsk; | ||
1935 | |||
1936 | out: | 1907 | out: |
1937 | release_sock(sk); | 1908 | release_sock(sk); |
1909 | return newsk; | ||
1910 | out_err: | ||
1911 | newsk = NULL; | ||
1938 | *err = error; | 1912 | *err = error; |
1939 | return NULL; | 1913 | goto out; |
1940 | } | 1914 | } |
1941 | 1915 | ||
1942 | /* | 1916 | /* |
@@ -2271,13 +2245,6 @@ void __init tcp_init(void) | |||
2271 | __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), | 2245 | __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), |
2272 | sizeof(skb->cb)); | 2246 | sizeof(skb->cb)); |
2273 | 2247 | ||
2274 | tcp_openreq_cachep = kmem_cache_create("tcp_open_request", | ||
2275 | sizeof(struct open_request), | ||
2276 | 0, SLAB_HWCACHE_ALIGN, | ||
2277 | NULL, NULL); | ||
2278 | if (!tcp_openreq_cachep) | ||
2279 | panic("tcp_init: Cannot alloc open_request cache."); | ||
2280 | |||
2281 | tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", | 2248 | tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", |
2282 | sizeof(struct tcp_bind_bucket), | 2249 | sizeof(struct tcp_bind_bucket), |
2283 | 0, SLAB_HWCACHE_ALIGN, | 2250 | 0, SLAB_HWCACHE_ALIGN, |
@@ -2338,7 +2305,7 @@ void __init tcp_init(void) | |||
2338 | (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket)); | 2305 | (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket)); |
2339 | order++) | 2306 | order++) |
2340 | ; | 2307 | ; |
2341 | if (order > 4) { | 2308 | if (order >= 4) { |
2342 | sysctl_local_port_range[0] = 32768; | 2309 | sysctl_local_port_range[0] = 32768; |
2343 | sysctl_local_port_range[1] = 61000; | 2310 | sysctl_local_port_range[1] = 61000; |
2344 | sysctl_tcp_max_tw_buckets = 180000; | 2311 | sysctl_tcp_max_tw_buckets = 180000; |
@@ -2374,7 +2341,6 @@ EXPORT_SYMBOL(tcp_destroy_sock); | |||
2374 | EXPORT_SYMBOL(tcp_disconnect); | 2341 | EXPORT_SYMBOL(tcp_disconnect); |
2375 | EXPORT_SYMBOL(tcp_getsockopt); | 2342 | EXPORT_SYMBOL(tcp_getsockopt); |
2376 | EXPORT_SYMBOL(tcp_ioctl); | 2343 | EXPORT_SYMBOL(tcp_ioctl); |
2377 | EXPORT_SYMBOL(tcp_openreq_cachep); | ||
2378 | EXPORT_SYMBOL(tcp_poll); | 2344 | EXPORT_SYMBOL(tcp_poll); |
2379 | EXPORT_SYMBOL(tcp_read_sock); | 2345 | EXPORT_SYMBOL(tcp_read_sock); |
2380 | EXPORT_SYMBOL(tcp_recvmsg); | 2346 | EXPORT_SYMBOL(tcp_recvmsg); |