Diffstat (limited to 'net/ipv4/tcp.c')
 -rw-r--r--  net/ipv4/tcp.c  86
 1 file changed, 26 insertions(+), 60 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a037bafcba3c..674bbd8cfd36 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,7 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
 
 DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 
-kmem_cache_t *tcp_openreq_cachep;
 kmem_cache_t *tcp_bucket_cachep;
 kmem_cache_t *tcp_timewait_cachep;
 
@@ -317,7 +316,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure);
 static __inline__ unsigned int tcp_listen_poll(struct sock *sk,
                                                poll_table *wait)
 {
-        return tcp_sk(sk)->accept_queue ? (POLLIN | POLLRDNORM) : 0;
+        return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0;
 }
 
 /*
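The tcp_listen_poll() hunk is representative of the whole patch: open-coded tests on tp->accept_queue become calls into the generic request_sock queue API. As a minimal sketch (assuming the request_sock_queue layout this patch introduces in include/net/request_sock.h), the emptiness helper reduces to the same NULL test the old code applied directly:

        /* Sketch: an accept queue is empty when its head pointer is NULL,
         * exactly the test tcp_listen_poll() used to open-code against
         * tp->accept_queue.
         */
        static inline int reqsk_queue_empty(struct request_sock_queue *queue)
        {
                return queue->rskq_accept_head == NULL;
        }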
@@ -463,28 +462,15 @@ int tcp_listen_start(struct sock *sk)
 {
         struct inet_sock *inet = inet_sk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
-        struct tcp_listen_opt *lopt;
+        int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE);
+
+        if (rc != 0)
+                return rc;
 
         sk->sk_max_ack_backlog = 0;
         sk->sk_ack_backlog = 0;
-        tp->accept_queue = tp->accept_queue_tail = NULL;
-        rwlock_init(&tp->syn_wait_lock);
         tcp_delack_init(tp);
 
-        lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL);
-        if (!lopt)
-                return -ENOMEM;
-
-        memset(lopt, 0, sizeof(struct tcp_listen_opt));
-        for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)
-                if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog)
-                        break;
-        get_random_bytes(&lopt->hash_rnd, 4);
-
-        write_lock_bh(&tp->syn_wait_lock);
-        tp->listen_opt = lopt;
-        write_unlock_bh(&tp->syn_wait_lock);
-
         /* There is race window here: we announce ourselves listening,
          * but this transition is still not validated by get_port().
          * It is OK, because this socket enters to hash table only
@@ -501,10 +487,7 @@ int tcp_listen_start(struct sock *sk)
         }
 
         sk->sk_state = TCP_CLOSE;
-        write_lock_bh(&tp->syn_wait_lock);
-        tp->listen_opt = NULL;
-        write_unlock_bh(&tp->syn_wait_lock);
-        kfree(lopt);
+        reqsk_queue_destroy(&tp->accept_queue);
         return -EADDRINUSE;
 }
 
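Taken together, the two tcp_listen_start() hunks collapse the old open-coded SYN-table setup (kmalloc and memset, sizing max_qlen_log from sysctl_max_syn_backlog, seeding hash_rnd, publishing under syn_wait_lock) into one reqsk_queue_alloc() call, with reqsk_queue_destroy() as its error-path counterpart. A hedged sketch of what the new helper must do, reconstructed from the deleted lines above (the real implementation lives in net/core/request_sock.c and may differ in detail):

        /* Sketch of reqsk_queue_alloc(): the steps the deleted lines
         * performed, moved behind the generic API. syn_table is assumed
         * to be a trailing array of nr_table_entries bucket pointers.
         */
        int reqsk_queue_alloc(struct request_sock_queue *queue,
                              const int nr_table_entries)
        {
                const int lopt_size = sizeof(struct listen_sock) +
                                      nr_table_entries * sizeof(struct request_sock *);
                struct listen_sock *lopt = kmalloc(lopt_size, GFP_KERNEL);

                if (lopt == NULL)
                        return -ENOMEM;
                memset(lopt, 0, lopt_size);

                /* smallest power of two >= sysctl_max_syn_backlog, as before */
                for (lopt->max_qlen_log = 6;
                     (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog;
                     lopt->max_qlen_log++)
                        ;
                get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));

                rwlock_init(&queue->syn_wait_lock);
                queue->rskq_accept_head = queue->rskq_accept_tail = NULL;

                write_lock_bh(&queue->syn_wait_lock);
                queue->listen_opt = lopt;       /* publish the SYN table */
                write_unlock_bh(&queue->syn_wait_lock);
                return 0;
        }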
@@ -516,25 +499,23 @@ int tcp_listen_start(struct sock *sk)
 static void tcp_listen_stop (struct sock *sk)
 {
         struct tcp_sock *tp = tcp_sk(sk);
-        struct tcp_listen_opt *lopt = tp->listen_opt;
-        struct open_request *acc_req = tp->accept_queue;
-        struct open_request *req;
+        struct listen_sock *lopt;
+        struct request_sock *acc_req;
+        struct request_sock *req;
         int i;
 
         tcp_delete_keepalive_timer(sk);
 
         /* make all the listen_opt local to us */
-        write_lock_bh(&tp->syn_wait_lock);
-        tp->listen_opt = NULL;
-        write_unlock_bh(&tp->syn_wait_lock);
-        tp->accept_queue = tp->accept_queue_tail = NULL;
+        lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue);
+        acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue);
 
         if (lopt->qlen) {
                 for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
                         while ((req = lopt->syn_table[i]) != NULL) {
                                 lopt->syn_table[i] = req->dl_next;
                                 lopt->qlen--;
-                                tcp_openreq_free(req);
+                                reqsk_free(req);
 
                                 /* Following specs, it would be better either to send FIN
                                  * (and enter FIN-WAIT-1, it is normal close)
@@ -574,7 +555,7 @@ static void tcp_listen_stop (struct sock *sk)
                 sock_put(child);
 
                 sk_acceptq_removed(sk);
-                tcp_openreq_fastfree(req);
+                __reqsk_free(req);
         }
         BUG_TRAP(!sk->sk_ack_backlog);
 }
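The "yank" helpers above detach the listener's state so teardown can proceed without holding syn_wait_lock; they directly replace the deleted lock/NULL/unlock sequence. A plausible sketch, inferred from that deleted code (the names are the ones used in this diff; the exact bodies are assumptions):

        /* Sketch: detach the SYN table under syn_wait_lock, mirroring the
         * deleted tp->listen_opt = NULL sequence in tcp_listen_stop().
         */
        static inline struct listen_sock *
        reqsk_queue_yank_listen_sk(struct request_sock_queue *queue)
        {
                struct listen_sock *lopt;

                write_lock_bh(&queue->syn_wait_lock);
                lopt = queue->listen_opt;
                queue->listen_opt = NULL;
                write_unlock_bh(&queue->syn_wait_lock);
                return lopt;
        }

        /* Sketch: detach the list of completed requests; the caller then
         * owns it, as tcp_listen_stop() owned acc_req before.
         */
        static inline struct request_sock *
        reqsk_queue_yank_acceptq(struct request_sock_queue *queue)
        {
                struct request_sock *req = queue->rskq_accept_head;

                queue->rskq_accept_head = NULL;
                return req;
        }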
@@ -1345,7 +1326,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
                 cleanup_rbuf(sk, copied);
 
-                if (tp->ucopy.task == user_recv) {
+                if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
                         /* Install new reader */
                         if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
                                 user_recv = current;
@@ -1868,11 +1849,11 @@ static int wait_for_connect(struct sock *sk, long timeo)
                 prepare_to_wait_exclusive(sk->sk_sleep, &wait,
                                           TASK_INTERRUPTIBLE);
                 release_sock(sk);
-                if (!tp->accept_queue)
+                if (reqsk_queue_empty(&tp->accept_queue))
                         timeo = schedule_timeout(timeo);
                 lock_sock(sk);
                 err = 0;
-                if (tp->accept_queue)
+                if (!reqsk_queue_empty(&tp->accept_queue))
                         break;
                 err = -EINVAL;
                 if (sk->sk_state != TCP_LISTEN)
@@ -1895,7 +1876,6 @@ static int wait_for_connect(struct sock *sk, long timeo)
 struct sock *tcp_accept(struct sock *sk, int flags, int *err)
 {
         struct tcp_sock *tp = tcp_sk(sk);
-        struct open_request *req;
         struct sock *newsk;
         int error;
 
@@ -1906,37 +1886,31 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err)
          */
         error = -EINVAL;
         if (sk->sk_state != TCP_LISTEN)
-                goto out;
+                goto out_err;
 
         /* Find already established connection */
-        if (!tp->accept_queue) {
+        if (reqsk_queue_empty(&tp->accept_queue)) {
                 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 
                 /* If this is a non blocking socket don't sleep */
                 error = -EAGAIN;
                 if (!timeo)
-                        goto out;
+                        goto out_err;
 
                 error = wait_for_connect(sk, timeo);
                 if (error)
-                        goto out;
+                        goto out_err;
         }
 
-        req = tp->accept_queue;
-        if ((tp->accept_queue = req->dl_next) == NULL)
-                tp->accept_queue_tail = NULL;
-
-        newsk = req->sk;
-        sk_acceptq_removed(sk);
-        tcp_openreq_fastfree(req);
+        newsk = reqsk_queue_get_child(&tp->accept_queue, sk);
         BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
-        release_sock(sk);
-        return newsk;
-
 out:
         release_sock(sk);
+        return newsk;
+out_err:
+        newsk = NULL;
         *err = error;
-        return NULL;
+        goto out;
 }
 
 /*
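The tcp_accept() hunk replaces seven lines of open-coded dequeueing with reqsk_queue_get_child() and reworks the exits so both paths funnel through a single release_sock(). A hedged sketch of the dequeue helpers, reconstructed from the lines they replace (exact bodies are assumptions):

        /* Sketch: unlink the oldest completed request from the accept
         * queue, maintaining the tail pointer as the deleted tcp_accept()
         * code did.
         */
        static inline struct request_sock *
        reqsk_queue_remove(struct request_sock_queue *queue)
        {
                struct request_sock *req = queue->rskq_accept_head;

                if ((queue->rskq_accept_head = req->dl_next) == NULL)
                        queue->rskq_accept_tail = NULL;
                return req;
        }

        /* Sketch: hand the child socket to the accepting parent and free
         * the request with __reqsk_free(), the fast variant used when no
         * extra references are held (the old tcp_openreq_fastfree()).
         */
        static inline struct sock *
        reqsk_queue_get_child(struct request_sock_queue *queue,
                              struct sock *parent)
        {
                struct request_sock *req = reqsk_queue_remove(queue);
                struct sock *child = req->sk;

                sk_acceptq_removed(parent);     /* parent backlog accounting */
                __reqsk_free(req);
                return child;
        }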
@@ -2271,13 +2245,6 @@ void __init tcp_init(void)
         __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
                                    sizeof(skb->cb));
 
-        tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
-                                               sizeof(struct open_request),
-                                               0, SLAB_HWCACHE_ALIGN,
-                                               NULL, NULL);
-        if (!tcp_openreq_cachep)
-                panic("tcp_init: Cannot alloc open_request cache.");
-
         tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket",
                                               sizeof(struct tcp_bind_bucket),
                                               0, SLAB_HWCACHE_ALIGN,
@@ -2338,7 +2305,7 @@ void __init tcp_init(void)
              (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket));
              order++)
                 ;
-        if (order > 4) {
+        if (order >= 4) {
                 sysctl_local_port_range[0] = 32768;
                 sysctl_local_port_range[1] = 61000;
                 sysctl_tcp_max_tw_buckets = 180000;
@@ -2374,7 +2341,6 @@ EXPORT_SYMBOL(tcp_destroy_sock);
 EXPORT_SYMBOL(tcp_disconnect);
 EXPORT_SYMBOL(tcp_getsockopt);
 EXPORT_SYMBOL(tcp_ioctl);
-EXPORT_SYMBOL(tcp_openreq_cachep);
 EXPORT_SYMBOL(tcp_poll);
 EXPORT_SYMBOL(tcp_read_sock);
 EXPORT_SYMBOL(tcp_recvmsg);
