aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/request_sock.c48
-rw-r--r--net/ipv4/tcp.c67
-rw-r--r--net/ipv4/tcp_diag.c6
-rw-r--r--net/ipv4/tcp_ipv4.c32
-rw-r--r--net/ipv4/tcp_minisocks.c6
-rw-r--r--net/ipv4/tcp_timer.c10
-rw-r--r--net/ipv6/tcp_ipv6.c14
8 files changed, 94 insertions, 92 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 81f03243fe2f..5e0c56b7f607 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -2,7 +2,8 @@
2# Makefile for the Linux networking core. 2# Makefile for the Linux networking core.
3# 3#
4 4
5obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o 5obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
6 gen_stats.o gen_estimator.o
6 7
7obj-$(CONFIG_SYSCTL) += sysctl_net_core.o 8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
8 9
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
new file mode 100644
index 000000000000..1258333ca007
--- /dev/null
+++ b/net/core/request_sock.c
@@ -0,0 +1,48 @@
1/*
2 * NET Generic infrastructure for Network protocols.
3 *
4 * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
5 *
6 * From code originally in include/net/tcp.h
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#include <linux/module.h>
15#include <linux/random.h>
16#include <linux/slab.h>
17#include <linux/string.h>
18
19#include <net/request_sock.h>
20
21int reqsk_queue_alloc(struct request_sock_queue *queue,
22 const int nr_table_entries)
23{
24 const int lopt_size = sizeof(struct tcp_listen_opt) +
25 nr_table_entries * sizeof(struct request_sock *);
26 struct tcp_listen_opt *lopt = kmalloc(lopt_size, GFP_KERNEL);
27
28 if (lopt == NULL)
29 return -ENOMEM;
30
31 memset(lopt, 0, lopt_size);
32
33 for (lopt->max_qlen_log = 6;
34 (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog;
35 lopt->max_qlen_log++);
36
37 get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
38 rwlock_init(&queue->syn_wait_lock);
39 queue->rskq_accept_head = queue->rskq_accept_head = NULL;
40
41 write_lock_bh(&queue->syn_wait_lock);
42 queue->listen_opt = lopt;
43 write_unlock_bh(&queue->syn_wait_lock);
44
45 return 0;
46}
47
48EXPORT_SYMBOL(reqsk_queue_alloc);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1c29feb6b35f..b85a46dd40a0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -316,7 +316,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure);
316static __inline__ unsigned int tcp_listen_poll(struct sock *sk, 316static __inline__ unsigned int tcp_listen_poll(struct sock *sk,
317 poll_table *wait) 317 poll_table *wait)
318{ 318{
319 return tcp_sk(sk)->accept_queue ? (POLLIN | POLLRDNORM) : 0; 319 return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0;
320} 320}
321 321
322/* 322/*
@@ -462,28 +462,15 @@ int tcp_listen_start(struct sock *sk)
462{ 462{
463 struct inet_sock *inet = inet_sk(sk); 463 struct inet_sock *inet = inet_sk(sk);
464 struct tcp_sock *tp = tcp_sk(sk); 464 struct tcp_sock *tp = tcp_sk(sk);
465 struct tcp_listen_opt *lopt; 465 int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE);
466
467 if (rc != 0)
468 return rc;
466 469
467 sk->sk_max_ack_backlog = 0; 470 sk->sk_max_ack_backlog = 0;
468 sk->sk_ack_backlog = 0; 471 sk->sk_ack_backlog = 0;
469 tp->accept_queue = tp->accept_queue_tail = NULL;
470 rwlock_init(&tp->syn_wait_lock);
471 tcp_delack_init(tp); 472 tcp_delack_init(tp);
472 473
473 lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL);
474 if (!lopt)
475 return -ENOMEM;
476
477 memset(lopt, 0, sizeof(struct tcp_listen_opt));
478 for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)
479 if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog)
480 break;
481 get_random_bytes(&lopt->hash_rnd, 4);
482
483 write_lock_bh(&tp->syn_wait_lock);
484 tp->listen_opt = lopt;
485 write_unlock_bh(&tp->syn_wait_lock);
486
487 /* There is race window here: we announce ourselves listening, 474 /* There is race window here: we announce ourselves listening,
488 * but this transition is still not validated by get_port(). 475 * but this transition is still not validated by get_port().
489 * It is OK, because this socket enters to hash table only 476 * It is OK, because this socket enters to hash table only
@@ -500,10 +487,7 @@ int tcp_listen_start(struct sock *sk)
500 } 487 }
501 488
502 sk->sk_state = TCP_CLOSE; 489 sk->sk_state = TCP_CLOSE;
503 write_lock_bh(&tp->syn_wait_lock); 490 reqsk_queue_destroy(&tp->accept_queue);
504 tp->listen_opt = NULL;
505 write_unlock_bh(&tp->syn_wait_lock);
506 kfree(lopt);
507 return -EADDRINUSE; 491 return -EADDRINUSE;
508} 492}
509 493
@@ -515,18 +499,16 @@ int tcp_listen_start(struct sock *sk)
515static void tcp_listen_stop (struct sock *sk) 499static void tcp_listen_stop (struct sock *sk)
516{ 500{
517 struct tcp_sock *tp = tcp_sk(sk); 501 struct tcp_sock *tp = tcp_sk(sk);
518 struct tcp_listen_opt *lopt = tp->listen_opt; 502 struct tcp_listen_opt *lopt;
519 struct request_sock *acc_req = tp->accept_queue; 503 struct request_sock *acc_req;
520 struct request_sock *req; 504 struct request_sock *req;
521 int i; 505 int i;
522 506
523 tcp_delete_keepalive_timer(sk); 507 tcp_delete_keepalive_timer(sk);
524 508
525 /* make all the listen_opt local to us */ 509 /* make all the listen_opt local to us */
526 write_lock_bh(&tp->syn_wait_lock); 510 lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue);
527 tp->listen_opt = NULL; 511 acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue);
528 write_unlock_bh(&tp->syn_wait_lock);
529 tp->accept_queue = tp->accept_queue_tail = NULL;
530 512
531 if (lopt->qlen) { 513 if (lopt->qlen) {
532 for (i = 0; i < TCP_SYNQ_HSIZE; i++) { 514 for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
@@ -1867,11 +1849,11 @@ static int wait_for_connect(struct sock *sk, long timeo)
1867 prepare_to_wait_exclusive(sk->sk_sleep, &wait, 1849 prepare_to_wait_exclusive(sk->sk_sleep, &wait,
1868 TASK_INTERRUPTIBLE); 1850 TASK_INTERRUPTIBLE);
1869 release_sock(sk); 1851 release_sock(sk);
1870 if (!tp->accept_queue) 1852 if (reqsk_queue_empty(&tp->accept_queue))
1871 timeo = schedule_timeout(timeo); 1853 timeo = schedule_timeout(timeo);
1872 lock_sock(sk); 1854 lock_sock(sk);
1873 err = 0; 1855 err = 0;
1874 if (tp->accept_queue) 1856 if (!reqsk_queue_empty(&tp->accept_queue))
1875 break; 1857 break;
1876 err = -EINVAL; 1858 err = -EINVAL;
1877 if (sk->sk_state != TCP_LISTEN) 1859 if (sk->sk_state != TCP_LISTEN)
@@ -1894,7 +1876,6 @@ static int wait_for_connect(struct sock *sk, long timeo)
1894struct sock *tcp_accept(struct sock *sk, int flags, int *err) 1876struct sock *tcp_accept(struct sock *sk, int flags, int *err)
1895{ 1877{
1896 struct tcp_sock *tp = tcp_sk(sk); 1878 struct tcp_sock *tp = tcp_sk(sk);
1897 struct request_sock *req;
1898 struct sock *newsk; 1879 struct sock *newsk;
1899 int error; 1880 int error;
1900 1881
@@ -1905,37 +1886,31 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err)
1905 */ 1886 */
1906 error = -EINVAL; 1887 error = -EINVAL;
1907 if (sk->sk_state != TCP_LISTEN) 1888 if (sk->sk_state != TCP_LISTEN)
1908 goto out; 1889 goto out_err;
1909 1890
1910 /* Find already established connection */ 1891 /* Find already established connection */
1911 if (!tp->accept_queue) { 1892 if (reqsk_queue_empty(&tp->accept_queue)) {
1912 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 1893 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
1913 1894
1914 /* If this is a non blocking socket don't sleep */ 1895 /* If this is a non blocking socket don't sleep */
1915 error = -EAGAIN; 1896 error = -EAGAIN;
1916 if (!timeo) 1897 if (!timeo)
1917 goto out; 1898 goto out_err;
1918 1899
1919 error = wait_for_connect(sk, timeo); 1900 error = wait_for_connect(sk, timeo);
1920 if (error) 1901 if (error)
1921 goto out; 1902 goto out_err;
1922 } 1903 }
1923 1904
1924 req = tp->accept_queue; 1905 newsk = reqsk_queue_get_child(&tp->accept_queue, sk);
1925 if ((tp->accept_queue = req->dl_next) == NULL)
1926 tp->accept_queue_tail = NULL;
1927
1928 newsk = req->sk;
1929 sk_acceptq_removed(sk);
1930 __reqsk_free(req);
1931 BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); 1906 BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
1932 release_sock(sk);
1933 return newsk;
1934
1935out: 1907out:
1936 release_sock(sk); 1908 release_sock(sk);
1909 return newsk;
1910out_err:
1911 newsk = NULL;
1937 *err = error; 1912 *err = error;
1938 return NULL; 1913 goto out;
1939} 1914}
1940 1915
1941/* 1916/*
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 67277800d0c1..c3328fa48837 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -529,9 +529,9 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
529 529
530 entry.family = sk->sk_family; 530 entry.family = sk->sk_family;
531 531
532 read_lock_bh(&tp->syn_wait_lock); 532 read_lock_bh(&tp->accept_queue.syn_wait_lock);
533 533
534 lopt = tp->listen_opt; 534 lopt = tp->accept_queue.listen_opt;
535 if (!lopt || !lopt->qlen) 535 if (!lopt || !lopt->qlen)
536 goto out; 536 goto out;
537 537
@@ -588,7 +588,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
588 } 588 }
589 589
590out: 590out:
591 read_unlock_bh(&tp->syn_wait_lock); 591 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
592 592
593 return err; 593 return err;
594} 594}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 95528a75a63d..1745dc8d25e6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -874,7 +874,7 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp,
874 __u16 rport, 874 __u16 rport,
875 __u32 raddr, __u32 laddr) 875 __u32 raddr, __u32 laddr)
876{ 876{
877 struct tcp_listen_opt *lopt = tp->listen_opt; 877 struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
878 struct request_sock *req, **prev; 878 struct request_sock *req, **prev;
879 879
880 for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; 880 for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
@@ -898,18 +898,10 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp,
898static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req) 898static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req)
899{ 899{
900 struct tcp_sock *tp = tcp_sk(sk); 900 struct tcp_sock *tp = tcp_sk(sk);
901 struct tcp_listen_opt *lopt = tp->listen_opt; 901 struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
902 u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); 902 u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
903 903
904 req->expires = jiffies + TCP_TIMEOUT_INIT; 904 reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
905 req->retrans = 0;
906 req->sk = NULL;
907 req->dl_next = lopt->syn_table[h];
908
909 write_lock(&tp->syn_wait_lock);
910 lopt->syn_table[h] = req;
911 write_unlock(&tp->syn_wait_lock);
912
913 tcp_synq_added(sk); 905 tcp_synq_added(sk);
914} 906}
915 907
@@ -2167,17 +2159,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2167 if (++st->sbucket >= TCP_SYNQ_HSIZE) 2159 if (++st->sbucket >= TCP_SYNQ_HSIZE)
2168 break; 2160 break;
2169get_req: 2161get_req:
2170 req = tp->listen_opt->syn_table[st->sbucket]; 2162 req = tp->accept_queue.listen_opt->syn_table[st->sbucket];
2171 } 2163 }
2172 sk = sk_next(st->syn_wait_sk); 2164 sk = sk_next(st->syn_wait_sk);
2173 st->state = TCP_SEQ_STATE_LISTENING; 2165 st->state = TCP_SEQ_STATE_LISTENING;
2174 read_unlock_bh(&tp->syn_wait_lock); 2166 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2175 } else { 2167 } else {
2176 tp = tcp_sk(sk); 2168 tp = tcp_sk(sk);
2177 read_lock_bh(&tp->syn_wait_lock); 2169 read_lock_bh(&tp->accept_queue.syn_wait_lock);
2178 if (tp->listen_opt && tp->listen_opt->qlen) 2170 if (reqsk_queue_len(&tp->accept_queue))
2179 goto start_req; 2171 goto start_req;
2180 read_unlock_bh(&tp->syn_wait_lock); 2172 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2181 sk = sk_next(sk); 2173 sk = sk_next(sk);
2182 } 2174 }
2183get_sk: 2175get_sk:
@@ -2187,8 +2179,8 @@ get_sk:
2187 goto out; 2179 goto out;
2188 } 2180 }
2189 tp = tcp_sk(sk); 2181 tp = tcp_sk(sk);
2190 read_lock_bh(&tp->syn_wait_lock); 2182 read_lock_bh(&tp->accept_queue.syn_wait_lock);
2191 if (tp->listen_opt && tp->listen_opt->qlen) { 2183 if (reqsk_queue_len(&tp->accept_queue)) {
2192start_req: 2184start_req:
2193 st->uid = sock_i_uid(sk); 2185 st->uid = sock_i_uid(sk);
2194 st->syn_wait_sk = sk; 2186 st->syn_wait_sk = sk;
@@ -2196,7 +2188,7 @@ start_req:
2196 st->sbucket = 0; 2188 st->sbucket = 0;
2197 goto get_req; 2189 goto get_req;
2198 } 2190 }
2199 read_unlock_bh(&tp->syn_wait_lock); 2191 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2200 } 2192 }
2201 if (++st->bucket < TCP_LHTABLE_SIZE) { 2193 if (++st->bucket < TCP_LHTABLE_SIZE) {
2202 sk = sk_head(&tcp_listening_hash[st->bucket]); 2194 sk = sk_head(&tcp_listening_hash[st->bucket]);
@@ -2383,7 +2375,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2383 case TCP_SEQ_STATE_OPENREQ: 2375 case TCP_SEQ_STATE_OPENREQ:
2384 if (v) { 2376 if (v) {
2385 struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); 2377 struct tcp_sock *tp = tcp_sk(st->syn_wait_sk);
2386 read_unlock_bh(&tp->syn_wait_lock); 2378 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2387 } 2379 }
2388 case TCP_SEQ_STATE_LISTENING: 2380 case TCP_SEQ_STATE_LISTENING:
2389 if (v != SEQ_START_TOKEN) 2381 if (v != SEQ_START_TOKEN)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0e6d525a8341..b3943e7562f3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -790,10 +790,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
790 newtp->probes_out = 0; 790 newtp->probes_out = 0;
791 newtp->rx_opt.num_sacks = 0; 791 newtp->rx_opt.num_sacks = 0;
792 newtp->urg_data = 0; 792 newtp->urg_data = 0;
793 newtp->listen_opt = NULL; 793 /* Deinitialize accept_queue to trap illegal accesses. */
794 newtp->accept_queue = newtp->accept_queue_tail = NULL; 794 memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue));
795 /* Deinitialize syn_wait_lock to trap illegal accesses. */
796 memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock));
797 795
798 /* Back to base struct sock members. */ 796 /* Back to base struct sock members. */
799 newsk->sk_err = 0; 797 newsk->sk_err = 0;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index f03efe5fb76a..d97d191149c1 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -464,7 +464,7 @@ out_unlock:
464static void tcp_synack_timer(struct sock *sk) 464static void tcp_synack_timer(struct sock *sk)
465{ 465{
466 struct tcp_sock *tp = tcp_sk(sk); 466 struct tcp_sock *tp = tcp_sk(sk);
467 struct tcp_listen_opt *lopt = tp->listen_opt; 467 struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
468 int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; 468 int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
469 int thresh = max_retries; 469 int thresh = max_retries;
470 unsigned long now = jiffies; 470 unsigned long now = jiffies;
@@ -527,12 +527,8 @@ static void tcp_synack_timer(struct sock *sk)
527 } 527 }
528 528
529 /* Drop this request */ 529 /* Drop this request */
530 write_lock(&tp->syn_wait_lock); 530 tcp_synq_unlink(tp, req, reqp);
531 *reqp = req->dl_next; 531 reqsk_queue_removed(&tp->accept_queue, req);
532 write_unlock(&tp->syn_wait_lock);
533 lopt->qlen--;
534 if (req->retrans == 0)
535 lopt->qlen_young--;
536 reqsk_free(req); 532 reqsk_free(req);
537 continue; 533 continue;
538 } 534 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 068cd4a8c292..84091daad6b5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -401,7 +401,7 @@ static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
401 struct in6_addr *laddr, 401 struct in6_addr *laddr,
402 int iif) 402 int iif)
403{ 403{
404 struct tcp_listen_opt *lopt = tp->listen_opt; 404 struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
405 struct request_sock *req, **prev; 405 struct request_sock *req, **prev;
406 406
407 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; 407 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
@@ -1267,18 +1267,10 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1267static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) 1267static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1268{ 1268{
1269 struct tcp_sock *tp = tcp_sk(sk); 1269 struct tcp_sock *tp = tcp_sk(sk);
1270 struct tcp_listen_opt *lopt = tp->listen_opt; 1270 struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt;
1271 u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); 1271 u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1272 1272
1273 req->sk = NULL; 1273 reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
1274 req->expires = jiffies + TCP_TIMEOUT_INIT;
1275 req->retrans = 0;
1276 req->dl_next = lopt->syn_table[h];
1277
1278 write_lock(&tp->syn_wait_lock);
1279 lopt->syn_table[h] = req;
1280 write_unlock(&tp->syn_wait_lock);
1281
1282 tcp_synq_added(sk); 1274 tcp_synq_added(sk);
1283} 1275}
1284 1276