diff options
-rw-r--r-- | include/linux/tcp.h | 17 | ||||
-rw-r--r-- | include/net/request_sock.h | 178 | ||||
-rw-r--r-- | include/net/tcp.h | 46 | ||||
-rw-r--r-- | net/core/Makefile | 3 | ||||
-rw-r--r-- | net/core/request_sock.c | 48 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 67 | ||||
-rw-r--r-- | net/ipv4/tcp_diag.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 32 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 10 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 14 |
11 files changed, 281 insertions, 146 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fb54292a15aa..97a7c9e03df5 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h | |||
@@ -379,22 +379,7 @@ struct tcp_sock { | |||
379 | 379 | ||
380 | __u32 total_retrans; /* Total retransmits for entire connection */ | 380 | __u32 total_retrans; /* Total retransmits for entire connection */ |
381 | 381 | ||
382 | /* The syn_wait_lock is necessary only to avoid proc interface having | 382 | struct request_sock_queue accept_queue; /* FIFO of established children */ |
383 | * to grab the main lock sock while browsing the listening hash | ||
384 | * (otherwise it's deadlock prone). | ||
385 | * This lock is acquired in read mode only from listening_get_next() | ||
386 | * and it's acquired in write mode _only_ from code that is actively | ||
387 | * changing the syn_wait_queue. All readers that are holding | ||
388 | * the master sock lock don't need to grab this lock in read mode | ||
389 | * too as the syn_wait_queue writes are always protected from | ||
390 | * the main sock lock. | ||
391 | */ | ||
392 | rwlock_t syn_wait_lock; | ||
393 | struct tcp_listen_opt *listen_opt; | ||
394 | |||
395 | /* FIFO of established children */ | ||
396 | struct request_sock *accept_queue; | ||
397 | struct request_sock *accept_queue_tail; | ||
398 | 383 | ||
399 | unsigned int keepalive_time; /* time before keep alive takes place */ | 384 | unsigned int keepalive_time; /* time before keep alive takes place */ |
400 | unsigned int keepalive_intvl; /* time interval between keep alive probes */ | 385 | unsigned int keepalive_intvl; /* time interval between keep alive probes */ |
diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 08a8fd1d1610..38943ed04e73 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h | |||
@@ -16,7 +16,9 @@ | |||
16 | #define _REQUEST_SOCK_H | 16 | #define _REQUEST_SOCK_H |
17 | 17 | ||
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | #include <linux/spinlock.h> | ||
19 | #include <linux/types.h> | 20 | #include <linux/types.h> |
21 | |||
20 | #include <net/sock.h> | 22 | #include <net/sock.h> |
21 | 23 | ||
22 | struct request_sock; | 24 | struct request_sock; |
@@ -74,4 +76,180 @@ static inline void reqsk_free(struct request_sock *req) | |||
74 | __reqsk_free(req); | 76 | __reqsk_free(req); |
75 | } | 77 | } |
76 | 78 | ||
79 | extern int sysctl_max_syn_backlog; | ||
80 | |||
81 | /** struct tcp_listen_opt - listen state | ||
82 | * | ||
83 | * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs | ||
84 | */ | ||
85 | struct tcp_listen_opt { | ||
86 | u8 max_qlen_log; | ||
87 | /* 3 bytes hole, try to use */ | ||
88 | int qlen; /* incremented by reqsk_queue_added(), decremented by reqsk_queue_removed() */ | ||
89 | int qlen_young; /* incremented on add; decremented on removal only when req->retrans == 0 */ | ||
90 | int clock_hand; /* NOTE(review): presumably the scan cursor of the SYN-ACK timer - confirm */ | ||
91 | u32 hash_rnd; /* random value filled in by reqsk_queue_alloc() */ | ||
92 | struct request_sock *syn_table[0]; /* bucket heads; storage is appended when the struct is allocated */ | ||
93 | }; | ||
94 | |||
95 | /** struct request_sock_queue - queue of request_socks | ||
96 | * | ||
97 | * @rskq_accept_head - FIFO head of established children | ||
98 | * @rskq_accept_tail - FIFO tail of established children | ||
99 | * @syn_wait_lock - serializer | ||
100 | * | ||
101 | * %syn_wait_lock is necessary only to avoid proc interface having to grab the main | ||
102 | * lock sock while browsing the listening hash (otherwise it's deadlock prone). | ||
103 | * | ||
104 | * This lock is acquired in read mode only from listening_get_next() seq_file | ||
105 | * op and it's acquired in write mode _only_ from code that is actively | ||
106 | * changing listen_opt or its syn_table chains. All readers that are holding the master sock lock | ||
107 | * don't need to grab this lock in read mode too as listen_opt and syn_table writes | ||
108 | * are always protected by the main sock lock. | ||
109 | */ | ||
110 | struct request_sock_queue { | ||
111 | struct request_sock *rskq_accept_head; | ||
112 | struct request_sock *rskq_accept_tail; | ||
113 | rwlock_t syn_wait_lock; | ||
114 | struct tcp_listen_opt *listen_opt; /* installed by reqsk_queue_alloc(), reset to NULL by reqsk_queue_yank_listen_sk() */ | ||
115 | }; | ||
116 | |||
117 | extern int reqsk_queue_alloc(struct request_sock_queue *queue, | ||
118 | const int nr_table_entries); | ||
119 | |||
120 | static inline struct tcp_listen_opt *reqsk_queue_yank_listen_sk(struct request_sock_queue *queue) | ||
121 | { | ||
122 | struct tcp_listen_opt *lopt; /* table being detached; handed back to the caller */ | ||
123 | |||
124 | write_lock_bh(&queue->syn_wait_lock); /* keep lock-holding readers away while listen_opt is swapped out */ | ||
125 | lopt = queue->listen_opt; | ||
126 | queue->listen_opt = NULL; | ||
127 | write_unlock_bh(&queue->syn_wait_lock); | ||
128 | |||
129 | return lopt; /* NULL if no table was installed */ | ||
130 | } | ||
131 | |||
132 | static inline void reqsk_queue_destroy(struct request_sock_queue *queue) | ||
133 | { | ||
134 | kfree(reqsk_queue_yank_listen_sk(queue)); /* detach listen_opt under the lock, then free the table itself */ | ||
135 | } | ||
136 | |||
137 | static inline struct request_sock * | ||
138 | reqsk_queue_yank_acceptq(struct request_sock_queue *queue) /* detach and return the whole accept FIFO */ | ||
139 | { | ||
140 | struct request_sock *req = queue->rskq_accept_head; /* chain handed to the caller, linked via dl_next */ | ||
141 | |||
142 | queue->rskq_accept_head = queue->rskq_accept_tail = NULL; /* empty the FIFO: clear both ends, not the head twice */ | ||
143 | return req; | ||
144 | } | ||
145 | |||
146 | static inline int reqsk_queue_empty(struct request_sock_queue *queue) | ||
147 | { | ||
148 | return queue->rskq_accept_head == NULL; /* non-zero when the accept FIFO holds no request */ | ||
149 | } | ||
150 | |||
151 | static inline void reqsk_queue_unlink(struct request_sock_queue *queue, | ||
152 | struct request_sock *req, | ||
153 | struct request_sock **prev_req) /* assumes prev_req is the link that currently references req - confirm at call sites */ | ||
154 | { | ||
155 | write_lock(&queue->syn_wait_lock); | ||
156 | *prev_req = req->dl_next; /* splice req out: the referencing link now points past it */ | ||
157 | write_unlock(&queue->syn_wait_lock); | ||
158 | } | ||
159 | |||
160 | static inline void reqsk_queue_add(struct request_sock_queue *queue, | ||
161 | struct request_sock *req, | ||
162 | struct sock *parent, | ||
163 | struct sock *child) /* appends req, carrying child, to the tail of the accept FIFO */ | ||
164 | { | ||
165 | req->sk = child; | ||
166 | sk_acceptq_added(parent); | ||
167 | |||
168 | if (queue->rskq_accept_head == NULL) | ||
169 | queue->rskq_accept_head = req; /* empty FIFO: req becomes the head */ | ||
170 | else | ||
171 | queue->rskq_accept_tail->dl_next = req; /* otherwise chain it after the current tail */ | ||
172 | |||
173 | queue->rskq_accept_tail = req; | ||
174 | req->dl_next = NULL; /* req is now the last entry */ | ||
175 | } | ||
176 | |||
177 | static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue) | ||
178 | { | ||
179 | struct request_sock *req = queue->rskq_accept_head; /* oldest entry of the FIFO */ | ||
180 | |||
181 | BUG_TRAP(req != NULL); /* the FIFO must not be empty: req is dereferenced just below */ | ||
182 | |||
183 | queue->rskq_accept_head = req->dl_next; | ||
184 | if (queue->rskq_accept_head == NULL) | ||
185 | queue->rskq_accept_tail = NULL; /* FIFO drained: clear the tail as well */ | ||
186 | |||
187 | return req; | ||
188 | } | ||
189 | |||
190 | static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue, | ||
191 | struct sock *parent) | ||
192 | { | ||
193 | struct request_sock *req = reqsk_queue_remove(queue); /* pop the head of the accept FIFO */ | ||
194 | struct sock *child = req->sk; | ||
195 | |||
196 | BUG_TRAP(child != NULL); | ||
197 | |||
198 | sk_acceptq_removed(parent); | ||
199 | __reqsk_free(req); /* the request itself is released; only the child sock is returned */ | ||
200 | return child; | ||
201 | } | ||
202 | |||
203 | static inline int reqsk_queue_removed(struct request_sock_queue *queue, | ||
204 | struct request_sock *req) | ||
205 | { | ||
206 | struct tcp_listen_opt *lopt = queue->listen_opt; /* dereferenced without a NULL check */ | ||
207 | |||
208 | if (req->retrans == 0) | ||
209 | --lopt->qlen_young; /* req was never retransmitted, so it was still counted as young */ | ||
210 | |||
211 | return --lopt->qlen; /* remaining length; tcp_synq_removed() deletes the keepalive timer when this is 0 */ | ||
212 | } | ||
213 | |||
214 | static inline int reqsk_queue_added(struct request_sock_queue *queue) | ||
215 | { | ||
216 | struct tcp_listen_opt *lopt = queue->listen_opt; /* dereferenced without a NULL check */ | ||
217 | const int prev_qlen = lopt->qlen; /* length before this addition */ | ||
218 | |||
219 | lopt->qlen_young++; | ||
220 | lopt->qlen++; | ||
221 | return prev_qlen; /* 0 means the queue was empty; tcp_synq_added() then resets the keepalive timer */ | ||
222 | } | ||
223 | |||
224 | static inline int reqsk_queue_len(struct request_sock_queue *queue) | ||
225 | { | ||
226 | return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; /* a queue without a table reports length 0 */ | ||
227 | } | ||
228 | |||
229 | static inline int reqsk_queue_len_young(struct request_sock_queue *queue) | ||
230 | { | ||
231 | return queue->listen_opt->qlen_young; /* unlike reqsk_queue_len(), no NULL check: listen_opt must be set */ | ||
232 | } | ||
233 | |||
234 | static inline int reqsk_queue_is_full(struct request_sock_queue *queue) | ||
235 | { | ||
236 | return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; /* non-zero once qlen reaches 2^max_qlen_log; listen_opt must be set */ | ||
237 | } | ||
238 | |||
239 | static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, | ||
240 | u32 hash, struct request_sock *req, | ||
241 | unsigned timeout) /* links req into bucket 'hash'; qlen counters are not touched here */ | ||
242 | { | ||
243 | struct tcp_listen_opt *lopt = queue->listen_opt; | ||
244 | |||
245 | req->expires = jiffies + timeout; | ||
246 | req->retrans = 0; | ||
247 | req->sk = NULL; | ||
248 | req->dl_next = lopt->syn_table[hash]; /* req will sit at the front of the bucket chain */ | ||
249 | |||
250 | write_lock(&queue->syn_wait_lock); | ||
251 | lopt->syn_table[hash] = req; /* only this bucket-head store is done under the lock */ | ||
252 | write_unlock(&queue->syn_wait_lock); | ||
253 | } | ||
254 | |||
77 | #endif /* _REQUEST_SOCK_H */ | 255 | #endif /* _REQUEST_SOCK_H */ |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 6663086a5e35..a2e323c54457 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -1686,71 +1686,41 @@ static inline int tcp_full_space(const struct sock *sk) | |||
1686 | static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req, | 1686 | static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req, |
1687 | struct sock *child) | 1687 | struct sock *child) |
1688 | { | 1688 | { |
1689 | struct tcp_sock *tp = tcp_sk(sk); | 1689 | reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child); |
1690 | |||
1691 | req->sk = child; | ||
1692 | sk_acceptq_added(sk); | ||
1693 | |||
1694 | if (!tp->accept_queue_tail) { | ||
1695 | tp->accept_queue = req; | ||
1696 | } else { | ||
1697 | tp->accept_queue_tail->dl_next = req; | ||
1698 | } | ||
1699 | tp->accept_queue_tail = req; | ||
1700 | req->dl_next = NULL; | ||
1701 | } | 1690 | } |
1702 | 1691 | ||
1703 | struct tcp_listen_opt | ||
1704 | { | ||
1705 | u8 max_qlen_log; /* log_2 of maximal queued SYNs */ | ||
1706 | int qlen; | ||
1707 | int qlen_young; | ||
1708 | int clock_hand; | ||
1709 | u32 hash_rnd; | ||
1710 | struct request_sock *syn_table[TCP_SYNQ_HSIZE]; | ||
1711 | }; | ||
1712 | |||
1713 | static inline void | 1692 | static inline void |
1714 | tcp_synq_removed(struct sock *sk, struct request_sock *req) | 1693 | tcp_synq_removed(struct sock *sk, struct request_sock *req) |
1715 | { | 1694 | { |
1716 | struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; | 1695 | if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0) |
1717 | |||
1718 | if (--lopt->qlen == 0) | ||
1719 | tcp_delete_keepalive_timer(sk); | 1696 | tcp_delete_keepalive_timer(sk); |
1720 | if (req->retrans == 0) | ||
1721 | lopt->qlen_young--; | ||
1722 | } | 1697 | } |
1723 | 1698 | ||
1724 | static inline void tcp_synq_added(struct sock *sk) | 1699 | static inline void tcp_synq_added(struct sock *sk) |
1725 | { | 1700 | { |
1726 | struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; | 1701 | if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0) |
1727 | |||
1728 | if (lopt->qlen++ == 0) | ||
1729 | tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); | 1702 | tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); |
1730 | lopt->qlen_young++; | ||
1731 | } | 1703 | } |
1732 | 1704 | ||
1733 | static inline int tcp_synq_len(struct sock *sk) | 1705 | static inline int tcp_synq_len(struct sock *sk) |
1734 | { | 1706 | { |
1735 | return tcp_sk(sk)->listen_opt->qlen; | 1707 | return reqsk_queue_len(&tcp_sk(sk)->accept_queue); |
1736 | } | 1708 | } |
1737 | 1709 | ||
1738 | static inline int tcp_synq_young(struct sock *sk) | 1710 | static inline int tcp_synq_young(struct sock *sk) |
1739 | { | 1711 | { |
1740 | return tcp_sk(sk)->listen_opt->qlen_young; | 1712 | return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue); |
1741 | } | 1713 | } |
1742 | 1714 | ||
1743 | static inline int tcp_synq_is_full(struct sock *sk) | 1715 | static inline int tcp_synq_is_full(struct sock *sk) |
1744 | { | 1716 | { |
1745 | return tcp_synq_len(sk) >> tcp_sk(sk)->listen_opt->max_qlen_log; | 1717 | return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue); |
1746 | } | 1718 | } |
1747 | 1719 | ||
1748 | static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req, | 1720 | static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req, |
1749 | struct request_sock **prev) | 1721 | struct request_sock **prev) |
1750 | { | 1722 | { |
1751 | write_lock(&tp->syn_wait_lock); | 1723 | reqsk_queue_unlink(&tp->accept_queue, req, prev); |
1752 | *prev = req->dl_next; | ||
1753 | write_unlock(&tp->syn_wait_lock); | ||
1754 | } | 1724 | } |
1755 | 1725 | ||
1756 | static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req, | 1726 | static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req, |
diff --git a/net/core/Makefile b/net/core/Makefile index 81f03243fe2f..5e0c56b7f607 100644 --- a/net/core/Makefile +++ b/net/core/Makefile | |||
@@ -2,7 +2,8 @@ | |||
2 | # Makefile for the Linux networking core. | 2 | # Makefile for the Linux networking core. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o | 5 | obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ |
6 | gen_stats.o gen_estimator.o | ||
6 | 7 | ||
7 | obj-$(CONFIG_SYSCTL) += sysctl_net_core.o | 8 | obj-$(CONFIG_SYSCTL) += sysctl_net_core.o |
8 | 9 | ||
diff --git a/net/core/request_sock.c b/net/core/request_sock.c new file mode 100644 index 000000000000..1258333ca007 --- /dev/null +++ b/net/core/request_sock.c | |||
@@ -0,0 +1,48 @@ | |||
1 | /* | ||
2 | * NET Generic infrastructure for Network protocols. | ||
3 | * | ||
4 | * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
5 | * | ||
6 | * From code originally in include/net/tcp.h | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #include <linux/module.h> | ||
15 | #include <linux/random.h> | ||
16 | #include <linux/slab.h> | ||
17 | #include <linux/string.h> | ||
18 | |||
19 | #include <net/request_sock.h> | ||
20 | |||
21 | int reqsk_queue_alloc(struct request_sock_queue *queue, | ||
22 | const int nr_table_entries) /* returns 0, or -ENOMEM if the table cannot be allocated */ | ||
23 | { | ||
24 | const int lopt_size = sizeof(struct tcp_listen_opt) + | ||
25 | nr_table_entries * sizeof(struct request_sock *); /* header plus nr_table_entries bucket pointers */ | ||
26 | struct tcp_listen_opt *lopt = kmalloc(lopt_size, GFP_KERNEL); | ||
27 | |||
28 | if (lopt == NULL) | ||
29 | return -ENOMEM; | ||
30 | |||
31 | memset(lopt, 0, lopt_size); | ||
32 | |||
33 | for (lopt->max_qlen_log = 6; | ||
34 | (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog; | ||
35 | lopt->max_qlen_log++); /* empty body: stop at the first power of two >= sysctl_max_syn_backlog, minimum 2^6 */ | ||
36 | |||
37 | get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); | ||
38 | rwlock_init(&queue->syn_wait_lock); | ||
39 | queue->rskq_accept_head = queue->rskq_accept_tail = NULL; /* start with an empty accept FIFO: both ends cleared */ | ||
40 | |||
41 | write_lock_bh(&queue->syn_wait_lock); | ||
42 | queue->listen_opt = lopt; /* publish the table under the write lock */ | ||
43 | write_unlock_bh(&queue->syn_wait_lock); | ||
44 | |||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | EXPORT_SYMBOL(reqsk_queue_alloc); | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1c29feb6b35f..b85a46dd40a0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -316,7 +316,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure); | |||
316 | static __inline__ unsigned int tcp_listen_poll(struct sock *sk, | 316 | static __inline__ unsigned int tcp_listen_poll(struct sock *sk, |
317 | poll_table *wait) | 317 | poll_table *wait) |
318 | { | 318 | { |
319 | return tcp_sk(sk)->accept_queue ? (POLLIN | POLLRDNORM) : 0; | 319 | return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0; |
320 | } | 320 | } |
321 | 321 | ||
322 | /* | 322 | /* |
@@ -462,28 +462,15 @@ int tcp_listen_start(struct sock *sk) | |||
462 | { | 462 | { |
463 | struct inet_sock *inet = inet_sk(sk); | 463 | struct inet_sock *inet = inet_sk(sk); |
464 | struct tcp_sock *tp = tcp_sk(sk); | 464 | struct tcp_sock *tp = tcp_sk(sk); |
465 | struct tcp_listen_opt *lopt; | 465 | int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE); |
466 | |||
467 | if (rc != 0) | ||
468 | return rc; | ||
466 | 469 | ||
467 | sk->sk_max_ack_backlog = 0; | 470 | sk->sk_max_ack_backlog = 0; |
468 | sk->sk_ack_backlog = 0; | 471 | sk->sk_ack_backlog = 0; |
469 | tp->accept_queue = tp->accept_queue_tail = NULL; | ||
470 | rwlock_init(&tp->syn_wait_lock); | ||
471 | tcp_delack_init(tp); | 472 | tcp_delack_init(tp); |
472 | 473 | ||
473 | lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL); | ||
474 | if (!lopt) | ||
475 | return -ENOMEM; | ||
476 | |||
477 | memset(lopt, 0, sizeof(struct tcp_listen_opt)); | ||
478 | for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++) | ||
479 | if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog) | ||
480 | break; | ||
481 | get_random_bytes(&lopt->hash_rnd, 4); | ||
482 | |||
483 | write_lock_bh(&tp->syn_wait_lock); | ||
484 | tp->listen_opt = lopt; | ||
485 | write_unlock_bh(&tp->syn_wait_lock); | ||
486 | |||
487 | /* There is race window here: we announce ourselves listening, | 474 | /* There is race window here: we announce ourselves listening, |
488 | * but this transition is still not validated by get_port(). | 475 | * but this transition is still not validated by get_port(). |
489 | * It is OK, because this socket enters to hash table only | 476 | * It is OK, because this socket enters to hash table only |
@@ -500,10 +487,7 @@ int tcp_listen_start(struct sock *sk) | |||
500 | } | 487 | } |
501 | 488 | ||
502 | sk->sk_state = TCP_CLOSE; | 489 | sk->sk_state = TCP_CLOSE; |
503 | write_lock_bh(&tp->syn_wait_lock); | 490 | reqsk_queue_destroy(&tp->accept_queue); |
504 | tp->listen_opt = NULL; | ||
505 | write_unlock_bh(&tp->syn_wait_lock); | ||
506 | kfree(lopt); | ||
507 | return -EADDRINUSE; | 491 | return -EADDRINUSE; |
508 | } | 492 | } |
509 | 493 | ||
@@ -515,18 +499,16 @@ int tcp_listen_start(struct sock *sk) | |||
515 | static void tcp_listen_stop (struct sock *sk) | 499 | static void tcp_listen_stop (struct sock *sk) |
516 | { | 500 | { |
517 | struct tcp_sock *tp = tcp_sk(sk); | 501 | struct tcp_sock *tp = tcp_sk(sk); |
518 | struct tcp_listen_opt *lopt = tp->listen_opt; | 502 | struct tcp_listen_opt *lopt; |
519 | struct request_sock *acc_req = tp->accept_queue; | 503 | struct request_sock *acc_req; |
520 | struct request_sock *req; | 504 | struct request_sock *req; |
521 | int i; | 505 | int i; |
522 | 506 | ||
523 | tcp_delete_keepalive_timer(sk); | 507 | tcp_delete_keepalive_timer(sk); |
524 | 508 | ||
525 | /* make all the listen_opt local to us */ | 509 | /* make all the listen_opt local to us */ |
526 | write_lock_bh(&tp->syn_wait_lock); | 510 | lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue); |
527 | tp->listen_opt = NULL; | 511 | acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue); |
528 | write_unlock_bh(&tp->syn_wait_lock); | ||
529 | tp->accept_queue = tp->accept_queue_tail = NULL; | ||
530 | 512 | ||
531 | if (lopt->qlen) { | 513 | if (lopt->qlen) { |
532 | for (i = 0; i < TCP_SYNQ_HSIZE; i++) { | 514 | for (i = 0; i < TCP_SYNQ_HSIZE; i++) { |
@@ -1867,11 +1849,11 @@ static int wait_for_connect(struct sock *sk, long timeo) | |||
1867 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, | 1849 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, |
1868 | TASK_INTERRUPTIBLE); | 1850 | TASK_INTERRUPTIBLE); |
1869 | release_sock(sk); | 1851 | release_sock(sk); |
1870 | if (!tp->accept_queue) | 1852 | if (reqsk_queue_empty(&tp->accept_queue)) |
1871 | timeo = schedule_timeout(timeo); | 1853 | timeo = schedule_timeout(timeo); |
1872 | lock_sock(sk); | 1854 | lock_sock(sk); |
1873 | err = 0; | 1855 | err = 0; |
1874 | if (tp->accept_queue) | 1856 | if (!reqsk_queue_empty(&tp->accept_queue)) |
1875 | break; | 1857 | break; |
1876 | err = -EINVAL; | 1858 | err = -EINVAL; |
1877 | if (sk->sk_state != TCP_LISTEN) | 1859 | if (sk->sk_state != TCP_LISTEN) |
@@ -1894,7 +1876,6 @@ static int wait_for_connect(struct sock *sk, long timeo) | |||
1894 | struct sock *tcp_accept(struct sock *sk, int flags, int *err) | 1876 | struct sock *tcp_accept(struct sock *sk, int flags, int *err) |
1895 | { | 1877 | { |
1896 | struct tcp_sock *tp = tcp_sk(sk); | 1878 | struct tcp_sock *tp = tcp_sk(sk); |
1897 | struct request_sock *req; | ||
1898 | struct sock *newsk; | 1879 | struct sock *newsk; |
1899 | int error; | 1880 | int error; |
1900 | 1881 | ||
@@ -1905,37 +1886,31 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) | |||
1905 | */ | 1886 | */ |
1906 | error = -EINVAL; | 1887 | error = -EINVAL; |
1907 | if (sk->sk_state != TCP_LISTEN) | 1888 | if (sk->sk_state != TCP_LISTEN) |
1908 | goto out; | 1889 | goto out_err; |
1909 | 1890 | ||
1910 | /* Find already established connection */ | 1891 | /* Find already established connection */ |
1911 | if (!tp->accept_queue) { | 1892 | if (reqsk_queue_empty(&tp->accept_queue)) { |
1912 | long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); | 1893 | long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); |
1913 | 1894 | ||
1914 | /* If this is a non blocking socket don't sleep */ | 1895 | /* If this is a non blocking socket don't sleep */ |
1915 | error = -EAGAIN; | 1896 | error = -EAGAIN; |
1916 | if (!timeo) | 1897 | if (!timeo) |
1917 | goto out; | 1898 | goto out_err; |
1918 | 1899 | ||
1919 | error = wait_for_connect(sk, timeo); | 1900 | error = wait_for_connect(sk, timeo); |
1920 | if (error) | 1901 | if (error) |
1921 | goto out; | 1902 | goto out_err; |
1922 | } | 1903 | } |
1923 | 1904 | ||
1924 | req = tp->accept_queue; | 1905 | newsk = reqsk_queue_get_child(&tp->accept_queue, sk); |
1925 | if ((tp->accept_queue = req->dl_next) == NULL) | ||
1926 | tp->accept_queue_tail = NULL; | ||
1927 | |||
1928 | newsk = req->sk; | ||
1929 | sk_acceptq_removed(sk); | ||
1930 | __reqsk_free(req); | ||
1931 | BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); | 1906 | BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); |
1932 | release_sock(sk); | ||
1933 | return newsk; | ||
1934 | |||
1935 | out: | 1907 | out: |
1936 | release_sock(sk); | 1908 | release_sock(sk); |
1909 | return newsk; | ||
1910 | out_err: | ||
1911 | newsk = NULL; | ||
1937 | *err = error; | 1912 | *err = error; |
1938 | return NULL; | 1913 | goto out; |
1939 | } | 1914 | } |
1940 | 1915 | ||
1941 | /* | 1916 | /* |
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 67277800d0c1..c3328fa48837 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c | |||
@@ -529,9 +529,9 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, | |||
529 | 529 | ||
530 | entry.family = sk->sk_family; | 530 | entry.family = sk->sk_family; |
531 | 531 | ||
532 | read_lock_bh(&tp->syn_wait_lock); | 532 | read_lock_bh(&tp->accept_queue.syn_wait_lock); |
533 | 533 | ||
534 | lopt = tp->listen_opt; | 534 | lopt = tp->accept_queue.listen_opt; |
535 | if (!lopt || !lopt->qlen) | 535 | if (!lopt || !lopt->qlen) |
536 | goto out; | 536 | goto out; |
537 | 537 | ||
@@ -588,7 +588,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, | |||
588 | } | 588 | } |
589 | 589 | ||
590 | out: | 590 | out: |
591 | read_unlock_bh(&tp->syn_wait_lock); | 591 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); |
592 | 592 | ||
593 | return err; | 593 | return err; |
594 | } | 594 | } |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 95528a75a63d..1745dc8d25e6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -874,7 +874,7 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, | |||
874 | __u16 rport, | 874 | __u16 rport, |
875 | __u32 raddr, __u32 laddr) | 875 | __u32 raddr, __u32 laddr) |
876 | { | 876 | { |
877 | struct tcp_listen_opt *lopt = tp->listen_opt; | 877 | struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt; |
878 | struct request_sock *req, **prev; | 878 | struct request_sock *req, **prev; |
879 | 879 | ||
880 | for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; | 880 | for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; |
@@ -898,18 +898,10 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, | |||
898 | static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req) | 898 | static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req) |
899 | { | 899 | { |
900 | struct tcp_sock *tp = tcp_sk(sk); | 900 | struct tcp_sock *tp = tcp_sk(sk); |
901 | struct tcp_listen_opt *lopt = tp->listen_opt; | 901 | struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt; |
902 | u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); | 902 | u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); |
903 | 903 | ||
904 | req->expires = jiffies + TCP_TIMEOUT_INIT; | 904 | reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT); |
905 | req->retrans = 0; | ||
906 | req->sk = NULL; | ||
907 | req->dl_next = lopt->syn_table[h]; | ||
908 | |||
909 | write_lock(&tp->syn_wait_lock); | ||
910 | lopt->syn_table[h] = req; | ||
911 | write_unlock(&tp->syn_wait_lock); | ||
912 | |||
913 | tcp_synq_added(sk); | 905 | tcp_synq_added(sk); |
914 | } | 906 | } |
915 | 907 | ||
@@ -2167,17 +2159,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
2167 | if (++st->sbucket >= TCP_SYNQ_HSIZE) | 2159 | if (++st->sbucket >= TCP_SYNQ_HSIZE) |
2168 | break; | 2160 | break; |
2169 | get_req: | 2161 | get_req: |
2170 | req = tp->listen_opt->syn_table[st->sbucket]; | 2162 | req = tp->accept_queue.listen_opt->syn_table[st->sbucket]; |
2171 | } | 2163 | } |
2172 | sk = sk_next(st->syn_wait_sk); | 2164 | sk = sk_next(st->syn_wait_sk); |
2173 | st->state = TCP_SEQ_STATE_LISTENING; | 2165 | st->state = TCP_SEQ_STATE_LISTENING; |
2174 | read_unlock_bh(&tp->syn_wait_lock); | 2166 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); |
2175 | } else { | 2167 | } else { |
2176 | tp = tcp_sk(sk); | 2168 | tp = tcp_sk(sk); |
2177 | read_lock_bh(&tp->syn_wait_lock); | 2169 | read_lock_bh(&tp->accept_queue.syn_wait_lock); |
2178 | if (tp->listen_opt && tp->listen_opt->qlen) | 2170 | if (reqsk_queue_len(&tp->accept_queue)) |
2179 | goto start_req; | 2171 | goto start_req; |
2180 | read_unlock_bh(&tp->syn_wait_lock); | 2172 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); |
2181 | sk = sk_next(sk); | 2173 | sk = sk_next(sk); |
2182 | } | 2174 | } |
2183 | get_sk: | 2175 | get_sk: |
@@ -2187,8 +2179,8 @@ get_sk: | |||
2187 | goto out; | 2179 | goto out; |
2188 | } | 2180 | } |
2189 | tp = tcp_sk(sk); | 2181 | tp = tcp_sk(sk); |
2190 | read_lock_bh(&tp->syn_wait_lock); | 2182 | read_lock_bh(&tp->accept_queue.syn_wait_lock); |
2191 | if (tp->listen_opt && tp->listen_opt->qlen) { | 2183 | if (reqsk_queue_len(&tp->accept_queue)) { |
2192 | start_req: | 2184 | start_req: |
2193 | st->uid = sock_i_uid(sk); | 2185 | st->uid = sock_i_uid(sk); |
2194 | st->syn_wait_sk = sk; | 2186 | st->syn_wait_sk = sk; |
@@ -2196,7 +2188,7 @@ start_req: | |||
2196 | st->sbucket = 0; | 2188 | st->sbucket = 0; |
2197 | goto get_req; | 2189 | goto get_req; |
2198 | } | 2190 | } |
2199 | read_unlock_bh(&tp->syn_wait_lock); | 2191 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); |
2200 | } | 2192 | } |
2201 | if (++st->bucket < TCP_LHTABLE_SIZE) { | 2193 | if (++st->bucket < TCP_LHTABLE_SIZE) { |
2202 | sk = sk_head(&tcp_listening_hash[st->bucket]); | 2194 | sk = sk_head(&tcp_listening_hash[st->bucket]); |
@@ -2383,7 +2375,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) | |||
2383 | case TCP_SEQ_STATE_OPENREQ: | 2375 | case TCP_SEQ_STATE_OPENREQ: |
2384 | if (v) { | 2376 | if (v) { |
2385 | struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); | 2377 | struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); |
2386 | read_unlock_bh(&tp->syn_wait_lock); | 2378 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); |
2387 | } | 2379 | } |
2388 | case TCP_SEQ_STATE_LISTENING: | 2380 | case TCP_SEQ_STATE_LISTENING: |
2389 | if (v != SEQ_START_TOKEN) | 2381 | if (v != SEQ_START_TOKEN) |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0e6d525a8341..b3943e7562f3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -790,10 +790,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
790 | newtp->probes_out = 0; | 790 | newtp->probes_out = 0; |
791 | newtp->rx_opt.num_sacks = 0; | 791 | newtp->rx_opt.num_sacks = 0; |
792 | newtp->urg_data = 0; | 792 | newtp->urg_data = 0; |
793 | newtp->listen_opt = NULL; | 793 | /* Deinitialize accept_queue to trap illegal accesses. */ |
794 | newtp->accept_queue = newtp->accept_queue_tail = NULL; | 794 | memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue)); |
795 | /* Deinitialize syn_wait_lock to trap illegal accesses. */ | ||
796 | memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock)); | ||
797 | 795 | ||
798 | /* Back to base struct sock members. */ | 796 | /* Back to base struct sock members. */ |
799 | newsk->sk_err = 0; | 797 | newsk->sk_err = 0; |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index f03efe5fb76a..d97d191149c1 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -464,7 +464,7 @@ out_unlock: | |||
464 | static void tcp_synack_timer(struct sock *sk) | 464 | static void tcp_synack_timer(struct sock *sk) |
465 | { | 465 | { |
466 | struct tcp_sock *tp = tcp_sk(sk); | 466 | struct tcp_sock *tp = tcp_sk(sk); |
467 | struct tcp_listen_opt *lopt = tp->listen_opt; | 467 | struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt; |
468 | int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; | 468 | int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; |
469 | int thresh = max_retries; | 469 | int thresh = max_retries; |
470 | unsigned long now = jiffies; | 470 | unsigned long now = jiffies; |
@@ -527,12 +527,8 @@ static void tcp_synack_timer(struct sock *sk) | |||
527 | } | 527 | } |
528 | 528 | ||
529 | /* Drop this request */ | 529 | /* Drop this request */ |
530 | write_lock(&tp->syn_wait_lock); | 530 | tcp_synq_unlink(tp, req, reqp); |
531 | *reqp = req->dl_next; | 531 | reqsk_queue_removed(&tp->accept_queue, req); |
532 | write_unlock(&tp->syn_wait_lock); | ||
533 | lopt->qlen--; | ||
534 | if (req->retrans == 0) | ||
535 | lopt->qlen_young--; | ||
536 | reqsk_free(req); | 532 | reqsk_free(req); |
537 | continue; | 533 | continue; |
538 | } | 534 | } |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 068cd4a8c292..84091daad6b5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -401,7 +401,7 @@ static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp, | |||
401 | struct in6_addr *laddr, | 401 | struct in6_addr *laddr, |
402 | int iif) | 402 | int iif) |
403 | { | 403 | { |
404 | struct tcp_listen_opt *lopt = tp->listen_opt; | 404 | struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt; |
405 | struct request_sock *req, **prev; | 405 | struct request_sock *req, **prev; |
406 | 406 | ||
407 | for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; | 407 | for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; |
@@ -1267,18 +1267,10 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) | |||
1267 | static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) | 1267 | static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) |
1268 | { | 1268 | { |
1269 | struct tcp_sock *tp = tcp_sk(sk); | 1269 | struct tcp_sock *tp = tcp_sk(sk); |
1270 | struct tcp_listen_opt *lopt = tp->listen_opt; | 1270 | struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt; |
1271 | u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); | 1271 | u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); |
1272 | 1272 | ||
1273 | req->sk = NULL; | 1273 | reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT); |
1274 | req->expires = jiffies + TCP_TIMEOUT_INIT; | ||
1275 | req->retrans = 0; | ||
1276 | req->dl_next = lopt->syn_table[h]; | ||
1277 | |||
1278 | write_lock(&tp->syn_wait_lock); | ||
1279 | lopt->syn_table[h] = req; | ||
1280 | write_unlock(&tp->syn_wait_lock); | ||
1281 | |||
1282 | tcp_synq_added(sk); | 1274 | tcp_synq_added(sk); |
1283 | } | 1275 | } |
1284 | 1276 | ||