diff options
| -rw-r--r-- | include/linux/tcp.h | 17 | ||||
| -rw-r--r-- | include/net/request_sock.h | 178 | ||||
| -rw-r--r-- | include/net/tcp.h | 46 | ||||
| -rw-r--r-- | net/core/Makefile | 3 | ||||
| -rw-r--r-- | net/core/request_sock.c | 48 | ||||
| -rw-r--r-- | net/ipv4/tcp.c | 67 | ||||
| -rw-r--r-- | net/ipv4/tcp_diag.c | 6 | ||||
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 32 | ||||
| -rw-r--r-- | net/ipv4/tcp_minisocks.c | 6 | ||||
| -rw-r--r-- | net/ipv4/tcp_timer.c | 10 | ||||
| -rw-r--r-- | net/ipv6/tcp_ipv6.c | 14 |
11 files changed, 281 insertions, 146 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fb54292a15aa..97a7c9e03df5 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h | |||
| @@ -379,22 +379,7 @@ struct tcp_sock { | |||
| 379 | 379 | ||
| 380 | __u32 total_retrans; /* Total retransmits for entire connection */ | 380 | __u32 total_retrans; /* Total retransmits for entire connection */ |
| 381 | 381 | ||
| 382 | /* The syn_wait_lock is necessary only to avoid proc interface having | 382 | struct request_sock_queue accept_queue; /* FIFO of established children */ |
| 383 | * to grab the main lock sock while browsing the listening hash | ||
| 384 | * (otherwise it's deadlock prone). | ||
| 385 | * This lock is acquired in read mode only from listening_get_next() | ||
| 386 | * and it's acquired in write mode _only_ from code that is actively | ||
| 387 | * changing the syn_wait_queue. All readers that are holding | ||
| 388 | * the master sock lock don't need to grab this lock in read mode | ||
| 389 | * too as the syn_wait_queue writes are always protected from | ||
| 390 | * the main sock lock. | ||
| 391 | */ | ||
| 392 | rwlock_t syn_wait_lock; | ||
| 393 | struct tcp_listen_opt *listen_opt; | ||
| 394 | |||
| 395 | /* FIFO of established children */ | ||
| 396 | struct request_sock *accept_queue; | ||
| 397 | struct request_sock *accept_queue_tail; | ||
| 398 | 383 | ||
| 399 | unsigned int keepalive_time; /* time before keep alive takes place */ | 384 | unsigned int keepalive_time; /* time before keep alive takes place */ |
| 400 | unsigned int keepalive_intvl; /* time interval between keep alive probes */ | 385 | unsigned int keepalive_intvl; /* time interval between keep alive probes */ |
diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 08a8fd1d1610..38943ed04e73 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h | |||
| @@ -16,7 +16,9 @@ | |||
| 16 | #define _REQUEST_SOCK_H | 16 | #define _REQUEST_SOCK_H |
| 17 | 17 | ||
| 18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
| 19 | #include <linux/spinlock.h> | ||
| 19 | #include <linux/types.h> | 20 | #include <linux/types.h> |
| 21 | |||
| 20 | #include <net/sock.h> | 22 | #include <net/sock.h> |
| 21 | 23 | ||
| 22 | struct request_sock; | 24 | struct request_sock; |
| @@ -74,4 +76,180 @@ static inline void reqsk_free(struct request_sock *req) | |||
| 74 | __reqsk_free(req); | 76 | __reqsk_free(req); |
| 75 | } | 77 | } |
| 76 | 78 | ||
| 79 | extern int sysctl_max_syn_backlog; | ||
| 80 | |||
| 81 | /** struct tcp_listen_opt - listen state | ||
| 82 | * | ||
| 83 | * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs | ||
| 84 | */ | ||
| 85 | struct tcp_listen_opt { | ||
| 86 | u8 max_qlen_log; | ||
| 87 | /* 3 bytes hole, try to use */ | ||
| 88 | int qlen; | ||
| 89 | int qlen_young; | ||
| 90 | int clock_hand; | ||
| 91 | u32 hash_rnd; | ||
| 92 | struct request_sock *syn_table[0]; | ||
| 93 | }; | ||
| 94 | |||
| 95 | /** struct request_sock_queue - queue of request_socks | ||
| 96 | * | ||
| 97 | * @rskq_accept_head - FIFO head of established children | ||
| 98 | * @rskq_accept_tail - FIFO tail of established children | ||
| 99 | * @syn_wait_lock - serializer | ||
| 100 | * | ||
| 101 | * %syn_wait_lock is necessary only to avoid proc interface having to grab the main | ||
| 102 | * lock sock while browsing the listening hash (otherwise it's deadlock prone). | ||
| 103 | * | ||
| 104 | * This lock is acquired in read mode only from listening_get_next() seq_file | ||
| 105 | * op and it's acquired in write mode _only_ from code that is actively | ||
| 106 | * changing rskq_accept_head. All readers that are holding the master sock lock | ||
106 | don't need to grab this lock in read mode too as rskq_accept_head writes | ||
| 108 | * are always protected from the main sock lock. | ||
| 109 | */ | ||
| 110 | struct request_sock_queue { | ||
| 111 | struct request_sock *rskq_accept_head; | ||
| 112 | struct request_sock *rskq_accept_tail; | ||
| 113 | rwlock_t syn_wait_lock; | ||
| 114 | struct tcp_listen_opt *listen_opt; | ||
| 115 | }; | ||
| 116 | |||
| 117 | extern int reqsk_queue_alloc(struct request_sock_queue *queue, | ||
| 118 | const int nr_table_entries); | ||
| 119 | |||
| 120 | static inline struct tcp_listen_opt *reqsk_queue_yank_listen_sk(struct request_sock_queue *queue) | ||
| 121 | { | ||
| 122 | struct tcp_listen_opt *lopt; | ||
| 123 | |||
| 124 | write_lock_bh(&queue->syn_wait_lock); | ||
| 125 | lopt = queue->listen_opt; | ||
| 126 | queue->listen_opt = NULL; | ||
| 127 | write_unlock_bh(&queue->syn_wait_lock); | ||
| 128 | |||
| 129 | return lopt; | ||
| 130 | } | ||
| 131 | |||
| 132 | static inline void reqsk_queue_destroy(struct request_sock_queue *queue) | ||
| 133 | { | ||
| 134 | kfree(reqsk_queue_yank_listen_sk(queue)); | ||
| 135 | } | ||
| 136 | |||
| 137 | static inline struct request_sock * | ||
| 138 | reqsk_queue_yank_acceptq(struct request_sock_queue *queue) | ||
| 139 | { | ||
| 140 | struct request_sock *req = queue->rskq_accept_head; | ||
| 141 | |||
142 | queue->rskq_accept_head = queue->rskq_accept_tail = NULL; | ||
| 143 | return req; | ||
| 144 | } | ||
| 145 | |||
| 146 | static inline int reqsk_queue_empty(struct request_sock_queue *queue) | ||
| 147 | { | ||
| 148 | return queue->rskq_accept_head == NULL; | ||
| 149 | } | ||
| 150 | |||
| 151 | static inline void reqsk_queue_unlink(struct request_sock_queue *queue, | ||
| 152 | struct request_sock *req, | ||
| 153 | struct request_sock **prev_req) | ||
| 154 | { | ||
| 155 | write_lock(&queue->syn_wait_lock); | ||
| 156 | *prev_req = req->dl_next; | ||
| 157 | write_unlock(&queue->syn_wait_lock); | ||
| 158 | } | ||
| 159 | |||
| 160 | static inline void reqsk_queue_add(struct request_sock_queue *queue, | ||
| 161 | struct request_sock *req, | ||
| 162 | struct sock *parent, | ||
| 163 | struct sock *child) | ||
| 164 | { | ||
| 165 | req->sk = child; | ||
| 166 | sk_acceptq_added(parent); | ||
| 167 | |||
| 168 | if (queue->rskq_accept_head == NULL) | ||
| 169 | queue->rskq_accept_head = req; | ||
| 170 | else | ||
| 171 | queue->rskq_accept_tail->dl_next = req; | ||
| 172 | |||
| 173 | queue->rskq_accept_tail = req; | ||
| 174 | req->dl_next = NULL; | ||
| 175 | } | ||
| 176 | |||
| 177 | static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue) | ||
| 178 | { | ||
| 179 | struct request_sock *req = queue->rskq_accept_head; | ||
| 180 | |||
| 181 | BUG_TRAP(req != NULL); | ||
| 182 | |||
| 183 | queue->rskq_accept_head = req->dl_next; | ||
| 184 | if (queue->rskq_accept_head == NULL) | ||
| 185 | queue->rskq_accept_tail = NULL; | ||
| 186 | |||
| 187 | return req; | ||
| 188 | } | ||
| 189 | |||
| 190 | static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue, | ||
| 191 | struct sock *parent) | ||
| 192 | { | ||
| 193 | struct request_sock *req = reqsk_queue_remove(queue); | ||
| 194 | struct sock *child = req->sk; | ||
| 195 | |||
| 196 | BUG_TRAP(child != NULL); | ||
| 197 | |||
| 198 | sk_acceptq_removed(parent); | ||
| 199 | __reqsk_free(req); | ||
| 200 | return child; | ||
| 201 | } | ||
| 202 | |||
| 203 | static inline int reqsk_queue_removed(struct request_sock_queue *queue, | ||
| 204 | struct request_sock *req) | ||
| 205 | { | ||
| 206 | struct tcp_listen_opt *lopt = queue->listen_opt; | ||
| 207 | |||
| 208 | if (req->retrans == 0) | ||
| 209 | --lopt->qlen_young; | ||
| 210 | |||
| 211 | return --lopt->qlen; | ||
| 212 | } | ||
| 213 | |||
| 214 | static inline int reqsk_queue_added(struct request_sock_queue *queue) | ||
| 215 | { | ||
| 216 | struct tcp_listen_opt *lopt = queue->listen_opt; | ||
| 217 | const int prev_qlen = lopt->qlen; | ||
| 218 | |||
| 219 | lopt->qlen_young++; | ||
| 220 | lopt->qlen++; | ||
| 221 | return prev_qlen; | ||
| 222 | } | ||
| 223 | |||
| 224 | static inline int reqsk_queue_len(struct request_sock_queue *queue) | ||
| 225 | { | ||
| 226 | return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; | ||
| 227 | } | ||
| 228 | |||
| 229 | static inline int reqsk_queue_len_young(struct request_sock_queue *queue) | ||
| 230 | { | ||
| 231 | return queue->listen_opt->qlen_young; | ||
| 232 | } | ||
| 233 | |||
| 234 | static inline int reqsk_queue_is_full(struct request_sock_queue *queue) | ||
| 235 | { | ||
| 236 | return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; | ||
| 237 | } | ||
| 238 | |||
| 239 | static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, | ||
| 240 | u32 hash, struct request_sock *req, | ||
| 241 | unsigned timeout) | ||
| 242 | { | ||
| 243 | struct tcp_listen_opt *lopt = queue->listen_opt; | ||
| 244 | |||
| 245 | req->expires = jiffies + timeout; | ||
| 246 | req->retrans = 0; | ||
| 247 | req->sk = NULL; | ||
| 248 | req->dl_next = lopt->syn_table[hash]; | ||
| 249 | |||
| 250 | write_lock(&queue->syn_wait_lock); | ||
| 251 | lopt->syn_table[hash] = req; | ||
| 252 | write_unlock(&queue->syn_wait_lock); | ||
| 253 | } | ||
| 254 | |||
| 77 | #endif /* _REQUEST_SOCK_H */ | 255 | #endif /* _REQUEST_SOCK_H */ |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 6663086a5e35..a2e323c54457 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
| @@ -1686,71 +1686,41 @@ static inline int tcp_full_space(const struct sock *sk) | |||
| 1686 | static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req, | 1686 | static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req, |
| 1687 | struct sock *child) | 1687 | struct sock *child) |
| 1688 | { | 1688 | { |
| 1689 | struct tcp_sock *tp = tcp_sk(sk); | 1689 | reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child); |
| 1690 | |||
| 1691 | req->sk = child; | ||
| 1692 | sk_acceptq_added(sk); | ||
| 1693 | |||
| 1694 | if (!tp->accept_queue_tail) { | ||
| 1695 | tp->accept_queue = req; | ||
| 1696 | } else { | ||
| 1697 | tp->accept_queue_tail->dl_next = req; | ||
| 1698 | } | ||
| 1699 | tp->accept_queue_tail = req; | ||
| 1700 | req->dl_next = NULL; | ||
| 1701 | } | 1690 | } |
| 1702 | 1691 | ||
| 1703 | struct tcp_listen_opt | ||
| 1704 | { | ||
| 1705 | u8 max_qlen_log; /* log_2 of maximal queued SYNs */ | ||
| 1706 | int qlen; | ||
| 1707 | int qlen_young; | ||
| 1708 | int clock_hand; | ||
| 1709 | u32 hash_rnd; | ||
| 1710 | struct request_sock *syn_table[TCP_SYNQ_HSIZE]; | ||
| 1711 | }; | ||
| 1712 | |||
| 1713 | static inline void | 1692 | static inline void |
| 1714 | tcp_synq_removed(struct sock *sk, struct request_sock *req) | 1693 | tcp_synq_removed(struct sock *sk, struct request_sock *req) |
| 1715 | { | 1694 | { |
| 1716 | struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; | 1695 | if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0) |
| 1717 | |||
| 1718 | if (--lopt->qlen == 0) | ||
| 1719 | tcp_delete_keepalive_timer(sk); | 1696 | tcp_delete_keepalive_timer(sk); |
| 1720 | if (req->retrans == 0) | ||
| 1721 | lopt->qlen_young--; | ||
| 1722 | } | 1697 | } |
| 1723 | 1698 | ||
| 1724 | static inline void tcp_synq_added(struct sock *sk) | 1699 | static inline void tcp_synq_added(struct sock *sk) |
| 1725 | { | 1700 | { |
| 1726 | struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; | 1701 | if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0) |
| 1727 | |||
| 1728 | if (lopt->qlen++ == 0) | ||
| 1729 | tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); | 1702 | tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); |
| 1730 | lopt->qlen_young++; | ||
| 1731 | } | 1703 | } |
| 1732 | 1704 | ||
| 1733 | static inline int tcp_synq_len(struct sock *sk) | 1705 | static inline int tcp_synq_len(struct sock *sk) |
| 1734 | { | 1706 | { |
| 1735 | return tcp_sk(sk)->listen_opt->qlen; | 1707 | return reqsk_queue_len(&tcp_sk(sk)->accept_queue); |
| 1736 | } | 1708 | } |
| 1737 | 1709 | ||
| 1738 | static inline int tcp_synq_young(struct sock *sk) | 1710 | static inline int tcp_synq_young(struct sock *sk) |
| 1739 | { | 1711 | { |
| 1740 | return tcp_sk(sk)->listen_opt->qlen_young; | 1712 | return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue); |
| 1741 | } | 1713 | } |
| 1742 | 1714 | ||
| 1743 | static inline int tcp_synq_is_full(struct sock *sk) | 1715 | static inline int tcp_synq_is_full(struct sock *sk) |
| 1744 | { | 1716 | { |
| 1745 | return tcp_synq_len(sk) >> tcp_sk(sk)->listen_opt->max_qlen_log; | 1717 | return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue); |
| 1746 | } | 1718 | } |
| 1747 | 1719 | ||
| 1748 | static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req, | 1720 | static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req, |
| 1749 | struct request_sock **prev) | 1721 | struct request_sock **prev) |
| 1750 | { | 1722 | { |
| 1751 | write_lock(&tp->syn_wait_lock); | 1723 | reqsk_queue_unlink(&tp->accept_queue, req, prev); |
| 1752 | *prev = req->dl_next; | ||
| 1753 | write_unlock(&tp->syn_wait_lock); | ||
| 1754 | } | 1724 | } |
| 1755 | 1725 | ||
| 1756 | static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req, | 1726 | static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req, |
diff --git a/net/core/Makefile b/net/core/Makefile index 81f03243fe2f..5e0c56b7f607 100644 --- a/net/core/Makefile +++ b/net/core/Makefile | |||
| @@ -2,7 +2,8 @@ | |||
| 2 | # Makefile for the Linux networking core. | 2 | # Makefile for the Linux networking core. |
| 3 | # | 3 | # |
| 4 | 4 | ||
| 5 | obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o | 5 | obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ |
| 6 | gen_stats.o gen_estimator.o | ||
| 6 | 7 | ||
| 7 | obj-$(CONFIG_SYSCTL) += sysctl_net_core.o | 8 | obj-$(CONFIG_SYSCTL) += sysctl_net_core.o |
| 8 | 9 | ||
diff --git a/net/core/request_sock.c b/net/core/request_sock.c new file mode 100644 index 000000000000..1258333ca007 --- /dev/null +++ b/net/core/request_sock.c | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | /* | ||
| 2 | * NET Generic infrastructure for Network protocols. | ||
| 3 | * | ||
| 4 | * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
| 5 | * | ||
| 6 | * From code originally in include/net/tcp.h | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU General Public License | ||
| 10 | * as published by the Free Software Foundation; either version | ||
| 11 | * 2 of the License, or (at your option) any later version. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <linux/module.h> | ||
| 15 | #include <linux/random.h> | ||
| 16 | #include <linux/slab.h> | ||
| 17 | #include <linux/string.h> | ||
| 18 | |||
| 19 | #include <net/request_sock.h> | ||
| 20 | |||
| 21 | int reqsk_queue_alloc(struct request_sock_queue *queue, | ||
| 22 | const int nr_table_entries) | ||
| 23 | { | ||
| 24 | const int lopt_size = sizeof(struct tcp_listen_opt) + | ||
| 25 | nr_table_entries * sizeof(struct request_sock *); | ||
| 26 | struct tcp_listen_opt *lopt = kmalloc(lopt_size, GFP_KERNEL); | ||
| 27 | |||
| 28 | if (lopt == NULL) | ||
| 29 | return -ENOMEM; | ||
| 30 | |||
| 31 | memset(lopt, 0, lopt_size); | ||
| 32 | |||
| 33 | for (lopt->max_qlen_log = 6; | ||
| 34 | (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog; | ||
| 35 | lopt->max_qlen_log++); | ||
| 36 | |||
| 37 | get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); | ||
| 38 | rwlock_init(&queue->syn_wait_lock); | ||
39 | queue->rskq_accept_head = queue->rskq_accept_tail = NULL; | ||
| 40 | |||
| 41 | write_lock_bh(&queue->syn_wait_lock); | ||
| 42 | queue->listen_opt = lopt; | ||
| 43 | write_unlock_bh(&queue->syn_wait_lock); | ||
| 44 | |||
| 45 | return 0; | ||
| 46 | } | ||
| 47 | |||
| 48 | EXPORT_SYMBOL(reqsk_queue_alloc); | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1c29feb6b35f..b85a46dd40a0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
| @@ -316,7 +316,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure); | |||
| 316 | static __inline__ unsigned int tcp_listen_poll(struct sock *sk, | 316 | static __inline__ unsigned int tcp_listen_poll(struct sock *sk, |
| 317 | poll_table *wait) | 317 | poll_table *wait) |
| 318 | { | 318 | { |
| 319 | return tcp_sk(sk)->accept_queue ? (POLLIN | POLLRDNORM) : 0; | 319 | return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0; |
| 320 | } | 320 | } |
| 321 | 321 | ||
| 322 | /* | 322 | /* |
| @@ -462,28 +462,15 @@ int tcp_listen_start(struct sock *sk) | |||
| 462 | { | 462 | { |
| 463 | struct inet_sock *inet = inet_sk(sk); | 463 | struct inet_sock *inet = inet_sk(sk); |
| 464 | struct tcp_sock *tp = tcp_sk(sk); | 464 | struct tcp_sock *tp = tcp_sk(sk); |
| 465 | struct tcp_listen_opt *lopt; | 465 | int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE); |
| 466 | |||
| 467 | if (rc != 0) | ||
| 468 | return rc; | ||
| 466 | 469 | ||
| 467 | sk->sk_max_ack_backlog = 0; | 470 | sk->sk_max_ack_backlog = 0; |
| 468 | sk->sk_ack_backlog = 0; | 471 | sk->sk_ack_backlog = 0; |
| 469 | tp->accept_queue = tp->accept_queue_tail = NULL; | ||
| 470 | rwlock_init(&tp->syn_wait_lock); | ||
| 471 | tcp_delack_init(tp); | 472 | tcp_delack_init(tp); |
| 472 | 473 | ||
| 473 | lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL); | ||
| 474 | if (!lopt) | ||
| 475 | return -ENOMEM; | ||
| 476 | |||
| 477 | memset(lopt, 0, sizeof(struct tcp_listen_opt)); | ||
| 478 | for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++) | ||
| 479 | if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog) | ||
| 480 | break; | ||
| 481 | get_random_bytes(&lopt->hash_rnd, 4); | ||
| 482 | |||
| 483 | write_lock_bh(&tp->syn_wait_lock); | ||
| 484 | tp->listen_opt = lopt; | ||
| 485 | write_unlock_bh(&tp->syn_wait_lock); | ||
| 486 | |||
| 487 | /* There is race window here: we announce ourselves listening, | 474 | /* There is race window here: we announce ourselves listening, |
| 488 | * but this transition is still not validated by get_port(). | 475 | * but this transition is still not validated by get_port(). |
| 489 | * It is OK, because this socket enters to hash table only | 476 | * It is OK, because this socket enters to hash table only |
| @@ -500,10 +487,7 @@ int tcp_listen_start(struct sock *sk) | |||
| 500 | } | 487 | } |
| 501 | 488 | ||
| 502 | sk->sk_state = TCP_CLOSE; | 489 | sk->sk_state = TCP_CLOSE; |
| 503 | write_lock_bh(&tp->syn_wait_lock); | 490 | reqsk_queue_destroy(&tp->accept_queue); |
| 504 | tp->listen_opt = NULL; | ||
| 505 | write_unlock_bh(&tp->syn_wait_lock); | ||
| 506 | kfree(lopt); | ||
| 507 | return -EADDRINUSE; | 491 | return -EADDRINUSE; |
| 508 | } | 492 | } |
| 509 | 493 | ||
| @@ -515,18 +499,16 @@ int tcp_listen_start(struct sock *sk) | |||
| 515 | static void tcp_listen_stop (struct sock *sk) | 499 | static void tcp_listen_stop (struct sock *sk) |
| 516 | { | 500 | { |
| 517 | struct tcp_sock *tp = tcp_sk(sk); | 501 | struct tcp_sock *tp = tcp_sk(sk); |
| 518 | struct tcp_listen_opt *lopt = tp->listen_opt; | 502 | struct tcp_listen_opt *lopt; |
| 519 | struct request_sock *acc_req = tp->accept_queue; | 503 | struct request_sock *acc_req; |
| 520 | struct request_sock *req; | 504 | struct request_sock *req; |
| 521 | int i; | 505 | int i; |
| 522 | 506 | ||
| 523 | tcp_delete_keepalive_timer(sk); | 507 | tcp_delete_keepalive_timer(sk); |
| 524 | 508 | ||
| 525 | /* make all the listen_opt local to us */ | 509 | /* make all the listen_opt local to us */ |
| 526 | write_lock_bh(&tp->syn_wait_lock); | 510 | lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue); |
| 527 | tp->listen_opt = NULL; | 511 | acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue); |
| 528 | write_unlock_bh(&tp->syn_wait_lock); | ||
| 529 | tp->accept_queue = tp->accept_queue_tail = NULL; | ||
| 530 | 512 | ||
| 531 | if (lopt->qlen) { | 513 | if (lopt->qlen) { |
| 532 | for (i = 0; i < TCP_SYNQ_HSIZE; i++) { | 514 | for (i = 0; i < TCP_SYNQ_HSIZE; i++) { |
| @@ -1867,11 +1849,11 @@ static int wait_for_connect(struct sock *sk, long timeo) | |||
| 1867 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, | 1849 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, |
| 1868 | TASK_INTERRUPTIBLE); | 1850 | TASK_INTERRUPTIBLE); |
| 1869 | release_sock(sk); | 1851 | release_sock(sk); |
| 1870 | if (!tp->accept_queue) | 1852 | if (reqsk_queue_empty(&tp->accept_queue)) |
| 1871 | timeo = schedule_timeout(timeo); | 1853 | timeo = schedule_timeout(timeo); |
| 1872 | lock_sock(sk); | 1854 | lock_sock(sk); |
| 1873 | err = 0; | 1855 | err = 0; |
| 1874 | if (tp->accept_queue) | 1856 | if (!reqsk_queue_empty(&tp->accept_queue)) |
| 1875 | break; | 1857 | break; |
| 1876 | err = -EINVAL; | 1858 | err = -EINVAL; |
| 1877 | if (sk->sk_state != TCP_LISTEN) | 1859 | if (sk->sk_state != TCP_LISTEN) |
| @@ -1894,7 +1876,6 @@ static int wait_for_connect(struct sock *sk, long timeo) | |||
| 1894 | struct sock *tcp_accept(struct sock *sk, int flags, int *err) | 1876 | struct sock *tcp_accept(struct sock *sk, int flags, int *err) |
| 1895 | { | 1877 | { |
| 1896 | struct tcp_sock *tp = tcp_sk(sk); | 1878 | struct tcp_sock *tp = tcp_sk(sk); |
| 1897 | struct request_sock *req; | ||
| 1898 | struct sock *newsk; | 1879 | struct sock *newsk; |
| 1899 | int error; | 1880 | int error; |
| 1900 | 1881 | ||
| @@ -1905,37 +1886,31 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) | |||
| 1905 | */ | 1886 | */ |
| 1906 | error = -EINVAL; | 1887 | error = -EINVAL; |
| 1907 | if (sk->sk_state != TCP_LISTEN) | 1888 | if (sk->sk_state != TCP_LISTEN) |
| 1908 | goto out; | 1889 | goto out_err; |
| 1909 | 1890 | ||
| 1910 | /* Find already established connection */ | 1891 | /* Find already established connection */ |
| 1911 | if (!tp->accept_queue) { | 1892 | if (reqsk_queue_empty(&tp->accept_queue)) { |
| 1912 | long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); | 1893 | long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); |
| 1913 | 1894 | ||
| 1914 | /* If this is a non blocking socket don't sleep */ | 1895 | /* If this is a non blocking socket don't sleep */ |
| 1915 | error = -EAGAIN; | 1896 | error = -EAGAIN; |
| 1916 | if (!timeo) | 1897 | if (!timeo) |
| 1917 | goto out; | 1898 | goto out_err; |
| 1918 | 1899 | ||
| 1919 | error = wait_for_connect(sk, timeo); | 1900 | error = wait_for_connect(sk, timeo); |
| 1920 | if (error) | 1901 | if (error) |
| 1921 | goto out; | 1902 | goto out_err; |
| 1922 | } | 1903 | } |
| 1923 | 1904 | ||
| 1924 | req = tp->accept_queue; | 1905 | newsk = reqsk_queue_get_child(&tp->accept_queue, sk); |
| 1925 | if ((tp->accept_queue = req->dl_next) == NULL) | ||
| 1926 | tp->accept_queue_tail = NULL; | ||
| 1927 | |||
| 1928 | newsk = req->sk; | ||
| 1929 | sk_acceptq_removed(sk); | ||
| 1930 | __reqsk_free(req); | ||
| 1931 | BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); | 1906 | BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); |
| 1932 | release_sock(sk); | ||
| 1933 | return newsk; | ||
| 1934 | |||
| 1935 | out: | 1907 | out: |
| 1936 | release_sock(sk); | 1908 | release_sock(sk); |
| 1909 | return newsk; | ||
| 1910 | out_err: | ||
| 1911 | newsk = NULL; | ||
| 1937 | *err = error; | 1912 | *err = error; |
| 1938 | return NULL; | 1913 | goto out; |
| 1939 | } | 1914 | } |
| 1940 | 1915 | ||
| 1941 | /* | 1916 | /* |
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 67277800d0c1..c3328fa48837 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c | |||
| @@ -529,9 +529,9 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, | |||
| 529 | 529 | ||
| 530 | entry.family = sk->sk_family; | 530 | entry.family = sk->sk_family; |
| 531 | 531 | ||
| 532 | read_lock_bh(&tp->syn_wait_lock); | 532 | read_lock_bh(&tp->accept_queue.syn_wait_lock); |
| 533 | 533 | ||
| 534 | lopt = tp->listen_opt; | 534 | lopt = tp->accept_queue.listen_opt; |
| 535 | if (!lopt || !lopt->qlen) | 535 | if (!lopt || !lopt->qlen) |
| 536 | goto out; | 536 | goto out; |
| 537 | 537 | ||
| @@ -588,7 +588,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, | |||
| 588 | } | 588 | } |
| 589 | 589 | ||
| 590 | out: | 590 | out: |
| 591 | read_unlock_bh(&tp->syn_wait_lock); | 591 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); |
| 592 | 592 | ||
| 593 | return err; | 593 | return err; |
| 594 | } | 594 | } |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 95528a75a63d..1745dc8d25e6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
| @@ -874,7 +874,7 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, | |||
| 874 | __u16 rport, | 874 | __u16 rport, |
| 875 | __u32 raddr, __u32 laddr) | 875 | __u32 raddr, __u32 laddr) |
| 876 | { | 876 | { |
| 877 | struct tcp_listen_opt *lopt = tp->listen_opt; | 877 | struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt; |
| 878 | struct request_sock *req, **prev; | 878 | struct request_sock *req, **prev; |
| 879 | 879 | ||
| 880 | for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; | 880 | for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; |
| @@ -898,18 +898,10 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, | |||
| 898 | static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req) | 898 | static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req) |
| 899 | { | 899 | { |
| 900 | struct tcp_sock *tp = tcp_sk(sk); | 900 | struct tcp_sock *tp = tcp_sk(sk); |
| 901 | struct tcp_listen_opt *lopt = tp->listen_opt; | 901 | struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt; |
| 902 | u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); | 902 | u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); |
| 903 | 903 | ||
| 904 | req->expires = jiffies + TCP_TIMEOUT_INIT; | 904 | reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT); |
| 905 | req->retrans = 0; | ||
| 906 | req->sk = NULL; | ||
| 907 | req->dl_next = lopt->syn_table[h]; | ||
| 908 | |||
| 909 | write_lock(&tp->syn_wait_lock); | ||
| 910 | lopt->syn_table[h] = req; | ||
| 911 | write_unlock(&tp->syn_wait_lock); | ||
| 912 | |||
| 913 | tcp_synq_added(sk); | 905 | tcp_synq_added(sk); |
| 914 | } | 906 | } |
| 915 | 907 | ||
| @@ -2167,17 +2159,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
| 2167 | if (++st->sbucket >= TCP_SYNQ_HSIZE) | 2159 | if (++st->sbucket >= TCP_SYNQ_HSIZE) |
| 2168 | break; | 2160 | break; |
| 2169 | get_req: | 2161 | get_req: |
| 2170 | req = tp->listen_opt->syn_table[st->sbucket]; | 2162 | req = tp->accept_queue.listen_opt->syn_table[st->sbucket]; |
| 2171 | } | 2163 | } |
| 2172 | sk = sk_next(st->syn_wait_sk); | 2164 | sk = sk_next(st->syn_wait_sk); |
| 2173 | st->state = TCP_SEQ_STATE_LISTENING; | 2165 | st->state = TCP_SEQ_STATE_LISTENING; |
| 2174 | read_unlock_bh(&tp->syn_wait_lock); | 2166 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); |
| 2175 | } else { | 2167 | } else { |
| 2176 | tp = tcp_sk(sk); | 2168 | tp = tcp_sk(sk); |
| 2177 | read_lock_bh(&tp->syn_wait_lock); | 2169 | read_lock_bh(&tp->accept_queue.syn_wait_lock); |
| 2178 | if (tp->listen_opt && tp->listen_opt->qlen) | 2170 | if (reqsk_queue_len(&tp->accept_queue)) |
| 2179 | goto start_req; | 2171 | goto start_req; |
| 2180 | read_unlock_bh(&tp->syn_wait_lock); | 2172 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); |
| 2181 | sk = sk_next(sk); | 2173 | sk = sk_next(sk); |
| 2182 | } | 2174 | } |
| 2183 | get_sk: | 2175 | get_sk: |
| @@ -2187,8 +2179,8 @@ get_sk: | |||
| 2187 | goto out; | 2179 | goto out; |
| 2188 | } | 2180 | } |
| 2189 | tp = tcp_sk(sk); | 2181 | tp = tcp_sk(sk); |
| 2190 | read_lock_bh(&tp->syn_wait_lock); | 2182 | read_lock_bh(&tp->accept_queue.syn_wait_lock); |
| 2191 | if (tp->listen_opt && tp->listen_opt->qlen) { | 2183 | if (reqsk_queue_len(&tp->accept_queue)) { |
| 2192 | start_req: | 2184 | start_req: |
| 2193 | st->uid = sock_i_uid(sk); | 2185 | st->uid = sock_i_uid(sk); |
| 2194 | st->syn_wait_sk = sk; | 2186 | st->syn_wait_sk = sk; |
| @@ -2196,7 +2188,7 @@ start_req: | |||
| 2196 | st->sbucket = 0; | 2188 | st->sbucket = 0; |
| 2197 | goto get_req; | 2189 | goto get_req; |
| 2198 | } | 2190 | } |
| 2199 | read_unlock_bh(&tp->syn_wait_lock); | 2191 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); |
| 2200 | } | 2192 | } |
| 2201 | if (++st->bucket < TCP_LHTABLE_SIZE) { | 2193 | if (++st->bucket < TCP_LHTABLE_SIZE) { |
| 2202 | sk = sk_head(&tcp_listening_hash[st->bucket]); | 2194 | sk = sk_head(&tcp_listening_hash[st->bucket]); |
| @@ -2383,7 +2375,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) | |||
| 2383 | case TCP_SEQ_STATE_OPENREQ: | 2375 | case TCP_SEQ_STATE_OPENREQ: |
| 2384 | if (v) { | 2376 | if (v) { |
| 2385 | struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); | 2377 | struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); |
| 2386 | read_unlock_bh(&tp->syn_wait_lock); | 2378 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); |
| 2387 | } | 2379 | } |
| 2388 | case TCP_SEQ_STATE_LISTENING: | 2380 | case TCP_SEQ_STATE_LISTENING: |
| 2389 | if (v != SEQ_START_TOKEN) | 2381 | if (v != SEQ_START_TOKEN) |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0e6d525a8341..b3943e7562f3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
| @@ -790,10 +790,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
| 790 | newtp->probes_out = 0; | 790 | newtp->probes_out = 0; |
| 791 | newtp->rx_opt.num_sacks = 0; | 791 | newtp->rx_opt.num_sacks = 0; |
| 792 | newtp->urg_data = 0; | 792 | newtp->urg_data = 0; |
| 793 | newtp->listen_opt = NULL; | 793 | /* Deinitialize accept_queue to trap illegal accesses. */ |
| 794 | newtp->accept_queue = newtp->accept_queue_tail = NULL; | 794 | memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue)); |
| 795 | /* Deinitialize syn_wait_lock to trap illegal accesses. */ | ||
| 796 | memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock)); | ||
| 797 | 795 | ||
| 798 | /* Back to base struct sock members. */ | 796 | /* Back to base struct sock members. */ |
| 799 | newsk->sk_err = 0; | 797 | newsk->sk_err = 0; |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index f03efe5fb76a..d97d191149c1 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
| @@ -464,7 +464,7 @@ out_unlock: | |||
| 464 | static void tcp_synack_timer(struct sock *sk) | 464 | static void tcp_synack_timer(struct sock *sk) |
| 465 | { | 465 | { |
| 466 | struct tcp_sock *tp = tcp_sk(sk); | 466 | struct tcp_sock *tp = tcp_sk(sk); |
| 467 | struct tcp_listen_opt *lopt = tp->listen_opt; | 467 | struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt; |
| 468 | int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; | 468 | int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; |
| 469 | int thresh = max_retries; | 469 | int thresh = max_retries; |
| 470 | unsigned long now = jiffies; | 470 | unsigned long now = jiffies; |
| @@ -527,12 +527,8 @@ static void tcp_synack_timer(struct sock *sk) | |||
| 527 | } | 527 | } |
| 528 | 528 | ||
| 529 | /* Drop this request */ | 529 | /* Drop this request */ |
| 530 | write_lock(&tp->syn_wait_lock); | 530 | tcp_synq_unlink(tp, req, reqp); |
| 531 | *reqp = req->dl_next; | 531 | reqsk_queue_removed(&tp->accept_queue, req); |
| 532 | write_unlock(&tp->syn_wait_lock); | ||
| 533 | lopt->qlen--; | ||
| 534 | if (req->retrans == 0) | ||
| 535 | lopt->qlen_young--; | ||
| 536 | reqsk_free(req); | 532 | reqsk_free(req); |
| 537 | continue; | 533 | continue; |
| 538 | } | 534 | } |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 068cd4a8c292..84091daad6b5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
| @@ -401,7 +401,7 @@ static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp, | |||
| 401 | struct in6_addr *laddr, | 401 | struct in6_addr *laddr, |
| 402 | int iif) | 402 | int iif) |
| 403 | { | 403 | { |
| 404 | struct tcp_listen_opt *lopt = tp->listen_opt; | 404 | struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt; |
| 405 | struct request_sock *req, **prev; | 405 | struct request_sock *req, **prev; |
| 406 | 406 | ||
| 407 | for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; | 407 | for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; |
| @@ -1267,18 +1267,10 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) | |||
| 1267 | static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) | 1267 | static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) |
| 1268 | { | 1268 | { |
| 1269 | struct tcp_sock *tp = tcp_sk(sk); | 1269 | struct tcp_sock *tp = tcp_sk(sk); |
| 1270 | struct tcp_listen_opt *lopt = tp->listen_opt; | 1270 | struct tcp_listen_opt *lopt = tp->accept_queue.listen_opt; |
| 1271 | u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); | 1271 | u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); |
| 1272 | 1272 | ||
| 1273 | req->sk = NULL; | 1273 | reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT); |
| 1274 | req->expires = jiffies + TCP_TIMEOUT_INIT; | ||
| 1275 | req->retrans = 0; | ||
| 1276 | req->dl_next = lopt->syn_table[h]; | ||
| 1277 | |||
| 1278 | write_lock(&tp->syn_wait_lock); | ||
| 1279 | lopt->syn_table[h] = req; | ||
| 1280 | write_unlock(&tp->syn_wait_lock); | ||
| 1281 | |||
| 1282 | tcp_synq_added(sk); | 1274 | tcp_synq_added(sk); |
| 1283 | } | 1275 | } |
| 1284 | 1276 | ||
