aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/request_sock.c
diff options
context:
space:
mode:
authorJ. Bruce Fields <bfields@redhat.com>2012-10-09 18:35:22 -0400
committerJ. Bruce Fields <bfields@redhat.com>2012-10-09 18:35:22 -0400
commitf474af7051212b4efc8267583fad9c4ebf33ccff (patch)
tree1aa46ebc8065a341f247c2a2d9af2f624ad1d4f8 /net/core/request_sock.c
parent0d22f68f02c10d5d10ec5712917e5828b001a822 (diff)
parente3dd9a52cb5552c46c2a4ca7ccdfb4dab5c72457 (diff)
nfs: disintegrate UAPI for nfs
This is to complete part of the Userspace API (UAPI) disintegration for which the preparatory patches were pulled recently. After these patches, userspace headers will be segregated into: include/uapi/linux/.../foo.h for the userspace interface stuff, and: include/linux/.../foo.h for the strictly kernel internal stuff. Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Diffstat (limited to 'net/core/request_sock.c')
-rw-r--r--net/core/request_sock.c95
1 files changed, 95 insertions, 0 deletions
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 9b570a6a33c..c31d9e8668c 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -15,6 +15,7 @@
15#include <linux/random.h> 15#include <linux/random.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/string.h> 17#include <linux/string.h>
18#include <linux/tcp.h>
18#include <linux/vmalloc.h> 19#include <linux/vmalloc.h>
19 20
20#include <net/request_sock.h> 21#include <net/request_sock.h>
@@ -130,3 +131,97 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
130 kfree(lopt); 131 kfree(lopt);
131} 132}
132 133
134/*
135 * This function is called to set a Fast Open socket's "fastopen_rsk" field
136 * to NULL when a TFO socket no longer needs to access the request_sock.
137 * This happens only after 3WHS has been either completed or aborted (e.g.,
138 * RST is received).
139 *
140 * Before TFO, a child socket is created only after 3WHS is completed,
141 * hence it never needs to access the request_sock. things get a lot more
142 * complex with TFO. A child socket, accepted or not, has to access its
143 * request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts,
144 * until 3WHS is either completed or aborted. Afterwards the req will stay
145 * until either the child socket is accepted, or in the rare case when the
146 * listener is closed before the child is accepted.
147 *
148 * In short, a request socket is only freed after BOTH 3WHS has completed
149 * (or aborted) and the child socket has been accepted (or listener closed).
150 * When a child socket is accepted, its corresponding req->sk is set to
151 * NULL since it's no longer needed. More importantly, "req->sk == NULL"
152 * will be used by the code below to determine if a child socket has been
153 * accepted or not, and the check is protected by the fastopenq->lock
154 * described below.
155 *
156 * Note that fastopen_rsk is only accessed from the child socket's context
157 * with its socket lock held. But a request_sock (req) can be accessed by
158 * both its child socket through fastopen_rsk, and a listener socket through
159 * icsk_accept_queue.rskq_accept_head. To protect the access a simple spin
160 * lock per listener "icsk->icsk_accept_queue.fastopenq->lock" is created.
161 * only in the rare case when both the listener and the child locks are held,
162 * e.g., in inet_csk_listen_stop() do we not need to acquire the lock.
163 * The lock also protects other fields such as fastopenq->qlen, which is
164 * decremented by this function when fastopen_rsk is no longer needed.
165 *
166 * Note that another solution was to simply use the existing socket lock
167 * from the listener. But first socket lock is difficult to use. It is not
168 * a simple spin lock - one must consider sock_owned_by_user() and arrange
169 * to use sk_add_backlog() stuff. But what really makes it infeasible is the
170 * locking hierarchy violation. E.g., inet_csk_listen_stop() may try to
171 * acquire a child's lock while holding listener's socket lock. A corner
172 * case might also exist in tcp_v4_hnd_req() that will trigger this locking
173 * order.
174 *
175 * When a TFO req is created, it needs to sock_hold its listener to prevent
176 * the latter data structure from going away.
177 *
178 * This function also sets "treq->listener" to NULL and unreference listener
179 * socket. treq->listener is used by the listener so it is protected by the
180 * fastopenq->lock in this function.
181 */
182void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
183 bool reset)
184{
185 struct sock *lsk = tcp_rsk(req)->listener;
186 struct fastopen_queue *fastopenq =
187 inet_csk(lsk)->icsk_accept_queue.fastopenq;
188
189 BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));
190
191 tcp_sk(sk)->fastopen_rsk = NULL;
192 spin_lock_bh(&fastopenq->lock);
193 fastopenq->qlen--;
194 tcp_rsk(req)->listener = NULL;
195 if (req->sk) /* the child socket hasn't been accepted yet */
196 goto out;
197
198 if (!reset || lsk->sk_state != TCP_LISTEN) {
199 /* If the listener has been closed don't bother with the
200 * special RST handling below.
201 */
202 spin_unlock_bh(&fastopenq->lock);
203 sock_put(lsk);
204 reqsk_free(req);
205 return;
206 }
207 /* Wait for 60secs before removing a req that has triggered RST.
208 * This is a simple defense against TFO spoofing attack - by
209 * counting the req against fastopen.max_qlen, and disabling
210 * TFO when the qlen exceeds max_qlen.
211 *
212 * For more details see CoNext'11 "TCP Fast Open" paper.
213 */
214 req->expires = jiffies + 60*HZ;
215 if (fastopenq->rskq_rst_head == NULL)
216 fastopenq->rskq_rst_head = req;
217 else
218 fastopenq->rskq_rst_tail->dl_next = req;
219
220 req->dl_next = NULL;
221 fastopenq->rskq_rst_tail = req;
222 fastopenq->qlen++;
223out:
224 spin_unlock_bh(&fastopenq->lock);
225 sock_put(lsk);
226 return;
227}