diff options
author | Vladislav Yasevich <vladislav.yasevich@hp.com> | 2006-05-19 14:01:18 -0400 |
---|---|---|
committer | Sridhar Samudrala <sri@us.ibm.com> | 2006-05-19 14:01:18 -0400 |
commit | 61c9fed41638249f8b6ca5345064eb1beb50179f (patch) | |
tree | 8855a0e6cbee58a9d94e30396b7d0a1baa526900 /net/sctp | |
parent | 8de8c8738086501bbe3057ed6f4b70dded657488 (diff) |
[SCTP]: A better solution to fix the race between sctp_peeloff() and
sctp_rcv().
The goal is to hold the ref on the association/endpoint throughout the
state-machine process. We accomplish like this:
/* ref on the assoc/ep is taken during lookup */
if owned_by_user(sk)
sctp_add_backlog(skb, sk);
else
inqueue_push(skb, sk);
/* drop the ref on the assoc/ep */
However, in sctp_add_backlog() we take the ref on assoc/ep and hold it
while the skb is on the backlog queue. This allows us to get rid of the
sock_hold/sock_put in the lookup routines.
Now sctp_backlog_rcv() needs to account for potential association move.
In the unlikely event that association moved, we need to retest if the
new socket is locked by user. If we don't this, we may have two packets
racing up the stack toward the same socket and we can't deal with it.
If the new socket is still locked, we'll just add the skb to its backlog
continuing to hold the ref on the association. This get's rid of the
need to move packets from one backlog to another and it also safe in
case new packets arrive on the same backlog queue.
The last step, is to lock the new socket when we are moving the
association to it. This is needed in case any new packets arrive on
the association when it moved. We want these to go to the backlog since
we would like to avoid the race between this new packet and a packet
that may be sitting on the backlog queue of the old socket toward the
same association.
Signed-off-by: Vladislav Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Diffstat (limited to 'net/sctp')
-rw-r--r-- | net/sctp/input.c | 140 | ||||
-rw-r--r-- | net/sctp/socket.c | 16 |
2 files changed, 89 insertions, 67 deletions
diff --git a/net/sctp/input.c b/net/sctp/input.c index 7523f4df2da6..1662f9cc869e 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c | |||
@@ -73,6 +73,8 @@ static struct sctp_association *__sctp_lookup_association( | |||
73 | const union sctp_addr *peer, | 73 | const union sctp_addr *peer, |
74 | struct sctp_transport **pt); | 74 | struct sctp_transport **pt); |
75 | 75 | ||
76 | static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb); | ||
77 | |||
76 | 78 | ||
77 | /* Calculate the SCTP checksum of an SCTP packet. */ | 79 | /* Calculate the SCTP checksum of an SCTP packet. */ |
78 | static inline int sctp_rcv_checksum(struct sk_buff *skb) | 80 | static inline int sctp_rcv_checksum(struct sk_buff *skb) |
@@ -186,7 +188,6 @@ int sctp_rcv(struct sk_buff *skb) | |||
186 | */ | 188 | */ |
187 | if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) | 189 | if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) |
188 | { | 190 | { |
189 | sock_put(sk); | ||
190 | if (asoc) { | 191 | if (asoc) { |
191 | sctp_association_put(asoc); | 192 | sctp_association_put(asoc); |
192 | asoc = NULL; | 193 | asoc = NULL; |
@@ -197,7 +198,6 @@ int sctp_rcv(struct sk_buff *skb) | |||
197 | sk = sctp_get_ctl_sock(); | 198 | sk = sctp_get_ctl_sock(); |
198 | ep = sctp_sk(sk)->ep; | 199 | ep = sctp_sk(sk)->ep; |
199 | sctp_endpoint_hold(ep); | 200 | sctp_endpoint_hold(ep); |
200 | sock_hold(sk); | ||
201 | rcvr = &ep->base; | 201 | rcvr = &ep->base; |
202 | } | 202 | } |
203 | 203 | ||
@@ -253,25 +253,18 @@ int sctp_rcv(struct sk_buff *skb) | |||
253 | */ | 253 | */ |
254 | sctp_bh_lock_sock(sk); | 254 | sctp_bh_lock_sock(sk); |
255 | 255 | ||
256 | /* It is possible that the association could have moved to a different | ||
257 | * socket if it is peeled off. If so, update the sk. | ||
258 | */ | ||
259 | if (sk != rcvr->sk) { | ||
260 | sctp_bh_lock_sock(rcvr->sk); | ||
261 | sctp_bh_unlock_sock(sk); | ||
262 | sk = rcvr->sk; | ||
263 | } | ||
264 | |||
265 | if (sock_owned_by_user(sk)) | 256 | if (sock_owned_by_user(sk)) |
266 | sk_add_backlog(sk, skb); | 257 | sctp_add_backlog(sk, skb); |
267 | else | 258 | else |
268 | sctp_backlog_rcv(sk, skb); | 259 | sctp_inq_push(&chunk->rcvr->inqueue, chunk); |
269 | 260 | ||
270 | /* Release the sock and the sock ref we took in the lookup calls. | ||
271 | * The asoc/ep ref will be released in sctp_backlog_rcv. | ||
272 | */ | ||
273 | sctp_bh_unlock_sock(sk); | 261 | sctp_bh_unlock_sock(sk); |
274 | sock_put(sk); | 262 | |
263 | /* Release the asoc/ep ref we took in the lookup calls. */ | ||
264 | if (asoc) | ||
265 | sctp_association_put(asoc); | ||
266 | else | ||
267 | sctp_endpoint_put(ep); | ||
275 | 268 | ||
276 | return 0; | 269 | return 0; |
277 | 270 | ||
@@ -280,8 +273,7 @@ discard_it: | |||
280 | return 0; | 273 | return 0; |
281 | 274 | ||
282 | discard_release: | 275 | discard_release: |
283 | /* Release any structures we may be holding. */ | 276 | /* Release the asoc/ep ref we took in the lookup calls. */ |
284 | sock_put(sk); | ||
285 | if (asoc) | 277 | if (asoc) |
286 | sctp_association_put(asoc); | 278 | sctp_association_put(asoc); |
287 | else | 279 | else |
@@ -290,56 +282,87 @@ discard_release: | |||
290 | goto discard_it; | 282 | goto discard_it; |
291 | } | 283 | } |
292 | 284 | ||
293 | /* Handle second half of inbound skb processing. If the sock was busy, | 285 | /* Process the backlog queue of the socket. Every skb on |
294 | * we may have need to delay processing until later when the sock is | 286 | * the backlog holds a ref on an association or endpoint. |
295 | * released (on the backlog). If not busy, we call this routine | 287 | * We hold this ref throughout the state machine to make |
296 | * directly from the bottom half. | 288 | * sure that the structure we need is still around. |
297 | */ | 289 | */ |
298 | int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) | 290 | int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) |
299 | { | 291 | { |
300 | struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; | 292 | struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; |
301 | struct sctp_inq *inqueue = NULL; | 293 | struct sctp_inq *inqueue = &chunk->rcvr->inqueue; |
302 | struct sctp_ep_common *rcvr = NULL; | 294 | struct sctp_ep_common *rcvr = NULL; |
295 | int backloged = 0; | ||
303 | 296 | ||
304 | rcvr = chunk->rcvr; | 297 | rcvr = chunk->rcvr; |
305 | 298 | ||
306 | BUG_TRAP(rcvr->sk == sk); | 299 | /* If the rcvr is dead then the association or endpoint |
307 | 300 | * has been deleted and we can safely drop the chunk | |
308 | if (rcvr->dead) { | 301 | * and refs that we are holding. |
309 | sctp_chunk_free(chunk); | 302 | */ |
310 | } else { | 303 | if (rcvr->dead) { |
311 | inqueue = &chunk->rcvr->inqueue; | 304 | sctp_chunk_free(chunk); |
312 | sctp_inq_push(inqueue, chunk); | 305 | goto done; |
313 | } | 306 | } |
314 | 307 | ||
315 | /* Release the asoc/ep ref we took in the lookup calls in sctp_rcv. */ | 308 | if (unlikely(rcvr->sk != sk)) { |
316 | if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) | 309 | /* In this case, the association moved from one socket to |
317 | sctp_association_put(sctp_assoc(rcvr)); | 310 | * another. We are currently sitting on the backlog of the |
318 | else | 311 | * old socket, so we need to move. |
319 | sctp_endpoint_put(sctp_ep(rcvr)); | 312 | * However, since we are here in the process context we |
320 | 313 | * need to take make sure that the user doesn't own | |
314 | * the new socket when we process the packet. | ||
315 | * If the new socket is user-owned, queue the chunk to the | ||
316 | * backlog of the new socket without dropping any refs. | ||
317 | * Otherwise, we can safely push the chunk on the inqueue. | ||
318 | */ | ||
319 | |||
320 | sk = rcvr->sk; | ||
321 | sctp_bh_lock_sock(sk); | ||
322 | |||
323 | if (sock_owned_by_user(sk)) { | ||
324 | sk_add_backlog(sk, skb); | ||
325 | backloged = 1; | ||
326 | } else | ||
327 | sctp_inq_push(inqueue, chunk); | ||
328 | |||
329 | sctp_bh_unlock_sock(sk); | ||
330 | |||
331 | /* If the chunk was backloged again, don't drop refs */ | ||
332 | if (backloged) | ||
333 | return 0; | ||
334 | } else { | ||
335 | sctp_inq_push(inqueue, chunk); | ||
336 | } | ||
337 | |||
338 | done: | ||
339 | /* Release the refs we took in sctp_add_backlog */ | ||
340 | if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) | ||
341 | sctp_association_put(sctp_assoc(rcvr)); | ||
342 | else if (SCTP_EP_TYPE_SOCKET == rcvr->type) | ||
343 | sctp_endpoint_put(sctp_ep(rcvr)); | ||
344 | else | ||
345 | BUG(); | ||
346 | |||
321 | return 0; | 347 | return 0; |
322 | } | 348 | } |
323 | 349 | ||
324 | void sctp_backlog_migrate(struct sctp_association *assoc, | 350 | static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb) |
325 | struct sock *oldsk, struct sock *newsk) | ||
326 | { | 351 | { |
327 | struct sk_buff *skb; | 352 | struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; |
328 | struct sctp_chunk *chunk; | 353 | struct sctp_ep_common *rcvr = chunk->rcvr; |
329 | 354 | ||
330 | skb = oldsk->sk_backlog.head; | 355 | /* Hold the assoc/ep while hanging on the backlog queue. |
331 | oldsk->sk_backlog.head = oldsk->sk_backlog.tail = NULL; | 356 | * This way, we know structures we need will not disappear from us |
332 | while (skb != NULL) { | 357 | */ |
333 | struct sk_buff *next = skb->next; | 358 | if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) |
334 | 359 | sctp_association_hold(sctp_assoc(rcvr)); | |
335 | chunk = SCTP_INPUT_CB(skb)->chunk; | 360 | else if (SCTP_EP_TYPE_SOCKET == rcvr->type) |
336 | skb->next = NULL; | 361 | sctp_endpoint_hold(sctp_ep(rcvr)); |
337 | if (&assoc->base == chunk->rcvr) | 362 | else |
338 | sk_add_backlog(newsk, skb); | 363 | BUG(); |
339 | else | 364 | |
340 | sk_add_backlog(oldsk, skb); | 365 | sk_add_backlog(sk, skb); |
341 | skb = next; | ||
342 | } | ||
343 | } | 366 | } |
344 | 367 | ||
345 | /* Handle icmp frag needed error. */ | 368 | /* Handle icmp frag needed error. */ |
@@ -453,7 +476,6 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, | |||
453 | return sk; | 476 | return sk; |
454 | 477 | ||
455 | out: | 478 | out: |
456 | sock_put(sk); | ||
457 | if (asoc) | 479 | if (asoc) |
458 | sctp_association_put(asoc); | 480 | sctp_association_put(asoc); |
459 | return NULL; | 481 | return NULL; |
@@ -463,7 +485,6 @@ out: | |||
463 | void sctp_err_finish(struct sock *sk, struct sctp_association *asoc) | 485 | void sctp_err_finish(struct sock *sk, struct sctp_association *asoc) |
464 | { | 486 | { |
465 | sctp_bh_unlock_sock(sk); | 487 | sctp_bh_unlock_sock(sk); |
466 | sock_put(sk); | ||
467 | if (asoc) | 488 | if (asoc) |
468 | sctp_association_put(asoc); | 489 | sctp_association_put(asoc); |
469 | } | 490 | } |
@@ -716,7 +737,6 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *l | |||
716 | 737 | ||
717 | hit: | 738 | hit: |
718 | sctp_endpoint_hold(ep); | 739 | sctp_endpoint_hold(ep); |
719 | sock_hold(epb->sk); | ||
720 | read_unlock(&head->lock); | 740 | read_unlock(&head->lock); |
721 | return ep; | 741 | return ep; |
722 | } | 742 | } |
@@ -818,7 +838,6 @@ static struct sctp_association *__sctp_lookup_association( | |||
818 | hit: | 838 | hit: |
819 | *pt = transport; | 839 | *pt = transport; |
820 | sctp_association_hold(asoc); | 840 | sctp_association_hold(asoc); |
821 | sock_hold(epb->sk); | ||
822 | read_unlock(&head->lock); | 841 | read_unlock(&head->lock); |
823 | return asoc; | 842 | return asoc; |
824 | } | 843 | } |
@@ -846,7 +865,6 @@ int sctp_has_association(const union sctp_addr *laddr, | |||
846 | struct sctp_transport *transport; | 865 | struct sctp_transport *transport; |
847 | 866 | ||
848 | if ((asoc = sctp_lookup_association(laddr, paddr, &transport))) { | 867 | if ((asoc = sctp_lookup_association(laddr, paddr, &transport))) { |
849 | sock_put(asoc->base.sk); | ||
850 | sctp_association_put(asoc); | 868 | sctp_association_put(asoc); |
851 | return 1; | 869 | return 1; |
852 | } | 870 | } |
diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 90863307bcd9..b1a17758003a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c | |||
@@ -1229,7 +1229,7 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) | |||
1229 | 1229 | ||
1230 | ep = sctp_sk(sk)->ep; | 1230 | ep = sctp_sk(sk)->ep; |
1231 | 1231 | ||
1232 | /* Walk all associations on a socket, not on an endpoint. */ | 1232 | /* Walk all associations on an endpoint. */ |
1233 | list_for_each_safe(pos, temp, &ep->asocs) { | 1233 | list_for_each_safe(pos, temp, &ep->asocs) { |
1234 | asoc = list_entry(pos, struct sctp_association, asocs); | 1234 | asoc = list_entry(pos, struct sctp_association, asocs); |
1235 | 1235 | ||
@@ -5318,6 +5318,7 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, | |||
5318 | */ | 5318 | */ |
5319 | sctp_release_sock(sk); | 5319 | sctp_release_sock(sk); |
5320 | current_timeo = schedule_timeout(current_timeo); | 5320 | current_timeo = schedule_timeout(current_timeo); |
5321 | BUG_ON(sk != asoc->base.sk); | ||
5321 | sctp_lock_sock(sk); | 5322 | sctp_lock_sock(sk); |
5322 | 5323 | ||
5323 | *timeo_p = current_timeo; | 5324 | *timeo_p = current_timeo; |
@@ -5605,12 +5606,14 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, | |||
5605 | */ | 5606 | */ |
5606 | newsp->type = type; | 5607 | newsp->type = type; |
5607 | 5608 | ||
5608 | spin_lock_bh(&oldsk->sk_lock.slock); | 5609 | /* Mark the new socket "in-use" by the user so that any packets |
5609 | /* Migrate the backlog from oldsk to newsk. */ | 5610 | * that may arrive on the association after we've moved it are |
5610 | sctp_backlog_migrate(assoc, oldsk, newsk); | 5611 | * queued to the backlog. This prevents a potential race between |
5611 | /* Migrate the association to the new socket. */ | 5612 | * backlog processing on the old socket and new-packet processing |
5613 | * on the new socket. | ||
5614 | */ | ||
5615 | sctp_lock_sock(newsk); | ||
5612 | sctp_assoc_migrate(assoc, newsk); | 5616 | sctp_assoc_migrate(assoc, newsk); |
5613 | spin_unlock_bh(&oldsk->sk_lock.slock); | ||
5614 | 5617 | ||
5615 | /* If the association on the newsk is already closed before accept() | 5618 | /* If the association on the newsk is already closed before accept() |
5616 | * is called, set RCV_SHUTDOWN flag. | 5619 | * is called, set RCV_SHUTDOWN flag. |
@@ -5619,6 +5622,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, | |||
5619 | newsk->sk_shutdown |= RCV_SHUTDOWN; | 5622 | newsk->sk_shutdown |= RCV_SHUTDOWN; |
5620 | 5623 | ||
5621 | newsk->sk_state = SCTP_SS_ESTABLISHED; | 5624 | newsk->sk_state = SCTP_SS_ESTABLISHED; |
5625 | sctp_release_sock(newsk); | ||
5622 | } | 5626 | } |
5623 | 5627 | ||
5624 | /* This proto struct describes the ULP interface for SCTP. */ | 5628 | /* This proto struct describes the ULP interface for SCTP. */ |