author    Vladislav Yasevich <vladislav.yasevich@hp.com>    2006-05-19 14:01:18 -0400
committer Sridhar Samudrala <sri@us.ibm.com>                2006-05-19 14:01:18 -0400
commit    61c9fed41638249f8b6ca5345064eb1beb50179f (patch)
tree      8855a0e6cbee58a9d94e30396b7d0a1baa526900
parent    8de8c8738086501bbe3057ed6f4b70dded657488 (diff)
[SCTP]: A better solution to fix the race between sctp_peeloff() and
sctp_rcv().

The goal is to hold the ref on the association/endpoint throughout the
state-machine process.  We accomplish this like so:

    /* ref on the assoc/ep is taken during lookup */

    if owned_by_user(sk)
        sctp_add_backlog(skb, sk);
    else
        inqueue_push(skb, sk);

    /* drop the ref on the assoc/ep */

However, in sctp_add_backlog() we take the ref on the assoc/ep and hold
it while the skb is on the backlog queue.  This allows us to get rid of
the sock_hold/sock_put calls in the lookup routines.

Now sctp_backlog_rcv() needs to account for a potential association
move.  In the unlikely event that the association moved, we need to
retest whether the new socket is locked by the user.  If we don't do
this, we may have two packets racing up the stack toward the same
socket, which we can't deal with.  If the new socket is still locked,
we simply add the skb to its backlog, continuing to hold the ref on the
association.  This gets rid of the need to move packets from one
backlog to another, and it is also safe in case new packets arrive on
the same backlog queue.

The last step is to lock the new socket while we are moving the
association to it.  This is needed in case any new packets arrive on
the association while it is being moved.  We want these to go to the
backlog, since we would like to avoid the race between this new packet
and a packet that may be sitting on the backlog queue of the old socket
for the same association.

Signed-off-by: Vladislav Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
-rw-r--r--  net/sctp/input.c   140
-rw-r--r--  net/sctp/socket.c   16
2 files changed, 89 insertions, 67 deletions
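
In code terms, the new tail of sctp_rcv() looks roughly as follows (a
simplified sketch assembled from the input.c hunks below, not a
verbatim quote):

        /* asoc/ep ref was taken by the lookup routines */
        sctp_bh_lock_sock(sk);

        if (sock_owned_by_user(sk))
                sctp_add_backlog(sk, skb);      /* takes its own assoc/ep ref */
        else
                sctp_inq_push(&chunk->rcvr->inqueue, chunk);

        sctp_bh_unlock_sock(sk);

        /* drop the lookup ref; a backlogged skb still holds its own */
        if (asoc)
                sctp_association_put(asoc);
        else
                sctp_endpoint_put(ep);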
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 7523f4df2da6..1662f9cc869e 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -73,6 +73,8 @@ static struct sctp_association *__sctp_lookup_association(
                                        const union sctp_addr *peer,
                                        struct sctp_transport **pt);
 
+static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
+
 
 /* Calculate the SCTP checksum of an SCTP packet.  */
 static inline int sctp_rcv_checksum(struct sk_buff *skb)
@@ -186,7 +188,6 @@ int sctp_rcv(struct sk_buff *skb)
         */
        if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb)))
        {
-               sock_put(sk);
                if (asoc) {
                        sctp_association_put(asoc);
                        asoc = NULL;
@@ -197,7 +198,6 @@ int sctp_rcv(struct sk_buff *skb)
                sk = sctp_get_ctl_sock();
                ep = sctp_sk(sk)->ep;
                sctp_endpoint_hold(ep);
-               sock_hold(sk);
                rcvr = &ep->base;
        }
 
@@ -253,25 +253,18 @@ int sctp_rcv(struct sk_buff *skb)
         */
        sctp_bh_lock_sock(sk);
 
-       /* It is possible that the association could have moved to a different
-        * socket if it is peeled off. If so, update the sk.
-        */
-       if (sk != rcvr->sk) {
-               sctp_bh_lock_sock(rcvr->sk);
-               sctp_bh_unlock_sock(sk);
-               sk = rcvr->sk;
-       }
-
        if (sock_owned_by_user(sk))
-               sk_add_backlog(sk, skb);
+               sctp_add_backlog(sk, skb);
        else
-               sctp_backlog_rcv(sk, skb);
+               sctp_inq_push(&chunk->rcvr->inqueue, chunk);
 
-       /* Release the sock and the sock ref we took in the lookup calls.
-        * The asoc/ep ref will be released in sctp_backlog_rcv.
-        */
        sctp_bh_unlock_sock(sk);
-       sock_put(sk);
+
+       /* Release the asoc/ep ref we took in the lookup calls. */
+       if (asoc)
+               sctp_association_put(asoc);
+       else
+               sctp_endpoint_put(ep);
 
        return 0;
 
@@ -280,8 +273,7 @@ discard_it:
        return 0;
 
 discard_release:
-       /* Release any structures we may be holding. */
-       sock_put(sk);
+       /* Release the asoc/ep ref we took in the lookup calls. */
        if (asoc)
                sctp_association_put(asoc);
        else
@@ -290,56 +282,87 @@ discard_release:
        goto discard_it;
 }
 
-/* Handle second half of inbound skb processing.  If the sock was busy,
- * we may have need to delay processing until later when the sock is
- * released (on the backlog).   If not busy, we call this routine
- * directly from the bottom half.
- */
+/* Process the backlog queue of the socket.  Every skb on
+ * the backlog holds a ref on an association or endpoint.
+ * We hold this ref throughout the state machine to make
+ * sure that the structure we need is still around.
+ */
 int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 {
        struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
-       struct sctp_inq *inqueue = NULL;
+       struct sctp_inq *inqueue = &chunk->rcvr->inqueue;
        struct sctp_ep_common *rcvr = NULL;
+       int backloged = 0;
 
        rcvr = chunk->rcvr;
 
-       BUG_TRAP(rcvr->sk == sk);
-
-       if (rcvr->dead) {
-               sctp_chunk_free(chunk);
-       } else {
-               inqueue = &chunk->rcvr->inqueue;
-               sctp_inq_push(inqueue, chunk);
-       }
-
-       /* Release the asoc/ep ref we took in the lookup calls in sctp_rcv. */
-       if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
-               sctp_association_put(sctp_assoc(rcvr));
-       else
-               sctp_endpoint_put(sctp_ep(rcvr));
-
+       /* If the rcvr is dead then the association or endpoint
+        * has been deleted and we can safely drop the chunk
+        * and refs that we are holding.
+        */
+       if (rcvr->dead) {
+               sctp_chunk_free(chunk);
+               goto done;
+       }
+
+       if (unlikely(rcvr->sk != sk)) {
+               /* In this case, the association moved from one socket to
+                * another.  We are currently sitting on the backlog of the
+                * old socket, so we need to move.
+                * However, since we are here in the process context we
+                * need to make sure that the user doesn't own
+                * the new socket when we process the packet.
+                * If the new socket is user-owned, queue the chunk to the
+                * backlog of the new socket without dropping any refs.
+                * Otherwise, we can safely push the chunk on the inqueue.
+                */
+
+               sk = rcvr->sk;
+               sctp_bh_lock_sock(sk);
+
+               if (sock_owned_by_user(sk)) {
+                       sk_add_backlog(sk, skb);
+                       backloged = 1;
+               } else
+                       sctp_inq_push(inqueue, chunk);
+
+               sctp_bh_unlock_sock(sk);
+
+               /* If the chunk was backloged again, don't drop refs */
+               if (backloged)
+                       return 0;
+       } else {
+               sctp_inq_push(inqueue, chunk);
+       }
+
+done:
+       /* Release the refs we took in sctp_add_backlog */
+       if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
+               sctp_association_put(sctp_assoc(rcvr));
+       else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
+               sctp_endpoint_put(sctp_ep(rcvr));
+       else
+               BUG();
+
        return 0;
 }
 
-void sctp_backlog_migrate(struct sctp_association *assoc,
-                         struct sock *oldsk, struct sock *newsk)
+static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-       struct sk_buff *skb;
-       struct sctp_chunk *chunk;
+       struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
+       struct sctp_ep_common *rcvr = chunk->rcvr;
 
-       skb = oldsk->sk_backlog.head;
-       oldsk->sk_backlog.head = oldsk->sk_backlog.tail = NULL;
-       while (skb != NULL) {
-               struct sk_buff *next = skb->next;
-
-               chunk = SCTP_INPUT_CB(skb)->chunk;
-               skb->next = NULL;
-               if (&assoc->base == chunk->rcvr)
-                       sk_add_backlog(newsk, skb);
-               else
-                       sk_add_backlog(oldsk, skb);
-               skb = next;
-       }
+       /* Hold the assoc/ep while hanging on the backlog queue.
+        * This way, we know structures we need will not disappear from us
+        */
+       if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
+               sctp_association_hold(sctp_assoc(rcvr));
+       else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
+               sctp_endpoint_hold(sctp_ep(rcvr));
+       else
+               BUG();
+
+       sk_add_backlog(sk, skb);
+}
 
 /* Handle icmp frag needed error. */
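
For context on why the per-skb ref above matters: when the lock owner
later calls release_sock(), the core stack drains the backlog and hands
each skb to sk->sk_backlog_rcv — which the SCTP proto struct points at
sctp_backlog_rcv() — with no assoc/ep lookup of its own.  An
approximate sketch of that generic drain loop in __release_sock() (not
part of this patch):

        do {
                struct sk_buff *next = skb->next;

                skb->next = NULL;
                sk->sk_backlog_rcv(sk, skb);    /* -> sctp_backlog_rcv() */
                skb = next;
        } while (skb != NULL);

The ref taken in sctp_add_backlog() is what keeps chunk->rcvr valid
until this loop gets around to each skb.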
@@ -453,7 +476,6 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
        return sk;
 
 out:
-       sock_put(sk);
        if (asoc)
                sctp_association_put(asoc);
        return NULL;
@@ -463,7 +485,6 @@ out:
 void sctp_err_finish(struct sock *sk, struct sctp_association *asoc)
 {
        sctp_bh_unlock_sock(sk);
-       sock_put(sk);
        if (asoc)
                sctp_association_put(asoc);
 }
@@ -716,7 +737,6 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *l
 
 hit:
        sctp_endpoint_hold(ep);
-       sock_hold(epb->sk);
        read_unlock(&head->lock);
        return ep;
 }
@@ -818,7 +838,6 @@ static struct sctp_association *__sctp_lookup_association(
 hit:
        *pt = transport;
        sctp_association_hold(asoc);
-       sock_hold(epb->sk);
        read_unlock(&head->lock);
        return asoc;
 }
@@ -846,7 +865,6 @@ int sctp_has_association(const union sctp_addr *laddr,
        struct sctp_transport *transport;
 
        if ((asoc = sctp_lookup_association(laddr, paddr, &transport))) {
-               sock_put(asoc->base.sk);
                sctp_association_put(asoc);
                return 1;
        }
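
Taken together, the input.c changes set a simpler contract for the
lookup routines: a successful lookup hands the caller exactly one
assoc/ep reference and no socket reference, and every exit path must
drop exactly that ref — as sctp_rcv(), sctp_err_lookup()/
sctp_err_finish(), and sctp_has_association() now do.  A minimal caller
under this contract (illustrative sketch; laddr, paddr, and t are
placeholders):

        struct sctp_transport *t;
        struct sctp_association *asoc;

        asoc = sctp_lookup_association(laddr, paddr, &t);
        if (asoc) {
                /* use asoc; no sock_put() is owed for it anymore */
                sctp_association_put(asoc);     /* the one mandatory pairing */
        }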
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 90863307bcd9..b1a17758003a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1229,7 +1229,7 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
 
        ep = sctp_sk(sk)->ep;
 
-       /* Walk all associations on a socket, not on an endpoint. */
+       /* Walk all associations on an endpoint. */
        list_for_each_safe(pos, temp, &ep->asocs) {
                asoc = list_entry(pos, struct sctp_association, asocs);
 
@@ -5318,6 +5318,7 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
         */
        sctp_release_sock(sk);
        current_timeo = schedule_timeout(current_timeo);
+       BUG_ON(sk != asoc->base.sk);
        sctp_lock_sock(sk);
 
        *timeo_p = current_timeo;
@@ -5605,12 +5606,14 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
         */
        newsp->type = type;
 
-       spin_lock_bh(&oldsk->sk_lock.slock);
-       /* Migrate the backlog from oldsk to newsk. */
-       sctp_backlog_migrate(assoc, oldsk, newsk);
-       /* Migrate the association to the new socket. */
+       /* Mark the new socket "in-use" by the user so that any packets
+        * that may arrive on the association after we've moved it are
+        * queued to the backlog.  This prevents a potential race between
+        * backlog processing on the old socket and new-packet processing
+        * on the new socket.
+        */
+       sctp_lock_sock(newsk);
        sctp_assoc_migrate(assoc, newsk);
-       spin_unlock_bh(&oldsk->sk_lock.slock);
 
        /* If the association on the newsk is already closed before accept()
         * is called, set RCV_SHUTDOWN flag.
@@ -5619,6 +5622,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
                newsk->sk_shutdown |= RCV_SHUTDOWN;
 
        newsk->sk_state = SCTP_SS_ESTABLISHED;
+       sctp_release_sock(newsk);
 }
 
 /* This proto struct describes the ULP interface for SCTP. */
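
Why sctp_lock_sock(newsk) closes the race: while a process owns the
socket lock, sock_owned_by_user() is true, so any packet that arrives
for the just-migrated association takes the sctp_add_backlog() path on
newsk rather than racing with the move; sctp_release_sock() then drains
whatever queued up in the meantime.  The resulting sequence during
peeloff, sketched from the hunks above:

        /* process context, sctp_sock_migrate(): */
        sctp_lock_sock(newsk);          /* sock_owned_by_user(newsk) now true */
        sctp_assoc_migrate(assoc, newsk);
        /* ...set up newsk state... */
        newsk->sk_state = SCTP_SS_ESTABLISHED;
        sctp_release_sock(newsk);       /* processes any backlogged packets */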