aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichal Ostrowski <mostrows@gmail.com>2009-10-26 19:23:20 -0400
committerDavid S. Miller <davem@davemloft.net>2009-10-26 19:23:20 -0400
commitfb64bb560e181363c29ed4fe12a0d82765d61733 (patch)
treed3f5f15e9840964657bf40326705d79abe06e9e1
parent5ccdcecb72692d46d7a9264e62751241c7eca559 (diff)
PPPoE: Fix flush/close races.
Be more careful about the state of pointers during tear-down. The "pppoe_dev" field can only be looked at safely while holding socket locks. This subsequently allows for the flush_lock to be killed. We depend on the PPPOX_CONNECTED state to tell us that that those fields are valid, so whoever clears that state (pppox_unbind_sock()) is responsible for the dev_put() call. We also have to ensure that we delete_item() on all sockets before they are cleaned up. The need for these changes has been exposed by scenarios wherein namespace bindings of ethernet devices change while there are ongoing PPPoE sessions, which resulted in oopses due to unusual socket connection termination paths, exposing these issues. Signed-off-by: Michal Ostrowski <mostrows@gmail.com> Reviewed-by: Cyril Gorcunov <gorcunov@gmail.com> Reported-by: Denys Fedoryschenko <denys@visp.net.lb> Tested-by: Denys Fedoryschenko <denys@visp.net.lb>
-rw-r--r--drivers/net/pppoe.c129
1 files changed, 68 insertions, 61 deletions
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index 7cbf6f9b51de..2559991eea6a 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -111,9 +111,6 @@ struct pppoe_net {
111 rwlock_t hash_lock; 111 rwlock_t hash_lock;
112}; 112};
113 113
114/* to eliminate a race btw pppoe_flush_dev and pppoe_release */
115static DEFINE_SPINLOCK(flush_lock);
116
117/* 114/*
118 * PPPoE could be in the following stages: 115 * PPPoE could be in the following stages:
119 * 1) Discovery stage (to obtain remote MAC and Session ID) 116 * 1) Discovery stage (to obtain remote MAC and Session ID)
@@ -303,45 +300,48 @@ static void pppoe_flush_dev(struct net_device *dev)
303 write_lock_bh(&pn->hash_lock); 300 write_lock_bh(&pn->hash_lock);
304 for (i = 0; i < PPPOE_HASH_SIZE; i++) { 301 for (i = 0; i < PPPOE_HASH_SIZE; i++) {
305 struct pppox_sock *po = pn->hash_table[i]; 302 struct pppox_sock *po = pn->hash_table[i];
303 struct sock *sk;
306 304
307 while (po != NULL) { 305 while (po) {
308 struct sock *sk; 306 while (po && po->pppoe_dev != dev) {
309 if (po->pppoe_dev != dev) {
310 po = po->next; 307 po = po->next;
311 continue;
312 } 308 }
309
310 if (!po)
311 break;
312
313 sk = sk_pppox(po); 313 sk = sk_pppox(po);
314 spin_lock(&flush_lock);
315 po->pppoe_dev = NULL;
316 spin_unlock(&flush_lock);
317 dev_put(dev);
318 314
319 /* We always grab the socket lock, followed by the 315 /* We always grab the socket lock, followed by the
320 * hash_lock, in that order. Since we should 316 * hash_lock, in that order. Since we should hold the
321 * hold the sock lock while doing any unbinding, 317 * sock lock while doing any unbinding, we need to
322 * we need to release the lock we're holding. 318 * release the lock we're holding. Hold a reference to
323 * Hold a reference to the sock so it doesn't disappear 319 * the sock so it doesn't disappear as we're jumping
324 * as we're jumping between locks. 320 * between locks.
325 */ 321 */
326 322
327 sock_hold(sk); 323 sock_hold(sk);
328
329 write_unlock_bh(&pn->hash_lock); 324 write_unlock_bh(&pn->hash_lock);
330 lock_sock(sk); 325 lock_sock(sk);
331 326
332 if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { 327 if (po->pppoe_dev == dev
328 && sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
333 pppox_unbind_sock(sk); 329 pppox_unbind_sock(sk);
334 sk->sk_state = PPPOX_ZOMBIE; 330 sk->sk_state = PPPOX_ZOMBIE;
335 sk->sk_state_change(sk); 331 sk->sk_state_change(sk);
332 po->pppoe_dev = NULL;
333 dev_put(dev);
336 } 334 }
337 335
338 release_sock(sk); 336 release_sock(sk);
339 sock_put(sk); 337 sock_put(sk);
340 338
341 /* Restart scan at the beginning of this hash chain. 339 /* Restart the process from the start of the current
342 * While the lock was dropped the chain contents may 340 * hash chain. We dropped locks so the world may have
343 * have changed. 341 * change from underneath us.
344 */ 342 */
343
344 BUG_ON(pppoe_pernet(dev_net(dev)) == NULL);
345 write_lock_bh(&pn->hash_lock); 345 write_lock_bh(&pn->hash_lock);
346 po = pn->hash_table[i]; 346 po = pn->hash_table[i];
347 } 347 }
@@ -388,11 +388,16 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
388 struct pppox_sock *po = pppox_sk(sk); 388 struct pppox_sock *po = pppox_sk(sk);
389 struct pppox_sock *relay_po; 389 struct pppox_sock *relay_po;
390 390
391 /* Backlog receive. Semantics of backlog rcv preclude any code from
392 * executing in lock_sock()/release_sock() bounds; meaning sk->sk_state
393 * can't change.
394 */
395
391 if (sk->sk_state & PPPOX_BOUND) { 396 if (sk->sk_state & PPPOX_BOUND) {
392 ppp_input(&po->chan, skb); 397 ppp_input(&po->chan, skb);
393 } else if (sk->sk_state & PPPOX_RELAY) { 398 } else if (sk->sk_state & PPPOX_RELAY) {
394 relay_po = get_item_by_addr(dev_net(po->pppoe_dev), 399 relay_po = get_item_by_addr(sock_net(sk),
395 &po->pppoe_relay); 400 &po->pppoe_relay);
396 if (relay_po == NULL) 401 if (relay_po == NULL)
397 goto abort_kfree; 402 goto abort_kfree;
398 403
@@ -447,6 +452,10 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
447 goto drop; 452 goto drop;
448 453
449 pn = pppoe_pernet(dev_net(dev)); 454 pn = pppoe_pernet(dev_net(dev));
455
456 /* Note that get_item does a sock_hold(), so sk_pppox(po)
457 * is known to be safe.
458 */
450 po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); 459 po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex);
451 if (!po) 460 if (!po)
452 goto drop; 461 goto drop;
@@ -561,6 +570,7 @@ static int pppoe_release(struct socket *sock)
561 struct sock *sk = sock->sk; 570 struct sock *sk = sock->sk;
562 struct pppox_sock *po; 571 struct pppox_sock *po;
563 struct pppoe_net *pn; 572 struct pppoe_net *pn;
573 struct net *net = NULL;
564 574
565 if (!sk) 575 if (!sk)
566 return 0; 576 return 0;
@@ -571,44 +581,28 @@ static int pppoe_release(struct socket *sock)
571 return -EBADF; 581 return -EBADF;
572 } 582 }
573 583
584 po = pppox_sk(sk);
585
586 if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
587 dev_put(po->pppoe_dev);
588 po->pppoe_dev = NULL;
589 }
590
574 pppox_unbind_sock(sk); 591 pppox_unbind_sock(sk);
575 592
576 /* Signal the death of the socket. */ 593 /* Signal the death of the socket. */
577 sk->sk_state = PPPOX_DEAD; 594 sk->sk_state = PPPOX_DEAD;
578 595
579 /* 596 net = sock_net(sk);
580 * pppoe_flush_dev could lead to a race with 597 pn = pppoe_pernet(net);
581 * this routine so we use flush_lock to eliminate
582 * such a case (we only need per-net specific data)
583 */
584 spin_lock(&flush_lock);
585 po = pppox_sk(sk);
586 if (!po->pppoe_dev) {
587 spin_unlock(&flush_lock);
588 goto out;
589 }
590 pn = pppoe_pernet(dev_net(po->pppoe_dev));
591 spin_unlock(&flush_lock);
592 598
593 /* 599 /*
594 * protect "po" from concurrent updates 600 * protect "po" from concurrent updates
595 * on pppoe_flush_dev 601 * on pppoe_flush_dev
596 */ 602 */
597 write_lock_bh(&pn->hash_lock); 603 delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote,
604 po->pppoe_ifindex);
598 605
599 po = pppox_sk(sk);
600 if (stage_session(po->pppoe_pa.sid))
601 __delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote,
602 po->pppoe_ifindex);
603
604 if (po->pppoe_dev) {
605 dev_put(po->pppoe_dev);
606 po->pppoe_dev = NULL;
607 }
608
609 write_unlock_bh(&pn->hash_lock);
610
611out:
612 sock_orphan(sk); 606 sock_orphan(sk);
613 sock->sk = NULL; 607 sock->sk = NULL;
614 608
@@ -625,8 +619,9 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
625 struct sock *sk = sock->sk; 619 struct sock *sk = sock->sk;
626 struct sockaddr_pppox *sp = (struct sockaddr_pppox *)uservaddr; 620 struct sockaddr_pppox *sp = (struct sockaddr_pppox *)uservaddr;
627 struct pppox_sock *po = pppox_sk(sk); 621 struct pppox_sock *po = pppox_sk(sk);
628 struct net_device *dev; 622 struct net_device *dev = NULL;
629 struct pppoe_net *pn; 623 struct pppoe_net *pn;
624 struct net *net = NULL;
630 int error; 625 int error;
631 626
632 lock_sock(sk); 627 lock_sock(sk);
@@ -652,12 +647,14 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
652 /* Delete the old binding */ 647 /* Delete the old binding */
653 if (stage_session(po->pppoe_pa.sid)) { 648 if (stage_session(po->pppoe_pa.sid)) {
654 pppox_unbind_sock(sk); 649 pppox_unbind_sock(sk);
650 pn = pppoe_pernet(sock_net(sk));
651 delete_item(pn, po->pppoe_pa.sid,
652 po->pppoe_pa.remote, po->pppoe_ifindex);
655 if (po->pppoe_dev) { 653 if (po->pppoe_dev) {
656 pn = pppoe_pernet(dev_net(po->pppoe_dev));
657 delete_item(pn, po->pppoe_pa.sid,
658 po->pppoe_pa.remote, po->pppoe_ifindex);
659 dev_put(po->pppoe_dev); 654 dev_put(po->pppoe_dev);
655 po->pppoe_dev = NULL;
660 } 656 }
657
661 memset(sk_pppox(po) + 1, 0, 658 memset(sk_pppox(po) + 1, 0,
662 sizeof(struct pppox_sock) - sizeof(struct sock)); 659 sizeof(struct pppox_sock) - sizeof(struct sock));
663 sk->sk_state = PPPOX_NONE; 660 sk->sk_state = PPPOX_NONE;
@@ -666,16 +663,15 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
666 /* Re-bind in session stage only */ 663 /* Re-bind in session stage only */
667 if (stage_session(sp->sa_addr.pppoe.sid)) { 664 if (stage_session(sp->sa_addr.pppoe.sid)) {
668 error = -ENODEV; 665 error = -ENODEV;
669 dev = dev_get_by_name(sock_net(sk), sp->sa_addr.pppoe.dev); 666 net = sock_net(sk);
667 dev = dev_get_by_name(net, sp->sa_addr.pppoe.dev);
670 if (!dev) 668 if (!dev)
671 goto end; 669 goto err_put;
672 670
673 po->pppoe_dev = dev; 671 po->pppoe_dev = dev;
674 po->pppoe_ifindex = dev->ifindex; 672 po->pppoe_ifindex = dev->ifindex;
675 pn = pppoe_pernet(dev_net(dev)); 673 pn = pppoe_pernet(net);
676 write_lock_bh(&pn->hash_lock);
677 if (!(dev->flags & IFF_UP)) { 674 if (!(dev->flags & IFF_UP)) {
678 write_unlock_bh(&pn->hash_lock);
679 goto err_put; 675 goto err_put;
680 } 676 }
681 677
@@ -683,6 +679,7 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
683 &sp->sa_addr.pppoe, 679 &sp->sa_addr.pppoe,
684 sizeof(struct pppoe_addr)); 680 sizeof(struct pppoe_addr));
685 681
682 write_lock_bh(&pn->hash_lock);
686 error = __set_item(pn, po); 683 error = __set_item(pn, po);
687 write_unlock_bh(&pn->hash_lock); 684 write_unlock_bh(&pn->hash_lock);
688 if (error < 0) 685 if (error < 0)
@@ -696,8 +693,11 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
696 po->chan.ops = &pppoe_chan_ops; 693 po->chan.ops = &pppoe_chan_ops;
697 694
698 error = ppp_register_net_channel(dev_net(dev), &po->chan); 695 error = ppp_register_net_channel(dev_net(dev), &po->chan);
699 if (error) 696 if (error) {
697 delete_item(pn, po->pppoe_pa.sid,
698 po->pppoe_pa.remote, po->pppoe_ifindex);
700 goto err_put; 699 goto err_put;
700 }
701 701
702 sk->sk_state = PPPOX_CONNECTED; 702 sk->sk_state = PPPOX_CONNECTED;
703 } 703 }
@@ -915,6 +915,14 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
915 struct pppoe_hdr *ph; 915 struct pppoe_hdr *ph;
916 int data_len = skb->len; 916 int data_len = skb->len;
917 917
918 /* The higher-level PPP code (ppp_unregister_channel()) ensures the PPP
919 * xmit operations conclude prior to an unregistration call. Thus
920 * sk->sk_state cannot change, so we don't need to do lock_sock().
921 * But, we also can't do a lock_sock since that introduces a potential
922 * deadlock as we'd reverse the lock ordering used when calling
923 * ppp_unregister_channel().
924 */
925
918 if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) 926 if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
919 goto abort; 927 goto abort;
920 928
@@ -944,7 +952,6 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
944 po->pppoe_pa.remote, NULL, data_len); 952 po->pppoe_pa.remote, NULL, data_len);
945 953
946 dev_queue_xmit(skb); 954 dev_queue_xmit(skb);
947
948 return 1; 955 return 1;
949 956
950abort: 957abort: