aboutsummaryrefslogtreecommitdiffstats
path: root/net/sched/sch_generic.c
diff options
context:
space:
mode:
authorPatrick McHardy <kaber@trash.net>2006-09-27 19:45:45 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2006-09-28 21:01:50 -0400
commit85670cc1faa2e1472e4a423cbf0b5e3d55c5ba88 (patch)
treea5da67836995f1b04c844071db97608bc2c37b85 /net/sched/sch_generic.c
parent787e0617e5176176c494a787f1b0a5248a3db568 (diff)
[NET_SCHED]: Fix fallout from dev->qdisc RCU change
The move of qdisc destruction to a rcu callback broke locking in the entire qdisc layer by invalidating previously valid assumptions about the context in which changes to the qdisc tree occur. The two assumptions were: - since changes only happen in process context, read_lock doesn't need bottem half protection. Now invalid since destruction of inner qdiscs, classifiers, actions and estimators happens in the RCU callback unless they're manually deleted, resulting in dead-locks when read_lock in process context is interrupted by write_lock_bh in bottem half context. - since changes only happen under the RTNL, no additional locking is necessary for data not used during packet processing (f.e. u32_list). Again, since destruction now happens in the RCU callback, this assumption is not valid anymore, causing races while using this data, which can result in corruption or use-after-free. Instead of "fixing" this by disabling bottem halfs everywhere and adding new locks/refcounting, this patch makes these assumptions valid again by moving destruction back to process context. Since only the dev->qdisc pointer is protected by RCU, but ->enqueue and the qdisc tree are still protected by dev->qdisc_lock, destruction of the tree can be performed immediately and only the final free needs to happen in the rcu callback to make sure dev_queue_xmit doesn't access already freed memory. Signed-off-by: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/sched/sch_generic.c')
-rw-r--r--net/sched/sch_generic.c66
1 files changed, 21 insertions, 45 deletions
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6f9151899795..88c6a99ce53c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -45,11 +45,10 @@
45 The idea is the following: 45 The idea is the following:
46 - enqueue, dequeue are serialized via top level device 46 - enqueue, dequeue are serialized via top level device
47 spinlock dev->queue_lock. 47 spinlock dev->queue_lock.
48 - tree walking is protected by read_lock_bh(qdisc_tree_lock) 48 - tree walking is protected by read_lock(qdisc_tree_lock)
49 and this lock is used only in process context. 49 and this lock is used only in process context.
50 - updates to tree are made under rtnl semaphore or 50 - updates to tree are made only under rtnl semaphore,
51 from softirq context (__qdisc_destroy rcu-callback) 51 hence this lock may be made without local bh disabling.
52 hence this lock needs local bh disabling.
53 52
54 qdisc_tree_lock must be grabbed BEFORE dev->queue_lock! 53 qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
55 */ 54 */
@@ -57,14 +56,14 @@ DEFINE_RWLOCK(qdisc_tree_lock);
57 56
58void qdisc_lock_tree(struct net_device *dev) 57void qdisc_lock_tree(struct net_device *dev)
59{ 58{
60 write_lock_bh(&qdisc_tree_lock); 59 write_lock(&qdisc_tree_lock);
61 spin_lock_bh(&dev->queue_lock); 60 spin_lock_bh(&dev->queue_lock);
62} 61}
63 62
64void qdisc_unlock_tree(struct net_device *dev) 63void qdisc_unlock_tree(struct net_device *dev)
65{ 64{
66 spin_unlock_bh(&dev->queue_lock); 65 spin_unlock_bh(&dev->queue_lock);
67 write_unlock_bh(&qdisc_tree_lock); 66 write_unlock(&qdisc_tree_lock);
68} 67}
69 68
70/* 69/*
@@ -483,20 +482,6 @@ void qdisc_reset(struct Qdisc *qdisc)
483static void __qdisc_destroy(struct rcu_head *head) 482static void __qdisc_destroy(struct rcu_head *head)
484{ 483{
485 struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); 484 struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
486 struct Qdisc_ops *ops = qdisc->ops;
487
488#ifdef CONFIG_NET_ESTIMATOR
489 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
490#endif
491 write_lock(&qdisc_tree_lock);
492 if (ops->reset)
493 ops->reset(qdisc);
494 if (ops->destroy)
495 ops->destroy(qdisc);
496 write_unlock(&qdisc_tree_lock);
497 module_put(ops->owner);
498
499 dev_put(qdisc->dev);
500 kfree((char *) qdisc - qdisc->padded); 485 kfree((char *) qdisc - qdisc->padded);
501} 486}
502 487
@@ -504,32 +489,23 @@ static void __qdisc_destroy(struct rcu_head *head)
504 489
505void qdisc_destroy(struct Qdisc *qdisc) 490void qdisc_destroy(struct Qdisc *qdisc)
506{ 491{
507 struct list_head cql = LIST_HEAD_INIT(cql); 492 struct Qdisc_ops *ops = qdisc->ops;
508 struct Qdisc *cq, *q, *n;
509 493
510 if (qdisc->flags & TCQ_F_BUILTIN || 494 if (qdisc->flags & TCQ_F_BUILTIN ||
511 !atomic_dec_and_test(&qdisc->refcnt)) 495 !atomic_dec_and_test(&qdisc->refcnt))
512 return; 496 return;
513 497
514 if (!list_empty(&qdisc->list)) { 498 list_del(&qdisc->list);
515 if (qdisc->ops->cl_ops == NULL) 499#ifdef CONFIG_NET_ESTIMATOR
516 list_del(&qdisc->list); 500 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
517 else 501#endif
518 list_move(&qdisc->list, &cql); 502 if (ops->reset)
519 } 503 ops->reset(qdisc);
520 504 if (ops->destroy)
521 /* unlink inner qdiscs from dev->qdisc_list immediately */ 505 ops->destroy(qdisc);
522 list_for_each_entry(cq, &cql, list)
523 list_for_each_entry_safe(q, n, &qdisc->dev->qdisc_list, list)
524 if (TC_H_MAJ(q->parent) == TC_H_MAJ(cq->handle)) {
525 if (q->ops->cl_ops == NULL)
526 list_del_init(&q->list);
527 else
528 list_move_tail(&q->list, &cql);
529 }
530 list_for_each_entry_safe(cq, n, &cql, list)
531 list_del_init(&cq->list);
532 506
507 module_put(ops->owner);
508 dev_put(qdisc->dev);
533 call_rcu(&qdisc->q_rcu, __qdisc_destroy); 509 call_rcu(&qdisc->q_rcu, __qdisc_destroy);
534} 510}
535 511
@@ -549,15 +525,15 @@ void dev_activate(struct net_device *dev)
549 printk(KERN_INFO "%s: activation failed\n", dev->name); 525 printk(KERN_INFO "%s: activation failed\n", dev->name);
550 return; 526 return;
551 } 527 }
552 write_lock_bh(&qdisc_tree_lock); 528 write_lock(&qdisc_tree_lock);
553 list_add_tail(&qdisc->list, &dev->qdisc_list); 529 list_add_tail(&qdisc->list, &dev->qdisc_list);
554 write_unlock_bh(&qdisc_tree_lock); 530 write_unlock(&qdisc_tree_lock);
555 } else { 531 } else {
556 qdisc = &noqueue_qdisc; 532 qdisc = &noqueue_qdisc;
557 } 533 }
558 write_lock_bh(&qdisc_tree_lock); 534 write_lock(&qdisc_tree_lock);
559 dev->qdisc_sleeping = qdisc; 535 dev->qdisc_sleeping = qdisc;
560 write_unlock_bh(&qdisc_tree_lock); 536 write_unlock(&qdisc_tree_lock);
561 } 537 }
562 538
563 if (!netif_carrier_ok(dev)) 539 if (!netif_carrier_ok(dev))