diff options
author | Patrick McHardy <kaber@trash.net> | 2006-09-27 19:45:45 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2006-09-28 21:01:50 -0400 |
commit | 85670cc1faa2e1472e4a423cbf0b5e3d55c5ba88 (patch) | |
tree | a5da67836995f1b04c844071db97608bc2c37b85 /net/sched/sch_api.c | |
parent | 787e0617e5176176c494a787f1b0a5248a3db568 (diff) |
[NET_SCHED]: Fix fallout from dev->qdisc RCU change
The move of qdisc destruction to an RCU callback broke locking in the
entire qdisc layer by invalidating previously valid assumptions about
the context in which changes to the qdisc tree occur.
The two assumptions were:
- since changes only happen in process context, read_lock doesn't need
bottom half protection. Now invalid since destruction of inner qdiscs,
classifiers, actions and estimators happens in the RCU callback unless
they're manually deleted, resulting in deadlocks when read_lock in
process context is interrupted by write_lock_bh in bottom half context.
- since changes only happen under the RTNL, no additional locking is
necessary for data not used during packet processing (e.g. u32_list).
Again, since destruction now happens in the RCU callback, this assumption
is not valid anymore, causing races while using this data, which can
result in corruption or use-after-free.
Instead of "fixing" this by disabling bottom halves everywhere and adding
new locks/refcounting, this patch makes these assumptions valid again by
moving destruction back to process context. Since only the dev->qdisc
pointer is protected by RCU, but ->enqueue and the qdisc tree are still
protected by dev->qdisc_lock, destruction of the tree can be performed
immediately and only the final free needs to happen in the RCU callback
to make sure dev_queue_xmit doesn't access already freed memory.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/sched/sch_api.c')
-rw-r--r-- | net/sched/sch_api.c | 16 |
1 files changed, 8 insertions, 8 deletions
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a19eff12cf78..0b6489291140 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c | |||
@@ -195,14 +195,14 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) | |||
195 | { | 195 | { |
196 | struct Qdisc *q; | 196 | struct Qdisc *q; |
197 | 197 | ||
198 | read_lock_bh(&qdisc_tree_lock); | 198 | read_lock(&qdisc_tree_lock); |
199 | list_for_each_entry(q, &dev->qdisc_list, list) { | 199 | list_for_each_entry(q, &dev->qdisc_list, list) { |
200 | if (q->handle == handle) { | 200 | if (q->handle == handle) { |
201 | read_unlock_bh(&qdisc_tree_lock); | 201 | read_unlock(&qdisc_tree_lock); |
202 | return q; | 202 | return q; |
203 | } | 203 | } |
204 | } | 204 | } |
205 | read_unlock_bh(&qdisc_tree_lock); | 205 | read_unlock(&qdisc_tree_lock); |
206 | return NULL; | 206 | return NULL; |
207 | } | 207 | } |
208 | 208 | ||
@@ -837,7 +837,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) | |||
837 | continue; | 837 | continue; |
838 | if (idx > s_idx) | 838 | if (idx > s_idx) |
839 | s_q_idx = 0; | 839 | s_q_idx = 0; |
840 | read_lock_bh(&qdisc_tree_lock); | 840 | read_lock(&qdisc_tree_lock); |
841 | q_idx = 0; | 841 | q_idx = 0; |
842 | list_for_each_entry(q, &dev->qdisc_list, list) { | 842 | list_for_each_entry(q, &dev->qdisc_list, list) { |
843 | if (q_idx < s_q_idx) { | 843 | if (q_idx < s_q_idx) { |
@@ -846,12 +846,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) | |||
846 | } | 846 | } |
847 | if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, | 847 | if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, |
848 | cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) { | 848 | cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) { |
849 | read_unlock_bh(&qdisc_tree_lock); | 849 | read_unlock(&qdisc_tree_lock); |
850 | goto done; | 850 | goto done; |
851 | } | 851 | } |
852 | q_idx++; | 852 | q_idx++; |
853 | } | 853 | } |
854 | read_unlock_bh(&qdisc_tree_lock); | 854 | read_unlock(&qdisc_tree_lock); |
855 | } | 855 | } |
856 | 856 | ||
857 | done: | 857 | done: |
@@ -1074,7 +1074,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) | |||
1074 | s_t = cb->args[0]; | 1074 | s_t = cb->args[0]; |
1075 | t = 0; | 1075 | t = 0; |
1076 | 1076 | ||
1077 | read_lock_bh(&qdisc_tree_lock); | 1077 | read_lock(&qdisc_tree_lock); |
1078 | list_for_each_entry(q, &dev->qdisc_list, list) { | 1078 | list_for_each_entry(q, &dev->qdisc_list, list) { |
1079 | if (t < s_t || !q->ops->cl_ops || | 1079 | if (t < s_t || !q->ops->cl_ops || |
1080 | (tcm->tcm_parent && | 1080 | (tcm->tcm_parent && |
@@ -1096,7 +1096,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) | |||
1096 | break; | 1096 | break; |
1097 | t++; | 1097 | t++; |
1098 | } | 1098 | } |
1099 | read_unlock_bh(&qdisc_tree_lock); | 1099 | read_unlock(&qdisc_tree_lock); |
1100 | 1100 | ||
1101 | cb->args[0] = t; | 1101 | cb->args[0] = t; |
1102 | 1102 | ||