diff options
author | Ranko Zivojnovic <ranko@spidernet.net> | 2007-07-16 21:28:32 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-07-18 04:46:50 -0400 |
commit | 0929c2dd83317813425b937fbc0041013b8685ff (patch) | |
tree | 00b7dfc35f18dd059b8ea5a6d0a151754c19a974 /net/core | |
parent | dd121c4bbf60336773485e91b5cfc57596b45151 (diff) |
[NET]: gen_estimator deadlock fix
-Fixes ABBA deadlock noted by Patrick McHardy <kaber@trash.net>:
> There is at least one ABBA deadlock, est_timer() does:
> read_lock(&est_lock)
> spin_lock(e->stats_lock) (which is dev->queue_lock)
>
> and qdisc_destroy calls htb_destroy under dev->queue_lock, which
> calls htb_destroy_class, then gen_kill_estimator and this
> write_locks est_lock.
To fix the ABBA deadlock the rate estimators are now kept on an rcu list.
-The est_lock changes the use from protecting the list to protecting
the update to the 'bstat' pointer in order to avoid NULL dereferencing.
-The 'interval' member of the gen_estimator structure removed as it is
not needed.
Signed-off-by: Ranko Zivojnovic <ranko@spidernet.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/gen_estimator.c | 81 |
1 files changed, 49 insertions, 32 deletions
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index cc84d8d8a3c7..590a767b029c 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c | |||
@@ -79,27 +79,27 @@ | |||
79 | 79 | ||
80 | struct gen_estimator | 80 | struct gen_estimator |
81 | { | 81 | { |
82 | struct gen_estimator *next; | 82 | struct list_head list; |
83 | struct gnet_stats_basic *bstats; | 83 | struct gnet_stats_basic *bstats; |
84 | struct gnet_stats_rate_est *rate_est; | 84 | struct gnet_stats_rate_est *rate_est; |
85 | spinlock_t *stats_lock; | 85 | spinlock_t *stats_lock; |
86 | unsigned interval; | ||
87 | int ewma_log; | 86 | int ewma_log; |
88 | u64 last_bytes; | 87 | u64 last_bytes; |
89 | u32 last_packets; | 88 | u32 last_packets; |
90 | u32 avpps; | 89 | u32 avpps; |
91 | u32 avbps; | 90 | u32 avbps; |
91 | struct rcu_head e_rcu; | ||
92 | }; | 92 | }; |
93 | 93 | ||
94 | struct gen_estimator_head | 94 | struct gen_estimator_head |
95 | { | 95 | { |
96 | struct timer_list timer; | 96 | struct timer_list timer; |
97 | struct gen_estimator *list; | 97 | struct list_head list; |
98 | }; | 98 | }; |
99 | 99 | ||
100 | static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; | 100 | static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; |
101 | 101 | ||
102 | /* Estimator array lock */ | 102 | /* Protects against NULL dereference */ |
103 | static DEFINE_RWLOCK(est_lock); | 103 | static DEFINE_RWLOCK(est_lock); |
104 | 104 | ||
105 | static void est_timer(unsigned long arg) | 105 | static void est_timer(unsigned long arg) |
@@ -107,13 +107,17 @@ static void est_timer(unsigned long arg) | |||
107 | int idx = (int)arg; | 107 | int idx = (int)arg; |
108 | struct gen_estimator *e; | 108 | struct gen_estimator *e; |
109 | 109 | ||
110 | read_lock(&est_lock); | 110 | rcu_read_lock(); |
111 | for (e = elist[idx].list; e; e = e->next) { | 111 | list_for_each_entry_rcu(e, &elist[idx].list, list) { |
112 | u64 nbytes; | 112 | u64 nbytes; |
113 | u32 npackets; | 113 | u32 npackets; |
114 | u32 rate; | 114 | u32 rate; |
115 | 115 | ||
116 | spin_lock(e->stats_lock); | 116 | spin_lock(e->stats_lock); |
117 | read_lock(&est_lock); | ||
118 | if (e->bstats == NULL) | ||
119 | goto skip; | ||
120 | |||
117 | nbytes = e->bstats->bytes; | 121 | nbytes = e->bstats->bytes; |
118 | npackets = e->bstats->packets; | 122 | npackets = e->bstats->packets; |
119 | rate = (nbytes - e->last_bytes)<<(7 - idx); | 123 | rate = (nbytes - e->last_bytes)<<(7 - idx); |
@@ -125,12 +129,14 @@ static void est_timer(unsigned long arg) | |||
125 | e->last_packets = npackets; | 129 | e->last_packets = npackets; |
126 | e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; | 130 | e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; |
127 | e->rate_est->pps = (e->avpps+0x1FF)>>10; | 131 | e->rate_est->pps = (e->avpps+0x1FF)>>10; |
132 | skip: | ||
133 | read_unlock(&est_lock); | ||
128 | spin_unlock(e->stats_lock); | 134 | spin_unlock(e->stats_lock); |
129 | } | 135 | } |
130 | 136 | ||
131 | if (elist[idx].list != NULL) | 137 | if (!list_empty(&elist[idx].list)) |
132 | mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); | 138 | mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); |
133 | read_unlock(&est_lock); | 139 | rcu_read_unlock(); |
134 | } | 140 | } |
135 | 141 | ||
136 | /** | 142 | /** |
@@ -147,12 +153,17 @@ static void est_timer(unsigned long arg) | |||
147 | * &rate_est with the statistics lock grabed during this period. | 153 | * &rate_est with the statistics lock grabed during this period. |
148 | * | 154 | * |
149 | * Returns 0 on success or a negative error code. | 155 | * Returns 0 on success or a negative error code. |
156 | * | ||
157 | * NOTE: Called under rtnl_mutex | ||
150 | */ | 158 | */ |
151 | int gen_new_estimator(struct gnet_stats_basic *bstats, | 159 | int gen_new_estimator(struct gnet_stats_basic *bstats, |
152 | struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt) | 160 | struct gnet_stats_rate_est *rate_est, |
161 | spinlock_t *stats_lock, | ||
162 | struct rtattr *opt) | ||
153 | { | 163 | { |
154 | struct gen_estimator *est; | 164 | struct gen_estimator *est; |
155 | struct gnet_estimator *parm = RTA_DATA(opt); | 165 | struct gnet_estimator *parm = RTA_DATA(opt); |
166 | int idx; | ||
156 | 167 | ||
157 | if (RTA_PAYLOAD(opt) < sizeof(*parm)) | 168 | if (RTA_PAYLOAD(opt) < sizeof(*parm)) |
158 | return -EINVAL; | 169 | return -EINVAL; |
@@ -164,7 +175,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, | |||
164 | if (est == NULL) | 175 | if (est == NULL) |
165 | return -ENOBUFS; | 176 | return -ENOBUFS; |
166 | 177 | ||
167 | est->interval = parm->interval + 2; | 178 | idx = parm->interval + 2; |
168 | est->bstats = bstats; | 179 | est->bstats = bstats; |
169 | est->rate_est = rate_est; | 180 | est->rate_est = rate_est; |
170 | est->stats_lock = stats_lock; | 181 | est->stats_lock = stats_lock; |
@@ -174,20 +185,25 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, | |||
174 | est->last_packets = bstats->packets; | 185 | est->last_packets = bstats->packets; |
175 | est->avpps = rate_est->pps<<10; | 186 | est->avpps = rate_est->pps<<10; |
176 | 187 | ||
177 | est->next = elist[est->interval].list; | 188 | if (!elist[idx].timer.function) { |
178 | if (est->next == NULL) { | 189 | INIT_LIST_HEAD(&elist[idx].list); |
179 | init_timer(&elist[est->interval].timer); | 190 | setup_timer(&elist[idx].timer, est_timer, idx); |
180 | elist[est->interval].timer.data = est->interval; | ||
181 | elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4); | ||
182 | elist[est->interval].timer.function = est_timer; | ||
183 | add_timer(&elist[est->interval].timer); | ||
184 | } | 191 | } |
185 | write_lock_bh(&est_lock); | 192 | |
186 | elist[est->interval].list = est; | 193 | if (list_empty(&elist[idx].list)) |
187 | write_unlock_bh(&est_lock); | 194 | mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); |
195 | |||
196 | list_add_rcu(&est->list, &elist[idx].list); | ||
188 | return 0; | 197 | return 0; |
189 | } | 198 | } |
190 | 199 | ||
200 | static void __gen_kill_estimator(struct rcu_head *head) | ||
201 | { | ||
202 | struct gen_estimator *e = container_of(head, | ||
203 | struct gen_estimator, e_rcu); | ||
204 | kfree(e); | ||
205 | } | ||
206 | |||
191 | /** | 207 | /** |
192 | * gen_kill_estimator - remove a rate estimator | 208 | * gen_kill_estimator - remove a rate estimator |
193 | * @bstats: basic statistics | 209 | * @bstats: basic statistics |
@@ -195,31 +211,32 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, | |||
195 | * | 211 | * |
196 | * Removes the rate estimator specified by &bstats and &rate_est | 212 | * Removes the rate estimator specified by &bstats and &rate_est |
197 | * and deletes the timer. | 213 | * and deletes the timer. |
214 | * | ||
215 | * NOTE: Called under rtnl_mutex | ||
198 | */ | 216 | */ |
199 | void gen_kill_estimator(struct gnet_stats_basic *bstats, | 217 | void gen_kill_estimator(struct gnet_stats_basic *bstats, |
200 | struct gnet_stats_rate_est *rate_est) | 218 | struct gnet_stats_rate_est *rate_est) |
201 | { | 219 | { |
202 | int idx; | 220 | int idx; |
203 | struct gen_estimator *est, **pest; | 221 | struct gen_estimator *e, *n; |
204 | 222 | ||
205 | for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { | 223 | for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { |
206 | int killed = 0; | 224 | |
207 | pest = &elist[idx].list; | 225 | /* Skip non initialized indexes */ |
208 | while ((est=*pest) != NULL) { | 226 | if (!elist[idx].timer.function) |
209 | if (est->rate_est != rate_est || est->bstats != bstats) { | 227 | continue; |
210 | pest = &est->next; | 228 | |
229 | list_for_each_entry_safe(e, n, &elist[idx].list, list) { | ||
230 | if (e->rate_est != rate_est || e->bstats != bstats) | ||
211 | continue; | 231 | continue; |
212 | } | ||
213 | 232 | ||
214 | write_lock_bh(&est_lock); | 233 | write_lock_bh(&est_lock); |
215 | *pest = est->next; | 234 | e->bstats = NULL; |
216 | write_unlock_bh(&est_lock); | 235 | write_unlock_bh(&est_lock); |
217 | 236 | ||
218 | kfree(est); | 237 | list_del_rcu(&e->list); |
219 | killed++; | 238 | call_rcu(&e->e_rcu, __gen_kill_estimator); |
220 | } | 239 | } |
221 | if (killed && elist[idx].list == NULL) | ||
222 | del_timer(&elist[idx].timer); | ||
223 | } | 240 | } |
224 | } | 241 | } |
225 | 242 | ||