diff options
| author | Ranko Zivojnovic <ranko@spidernet.net> | 2007-07-16 21:28:32 -0400 |
|---|---|---|
| committer | David S. Miller <davem@sunset.davemloft.net> | 2007-07-18 04:46:50 -0400 |
| commit | 0929c2dd83317813425b937fbc0041013b8685ff (patch) | |
| tree | 00b7dfc35f18dd059b8ea5a6d0a151754c19a974 /net/core | |
| parent | dd121c4bbf60336773485e91b5cfc57596b45151 (diff) | |
[NET]: gen_estimator deadlock fix
-Fixes ABBA deadlock noted by Patrick McHardy <kaber@trash.net>:
> There is at least one ABBA deadlock, est_timer() does:
> read_lock(&est_lock)
> spin_lock(e->stats_lock) (which is dev->queue_lock)
>
> and qdisc_destroy calls htb_destroy under dev->queue_lock, which
> calls htb_destroy_class, then gen_kill_estimator and this
> write_locks est_lock.
To fix the ABBA deadlock the rate estimators are now kept on an rcu list.
-The use of est_lock changes from protecting the list to protecting
the update to the 'bstats' pointer in order to avoid NULL dereferencing.
-The 'interval' member of the gen_estimator structure was removed as it is
not needed.
Signed-off-by: Ranko Zivojnovic <ranko@spidernet.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
| -rw-r--r-- | net/core/gen_estimator.c | 81 |
1 files changed, 49 insertions, 32 deletions
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index cc84d8d8a3c7..590a767b029c 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c | |||
| @@ -79,27 +79,27 @@ | |||
| 79 | 79 | ||
| 80 | struct gen_estimator | 80 | struct gen_estimator |
| 81 | { | 81 | { |
| 82 | struct gen_estimator *next; | 82 | struct list_head list; |
| 83 | struct gnet_stats_basic *bstats; | 83 | struct gnet_stats_basic *bstats; |
| 84 | struct gnet_stats_rate_est *rate_est; | 84 | struct gnet_stats_rate_est *rate_est; |
| 85 | spinlock_t *stats_lock; | 85 | spinlock_t *stats_lock; |
| 86 | unsigned interval; | ||
| 87 | int ewma_log; | 86 | int ewma_log; |
| 88 | u64 last_bytes; | 87 | u64 last_bytes; |
| 89 | u32 last_packets; | 88 | u32 last_packets; |
| 90 | u32 avpps; | 89 | u32 avpps; |
| 91 | u32 avbps; | 90 | u32 avbps; |
| 91 | struct rcu_head e_rcu; | ||
| 92 | }; | 92 | }; |
| 93 | 93 | ||
| 94 | struct gen_estimator_head | 94 | struct gen_estimator_head |
| 95 | { | 95 | { |
| 96 | struct timer_list timer; | 96 | struct timer_list timer; |
| 97 | struct gen_estimator *list; | 97 | struct list_head list; |
| 98 | }; | 98 | }; |
| 99 | 99 | ||
| 100 | static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; | 100 | static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; |
| 101 | 101 | ||
| 102 | /* Estimator array lock */ | 102 | /* Protects against NULL dereference */ |
| 103 | static DEFINE_RWLOCK(est_lock); | 103 | static DEFINE_RWLOCK(est_lock); |
| 104 | 104 | ||
| 105 | static void est_timer(unsigned long arg) | 105 | static void est_timer(unsigned long arg) |
| @@ -107,13 +107,17 @@ static void est_timer(unsigned long arg) | |||
| 107 | int idx = (int)arg; | 107 | int idx = (int)arg; |
| 108 | struct gen_estimator *e; | 108 | struct gen_estimator *e; |
| 109 | 109 | ||
| 110 | read_lock(&est_lock); | 110 | rcu_read_lock(); |
| 111 | for (e = elist[idx].list; e; e = e->next) { | 111 | list_for_each_entry_rcu(e, &elist[idx].list, list) { |
| 112 | u64 nbytes; | 112 | u64 nbytes; |
| 113 | u32 npackets; | 113 | u32 npackets; |
| 114 | u32 rate; | 114 | u32 rate; |
| 115 | 115 | ||
| 116 | spin_lock(e->stats_lock); | 116 | spin_lock(e->stats_lock); |
| 117 | read_lock(&est_lock); | ||
| 118 | if (e->bstats == NULL) | ||
| 119 | goto skip; | ||
| 120 | |||
| 117 | nbytes = e->bstats->bytes; | 121 | nbytes = e->bstats->bytes; |
| 118 | npackets = e->bstats->packets; | 122 | npackets = e->bstats->packets; |
| 119 | rate = (nbytes - e->last_bytes)<<(7 - idx); | 123 | rate = (nbytes - e->last_bytes)<<(7 - idx); |
| @@ -125,12 +129,14 @@ static void est_timer(unsigned long arg) | |||
| 125 | e->last_packets = npackets; | 129 | e->last_packets = npackets; |
| 126 | e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; | 130 | e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; |
| 127 | e->rate_est->pps = (e->avpps+0x1FF)>>10; | 131 | e->rate_est->pps = (e->avpps+0x1FF)>>10; |
| 132 | skip: | ||
| 133 | read_unlock(&est_lock); | ||
| 128 | spin_unlock(e->stats_lock); | 134 | spin_unlock(e->stats_lock); |
| 129 | } | 135 | } |
| 130 | 136 | ||
| 131 | if (elist[idx].list != NULL) | 137 | if (!list_empty(&elist[idx].list)) |
| 132 | mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); | 138 | mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); |
| 133 | read_unlock(&est_lock); | 139 | rcu_read_unlock(); |
| 134 | } | 140 | } |
| 135 | 141 | ||
| 136 | /** | 142 | /** |
| @@ -147,12 +153,17 @@ static void est_timer(unsigned long arg) | |||
| 147 | * &rate_est with the statistics lock grabed during this period. | 153 | * &rate_est with the statistics lock grabed during this period. |
| 148 | * | 154 | * |
| 149 | * Returns 0 on success or a negative error code. | 155 | * Returns 0 on success or a negative error code. |
| 156 | * | ||
| 157 | * NOTE: Called under rtnl_mutex | ||
| 150 | */ | 158 | */ |
| 151 | int gen_new_estimator(struct gnet_stats_basic *bstats, | 159 | int gen_new_estimator(struct gnet_stats_basic *bstats, |
| 152 | struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt) | 160 | struct gnet_stats_rate_est *rate_est, |
| 161 | spinlock_t *stats_lock, | ||
| 162 | struct rtattr *opt) | ||
| 153 | { | 163 | { |
| 154 | struct gen_estimator *est; | 164 | struct gen_estimator *est; |
| 155 | struct gnet_estimator *parm = RTA_DATA(opt); | 165 | struct gnet_estimator *parm = RTA_DATA(opt); |
| 166 | int idx; | ||
| 156 | 167 | ||
| 157 | if (RTA_PAYLOAD(opt) < sizeof(*parm)) | 168 | if (RTA_PAYLOAD(opt) < sizeof(*parm)) |
| 158 | return -EINVAL; | 169 | return -EINVAL; |
| @@ -164,7 +175,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, | |||
| 164 | if (est == NULL) | 175 | if (est == NULL) |
| 165 | return -ENOBUFS; | 176 | return -ENOBUFS; |
| 166 | 177 | ||
| 167 | est->interval = parm->interval + 2; | 178 | idx = parm->interval + 2; |
| 168 | est->bstats = bstats; | 179 | est->bstats = bstats; |
| 169 | est->rate_est = rate_est; | 180 | est->rate_est = rate_est; |
| 170 | est->stats_lock = stats_lock; | 181 | est->stats_lock = stats_lock; |
| @@ -174,20 +185,25 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, | |||
| 174 | est->last_packets = bstats->packets; | 185 | est->last_packets = bstats->packets; |
| 175 | est->avpps = rate_est->pps<<10; | 186 | est->avpps = rate_est->pps<<10; |
| 176 | 187 | ||
| 177 | est->next = elist[est->interval].list; | 188 | if (!elist[idx].timer.function) { |
| 178 | if (est->next == NULL) { | 189 | INIT_LIST_HEAD(&elist[idx].list); |
| 179 | init_timer(&elist[est->interval].timer); | 190 | setup_timer(&elist[idx].timer, est_timer, idx); |
| 180 | elist[est->interval].timer.data = est->interval; | ||
| 181 | elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4); | ||
| 182 | elist[est->interval].timer.function = est_timer; | ||
| 183 | add_timer(&elist[est->interval].timer); | ||
| 184 | } | 191 | } |
| 185 | write_lock_bh(&est_lock); | 192 | |
| 186 | elist[est->interval].list = est; | 193 | if (list_empty(&elist[idx].list)) |
| 187 | write_unlock_bh(&est_lock); | 194 | mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); |
| 195 | |||
| 196 | list_add_rcu(&est->list, &elist[idx].list); | ||
| 188 | return 0; | 197 | return 0; |
| 189 | } | 198 | } |
| 190 | 199 | ||
| 200 | static void __gen_kill_estimator(struct rcu_head *head) | ||
| 201 | { | ||
| 202 | struct gen_estimator *e = container_of(head, | ||
| 203 | struct gen_estimator, e_rcu); | ||
| 204 | kfree(e); | ||
| 205 | } | ||
| 206 | |||
| 191 | /** | 207 | /** |
| 192 | * gen_kill_estimator - remove a rate estimator | 208 | * gen_kill_estimator - remove a rate estimator |
| 193 | * @bstats: basic statistics | 209 | * @bstats: basic statistics |
| @@ -195,31 +211,32 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, | |||
| 195 | * | 211 | * |
| 196 | * Removes the rate estimator specified by &bstats and &rate_est | 212 | * Removes the rate estimator specified by &bstats and &rate_est |
| 197 | * and deletes the timer. | 213 | * and deletes the timer. |
| 214 | * | ||
| 215 | * NOTE: Called under rtnl_mutex | ||
| 198 | */ | 216 | */ |
| 199 | void gen_kill_estimator(struct gnet_stats_basic *bstats, | 217 | void gen_kill_estimator(struct gnet_stats_basic *bstats, |
| 200 | struct gnet_stats_rate_est *rate_est) | 218 | struct gnet_stats_rate_est *rate_est) |
| 201 | { | 219 | { |
| 202 | int idx; | 220 | int idx; |
| 203 | struct gen_estimator *est, **pest; | 221 | struct gen_estimator *e, *n; |
| 204 | 222 | ||
| 205 | for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { | 223 | for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { |
| 206 | int killed = 0; | 224 | |
| 207 | pest = &elist[idx].list; | 225 | /* Skip non initialized indexes */ |
| 208 | while ((est=*pest) != NULL) { | 226 | if (!elist[idx].timer.function) |
| 209 | if (est->rate_est != rate_est || est->bstats != bstats) { | 227 | continue; |
| 210 | pest = &est->next; | 228 | |
| 229 | list_for_each_entry_safe(e, n, &elist[idx].list, list) { | ||
| 230 | if (e->rate_est != rate_est || e->bstats != bstats) | ||
| 211 | continue; | 231 | continue; |
| 212 | } | ||
| 213 | 232 | ||
| 214 | write_lock_bh(&est_lock); | 233 | write_lock_bh(&est_lock); |
| 215 | *pest = est->next; | 234 | e->bstats = NULL; |
| 216 | write_unlock_bh(&est_lock); | 235 | write_unlock_bh(&est_lock); |
| 217 | 236 | ||
| 218 | kfree(est); | 237 | list_del_rcu(&e->list); |
| 219 | killed++; | 238 | call_rcu(&e->e_rcu, __gen_kill_estimator); |
| 220 | } | 239 | } |
| 221 | if (killed && elist[idx].list == NULL) | ||
| 222 | del_timer(&elist[idx].timer); | ||
| 223 | } | 240 | } |
| 224 | } | 241 | } |
| 225 | 242 | ||
