diff options
author | Dave Chinner <dchinner@redhat.com> | 2011-07-08 00:14:35 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2011-07-20 01:44:29 -0400 |
commit | acf92b485cccf028177f46918e045c0c4e80ee10 (patch) | |
tree | 7da97439b5388d143cf414874572f1b2da7f3fe3 /mm/vmscan.c | |
parent | 095760730c1047c69159ce88021a7fa3833502c8 (diff) |
vmscan: shrinker->nr updates race and go wrong
shrink_slab() allows shrinkers to be called in parallel so the
struct shrinker can be updated concurrently. It does not provide any
exclusion for such updates, so we can get the shrinker->nr value
increasing or decreasing incorrectly.
As a result, when a shrinker repeatedly returns a value of -1 (e.g.
a VFS shrinker called w/ GFP_NOFS), the shrinker->nr goes haywire,
sometimes updating with the scan count that wasn't used, sometimes
losing it altogether. Worse is when a shrinker does work and that
update is lost due to racy updates, which means the shrinker will do
the work again!
Fix this by making the total_scan calculations independent of
shrinker->nr, and making the shrinker->nr updates atomic w.r.t.
other updates via cmpxchg loops.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 45 |
1 files changed, 32 insertions, 13 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index 255226554a3d..2f7c6ae8fae0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -251,17 +251,29 @@ unsigned long shrink_slab(struct shrink_control *shrink, | |||
251 | unsigned long total_scan; | 251 | unsigned long total_scan; |
252 | unsigned long max_pass; | 252 | unsigned long max_pass; |
253 | int shrink_ret = 0; | 253 | int shrink_ret = 0; |
254 | long nr; | ||
255 | long new_nr; | ||
254 | 256 | ||
257 | /* | ||
258 | * copy the current shrinker scan count into a local variable | ||
259 | * and zero it so that other concurrent shrinker invocations | ||
260 | * don't also do this scanning work. | ||
261 | */ | ||
262 | do { | ||
263 | nr = shrinker->nr; | ||
264 | } while (cmpxchg(&shrinker->nr, nr, 0) != nr); | ||
265 | |||
266 | total_scan = nr; | ||
255 | max_pass = do_shrinker_shrink(shrinker, shrink, 0); | 267 | max_pass = do_shrinker_shrink(shrinker, shrink, 0); |
256 | delta = (4 * nr_pages_scanned) / shrinker->seeks; | 268 | delta = (4 * nr_pages_scanned) / shrinker->seeks; |
257 | delta *= max_pass; | 269 | delta *= max_pass; |
258 | do_div(delta, lru_pages + 1); | 270 | do_div(delta, lru_pages + 1); |
259 | shrinker->nr += delta; | 271 | total_scan += delta; |
260 | if (shrinker->nr < 0) { | 272 | if (total_scan < 0) { |
261 | printk(KERN_ERR "shrink_slab: %pF negative objects to " | 273 | printk(KERN_ERR "shrink_slab: %pF negative objects to " |
262 | "delete nr=%ld\n", | 274 | "delete nr=%ld\n", |
263 | shrinker->shrink, shrinker->nr); | 275 | shrinker->shrink, total_scan); |
264 | shrinker->nr = max_pass; | 276 | total_scan = max_pass; |
265 | } | 277 | } |
266 | 278 | ||
267 | /* | 279 | /* |
@@ -269,13 +281,10 @@ unsigned long shrink_slab(struct shrink_control *shrink, | |||
269 | * never try to free more than twice the estimate number of | 281 | * never try to free more than twice the estimate number of |
270 | * freeable entries. | 282 | * freeable entries. |
271 | */ | 283 | */ |
272 | if (shrinker->nr > max_pass * 2) | 284 | if (total_scan > max_pass * 2) |
273 | shrinker->nr = max_pass * 2; | 285 | total_scan = max_pass * 2; |
274 | |||
275 | total_scan = shrinker->nr; | ||
276 | shrinker->nr = 0; | ||
277 | 286 | ||
278 | trace_mm_shrink_slab_start(shrinker, shrink, total_scan, | 287 | trace_mm_shrink_slab_start(shrinker, shrink, nr, |
279 | nr_pages_scanned, lru_pages, | 288 | nr_pages_scanned, lru_pages, |
280 | max_pass, delta, total_scan); | 289 | max_pass, delta, total_scan); |
281 | 290 | ||
@@ -296,9 +305,19 @@ unsigned long shrink_slab(struct shrink_control *shrink, | |||
296 | cond_resched(); | 305 | cond_resched(); |
297 | } | 306 | } |
298 | 307 | ||
299 | shrinker->nr += total_scan; | 308 | /* |
300 | trace_mm_shrink_slab_end(shrinker, shrink_ret, total_scan, | 309 | * move the unused scan count back into the shrinker in a |
301 | shrinker->nr); | 310 | * manner that handles concurrent updates. If we exhausted the |
311 | * scan, there is no need to do an update. | ||
312 | */ | ||
313 | do { | ||
314 | nr = shrinker->nr; | ||
315 | new_nr = total_scan + nr; | ||
316 | if (total_scan <= 0) | ||
317 | break; | ||
318 | } while (cmpxchg(&shrinker->nr, nr, new_nr) != nr); | ||
319 | |||
320 | trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr); | ||
302 | } | 321 | } |
303 | up_read(&shrinker_rwsem); | 322 | up_read(&shrinker_rwsem); |
304 | out: | 323 | out: |