aboutsummaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2011-07-08 00:14:35 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2011-07-20 01:44:29 -0400
commitacf92b485cccf028177f46918e045c0c4e80ee10 (patch)
tree7da97439b5388d143cf414874572f1b2da7f3fe3 /mm/vmscan.c
parent095760730c1047c69159ce88021a7fa3833502c8 (diff)
vmscan: shrinker->nr updates race and go wrong
shrink_slab() allows shrinkers to be called in parallel so the struct shrinker can be updated concurrently. It does not provide any exclusion for such updates, so we can get the shrinker->nr value increasing or decreasing incorrectly. As a result, when a shrinker repeatedly returns a value of -1 (e.g. a VFS shrinker called w/ GFP_NOFS), the shrinker->nr goes haywire, sometimes updating with the scan count that wasn't used, sometimes losing it altogether. Worse is when a shrinker does work and that update is lost due to racy updates, which means the shrinker will do the work again! Fix this by making the total_scan calculations independent of shrinker->nr, and making the shrinker->nr updates atomic w.r.t. other updates via cmpxchg loops. Signed-off-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c45
1 files changed, 32 insertions, 13 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 255226554a3d..2f7c6ae8fae0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -251,17 +251,29 @@ unsigned long shrink_slab(struct shrink_control *shrink,
251 unsigned long total_scan; 251 unsigned long total_scan;
252 unsigned long max_pass; 252 unsigned long max_pass;
253 int shrink_ret = 0; 253 int shrink_ret = 0;
254 long nr;
255 long new_nr;
254 256
257 /*
258 * copy the current shrinker scan count into a local variable
259 * and zero it so that other concurrent shrinker invocations
260 * don't also do this scanning work.
261 */
262 do {
263 nr = shrinker->nr;
264 } while (cmpxchg(&shrinker->nr, nr, 0) != nr);
265
266 total_scan = nr;
255 max_pass = do_shrinker_shrink(shrinker, shrink, 0); 267 max_pass = do_shrinker_shrink(shrinker, shrink, 0);
256 delta = (4 * nr_pages_scanned) / shrinker->seeks; 268 delta = (4 * nr_pages_scanned) / shrinker->seeks;
257 delta *= max_pass; 269 delta *= max_pass;
258 do_div(delta, lru_pages + 1); 270 do_div(delta, lru_pages + 1);
259 shrinker->nr += delta; 271 total_scan += delta;
260 if (shrinker->nr < 0) { 272 if (total_scan < 0) {
261 printk(KERN_ERR "shrink_slab: %pF negative objects to " 273 printk(KERN_ERR "shrink_slab: %pF negative objects to "
262 "delete nr=%ld\n", 274 "delete nr=%ld\n",
263 shrinker->shrink, shrinker->nr); 275 shrinker->shrink, total_scan);
264 shrinker->nr = max_pass; 276 total_scan = max_pass;
265 } 277 }
266 278
267 /* 279 /*
@@ -269,13 +281,10 @@ unsigned long shrink_slab(struct shrink_control *shrink,
269 * never try to free more than twice the estimate number of 281 * never try to free more than twice the estimate number of
270 * freeable entries. 282 * freeable entries.
271 */ 283 */
272 if (shrinker->nr > max_pass * 2) 284 if (total_scan > max_pass * 2)
273 shrinker->nr = max_pass * 2; 285 total_scan = max_pass * 2;
274
275 total_scan = shrinker->nr;
276 shrinker->nr = 0;
277 286
278 trace_mm_shrink_slab_start(shrinker, shrink, total_scan, 287 trace_mm_shrink_slab_start(shrinker, shrink, nr,
279 nr_pages_scanned, lru_pages, 288 nr_pages_scanned, lru_pages,
280 max_pass, delta, total_scan); 289 max_pass, delta, total_scan);
281 290
@@ -296,9 +305,19 @@ unsigned long shrink_slab(struct shrink_control *shrink,
296 cond_resched(); 305 cond_resched();
297 } 306 }
298 307
299 shrinker->nr += total_scan; 308 /*
300 trace_mm_shrink_slab_end(shrinker, shrink_ret, total_scan, 309 * move the unused scan count back into the shrinker in a
301 shrinker->nr); 310 * manner that handles concurrent updates. If we exhausted the
311 * scan, there is no need to do an update.
312 */
313 do {
314 nr = shrinker->nr;
315 new_nr = total_scan + nr;
316 if (total_scan <= 0)
317 break;
318 } while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
319
320 trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
302 } 321 }
303 up_read(&shrinker_rwsem); 322 up_read(&shrinker_rwsem);
304out: 323out: