author	Josef Bacik <jbacik@fb.com>	2018-01-31 19:16:26 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-01-31 20:18:36 -0500
commit	9092c71bb724dba2ecba849eae69e5c9d39bd3d2 (patch)
tree	7651f4c5ef17afb674b0982272a0eaf513765f1e /mm/vmscan.c
parent	fcb2b0c577f145c7616843c9d4dcb4f9e5d88e29 (diff)
mm: use sc->priority for slab shrink targets
Previously we were using the ratio of the number of LRU pages scanned to the number of eligible LRU pages to determine the number of slab objects to scan. The problem with this is that these two things have nothing to do with each other, so in slab-heavy workloads where there is little to no page cache we can end up with the number of pages scanned being very low. This means that we reclaim next to no slab pages and waste a lot of time reclaiming small amounts of space.

Consider the following scenario, where we have the following values and the rest of the memory usage is in slab:

Active:            58840 kB
Inactive:          46860 kB

Every time we do a get_scan_count() we do this

scan = size >> sc->priority

where sc->priority starts at DEF_PRIORITY, which is 12. The first loop through reclaim would result in a scan target of 2 pages out of 11715 total inactive pages, and 3 pages out of 14710 total active pages. This is a really, really small target for a system that is entirely slab pages. And this is super optimistic, since it assumes we even get to scan these pages. We don't increment sc->nr_scanned unless we 1) isolate the page, which assumes it's not in use, and 2) can lock the page. Under pressure these numbers could probably go down further: there are surely some random pages from daemons that aren't actually in use, so the targets get even smaller.

Instead, use sc->priority in the same way we use it to determine scan amounts for the LRUs. This generally equates to pages. Consider the following

slab_pages = (nr_objects * object_size) / PAGE_SIZE

What we would like to do is

scan = slab_pages >> sc->priority

but we don't know the number of slab pages each shrinker controls, only the objects. However, even if we theoretically knew how many pages a shrinker controlled, we'd still have to convert that to objects, which would look like the following

scan = shrinker_pages >> sc->priority
scan_objects = (PAGE_SIZE / object_size) * scan

or written another way

scan_objects = (shrinker_pages >> sc->priority) * (PAGE_SIZE / object_size)

which can thus be written

scan_objects = ((shrinker_pages * PAGE_SIZE) / object_size) >> sc->priority

which is just

scan_objects = nr_objects >> sc->priority

We don't need to know exactly how many pages each shrinker represents; its object count is all the information we need. Making this change allows us to place an appropriate amount of pressure on the shrinker pools for their relative size.

Link: http://lkml.kernel.org/r/1510780549-6812-1-git-send-email-josef@toxicpanda.com
Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Dave Chinner <david@fromorbit.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
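As a worked illustration of the difference (not part of the patch), the following standalone C sketch replays both delta calculations with assumed inputs: a slab-heavy node with the ~26k LRU pages from the example above, a hypothetical shrinker holding one million freeable objects, and DEFAULT_SEEKS. The inputs are illustrative assumptions, not measurements.

/*
 * Standalone userspace sketch (not kernel code) contrasting the old
 * ratio-based slab scan target with the priority-based one from this
 * patch.  All inputs are illustrative assumptions, not measurements.
 */
#include <stdio.h>

#define DEFAULT_SEEKS	2	/* default shrinker->seeks in the kernel */
#define DEF_PRIORITY	12	/* reclaim starts at this priority */

int main(void)
{
	unsigned long nr_eligible = 11715 + 14710;	/* inactive + active LRU pages */
	unsigned long nr_scanned = 5;			/* tiny LRU scan on a slab-heavy box;
							   held constant here for illustration */
	unsigned long long freeable = 1000000;		/* objects the hypothetical shrinker could free */

	for (int priority = DEF_PRIORITY; priority >= 8; priority--) {
		/* Old scheme: scale freeable by the LRU scanned/eligible ratio. */
		unsigned long long old_delta = (4ULL * nr_scanned) / DEFAULT_SEEKS;
		old_delta *= freeable;
		old_delta /= nr_eligible + 1;

		/* New scheme: shift freeable by the reclaim priority, as for the LRUs. */
		unsigned long long new_delta = freeable >> priority;
		new_delta *= 4;
		new_delta /= DEFAULT_SEEKS;

		printf("priority %2d: old target %6llu objects, new target %6llu objects\n",
		       priority, old_delta, new_delta);
	}
	return 0;
}

With these assumptions the old target stays at a few hundred objects no matter how hard reclaim is pushing, while the new target roughly doubles each time the priority drops, which is the behaviour the changelog argues for.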
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	47
1 file changed, 13 insertions(+), 34 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 47d5ced51f2d..e73274a60b22 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -310,9 +310,7 @@ EXPORT_SYMBOL(unregister_shrinker);
 #define SHRINK_BATCH 128
 
 static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
-                                    struct shrinker *shrinker,
-                                    unsigned long nr_scanned,
-                                    unsigned long nr_eligible)
+                                    struct shrinker *shrinker, int priority)
 {
         unsigned long freed = 0;
         unsigned long long delta;
@@ -337,9 +335,9 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
         nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
 
         total_scan = nr;
-        delta = (4 * nr_scanned) / shrinker->seeks;
-        delta *= freeable;
-        do_div(delta, nr_eligible + 1);
+        delta = freeable >> priority;
+        delta *= 4;
+        do_div(delta, shrinker->seeks);
         total_scan += delta;
         if (total_scan < 0) {
                 pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n",
@@ -373,8 +371,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
                 total_scan = freeable * 2;
 
         trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
-                                   nr_scanned, nr_eligible,
-                                   freeable, delta, total_scan);
+                                   freeable, delta, total_scan, priority);
 
         /*
          * Normally, we should not scan less than batch_size objects in one
@@ -434,8 +431,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
  * @gfp_mask: allocation context
  * @nid: node whose slab caches to target
  * @memcg: memory cgroup whose slab caches to target
- * @nr_scanned: pressure numerator
- * @nr_eligible: pressure denominator
+ * @priority: the reclaim priority
  *
  * Call the shrink functions to age shrinkable caches.
  *
@@ -447,20 +443,14 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
  * objects from the memory cgroup specified. Otherwise, only unaware
  * shrinkers are called.
  *
- * @nr_scanned and @nr_eligible form a ratio that indicate how much of
- * the available objects should be scanned. Page reclaim for example
- * passes the number of pages scanned and the number of pages on the
- * LRU lists that it considered on @nid, plus a bias in @nr_scanned
- * when it encountered mapped pages. The ratio is further biased by
- * the ->seeks setting of the shrink function, which indicates the
- * cost to recreate an object relative to that of an LRU page.
+ * @priority is sc->priority, we take the number of objects and >> by priority
+ * in order to get the scan target.
  *
  * Returns the number of reclaimed slab objects.
  */
 static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
                                  struct mem_cgroup *memcg,
-                                 unsigned long nr_scanned,
-                                 unsigned long nr_eligible)
+                                 int priority)
 {
         struct shrinker *shrinker;
         unsigned long freed = 0;
@@ -468,9 +458,6 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
         if (memcg && (!memcg_kmem_enabled() || !mem_cgroup_online(memcg)))
                 return 0;
 
-        if (nr_scanned == 0)
-                nr_scanned = SWAP_CLUSTER_MAX;
-
         if (!down_read_trylock(&shrinker_rwsem)) {
                 /*
                  * If we would return 0, our callers would understand that we
@@ -501,7 +488,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
                 if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
                         sc.nid = 0;
 
-                freed += do_shrink_slab(&sc, shrinker, nr_scanned, nr_eligible);
+                freed += do_shrink_slab(&sc, shrinker, priority);
         }
 
         up_read(&shrinker_rwsem);
@@ -519,8 +506,7 @@ void drop_slab_node(int nid)
 
                 freed = 0;
                 do {
-                        freed += shrink_slab(GFP_KERNEL, nid, memcg,
-                                             1000, 1000);
+                        freed += shrink_slab(GFP_KERNEL, nid, memcg, 0);
                 } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
         } while (freed > 10);
 }
@@ -2615,14 +2601,12 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 
                         reclaimed = sc->nr_reclaimed;
                         scanned = sc->nr_scanned;
-
                         shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
                         node_lru_pages += lru_pages;
 
                         if (memcg)
                                 shrink_slab(sc->gfp_mask, pgdat->node_id,
-                                            memcg, sc->nr_scanned - scanned,
-                                            lru_pages);
+                                            memcg, sc->priority);
 
                         /* Record the group's reclaim efficiency */
                         vmpressure(sc->gfp_mask, memcg, false,
@@ -2646,14 +2630,9 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                 }
         } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
 
-        /*
-         * Shrink the slab caches in the same proportion that
-         * the eligible LRU pages were scanned.
-         */
         if (global_reclaim(sc))
                 shrink_slab(sc->gfp_mask, pgdat->node_id, NULL,
-                            sc->nr_scanned - nr_scanned,
-                            node_lru_pages);
+                            sc->priority);
 
         if (reclaim_state) {
                 sc->nr_reclaimed += reclaim_state->reclaimed_slab;