author     Johannes Weiner <hannes@cmpxchg.org>          2014-12-12 19:56:13 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org> 2014-12-13 15:42:48 -0500
commit     6b4f7799c6a5703ac6b8c0649f4c22f00fa07513 (patch)
tree       8d8c5f668a5a86bfff6af18e937bec4ea13027b8 /mm/vmscan.c
parent     f5f302e21257ebb0c074bbafc37606c26d28cc3d (diff)
mm: vmscan: invoke slab shrinkers from shrink_zone()
The slab shrinkers are currently invoked from the zonelist walkers in kswapd, direct reclaim, and zone reclaim, all of which roughly gauge the eligible LRU pages and assemble a nodemask to pass to NUMA-aware shrinkers, which then again have to walk over the nodemask.  This is redundant code, extra runtime work, and fairly inaccurate when it comes to the estimation of actually scannable LRU pages.  The code duplication will only get worse when making the shrinkers cgroup-aware and requiring them to have out-of-band cgroup hierarchy walks as well.

Instead, invoke the shrinkers from shrink_zone(), which is where all reclaimers end up, to avoid this duplication.

Take the count for eligible LRU pages out of get_scan_count(), which considers many more factors than just the availability of swap space, like zone_reclaimable_pages() currently does.  Accumulate the number over all visited lruvecs to get the per-zone value.

Some nodes have multiple zones due to memory addressing restrictions.  To avoid putting too much pressure on the shrinkers, only invoke them once for each such node, using the class zone of the allocation as the pivot zone.

For now, this integrates the slab shrinking better into the reclaim logic and gets rid of duplicative invocations from kswapd, direct reclaim, and zone reclaim.  It also prepares for cgroup-awareness, allowing memcg-capable shrinkers to be added at the lruvec level without much duplication of both code and runtime work.

This changes kswapd behavior, which used to invoke the shrinkers for each zone, but with scan ratios gathered from the entire node, resulting in meaningless pressure quantities on multi-zone nodes.

Zone reclaim behavior also changes.  It used to shrink slabs until the same amount of pages were shrunk as were reclaimed from the LRUs.  Now it merely invokes the shrinkers once with the zone's scan ratio, which makes the shrinkers go easier on caches that implement aging and would prefer feeding back pressure from recently used slab objects to unused LRU pages.

[vdavydov@parallels.com: assure class zone is populated]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Dave Chinner <david@fromorbit.com>
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
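The scanned/eligible pressure ratio described above, biased by each shrinker's ->seeks, can be illustrated with a small userspace model of the arithmetic that the renamed shrink_slabs() helper performs (see the first hunks below).  This is only a sketch: the model_slab_scan() name, the main() harness, and the input numbers are made up for illustration, and none of it is kernel code.

#include <stdio.h>

/*
 * Userspace model of the slab pressure calculation after this patch:
 *
 *   delta = (4 * nr_scanned / seeks) * freeable / (nr_eligible + 1)
 *
 * i.e. scan slab objects in the same proportion that eligible LRU
 * pages were scanned, biased by the shrinker's ->seeks cost of
 * recreating an object relative to an LRU page.
 */
static unsigned long long model_slab_scan(unsigned long nr_scanned,
                                          unsigned long nr_eligible,
                                          unsigned long freeable,
                                          unsigned int seeks)
{
        unsigned long long delta;

        delta = (4ULL * nr_scanned) / seeks;    /* pressure numerator */
        delta *= freeable;                      /* scale to cache size */
        delta /= nr_eligible + 1;               /* pressure denominator */
        return delta;
}

int main(void)
{
        /*
         * Made-up figures: 32768 pages scanned out of 1048576 eligible
         * LRU pages, a cache holding 200000 freeable objects, and a
         * seeks value of 2 (the kernel's DEFAULT_SEEKS).
         */
        printf("objects to scan: %llu\n",
               model_slab_scan(32768, 1048576, 200000, 2)); /* prints 12499 */
        return 0;
}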
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--   mm/vmscan.c   216
1 file changed, 90 insertions(+), 126 deletions(-)
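Because some nodes span several zones, the rewritten shrink_zones() below walks down from the allocation's requested zone index to the highest populated zone of that node and uses it as the pivot (class zone), so a node's shrinkers run only once per reclaim pass.  A rough userspace sketch of that pivot selection follows; the zone names and populated[] array are illustrative stand-ins, not the kernel's data structures, and at least one zone is assumed populated.

#include <stdbool.h>
#include <stdio.h>

#define MAX_NR_ZONES 4
static const char *zone_names[MAX_NR_ZONES] = {
        "DMA", "DMA32", "Normal", "Movable"
};

/*
 * Mirror of the classzone_idx walk in shrink_zones(): start at the
 * zone index the allocation asked for and step down until a populated
 * zone is found.  Slab shrinking is then done only when the zone being
 * reclaimed is that pivot zone.
 */
static int classzone_idx(const bool *populated, int requested_highidx)
{
        int idx = requested_highidx;

        while (!populated[idx])
                idx--;
        return idx;
}

int main(void)
{
        /* Hypothetical node with no populated Movable zone. */
        const bool populated[MAX_NR_ZONES] = { true, true, true, false };
        int pivot = classzone_idx(populated, MAX_NR_ZONES - 1);

        printf("pivot zone: %s\n", zone_names[pivot]);  /* prints Normal */
        return 0;
}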
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a384339bf718..bd9a72bc4a1b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -229,9 +229,10 @@ EXPORT_SYMBOL(unregister_shrinker);
 
 #define SHRINK_BATCH 128
 
-static unsigned long
-shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
-                 unsigned long nr_pages_scanned, unsigned long lru_pages)
+static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
+                                  struct shrinker *shrinker,
+                                  unsigned long nr_scanned,
+                                  unsigned long nr_eligible)
 {
         unsigned long freed = 0;
         unsigned long long delta;
@@ -255,9 +256,9 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
         nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
 
         total_scan = nr;
-        delta = (4 * nr_pages_scanned) / shrinker->seeks;
+        delta = (4 * nr_scanned) / shrinker->seeks;
         delta *= freeable;
-        do_div(delta, lru_pages + 1);
+        do_div(delta, nr_eligible + 1);
         total_scan += delta;
         if (total_scan < 0) {
                 pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n",
@@ -289,8 +290,8 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
                 total_scan = freeable * 2;
 
         trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
-                               nr_pages_scanned, lru_pages,
-                               freeable, delta, total_scan);
+                                  nr_scanned, nr_eligible,
+                                  freeable, delta, total_scan);
 
         /*
          * Normally, we should not scan less than batch_size objects in one
@@ -339,34 +340,37 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
         return freed;
 }
 
-/*
- * Call the shrink functions to age shrinkable caches
- *
- * Here we assume it costs one seek to replace a lru page and that it also
- * takes a seek to recreate a cache object. With this in mind we age equal
- * percentages of the lru and ageable caches. This should balance the seeks
- * generated by these structures.
+/**
+ * shrink_node_slabs - shrink slab caches of a given node
+ * @gfp_mask: allocation context
+ * @nid: node whose slab caches to target
+ * @nr_scanned: pressure numerator
+ * @nr_eligible: pressure denominator
  *
- * If the vm encountered mapped pages on the LRU it increase the pressure on
- * slab to avoid swapping.
+ * Call the shrink functions to age shrinkable caches.
  *
- * We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
+ * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
+ * unaware shrinkers will receive a node id of 0 instead.
  *
- * `lru_pages' represents the number of on-LRU pages in all the zones which
- * are eligible for the caller's allocation attempt. It is used for balancing
- * slab reclaim versus page reclaim.
+ * @nr_scanned and @nr_eligible form a ratio that indicate how much of
+ * the available objects should be scanned. Page reclaim for example
+ * passes the number of pages scanned and the number of pages on the
+ * LRU lists that it considered on @nid, plus a bias in @nr_scanned
+ * when it encountered mapped pages. The ratio is further biased by
+ * the ->seeks setting of the shrink function, which indicates the
+ * cost to recreate an object relative to that of an LRU page.
  *
- * Returns the number of slab objects which we shrunk.
+ * Returns the number of reclaimed slab objects.
  */
-unsigned long shrink_slab(struct shrink_control *shrinkctl,
-                          unsigned long nr_pages_scanned,
-                          unsigned long lru_pages)
+unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
+                                unsigned long nr_scanned,
+                                unsigned long nr_eligible)
 {
         struct shrinker *shrinker;
         unsigned long freed = 0;
 
-        if (nr_pages_scanned == 0)
-                nr_pages_scanned = SWAP_CLUSTER_MAX;
+        if (nr_scanned == 0)
+                nr_scanned = SWAP_CLUSTER_MAX;
 
         if (!down_read_trylock(&shrinker_rwsem)) {
                 /*
@@ -380,20 +384,17 @@ unsigned long shrink_slab(struct shrink_control *shrinkctl,
         }
 
         list_for_each_entry(shrinker, &shrinker_list, list) {
-                if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) {
-                        shrinkctl->nid = 0;
-                        freed += shrink_slab_node(shrinkctl, shrinker,
-                                        nr_pages_scanned, lru_pages);
-                        continue;
-                }
+                struct shrink_control sc = {
+                        .gfp_mask = gfp_mask,
+                        .nid = nid,
+                };
 
-                for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
-                        if (node_online(shrinkctl->nid))
-                                freed += shrink_slab_node(shrinkctl, shrinker,
-                                                nr_pages_scanned, lru_pages);
+                if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
+                        sc.nid = 0;
 
-                }
+                freed += shrink_slabs(&sc, shrinker, nr_scanned, nr_eligible);
         }
+
         up_read(&shrinker_rwsem);
 out:
         cond_resched();
@@ -1876,7 +1877,8 @@ enum scan_balance {
  * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
  */
 static void get_scan_count(struct lruvec *lruvec, int swappiness,
-                           struct scan_control *sc, unsigned long *nr)
+                           struct scan_control *sc, unsigned long *nr,
+                           unsigned long *lru_pages)
 {
         struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
         u64 fraction[2];
@@ -2022,6 +2024,7 @@ out:
         some_scanned = false;
         /* Only use force_scan on second pass. */
         for (pass = 0; !some_scanned && pass < 2; pass++) {
+                *lru_pages = 0;
                 for_each_evictable_lru(lru) {
                         int file = is_file_lru(lru);
                         unsigned long size;
@@ -2048,14 +2051,19 @@ out:
                         case SCAN_FILE:
                         case SCAN_ANON:
                                 /* Scan one type exclusively */
-                                if ((scan_balance == SCAN_FILE) != file)
+                                if ((scan_balance == SCAN_FILE) != file) {
+                                        size = 0;
                                         scan = 0;
+                                }
                                 break;
                         default:
                                 /* Look ma, no brain */
                                 BUG();
                         }
+
+                        *lru_pages += size;
                         nr[lru] = scan;
+
                         /*
                          * Skip the second pass and don't force_scan,
                          * if we found something to scan.
@@ -2069,7 +2077,7 @@ out:
  * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
  */
 static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
-                          struct scan_control *sc)
+                          struct scan_control *sc, unsigned long *lru_pages)
 {
         unsigned long nr[NR_LRU_LISTS];
         unsigned long targets[NR_LRU_LISTS];
@@ -2080,7 +2088,7 @@ static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
         struct blk_plug plug;
         bool scan_adjusted;
 
-        get_scan_count(lruvec, swappiness, sc, nr);
+        get_scan_count(lruvec, swappiness, sc, nr, lru_pages);
 
         /* Record the original scan target for proportional adjustments later */
         memcpy(targets, nr, sizeof(nr));
@@ -2258,7 +2266,8 @@ static inline bool should_continue_reclaim(struct zone *zone,
         }
 }
 
-static bool shrink_zone(struct zone *zone, struct scan_control *sc)
+static bool shrink_zone(struct zone *zone, struct scan_control *sc,
+                        bool is_classzone)
 {
         unsigned long nr_reclaimed, nr_scanned;
         bool reclaimable = false;
@@ -2269,6 +2278,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc)
                         .zone = zone,
                         .priority = sc->priority,
                 };
+                unsigned long zone_lru_pages = 0;
                 struct mem_cgroup *memcg;
 
                 nr_reclaimed = sc->nr_reclaimed;
@@ -2276,13 +2286,15 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc)
 
                 memcg = mem_cgroup_iter(root, NULL, &reclaim);
                 do {
+                        unsigned long lru_pages;
                         struct lruvec *lruvec;
                         int swappiness;
 
                         lruvec = mem_cgroup_zone_lruvec(zone, memcg);
                         swappiness = mem_cgroup_swappiness(memcg);
 
-                        shrink_lruvec(lruvec, swappiness, sc);
+                        shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
+                        zone_lru_pages += lru_pages;
 
                         /*
                          * Direct reclaim and kswapd have to scan all memory
@@ -2302,6 +2314,25 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc)
                         memcg = mem_cgroup_iter(root, memcg, &reclaim);
                 } while (memcg);
 
+                /*
+                 * Shrink the slab caches in the same proportion that
+                 * the eligible LRU pages were scanned.
+                 */
+                if (global_reclaim(sc) && is_classzone) {
+                        struct reclaim_state *reclaim_state;
+
+                        shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone),
+                                          sc->nr_scanned - nr_scanned,
+                                          zone_lru_pages);
+
+                        reclaim_state = current->reclaim_state;
+                        if (reclaim_state) {
+                                sc->nr_reclaimed +=
+                                        reclaim_state->reclaimed_slab;
+                                reclaim_state->reclaimed_slab = 0;
+                        }
+                }
+
                 vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
                            sc->nr_scanned - nr_scanned,
                            sc->nr_reclaimed - nr_reclaimed);
@@ -2376,12 +2407,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
         struct zone *zone;
         unsigned long nr_soft_reclaimed;
         unsigned long nr_soft_scanned;
-        unsigned long lru_pages = 0;
-        struct reclaim_state *reclaim_state = current->reclaim_state;
         gfp_t orig_mask;
-        struct shrink_control shrink = {
-                .gfp_mask = sc->gfp_mask,
-        };
         enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
         bool reclaimable = false;
 
@@ -2394,12 +2420,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
         if (buffer_heads_over_limit)
                 sc->gfp_mask |= __GFP_HIGHMEM;
 
-        nodes_clear(shrink.nodes_to_scan);
-
         for_each_zone_zonelist_nodemask(zone, z, zonelist,
-                                        gfp_zone(sc->gfp_mask), sc->nodemask) {
+                                        requested_highidx, sc->nodemask) {
+                enum zone_type classzone_idx;
+
                 if (!populated_zone(zone))
                         continue;
+
+                classzone_idx = requested_highidx;
+                while (!populated_zone(zone->zone_pgdat->node_zones +
+                                       classzone_idx))
+                        classzone_idx--;
+
                 /*
                  * Take care memory controller reclaiming has small influence
                  * to global LRU.
@@ -2409,9 +2441,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                                                  GFP_KERNEL | __GFP_HARDWALL))
                                 continue;
 
-                        lru_pages += zone_reclaimable_pages(zone);
-                        node_set(zone_to_nid(zone), shrink.nodes_to_scan);
-
                         if (sc->priority != DEF_PRIORITY &&
                             !zone_reclaimable(zone))
                                 continue;       /* Let kswapd poll it */
@@ -2450,7 +2479,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                         /* need some check for avoid more shrink_zone() */
                 }
 
-                if (shrink_zone(zone, sc))
+                if (shrink_zone(zone, sc, zone_idx(zone) == classzone_idx))
                         reclaimable = true;
 
                 if (global_reclaim(sc) &&
@@ -2459,20 +2488,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
         }
 
         /*
-         * Don't shrink slabs when reclaiming memory from over limit cgroups
-         * but do shrink slab at least once when aborting reclaim for
-         * compaction to avoid unevenly scanning file/anon LRU pages over slab
-         * pages.
-         */
-        if (global_reclaim(sc)) {
-                shrink_slab(&shrink, sc->nr_scanned, lru_pages);
-                if (reclaim_state) {
-                        sc->nr_reclaimed += reclaim_state->reclaimed_slab;
-                        reclaim_state->reclaimed_slab = 0;
-                }
-        }
-
-        /*
          * Restore to original mask to avoid the impact on the caller if we
          * promoted it to __GFP_HIGHMEM.
          */
@@ -2736,6 +2751,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
         };
         struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
         int swappiness = mem_cgroup_swappiness(memcg);
+        unsigned long lru_pages;
 
         sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                         (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2751,7 +2767,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
          * will pick up pages from other mem cgroup's as well. We hack
          * the priority and make it zero.
          */
-        shrink_lruvec(lruvec, swappiness, &sc);
+        shrink_lruvec(lruvec, swappiness, &sc, &lru_pages);
 
         trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
@@ -2932,15 +2948,10 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
 static bool kswapd_shrink_zone(struct zone *zone,
                                int classzone_idx,
                                struct scan_control *sc,
-                               unsigned long lru_pages,
                                unsigned long *nr_attempted)
 {
         int testorder = sc->order;
         unsigned long balance_gap;
-        struct reclaim_state *reclaim_state = current->reclaim_state;
-        struct shrink_control shrink = {
-                .gfp_mask = sc->gfp_mask,
-        };
         bool lowmem_pressure;
 
         /* Reclaim above the high watermark. */
@@ -2975,13 +2986,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
                                                 balance_gap, classzone_idx))
                 return true;
 
-        shrink_zone(zone, sc);
-        nodes_clear(shrink.nodes_to_scan);
-        node_set(zone_to_nid(zone), shrink.nodes_to_scan);
-
-        reclaim_state->reclaimed_slab = 0;
-        shrink_slab(&shrink, sc->nr_scanned, lru_pages);
-        sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+        shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
 
         /* Account for the number of pages attempted to reclaim */
         *nr_attempted += sc->nr_to_reclaim;
@@ -3042,7 +3047,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
         count_vm_event(PAGEOUTRUN);
 
         do {
-                unsigned long lru_pages = 0;
                 unsigned long nr_attempted = 0;
                 bool raise_priority = true;
                 bool pgdat_needs_compaction = (order > 0);
@@ -3102,8 +3106,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                         if (!populated_zone(zone))
                                 continue;
 
-                        lru_pages += zone_reclaimable_pages(zone);
-
                         /*
                          * If any zone is currently balanced then kswapd will
                          * not call compaction as it is expected that the
@@ -3159,8 +3161,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                          * that that high watermark would be met at 100%
                          * efficiency.
                          */
-                        if (kswapd_shrink_zone(zone, end_zone, &sc,
-                                        lru_pages, &nr_attempted))
+                        if (kswapd_shrink_zone(zone, end_zone,
+                                               &sc, &nr_attempted))
                                 raise_priority = false;
                 }
 
@@ -3612,10 +3614,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                 .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
                 .may_swap = 1,
         };
-        struct shrink_control shrink = {
-                .gfp_mask = sc.gfp_mask,
-        };
-        unsigned long nr_slab_pages0, nr_slab_pages1;
 
         cond_resched();
         /*
@@ -3634,44 +3632,10 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                  * priorities until we have enough memory freed.
                  */
                 do {
-                        shrink_zone(zone, &sc);
+                        shrink_zone(zone, &sc, true);
                 } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
         }
 
-        nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
-        if (nr_slab_pages0 > zone->min_slab_pages) {
-                /*
-                 * shrink_slab() does not currently allow us to determine how
-                 * many pages were freed in this zone. So we take the current
-                 * number of slab pages and shake the slab until it is reduced
-                 * by the same nr_pages that we used for reclaiming unmapped
-                 * pages.
-                 */
-                nodes_clear(shrink.nodes_to_scan);
-                node_set(zone_to_nid(zone), shrink.nodes_to_scan);
-                for (;;) {
-                        unsigned long lru_pages = zone_reclaimable_pages(zone);
-
-                        /* No reclaimable slab or very low memory pressure */
-                        if (!shrink_slab(&shrink, sc.nr_scanned, lru_pages))
-                                break;
-
-                        /* Freed enough memory */
-                        nr_slab_pages1 = zone_page_state(zone,
-                                                        NR_SLAB_RECLAIMABLE);
-                        if (nr_slab_pages1 + nr_pages <= nr_slab_pages0)
-                                break;
-                }
-
-                /*
-                 * Update nr_reclaimed by the number of slab pages we
-                 * reclaimed from this zone.
-                 */
-                nr_slab_pages1 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
-                if (nr_slab_pages1 < nr_slab_pages0)
-                        sc.nr_reclaimed += nr_slab_pages0 - nr_slab_pages1;
-        }
-
         p->reclaim_state = NULL;
         current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
         lockdep_clear_current_reclaim_state();