aboutsummaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2011-07-26 19:08:26 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-07-26 19:49:42 -0400
commit82f9d486e59f588c7d100865c36510644abda356 (patch)
tree266f3dcf4f57538196bddd77a129adfb2752335b /mm/vmscan.c
parent108b6a78463bb8c7163e4f9779f36ad8bbade334 (diff)
memcg: add memory.vmscan_stat
The commit log of 0ae5e89c60c9 ("memcg: count the soft_limit reclaim in...") says it adds scanning stats to memory.stat file. But it doesn't because we considered we needed to make a consensus for such new APIs. This patch is a trial to add memory.scan_stat. This shows - the number of scanned pages(total, anon, file) - the number of rotated pages(total, anon, file) - the number of freed pages(total, anon, file) - the amount of elapsed time (including sleep/pause time) for both of direct/soft reclaim. The biggest difference with Ying's original one is that this file can be reset by some write, as # echo 0 ...../memory.scan_stat Example of output is here. This is a result after make -j 6 kernel under 300M limit. [kamezawa@bluextal ~]$ cat /cgroup/memory/A/memory.scan_stat [kamezawa@bluextal ~]$ cat /cgroup/memory/A/memory.vmscan_stat scanned_pages_by_limit 9471864 scanned_anon_pages_by_limit 6640629 scanned_file_pages_by_limit 2831235 rotated_pages_by_limit 4243974 rotated_anon_pages_by_limit 3971968 rotated_file_pages_by_limit 272006 freed_pages_by_limit 2318492 freed_anon_pages_by_limit 962052 freed_file_pages_by_limit 1356440 elapsed_ns_by_limit 351386416101 scanned_pages_by_system 0 scanned_anon_pages_by_system 0 scanned_file_pages_by_system 0 rotated_pages_by_system 0 rotated_anon_pages_by_system 0 rotated_file_pages_by_system 0 freed_pages_by_system 0 freed_anon_pages_by_system 0 freed_file_pages_by_system 0 elapsed_ns_by_system 0 scanned_pages_by_limit_under_hierarchy 9471864 scanned_anon_pages_by_limit_under_hierarchy 6640629 scanned_file_pages_by_limit_under_hierarchy 2831235 rotated_pages_by_limit_under_hierarchy 4243974 rotated_anon_pages_by_limit_under_hierarchy 3971968 rotated_file_pages_by_limit_under_hierarchy 272006 freed_pages_by_limit_under_hierarchy 2318492 freed_anon_pages_by_limit_under_hierarchy 962052 freed_file_pages_by_limit_under_hierarchy 1356440 elapsed_ns_by_limit_under_hierarchy 351386416101 scanned_pages_by_system_under_hierarchy 0 
scanned_anon_pages_by_system_under_hierarchy 0 scanned_file_pages_by_system_under_hierarchy 0 rotated_pages_by_system_under_hierarchy 0 rotated_anon_pages_by_system_under_hierarchy 0 rotated_file_pages_by_system_under_hierarchy 0 freed_pages_by_system_under_hierarchy 0 freed_anon_pages_by_system_under_hierarchy 0 freed_file_pages_by_system_under_hierarchy 0 elapsed_ns_by_system_under_hierarchy 0 total_xxxx is for hierarchy management. This will be useful for further memcg developments and needs to be developed before we do some complicated rework on LRU/softlimit management. This patch adds a new struct memcg_scanrecord into scan_control struct. sc->nr_scanned et al. is not designed for exporting information. For example, nr_scanned is reset frequently and incremented by 2 when scanning mapped pages. To avoid complexity, I added a new param in scan_control which is for exporting scanning score. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Michal Hocko <mhocko@suse.cz> Cc: Ying Han <yinghan@google.com> Cc: Andrew Bresticker <abrestic@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c39
1 files changed, 34 insertions, 5 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f87702a376d0..7ef69124fa3e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -105,6 +105,7 @@ struct scan_control {
105 105
106 /* Which cgroup do we reclaim from */ 106 /* Which cgroup do we reclaim from */
107 struct mem_cgroup *mem_cgroup; 107 struct mem_cgroup *mem_cgroup;
108 struct memcg_scanrecord *memcg_record;
108 109
109 /* 110 /*
110 * Nodemask of nodes allowed by the caller. If NULL, all nodes 111 * Nodemask of nodes allowed by the caller. If NULL, all nodes
@@ -1348,6 +1349,8 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
1348 int file = is_file_lru(lru); 1349 int file = is_file_lru(lru);
1349 int numpages = hpage_nr_pages(page); 1350 int numpages = hpage_nr_pages(page);
1350 reclaim_stat->recent_rotated[file] += numpages; 1351 reclaim_stat->recent_rotated[file] += numpages;
1352 if (!scanning_global_lru(sc))
1353 sc->memcg_record->nr_rotated[file] += numpages;
1351 } 1354 }
1352 if (!pagevec_add(&pvec, page)) { 1355 if (!pagevec_add(&pvec, page)) {
1353 spin_unlock_irq(&zone->lru_lock); 1356 spin_unlock_irq(&zone->lru_lock);
@@ -1391,6 +1394,10 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone,
1391 1394
1392 reclaim_stat->recent_scanned[0] += *nr_anon; 1395 reclaim_stat->recent_scanned[0] += *nr_anon;
1393 reclaim_stat->recent_scanned[1] += *nr_file; 1396 reclaim_stat->recent_scanned[1] += *nr_file;
1397 if (!scanning_global_lru(sc)) {
1398 sc->memcg_record->nr_scanned[0] += *nr_anon;
1399 sc->memcg_record->nr_scanned[1] += *nr_file;
1400 }
1394} 1401}
1395 1402
1396/* 1403/*
@@ -1504,6 +1511,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
1504 nr_reclaimed += shrink_page_list(&page_list, zone, sc); 1511 nr_reclaimed += shrink_page_list(&page_list, zone, sc);
1505 } 1512 }
1506 1513
1514 if (!scanning_global_lru(sc))
1515 sc->memcg_record->nr_freed[file] += nr_reclaimed;
1516
1507 local_irq_disable(); 1517 local_irq_disable();
1508 if (current_is_kswapd()) 1518 if (current_is_kswapd())
1509 __count_vm_events(KSWAPD_STEAL, nr_reclaimed); 1519 __count_vm_events(KSWAPD_STEAL, nr_reclaimed);
@@ -1603,6 +1613,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1603 } 1613 }
1604 1614
1605 reclaim_stat->recent_scanned[file] += nr_taken; 1615 reclaim_stat->recent_scanned[file] += nr_taken;
1616 if (!scanning_global_lru(sc))
1617 sc->memcg_record->nr_scanned[file] += nr_taken;
1606 1618
1607 __count_zone_vm_events(PGREFILL, zone, pgscanned); 1619 __count_zone_vm_events(PGREFILL, zone, pgscanned);
1608 if (file) 1620 if (file)
@@ -1654,6 +1666,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1654 * get_scan_ratio. 1666 * get_scan_ratio.
1655 */ 1667 */
1656 reclaim_stat->recent_rotated[file] += nr_rotated; 1668 reclaim_stat->recent_rotated[file] += nr_rotated;
1669 if (!scanning_global_lru(sc))
1670 sc->memcg_record->nr_rotated[file] += nr_rotated;
1657 1671
1658 move_active_pages_to_lru(zone, &l_active, 1672 move_active_pages_to_lru(zone, &l_active,
1659 LRU_ACTIVE + file * LRU_FILE); 1673 LRU_ACTIVE + file * LRU_FILE);
@@ -2254,9 +2268,10 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
2254#ifdef CONFIG_CGROUP_MEM_RES_CTLR 2268#ifdef CONFIG_CGROUP_MEM_RES_CTLR
2255 2269
2256unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, 2270unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2257 gfp_t gfp_mask, bool noswap, 2271 gfp_t gfp_mask, bool noswap,
2258 struct zone *zone, 2272 struct zone *zone,
2259 unsigned long *nr_scanned) 2273 struct memcg_scanrecord *rec,
2274 unsigned long *scanned)
2260{ 2275{
2261 struct scan_control sc = { 2276 struct scan_control sc = {
2262 .nr_scanned = 0, 2277 .nr_scanned = 0,
@@ -2266,7 +2281,9 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2266 .may_swap = !noswap, 2281 .may_swap = !noswap,
2267 .order = 0, 2282 .order = 0,
2268 .mem_cgroup = mem, 2283 .mem_cgroup = mem,
2284 .memcg_record = rec,
2269 }; 2285 };
2286 unsigned long start, end;
2270 2287
2271 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2288 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2272 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); 2289 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2275,6 +2292,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2275 sc.may_writepage, 2292 sc.may_writepage,
2276 sc.gfp_mask); 2293 sc.gfp_mask);
2277 2294
2295 start = sched_clock();
2278 /* 2296 /*
2279 * NOTE: Although we can get the priority field, using it 2297 * NOTE: Although we can get the priority field, using it
2280 * here is not a good idea, since it limits the pages we can scan. 2298 * here is not a good idea, since it limits the pages we can scan.
@@ -2283,19 +2301,25 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2283 * the priority and make it zero. 2301 * the priority and make it zero.
2284 */ 2302 */
2285 shrink_zone(0, zone, &sc); 2303 shrink_zone(0, zone, &sc);
2304 end = sched_clock();
2305
2306 if (rec)
2307 rec->elapsed += end - start;
2308 *scanned = sc.nr_scanned;
2286 2309
2287 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); 2310 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
2288 2311
2289 *nr_scanned = sc.nr_scanned;
2290 return sc.nr_reclaimed; 2312 return sc.nr_reclaimed;
2291} 2313}
2292 2314
2293unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, 2315unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
2294 gfp_t gfp_mask, 2316 gfp_t gfp_mask,
2295 bool noswap) 2317 bool noswap,
2318 struct memcg_scanrecord *rec)
2296{ 2319{
2297 struct zonelist *zonelist; 2320 struct zonelist *zonelist;
2298 unsigned long nr_reclaimed; 2321 unsigned long nr_reclaimed;
2322 unsigned long start, end;
2299 int nid; 2323 int nid;
2300 struct scan_control sc = { 2324 struct scan_control sc = {
2301 .may_writepage = !laptop_mode, 2325 .may_writepage = !laptop_mode,
@@ -2304,6 +2328,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
2304 .nr_to_reclaim = SWAP_CLUSTER_MAX, 2328 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2305 .order = 0, 2329 .order = 0,
2306 .mem_cgroup = mem_cont, 2330 .mem_cgroup = mem_cont,
2331 .memcg_record = rec,
2307 .nodemask = NULL, /* we don't care the placement */ 2332 .nodemask = NULL, /* we don't care the placement */
2308 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2333 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2309 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), 2334 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
@@ -2312,6 +2337,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
2312 .gfp_mask = sc.gfp_mask, 2337 .gfp_mask = sc.gfp_mask,
2313 }; 2338 };
2314 2339
2340 start = sched_clock();
2315 /* 2341 /*
2316 * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't 2342 * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
2317 * take care of from where we get pages. So the node where we start the 2343 * take care of from where we get pages. So the node where we start the
@@ -2326,6 +2352,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
2326 sc.gfp_mask); 2352 sc.gfp_mask);
2327 2353
2328 nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); 2354 nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
2355 end = sched_clock();
2356 if (rec)
2357 rec->elapsed += end - start;
2329 2358
2330 trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); 2359 trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
2331 2360