diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2011-07-26 19:08:26 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-26 19:49:42 -0400 |
commit | 82f9d486e59f588c7d100865c36510644abda356 (patch) | |
tree | 266f3dcf4f57538196bddd77a129adfb2752335b /mm/vmscan.c | |
parent | 108b6a78463bb8c7163e4f9779f36ad8bbade334 (diff) |
memcg: add memory.vmscan_stat
The commit log of 0ae5e89c60c9 ("memcg: count the soft_limit reclaim
in...") says it adds scanning stats to memory.stat file. But it doesn't
because we considered we needed to make a concensus for such new APIs.
This patch is a trial to add memory.scan_stat. This shows
- the number of scanned pages(total, anon, file)
- the number of rotated pages(total, anon, file)
- the number of freed pages(total, anon, file)
- the number of elaplsed time (including sleep/pause time)
for both of direct/soft reclaim.
The biggest difference with oringinal Ying's one is that this file
can be reset by some write, as
# echo 0 ...../memory.scan_stat
Example of output is here. This is a result after make -j 6 kernel
under 300M limit.
[kamezawa@bluextal ~]$ cat /cgroup/memory/A/memory.scan_stat
[kamezawa@bluextal ~]$ cat /cgroup/memory/A/memory.vmscan_stat
scanned_pages_by_limit 9471864
scanned_anon_pages_by_limit 6640629
scanned_file_pages_by_limit 2831235
rotated_pages_by_limit 4243974
rotated_anon_pages_by_limit 3971968
rotated_file_pages_by_limit 272006
freed_pages_by_limit 2318492
freed_anon_pages_by_limit 962052
freed_file_pages_by_limit 1356440
elapsed_ns_by_limit 351386416101
scanned_pages_by_system 0
scanned_anon_pages_by_system 0
scanned_file_pages_by_system 0
rotated_pages_by_system 0
rotated_anon_pages_by_system 0
rotated_file_pages_by_system 0
freed_pages_by_system 0
freed_anon_pages_by_system 0
freed_file_pages_by_system 0
elapsed_ns_by_system 0
scanned_pages_by_limit_under_hierarchy 9471864
scanned_anon_pages_by_limit_under_hierarchy 6640629
scanned_file_pages_by_limit_under_hierarchy 2831235
rotated_pages_by_limit_under_hierarchy 4243974
rotated_anon_pages_by_limit_under_hierarchy 3971968
rotated_file_pages_by_limit_under_hierarchy 272006
freed_pages_by_limit_under_hierarchy 2318492
freed_anon_pages_by_limit_under_hierarchy 962052
freed_file_pages_by_limit_under_hierarchy 1356440
elapsed_ns_by_limit_under_hierarchy 351386416101
scanned_pages_by_system_under_hierarchy 0
scanned_anon_pages_by_system_under_hierarchy 0
scanned_file_pages_by_system_under_hierarchy 0
rotated_pages_by_system_under_hierarchy 0
rotated_anon_pages_by_system_under_hierarchy 0
rotated_file_pages_by_system_under_hierarchy 0
freed_pages_by_system_under_hierarchy 0
freed_anon_pages_by_system_under_hierarchy 0
freed_file_pages_by_system_under_hierarchy 0
elapsed_ns_by_system_under_hierarchy 0
total_xxxx is for hierarchy management.
This will be useful for further memcg developments and need to be
developped before we do some complicated rework on LRU/softlimit
management.
This patch adds a new struct memcg_scanrecord into scan_control struct.
sc->nr_scanned at el is not designed for exporting information. For
example, nr_scanned is reset frequentrly and incremented +2 at scanning
mapped pages.
To avoid complexity, I added a new param in scan_control which is for
exporting scanning score.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Ying Han <yinghan@google.com>
Cc: Andrew Bresticker <abrestic@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 39 |
1 files changed, 34 insertions, 5 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index f87702a376d0..7ef69124fa3e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -105,6 +105,7 @@ struct scan_control { | |||
105 | 105 | ||
106 | /* Which cgroup do we reclaim from */ | 106 | /* Which cgroup do we reclaim from */ |
107 | struct mem_cgroup *mem_cgroup; | 107 | struct mem_cgroup *mem_cgroup; |
108 | struct memcg_scanrecord *memcg_record; | ||
108 | 109 | ||
109 | /* | 110 | /* |
110 | * Nodemask of nodes allowed by the caller. If NULL, all nodes | 111 | * Nodemask of nodes allowed by the caller. If NULL, all nodes |
@@ -1348,6 +1349,8 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc, | |||
1348 | int file = is_file_lru(lru); | 1349 | int file = is_file_lru(lru); |
1349 | int numpages = hpage_nr_pages(page); | 1350 | int numpages = hpage_nr_pages(page); |
1350 | reclaim_stat->recent_rotated[file] += numpages; | 1351 | reclaim_stat->recent_rotated[file] += numpages; |
1352 | if (!scanning_global_lru(sc)) | ||
1353 | sc->memcg_record->nr_rotated[file] += numpages; | ||
1351 | } | 1354 | } |
1352 | if (!pagevec_add(&pvec, page)) { | 1355 | if (!pagevec_add(&pvec, page)) { |
1353 | spin_unlock_irq(&zone->lru_lock); | 1356 | spin_unlock_irq(&zone->lru_lock); |
@@ -1391,6 +1394,10 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone, | |||
1391 | 1394 | ||
1392 | reclaim_stat->recent_scanned[0] += *nr_anon; | 1395 | reclaim_stat->recent_scanned[0] += *nr_anon; |
1393 | reclaim_stat->recent_scanned[1] += *nr_file; | 1396 | reclaim_stat->recent_scanned[1] += *nr_file; |
1397 | if (!scanning_global_lru(sc)) { | ||
1398 | sc->memcg_record->nr_scanned[0] += *nr_anon; | ||
1399 | sc->memcg_record->nr_scanned[1] += *nr_file; | ||
1400 | } | ||
1394 | } | 1401 | } |
1395 | 1402 | ||
1396 | /* | 1403 | /* |
@@ -1504,6 +1511,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, | |||
1504 | nr_reclaimed += shrink_page_list(&page_list, zone, sc); | 1511 | nr_reclaimed += shrink_page_list(&page_list, zone, sc); |
1505 | } | 1512 | } |
1506 | 1513 | ||
1514 | if (!scanning_global_lru(sc)) | ||
1515 | sc->memcg_record->nr_freed[file] += nr_reclaimed; | ||
1516 | |||
1507 | local_irq_disable(); | 1517 | local_irq_disable(); |
1508 | if (current_is_kswapd()) | 1518 | if (current_is_kswapd()) |
1509 | __count_vm_events(KSWAPD_STEAL, nr_reclaimed); | 1519 | __count_vm_events(KSWAPD_STEAL, nr_reclaimed); |
@@ -1603,6 +1613,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1603 | } | 1613 | } |
1604 | 1614 | ||
1605 | reclaim_stat->recent_scanned[file] += nr_taken; | 1615 | reclaim_stat->recent_scanned[file] += nr_taken; |
1616 | if (!scanning_global_lru(sc)) | ||
1617 | sc->memcg_record->nr_scanned[file] += nr_taken; | ||
1606 | 1618 | ||
1607 | __count_zone_vm_events(PGREFILL, zone, pgscanned); | 1619 | __count_zone_vm_events(PGREFILL, zone, pgscanned); |
1608 | if (file) | 1620 | if (file) |
@@ -1654,6 +1666,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1654 | * get_scan_ratio. | 1666 | * get_scan_ratio. |
1655 | */ | 1667 | */ |
1656 | reclaim_stat->recent_rotated[file] += nr_rotated; | 1668 | reclaim_stat->recent_rotated[file] += nr_rotated; |
1669 | if (!scanning_global_lru(sc)) | ||
1670 | sc->memcg_record->nr_rotated[file] += nr_rotated; | ||
1657 | 1671 | ||
1658 | move_active_pages_to_lru(zone, &l_active, | 1672 | move_active_pages_to_lru(zone, &l_active, |
1659 | LRU_ACTIVE + file * LRU_FILE); | 1673 | LRU_ACTIVE + file * LRU_FILE); |
@@ -2254,9 +2268,10 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | |||
2254 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | 2268 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
2255 | 2269 | ||
2256 | unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | 2270 | unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, |
2257 | gfp_t gfp_mask, bool noswap, | 2271 | gfp_t gfp_mask, bool noswap, |
2258 | struct zone *zone, | 2272 | struct zone *zone, |
2259 | unsigned long *nr_scanned) | 2273 | struct memcg_scanrecord *rec, |
2274 | unsigned long *scanned) | ||
2260 | { | 2275 | { |
2261 | struct scan_control sc = { | 2276 | struct scan_control sc = { |
2262 | .nr_scanned = 0, | 2277 | .nr_scanned = 0, |
@@ -2266,7 +2281,9 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
2266 | .may_swap = !noswap, | 2281 | .may_swap = !noswap, |
2267 | .order = 0, | 2282 | .order = 0, |
2268 | .mem_cgroup = mem, | 2283 | .mem_cgroup = mem, |
2284 | .memcg_record = rec, | ||
2269 | }; | 2285 | }; |
2286 | unsigned long start, end; | ||
2270 | 2287 | ||
2271 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | | 2288 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
2272 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); | 2289 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); |
@@ -2275,6 +2292,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
2275 | sc.may_writepage, | 2292 | sc.may_writepage, |
2276 | sc.gfp_mask); | 2293 | sc.gfp_mask); |
2277 | 2294 | ||
2295 | start = sched_clock(); | ||
2278 | /* | 2296 | /* |
2279 | * NOTE: Although we can get the priority field, using it | 2297 | * NOTE: Although we can get the priority field, using it |
2280 | * here is not a good idea, since it limits the pages we can scan. | 2298 | * here is not a good idea, since it limits the pages we can scan. |
@@ -2283,19 +2301,25 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
2283 | * the priority and make it zero. | 2301 | * the priority and make it zero. |
2284 | */ | 2302 | */ |
2285 | shrink_zone(0, zone, &sc); | 2303 | shrink_zone(0, zone, &sc); |
2304 | end = sched_clock(); | ||
2305 | |||
2306 | if (rec) | ||
2307 | rec->elapsed += end - start; | ||
2308 | *scanned = sc.nr_scanned; | ||
2286 | 2309 | ||
2287 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); | 2310 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); |
2288 | 2311 | ||
2289 | *nr_scanned = sc.nr_scanned; | ||
2290 | return sc.nr_reclaimed; | 2312 | return sc.nr_reclaimed; |
2291 | } | 2313 | } |
2292 | 2314 | ||
2293 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | 2315 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, |
2294 | gfp_t gfp_mask, | 2316 | gfp_t gfp_mask, |
2295 | bool noswap) | 2317 | bool noswap, |
2318 | struct memcg_scanrecord *rec) | ||
2296 | { | 2319 | { |
2297 | struct zonelist *zonelist; | 2320 | struct zonelist *zonelist; |
2298 | unsigned long nr_reclaimed; | 2321 | unsigned long nr_reclaimed; |
2322 | unsigned long start, end; | ||
2299 | int nid; | 2323 | int nid; |
2300 | struct scan_control sc = { | 2324 | struct scan_control sc = { |
2301 | .may_writepage = !laptop_mode, | 2325 | .may_writepage = !laptop_mode, |
@@ -2304,6 +2328,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
2304 | .nr_to_reclaim = SWAP_CLUSTER_MAX, | 2328 | .nr_to_reclaim = SWAP_CLUSTER_MAX, |
2305 | .order = 0, | 2329 | .order = 0, |
2306 | .mem_cgroup = mem_cont, | 2330 | .mem_cgroup = mem_cont, |
2331 | .memcg_record = rec, | ||
2307 | .nodemask = NULL, /* we don't care the placement */ | 2332 | .nodemask = NULL, /* we don't care the placement */ |
2308 | .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | | 2333 | .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
2309 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), | 2334 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), |
@@ -2312,6 +2337,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
2312 | .gfp_mask = sc.gfp_mask, | 2337 | .gfp_mask = sc.gfp_mask, |
2313 | }; | 2338 | }; |
2314 | 2339 | ||
2340 | start = sched_clock(); | ||
2315 | /* | 2341 | /* |
2316 | * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't | 2342 | * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't |
2317 | * take care of from where we get pages. So the node where we start the | 2343 | * take care of from where we get pages. So the node where we start the |
@@ -2326,6 +2352,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
2326 | sc.gfp_mask); | 2352 | sc.gfp_mask); |
2327 | 2353 | ||
2328 | nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); | 2354 | nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); |
2355 | end = sched_clock(); | ||
2356 | if (rec) | ||
2357 | rec->elapsed += end - start; | ||
2329 | 2358 | ||
2330 | trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); | 2359 | trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); |
2331 | 2360 | ||