diff options
| author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2011-07-26 19:08:26 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-26 19:49:42 -0400 |
| commit | 82f9d486e59f588c7d100865c36510644abda356 (patch) | |
| tree | 266f3dcf4f57538196bddd77a129adfb2752335b /mm | |
| parent | 108b6a78463bb8c7163e4f9779f36ad8bbade334 (diff) | |
memcg: add memory.vmscan_stat
The commit log of 0ae5e89c60c9 ("memcg: count the soft_limit reclaim
in...") says it adds scanning stats to the memory.stat file. But it doesn't,
because we decided we needed to reach a consensus on such new APIs first.
This patch is a trial to add memory.vmscan_stat. This shows
- the number of scanned pages(total, anon, file)
- the number of rotated pages(total, anon, file)
- the number of freed pages(total, anon, file)
- the elapsed time in nanoseconds (including sleep/pause time)
for both direct (limit) and soft-limit reclaim.
The biggest difference from Ying's original version is that this file
can be reset by any write, e.g.
# echo 0 > ...../memory.vmscan_stat
Example of output is here. This is a result after make -j 6 kernel
under 300M limit.
[kamezawa@bluextal ~]$ cat /cgroup/memory/A/memory.scan_stat
[kamezawa@bluextal ~]$ cat /cgroup/memory/A/memory.vmscan_stat
scanned_pages_by_limit 9471864
scanned_anon_pages_by_limit 6640629
scanned_file_pages_by_limit 2831235
rotated_pages_by_limit 4243974
rotated_anon_pages_by_limit 3971968
rotated_file_pages_by_limit 272006
freed_pages_by_limit 2318492
freed_anon_pages_by_limit 962052
freed_file_pages_by_limit 1356440
elapsed_ns_by_limit 351386416101
scanned_pages_by_system 0
scanned_anon_pages_by_system 0
scanned_file_pages_by_system 0
rotated_pages_by_system 0
rotated_anon_pages_by_system 0
rotated_file_pages_by_system 0
freed_pages_by_system 0
freed_anon_pages_by_system 0
freed_file_pages_by_system 0
elapsed_ns_by_system 0
scanned_pages_by_limit_under_hierarchy 9471864
scanned_anon_pages_by_limit_under_hierarchy 6640629
scanned_file_pages_by_limit_under_hierarchy 2831235
rotated_pages_by_limit_under_hierarchy 4243974
rotated_anon_pages_by_limit_under_hierarchy 3971968
rotated_file_pages_by_limit_under_hierarchy 272006
freed_pages_by_limit_under_hierarchy 2318492
freed_anon_pages_by_limit_under_hierarchy 962052
freed_file_pages_by_limit_under_hierarchy 1356440
elapsed_ns_by_limit_under_hierarchy 351386416101
scanned_pages_by_system_under_hierarchy 0
scanned_anon_pages_by_system_under_hierarchy 0
scanned_file_pages_by_system_under_hierarchy 0
rotated_pages_by_system_under_hierarchy 0
rotated_anon_pages_by_system_under_hierarchy 0
rotated_file_pages_by_system_under_hierarchy 0
freed_pages_by_system_under_hierarchy 0
freed_anon_pages_by_system_under_hierarchy 0
freed_file_pages_by_system_under_hierarchy 0
elapsed_ns_by_system_under_hierarchy 0
The xxxx_under_hierarchy entries are for hierarchy management.
This will be useful for further memcg development and needs to be
developed before we do any complicated rework of LRU/softlimit
management.
This patch adds a new struct memcg_scanrecord into scan_control struct.
sc->nr_scanned et al. are not designed for exporting information. For
example, nr_scanned is reset frequently and is incremented by 2 when scanning
mapped pages.
To avoid complexity, I added a new param in scan_control which is for
exporting scanning score.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Ying Han <yinghan@google.com>
Cc: Andrew Bresticker <abrestic@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/memcontrol.c | 172 | ||||
| -rw-r--r-- | mm/vmscan.c | 39 |
2 files changed, 200 insertions, 11 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index dfeca594fd7a..04e505bfd7dd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -205,6 +205,50 @@ struct mem_cgroup_eventfd_list { | |||
| 205 | static void mem_cgroup_threshold(struct mem_cgroup *mem); | 205 | static void mem_cgroup_threshold(struct mem_cgroup *mem); |
| 206 | static void mem_cgroup_oom_notify(struct mem_cgroup *mem); | 206 | static void mem_cgroup_oom_notify(struct mem_cgroup *mem); |
| 207 | 207 | ||
| 208 | enum { | ||
| 209 | SCAN_BY_LIMIT, | ||
| 210 | SCAN_BY_SYSTEM, | ||
| 211 | NR_SCAN_CONTEXT, | ||
| 212 | SCAN_BY_SHRINK, /* not recorded now */ | ||
| 213 | }; | ||
| 214 | |||
| 215 | enum { | ||
| 216 | SCAN, | ||
| 217 | SCAN_ANON, | ||
| 218 | SCAN_FILE, | ||
| 219 | ROTATE, | ||
| 220 | ROTATE_ANON, | ||
| 221 | ROTATE_FILE, | ||
| 222 | FREED, | ||
| 223 | FREED_ANON, | ||
| 224 | FREED_FILE, | ||
| 225 | ELAPSED, | ||
| 226 | NR_SCANSTATS, | ||
| 227 | }; | ||
| 228 | |||
| 229 | struct scanstat { | ||
| 230 | spinlock_t lock; | ||
| 231 | unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS]; | ||
| 232 | unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS]; | ||
| 233 | }; | ||
| 234 | |||
| 235 | const char *scanstat_string[NR_SCANSTATS] = { | ||
| 236 | "scanned_pages", | ||
| 237 | "scanned_anon_pages", | ||
| 238 | "scanned_file_pages", | ||
| 239 | "rotated_pages", | ||
| 240 | "rotated_anon_pages", | ||
| 241 | "rotated_file_pages", | ||
| 242 | "freed_pages", | ||
| 243 | "freed_anon_pages", | ||
| 244 | "freed_file_pages", | ||
| 245 | "elapsed_ns", | ||
| 246 | }; | ||
| 247 | #define SCANSTAT_WORD_LIMIT "_by_limit" | ||
| 248 | #define SCANSTAT_WORD_SYSTEM "_by_system" | ||
| 249 | #define SCANSTAT_WORD_HIERARCHY "_under_hierarchy" | ||
| 250 | |||
| 251 | |||
| 208 | /* | 252 | /* |
| 209 | * The memory controller data structure. The memory controller controls both | 253 | * The memory controller data structure. The memory controller controls both |
| 210 | * page cache and RSS per cgroup. We would eventually like to provide | 254 | * page cache and RSS per cgroup. We would eventually like to provide |
| @@ -270,7 +314,8 @@ struct mem_cgroup { | |||
| 270 | 314 | ||
| 271 | /* For oom notifier event fd */ | 315 | /* For oom notifier event fd */ |
| 272 | struct list_head oom_notify; | 316 | struct list_head oom_notify; |
| 273 | 317 | /* For recording LRU-scan statistics */ | |
| 318 | struct scanstat scanstat; | ||
| 274 | /* | 319 | /* |
| 275 | * Should we move charges of a task when a task is moved into this | 320 | * Should we move charges of a task when a task is moved into this |
| 276 | * mem_cgroup ? And what type of charges should we move ? | 321 | * mem_cgroup ? And what type of charges should we move ? |
| @@ -1623,6 +1668,44 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) | |||
| 1623 | } | 1668 | } |
| 1624 | #endif | 1669 | #endif |
| 1625 | 1670 | ||
| 1671 | static void __mem_cgroup_record_scanstat(unsigned long *stats, | ||
| 1672 | struct memcg_scanrecord *rec) | ||
| 1673 | { | ||
| 1674 | |||
| 1675 | stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1]; | ||
| 1676 | stats[SCAN_ANON] += rec->nr_scanned[0]; | ||
| 1677 | stats[SCAN_FILE] += rec->nr_scanned[1]; | ||
| 1678 | |||
| 1679 | stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1]; | ||
| 1680 | stats[ROTATE_ANON] += rec->nr_rotated[0]; | ||
| 1681 | stats[ROTATE_FILE] += rec->nr_rotated[1]; | ||
| 1682 | |||
| 1683 | stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1]; | ||
| 1684 | stats[FREED_ANON] += rec->nr_freed[0]; | ||
| 1685 | stats[FREED_FILE] += rec->nr_freed[1]; | ||
| 1686 | |||
| 1687 | stats[ELAPSED] += rec->elapsed; | ||
| 1688 | } | ||
| 1689 | |||
| 1690 | static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec) | ||
| 1691 | { | ||
| 1692 | struct mem_cgroup *mem; | ||
| 1693 | int context = rec->context; | ||
| 1694 | |||
| 1695 | if (context >= NR_SCAN_CONTEXT) | ||
| 1696 | return; | ||
| 1697 | |||
| 1698 | mem = rec->mem; | ||
| 1699 | spin_lock(&mem->scanstat.lock); | ||
| 1700 | __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec); | ||
| 1701 | spin_unlock(&mem->scanstat.lock); | ||
| 1702 | |||
| 1703 | mem = rec->root; | ||
| 1704 | spin_lock(&mem->scanstat.lock); | ||
| 1705 | __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec); | ||
| 1706 | spin_unlock(&mem->scanstat.lock); | ||
| 1707 | } | ||
| 1708 | |||
| 1626 | /* | 1709 | /* |
| 1627 | * Scan the hierarchy if needed to reclaim memory. We remember the last child | 1710 | * Scan the hierarchy if needed to reclaim memory. We remember the last child |
| 1628 | * we reclaimed from, so that we don't end up penalizing one child extensively | 1711 | * we reclaimed from, so that we don't end up penalizing one child extensively |
| @@ -1647,8 +1730,9 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
| 1647 | bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; | 1730 | bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; |
| 1648 | bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; | 1731 | bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; |
| 1649 | bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; | 1732 | bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; |
| 1733 | struct memcg_scanrecord rec; | ||
| 1650 | unsigned long excess; | 1734 | unsigned long excess; |
| 1651 | unsigned long nr_scanned; | 1735 | unsigned long scanned; |
| 1652 | 1736 | ||
| 1653 | excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; | 1737 | excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; |
| 1654 | 1738 | ||
| @@ -1656,6 +1740,15 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
| 1656 | if (!check_soft && !shrink && root_mem->memsw_is_minimum) | 1740 | if (!check_soft && !shrink && root_mem->memsw_is_minimum) |
| 1657 | noswap = true; | 1741 | noswap = true; |
| 1658 | 1742 | ||
| 1743 | if (shrink) | ||
| 1744 | rec.context = SCAN_BY_SHRINK; | ||
| 1745 | else if (check_soft) | ||
| 1746 | rec.context = SCAN_BY_SYSTEM; | ||
| 1747 | else | ||
| 1748 | rec.context = SCAN_BY_LIMIT; | ||
| 1749 | |||
| 1750 | rec.root = root_mem; | ||
| 1751 | |||
| 1659 | while (1) { | 1752 | while (1) { |
| 1660 | victim = mem_cgroup_select_victim(root_mem); | 1753 | victim = mem_cgroup_select_victim(root_mem); |
| 1661 | if (victim == root_mem) { | 1754 | if (victim == root_mem) { |
| @@ -1696,14 +1789,23 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
| 1696 | css_put(&victim->css); | 1789 | css_put(&victim->css); |
| 1697 | continue; | 1790 | continue; |
| 1698 | } | 1791 | } |
| 1792 | rec.mem = victim; | ||
| 1793 | rec.nr_scanned[0] = 0; | ||
| 1794 | rec.nr_scanned[1] = 0; | ||
| 1795 | rec.nr_rotated[0] = 0; | ||
| 1796 | rec.nr_rotated[1] = 0; | ||
| 1797 | rec.nr_freed[0] = 0; | ||
| 1798 | rec.nr_freed[1] = 0; | ||
| 1799 | rec.elapsed = 0; | ||
| 1699 | /* we use swappiness of local cgroup */ | 1800 | /* we use swappiness of local cgroup */ |
| 1700 | if (check_soft) { | 1801 | if (check_soft) { |
| 1701 | ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, | 1802 | ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, |
| 1702 | noswap, zone, &nr_scanned); | 1803 | noswap, zone, &rec, &scanned); |
| 1703 | *total_scanned += nr_scanned; | 1804 | *total_scanned += scanned; |
| 1704 | } else | 1805 | } else |
| 1705 | ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, | 1806 | ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, |
| 1706 | noswap); | 1807 | noswap, &rec); |
| 1808 | mem_cgroup_record_scanstat(&rec); | ||
| 1707 | css_put(&victim->css); | 1809 | css_put(&victim->css); |
| 1708 | /* | 1810 | /* |
| 1709 | * At shrinking usage, we can't check we should stop here or | 1811 | * At shrinking usage, we can't check we should stop here or |
| @@ -3792,14 +3894,18 @@ try_to_free: | |||
| 3792 | /* try to free all pages in this cgroup */ | 3894 | /* try to free all pages in this cgroup */ |
| 3793 | shrink = 1; | 3895 | shrink = 1; |
| 3794 | while (nr_retries && mem->res.usage > 0) { | 3896 | while (nr_retries && mem->res.usage > 0) { |
| 3897 | struct memcg_scanrecord rec; | ||
| 3795 | int progress; | 3898 | int progress; |
| 3796 | 3899 | ||
| 3797 | if (signal_pending(current)) { | 3900 | if (signal_pending(current)) { |
| 3798 | ret = -EINTR; | 3901 | ret = -EINTR; |
| 3799 | goto out; | 3902 | goto out; |
| 3800 | } | 3903 | } |
| 3904 | rec.context = SCAN_BY_SHRINK; | ||
| 3905 | rec.mem = mem; | ||
| 3906 | rec.root = mem; | ||
| 3801 | progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, | 3907 | progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, |
| 3802 | false); | 3908 | false, &rec); |
| 3803 | if (!progress) { | 3909 | if (!progress) { |
| 3804 | nr_retries--; | 3910 | nr_retries--; |
| 3805 | /* maybe some writeback is necessary */ | 3911 | /* maybe some writeback is necessary */ |
| @@ -4643,6 +4749,54 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file) | |||
| 4643 | } | 4749 | } |
| 4644 | #endif /* CONFIG_NUMA */ | 4750 | #endif /* CONFIG_NUMA */ |
| 4645 | 4751 | ||
| 4752 | static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp, | ||
| 4753 | struct cftype *cft, | ||
| 4754 | struct cgroup_map_cb *cb) | ||
| 4755 | { | ||
| 4756 | struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); | ||
| 4757 | char string[64]; | ||
| 4758 | int i; | ||
| 4759 | |||
| 4760 | for (i = 0; i < NR_SCANSTATS; i++) { | ||
| 4761 | strcpy(string, scanstat_string[i]); | ||
| 4762 | strcat(string, SCANSTAT_WORD_LIMIT); | ||
| 4763 | cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]); | ||
| 4764 | } | ||
| 4765 | |||
| 4766 | for (i = 0; i < NR_SCANSTATS; i++) { | ||
| 4767 | strcpy(string, scanstat_string[i]); | ||
| 4768 | strcat(string, SCANSTAT_WORD_SYSTEM); | ||
| 4769 | cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]); | ||
| 4770 | } | ||
| 4771 | |||
| 4772 | for (i = 0; i < NR_SCANSTATS; i++) { | ||
| 4773 | strcpy(string, scanstat_string[i]); | ||
| 4774 | strcat(string, SCANSTAT_WORD_LIMIT); | ||
| 4775 | strcat(string, SCANSTAT_WORD_HIERARCHY); | ||
| 4776 | cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]); | ||
| 4777 | } | ||
| 4778 | for (i = 0; i < NR_SCANSTATS; i++) { | ||
| 4779 | strcpy(string, scanstat_string[i]); | ||
| 4780 | strcat(string, SCANSTAT_WORD_SYSTEM); | ||
| 4781 | strcat(string, SCANSTAT_WORD_HIERARCHY); | ||
| 4782 | cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]); | ||
| 4783 | } | ||
| 4784 | return 0; | ||
| 4785 | } | ||
| 4786 | |||
| 4787 | static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp, | ||
| 4788 | unsigned int event) | ||
| 4789 | { | ||
| 4790 | struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); | ||
| 4791 | |||
| 4792 | spin_lock(&mem->scanstat.lock); | ||
| 4793 | memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats)); | ||
| 4794 | memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats)); | ||
| 4795 | spin_unlock(&mem->scanstat.lock); | ||
| 4796 | return 0; | ||
| 4797 | } | ||
| 4798 | |||
| 4799 | |||
| 4646 | static struct cftype mem_cgroup_files[] = { | 4800 | static struct cftype mem_cgroup_files[] = { |
| 4647 | { | 4801 | { |
| 4648 | .name = "usage_in_bytes", | 4802 | .name = "usage_in_bytes", |
| @@ -4713,6 +4867,11 @@ static struct cftype mem_cgroup_files[] = { | |||
| 4713 | .mode = S_IRUGO, | 4867 | .mode = S_IRUGO, |
| 4714 | }, | 4868 | }, |
| 4715 | #endif | 4869 | #endif |
| 4870 | { | ||
| 4871 | .name = "vmscan_stat", | ||
| 4872 | .read_map = mem_cgroup_vmscan_stat_read, | ||
| 4873 | .trigger = mem_cgroup_reset_vmscan_stat, | ||
| 4874 | }, | ||
| 4716 | }; | 4875 | }; |
| 4717 | 4876 | ||
| 4718 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 4877 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
| @@ -4976,6 +5135,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
| 4976 | atomic_set(&mem->refcnt, 1); | 5135 | atomic_set(&mem->refcnt, 1); |
| 4977 | mem->move_charge_at_immigrate = 0; | 5136 | mem->move_charge_at_immigrate = 0; |
| 4978 | mutex_init(&mem->thresholds_lock); | 5137 | mutex_init(&mem->thresholds_lock); |
| 5138 | spin_lock_init(&mem->scanstat.lock); | ||
| 4979 | return &mem->css; | 5139 | return &mem->css; |
| 4980 | free_out: | 5140 | free_out: |
| 4981 | __mem_cgroup_free(mem); | 5141 | __mem_cgroup_free(mem); |
diff --git a/mm/vmscan.c b/mm/vmscan.c index f87702a376d0..7ef69124fa3e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
| @@ -105,6 +105,7 @@ struct scan_control { | |||
| 105 | 105 | ||
| 106 | /* Which cgroup do we reclaim from */ | 106 | /* Which cgroup do we reclaim from */ |
| 107 | struct mem_cgroup *mem_cgroup; | 107 | struct mem_cgroup *mem_cgroup; |
| 108 | struct memcg_scanrecord *memcg_record; | ||
| 108 | 109 | ||
| 109 | /* | 110 | /* |
| 110 | * Nodemask of nodes allowed by the caller. If NULL, all nodes | 111 | * Nodemask of nodes allowed by the caller. If NULL, all nodes |
| @@ -1348,6 +1349,8 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc, | |||
| 1348 | int file = is_file_lru(lru); | 1349 | int file = is_file_lru(lru); |
| 1349 | int numpages = hpage_nr_pages(page); | 1350 | int numpages = hpage_nr_pages(page); |
| 1350 | reclaim_stat->recent_rotated[file] += numpages; | 1351 | reclaim_stat->recent_rotated[file] += numpages; |
| 1352 | if (!scanning_global_lru(sc)) | ||
| 1353 | sc->memcg_record->nr_rotated[file] += numpages; | ||
| 1351 | } | 1354 | } |
| 1352 | if (!pagevec_add(&pvec, page)) { | 1355 | if (!pagevec_add(&pvec, page)) { |
| 1353 | spin_unlock_irq(&zone->lru_lock); | 1356 | spin_unlock_irq(&zone->lru_lock); |
| @@ -1391,6 +1394,10 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone, | |||
| 1391 | 1394 | ||
| 1392 | reclaim_stat->recent_scanned[0] += *nr_anon; | 1395 | reclaim_stat->recent_scanned[0] += *nr_anon; |
| 1393 | reclaim_stat->recent_scanned[1] += *nr_file; | 1396 | reclaim_stat->recent_scanned[1] += *nr_file; |
| 1397 | if (!scanning_global_lru(sc)) { | ||
| 1398 | sc->memcg_record->nr_scanned[0] += *nr_anon; | ||
| 1399 | sc->memcg_record->nr_scanned[1] += *nr_file; | ||
| 1400 | } | ||
| 1394 | } | 1401 | } |
| 1395 | 1402 | ||
| 1396 | /* | 1403 | /* |
| @@ -1504,6 +1511,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, | |||
| 1504 | nr_reclaimed += shrink_page_list(&page_list, zone, sc); | 1511 | nr_reclaimed += shrink_page_list(&page_list, zone, sc); |
| 1505 | } | 1512 | } |
| 1506 | 1513 | ||
| 1514 | if (!scanning_global_lru(sc)) | ||
| 1515 | sc->memcg_record->nr_freed[file] += nr_reclaimed; | ||
| 1516 | |||
| 1507 | local_irq_disable(); | 1517 | local_irq_disable(); |
| 1508 | if (current_is_kswapd()) | 1518 | if (current_is_kswapd()) |
| 1509 | __count_vm_events(KSWAPD_STEAL, nr_reclaimed); | 1519 | __count_vm_events(KSWAPD_STEAL, nr_reclaimed); |
| @@ -1603,6 +1613,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
| 1603 | } | 1613 | } |
| 1604 | 1614 | ||
| 1605 | reclaim_stat->recent_scanned[file] += nr_taken; | 1615 | reclaim_stat->recent_scanned[file] += nr_taken; |
| 1616 | if (!scanning_global_lru(sc)) | ||
| 1617 | sc->memcg_record->nr_scanned[file] += nr_taken; | ||
| 1606 | 1618 | ||
| 1607 | __count_zone_vm_events(PGREFILL, zone, pgscanned); | 1619 | __count_zone_vm_events(PGREFILL, zone, pgscanned); |
| 1608 | if (file) | 1620 | if (file) |
| @@ -1654,6 +1666,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
| 1654 | * get_scan_ratio. | 1666 | * get_scan_ratio. |
| 1655 | */ | 1667 | */ |
| 1656 | reclaim_stat->recent_rotated[file] += nr_rotated; | 1668 | reclaim_stat->recent_rotated[file] += nr_rotated; |
| 1669 | if (!scanning_global_lru(sc)) | ||
| 1670 | sc->memcg_record->nr_rotated[file] += nr_rotated; | ||
| 1657 | 1671 | ||
| 1658 | move_active_pages_to_lru(zone, &l_active, | 1672 | move_active_pages_to_lru(zone, &l_active, |
| 1659 | LRU_ACTIVE + file * LRU_FILE); | 1673 | LRU_ACTIVE + file * LRU_FILE); |
| @@ -2254,9 +2268,10 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | |||
| 2254 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | 2268 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
| 2255 | 2269 | ||
| 2256 | unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | 2270 | unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, |
| 2257 | gfp_t gfp_mask, bool noswap, | 2271 | gfp_t gfp_mask, bool noswap, |
| 2258 | struct zone *zone, | 2272 | struct zone *zone, |
| 2259 | unsigned long *nr_scanned) | 2273 | struct memcg_scanrecord *rec, |
| 2274 | unsigned long *scanned) | ||
| 2260 | { | 2275 | { |
| 2261 | struct scan_control sc = { | 2276 | struct scan_control sc = { |
| 2262 | .nr_scanned = 0, | 2277 | .nr_scanned = 0, |
| @@ -2266,7 +2281,9 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
| 2266 | .may_swap = !noswap, | 2281 | .may_swap = !noswap, |
| 2267 | .order = 0, | 2282 | .order = 0, |
| 2268 | .mem_cgroup = mem, | 2283 | .mem_cgroup = mem, |
| 2284 | .memcg_record = rec, | ||
| 2269 | }; | 2285 | }; |
| 2286 | unsigned long start, end; | ||
| 2270 | 2287 | ||
| 2271 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | | 2288 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
| 2272 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); | 2289 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); |
| @@ -2275,6 +2292,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
| 2275 | sc.may_writepage, | 2292 | sc.may_writepage, |
| 2276 | sc.gfp_mask); | 2293 | sc.gfp_mask); |
| 2277 | 2294 | ||
| 2295 | start = sched_clock(); | ||
| 2278 | /* | 2296 | /* |
| 2279 | * NOTE: Although we can get the priority field, using it | 2297 | * NOTE: Although we can get the priority field, using it |
| 2280 | * here is not a good idea, since it limits the pages we can scan. | 2298 | * here is not a good idea, since it limits the pages we can scan. |
| @@ -2283,19 +2301,25 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
| 2283 | * the priority and make it zero. | 2301 | * the priority and make it zero. |
| 2284 | */ | 2302 | */ |
| 2285 | shrink_zone(0, zone, &sc); | 2303 | shrink_zone(0, zone, &sc); |
| 2304 | end = sched_clock(); | ||
| 2305 | |||
| 2306 | if (rec) | ||
| 2307 | rec->elapsed += end - start; | ||
| 2308 | *scanned = sc.nr_scanned; | ||
| 2286 | 2309 | ||
| 2287 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); | 2310 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); |
| 2288 | 2311 | ||
| 2289 | *nr_scanned = sc.nr_scanned; | ||
| 2290 | return sc.nr_reclaimed; | 2312 | return sc.nr_reclaimed; |
| 2291 | } | 2313 | } |
| 2292 | 2314 | ||
| 2293 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | 2315 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, |
| 2294 | gfp_t gfp_mask, | 2316 | gfp_t gfp_mask, |
| 2295 | bool noswap) | 2317 | bool noswap, |
| 2318 | struct memcg_scanrecord *rec) | ||
| 2296 | { | 2319 | { |
| 2297 | struct zonelist *zonelist; | 2320 | struct zonelist *zonelist; |
| 2298 | unsigned long nr_reclaimed; | 2321 | unsigned long nr_reclaimed; |
| 2322 | unsigned long start, end; | ||
| 2299 | int nid; | 2323 | int nid; |
| 2300 | struct scan_control sc = { | 2324 | struct scan_control sc = { |
| 2301 | .may_writepage = !laptop_mode, | 2325 | .may_writepage = !laptop_mode, |
| @@ -2304,6 +2328,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
| 2304 | .nr_to_reclaim = SWAP_CLUSTER_MAX, | 2328 | .nr_to_reclaim = SWAP_CLUSTER_MAX, |
| 2305 | .order = 0, | 2329 | .order = 0, |
| 2306 | .mem_cgroup = mem_cont, | 2330 | .mem_cgroup = mem_cont, |
| 2331 | .memcg_record = rec, | ||
| 2307 | .nodemask = NULL, /* we don't care the placement */ | 2332 | .nodemask = NULL, /* we don't care the placement */ |
| 2308 | .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | | 2333 | .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
| 2309 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), | 2334 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), |
| @@ -2312,6 +2337,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
| 2312 | .gfp_mask = sc.gfp_mask, | 2337 | .gfp_mask = sc.gfp_mask, |
| 2313 | }; | 2338 | }; |
| 2314 | 2339 | ||
| 2340 | start = sched_clock(); | ||
| 2315 | /* | 2341 | /* |
| 2316 | * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't | 2342 | * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't |
| 2317 | * take care of from where we get pages. So the node where we start the | 2343 | * take care of from where we get pages. So the node where we start the |
| @@ -2326,6 +2352,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
| 2326 | sc.gfp_mask); | 2352 | sc.gfp_mask); |
| 2327 | 2353 | ||
| 2328 | nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); | 2354 | nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); |
| 2355 | end = sched_clock(); | ||
| 2356 | if (rec) | ||
| 2357 | rec->elapsed += end - start; | ||
| 2329 | 2358 | ||
| 2330 | trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); | 2359 | trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); |
| 2331 | 2360 | ||
