diff options
author | Balbir Singh <balbir@linux.vnet.ibm.com> | 2009-06-17 19:26:34 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-18 16:03:47 -0400 |
commit | d69b042f3d7406ddba560143b1796020df760800 (patch) | |
tree | 6b704864716240aa1282d06bb79c02ef4b77e16d /mm | |
parent | cd5008196f7e583f4c558531a2bca59f6c674c5b (diff) |
memcg: add file-based RSS accounting
Add file RSS tracking per memory cgroup
We currently don't track file RSS; the RSS we report is actually anon RSS.
All the file-mapped pages come in through the page cache and get
accounted there. This patch adds support for accounting file RSS pages.
It should:
1. Help improve the metrics reported by the memory resource controller.
2. Form the basis for a future shared memory accounting heuristic
that has been proposed by Kamezawa.
Unfortunately, we cannot rename the existing "rss" keyword used in
memory.stat to "anon_rss". We do, however, add "mapped_file" data and hope to
educate the end user through documentation.
[hugh.dickins@tiscali.co.uk: fix mem_cgroup_update_mapped_file_stat oops]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.cn>
Cc: Paul Menage <menage@google.com>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 66 | ||||
-rw-r--r-- | mm/page_cgroup.c | 2 | ||||
-rw-r--r-- | mm/rmap.c | 5 |
3 files changed, 71 insertions, 2 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 70db6e0a5eec..6f682901deb5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -62,7 +62,8 @@ enum mem_cgroup_stat_index { | |||
62 | * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. | 62 | * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. |
63 | */ | 63 | */ |
64 | MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ | 64 | MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ |
65 | MEM_CGROUP_STAT_RSS, /* # of pages charged as rss */ | 65 | MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ |
66 | MEM_CGROUP_STAT_MAPPED_FILE, /* # of pages charged as file rss */ | ||
66 | MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ | 67 | MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ |
67 | MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ | 68 | MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ |
68 | 69 | ||
@@ -900,6 +901,44 @@ static void record_last_oom(struct mem_cgroup *mem) | |||
900 | mem_cgroup_walk_tree(mem, NULL, record_last_oom_cb); | 901 | mem_cgroup_walk_tree(mem, NULL, record_last_oom_cb); |
901 | } | 902 | } |
902 | 903 | ||
904 | /* | ||
905 | * Currently used to update mapped file statistics, but the routine can be | ||
906 | * generalized to update other statistics as well. | ||
907 | */ | ||
908 | void mem_cgroup_update_mapped_file_stat(struct page *page, int val) | ||
909 | { | ||
910 | struct mem_cgroup *mem; | ||
911 | struct mem_cgroup_stat *stat; | ||
912 | struct mem_cgroup_stat_cpu *cpustat; | ||
913 | int cpu; | ||
914 | struct page_cgroup *pc; | ||
915 | |||
916 | if (!page_is_file_cache(page)) | ||
917 | return; | ||
918 | |||
919 | pc = lookup_page_cgroup(page); | ||
920 | if (unlikely(!pc)) | ||
921 | return; | ||
922 | |||
923 | lock_page_cgroup(pc); | ||
924 | mem = pc->mem_cgroup; | ||
925 | if (!mem) | ||
926 | goto done; | ||
927 | |||
928 | if (!PageCgroupUsed(pc)) | ||
929 | goto done; | ||
930 | |||
931 | /* | ||
932 | * Preemption is already disabled, we don't need get_cpu() | ||
933 | */ | ||
934 | cpu = smp_processor_id(); | ||
935 | stat = &mem->stat; | ||
936 | cpustat = &stat->cpustat[cpu]; | ||
937 | |||
938 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, val); | ||
939 | done: | ||
940 | unlock_page_cgroup(pc); | ||
941 | } | ||
903 | 942 | ||
904 | /* | 943 | /* |
905 | * Unlike exported interface, "oom" parameter is added. if oom==true, | 944 | * Unlike exported interface, "oom" parameter is added. if oom==true, |
@@ -1098,6 +1137,10 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
1098 | struct mem_cgroup_per_zone *from_mz, *to_mz; | 1137 | struct mem_cgroup_per_zone *from_mz, *to_mz; |
1099 | int nid, zid; | 1138 | int nid, zid; |
1100 | int ret = -EBUSY; | 1139 | int ret = -EBUSY; |
1140 | struct page *page; | ||
1141 | int cpu; | ||
1142 | struct mem_cgroup_stat *stat; | ||
1143 | struct mem_cgroup_stat_cpu *cpustat; | ||
1101 | 1144 | ||
1102 | VM_BUG_ON(from == to); | 1145 | VM_BUG_ON(from == to); |
1103 | VM_BUG_ON(PageLRU(pc->page)); | 1146 | VM_BUG_ON(PageLRU(pc->page)); |
@@ -1118,6 +1161,23 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
1118 | 1161 | ||
1119 | res_counter_uncharge(&from->res, PAGE_SIZE); | 1162 | res_counter_uncharge(&from->res, PAGE_SIZE); |
1120 | mem_cgroup_charge_statistics(from, pc, false); | 1163 | mem_cgroup_charge_statistics(from, pc, false); |
1164 | |||
1165 | page = pc->page; | ||
1166 | if (page_is_file_cache(page) && page_mapped(page)) { | ||
1167 | cpu = smp_processor_id(); | ||
1168 | /* Update mapped_file data for mem_cgroup "from" */ | ||
1169 | stat = &from->stat; | ||
1170 | cpustat = &stat->cpustat[cpu]; | ||
1171 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, | ||
1172 | -1); | ||
1173 | |||
1174 | /* Update mapped_file data for mem_cgroup "to" */ | ||
1175 | stat = &to->stat; | ||
1176 | cpustat = &stat->cpustat[cpu]; | ||
1177 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, | ||
1178 | 1); | ||
1179 | } | ||
1180 | |||
1121 | if (do_swap_account) | 1181 | if (do_swap_account) |
1122 | res_counter_uncharge(&from->memsw, PAGE_SIZE); | 1182 | res_counter_uncharge(&from->memsw, PAGE_SIZE); |
1123 | css_put(&from->css); | 1183 | css_put(&from->css); |
@@ -2046,6 +2106,7 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) | |||
2046 | enum { | 2106 | enum { |
2047 | MCS_CACHE, | 2107 | MCS_CACHE, |
2048 | MCS_RSS, | 2108 | MCS_RSS, |
2109 | MCS_MAPPED_FILE, | ||
2049 | MCS_PGPGIN, | 2110 | MCS_PGPGIN, |
2050 | MCS_PGPGOUT, | 2111 | MCS_PGPGOUT, |
2051 | MCS_INACTIVE_ANON, | 2112 | MCS_INACTIVE_ANON, |
@@ -2066,6 +2127,7 @@ struct { | |||
2066 | } memcg_stat_strings[NR_MCS_STAT] = { | 2127 | } memcg_stat_strings[NR_MCS_STAT] = { |
2067 | {"cache", "total_cache"}, | 2128 | {"cache", "total_cache"}, |
2068 | {"rss", "total_rss"}, | 2129 | {"rss", "total_rss"}, |
2130 | {"mapped_file", "total_mapped_file"}, | ||
2069 | {"pgpgin", "total_pgpgin"}, | 2131 | {"pgpgin", "total_pgpgin"}, |
2070 | {"pgpgout", "total_pgpgout"}, | 2132 | {"pgpgout", "total_pgpgout"}, |
2071 | {"inactive_anon", "total_inactive_anon"}, | 2133 | {"inactive_anon", "total_inactive_anon"}, |
@@ -2086,6 +2148,8 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data) | |||
2086 | s->stat[MCS_CACHE] += val * PAGE_SIZE; | 2148 | s->stat[MCS_CACHE] += val * PAGE_SIZE; |
2087 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS); | 2149 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS); |
2088 | s->stat[MCS_RSS] += val * PAGE_SIZE; | 2150 | s->stat[MCS_RSS] += val * PAGE_SIZE; |
2151 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_MAPPED_FILE); | ||
2152 | s->stat[MCS_MAPPED_FILE] += val * PAGE_SIZE; | ||
2089 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT); | 2153 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT); |
2090 | s->stat[MCS_PGPGIN] += val; | 2154 | s->stat[MCS_PGPGIN] += val; |
2091 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT); | 2155 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT); |
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 4f31c9b3e940..672089d5819f 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c | |||
@@ -99,6 +99,8 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) | |||
99 | unsigned long pfn = page_to_pfn(page); | 99 | unsigned long pfn = page_to_pfn(page); |
100 | struct mem_section *section = __pfn_to_section(pfn); | 100 | struct mem_section *section = __pfn_to_section(pfn); |
101 | 101 | ||
102 | if (!section->page_cgroup) | ||
103 | return NULL; | ||
102 | return section->page_cgroup + pfn; | 104 | return section->page_cgroup + pfn; |
103 | } | 105 | } |
104 | 106 | ||
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
@@ -703,8 +703,10 @@ void page_add_new_anon_rmap(struct page *page, | |||
703 | */ | 703 | */ |
704 | void page_add_file_rmap(struct page *page) | 704 | void page_add_file_rmap(struct page *page) |
705 | { | 705 | { |
706 | if (atomic_inc_and_test(&page->_mapcount)) | 706 | if (atomic_inc_and_test(&page->_mapcount)) { |
707 | __inc_zone_page_state(page, NR_FILE_MAPPED); | 707 | __inc_zone_page_state(page, NR_FILE_MAPPED); |
708 | mem_cgroup_update_mapped_file_stat(page, 1); | ||
709 | } | ||
708 | } | 710 | } |
709 | 711 | ||
710 | #ifdef CONFIG_DEBUG_VM | 712 | #ifdef CONFIG_DEBUG_VM |
@@ -753,6 +755,7 @@ void page_remove_rmap(struct page *page) | |||
753 | mem_cgroup_uncharge_page(page); | 755 | mem_cgroup_uncharge_page(page); |
754 | __dec_zone_page_state(page, | 756 | __dec_zone_page_state(page, |
755 | PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED); | 757 | PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED); |
758 | mem_cgroup_update_mapped_file_stat(page, -1); | ||
756 | /* | 759 | /* |
757 | * It would be tidy to reset the PageAnon mapping here, | 760 | * It would be tidy to reset the PageAnon mapping here, |
758 | * but that might overwrite a racing page_add_anon_rmap | 761 | * but that might overwrite a racing page_add_anon_rmap |