author	Balbir Singh <balbir@linux.vnet.ibm.com>	2009-06-17 19:26:34 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-06-18 16:03:47 -0400
commit	d69b042f3d7406ddba560143b1796020df760800
tree	6b704864716240aa1282d06bb79c02ef4b77e16d
parent	cd5008196f7e583f4c558531a2bca59f6c674c5b
memcg: add file-based RSS accounting
Add file RSS tracking per memory cgroup.

We currently don't track file RSS; the RSS we report is actually anon RSS.
All file-mapped pages come in through the page cache and get accounted
there.  This patch adds support for accounting file RSS pages.  It should:

1. Help improve the metrics reported by the memory resource controller.
2. Form the basis for a future shared memory accounting heuristic that
   has been proposed by Kamezawa.

Unfortunately, we cannot rename the existing "rss" keyword used in
memory.stat to "anon_rss".  We do, however, add "mapped_file" data and
hope to educate the end user through documentation.

[hugh.dickins@tiscali.co.uk: fix mem_cgroup_update_mapped_file_stat oops]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	include/linux/memcontrol.h	7
-rw-r--r--	mm/memcontrol.c	66
-rw-r--r--	mm/page_cgroup.c	2
-rw-r--r--	mm/rmap.c	5
4 files changed, 77 insertions(+), 3 deletions(-)
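For illustration only (not part of this commit): with the patch applied, each cgroup's memory.stat reports the new "mapped_file" value alongside "rss". The small C sketch below scans memory.stat for that key; the mount point and group name are assumptions, so substitute wherever the memory controller is actually mounted.

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		/* hypothetical mount point and group name */
		FILE *f = fopen("/cgroup/memory/mygroup/memory.stat", "r");
		char key[64];
		unsigned long long val;

		if (!f) {
			perror("fopen");
			return 1;
		}
		/* memory.stat is a sequence of "<key> <value>" lines */
		while (fscanf(f, "%63s %llu", key, &val) == 2) {
			if (!strcmp(key, "mapped_file"))
				printf("mapped_file: %llu bytes\n", val);
		}
		fclose(f);
		return 0;
	}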
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 45add35dda1b..e46a0734ab6e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -117,7 +117,7 @@ static inline bool mem_cgroup_disabled(void)
 }
 
 extern bool mem_cgroup_oom_called(struct task_struct *task);
-
+void mem_cgroup_update_mapped_file_stat(struct page *page, int val);
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct mem_cgroup;
 
@@ -271,6 +271,11 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
 }
 
+static inline void mem_cgroup_update_mapped_file_stat(struct page *page,
+						int val)
+{
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 70db6e0a5eec..6f682901deb5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -62,7 +62,8 @@ enum mem_cgroup_stat_index {
 	 * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
 	 */
 	MEM_CGROUP_STAT_CACHE,		/* # of pages charged as cache */
-	MEM_CGROUP_STAT_RSS,		/* # of pages charged as rss */
+	MEM_CGROUP_STAT_RSS,		/* # of pages charged as anon rss */
+	MEM_CGROUP_STAT_MAPPED_FILE,	/* # of pages charged as file rss */
 	MEM_CGROUP_STAT_PGPGIN_COUNT,	/* # of pages paged in */
 	MEM_CGROUP_STAT_PGPGOUT_COUNT,	/* # of pages paged out */
 
@@ -900,6 +901,44 @@ static void record_last_oom(struct mem_cgroup *mem)
 	mem_cgroup_walk_tree(mem, NULL, record_last_oom_cb);
 }
 
+/*
+ * Currently used to update mapped file statistics, but the routine can be
+ * generalized to update other statistics as well.
+ */
+void mem_cgroup_update_mapped_file_stat(struct page *page, int val)
+{
+	struct mem_cgroup *mem;
+	struct mem_cgroup_stat *stat;
+	struct mem_cgroup_stat_cpu *cpustat;
+	int cpu;
+	struct page_cgroup *pc;
+
+	if (!page_is_file_cache(page))
+		return;
+
+	pc = lookup_page_cgroup(page);
+	if (unlikely(!pc))
+		return;
+
+	lock_page_cgroup(pc);
+	mem = pc->mem_cgroup;
+	if (!mem)
+		goto done;
+
+	if (!PageCgroupUsed(pc))
+		goto done;
+
+	/*
+	 * Preemption is already disabled, we don't need get_cpu()
+	 */
+	cpu = smp_processor_id();
+	stat = &mem->stat;
+	cpustat = &stat->cpustat[cpu];
+
+	__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, val);
+done:
+	unlock_page_cgroup(pc);
+}
 
 /*
  * Unlike exported interface, "oom" parameter is added. if oom==true,
@@ -1098,6 +1137,10 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 	struct mem_cgroup_per_zone *from_mz, *to_mz;
 	int nid, zid;
 	int ret = -EBUSY;
+	struct page *page;
+	int cpu;
+	struct mem_cgroup_stat *stat;
+	struct mem_cgroup_stat_cpu *cpustat;
 
 	VM_BUG_ON(from == to);
 	VM_BUG_ON(PageLRU(pc->page));
@@ -1118,6 +1161,23 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 
 	res_counter_uncharge(&from->res, PAGE_SIZE);
 	mem_cgroup_charge_statistics(from, pc, false);
+
+	page = pc->page;
+	if (page_is_file_cache(page) && page_mapped(page)) {
+		cpu = smp_processor_id();
+		/* Update mapped_file data for mem_cgroup "from" */
+		stat = &from->stat;
+		cpustat = &stat->cpustat[cpu];
+		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE,
+						-1);
+
+		/* Update mapped_file data for mem_cgroup "to" */
+		stat = &to->stat;
+		cpustat = &stat->cpustat[cpu];
+		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE,
+						1);
+	}
+
 	if (do_swap_account)
 		res_counter_uncharge(&from->memsw, PAGE_SIZE);
 	css_put(&from->css);
@@ -2046,6 +2106,7 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
 enum {
 	MCS_CACHE,
 	MCS_RSS,
+	MCS_MAPPED_FILE,
 	MCS_PGPGIN,
 	MCS_PGPGOUT,
 	MCS_INACTIVE_ANON,
@@ -2066,6 +2127,7 @@ struct {
 } memcg_stat_strings[NR_MCS_STAT] = {
 	{"cache", "total_cache"},
 	{"rss", "total_rss"},
+	{"mapped_file", "total_mapped_file"},
 	{"pgpgin", "total_pgpgin"},
 	{"pgpgout", "total_pgpgout"},
 	{"inactive_anon", "total_inactive_anon"},
@@ -2086,6 +2148,8 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
 	s->stat[MCS_CACHE] += val * PAGE_SIZE;
 	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
 	s->stat[MCS_RSS] += val * PAGE_SIZE;
+	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_MAPPED_FILE);
+	s->stat[MCS_MAPPED_FILE] += val * PAGE_SIZE;
 	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT);
 	s->stat[MCS_PGPGIN] += val;
 	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT);
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 4f31c9b3e940..672089d5819f 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -99,6 +99,8 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	unsigned long pfn = page_to_pfn(page);
 	struct mem_section *section = __pfn_to_section(pfn);
 
+	if (!section->page_cgroup)
+		return NULL;
 	return section->page_cgroup + pfn;
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
index c9ccc1a72dc3..836c6c63e1f2 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -703,8 +703,10 @@ void page_add_new_anon_rmap(struct page *page,
  */
 void page_add_file_rmap(struct page *page)
 {
-	if (atomic_inc_and_test(&page->_mapcount))
+	if (atomic_inc_and_test(&page->_mapcount)) {
 		__inc_zone_page_state(page, NR_FILE_MAPPED);
+		mem_cgroup_update_mapped_file_stat(page, 1);
+	}
 }
 
 #ifdef CONFIG_DEBUG_VM
@@ -753,6 +755,7 @@ void page_remove_rmap(struct page *page)
 		mem_cgroup_uncharge_page(page);
 		__dec_zone_page_state(page,
 			PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
+		mem_cgroup_update_mapped_file_stat(page, -1);
 		/*
 		 * It would be tidy to reset the PageAnon mapping here,
 		 * but that might overwrite a racing page_add_anon_rmap
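
A side note on the design (not from the commit): the statistics touched above are kept as per-CPU counters that are only summed when userspace reads memory.stat, so the hot map/unmap paths never need cross-CPU atomics. A minimal stand-alone sketch of that pattern, with purely illustrative names and a fixed CPU count, could look like:

	#include <stdio.h>

	#define NCPU 4				/* assumed fixed number of CPUs */

	struct percpu_stat {
		long count[NCPU];
	};

	static void stat_add(struct percpu_stat *s, int cpu, long val)
	{
		s->count[cpu] += val;		/* no atomics: only this CPU writes its slot */
	}

	static long stat_read(const struct percpu_stat *s)
	{
		long sum = 0;
		int cpu;

		for (cpu = 0; cpu < NCPU; cpu++)
			sum += s->count[cpu];	/* readers tolerate slight skew */
		return sum;
	}

	int main(void)
	{
		struct percpu_stat mapped_file = { {0} };

		stat_add(&mapped_file, 0, 1);	/* analogue of page_add_file_rmap() */
		stat_add(&mapped_file, 1, 1);
		stat_add(&mapped_file, 0, -1);	/* analogue of page_remove_rmap() */
		printf("mapped_file pages: %ld\n", stat_read(&mapped_file));
		return 0;
	}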