aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
authorYing Han <yinghan@google.com>2011-05-26 19:25:38 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-05-26 20:12:36 -0400
commit456f998ec817ebfa254464be4f089542fa390645 (patch)
tree5976aa500638f0bbade1a672233cad71765b89b8 /mm/memcontrol.c
parent406eb0c9ba765eb066406fd5ce9d5e2b169a4d5a (diff)
memcg: add the pagefault count into memcg stats
Add two new stats to the per-memcg memory.stat which track the number of page faults and the number of major page faults: "pgfault" and "pgmajfault". They are different from the "pgpgin"/"pgpgout" stats, which count the number of pages charged/uncharged to the cgroup and have nothing to do with reading/writing pages to disk. It is valuable to track the two stats both for measuring an application's performance and for measuring the efficiency of the kernel page reclaim path. Counting page faults per process is useful, but we also need the aggregated value since processes are monitored and controlled on a per-cgroup basis in memcg. Functional test: check the total number of pgfault/pgmajfault of all memcgs and compare with global vmstat value: $ cat /proc/vmstat | grep fault pgfault 1070751 pgmajfault 553 $ cat /dev/cgroup/memory.stat | grep fault pgfault 1071138 pgmajfault 553 total_pgfault 1071142 total_pgmajfault 553 $ cat /dev/cgroup/A/memory.stat | grep fault pgfault 199 pgmajfault 0 total_pgfault 199 total_pgmajfault 0 Performance test: run the page fault test (pft) with 16 threads faulting in 15G of anon pages in a 16G container. 
No regression was noticed in "flt/cpu/s". Sample output from pft: TAG pft:anon-sys-default: Gb Thr CLine User System Wall flt/cpu/s fault/wsec 15 16 1 0.67s 233.41s 14.76s 16798.546 266356.260 +-------------------------------------------------------------------------+ N Min Max Median Avg Stddev x 10 16682.962 17344.027 16913.524 16928.812 166.5362 + 10 16695.568 16923.896 16820.604 16824.652 84.816568 No difference proven at 95.0% confidence [akpm@linux-foundation.org: fix build] [hughd@google.com: shmem fix] Signed-off-by: Ying Han <yinghan@google.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Reviewed-by: Minchan Kim <minchan.kim@gmail.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com> Signed-off-by: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c47
1 files changed, 47 insertions, 0 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4021fcd71b60..bd9052a5d3ad 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -94,6 +94,8 @@ enum mem_cgroup_events_index {
94 MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ 94 MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */
95 MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ 95 MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */
96 MEM_CGROUP_EVENTS_COUNT, /* # of pages paged in/out */ 96 MEM_CGROUP_EVENTS_COUNT, /* # of pages paged in/out */
97 MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */
98 MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */
97 MEM_CGROUP_EVENTS_NSTATS, 99 MEM_CGROUP_EVENTS_NSTATS,
98}; 100};
99/* 101/*
@@ -590,6 +592,16 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
590 this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val); 592 this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
591} 593}
592 594
/*
 * mem_cgroup_pgfault - account page faults to a memory cgroup.
 * @mem: cgroup whose event counter is updated
 * @val: amount to add to the counter
 *
 * Adds @val to mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT] using
 * this_cpu_add().
 */
595void mem_cgroup_pgfault(struct mem_cgroup *mem, int val)
596{
597 this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);
598}
599
/*
 * mem_cgroup_pgmajfault - account major page faults to a memory cgroup.
 * @mem: cgroup whose event counter is updated
 * @val: amount to add to the counter
 *
 * Adds @val to mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT] using
 * this_cpu_add().
 */
600void mem_cgroup_pgmajfault(struct mem_cgroup *mem, int val)
601{
602 this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);
603}
604
593static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem, 605static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem,
594 enum mem_cgroup_events_index idx) 606 enum mem_cgroup_events_index idx)
595{ 607{
@@ -827,6 +839,33 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
827 return (mem == root_mem_cgroup); 839 return (mem == root_mem_cgroup);
828} 840}
829 841
/*
 * mem_cgroup_count_vm_event - charge one fault event to the cgroup of @mm.
 * @mm: address space whose owner task selects the cgroup; NULL is ignored
 * @idx: event to count; only PGFAULT and PGMAJFAULT are handled
 *
 * Looks up the memory cgroup of mm->owner and increments its pgfault or
 * pgmajfault counter by one. Does nothing if @mm is NULL or if no cgroup
 * is found for the owner. Any other @idx hits BUG().
 */
842void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
843{
844 struct mem_cgroup *mem;
845
846 if (!mm)
847 return;
848
 /* mm->owner is fetched with rcu_dereference(), so hold the RCU read lock. */
849 rcu_read_lock();
850 mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
851 if (unlikely(!mem))
852 goto out;
853
854 switch (idx) {
855 case PGMAJFAULT:
856 mem_cgroup_pgmajfault(mem, 1);
857 break;
858 case PGFAULT:
859 mem_cgroup_pgfault(mem, 1);
860 break;
 /* Callers must pass one of the two fault events handled above. */
861 default:
862 BUG();
863 }
864out:
865 rcu_read_unlock();
866}
867EXPORT_SYMBOL(mem_cgroup_count_vm_event);
868
830/* 869/*
831 * Following LRU functions are allowed to be used without PCG_LOCK. 870 * Following LRU functions are allowed to be used without PCG_LOCK.
832 * Operations are called by routine of global LRU independently from memcg. 871 * Operations are called by routine of global LRU independently from memcg.
@@ -3958,6 +3997,8 @@ enum {
3958 MCS_PGPGIN, 3997 MCS_PGPGIN,
3959 MCS_PGPGOUT, 3998 MCS_PGPGOUT,
3960 MCS_SWAP, 3999 MCS_SWAP,
4000 MCS_PGFAULT,
4001 MCS_PGMAJFAULT,
3961 MCS_INACTIVE_ANON, 4002 MCS_INACTIVE_ANON,
3962 MCS_ACTIVE_ANON, 4003 MCS_ACTIVE_ANON,
3963 MCS_INACTIVE_FILE, 4004 MCS_INACTIVE_FILE,
@@ -3980,6 +4021,8 @@ struct {
3980 {"pgpgin", "total_pgpgin"}, 4021 {"pgpgin", "total_pgpgin"},
3981 {"pgpgout", "total_pgpgout"}, 4022 {"pgpgout", "total_pgpgout"},
3982 {"swap", "total_swap"}, 4023 {"swap", "total_swap"},
4024 {"pgfault", "total_pgfault"},
4025 {"pgmajfault", "total_pgmajfault"},
3983 {"inactive_anon", "total_inactive_anon"}, 4026 {"inactive_anon", "total_inactive_anon"},
3984 {"active_anon", "total_active_anon"}, 4027 {"active_anon", "total_active_anon"},
3985 {"inactive_file", "total_inactive_file"}, 4028 {"inactive_file", "total_inactive_file"},
@@ -4008,6 +4051,10 @@ mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
4008 val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT); 4051 val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
4009 s->stat[MCS_SWAP] += val * PAGE_SIZE; 4052 s->stat[MCS_SWAP] += val * PAGE_SIZE;
4010 } 4053 }
4054 val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGFAULT);
4055 s->stat[MCS_PGFAULT] += val;
4056 val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT);
4057 s->stat[MCS_PGMAJFAULT] += val;
4011 4058
4012 /* per zone stat */ 4059 /* per zone stat */
4013 val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON); 4060 val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON);