aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 2008-02-07 03:14:24 -0500
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-07 11:42:20 -0500
commit: d52aa412d43827033a8e2ce4415ef6e8f8d53635 (patch)
tree: 24d446d16a2acf33c612ebf8856529607632bf37
parent: 3564c7c45156b358efe921ab2e4e516dad92c94c (diff)
memory cgroup enhancements: add status accounting function for memory cgroup
Add statistics account infrastructure for memory controller. All account information is stored per-cpu and caller will not have to take lock or use atomic ops. This will be used by memory.stat file later.

CACHE includes swapcache now. I'd like to divide it to PAGECACHE and SWAPCACHE later.

This patch adds 3 functions for accounting.
 * __mem_cgroup_stat_add() ... for usual routine.
 * __mem_cgroup_stat_add_safe ... for calling under irq_disabled section.
 * mem_cgroup_read_stat() ... for reading stat value.
 * renamed PAGECACHE to CACHE (because it may include swapcache *now*)

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: fix smp_processor_id-in-preemptible]
[akpm@linux-foundation.org: uninline things]
[akpm@linux-foundation.org: remove dead code]

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Paul Menage <menage@google.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/memcontrol.c 77
1 files changed, 72 insertions, 5 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 31c4f0cefdee..5f3ad9c37bea 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -21,6 +21,7 @@
21#include <linux/memcontrol.h> 21#include <linux/memcontrol.h>
22#include <linux/cgroup.h> 22#include <linux/cgroup.h>
23#include <linux/mm.h> 23#include <linux/mm.h>
24#include <linux/smp.h>
24#include <linux/page-flags.h> 25#include <linux/page-flags.h>
25#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
26#include <linux/bit_spinlock.h> 27#include <linux/bit_spinlock.h>
@@ -35,6 +36,47 @@ struct cgroup_subsys mem_cgroup_subsys;
35static const int MEM_CGROUP_RECLAIM_RETRIES = 5; 36static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
36 37
37/* 38/*
39 * Statistics for memory cgroup.
40 */
41enum mem_cgroup_stat_index {
42 /*
43 * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
44 */
45 MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */
46 MEM_CGROUP_STAT_RSS, /* # of pages charged as rss */
47
48 MEM_CGROUP_STAT_NSTATS,
49};
50
51struct mem_cgroup_stat_cpu {
52 s64 count[MEM_CGROUP_STAT_NSTATS];
53} ____cacheline_aligned_in_smp;
54
55struct mem_cgroup_stat {
56 struct mem_cgroup_stat_cpu cpustat[NR_CPUS];
57};
58
59/*
60 * For accounting under irq disable, no need for increment preempt count.
61 */
62static void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat *stat,
63 enum mem_cgroup_stat_index idx, int val)
64{
65 int cpu = smp_processor_id();
66 stat->cpustat[cpu].count[idx] += val;
67}
68
69static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
70 enum mem_cgroup_stat_index idx)
71{
72 int cpu;
73 s64 ret = 0;
74 for_each_possible_cpu(cpu)
75 ret += stat->cpustat[cpu].count[idx];
76 return ret;
77}
78
79/*
38 * The memory controller data structure. The memory controller controls both 80 * The memory controller data structure. The memory controller controls both
39 * page cache and RSS per cgroup. We would eventually like to provide 81 * page cache and RSS per cgroup. We would eventually like to provide
40 * statistics based on the statistics developed by Rik Van Riel for clock-pro, 82 * statistics based on the statistics developed by Rik Van Riel for clock-pro,
@@ -63,6 +105,10 @@ struct mem_cgroup {
63 */ 105 */
64 spinlock_t lru_lock; 106 spinlock_t lru_lock;
65 unsigned long control_type; /* control RSS or RSS+Pagecache */ 107 unsigned long control_type; /* control RSS or RSS+Pagecache */
108 /*
109 * statistics.
110 */
111 struct mem_cgroup_stat stat;
66}; 112};
67 113
68/* 114/*
@@ -101,6 +147,24 @@ enum charge_type {
101 MEM_CGROUP_CHARGE_TYPE_MAPPED, 147 MEM_CGROUP_CHARGE_TYPE_MAPPED,
102}; 148};
103 149
150/*
151 * Always modified under lru lock. Then, not necessary to preempt_disable()
152 */
153static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
154 bool charge)
155{
156 int val = (charge)? 1 : -1;
157 struct mem_cgroup_stat *stat = &mem->stat;
158 VM_BUG_ON(!irqs_disabled());
159
160 if (flags & PAGE_CGROUP_FLAG_CACHE)
161 __mem_cgroup_stat_add_safe(stat,
162 MEM_CGROUP_STAT_CACHE, val);
163 else
164 __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);
165
166}
167
104static struct mem_cgroup init_mem_cgroup; 168static struct mem_cgroup init_mem_cgroup;
105 169
106static inline 170static inline
@@ -175,8 +239,8 @@ static void __always_inline unlock_page_cgroup(struct page *page)
175 * This can fail if the page has been tied to a page_cgroup. 239 * This can fail if the page has been tied to a page_cgroup.
176 * If success, returns 0. 240 * If success, returns 0.
177 */ 241 */
178static inline int 242static int page_cgroup_assign_new_page_cgroup(struct page *page,
179page_cgroup_assign_new_page_cgroup(struct page *page, struct page_cgroup *pc) 243 struct page_cgroup *pc)
180{ 244{
181 int ret = 0; 245 int ret = 0;
182 246
@@ -198,8 +262,8 @@ page_cgroup_assign_new_page_cgroup(struct page *page, struct page_cgroup *pc)
198 * clear_page_cgroup(page, pc) == pc 262 * clear_page_cgroup(page, pc) == pc
199 */ 263 */
200 264
201static inline struct page_cgroup * 265static struct page_cgroup *clear_page_cgroup(struct page *page,
202clear_page_cgroup(struct page *page, struct page_cgroup *pc) 266 struct page_cgroup *pc)
203{ 267{
204 struct page_cgroup *ret; 268 struct page_cgroup *ret;
205 /* lock and clear */ 269 /* lock and clear */
@@ -211,7 +275,6 @@ clear_page_cgroup(struct page *page, struct page_cgroup *pc)
211 return ret; 275 return ret;
212} 276}
213 277
214
215static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active) 278static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
216{ 279{
217 if (active) { 280 if (active) {
@@ -426,6 +489,8 @@ retry:
426 } 489 }
427 490
428 spin_lock_irqsave(&mem->lru_lock, flags); 491 spin_lock_irqsave(&mem->lru_lock, flags);
492 /* Update statistics vector */
493 mem_cgroup_charge_statistics(mem, pc->flags, true);
429 list_add(&pc->lru, &mem->active_list); 494 list_add(&pc->lru, &mem->active_list);
430 spin_unlock_irqrestore(&mem->lru_lock, flags); 495 spin_unlock_irqrestore(&mem->lru_lock, flags);
431 496
@@ -496,6 +561,7 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
496 res_counter_uncharge(&mem->res, PAGE_SIZE); 561 res_counter_uncharge(&mem->res, PAGE_SIZE);
497 spin_lock_irqsave(&mem->lru_lock, flags); 562 spin_lock_irqsave(&mem->lru_lock, flags);
498 list_del_init(&pc->lru); 563 list_del_init(&pc->lru);
564 mem_cgroup_charge_statistics(mem, pc->flags, false);
499 spin_unlock_irqrestore(&mem->lru_lock, flags); 565 spin_unlock_irqrestore(&mem->lru_lock, flags);
500 kfree(pc); 566 kfree(pc);
501 } 567 }
@@ -572,6 +638,7 @@ retry:
572 css_put(&mem->css); 638 css_put(&mem->css);
573 res_counter_uncharge(&mem->res, PAGE_SIZE); 639 res_counter_uncharge(&mem->res, PAGE_SIZE);
574 list_del_init(&pc->lru); 640 list_del_init(&pc->lru);
641 mem_cgroup_charge_statistics(mem, pc->flags, false);
575 kfree(pc); 642 kfree(pc);
576 } else /* being uncharged ? ...do relax */ 643 } else /* being uncharged ? ...do relax */
577 break; 644 break;