aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2008-02-07 03:14:31 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-07 11:42:21 -0500
commit6d12e2d8ddbe653d80ea4f71578481c1bc933025 (patch)
treef175cdcadd8566b8f8a2e6a155b33e531eba7f11
parentc0149530d0bb356c933a09f3c8103ea02f452d8a (diff)
per-zone and reclaim enhancements for memory controller: per-zone active inactive counter
This patch adds per-zone statistics to the memory cgroup. These values are read often (as per-zone values) by page reclaim. In the current design, each per-zone statistic is a plain unsigned long rather than an atomic value, because the counters are modified only under lru_lock (so atomic ops are not necessary). This patch adds ACTIVE and INACTIVE per-zone status values. To handle per-zone status, this patch adds struct mem_cgroup_per_zone { ... } and some helper functions. This will also be useful for adding further per-zone objects to mem_cgroup. This patch sets the memory controller's early_init to 0 so that kmalloc() can be called during initialization. Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: David Rientjes <rientjes@google.com> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: Kirill Korotaev <dev@sw.ru> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Paul Menage <menage@google.com> Cc: Pavel Emelianov <xemul@openvz.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> Cc: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/memcontrol.c161
1 files changed, 154 insertions, 7 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 422f779a5b21..1637575d3339 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -78,6 +78,31 @@ static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
78} 78}
79 79
80/* 80/*
81 * per-zone information in memory controller.
82 */
83
/* Selects one of the per-zone counters kept for a memory cgroup. */
enum mem_cgroup_zstat_index {
	MEM_CGROUP_ZSTAT_ACTIVE,	/* page_cgroups flagged active */
	MEM_CGROUP_ZSTAT_INACTIVE,	/* page_cgroups not flagged active */

	NR_MEM_CGROUP_ZSTAT,		/* number of counters; keep last */
};
90
91struct mem_cgroup_per_zone {
92 unsigned long count[NR_MEM_CGROUP_ZSTAT];
93};
94/* Macro for accessing counter */
95#define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)])
96
97struct mem_cgroup_per_node {
98 struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
99};
100
101struct mem_cgroup_lru_info {
102 struct mem_cgroup_per_node *nodeinfo[MAX_NUMNODES];
103};
104
105/*
81 * The memory controller data structure. The memory controller controls both 106 * The memory controller data structure. The memory controller controls both
82 * page cache and RSS per cgroup. We would eventually like to provide 107 * page cache and RSS per cgroup. We would eventually like to provide
83 * statistics based on the statistics developed by Rik Van Riel for clock-pro, 108 * statistics based on the statistics developed by Rik Van Riel for clock-pro,
@@ -101,6 +126,7 @@ struct mem_cgroup {
101 */ 126 */
102 struct list_head active_list; 127 struct list_head active_list;
103 struct list_head inactive_list; 128 struct list_head inactive_list;
129 struct mem_cgroup_lru_info info;
104 /* 130 /*
105 * spin_lock to protect the per cgroup LRU 131 * spin_lock to protect the per cgroup LRU
106 */ 132 */
@@ -158,6 +184,7 @@ enum charge_type {
158 MEM_CGROUP_CHARGE_TYPE_MAPPED, 184 MEM_CGROUP_CHARGE_TYPE_MAPPED,
159}; 185};
160 186
187
161/* 188/*
162 * Always modified under lru lock. Then, not necessary to preempt_disable() 189 * Always modified under lru lock. Then, not necessary to preempt_disable()
163 */ 190 */
@@ -173,7 +200,38 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
173 MEM_CGROUP_STAT_CACHE, val); 200 MEM_CGROUP_STAT_CACHE, val);
174 else 201 else
175 __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val); 202 __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);
203}
204
205static inline struct mem_cgroup_per_zone *
206mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
207{
208 BUG_ON(!mem->info.nodeinfo[nid]);
209 return &mem->info.nodeinfo[nid]->zoneinfo[zid];
210}
211
212static inline struct mem_cgroup_per_zone *
213page_cgroup_zoneinfo(struct page_cgroup *pc)
214{
215 struct mem_cgroup *mem = pc->mem_cgroup;
216 int nid = page_cgroup_nid(pc);
217 int zid = page_cgroup_zid(pc);
176 218
219 return mem_cgroup_zoneinfo(mem, nid, zid);
220}
221
222static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem,
223 enum mem_cgroup_zstat_index idx)
224{
225 int nid, zid;
226 struct mem_cgroup_per_zone *mz;
227 u64 total = 0;
228
229 for_each_online_node(nid)
230 for (zid = 0; zid < MAX_NR_ZONES; zid++) {
231 mz = mem_cgroup_zoneinfo(mem, nid, zid);
232 total += MEM_CGROUP_ZSTAT(mz, idx);
233 }
234 return total;
177} 235}
178 236
179static struct mem_cgroup init_mem_cgroup; 237static struct mem_cgroup init_mem_cgroup;
@@ -286,12 +344,51 @@ static struct page_cgroup *clear_page_cgroup(struct page *page,
286 return ret; 344 return ret;
287} 345}
288 346
347static void __mem_cgroup_remove_list(struct page_cgroup *pc)
348{
349 int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
350 struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
351
352 if (from)
353 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
354 else
355 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
356
357 mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
358 list_del_init(&pc->lru);
359}
360
361static void __mem_cgroup_add_list(struct page_cgroup *pc)
362{
363 int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
364 struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
365
366 if (!to) {
367 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
368 list_add(&pc->lru, &pc->mem_cgroup->inactive_list);
369 } else {
370 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
371 list_add(&pc->lru, &pc->mem_cgroup->active_list);
372 }
373 mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true);
374}
375
289static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active) 376static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
290{ 377{
378 int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
379 struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
380
381 if (from)
382 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
383 else
384 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
385
291 if (active) { 386 if (active) {
387 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
292 pc->flags |= PAGE_CGROUP_FLAG_ACTIVE; 388 pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
293 list_move(&pc->lru, &pc->mem_cgroup->active_list); 389 list_move(&pc->lru, &pc->mem_cgroup->active_list);
294 } else { 390 } else {
391 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
295 pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE; 392 pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
296 list_move(&pc->lru, &pc->mem_cgroup->inactive_list); 393 list_move(&pc->lru, &pc->mem_cgroup->inactive_list);
297 } 394 }
@@ -501,8 +598,7 @@ retry:
501 598
502 spin_lock_irqsave(&mem->lru_lock, flags); 599 spin_lock_irqsave(&mem->lru_lock, flags);
503 /* Update statistics vector */ 600 /* Update statistics vector */
504 mem_cgroup_charge_statistics(mem, pc->flags, true); 601 __mem_cgroup_add_list(pc);
505 list_add(&pc->lru, &mem->active_list);
506 spin_unlock_irqrestore(&mem->lru_lock, flags); 602 spin_unlock_irqrestore(&mem->lru_lock, flags);
507 603
508done: 604done:
@@ -571,13 +667,13 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
571 css_put(&mem->css); 667 css_put(&mem->css);
572 res_counter_uncharge(&mem->res, PAGE_SIZE); 668 res_counter_uncharge(&mem->res, PAGE_SIZE);
573 spin_lock_irqsave(&mem->lru_lock, flags); 669 spin_lock_irqsave(&mem->lru_lock, flags);
574 list_del_init(&pc->lru); 670 __mem_cgroup_remove_list(pc);
575 mem_cgroup_charge_statistics(mem, pc->flags, false);
576 spin_unlock_irqrestore(&mem->lru_lock, flags); 671 spin_unlock_irqrestore(&mem->lru_lock, flags);
577 kfree(pc); 672 kfree(pc);
578 } 673 }
579 } 674 }
580} 675}
676
581/* 677/*
582 * Returns non-zero if a page (under migration) has valid page_cgroup member. 678 * Returns non-zero if a page (under migration) has valid page_cgroup member.
583 * Refcnt of page_cgroup is incremented. 679 * Refcnt of page_cgroup is incremented.
@@ -609,16 +705,26 @@ void mem_cgroup_end_migration(struct page *page)
609void mem_cgroup_page_migration(struct page *page, struct page *newpage) 705void mem_cgroup_page_migration(struct page *page, struct page *newpage)
610{ 706{
611 struct page_cgroup *pc; 707 struct page_cgroup *pc;
708 struct mem_cgroup *mem;
709 unsigned long flags;
612retry: 710retry:
613 pc = page_get_page_cgroup(page); 711 pc = page_get_page_cgroup(page);
614 if (!pc) 712 if (!pc)
615 return; 713 return;
714 mem = pc->mem_cgroup;
616 if (clear_page_cgroup(page, pc) != pc) 715 if (clear_page_cgroup(page, pc) != pc)
617 goto retry; 716 goto retry;
717
718 spin_lock_irqsave(&mem->lru_lock, flags);
719
720 __mem_cgroup_remove_list(pc);
618 pc->page = newpage; 721 pc->page = newpage;
619 lock_page_cgroup(newpage); 722 lock_page_cgroup(newpage);
620 page_assign_page_cgroup(newpage, pc); 723 page_assign_page_cgroup(newpage, pc);
621 unlock_page_cgroup(newpage); 724 unlock_page_cgroup(newpage);
725 __mem_cgroup_add_list(pc);
726
727 spin_unlock_irqrestore(&mem->lru_lock, flags);
622 return; 728 return;
623} 729}
624 730
@@ -648,8 +754,7 @@ retry:
648 if (clear_page_cgroup(page, pc) == pc) { 754 if (clear_page_cgroup(page, pc) == pc) {
649 css_put(&mem->css); 755 css_put(&mem->css);
650 res_counter_uncharge(&mem->res, PAGE_SIZE); 756 res_counter_uncharge(&mem->res, PAGE_SIZE);
651 list_del_init(&pc->lru); 757 __mem_cgroup_remove_list(pc);
652 mem_cgroup_charge_statistics(mem, pc->flags, false);
653 kfree(pc); 758 kfree(pc);
654 } else /* being uncharged ? ...do relax */ 759 } else /* being uncharged ? ...do relax */
655 break; 760 break;
@@ -828,6 +933,17 @@ static int mem_control_stat_show(struct seq_file *m, void *arg)
828 seq_printf(m, "%s %lld\n", mem_cgroup_stat_desc[i].msg, 933 seq_printf(m, "%s %lld\n", mem_cgroup_stat_desc[i].msg,
829 (long long)val); 934 (long long)val);
830 } 935 }
936 /* showing # of active pages */
937 {
938 unsigned long active, inactive;
939
940 inactive = mem_cgroup_get_all_zonestat(mem_cont,
941 MEM_CGROUP_ZSTAT_INACTIVE);
942 active = mem_cgroup_get_all_zonestat(mem_cont,
943 MEM_CGROUP_ZSTAT_ACTIVE);
944 seq_printf(m, "active %ld\n", (active) * PAGE_SIZE);
945 seq_printf(m, "inactive %ld\n", (inactive) * PAGE_SIZE);
946 }
831 return 0; 947 return 0;
832} 948}
833 949
@@ -881,12 +997,25 @@ static struct cftype mem_cgroup_files[] = {
881 }, 997 },
882}; 998};
883 999
1000static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
1001{
1002 struct mem_cgroup_per_node *pn;
1003
1004 pn = kmalloc_node(sizeof(*pn), GFP_KERNEL, node);
1005 if (!pn)
1006 return 1;
1007 mem->info.nodeinfo[node] = pn;
1008 memset(pn, 0, sizeof(*pn));
1009 return 0;
1010}
1011
884static struct mem_cgroup init_mem_cgroup; 1012static struct mem_cgroup init_mem_cgroup;
885 1013
886static struct cgroup_subsys_state * 1014static struct cgroup_subsys_state *
887mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) 1015mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
888{ 1016{
889 struct mem_cgroup *mem; 1017 struct mem_cgroup *mem;
1018 int node;
890 1019
891 if (unlikely((cont->parent) == NULL)) { 1020 if (unlikely((cont->parent) == NULL)) {
892 mem = &init_mem_cgroup; 1021 mem = &init_mem_cgroup;
@@ -902,7 +1031,19 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
902 INIT_LIST_HEAD(&mem->inactive_list); 1031 INIT_LIST_HEAD(&mem->inactive_list);
903 spin_lock_init(&mem->lru_lock); 1032 spin_lock_init(&mem->lru_lock);
904 mem->control_type = MEM_CGROUP_TYPE_ALL; 1033 mem->control_type = MEM_CGROUP_TYPE_ALL;
1034 memset(&mem->info, 0, sizeof(mem->info));
1035
1036 for_each_node_state(node, N_POSSIBLE)
1037 if (alloc_mem_cgroup_per_zone_info(mem, node))
1038 goto free_out;
1039
905 return &mem->css; 1040 return &mem->css;
1041free_out:
1042 for_each_node_state(node, N_POSSIBLE)
1043 kfree(mem->info.nodeinfo[node]);
1044 if (cont->parent != NULL)
1045 kfree(mem);
1046 return NULL;
906} 1047}
907 1048
908static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, 1049static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
@@ -915,6 +1056,12 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
915static void mem_cgroup_destroy(struct cgroup_subsys *ss, 1056static void mem_cgroup_destroy(struct cgroup_subsys *ss,
916 struct cgroup *cont) 1057 struct cgroup *cont)
917{ 1058{
1059 int node;
1060 struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
1061
1062 for_each_node_state(node, N_POSSIBLE)
1063 kfree(mem->info.nodeinfo[node]);
1064
918 kfree(mem_cgroup_from_cont(cont)); 1065 kfree(mem_cgroup_from_cont(cont));
919} 1066}
920 1067
@@ -967,5 +1114,5 @@ struct cgroup_subsys mem_cgroup_subsys = {
967 .destroy = mem_cgroup_destroy, 1114 .destroy = mem_cgroup_destroy,
968 .populate = mem_cgroup_populate, 1115 .populate = mem_cgroup_populate,
969 .attach = mem_cgroup_move_task, 1116 .attach = mem_cgroup_move_task,
970 .early_init = 1, 1117 .early_init = 0,
971}; 1118};