aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author: Balbir Singh <balbir@linux.vnet.ibm.com> 2009-09-23 18:56:32 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-09-24 10:20:58 -0400
commit: 4b3bde4c983de36c59e6c1a24701f6fe816f9f55 (patch)
tree: e759c5ceccf57331b868b7feac61cae5e932c6d4 /mm
parent: be367d09927023d081f9199665c8500f69f14d22 (diff)
memcg: remove the overhead associated with the root cgroup
Change the memory cgroup to remove the overhead associated with accounting all pages in the root cgroup. As a side-effect, we can no longer set a memory hard limit in the root cgroup.

A new flag to track whether the page has been accounted or not has been added as well. Flags are now set atomically for page_cgroup; pcg_default_flags is now obsolete and removed.

[akpm@linux-foundation.org: fix a few documentation glitches]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/memcontrol.c 54
1 files changed, 40 insertions, 14 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cf2e717f5c12..b0757660663f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -43,6 +43,7 @@
43 43
44struct cgroup_subsys mem_cgroup_subsys __read_mostly; 44struct cgroup_subsys mem_cgroup_subsys __read_mostly;
45#define MEM_CGROUP_RECLAIM_RETRIES 5 45#define MEM_CGROUP_RECLAIM_RETRIES 5
46struct mem_cgroup *root_mem_cgroup __read_mostly;
46 47
47#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 48#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
48/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ 49/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
@@ -200,13 +201,8 @@ enum charge_type {
200#define PCGF_CACHE (1UL << PCG_CACHE) 201#define PCGF_CACHE (1UL << PCG_CACHE)
201#define PCGF_USED (1UL << PCG_USED) 202#define PCGF_USED (1UL << PCG_USED)
202#define PCGF_LOCK (1UL << PCG_LOCK) 203#define PCGF_LOCK (1UL << PCG_LOCK)
203static const unsigned long 204/* Not used, but added here for completeness */
204pcg_default_flags[NR_CHARGE_TYPE] = { 205#define PCGF_ACCT (1UL << PCG_ACCT)
205 PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* File Cache */
206 PCGF_USED | PCGF_LOCK, /* Anon */
207 PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
208 0, /* FORCE */
209};
210 206
211/* for encoding cft->private value on file */ 207/* for encoding cft->private value on file */
212#define _MEM (0) 208#define _MEM (0)
@@ -354,6 +350,11 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
354 return ret; 350 return ret;
355} 351}
356 352
353static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
354{
355 return (mem == root_mem_cgroup);
356}
357
357/* 358/*
358 * Following LRU functions are allowed to be used without PCG_LOCK. 359 * Following LRU functions are allowed to be used without PCG_LOCK.
359 * Operations are called by routine of global LRU independently from memcg. 360 * Operations are called by routine of global LRU independently from memcg.
@@ -371,22 +372,24 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
371void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru) 372void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
372{ 373{
373 struct page_cgroup *pc; 374 struct page_cgroup *pc;
374 struct mem_cgroup *mem;
375 struct mem_cgroup_per_zone *mz; 375 struct mem_cgroup_per_zone *mz;
376 376
377 if (mem_cgroup_disabled()) 377 if (mem_cgroup_disabled())
378 return; 378 return;
379 pc = lookup_page_cgroup(page); 379 pc = lookup_page_cgroup(page);
380 /* can happen while we handle swapcache. */ 380 /* can happen while we handle swapcache. */
381 if (list_empty(&pc->lru) || !pc->mem_cgroup) 381 if (!TestClearPageCgroupAcctLRU(pc))
382 return; 382 return;
383 VM_BUG_ON(!pc->mem_cgroup);
383 /* 384 /*
384 * We don't check PCG_USED bit. It's cleared when the "page" is finally 385 * We don't check PCG_USED bit. It's cleared when the "page" is finally
385 * removed from global LRU. 386 * removed from global LRU.
386 */ 387 */
387 mz = page_cgroup_zoneinfo(pc); 388 mz = page_cgroup_zoneinfo(pc);
388 mem = pc->mem_cgroup;
389 MEM_CGROUP_ZSTAT(mz, lru) -= 1; 389 MEM_CGROUP_ZSTAT(mz, lru) -= 1;
390 if (mem_cgroup_is_root(pc->mem_cgroup))
391 return;
392 VM_BUG_ON(list_empty(&pc->lru));
390 list_del_init(&pc->lru); 393 list_del_init(&pc->lru);
391 return; 394 return;
392} 395}
@@ -410,8 +413,8 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
410 * For making pc->mem_cgroup visible, insert smp_rmb() here. 413 * For making pc->mem_cgroup visible, insert smp_rmb() here.
411 */ 414 */
412 smp_rmb(); 415 smp_rmb();
413 /* unused page is not rotated. */ 416 /* unused or root page is not rotated. */
414 if (!PageCgroupUsed(pc)) 417 if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
415 return; 418 return;
416 mz = page_cgroup_zoneinfo(pc); 419 mz = page_cgroup_zoneinfo(pc);
417 list_move(&pc->lru, &mz->lists[lru]); 420 list_move(&pc->lru, &mz->lists[lru]);
@@ -425,6 +428,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
425 if (mem_cgroup_disabled()) 428 if (mem_cgroup_disabled())
426 return; 429 return;
427 pc = lookup_page_cgroup(page); 430 pc = lookup_page_cgroup(page);
431 VM_BUG_ON(PageCgroupAcctLRU(pc));
428 /* 432 /*
429 * Used bit is set without atomic ops but after smp_wmb(). 433 * Used bit is set without atomic ops but after smp_wmb().
430 * For making pc->mem_cgroup visible, insert smp_rmb() here. 434 * For making pc->mem_cgroup visible, insert smp_rmb() here.
@@ -435,6 +439,9 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
435 439
436 mz = page_cgroup_zoneinfo(pc); 440 mz = page_cgroup_zoneinfo(pc);
437 MEM_CGROUP_ZSTAT(mz, lru) += 1; 441 MEM_CGROUP_ZSTAT(mz, lru) += 1;
442 SetPageCgroupAcctLRU(pc);
443 if (mem_cgroup_is_root(pc->mem_cgroup))
444 return;
438 list_add(&pc->lru, &mz->lists[lru]); 445 list_add(&pc->lru, &mz->lists[lru]);
439} 446}
440 447
@@ -469,7 +476,7 @@ static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page)
469 476
470 spin_lock_irqsave(&zone->lru_lock, flags); 477 spin_lock_irqsave(&zone->lru_lock, flags);
471 /* link when the page is linked to LRU but page_cgroup isn't */ 478 /* link when the page is linked to LRU but page_cgroup isn't */
472 if (PageLRU(page) && list_empty(&pc->lru)) 479 if (PageLRU(page) && !PageCgroupAcctLRU(pc))
473 mem_cgroup_add_lru_list(page, page_lru(page)); 480 mem_cgroup_add_lru_list(page, page_lru(page));
474 spin_unlock_irqrestore(&zone->lru_lock, flags); 481 spin_unlock_irqrestore(&zone->lru_lock, flags);
475} 482}
@@ -1125,9 +1132,22 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
1125 css_put(&mem->css); 1132 css_put(&mem->css);
1126 return; 1133 return;
1127 } 1134 }
1135
1128 pc->mem_cgroup = mem; 1136 pc->mem_cgroup = mem;
1129 smp_wmb(); 1137 smp_wmb();
1130 pc->flags = pcg_default_flags[ctype]; 1138 switch (ctype) {
1139 case MEM_CGROUP_CHARGE_TYPE_CACHE:
1140 case MEM_CGROUP_CHARGE_TYPE_SHMEM:
1141 SetPageCgroupCache(pc);
1142 SetPageCgroupUsed(pc);
1143 break;
1144 case MEM_CGROUP_CHARGE_TYPE_MAPPED:
1145 ClearPageCgroupCache(pc);
1146 SetPageCgroupUsed(pc);
1147 break;
1148 default:
1149 break;
1150 }
1131 1151
1132 mem_cgroup_charge_statistics(mem, pc, true); 1152 mem_cgroup_charge_statistics(mem, pc, true);
1133 1153
@@ -2083,6 +2103,10 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
2083 name = MEMFILE_ATTR(cft->private); 2103 name = MEMFILE_ATTR(cft->private);
2084 switch (name) { 2104 switch (name) {
2085 case RES_LIMIT: 2105 case RES_LIMIT:
2106 if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */
2107 ret = -EINVAL;
2108 break;
2109 }
2086 /* This function does all necessary parse...reuse it */ 2110 /* This function does all necessary parse...reuse it */
2087 ret = res_counter_memparse_write_strategy(buffer, &val); 2111 ret = res_counter_memparse_write_strategy(buffer, &val);
2088 if (ret) 2112 if (ret)
@@ -2549,6 +2573,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
2549 if (cont->parent == NULL) { 2573 if (cont->parent == NULL) {
2550 enable_swap_cgroup(); 2574 enable_swap_cgroup();
2551 parent = NULL; 2575 parent = NULL;
2576 root_mem_cgroup = mem;
2552 } else { 2577 } else {
2553 parent = mem_cgroup_from_cont(cont->parent); 2578 parent = mem_cgroup_from_cont(cont->parent);
2554 mem->use_hierarchy = parent->use_hierarchy; 2579 mem->use_hierarchy = parent->use_hierarchy;
@@ -2577,6 +2602,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
2577 return &mem->css; 2602 return &mem->css;
2578free_out: 2603free_out:
2579 __mem_cgroup_free(mem); 2604 __mem_cgroup_free(mem);
2605 root_mem_cgroup = NULL;
2580 return ERR_PTR(error); 2606 return ERR_PTR(error);
2581} 2607}
2582 2608