author		Balbir Singh <balbir@linux.vnet.ibm.com>	2009-09-23 18:56:32 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-24 10:20:58 -0400
commit		4b3bde4c983de36c59e6c1a24701f6fe816f9f55
tree		e759c5ceccf57331b868b7feac61cae5e932c6d4 /mm
parent		be367d09927023d081f9199665c8500f69f14d22
memcg: remove the overhead associated with the root cgroup
Change the memory cgroup to remove the overhead associated with accounting
all pages in the root cgroup. As a side-effect, we can no longer set a
memory hard limit in the root cgroup.
A new flag to track whether the page has been accounted has been added as
well. Flags are now set atomically for page_cgroup; pcg_default_flags is
now obsolete and has been removed.
[akpm@linux-foundation.org: fix a few documentation glitches]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/memcontrol.c	54
1 file changed, 40 insertions(+), 14 deletions(-)
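
Before reading the diff itself, it may help to see the shape of the change in isolation: statistics are still updated for pages charged to the root cgroup, but such pages are no longer linked onto the per-memcg LRU lists, and a new per-page flag records whether the page was accounted there. The following user-space sketch only illustrates that pattern and is not kernel code; the struct layouts, field names, and helpers are hypothetical stand-ins for the real page_cgroup machinery.

/*
 * Minimal sketch of the root-cgroup bypass pattern introduced by this
 * patch. All names here are hypothetical stand-ins, not the kernel's.
 */
#include <stdbool.h>
#include <stdio.h>

struct mem_cgroup_sketch {
	long lru_stat;			/* stand-in for MEM_CGROUP_ZSTAT */
	/* the per-memcg LRU list head would live here in the real code */
};

struct page_cgroup_sketch {
	struct mem_cgroup_sketch *mem_cgroup;
	bool acct_lru;			/* stand-in for the "accounted on LRU" flag */
};

static struct mem_cgroup_sketch root_memcg;
static struct mem_cgroup_sketch *root_mem_cgroup = &root_memcg;

static inline bool mem_cgroup_is_root(struct mem_cgroup_sketch *mem)
{
	return mem == root_mem_cgroup;
}

static void add_lru(struct page_cgroup_sketch *pc)
{
	pc->mem_cgroup->lru_stat += 1;	/* stats are kept even for root */
	pc->acct_lru = true;
	if (mem_cgroup_is_root(pc->mem_cgroup))
		return;			/* root pages skip the memcg LRU list */
	/* list_add(&pc->lru, ...) would go here */
}

static void del_lru(struct page_cgroup_sketch *pc)
{
	if (!pc->acct_lru)		/* never accounted: nothing to undo */
		return;
	pc->acct_lru = false;
	pc->mem_cgroup->lru_stat -= 1;
	if (mem_cgroup_is_root(pc->mem_cgroup))
		return;
	/* list_del_init(&pc->lru) would go here */
}

int main(void)
{
	struct page_cgroup_sketch pc = { .mem_cgroup = root_mem_cgroup };

	add_lru(&pc);
	del_lru(&pc);
	printf("root lru_stat after add+del: %ld\n", root_memcg.lru_stat);
	return 0;
}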
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cf2e717f5c12..b0757660663f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -43,6 +43,7 @@
 
 struct cgroup_subsys mem_cgroup_subsys __read_mostly;
 #define MEM_CGROUP_RECLAIM_RETRIES	5
+struct mem_cgroup *root_mem_cgroup __read_mostly;
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
@@ -200,13 +201,8 @@ enum charge_type {
 #define PCGF_CACHE	(1UL << PCG_CACHE)
 #define PCGF_USED	(1UL << PCG_USED)
 #define PCGF_LOCK	(1UL << PCG_LOCK)
-static const unsigned long
-pcg_default_flags[NR_CHARGE_TYPE] = {
-	PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* File Cache */
-	PCGF_USED | PCGF_LOCK, /* Anon */
-	PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
-	0, /* FORCE */
-};
+/* Not used, but added here for completeness */
+#define PCGF_ACCT	(1UL << PCG_ACCT)
 
 /* for encoding cft->private value on file */
 #define _MEM			(0)
@@ -354,6 +350,11 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
 	return ret;
 }
 
+static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
+{
+	return (mem == root_mem_cgroup);
+}
+
 /*
  * Following LRU functions are allowed to be used without PCG_LOCK.
  * Operations are called by routine of global LRU independently from memcg.
@@ -371,22 +372,24 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
 void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
 {
 	struct page_cgroup *pc;
-	struct mem_cgroup *mem;
 	struct mem_cgroup_per_zone *mz;
 
 	if (mem_cgroup_disabled())
 		return;
 	pc = lookup_page_cgroup(page);
 	/* can happen while we handle swapcache. */
-	if (list_empty(&pc->lru) || !pc->mem_cgroup)
+	if (!TestClearPageCgroupAcctLRU(pc))
 		return;
+	VM_BUG_ON(!pc->mem_cgroup);
 	/*
 	 * We don't check PCG_USED bit. It's cleared when the "page" is finally
 	 * removed from global LRU.
 	 */
 	mz = page_cgroup_zoneinfo(pc);
-	mem = pc->mem_cgroup;
 	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	if (mem_cgroup_is_root(pc->mem_cgroup))
+		return;
+	VM_BUG_ON(list_empty(&pc->lru));
 	list_del_init(&pc->lru);
 	return;
 }
@@ -410,8 +413,8 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
 	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
 	 */
 	smp_rmb();
-	/* unused page is not rotated. */
-	if (!PageCgroupUsed(pc))
+	/* unused or root page is not rotated. */
+	if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
 		return;
 	mz = page_cgroup_zoneinfo(pc);
 	list_move(&pc->lru, &mz->lists[lru]);
@@ -425,6 +428,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 	if (mem_cgroup_disabled())
 		return;
 	pc = lookup_page_cgroup(page);
+	VM_BUG_ON(PageCgroupAcctLRU(pc));
 	/*
 	 * Used bit is set without atomic ops but after smp_wmb().
 	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
@@ -435,6 +439,9 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 
 	mz = page_cgroup_zoneinfo(pc);
 	MEM_CGROUP_ZSTAT(mz, lru) += 1;
+	SetPageCgroupAcctLRU(pc);
+	if (mem_cgroup_is_root(pc->mem_cgroup))
+		return;
 	list_add(&pc->lru, &mz->lists[lru]);
 }
 
@@ -469,7 +476,7 @@ static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page)
 
 	spin_lock_irqsave(&zone->lru_lock, flags);
 	/* link when the page is linked to LRU but page_cgroup isn't */
-	if (PageLRU(page) && list_empty(&pc->lru))
+	if (PageLRU(page) && !PageCgroupAcctLRU(pc))
 		mem_cgroup_add_lru_list(page, page_lru(page));
 	spin_unlock_irqrestore(&zone->lru_lock, flags);
 }
@@ -1125,9 +1132,22 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 		css_put(&mem->css);
 		return;
 	}
+
 	pc->mem_cgroup = mem;
 	smp_wmb();
-	pc->flags = pcg_default_flags[ctype];
+	switch (ctype) {
+	case MEM_CGROUP_CHARGE_TYPE_CACHE:
+	case MEM_CGROUP_CHARGE_TYPE_SHMEM:
+		SetPageCgroupCache(pc);
+		SetPageCgroupUsed(pc);
+		break;
+	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
+		ClearPageCgroupCache(pc);
+		SetPageCgroupUsed(pc);
+		break;
+	default:
+		break;
+	}
 
 	mem_cgroup_charge_statistics(mem, pc, true);
 
@@ -2083,6 +2103,10 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
 	name = MEMFILE_ATTR(cft->private);
 	switch (name) {
 	case RES_LIMIT:
+		if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */
+			ret = -EINVAL;
+			break;
+		}
 		/* This function does all necessary parse...reuse it */
 		ret = res_counter_memparse_write_strategy(buffer, &val);
 		if (ret)
@@ -2549,6 +2573,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 	if (cont->parent == NULL) {
 		enable_swap_cgroup();
 		parent = NULL;
+		root_mem_cgroup = mem;
 	} else {
 		parent = mem_cgroup_from_cont(cont->parent);
 		mem->use_hierarchy = parent->use_hierarchy;
@@ -2577,6 +2602,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 	return &mem->css;
 free_out:
 	__mem_cgroup_free(mem);
+	root_mem_cgroup = NULL;
 	return ERR_PTR(error);
 }
 
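
The commit message also notes that page_cgroup flags are now set atomically. The accessors used above (SetPageCgroupUsed, TestClearPageCgroupAcctLRU, and friends) are defined outside this diff, in the page_cgroup header, on top of the kernel's atomic bitops. As a rough illustration only, the atomic set / test-and-clear idea can be sketched in stand-alone C with GCC atomic builtins (not the kernel's helpers); the names below are made up for the example.

#include <stdbool.h>
#include <stdio.h>

enum pcg_flag_bits {
	PCG_USED_BIT,
	PCG_ACCT_LRU_BIT,	/* hypothetical stand-in for the new flag */
};

struct page_cgroup_sketch {
	unsigned long flags;
};

/* Atomically set a flag bit. */
static void set_flag(struct page_cgroup_sketch *pc, int bit)
{
	__atomic_fetch_or(&pc->flags, 1UL << bit, __ATOMIC_SEQ_CST);
}

/* Atomically clear a flag bit and report whether it was previously set. */
static bool test_and_clear_flag(struct page_cgroup_sketch *pc, int bit)
{
	unsigned long old = __atomic_fetch_and(&pc->flags, ~(1UL << bit),
					       __ATOMIC_SEQ_CST);
	return old & (1UL << bit);
}

int main(void)
{
	struct page_cgroup_sketch pc = { 0 };

	set_flag(&pc, PCG_ACCT_LRU_BIT);
	printf("first clear:  %d\n", test_and_clear_flag(&pc, PCG_ACCT_LRU_BIT)); /* 1 */
	printf("second clear: %d\n", test_and_clear_flag(&pc, PCG_ACCT_LRU_BIT)); /* 0 */
	return 0;
}

The test-and-clear form is what lets mem_cgroup_del_lru_list() bail out exactly once per accounted page, even if it races with another path looking at the same page_cgroup.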