aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2009-01-07 21:08:33 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-01-08 11:31:10 -0500
commit54595fe2652f04dc8f5b985312c7cef5aa7bf722 (patch)
tree4e63df850afb307a170c045217b2097aae271b78
parenta7ba0eef3af51cd1b6fc4028e4705b3ea2ea9469 (diff)
memcg: use css_tryget in memcg
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> css_tryget() has newly been added, so we can now tell whether a css is alive and take a refcnt on it in a very safe way. ("alive" here means "rmdir/destroy" has not been called.) This patch replaces css_get() with css_tryget() in the places where I cannot explain why css_get() is safe, and removes the memcg->obsolete flag. Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Paul Menage <menage@google.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Li Zefan <lizf@cn.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/memcontrol.c98
1 files changed, 62 insertions, 36 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4f9a9c5a02e2..b311f19bbe01 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -162,7 +162,6 @@ struct mem_cgroup {
162 */ 162 */
163 bool use_hierarchy; 163 bool use_hierarchy;
164 unsigned long last_oom_jiffies; 164 unsigned long last_oom_jiffies;
165 int obsolete;
166 atomic_t refcnt; 165 atomic_t refcnt;
167 166
168 unsigned int swappiness; 167 unsigned int swappiness;
@@ -283,6 +282,31 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
283 struct mem_cgroup, css); 282 struct mem_cgroup, css);
284} 283}
285 284
285static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
286{
287 struct mem_cgroup *mem = NULL;
288 /*
289 * Because we hold no locks, mm->owner may be being moved to another
290 * cgroup. We use css_tryget() here even if this looks
291 * pessimistic (rather than adding locks here).
292 */
293 rcu_read_lock();
294 do {
295 mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
296 if (unlikely(!mem))
297 break;
298 } while (!css_tryget(&mem->css));
299 rcu_read_unlock();
300 return mem;
301}
302
303static bool mem_cgroup_is_obsolete(struct mem_cgroup *mem)
304{
305 if (!mem)
306 return true;
307 return css_is_removed(&mem->css);
308}
309
286/* 310/*
287 * Following LRU functions are allowed to be used without PCG_LOCK. 311 * Following LRU functions are allowed to be used without PCG_LOCK.
288 * Operations are called by routine of global LRU independently from memcg. 312 * Operations are called by routine of global LRU independently from memcg.
@@ -622,8 +646,9 @@ mem_cgroup_get_first_node(struct mem_cgroup *root_mem)
622{ 646{
623 struct cgroup *cgroup; 647 struct cgroup *cgroup;
624 struct mem_cgroup *ret; 648 struct mem_cgroup *ret;
625 bool obsolete = (root_mem->last_scanned_child && 649 bool obsolete;
626 root_mem->last_scanned_child->obsolete); 650
651 obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child);
627 652
628 /* 653 /*
629 * Scan all children under the mem_cgroup mem 654 * Scan all children under the mem_cgroup mem
@@ -636,7 +661,7 @@ mem_cgroup_get_first_node(struct mem_cgroup *root_mem)
636 661
637 if (!root_mem->last_scanned_child || obsolete) { 662 if (!root_mem->last_scanned_child || obsolete) {
638 663
639 if (obsolete) 664 if (obsolete && root_mem->last_scanned_child)
640 mem_cgroup_put(root_mem->last_scanned_child); 665 mem_cgroup_put(root_mem->last_scanned_child);
641 666
642 cgroup = list_first_entry(&root_mem->css.cgroup->children, 667 cgroup = list_first_entry(&root_mem->css.cgroup->children,
@@ -711,7 +736,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
711 next_mem = mem_cgroup_get_first_node(root_mem); 736 next_mem = mem_cgroup_get_first_node(root_mem);
712 737
713 while (next_mem != root_mem) { 738 while (next_mem != root_mem) {
714 if (next_mem->obsolete) { 739 if (mem_cgroup_is_obsolete(next_mem)) {
715 mem_cgroup_put(next_mem); 740 mem_cgroup_put(next_mem);
716 cgroup_lock(); 741 cgroup_lock();
717 next_mem = mem_cgroup_get_first_node(root_mem); 742 next_mem = mem_cgroup_get_first_node(root_mem);
@@ -769,23 +794,17 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
769 * thread group leader migrates. It's possible that mm is not 794 * thread group leader migrates. It's possible that mm is not
770 * set, if so charge the init_mm (happens for pagecache usage). 795 * set, if so charge the init_mm (happens for pagecache usage).
771 */ 796 */
772 if (likely(!*memcg)) { 797 mem = *memcg;
773 rcu_read_lock(); 798 if (likely(!mem)) {
774 mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); 799 mem = try_get_mem_cgroup_from_mm(mm);
775 if (unlikely(!mem)) {
776 rcu_read_unlock();
777 return 0;
778 }
779 /*
780 * For every charge from the cgroup, increment reference count
781 */
782 css_get(&mem->css);
783 *memcg = mem; 800 *memcg = mem;
784 rcu_read_unlock();
785 } else { 801 } else {
786 mem = *memcg;
787 css_get(&mem->css); 802 css_get(&mem->css);
788 } 803 }
804 if (unlikely(!mem))
805 return 0;
806
807 VM_BUG_ON(mem_cgroup_is_obsolete(mem));
789 808
790 while (1) { 809 while (1) {
791 int ret; 810 int ret;
@@ -1072,12 +1091,19 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
1072 MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL); 1091 MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
1073} 1092}
1074 1093
1094/*
1095 * While swap-in, try_charge -> commit or cancel, the page is locked.
1096 * And when try_charge() successfully returns, one refcnt to memcg without
1097 * struct page_cgroup is acquired. This refcnt will be consumed by
1098 * "commit()" or removed by "cancel()"
1099 */
1075int mem_cgroup_try_charge_swapin(struct mm_struct *mm, 1100int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
1076 struct page *page, 1101 struct page *page,
1077 gfp_t mask, struct mem_cgroup **ptr) 1102 gfp_t mask, struct mem_cgroup **ptr)
1078{ 1103{
1079 struct mem_cgroup *mem; 1104 struct mem_cgroup *mem;
1080 swp_entry_t ent; 1105 swp_entry_t ent;
1106 int ret;
1081 1107
1082 if (mem_cgroup_disabled()) 1108 if (mem_cgroup_disabled())
1083 return 0; 1109 return 0;
@@ -1096,10 +1122,15 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
1096 ent.val = page_private(page); 1122 ent.val = page_private(page);
1097 1123
1098 mem = lookup_swap_cgroup(ent); 1124 mem = lookup_swap_cgroup(ent);
1099 if (!mem || mem->obsolete) 1125 if (!mem)
1126 goto charge_cur_mm;
1127 if (!css_tryget(&mem->css))
1100 goto charge_cur_mm; 1128 goto charge_cur_mm;
1101 *ptr = mem; 1129 *ptr = mem;
1102 return __mem_cgroup_try_charge(NULL, mask, ptr, true); 1130 ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
1131 /* drop extra refcnt from tryget */
1132 css_put(&mem->css);
1133 return ret;
1103charge_cur_mm: 1134charge_cur_mm:
1104 if (unlikely(!mm)) 1135 if (unlikely(!mm))
1105 mm = &init_mm; 1136 mm = &init_mm;
@@ -1130,13 +1161,18 @@ int mem_cgroup_cache_charge_swapin(struct page *page,
1130 ent.val = page_private(page); 1161 ent.val = page_private(page);
1131 if (do_swap_account) { 1162 if (do_swap_account) {
1132 mem = lookup_swap_cgroup(ent); 1163 mem = lookup_swap_cgroup(ent);
1133 if (mem && mem->obsolete) 1164 if (mem) {
1134 mem = NULL; 1165 if (css_tryget(&mem->css))
1135 if (mem) 1166 mm = NULL; /* charge to recorded */
1136 mm = NULL; 1167 else
1168 mem = NULL; /* charge to current */
1169 }
1137 } 1170 }
1138 ret = mem_cgroup_charge_common(page, mm, mask, 1171 ret = mem_cgroup_charge_common(page, mm, mask,
1139 MEM_CGROUP_CHARGE_TYPE_SHMEM, mem); 1172 MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);
1173 /* drop extra refcnt from tryget */
1174 if (mem)
1175 css_put(&mem->css);
1140 1176
1141 if (!ret && do_swap_account) { 1177 if (!ret && do_swap_account) {
1142 /* avoid double counting */ 1178 /* avoid double counting */
@@ -1178,7 +1214,6 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
1178 struct mem_cgroup *memcg; 1214 struct mem_cgroup *memcg;
1179 memcg = swap_cgroup_record(ent, NULL); 1215 memcg = swap_cgroup_record(ent, NULL);
1180 if (memcg) { 1216 if (memcg) {
1181 /* If memcg is obsolete, memcg can be != ptr */
1182 res_counter_uncharge(&memcg->memsw, PAGE_SIZE); 1217 res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
1183 mem_cgroup_put(memcg); 1218 mem_cgroup_put(memcg);
1184 } 1219 }
@@ -1421,14 +1456,9 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
1421 if (!mm) 1456 if (!mm)
1422 return 0; 1457 return 0;
1423 1458
1424 rcu_read_lock(); 1459 mem = try_get_mem_cgroup_from_mm(mm);
1425 mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); 1460 if (unlikely(!mem))
1426 if (unlikely(!mem)) {
1427 rcu_read_unlock();
1428 return 0; 1461 return 0;
1429 }
1430 css_get(&mem->css);
1431 rcu_read_unlock();
1432 1462
1433 do { 1463 do {
1434 progress = mem_cgroup_hierarchical_reclaim(mem, gfp_mask, true); 1464 progress = mem_cgroup_hierarchical_reclaim(mem, gfp_mask, true);
@@ -2086,9 +2116,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
2086 * the number of reference from swap_cgroup and free mem_cgroup when 2116 * the number of reference from swap_cgroup and free mem_cgroup when
2087 * it goes down to 0. 2117 * it goes down to 0.
2088 * 2118 *
2089 * When mem_cgroup is destroyed, mem->obsolete will be set to 0 and
2090 * entry which points to this memcg will be ignore at swapin.
2091 *
2092 * Removal of cgroup itself succeeds regardless of refs from swap. 2119 * Removal of cgroup itself succeeds regardless of refs from swap.
2093 */ 2120 */
2094 2121
@@ -2174,7 +2201,6 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
2174 struct cgroup *cont) 2201 struct cgroup *cont)
2175{ 2202{
2176 struct mem_cgroup *mem = mem_cgroup_from_cont(cont); 2203 struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
2177 mem->obsolete = 1;
2178 mem_cgroup_force_empty(mem, false); 2204 mem_cgroup_force_empty(mem, false);
2179} 2205}
2180 2206