about | summary | refs | log | tree | commit | diff | stats
path: root/mm
diff options
context:
space:
mode:
author: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> 2009-01-15 16:51:13 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-01-15 19:39:39 -0500
commit: c268e9946d7dc30ac4e55cdc3f43c8af1ae8153c (patch)
tree: 24de026d333ae2d8137165398d71650307cd26c0 /mm
parent: 40d58138f832a48208cdce57d6572a033b1f7a23 (diff)
memcg: fix hierarchical reclaim
If root_mem has no children, last_scanned_child is set to root_mem itself. But after some children are added to root_mem, mem_cgroup_get_next_node can mem_cgroup_put the root_mem although root_mem has not been mem_cgroup_get.

This patch fixes this behavior by:

- Set last_scanned_child to NULL if root_mem has no children or the DFS search has returned to root_mem itself (root_mem is not a "child" of root_mem). Make mem_cgroup_get_first_node return root_mem in this case. There are no mem_cgroup_get/put for root_mem.

- Rename mem_cgroup_get_next_node to __mem_cgroup_get_next_node, and mem_cgroup_get_first_node to mem_cgroup_get_next_node. Make mem_cgroup_hierarchical_reclaim call only the new mem_cgroup_get_next_node.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/memcontrol.c | 68
1 files changed, 36 insertions, 32 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7be9b35d7ffb..322625f551c2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -633,7 +633,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
633 * called with hierarchy_mutex held 633 * called with hierarchy_mutex held
634 */ 634 */
635static struct mem_cgroup * 635static struct mem_cgroup *
636mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) 636__mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
637{ 637{
638 struct cgroup *cgroup, *curr_cgroup, *root_cgroup; 638 struct cgroup *cgroup, *curr_cgroup, *root_cgroup;
639 639
@@ -644,19 +644,16 @@ mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
644 /* 644 /*
645 * Walk down to children 645 * Walk down to children
646 */ 646 */
647 mem_cgroup_put(curr);
648 cgroup = list_entry(curr_cgroup->children.next, 647 cgroup = list_entry(curr_cgroup->children.next,
649 struct cgroup, sibling); 648 struct cgroup, sibling);
650 curr = mem_cgroup_from_cont(cgroup); 649 curr = mem_cgroup_from_cont(cgroup);
651 mem_cgroup_get(curr);
652 goto done; 650 goto done;
653 } 651 }
654 652
655visit_parent: 653visit_parent:
656 if (curr_cgroup == root_cgroup) { 654 if (curr_cgroup == root_cgroup) {
657 mem_cgroup_put(curr); 655 /* caller handles NULL case */
658 curr = root_mem; 656 curr = NULL;
659 mem_cgroup_get(curr);
660 goto done; 657 goto done;
661 } 658 }
662 659
@@ -664,11 +661,9 @@ visit_parent:
664 * Goto next sibling 661 * Goto next sibling
665 */ 662 */
666 if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) { 663 if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) {
667 mem_cgroup_put(curr);
668 cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup, 664 cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup,
669 sibling); 665 sibling);
670 curr = mem_cgroup_from_cont(cgroup); 666 curr = mem_cgroup_from_cont(cgroup);
671 mem_cgroup_get(curr);
672 goto done; 667 goto done;
673 } 668 }
674 669
@@ -679,7 +674,6 @@ visit_parent:
679 goto visit_parent; 674 goto visit_parent;
680 675
681done: 676done:
682 root_mem->last_scanned_child = curr;
683 return curr; 677 return curr;
684} 678}
685 679
@@ -689,40 +683,46 @@ done:
689 * that to reclaim free pages from. 683 * that to reclaim free pages from.
690 */ 684 */
691static struct mem_cgroup * 685static struct mem_cgroup *
692mem_cgroup_get_first_node(struct mem_cgroup *root_mem) 686mem_cgroup_get_next_node(struct mem_cgroup *root_mem)
693{ 687{
694 struct cgroup *cgroup; 688 struct cgroup *cgroup;
695 struct mem_cgroup *ret; 689 struct mem_cgroup *orig, *next;
696 bool obsolete; 690 bool obsolete;
697 691
698 obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child);
699
700 /* 692 /*
701 * Scan all children under the mem_cgroup mem 693 * Scan all children under the mem_cgroup mem
702 */ 694 */
703 mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); 695 mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
696
697 orig = root_mem->last_scanned_child;
698 obsolete = mem_cgroup_is_obsolete(orig);
699
704 if (list_empty(&root_mem->css.cgroup->children)) { 700 if (list_empty(&root_mem->css.cgroup->children)) {
705 ret = root_mem; 701 /*
702 * root_mem might have children before and last_scanned_child
703 * may point to one of them. We put it later.
704 */
705 if (orig)
706 VM_BUG_ON(!obsolete);
707 next = NULL;
706 goto done; 708 goto done;
707 } 709 }
708 710
709 if (!root_mem->last_scanned_child || obsolete) { 711 if (!orig || obsolete) {
710
711 if (obsolete && root_mem->last_scanned_child)
712 mem_cgroup_put(root_mem->last_scanned_child);
713
714 cgroup = list_first_entry(&root_mem->css.cgroup->children, 712 cgroup = list_first_entry(&root_mem->css.cgroup->children,
715 struct cgroup, sibling); 713 struct cgroup, sibling);
716 ret = mem_cgroup_from_cont(cgroup); 714 next = mem_cgroup_from_cont(cgroup);
717 mem_cgroup_get(ret);
718 } else 715 } else
719 ret = mem_cgroup_get_next_node(root_mem->last_scanned_child, 716 next = __mem_cgroup_get_next_node(orig, root_mem);
720 root_mem);
721 717
722done: 718done:
723 root_mem->last_scanned_child = ret; 719 if (next)
720 mem_cgroup_get(next);
721 root_mem->last_scanned_child = next;
722 if (orig)
723 mem_cgroup_put(orig);
724 mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); 724 mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
725 return ret; 725 return (next) ? next : root_mem;
726} 726}
727 727
728static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) 728static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
@@ -780,21 +780,18 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
780 if (!root_mem->use_hierarchy) 780 if (!root_mem->use_hierarchy)
781 return ret; 781 return ret;
782 782
783 next_mem = mem_cgroup_get_first_node(root_mem); 783 next_mem = mem_cgroup_get_next_node(root_mem);
784 784
785 while (next_mem != root_mem) { 785 while (next_mem != root_mem) {
786 if (mem_cgroup_is_obsolete(next_mem)) { 786 if (mem_cgroup_is_obsolete(next_mem)) {
787 mem_cgroup_put(next_mem); 787 next_mem = mem_cgroup_get_next_node(root_mem);
788 next_mem = mem_cgroup_get_first_node(root_mem);
789 continue; 788 continue;
790 } 789 }
791 ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, 790 ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
792 get_swappiness(next_mem)); 791 get_swappiness(next_mem));
793 if (mem_cgroup_check_under_limit(root_mem)) 792 if (mem_cgroup_check_under_limit(root_mem))
794 return 0; 793 return 0;
795 mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); 794 next_mem = mem_cgroup_get_next_node(root_mem);
796 next_mem = mem_cgroup_get_next_node(next_mem, root_mem);
797 mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
798 } 795 }
799 return ret; 796 return ret;
800} 797}
@@ -2254,7 +2251,14 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
2254static void mem_cgroup_destroy(struct cgroup_subsys *ss, 2251static void mem_cgroup_destroy(struct cgroup_subsys *ss,
2255 struct cgroup *cont) 2252 struct cgroup *cont)
2256{ 2253{
2257 mem_cgroup_put(mem_cgroup_from_cont(cont)); 2254 struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
2255 struct mem_cgroup *last_scanned_child = mem->last_scanned_child;
2256
2257 if (last_scanned_child) {
2258 VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child));
2259 mem_cgroup_put(last_scanned_child);
2260 }
2261 mem_cgroup_put(mem);
2258} 2262}
2259 2263
2260static int mem_cgroup_populate(struct cgroup_subsys *ss, 2264static int mem_cgroup_populate(struct cgroup_subsys *ss,