diff options
author | Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> | 2009-01-15 16:51:13 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-15 19:39:39 -0500 |
commit | c268e9946d7dc30ac4e55cdc3f43c8af1ae8153c (patch) | |
tree | 24de026d333ae2d8137165398d71650307cd26c0 /mm | |
parent | 40d58138f832a48208cdce57d6572a033b1f7a23 (diff) |
memcg: fix hierarchical reclaim
If root_mem has no children, last_scanned_child is set to root_mem itself.
But after some children are added to root_mem, mem_cgroup_get_next_node can
call mem_cgroup_put on root_mem even though mem_cgroup_get was never called on it.
This patch fixes this behavior by:
- Set last_scanned_child to NULL if root_mem has no children or the DFS
search has returned to root_mem itself (root_mem is not a "child" of
root_mem). Make mem_cgroup_get_first_node return root_mem in this case.
No mem_cgroup_get/put is performed on root_mem.
- Rename mem_cgroup_get_next_node to __mem_cgroup_get_next_node, and
mem_cgroup_get_first_node to mem_cgroup_get_next_node. Make
mem_cgroup_hierarchical_reclaim call only the new mem_cgroup_get_next_node.
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 68 |
1 files changed, 36 insertions, 32 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7be9b35d7ffb..322625f551c2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -633,7 +633,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
633 | * called with hierarchy_mutex held | 633 | * called with hierarchy_mutex held |
634 | */ | 634 | */ |
635 | static struct mem_cgroup * | 635 | static struct mem_cgroup * |
636 | mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) | 636 | __mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) |
637 | { | 637 | { |
638 | struct cgroup *cgroup, *curr_cgroup, *root_cgroup; | 638 | struct cgroup *cgroup, *curr_cgroup, *root_cgroup; |
639 | 639 | ||
@@ -644,19 +644,16 @@ mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) | |||
644 | /* | 644 | /* |
645 | * Walk down to children | 645 | * Walk down to children |
646 | */ | 646 | */ |
647 | mem_cgroup_put(curr); | ||
648 | cgroup = list_entry(curr_cgroup->children.next, | 647 | cgroup = list_entry(curr_cgroup->children.next, |
649 | struct cgroup, sibling); | 648 | struct cgroup, sibling); |
650 | curr = mem_cgroup_from_cont(cgroup); | 649 | curr = mem_cgroup_from_cont(cgroup); |
651 | mem_cgroup_get(curr); | ||
652 | goto done; | 650 | goto done; |
653 | } | 651 | } |
654 | 652 | ||
655 | visit_parent: | 653 | visit_parent: |
656 | if (curr_cgroup == root_cgroup) { | 654 | if (curr_cgroup == root_cgroup) { |
657 | mem_cgroup_put(curr); | 655 | /* caller handles NULL case */ |
658 | curr = root_mem; | 656 | curr = NULL; |
659 | mem_cgroup_get(curr); | ||
660 | goto done; | 657 | goto done; |
661 | } | 658 | } |
662 | 659 | ||
@@ -664,11 +661,9 @@ visit_parent: | |||
664 | * Goto next sibling | 661 | * Goto next sibling |
665 | */ | 662 | */ |
666 | if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) { | 663 | if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) { |
667 | mem_cgroup_put(curr); | ||
668 | cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup, | 664 | cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup, |
669 | sibling); | 665 | sibling); |
670 | curr = mem_cgroup_from_cont(cgroup); | 666 | curr = mem_cgroup_from_cont(cgroup); |
671 | mem_cgroup_get(curr); | ||
672 | goto done; | 667 | goto done; |
673 | } | 668 | } |
674 | 669 | ||
@@ -679,7 +674,6 @@ visit_parent: | |||
679 | goto visit_parent; | 674 | goto visit_parent; |
680 | 675 | ||
681 | done: | 676 | done: |
682 | root_mem->last_scanned_child = curr; | ||
683 | return curr; | 677 | return curr; |
684 | } | 678 | } |
685 | 679 | ||
@@ -689,40 +683,46 @@ done: | |||
689 | * that to reclaim free pages from. | 683 | * that to reclaim free pages from. |
690 | */ | 684 | */ |
691 | static struct mem_cgroup * | 685 | static struct mem_cgroup * |
692 | mem_cgroup_get_first_node(struct mem_cgroup *root_mem) | 686 | mem_cgroup_get_next_node(struct mem_cgroup *root_mem) |
693 | { | 687 | { |
694 | struct cgroup *cgroup; | 688 | struct cgroup *cgroup; |
695 | struct mem_cgroup *ret; | 689 | struct mem_cgroup *orig, *next; |
696 | bool obsolete; | 690 | bool obsolete; |
697 | 691 | ||
698 | obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child); | ||
699 | |||
700 | /* | 692 | /* |
701 | * Scan all children under the mem_cgroup mem | 693 | * Scan all children under the mem_cgroup mem |
702 | */ | 694 | */ |
703 | mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); | 695 | mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); |
696 | |||
697 | orig = root_mem->last_scanned_child; | ||
698 | obsolete = mem_cgroup_is_obsolete(orig); | ||
699 | |||
704 | if (list_empty(&root_mem->css.cgroup->children)) { | 700 | if (list_empty(&root_mem->css.cgroup->children)) { |
705 | ret = root_mem; | 701 | /* |
702 | * root_mem might have children before and last_scanned_child | ||
703 | * may point to one of them. We put it later. | ||
704 | */ | ||
705 | if (orig) | ||
706 | VM_BUG_ON(!obsolete); | ||
707 | next = NULL; | ||
706 | goto done; | 708 | goto done; |
707 | } | 709 | } |
708 | 710 | ||
709 | if (!root_mem->last_scanned_child || obsolete) { | 711 | if (!orig || obsolete) { |
710 | |||
711 | if (obsolete && root_mem->last_scanned_child) | ||
712 | mem_cgroup_put(root_mem->last_scanned_child); | ||
713 | |||
714 | cgroup = list_first_entry(&root_mem->css.cgroup->children, | 712 | cgroup = list_first_entry(&root_mem->css.cgroup->children, |
715 | struct cgroup, sibling); | 713 | struct cgroup, sibling); |
716 | ret = mem_cgroup_from_cont(cgroup); | 714 | next = mem_cgroup_from_cont(cgroup); |
717 | mem_cgroup_get(ret); | ||
718 | } else | 715 | } else |
719 | ret = mem_cgroup_get_next_node(root_mem->last_scanned_child, | 716 | next = __mem_cgroup_get_next_node(orig, root_mem); |
720 | root_mem); | ||
721 | 717 | ||
722 | done: | 718 | done: |
723 | root_mem->last_scanned_child = ret; | 719 | if (next) |
720 | mem_cgroup_get(next); | ||
721 | root_mem->last_scanned_child = next; | ||
722 | if (orig) | ||
723 | mem_cgroup_put(orig); | ||
724 | mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); | 724 | mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); |
725 | return ret; | 725 | return (next) ? next : root_mem; |
726 | } | 726 | } |
727 | 727 | ||
728 | static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) | 728 | static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) |
@@ -780,21 +780,18 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
780 | if (!root_mem->use_hierarchy) | 780 | if (!root_mem->use_hierarchy) |
781 | return ret; | 781 | return ret; |
782 | 782 | ||
783 | next_mem = mem_cgroup_get_first_node(root_mem); | 783 | next_mem = mem_cgroup_get_next_node(root_mem); |
784 | 784 | ||
785 | while (next_mem != root_mem) { | 785 | while (next_mem != root_mem) { |
786 | if (mem_cgroup_is_obsolete(next_mem)) { | 786 | if (mem_cgroup_is_obsolete(next_mem)) { |
787 | mem_cgroup_put(next_mem); | 787 | next_mem = mem_cgroup_get_next_node(root_mem); |
788 | next_mem = mem_cgroup_get_first_node(root_mem); | ||
789 | continue; | 788 | continue; |
790 | } | 789 | } |
791 | ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, | 790 | ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, |
792 | get_swappiness(next_mem)); | 791 | get_swappiness(next_mem)); |
793 | if (mem_cgroup_check_under_limit(root_mem)) | 792 | if (mem_cgroup_check_under_limit(root_mem)) |
794 | return 0; | 793 | return 0; |
795 | mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); | 794 | next_mem = mem_cgroup_get_next_node(root_mem); |
796 | next_mem = mem_cgroup_get_next_node(next_mem, root_mem); | ||
797 | mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); | ||
798 | } | 795 | } |
799 | return ret; | 796 | return ret; |
800 | } | 797 | } |
@@ -2254,7 +2251,14 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, | |||
2254 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, | 2251 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, |
2255 | struct cgroup *cont) | 2252 | struct cgroup *cont) |
2256 | { | 2253 | { |
2257 | mem_cgroup_put(mem_cgroup_from_cont(cont)); | 2254 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); |
2255 | struct mem_cgroup *last_scanned_child = mem->last_scanned_child; | ||
2256 | |||
2257 | if (last_scanned_child) { | ||
2258 | VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child)); | ||
2259 | mem_cgroup_put(last_scanned_child); | ||
2260 | } | ||
2261 | mem_cgroup_put(mem); | ||
2258 | } | 2262 | } |
2259 | 2263 | ||
2260 | static int mem_cgroup_populate(struct cgroup_subsys *ss, | 2264 | static int mem_cgroup_populate(struct cgroup_subsys *ss, |