diff options
| author | Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> | 2009-01-15 16:51:13 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-15 19:39:39 -0500 |
| commit | c268e9946d7dc30ac4e55cdc3f43c8af1ae8153c (patch) | |
| tree | 24de026d333ae2d8137165398d71650307cd26c0 | |
| parent | 40d58138f832a48208cdce57d6572a033b1f7a23 (diff) | |
memcg: fix hierarchical reclaim
If root_mem has no children, last_scanned_child is set to root_mem itself.
But after some children are added to root_mem, mem_cgroup_get_next_node can
call mem_cgroup_put on root_mem even though mem_cgroup_get was never called
on it.
This patch fixes this behavior by:
- Set last_scanned_child to NULL if root_mem has no children or the DFS
search has returned to root_mem itself (root_mem is not a "child" of
root_mem). Make mem_cgroup_get_first_node return root_mem in this case.
There are no mem_cgroup_get/put calls for root_mem.
- Rename mem_cgroup_get_next_node to __mem_cgroup_get_next_node, and
mem_cgroup_get_first_node to mem_cgroup_get_next_node. Make
mem_cgroup_hierarchical_reclaim call only the new mem_cgroup_get_next_node.
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | mm/memcontrol.c | 68 |
1 files changed, 36 insertions, 32 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7be9b35d7ffb..322625f551c2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -633,7 +633,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
| 633 | * called with hierarchy_mutex held | 633 | * called with hierarchy_mutex held |
| 634 | */ | 634 | */ |
| 635 | static struct mem_cgroup * | 635 | static struct mem_cgroup * |
| 636 | mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) | 636 | __mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) |
| 637 | { | 637 | { |
| 638 | struct cgroup *cgroup, *curr_cgroup, *root_cgroup; | 638 | struct cgroup *cgroup, *curr_cgroup, *root_cgroup; |
| 639 | 639 | ||
| @@ -644,19 +644,16 @@ mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) | |||
| 644 | /* | 644 | /* |
| 645 | * Walk down to children | 645 | * Walk down to children |
| 646 | */ | 646 | */ |
| 647 | mem_cgroup_put(curr); | ||
| 648 | cgroup = list_entry(curr_cgroup->children.next, | 647 | cgroup = list_entry(curr_cgroup->children.next, |
| 649 | struct cgroup, sibling); | 648 | struct cgroup, sibling); |
| 650 | curr = mem_cgroup_from_cont(cgroup); | 649 | curr = mem_cgroup_from_cont(cgroup); |
| 651 | mem_cgroup_get(curr); | ||
| 652 | goto done; | 650 | goto done; |
| 653 | } | 651 | } |
| 654 | 652 | ||
| 655 | visit_parent: | 653 | visit_parent: |
| 656 | if (curr_cgroup == root_cgroup) { | 654 | if (curr_cgroup == root_cgroup) { |
| 657 | mem_cgroup_put(curr); | 655 | /* caller handles NULL case */ |
| 658 | curr = root_mem; | 656 | curr = NULL; |
| 659 | mem_cgroup_get(curr); | ||
| 660 | goto done; | 657 | goto done; |
| 661 | } | 658 | } |
| 662 | 659 | ||
| @@ -664,11 +661,9 @@ visit_parent: | |||
| 664 | * Goto next sibling | 661 | * Goto next sibling |
| 665 | */ | 662 | */ |
| 666 | if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) { | 663 | if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) { |
| 667 | mem_cgroup_put(curr); | ||
| 668 | cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup, | 664 | cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup, |
| 669 | sibling); | 665 | sibling); |
| 670 | curr = mem_cgroup_from_cont(cgroup); | 666 | curr = mem_cgroup_from_cont(cgroup); |
| 671 | mem_cgroup_get(curr); | ||
| 672 | goto done; | 667 | goto done; |
| 673 | } | 668 | } |
| 674 | 669 | ||
| @@ -679,7 +674,6 @@ visit_parent: | |||
| 679 | goto visit_parent; | 674 | goto visit_parent; |
| 680 | 675 | ||
| 681 | done: | 676 | done: |
| 682 | root_mem->last_scanned_child = curr; | ||
| 683 | return curr; | 677 | return curr; |
| 684 | } | 678 | } |
| 685 | 679 | ||
| @@ -689,40 +683,46 @@ done: | |||
| 689 | * that to reclaim free pages from. | 683 | * that to reclaim free pages from. |
| 690 | */ | 684 | */ |
| 691 | static struct mem_cgroup * | 685 | static struct mem_cgroup * |
| 692 | mem_cgroup_get_first_node(struct mem_cgroup *root_mem) | 686 | mem_cgroup_get_next_node(struct mem_cgroup *root_mem) |
| 693 | { | 687 | { |
| 694 | struct cgroup *cgroup; | 688 | struct cgroup *cgroup; |
| 695 | struct mem_cgroup *ret; | 689 | struct mem_cgroup *orig, *next; |
| 696 | bool obsolete; | 690 | bool obsolete; |
| 697 | 691 | ||
| 698 | obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child); | ||
| 699 | |||
| 700 | /* | 692 | /* |
| 701 | * Scan all children under the mem_cgroup mem | 693 | * Scan all children under the mem_cgroup mem |
| 702 | */ | 694 | */ |
| 703 | mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); | 695 | mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); |
| 696 | |||
| 697 | orig = root_mem->last_scanned_child; | ||
| 698 | obsolete = mem_cgroup_is_obsolete(orig); | ||
| 699 | |||
| 704 | if (list_empty(&root_mem->css.cgroup->children)) { | 700 | if (list_empty(&root_mem->css.cgroup->children)) { |
| 705 | ret = root_mem; | 701 | /* |
| 702 | * root_mem might have children before and last_scanned_child | ||
| 703 | * may point to one of them. We put it later. | ||
| 704 | */ | ||
| 705 | if (orig) | ||
| 706 | VM_BUG_ON(!obsolete); | ||
| 707 | next = NULL; | ||
| 706 | goto done; | 708 | goto done; |
| 707 | } | 709 | } |
| 708 | 710 | ||
| 709 | if (!root_mem->last_scanned_child || obsolete) { | 711 | if (!orig || obsolete) { |
| 710 | |||
| 711 | if (obsolete && root_mem->last_scanned_child) | ||
| 712 | mem_cgroup_put(root_mem->last_scanned_child); | ||
| 713 | |||
| 714 | cgroup = list_first_entry(&root_mem->css.cgroup->children, | 712 | cgroup = list_first_entry(&root_mem->css.cgroup->children, |
| 715 | struct cgroup, sibling); | 713 | struct cgroup, sibling); |
| 716 | ret = mem_cgroup_from_cont(cgroup); | 714 | next = mem_cgroup_from_cont(cgroup); |
| 717 | mem_cgroup_get(ret); | ||
| 718 | } else | 715 | } else |
| 719 | ret = mem_cgroup_get_next_node(root_mem->last_scanned_child, | 716 | next = __mem_cgroup_get_next_node(orig, root_mem); |
| 720 | root_mem); | ||
| 721 | 717 | ||
| 722 | done: | 718 | done: |
| 723 | root_mem->last_scanned_child = ret; | 719 | if (next) |
| 720 | mem_cgroup_get(next); | ||
| 721 | root_mem->last_scanned_child = next; | ||
| 722 | if (orig) | ||
| 723 | mem_cgroup_put(orig); | ||
| 724 | mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); | 724 | mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); |
| 725 | return ret; | 725 | return (next) ? next : root_mem; |
| 726 | } | 726 | } |
| 727 | 727 | ||
| 728 | static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) | 728 | static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) |
| @@ -780,21 +780,18 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
| 780 | if (!root_mem->use_hierarchy) | 780 | if (!root_mem->use_hierarchy) |
| 781 | return ret; | 781 | return ret; |
| 782 | 782 | ||
| 783 | next_mem = mem_cgroup_get_first_node(root_mem); | 783 | next_mem = mem_cgroup_get_next_node(root_mem); |
| 784 | 784 | ||
| 785 | while (next_mem != root_mem) { | 785 | while (next_mem != root_mem) { |
| 786 | if (mem_cgroup_is_obsolete(next_mem)) { | 786 | if (mem_cgroup_is_obsolete(next_mem)) { |
| 787 | mem_cgroup_put(next_mem); | 787 | next_mem = mem_cgroup_get_next_node(root_mem); |
| 788 | next_mem = mem_cgroup_get_first_node(root_mem); | ||
| 789 | continue; | 788 | continue; |
| 790 | } | 789 | } |
| 791 | ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, | 790 | ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, |
| 792 | get_swappiness(next_mem)); | 791 | get_swappiness(next_mem)); |
| 793 | if (mem_cgroup_check_under_limit(root_mem)) | 792 | if (mem_cgroup_check_under_limit(root_mem)) |
| 794 | return 0; | 793 | return 0; |
| 795 | mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); | 794 | next_mem = mem_cgroup_get_next_node(root_mem); |
| 796 | next_mem = mem_cgroup_get_next_node(next_mem, root_mem); | ||
| 797 | mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); | ||
| 798 | } | 795 | } |
| 799 | return ret; | 796 | return ret; |
| 800 | } | 797 | } |
| @@ -2254,7 +2251,14 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, | |||
| 2254 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, | 2251 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, |
| 2255 | struct cgroup *cont) | 2252 | struct cgroup *cont) |
| 2256 | { | 2253 | { |
| 2257 | mem_cgroup_put(mem_cgroup_from_cont(cont)); | 2254 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); |
| 2255 | struct mem_cgroup *last_scanned_child = mem->last_scanned_child; | ||
| 2256 | |||
| 2257 | if (last_scanned_child) { | ||
| 2258 | VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child)); | ||
| 2259 | mem_cgroup_put(last_scanned_child); | ||
| 2260 | } | ||
| 2261 | mem_cgroup_put(mem); | ||
| 2258 | } | 2262 | } |
| 2259 | 2263 | ||
| 2260 | static int mem_cgroup_populate(struct cgroup_subsys *ss, | 2264 | static int mem_cgroup_populate(struct cgroup_subsys *ss, |
