Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--   mm/memcontrol.c   130
1 file changed, 83 insertions, 47 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e2996b80601f..4d0ea3ceba6d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -358,6 +358,10 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
                 return;
 
         pc = lookup_page_cgroup(page);
+        /*
+         * Used bit is set without atomic ops but after smp_wmb().
+         * For making pc->mem_cgroup visible, insert smp_rmb() here.
+         */
         smp_rmb();
         /* unused page is not rotated. */
         if (!PageCgroupUsed(pc))
@@ -374,7 +378,10 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
         if (mem_cgroup_disabled())
                 return;
         pc = lookup_page_cgroup(page);
-        /* barrier to sync with "charge" */
+        /*
+         * Used bit is set without atomic ops but after smp_wmb().
+         * For making pc->mem_cgroup visible, insert smp_rmb() here.
+         */
         smp_rmb();
         if (!PageCgroupUsed(pc))
                 return;
@@ -559,6 +566,14 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
                 return NULL;
 
         pc = lookup_page_cgroup(page);
+        /*
+         * Used bit is set without atomic ops but after smp_wmb().
+         * For making pc->mem_cgroup visible, insert smp_rmb() here.
+         */
+        smp_rmb();
+        if (!PageCgroupUsed(pc))
+                return NULL;
+
         mz = page_cgroup_zoneinfo(pc);
         if (!mz)
                 return NULL;
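
Note: the comment added at the three lookup sites above pairs with the charge path, which stores pc->mem_cgroup and only afterwards sets the Used bit behind an smp_wmb(); the smp_rmb() is what lets the reader trust pc->mem_cgroup once it has seen the Used bit. Below is a rough userspace analogue of that publish/observe pattern, using C11 fences in place of smp_wmb()/smp_rmb() (an illustrative sketch only; the struct and function names are placeholders, not kernel code):

#include <stdatomic.h>

struct pc {
        void *mem_cgroup;       /* payload published by the charge side */
        atomic_int used;        /* stands in for the PageCgroupUsed bit */
};

/* charge side: make the payload visible before setting the flag */
void commit_charge(struct pc *pc, void *memcg)
{
        pc->mem_cgroup = memcg;
        atomic_thread_fence(memory_order_release);      /* ~ smp_wmb() */
        atomic_store_explicit(&pc->used, 1, memory_order_relaxed);
}

/* lookup side: only trust the payload once the flag has been seen */
void *lookup_charged(struct pc *pc)
{
        if (!atomic_load_explicit(&pc->used, memory_order_relaxed))
                return NULL;
        atomic_thread_fence(memory_order_acquire);      /* ~ smp_rmb() */
        return pc->mem_cgroup;
}
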
@@ -618,7 +633,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
  * called with hierarchy_mutex held
  */
 static struct mem_cgroup *
-mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
+__mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
 {
         struct cgroup *cgroup, *curr_cgroup, *root_cgroup;
 
@@ -629,19 +644,16 @@ mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
                 /*
                  * Walk down to children
                  */
-                mem_cgroup_put(curr);
                 cgroup = list_entry(curr_cgroup->children.next,
                                 struct cgroup, sibling);
                 curr = mem_cgroup_from_cont(cgroup);
-                mem_cgroup_get(curr);
                 goto done;
         }
 
 visit_parent:
         if (curr_cgroup == root_cgroup) {
-                mem_cgroup_put(curr);
-                curr = root_mem;
-                mem_cgroup_get(curr);
+                /* caller handles NULL case */
+                curr = NULL;
                 goto done;
         }
 
@@ -649,11 +661,9 @@ visit_parent:
          * Goto next sibling
          */
         if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) {
-                mem_cgroup_put(curr);
                 cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup,
                                 sibling);
                 curr = mem_cgroup_from_cont(cgroup);
-                mem_cgroup_get(curr);
                 goto done;
         }
 
@@ -664,7 +674,6 @@ visit_parent:
         goto visit_parent;
 
 done:
-        root_mem->last_scanned_child = curr;
         return curr;
 }
 
@@ -674,40 +683,46 @@ done:
  * that to reclaim free pages from.
  */
 static struct mem_cgroup *
-mem_cgroup_get_first_node(struct mem_cgroup *root_mem)
+mem_cgroup_get_next_node(struct mem_cgroup *root_mem)
 {
         struct cgroup *cgroup;
-        struct mem_cgroup *ret;
+        struct mem_cgroup *orig, *next;
         bool obsolete;
 
-        obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child);
-
         /*
          * Scan all children under the mem_cgroup mem
          */
         mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
+
+        orig = root_mem->last_scanned_child;
+        obsolete = mem_cgroup_is_obsolete(orig);
+
         if (list_empty(&root_mem->css.cgroup->children)) {
-                ret = root_mem;
+                /*
+                 * root_mem might have children before and last_scanned_child
+                 * may point to one of them. We put it later.
+                 */
+                if (orig)
+                        VM_BUG_ON(!obsolete);
+                next = NULL;
                 goto done;
         }
 
-        if (!root_mem->last_scanned_child || obsolete) {
-
-                if (obsolete && root_mem->last_scanned_child)
-                        mem_cgroup_put(root_mem->last_scanned_child);
-
+        if (!orig || obsolete) {
                 cgroup = list_first_entry(&root_mem->css.cgroup->children,
                                 struct cgroup, sibling);
-                ret = mem_cgroup_from_cont(cgroup);
-                mem_cgroup_get(ret);
+                next = mem_cgroup_from_cont(cgroup);
         } else
-                ret = mem_cgroup_get_next_node(root_mem->last_scanned_child,
-                                root_mem);
+                next = __mem_cgroup_get_next_node(orig, root_mem);
 
 done:
-        root_mem->last_scanned_child = ret;
+        if (next)
+                mem_cgroup_get(next);
+        root_mem->last_scanned_child = next;
+        if (orig)
+                mem_cgroup_put(orig);
         mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
-        return ret;
+        return (next) ? next : root_mem;
 }
 
 static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
@@ -758,28 +773,25 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
          * but there might be left over accounting, even after children
          * have left.
          */
-        ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
+        ret += try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
                                            get_swappiness(root_mem));
         if (mem_cgroup_check_under_limit(root_mem))
-                return 0;
+                return 1;       /* indicate reclaim has succeeded */
         if (!root_mem->use_hierarchy)
                 return ret;
 
-        next_mem = mem_cgroup_get_first_node(root_mem);
+        next_mem = mem_cgroup_get_next_node(root_mem);
 
         while (next_mem != root_mem) {
                 if (mem_cgroup_is_obsolete(next_mem)) {
-                        mem_cgroup_put(next_mem);
-                        next_mem = mem_cgroup_get_first_node(root_mem);
+                        next_mem = mem_cgroup_get_next_node(root_mem);
                         continue;
                 }
-                ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
+                ret += try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
                                                    get_swappiness(next_mem));
                 if (mem_cgroup_check_under_limit(root_mem))
-                        return 0;
-                mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
-                next_mem = mem_cgroup_get_next_node(next_mem, root_mem);
-                mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
+                        return 1;       /* indicate reclaim has succeeded */
+                next_mem = mem_cgroup_get_next_node(root_mem);
         }
         return ret;
 }
@@ -863,6 +875,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 
                 ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
                                                         noswap);
+                if (ret)
+                        continue;
 
                 /*
                  * try_to_free_mem_cgroup_pages() might not give us a full
@@ -979,14 +993,15 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
         if (pc->mem_cgroup != from)
                 goto out;
 
-        css_put(&from->css);
         res_counter_uncharge(&from->res, PAGE_SIZE);
         mem_cgroup_charge_statistics(from, pc, false);
         if (do_swap_account)
                 res_counter_uncharge(&from->memsw, PAGE_SIZE);
+        css_put(&from->css);
+
+        css_get(&to->css);
         pc->mem_cgroup = to;
         mem_cgroup_charge_statistics(to, pc, true);
-        css_get(&to->css);
         ret = 0;
 out:
         unlock_page_cgroup(pc);
@@ -1019,8 +1034,10 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
         if (ret || !parent)
                 return ret;
 
-        if (!get_page_unless_zero(page))
-                return -EBUSY;
+        if (!get_page_unless_zero(page)) {
+                ret = -EBUSY;
+                goto uncharge;
+        }
 
         ret = isolate_lru_page(page);
 
@@ -1029,19 +1046,23 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 
         ret = mem_cgroup_move_account(pc, child, parent);
 
-        /* drop extra refcnt by try_charge() (move_account increment one) */
-        css_put(&parent->css);
         putback_lru_page(page);
         if (!ret) {
                 put_page(page);
+                /* drop extra refcnt by try_charge() */
+                css_put(&parent->css);
                 return 0;
         }
-        /* uncharge if move fails */
+
 cancel:
+        put_page(page);
+uncharge:
+        /* drop extra refcnt by try_charge() */
+        css_put(&parent->css);
+        /* uncharge if move fails */
         res_counter_uncharge(&parent->res, PAGE_SIZE);
         if (do_swap_account)
                 res_counter_uncharge(&parent->memsw, PAGE_SIZE);
-        put_page(page);
         return ret;
 }
 
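
Note: the reworked error paths above form a goto-unwind ladder in which each label releases only what was acquired before the jump to it, so the charge and css reference taken by try_charge and the page reference taken by get_page_unless_zero() are each dropped exactly once on every path. Below is a minimal sketch of that ladder (take_*, drop_* and undo_charge() are hypothetical helpers used purely for illustration, not memcg APIs):

/* Hypothetical helpers, declared only so the sketch is self-contained. */
int take_charge(void);          /* charge the target and take an extra reference */
int take_page(void);            /* pin the page */
int do_move(void);              /* the step that may fail */
void drop_page(void);
void drop_charge_ref(void);
void undo_charge(void);

int move_with_unwind(void)
{
        int ret;

        ret = take_charge();
        if (ret)
                return ret;

        ret = take_page();
        if (ret)
                goto uncharge;          /* only the charge to undo */

        ret = do_move();
        if (ret)
                goto cancel;            /* undo the page ref, then the charge */

        drop_page();
        drop_charge_ref();              /* success: keep the charge itself */
        return 0;

cancel:
        drop_page();
uncharge:
        drop_charge_ref();
        undo_charge();                  /* failure: give the charge back */
        return ret;
}
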
@@ -1971,6 +1992,7 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
 {
         struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
         struct mem_cgroup *parent;
+
         if (val > 100)
                 return -EINVAL;
 
@@ -1978,15 +2000,22 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
                 return -EINVAL;
 
         parent = mem_cgroup_from_cont(cgrp->parent);
+
+        cgroup_lock();
+
         /* If under hierarchy, only empty-root can set this value */
         if ((parent->use_hierarchy) ||
-            (memcg->use_hierarchy && !list_empty(&cgrp->children)))
+            (memcg->use_hierarchy && !list_empty(&cgrp->children))) {
+                cgroup_unlock();
                 return -EINVAL;
+        }
 
         spin_lock(&memcg->reclaim_param_lock);
         memcg->swappiness = val;
         spin_unlock(&memcg->reclaim_param_lock);
 
+        cgroup_unlock();
+
         return 0;
 }
 
@@ -2181,7 +2210,7 @@ static void __init enable_swap_cgroup(void)
 }
 #endif
 
-static struct cgroup_subsys_state *
+static struct cgroup_subsys_state * __ref
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
         struct mem_cgroup *mem, *parent;
@@ -2232,7 +2261,14 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
 static void mem_cgroup_destroy(struct cgroup_subsys *ss,
                                 struct cgroup *cont)
 {
-        mem_cgroup_put(mem_cgroup_from_cont(cont));
+        struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+        struct mem_cgroup *last_scanned_child = mem->last_scanned_child;
+
+        if (last_scanned_child) {
+                VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child));
+                mem_cgroup_put(last_scanned_child);
+        }
+        mem_cgroup_put(mem);
 }
 
 static int mem_cgroup_populate(struct cgroup_subsys *ss,