about summary refs log tree commit diff stats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- mm/memcontrol.c 130
1 files changed, 83 insertions, 47 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e2996b80601f..4d0ea3ceba6d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -358,6 +358,10 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
358 return; 358 return;
359 359
360 pc = lookup_page_cgroup(page); 360 pc = lookup_page_cgroup(page);
361 /*
362 * Used bit is set without atomic ops but after smp_wmb().
363 * For making pc->mem_cgroup visible, insert smp_rmb() here.
364 */
361 smp_rmb(); 365 smp_rmb();
362 /* unused page is not rotated. */ 366 /* unused page is not rotated. */
363 if (!PageCgroupUsed(pc)) 367 if (!PageCgroupUsed(pc))
@@ -374,7 +378,10 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
374 if (mem_cgroup_disabled()) 378 if (mem_cgroup_disabled())
375 return; 379 return;
376 pc = lookup_page_cgroup(page); 380 pc = lookup_page_cgroup(page);
377 /* barrier to sync with "charge" */ 381 /*
382 * Used bit is set without atomic ops but after smp_wmb().
383 * For making pc->mem_cgroup visible, insert smp_rmb() here.
384 */
378 smp_rmb(); 385 smp_rmb();
379 if (!PageCgroupUsed(pc)) 386 if (!PageCgroupUsed(pc))
380 return; 387 return;
@@ -559,6 +566,14 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
559 return NULL; 566 return NULL;
560 567
561 pc = lookup_page_cgroup(page); 568 pc = lookup_page_cgroup(page);
569 /*
570 * Used bit is set without atomic ops but after smp_wmb().
571 * For making pc->mem_cgroup visible, insert smp_rmb() here.
572 */
573 smp_rmb();
574 if (!PageCgroupUsed(pc))
575 return NULL;
576
562 mz = page_cgroup_zoneinfo(pc); 577 mz = page_cgroup_zoneinfo(pc);
563 if (!mz) 578 if (!mz)
564 return NULL; 579 return NULL;
@@ -618,7 +633,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
618 * called with hierarchy_mutex held 633 * called with hierarchy_mutex held
619 */ 634 */
620static struct mem_cgroup * 635static struct mem_cgroup *
621mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) 636__mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
622{ 637{
623 struct cgroup *cgroup, *curr_cgroup, *root_cgroup; 638 struct cgroup *cgroup, *curr_cgroup, *root_cgroup;
624 639
@@ -629,19 +644,16 @@ mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
629 /* 644 /*
630 * Walk down to children 645 * Walk down to children
631 */ 646 */
632 mem_cgroup_put(curr);
633 cgroup = list_entry(curr_cgroup->children.next, 647 cgroup = list_entry(curr_cgroup->children.next,
634 struct cgroup, sibling); 648 struct cgroup, sibling);
635 curr = mem_cgroup_from_cont(cgroup); 649 curr = mem_cgroup_from_cont(cgroup);
636 mem_cgroup_get(curr);
637 goto done; 650 goto done;
638 } 651 }
639 652
640visit_parent: 653visit_parent:
641 if (curr_cgroup == root_cgroup) { 654 if (curr_cgroup == root_cgroup) {
642 mem_cgroup_put(curr); 655 /* caller handles NULL case */
643 curr = root_mem; 656 curr = NULL;
644 mem_cgroup_get(curr);
645 goto done; 657 goto done;
646 } 658 }
647 659
@@ -649,11 +661,9 @@ visit_parent:
649 * Goto next sibling 661 * Goto next sibling
650 */ 662 */
651 if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) { 663 if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) {
652 mem_cgroup_put(curr);
653 cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup, 664 cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup,
654 sibling); 665 sibling);
655 curr = mem_cgroup_from_cont(cgroup); 666 curr = mem_cgroup_from_cont(cgroup);
656 mem_cgroup_get(curr);
657 goto done; 667 goto done;
658 } 668 }
659 669
@@ -664,7 +674,6 @@ visit_parent:
664 goto visit_parent; 674 goto visit_parent;
665 675
666done: 676done:
667 root_mem->last_scanned_child = curr;
668 return curr; 677 return curr;
669} 678}
670 679
@@ -674,40 +683,46 @@ done:
674 * that to reclaim free pages from. 683 * that to reclaim free pages from.
675 */ 684 */
676static struct mem_cgroup * 685static struct mem_cgroup *
677mem_cgroup_get_first_node(struct mem_cgroup *root_mem) 686mem_cgroup_get_next_node(struct mem_cgroup *root_mem)
678{ 687{
679 struct cgroup *cgroup; 688 struct cgroup *cgroup;
680 struct mem_cgroup *ret; 689 struct mem_cgroup *orig, *next;
681 bool obsolete; 690 bool obsolete;
682 691
683 obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child);
684
685 /* 692 /*
686 * Scan all children under the mem_cgroup mem 693 * Scan all children under the mem_cgroup mem
687 */ 694 */
688 mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); 695 mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
696
697 orig = root_mem->last_scanned_child;
698 obsolete = mem_cgroup_is_obsolete(orig);
699
689 if (list_empty(&root_mem->css.cgroup->children)) { 700 if (list_empty(&root_mem->css.cgroup->children)) {
690 ret = root_mem; 701 /*
702 * root_mem might have children before and last_scanned_child
703 * may point to one of them. We put it later.
704 */
705 if (orig)
706 VM_BUG_ON(!obsolete);
707 next = NULL;
691 goto done; 708 goto done;
692 } 709 }
693 710
694 if (!root_mem->last_scanned_child || obsolete) { 711 if (!orig || obsolete) {
695
696 if (obsolete && root_mem->last_scanned_child)
697 mem_cgroup_put(root_mem->last_scanned_child);
698
699 cgroup = list_first_entry(&root_mem->css.cgroup->children, 712 cgroup = list_first_entry(&root_mem->css.cgroup->children,
700 struct cgroup, sibling); 713 struct cgroup, sibling);
701 ret = mem_cgroup_from_cont(cgroup); 714 next = mem_cgroup_from_cont(cgroup);
702 mem_cgroup_get(ret);
703 } else 715 } else
704 ret = mem_cgroup_get_next_node(root_mem->last_scanned_child, 716 next = __mem_cgroup_get_next_node(orig, root_mem);
705 root_mem);
706 717
707done: 718done:
708 root_mem->last_scanned_child = ret; 719 if (next)
720 mem_cgroup_get(next);
721 root_mem->last_scanned_child = next;
722 if (orig)
723 mem_cgroup_put(orig);
709 mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); 724 mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
710 return ret; 725 return (next) ? next : root_mem;
711} 726}
712 727
713static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) 728static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
@@ -758,28 +773,25 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
758 * but there might be left over accounting, even after children 773 * but there might be left over accounting, even after children
759 * have left. 774 * have left.
760 */ 775 */
761 ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap, 776 ret += try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
762 get_swappiness(root_mem)); 777 get_swappiness(root_mem));
763 if (mem_cgroup_check_under_limit(root_mem)) 778 if (mem_cgroup_check_under_limit(root_mem))
764 return 0; 779 return 1; /* indicate reclaim has succeeded */
765 if (!root_mem->use_hierarchy) 780 if (!root_mem->use_hierarchy)
766 return ret; 781 return ret;
767 782
768 next_mem = mem_cgroup_get_first_node(root_mem); 783 next_mem = mem_cgroup_get_next_node(root_mem);
769 784
770 while (next_mem != root_mem) { 785 while (next_mem != root_mem) {
771 if (mem_cgroup_is_obsolete(next_mem)) { 786 if (mem_cgroup_is_obsolete(next_mem)) {
772 mem_cgroup_put(next_mem); 787 next_mem = mem_cgroup_get_next_node(root_mem);
773 next_mem = mem_cgroup_get_first_node(root_mem);
774 continue; 788 continue;
775 } 789 }
776 ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, 790 ret += try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
777 get_swappiness(next_mem)); 791 get_swappiness(next_mem));
778 if (mem_cgroup_check_under_limit(root_mem)) 792 if (mem_cgroup_check_under_limit(root_mem))
779 return 0; 793 return 1; /* indicate reclaim has succeeded */
780 mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); 794 next_mem = mem_cgroup_get_next_node(root_mem);
781 next_mem = mem_cgroup_get_next_node(next_mem, root_mem);
782 mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
783 } 795 }
784 return ret; 796 return ret;
785} 797}
@@ -863,6 +875,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
863 875
864 ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask, 876 ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
865 noswap); 877 noswap);
878 if (ret)
879 continue;
866 880
867 /* 881 /*
868 * try_to_free_mem_cgroup_pages() might not give us a full 882 * try_to_free_mem_cgroup_pages() might not give us a full
@@ -979,14 +993,15 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
979 if (pc->mem_cgroup != from) 993 if (pc->mem_cgroup != from)
980 goto out; 994 goto out;
981 995
982 css_put(&from->css);
983 res_counter_uncharge(&from->res, PAGE_SIZE); 996 res_counter_uncharge(&from->res, PAGE_SIZE);
984 mem_cgroup_charge_statistics(from, pc, false); 997 mem_cgroup_charge_statistics(from, pc, false);
985 if (do_swap_account) 998 if (do_swap_account)
986 res_counter_uncharge(&from->memsw, PAGE_SIZE); 999 res_counter_uncharge(&from->memsw, PAGE_SIZE);
1000 css_put(&from->css);
1001
1002 css_get(&to->css);
987 pc->mem_cgroup = to; 1003 pc->mem_cgroup = to;
988 mem_cgroup_charge_statistics(to, pc, true); 1004 mem_cgroup_charge_statistics(to, pc, true);
989 css_get(&to->css);
990 ret = 0; 1005 ret = 0;
991out: 1006out:
992 unlock_page_cgroup(pc); 1007 unlock_page_cgroup(pc);
@@ -1019,8 +1034,10 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
1019 if (ret || !parent) 1034 if (ret || !parent)
1020 return ret; 1035 return ret;
1021 1036
1022 if (!get_page_unless_zero(page)) 1037 if (!get_page_unless_zero(page)) {
1023 return -EBUSY; 1038 ret = -EBUSY;
1039 goto uncharge;
1040 }
1024 1041
1025 ret = isolate_lru_page(page); 1042 ret = isolate_lru_page(page);
1026 1043
@@ -1029,19 +1046,23 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
1029 1046
1030 ret = mem_cgroup_move_account(pc, child, parent); 1047 ret = mem_cgroup_move_account(pc, child, parent);
1031 1048
1032 /* drop extra refcnt by try_charge() (move_account increment one) */
1033 css_put(&parent->css);
1034 putback_lru_page(page); 1049 putback_lru_page(page);
1035 if (!ret) { 1050 if (!ret) {
1036 put_page(page); 1051 put_page(page);
1052 /* drop extra refcnt by try_charge() */
1053 css_put(&parent->css);
1037 return 0; 1054 return 0;
1038 } 1055 }
1039 /* uncharge if move fails */ 1056
1040cancel: 1057cancel:
1058 put_page(page);
1059uncharge:
1060 /* drop extra refcnt by try_charge() */
1061 css_put(&parent->css);
1062 /* uncharge if move fails */
1041 res_counter_uncharge(&parent->res, PAGE_SIZE); 1063 res_counter_uncharge(&parent->res, PAGE_SIZE);
1042 if (do_swap_account) 1064 if (do_swap_account)
1043 res_counter_uncharge(&parent->memsw, PAGE_SIZE); 1065 res_counter_uncharge(&parent->memsw, PAGE_SIZE);
1044 put_page(page);
1045 return ret; 1066 return ret;
1046} 1067}
1047 1068
@@ -1971,6 +1992,7 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
1971{ 1992{
1972 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); 1993 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
1973 struct mem_cgroup *parent; 1994 struct mem_cgroup *parent;
1995
1974 if (val > 100) 1996 if (val > 100)
1975 return -EINVAL; 1997 return -EINVAL;
1976 1998
@@ -1978,15 +2000,22 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
1978 return -EINVAL; 2000 return -EINVAL;
1979 2001
1980 parent = mem_cgroup_from_cont(cgrp->parent); 2002 parent = mem_cgroup_from_cont(cgrp->parent);
2003
2004 cgroup_lock();
2005
1981 /* If under hierarchy, only empty-root can set this value */ 2006 /* If under hierarchy, only empty-root can set this value */
1982 if ((parent->use_hierarchy) || 2007 if ((parent->use_hierarchy) ||
1983 (memcg->use_hierarchy && !list_empty(&cgrp->children))) 2008 (memcg->use_hierarchy && !list_empty(&cgrp->children))) {
2009 cgroup_unlock();
1984 return -EINVAL; 2010 return -EINVAL;
2011 }
1985 2012
1986 spin_lock(&memcg->reclaim_param_lock); 2013 spin_lock(&memcg->reclaim_param_lock);
1987 memcg->swappiness = val; 2014 memcg->swappiness = val;
1988 spin_unlock(&memcg->reclaim_param_lock); 2015 spin_unlock(&memcg->reclaim_param_lock);
1989 2016
2017 cgroup_unlock();
2018
1990 return 0; 2019 return 0;
1991} 2020}
1992 2021
@@ -2181,7 +2210,7 @@ static void __init enable_swap_cgroup(void)
2181} 2210}
2182#endif 2211#endif
2183 2212
2184static struct cgroup_subsys_state * 2213static struct cgroup_subsys_state * __ref
2185mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) 2214mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
2186{ 2215{
2187 struct mem_cgroup *mem, *parent; 2216 struct mem_cgroup *mem, *parent;
@@ -2232,7 +2261,14 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
2232static void mem_cgroup_destroy(struct cgroup_subsys *ss, 2261static void mem_cgroup_destroy(struct cgroup_subsys *ss,
2233 struct cgroup *cont) 2262 struct cgroup *cont)
2234{ 2263{
2235 mem_cgroup_put(mem_cgroup_from_cont(cont)); 2264 struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
2265 struct mem_cgroup *last_scanned_child = mem->last_scanned_child;
2266
2267 if (last_scanned_child) {
2268 VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child));
2269 mem_cgroup_put(last_scanned_child);
2270 }
2271 mem_cgroup_put(mem);
2236} 2272}
2237 2273
2238static int mem_cgroup_populate(struct cgroup_subsys *ss, 2274static int mem_cgroup_populate(struct cgroup_subsys *ss,