about summary refs log tree commit diff stats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- mm/memcontrol.c 155
1 files changed, 106 insertions, 49 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e2996b80601f..8e4be9cb2a6a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -202,6 +202,7 @@ pcg_default_flags[NR_CHARGE_TYPE] = {
202 202
203static void mem_cgroup_get(struct mem_cgroup *mem); 203static void mem_cgroup_get(struct mem_cgroup *mem);
204static void mem_cgroup_put(struct mem_cgroup *mem); 204static void mem_cgroup_put(struct mem_cgroup *mem);
205static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
205 206
206static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, 207static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
207 struct page_cgroup *pc, 208 struct page_cgroup *pc,
@@ -358,6 +359,10 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
358 return; 359 return;
359 360
360 pc = lookup_page_cgroup(page); 361 pc = lookup_page_cgroup(page);
362 /*
363 * Used bit is set without atomic ops but after smp_wmb().
364 * For making pc->mem_cgroup visible, insert smp_rmb() here.
365 */
361 smp_rmb(); 366 smp_rmb();
362 /* unused page is not rotated. */ 367 /* unused page is not rotated. */
363 if (!PageCgroupUsed(pc)) 368 if (!PageCgroupUsed(pc))
@@ -374,7 +379,10 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
374 if (mem_cgroup_disabled()) 379 if (mem_cgroup_disabled())
375 return; 380 return;
376 pc = lookup_page_cgroup(page); 381 pc = lookup_page_cgroup(page);
377 /* barrier to sync with "charge" */ 382 /*
383 * Used bit is set without atomic ops but after smp_wmb().
384 * For making pc->mem_cgroup visible, insert smp_rmb() here.
385 */
378 smp_rmb(); 386 smp_rmb();
379 if (!PageCgroupUsed(pc)) 387 if (!PageCgroupUsed(pc))
380 return; 388 return;
@@ -559,6 +567,14 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
559 return NULL; 567 return NULL;
560 568
561 pc = lookup_page_cgroup(page); 569 pc = lookup_page_cgroup(page);
570 /*
571 * Used bit is set without atomic ops but after smp_wmb().
572 * For making pc->mem_cgroup visible, insert smp_rmb() here.
573 */
574 smp_rmb();
575 if (!PageCgroupUsed(pc))
576 return NULL;
577
562 mz = page_cgroup_zoneinfo(pc); 578 mz = page_cgroup_zoneinfo(pc);
563 if (!mz) 579 if (!mz)
564 return NULL; 580 return NULL;
@@ -618,7 +634,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
618 * called with hierarchy_mutex held 634 * called with hierarchy_mutex held
619 */ 635 */
620static struct mem_cgroup * 636static struct mem_cgroup *
621mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) 637__mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
622{ 638{
623 struct cgroup *cgroup, *curr_cgroup, *root_cgroup; 639 struct cgroup *cgroup, *curr_cgroup, *root_cgroup;
624 640
@@ -629,19 +645,16 @@ mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
629 /* 645 /*
630 * Walk down to children 646 * Walk down to children
631 */ 647 */
632 mem_cgroup_put(curr);
633 cgroup = list_entry(curr_cgroup->children.next, 648 cgroup = list_entry(curr_cgroup->children.next,
634 struct cgroup, sibling); 649 struct cgroup, sibling);
635 curr = mem_cgroup_from_cont(cgroup); 650 curr = mem_cgroup_from_cont(cgroup);
636 mem_cgroup_get(curr);
637 goto done; 651 goto done;
638 } 652 }
639 653
640visit_parent: 654visit_parent:
641 if (curr_cgroup == root_cgroup) { 655 if (curr_cgroup == root_cgroup) {
642 mem_cgroup_put(curr); 656 /* caller handles NULL case */
643 curr = root_mem; 657 curr = NULL;
644 mem_cgroup_get(curr);
645 goto done; 658 goto done;
646 } 659 }
647 660
@@ -649,11 +662,9 @@ visit_parent:
649 * Goto next sibling 662 * Goto next sibling
650 */ 663 */
651 if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) { 664 if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) {
652 mem_cgroup_put(curr);
653 cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup, 665 cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup,
654 sibling); 666 sibling);
655 curr = mem_cgroup_from_cont(cgroup); 667 curr = mem_cgroup_from_cont(cgroup);
656 mem_cgroup_get(curr);
657 goto done; 668 goto done;
658 } 669 }
659 670
@@ -664,7 +675,6 @@ visit_parent:
664 goto visit_parent; 675 goto visit_parent;
665 676
666done: 677done:
667 root_mem->last_scanned_child = curr;
668 return curr; 678 return curr;
669} 679}
670 680
@@ -674,40 +684,46 @@ done:
674 * that to reclaim free pages from. 684 * that to reclaim free pages from.
675 */ 685 */
676static struct mem_cgroup * 686static struct mem_cgroup *
677mem_cgroup_get_first_node(struct mem_cgroup *root_mem) 687mem_cgroup_get_next_node(struct mem_cgroup *root_mem)
678{ 688{
679 struct cgroup *cgroup; 689 struct cgroup *cgroup;
680 struct mem_cgroup *ret; 690 struct mem_cgroup *orig, *next;
681 bool obsolete; 691 bool obsolete;
682 692
683 obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child);
684
685 /* 693 /*
686 * Scan all children under the mem_cgroup mem 694 * Scan all children under the mem_cgroup mem
687 */ 695 */
688 mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); 696 mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
697
698 orig = root_mem->last_scanned_child;
699 obsolete = mem_cgroup_is_obsolete(orig);
700
689 if (list_empty(&root_mem->css.cgroup->children)) { 701 if (list_empty(&root_mem->css.cgroup->children)) {
690 ret = root_mem; 702 /*
703 * root_mem might have children before and last_scanned_child
704 * may point to one of them. We put it later.
705 */
706 if (orig)
707 VM_BUG_ON(!obsolete);
708 next = NULL;
691 goto done; 709 goto done;
692 } 710 }
693 711
694 if (!root_mem->last_scanned_child || obsolete) { 712 if (!orig || obsolete) {
695
696 if (obsolete && root_mem->last_scanned_child)
697 mem_cgroup_put(root_mem->last_scanned_child);
698
699 cgroup = list_first_entry(&root_mem->css.cgroup->children, 713 cgroup = list_first_entry(&root_mem->css.cgroup->children,
700 struct cgroup, sibling); 714 struct cgroup, sibling);
701 ret = mem_cgroup_from_cont(cgroup); 715 next = mem_cgroup_from_cont(cgroup);
702 mem_cgroup_get(ret);
703 } else 716 } else
704 ret = mem_cgroup_get_next_node(root_mem->last_scanned_child, 717 next = __mem_cgroup_get_next_node(orig, root_mem);
705 root_mem);
706 718
707done: 719done:
708 root_mem->last_scanned_child = ret; 720 if (next)
721 mem_cgroup_get(next);
722 root_mem->last_scanned_child = next;
723 if (orig)
724 mem_cgroup_put(orig);
709 mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); 725 mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
710 return ret; 726 return (next) ? next : root_mem;
711} 727}
712 728
713static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) 729static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
@@ -758,28 +774,25 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
758 * but there might be left over accounting, even after children 774 * but there might be left over accounting, even after children
759 * have left. 775 * have left.
760 */ 776 */
761 ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap, 777 ret += try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
762 get_swappiness(root_mem)); 778 get_swappiness(root_mem));
763 if (mem_cgroup_check_under_limit(root_mem)) 779 if (mem_cgroup_check_under_limit(root_mem))
764 return 0; 780 return 1; /* indicate reclaim has succeeded */
765 if (!root_mem->use_hierarchy) 781 if (!root_mem->use_hierarchy)
766 return ret; 782 return ret;
767 783
768 next_mem = mem_cgroup_get_first_node(root_mem); 784 next_mem = mem_cgroup_get_next_node(root_mem);
769 785
770 while (next_mem != root_mem) { 786 while (next_mem != root_mem) {
771 if (mem_cgroup_is_obsolete(next_mem)) { 787 if (mem_cgroup_is_obsolete(next_mem)) {
772 mem_cgroup_put(next_mem); 788 next_mem = mem_cgroup_get_next_node(root_mem);
773 next_mem = mem_cgroup_get_first_node(root_mem);
774 continue; 789 continue;
775 } 790 }
776 ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, 791 ret += try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
777 get_swappiness(next_mem)); 792 get_swappiness(next_mem));
778 if (mem_cgroup_check_under_limit(root_mem)) 793 if (mem_cgroup_check_under_limit(root_mem))
779 return 0; 794 return 1; /* indicate reclaim has succeeded */
780 mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); 795 next_mem = mem_cgroup_get_next_node(root_mem);
781 next_mem = mem_cgroup_get_next_node(next_mem, root_mem);
782 mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
783 } 796 }
784 return ret; 797 return ret;
785} 798}
@@ -863,6 +876,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
863 876
864 ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask, 877 ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
865 noswap); 878 noswap);
879 if (ret)
880 continue;
866 881
867 /* 882 /*
868 * try_to_free_mem_cgroup_pages() might not give us a full 883 * try_to_free_mem_cgroup_pages() might not give us a full
@@ -979,14 +994,15 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
979 if (pc->mem_cgroup != from) 994 if (pc->mem_cgroup != from)
980 goto out; 995 goto out;
981 996
982 css_put(&from->css);
983 res_counter_uncharge(&from->res, PAGE_SIZE); 997 res_counter_uncharge(&from->res, PAGE_SIZE);
984 mem_cgroup_charge_statistics(from, pc, false); 998 mem_cgroup_charge_statistics(from, pc, false);
985 if (do_swap_account) 999 if (do_swap_account)
986 res_counter_uncharge(&from->memsw, PAGE_SIZE); 1000 res_counter_uncharge(&from->memsw, PAGE_SIZE);
1001 css_put(&from->css);
1002
1003 css_get(&to->css);
987 pc->mem_cgroup = to; 1004 pc->mem_cgroup = to;
988 mem_cgroup_charge_statistics(to, pc, true); 1005 mem_cgroup_charge_statistics(to, pc, true);
989 css_get(&to->css);
990 ret = 0; 1006 ret = 0;
991out: 1007out:
992 unlock_page_cgroup(pc); 1008 unlock_page_cgroup(pc);
@@ -1019,8 +1035,10 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
1019 if (ret || !parent) 1035 if (ret || !parent)
1020 return ret; 1036 return ret;
1021 1037
1022 if (!get_page_unless_zero(page)) 1038 if (!get_page_unless_zero(page)) {
1023 return -EBUSY; 1039 ret = -EBUSY;
1040 goto uncharge;
1041 }
1024 1042
1025 ret = isolate_lru_page(page); 1043 ret = isolate_lru_page(page);
1026 1044
@@ -1029,19 +1047,23 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
1029 1047
1030 ret = mem_cgroup_move_account(pc, child, parent); 1048 ret = mem_cgroup_move_account(pc, child, parent);
1031 1049
1032 /* drop extra refcnt by try_charge() (move_account increment one) */
1033 css_put(&parent->css);
1034 putback_lru_page(page); 1050 putback_lru_page(page);
1035 if (!ret) { 1051 if (!ret) {
1036 put_page(page); 1052 put_page(page);
1053 /* drop extra refcnt by try_charge() */
1054 css_put(&parent->css);
1037 return 0; 1055 return 0;
1038 } 1056 }
1039 /* uncharge if move fails */ 1057
1040cancel: 1058cancel:
1059 put_page(page);
1060uncharge:
1061 /* drop extra refcnt by try_charge() */
1062 css_put(&parent->css);
1063 /* uncharge if move fails */
1041 res_counter_uncharge(&parent->res, PAGE_SIZE); 1064 res_counter_uncharge(&parent->res, PAGE_SIZE);
1042 if (do_swap_account) 1065 if (do_swap_account)
1043 res_counter_uncharge(&parent->memsw, PAGE_SIZE); 1066 res_counter_uncharge(&parent->memsw, PAGE_SIZE);
1044 put_page(page);
1045 return ret; 1067 return ret;
1046} 1068}
1047 1069
@@ -1663,7 +1685,7 @@ move_account:
1663 /* This is for making all *used* pages to be on LRU. */ 1685 /* This is for making all *used* pages to be on LRU. */
1664 lru_add_drain_all(); 1686 lru_add_drain_all();
1665 ret = 0; 1687 ret = 0;
1666 for_each_node_state(node, N_POSSIBLE) { 1688 for_each_node_state(node, N_HIGH_MEMORY) {
1667 for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { 1689 for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
1668 enum lru_list l; 1690 enum lru_list l;
1669 for_each_lru(l) { 1691 for_each_lru(l) {
@@ -1971,6 +1993,7 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
1971{ 1993{
1972 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); 1994 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
1973 struct mem_cgroup *parent; 1995 struct mem_cgroup *parent;
1996
1974 if (val > 100) 1997 if (val > 100)
1975 return -EINVAL; 1998 return -EINVAL;
1976 1999
@@ -1978,15 +2001,22 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
1978 return -EINVAL; 2001 return -EINVAL;
1979 2002
1980 parent = mem_cgroup_from_cont(cgrp->parent); 2003 parent = mem_cgroup_from_cont(cgrp->parent);
2004
2005 cgroup_lock();
2006
1981 /* If under hierarchy, only empty-root can set this value */ 2007 /* If under hierarchy, only empty-root can set this value */
1982 if ((parent->use_hierarchy) || 2008 if ((parent->use_hierarchy) ||
1983 (memcg->use_hierarchy && !list_empty(&cgrp->children))) 2009 (memcg->use_hierarchy && !list_empty(&cgrp->children))) {
2010 cgroup_unlock();
1984 return -EINVAL; 2011 return -EINVAL;
2012 }
1985 2013
1986 spin_lock(&memcg->reclaim_param_lock); 2014 spin_lock(&memcg->reclaim_param_lock);
1987 memcg->swappiness = val; 2015 memcg->swappiness = val;
1988 spin_unlock(&memcg->reclaim_param_lock); 2016 spin_unlock(&memcg->reclaim_param_lock);
1989 2017
2018 cgroup_unlock();
2019
1990 return 0; 2020 return 0;
1991} 2021}
1992 2022
@@ -2164,10 +2194,23 @@ static void mem_cgroup_get(struct mem_cgroup *mem)
2164 2194
2165static void mem_cgroup_put(struct mem_cgroup *mem) 2195static void mem_cgroup_put(struct mem_cgroup *mem)
2166{ 2196{
2167 if (atomic_dec_and_test(&mem->refcnt)) 2197 if (atomic_dec_and_test(&mem->refcnt)) {
2198 struct mem_cgroup *parent = parent_mem_cgroup(mem);
2168 __mem_cgroup_free(mem); 2199 __mem_cgroup_free(mem);
2200 if (parent)
2201 mem_cgroup_put(parent);
2202 }
2169} 2203}
2170 2204
2205/*
2206 * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
2207 */
2208static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem)
2209{
2210 if (!mem->res.parent)
2211 return NULL;
2212 return mem_cgroup_from_res_counter(mem->res.parent, res);
2213}
2171 2214
2172#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 2215#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
2173static void __init enable_swap_cgroup(void) 2216static void __init enable_swap_cgroup(void)
@@ -2181,7 +2224,7 @@ static void __init enable_swap_cgroup(void)
2181} 2224}
2182#endif 2225#endif
2183 2226
2184static struct cgroup_subsys_state * 2227static struct cgroup_subsys_state * __ref
2185mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) 2228mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
2186{ 2229{
2187 struct mem_cgroup *mem, *parent; 2230 struct mem_cgroup *mem, *parent;
@@ -2206,6 +2249,13 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
2206 if (parent && parent->use_hierarchy) { 2249 if (parent && parent->use_hierarchy) {
2207 res_counter_init(&mem->res, &parent->res); 2250 res_counter_init(&mem->res, &parent->res);
2208 res_counter_init(&mem->memsw, &parent->memsw); 2251 res_counter_init(&mem->memsw, &parent->memsw);
2252 /*
2253 * We increment refcnt of the parent to ensure that we can
2254 * safely access it on res_counter_charge/uncharge.
2255 * This refcnt will be decremented when freeing this
2256 * mem_cgroup(see mem_cgroup_put).
2257 */
2258 mem_cgroup_get(parent);
2209 } else { 2259 } else {
2210 res_counter_init(&mem->res, NULL); 2260 res_counter_init(&mem->res, NULL);
2211 res_counter_init(&mem->memsw, NULL); 2261 res_counter_init(&mem->memsw, NULL);
@@ -2232,7 +2282,14 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
2232static void mem_cgroup_destroy(struct cgroup_subsys *ss, 2282static void mem_cgroup_destroy(struct cgroup_subsys *ss,
2233 struct cgroup *cont) 2283 struct cgroup *cont)
2234{ 2284{
2235 mem_cgroup_put(mem_cgroup_from_cont(cont)); 2285 struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
2286 struct mem_cgroup *last_scanned_child = mem->last_scanned_child;
2287
2288 if (last_scanned_child) {
2289 VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child));
2290 mem_cgroup_put(last_scanned_child);
2291 }
2292 mem_cgroup_put(mem);
2236} 2293}
2237 2294
2238static int mem_cgroup_populate(struct cgroup_subsys *ss, 2295static int mem_cgroup_populate(struct cgroup_subsys *ss,