diff options
Diffstat (limited to 'mm/memcontrol.c')
| -rw-r--r-- | mm/memcontrol.c | 130 |
1 files changed, 83 insertions, 47 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e2996b80601f..4d0ea3ceba6d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
| @@ -358,6 +358,10 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru) | |||
| 358 | return; | 358 | return; |
| 359 | 359 | ||
| 360 | pc = lookup_page_cgroup(page); | 360 | pc = lookup_page_cgroup(page); |
| 361 | /* | ||
| 362 | * Used bit is set without atomic ops but after smp_wmb(). | ||
| 363 | * For making pc->mem_cgroup visible, insert smp_rmb() here. | ||
| 364 | */ | ||
| 361 | smp_rmb(); | 365 | smp_rmb(); |
| 362 | /* unused page is not rotated. */ | 366 | /* unused page is not rotated. */ |
| 363 | if (!PageCgroupUsed(pc)) | 367 | if (!PageCgroupUsed(pc)) |
| @@ -374,7 +378,10 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) | |||
| 374 | if (mem_cgroup_disabled()) | 378 | if (mem_cgroup_disabled()) |
| 375 | return; | 379 | return; |
| 376 | pc = lookup_page_cgroup(page); | 380 | pc = lookup_page_cgroup(page); |
| 377 | /* barrier to sync with "charge" */ | 381 | /* |
| 382 | * Used bit is set without atomic ops but after smp_wmb(). | ||
| 383 | * For making pc->mem_cgroup visible, insert smp_rmb() here. | ||
| 384 | */ | ||
| 378 | smp_rmb(); | 385 | smp_rmb(); |
| 379 | if (!PageCgroupUsed(pc)) | 386 | if (!PageCgroupUsed(pc)) |
| 380 | return; | 387 | return; |
| @@ -559,6 +566,14 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page) | |||
| 559 | return NULL; | 566 | return NULL; |
| 560 | 567 | ||
| 561 | pc = lookup_page_cgroup(page); | 568 | pc = lookup_page_cgroup(page); |
| 569 | /* | ||
| 570 | * Used bit is set without atomic ops but after smp_wmb(). | ||
| 571 | * For making pc->mem_cgroup visible, insert smp_rmb() here. | ||
| 572 | */ | ||
| 573 | smp_rmb(); | ||
| 574 | if (!PageCgroupUsed(pc)) | ||
| 575 | return NULL; | ||
| 576 | |||
| 562 | mz = page_cgroup_zoneinfo(pc); | 577 | mz = page_cgroup_zoneinfo(pc); |
| 563 | if (!mz) | 578 | if (!mz) |
| 564 | return NULL; | 579 | return NULL; |
| @@ -618,7 +633,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
| 618 | * called with hierarchy_mutex held | 633 | * called with hierarchy_mutex held |
| 619 | */ | 634 | */ |
| 620 | static struct mem_cgroup * | 635 | static struct mem_cgroup * |
| 621 | mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) | 636 | __mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) |
| 622 | { | 637 | { |
| 623 | struct cgroup *cgroup, *curr_cgroup, *root_cgroup; | 638 | struct cgroup *cgroup, *curr_cgroup, *root_cgroup; |
| 624 | 639 | ||
| @@ -629,19 +644,16 @@ mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) | |||
| 629 | /* | 644 | /* |
| 630 | * Walk down to children | 645 | * Walk down to children |
| 631 | */ | 646 | */ |
| 632 | mem_cgroup_put(curr); | ||
| 633 | cgroup = list_entry(curr_cgroup->children.next, | 647 | cgroup = list_entry(curr_cgroup->children.next, |
| 634 | struct cgroup, sibling); | 648 | struct cgroup, sibling); |
| 635 | curr = mem_cgroup_from_cont(cgroup); | 649 | curr = mem_cgroup_from_cont(cgroup); |
| 636 | mem_cgroup_get(curr); | ||
| 637 | goto done; | 650 | goto done; |
| 638 | } | 651 | } |
| 639 | 652 | ||
| 640 | visit_parent: | 653 | visit_parent: |
| 641 | if (curr_cgroup == root_cgroup) { | 654 | if (curr_cgroup == root_cgroup) { |
| 642 | mem_cgroup_put(curr); | 655 | /* caller handles NULL case */ |
| 643 | curr = root_mem; | 656 | curr = NULL; |
| 644 | mem_cgroup_get(curr); | ||
| 645 | goto done; | 657 | goto done; |
| 646 | } | 658 | } |
| 647 | 659 | ||
| @@ -649,11 +661,9 @@ visit_parent: | |||
| 649 | * Goto next sibling | 661 | * Goto next sibling |
| 650 | */ | 662 | */ |
| 651 | if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) { | 663 | if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) { |
| 652 | mem_cgroup_put(curr); | ||
| 653 | cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup, | 664 | cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup, |
| 654 | sibling); | 665 | sibling); |
| 655 | curr = mem_cgroup_from_cont(cgroup); | 666 | curr = mem_cgroup_from_cont(cgroup); |
| 656 | mem_cgroup_get(curr); | ||
| 657 | goto done; | 667 | goto done; |
| 658 | } | 668 | } |
| 659 | 669 | ||
| @@ -664,7 +674,6 @@ visit_parent: | |||
| 664 | goto visit_parent; | 674 | goto visit_parent; |
| 665 | 675 | ||
| 666 | done: | 676 | done: |
| 667 | root_mem->last_scanned_child = curr; | ||
| 668 | return curr; | 677 | return curr; |
| 669 | } | 678 | } |
| 670 | 679 | ||
| @@ -674,40 +683,46 @@ done: | |||
| 674 | * that to reclaim free pages from. | 683 | * that to reclaim free pages from. |
| 675 | */ | 684 | */ |
| 676 | static struct mem_cgroup * | 685 | static struct mem_cgroup * |
| 677 | mem_cgroup_get_first_node(struct mem_cgroup *root_mem) | 686 | mem_cgroup_get_next_node(struct mem_cgroup *root_mem) |
| 678 | { | 687 | { |
| 679 | struct cgroup *cgroup; | 688 | struct cgroup *cgroup; |
| 680 | struct mem_cgroup *ret; | 689 | struct mem_cgroup *orig, *next; |
| 681 | bool obsolete; | 690 | bool obsolete; |
| 682 | 691 | ||
| 683 | obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child); | ||
| 684 | |||
| 685 | /* | 692 | /* |
| 686 | * Scan all children under the mem_cgroup mem | 693 | * Scan all children under the mem_cgroup mem |
| 687 | */ | 694 | */ |
| 688 | mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); | 695 | mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); |
| 696 | |||
| 697 | orig = root_mem->last_scanned_child; | ||
| 698 | obsolete = mem_cgroup_is_obsolete(orig); | ||
| 699 | |||
| 689 | if (list_empty(&root_mem->css.cgroup->children)) { | 700 | if (list_empty(&root_mem->css.cgroup->children)) { |
| 690 | ret = root_mem; | 701 | /* |
| 702 | * root_mem might have children before and last_scanned_child | ||
| 703 | * may point to one of them. We put it later. | ||
| 704 | */ | ||
| 705 | if (orig) | ||
| 706 | VM_BUG_ON(!obsolete); | ||
| 707 | next = NULL; | ||
| 691 | goto done; | 708 | goto done; |
| 692 | } | 709 | } |
| 693 | 710 | ||
| 694 | if (!root_mem->last_scanned_child || obsolete) { | 711 | if (!orig || obsolete) { |
| 695 | |||
| 696 | if (obsolete && root_mem->last_scanned_child) | ||
| 697 | mem_cgroup_put(root_mem->last_scanned_child); | ||
| 698 | |||
| 699 | cgroup = list_first_entry(&root_mem->css.cgroup->children, | 712 | cgroup = list_first_entry(&root_mem->css.cgroup->children, |
| 700 | struct cgroup, sibling); | 713 | struct cgroup, sibling); |
| 701 | ret = mem_cgroup_from_cont(cgroup); | 714 | next = mem_cgroup_from_cont(cgroup); |
| 702 | mem_cgroup_get(ret); | ||
| 703 | } else | 715 | } else |
| 704 | ret = mem_cgroup_get_next_node(root_mem->last_scanned_child, | 716 | next = __mem_cgroup_get_next_node(orig, root_mem); |
| 705 | root_mem); | ||
| 706 | 717 | ||
| 707 | done: | 718 | done: |
| 708 | root_mem->last_scanned_child = ret; | 719 | if (next) |
| 720 | mem_cgroup_get(next); | ||
| 721 | root_mem->last_scanned_child = next; | ||
| 722 | if (orig) | ||
| 723 | mem_cgroup_put(orig); | ||
| 709 | mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); | 724 | mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); |
| 710 | return ret; | 725 | return (next) ? next : root_mem; |
| 711 | } | 726 | } |
| 712 | 727 | ||
| 713 | static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) | 728 | static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) |
| @@ -758,28 +773,25 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
| 758 | * but there might be left over accounting, even after children | 773 | * but there might be left over accounting, even after children |
| 759 | * have left. | 774 | * have left. |
| 760 | */ | 775 | */ |
| 761 | ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap, | 776 | ret += try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap, |
| 762 | get_swappiness(root_mem)); | 777 | get_swappiness(root_mem)); |
| 763 | if (mem_cgroup_check_under_limit(root_mem)) | 778 | if (mem_cgroup_check_under_limit(root_mem)) |
| 764 | return 0; | 779 | return 1; /* indicate reclaim has succeeded */ |
| 765 | if (!root_mem->use_hierarchy) | 780 | if (!root_mem->use_hierarchy) |
| 766 | return ret; | 781 | return ret; |
| 767 | 782 | ||
| 768 | next_mem = mem_cgroup_get_first_node(root_mem); | 783 | next_mem = mem_cgroup_get_next_node(root_mem); |
| 769 | 784 | ||
| 770 | while (next_mem != root_mem) { | 785 | while (next_mem != root_mem) { |
| 771 | if (mem_cgroup_is_obsolete(next_mem)) { | 786 | if (mem_cgroup_is_obsolete(next_mem)) { |
| 772 | mem_cgroup_put(next_mem); | 787 | next_mem = mem_cgroup_get_next_node(root_mem); |
| 773 | next_mem = mem_cgroup_get_first_node(root_mem); | ||
| 774 | continue; | 788 | continue; |
| 775 | } | 789 | } |
| 776 | ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, | 790 | ret += try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, |
| 777 | get_swappiness(next_mem)); | 791 | get_swappiness(next_mem)); |
| 778 | if (mem_cgroup_check_under_limit(root_mem)) | 792 | if (mem_cgroup_check_under_limit(root_mem)) |
| 779 | return 0; | 793 | return 1; /* indicate reclaim has succeeded */ |
| 780 | mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); | 794 | next_mem = mem_cgroup_get_next_node(root_mem); |
| 781 | next_mem = mem_cgroup_get_next_node(next_mem, root_mem); | ||
| 782 | mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); | ||
| 783 | } | 795 | } |
| 784 | return ret; | 796 | return ret; |
| 785 | } | 797 | } |
| @@ -863,6 +875,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
| 863 | 875 | ||
| 864 | ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask, | 876 | ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask, |
| 865 | noswap); | 877 | noswap); |
| 878 | if (ret) | ||
| 879 | continue; | ||
| 866 | 880 | ||
| 867 | /* | 881 | /* |
| 868 | * try_to_free_mem_cgroup_pages() might not give us a full | 882 | * try_to_free_mem_cgroup_pages() might not give us a full |
| @@ -979,14 +993,15 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
| 979 | if (pc->mem_cgroup != from) | 993 | if (pc->mem_cgroup != from) |
| 980 | goto out; | 994 | goto out; |
| 981 | 995 | ||
| 982 | css_put(&from->css); | ||
| 983 | res_counter_uncharge(&from->res, PAGE_SIZE); | 996 | res_counter_uncharge(&from->res, PAGE_SIZE); |
| 984 | mem_cgroup_charge_statistics(from, pc, false); | 997 | mem_cgroup_charge_statistics(from, pc, false); |
| 985 | if (do_swap_account) | 998 | if (do_swap_account) |
| 986 | res_counter_uncharge(&from->memsw, PAGE_SIZE); | 999 | res_counter_uncharge(&from->memsw, PAGE_SIZE); |
| 1000 | css_put(&from->css); | ||
| 1001 | |||
| 1002 | css_get(&to->css); | ||
| 987 | pc->mem_cgroup = to; | 1003 | pc->mem_cgroup = to; |
| 988 | mem_cgroup_charge_statistics(to, pc, true); | 1004 | mem_cgroup_charge_statistics(to, pc, true); |
| 989 | css_get(&to->css); | ||
| 990 | ret = 0; | 1005 | ret = 0; |
| 991 | out: | 1006 | out: |
| 992 | unlock_page_cgroup(pc); | 1007 | unlock_page_cgroup(pc); |
| @@ -1019,8 +1034,10 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, | |||
| 1019 | if (ret || !parent) | 1034 | if (ret || !parent) |
| 1020 | return ret; | 1035 | return ret; |
| 1021 | 1036 | ||
| 1022 | if (!get_page_unless_zero(page)) | 1037 | if (!get_page_unless_zero(page)) { |
| 1023 | return -EBUSY; | 1038 | ret = -EBUSY; |
| 1039 | goto uncharge; | ||
| 1040 | } | ||
| 1024 | 1041 | ||
| 1025 | ret = isolate_lru_page(page); | 1042 | ret = isolate_lru_page(page); |
| 1026 | 1043 | ||
| @@ -1029,19 +1046,23 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, | |||
| 1029 | 1046 | ||
| 1030 | ret = mem_cgroup_move_account(pc, child, parent); | 1047 | ret = mem_cgroup_move_account(pc, child, parent); |
| 1031 | 1048 | ||
| 1032 | /* drop extra refcnt by try_charge() (move_account increment one) */ | ||
| 1033 | css_put(&parent->css); | ||
| 1034 | putback_lru_page(page); | 1049 | putback_lru_page(page); |
| 1035 | if (!ret) { | 1050 | if (!ret) { |
| 1036 | put_page(page); | 1051 | put_page(page); |
| 1052 | /* drop extra refcnt by try_charge() */ | ||
| 1053 | css_put(&parent->css); | ||
| 1037 | return 0; | 1054 | return 0; |
| 1038 | } | 1055 | } |
| 1039 | /* uncharge if move fails */ | 1056 | |
| 1040 | cancel: | 1057 | cancel: |
| 1058 | put_page(page); | ||
| 1059 | uncharge: | ||
| 1060 | /* drop extra refcnt by try_charge() */ | ||
| 1061 | css_put(&parent->css); | ||
| 1062 | /* uncharge if move fails */ | ||
| 1041 | res_counter_uncharge(&parent->res, PAGE_SIZE); | 1063 | res_counter_uncharge(&parent->res, PAGE_SIZE); |
| 1042 | if (do_swap_account) | 1064 | if (do_swap_account) |
| 1043 | res_counter_uncharge(&parent->memsw, PAGE_SIZE); | 1065 | res_counter_uncharge(&parent->memsw, PAGE_SIZE); |
| 1044 | put_page(page); | ||
| 1045 | return ret; | 1066 | return ret; |
| 1046 | } | 1067 | } |
| 1047 | 1068 | ||
| @@ -1971,6 +1992,7 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, | |||
| 1971 | { | 1992 | { |
| 1972 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | 1993 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); |
| 1973 | struct mem_cgroup *parent; | 1994 | struct mem_cgroup *parent; |
| 1995 | |||
| 1974 | if (val > 100) | 1996 | if (val > 100) |
| 1975 | return -EINVAL; | 1997 | return -EINVAL; |
| 1976 | 1998 | ||
| @@ -1978,15 +2000,22 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, | |||
| 1978 | return -EINVAL; | 2000 | return -EINVAL; |
| 1979 | 2001 | ||
| 1980 | parent = mem_cgroup_from_cont(cgrp->parent); | 2002 | parent = mem_cgroup_from_cont(cgrp->parent); |
| 2003 | |||
| 2004 | cgroup_lock(); | ||
| 2005 | |||
| 1981 | /* If under hierarchy, only empty-root can set this value */ | 2006 | /* If under hierarchy, only empty-root can set this value */ |
| 1982 | if ((parent->use_hierarchy) || | 2007 | if ((parent->use_hierarchy) || |
| 1983 | (memcg->use_hierarchy && !list_empty(&cgrp->children))) | 2008 | (memcg->use_hierarchy && !list_empty(&cgrp->children))) { |
| 2009 | cgroup_unlock(); | ||
| 1984 | return -EINVAL; | 2010 | return -EINVAL; |
| 2011 | } | ||
| 1985 | 2012 | ||
| 1986 | spin_lock(&memcg->reclaim_param_lock); | 2013 | spin_lock(&memcg->reclaim_param_lock); |
| 1987 | memcg->swappiness = val; | 2014 | memcg->swappiness = val; |
| 1988 | spin_unlock(&memcg->reclaim_param_lock); | 2015 | spin_unlock(&memcg->reclaim_param_lock); |
| 1989 | 2016 | ||
| 2017 | cgroup_unlock(); | ||
| 2018 | |||
| 1990 | return 0; | 2019 | return 0; |
| 1991 | } | 2020 | } |
| 1992 | 2021 | ||
| @@ -2181,7 +2210,7 @@ static void __init enable_swap_cgroup(void) | |||
| 2181 | } | 2210 | } |
| 2182 | #endif | 2211 | #endif |
| 2183 | 2212 | ||
| 2184 | static struct cgroup_subsys_state * | 2213 | static struct cgroup_subsys_state * __ref |
| 2185 | mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | 2214 | mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) |
| 2186 | { | 2215 | { |
| 2187 | struct mem_cgroup *mem, *parent; | 2216 | struct mem_cgroup *mem, *parent; |
| @@ -2232,7 +2261,14 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, | |||
| 2232 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, | 2261 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, |
| 2233 | struct cgroup *cont) | 2262 | struct cgroup *cont) |
| 2234 | { | 2263 | { |
| 2235 | mem_cgroup_put(mem_cgroup_from_cont(cont)); | 2264 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); |
| 2265 | struct mem_cgroup *last_scanned_child = mem->last_scanned_child; | ||
| 2266 | |||
| 2267 | if (last_scanned_child) { | ||
| 2268 | VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child)); | ||
| 2269 | mem_cgroup_put(last_scanned_child); | ||
| 2270 | } | ||
| 2271 | mem_cgroup_put(mem); | ||
| 2236 | } | 2272 | } |
| 2237 | 2273 | ||
| 2238 | static int mem_cgroup_populate(struct cgroup_subsys *ss, | 2274 | static int mem_cgroup_populate(struct cgroup_subsys *ss, |
