| author | Johannes Weiner <jweiner@redhat.com> | 2012-01-12 20:17:59 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-12 23:13:05 -0500 |
| commit | 5660048ccac8735d9bc0a46325a02e6a6518b5b2 (patch) | |
| tree | 4b2269ddd6ca001843187a89fb91278192028d87 /mm/memcontrol.c | |
| parent | 527a5ec9a53471d855291ba9f1fdf1dd4e12a184 (diff) | |
mm: move memcg hierarchy reclaim to generic reclaim code
Memory cgroup limit reclaim and traditional global pressure reclaim will
soon share the same code to reclaim from a hierarchical tree of memory
cgroups.
In preparation for this, move the two right next to each other in
shrink_zone().
The mem_cgroup_hierarchical_reclaim() polymath is split into a soft
limit reclaim function, which still does hierarchy walking on its own,
and a limit (shrinking) reclaim function, which relies on generic
reclaim code to walk the hierarchy.
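
Concretely, the split yields the two entry points sketched below. The signatures are lifted from the diff that follows; only the summary comments are editorial:

```c
/* limit reclaim: used by the charge path and the userspace limit
 * knobs; leaves hierarchy walking to generic reclaim code */
static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
					gfp_t gfp_mask,
					unsigned long flags);

/* soft limit reclaim: still walks the hierarchy on its own via
 * mem_cgroup_iter() */
static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
				   struct zone *zone,
				   gfp_t gfp_mask,
				   unsigned long *total_scanned);
```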
Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ying Han <yinghan@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- mm/memcontrol.c | 169
1 file changed, 85 insertions(+), 84 deletions(-)
```diff
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bec451da7def..750ed1449955 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -370,8 +370,6 @@ enum charge_type {
 #define MEM_CGROUP_RECLAIM_NOSWAP	(1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
 #define MEM_CGROUP_RECLAIM_SHRINK_BIT	0x1
 #define MEM_CGROUP_RECLAIM_SHRINK	(1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
-#define MEM_CGROUP_RECLAIM_SOFT_BIT	0x2
-#define MEM_CGROUP_RECLAIM_SOFT	(1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
 
 static void mem_cgroup_get(struct mem_cgroup *memcg);
 static void mem_cgroup_put(struct mem_cgroup *memcg);
@@ -857,20 +855,33 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return memcg;
 }
 
-struct mem_cgroup_reclaim_cookie {
-	struct zone *zone;
-	int priority;
-	unsigned int generation;
-};
-
-static struct mem_cgroup *
-mem_cgroup_iter(struct mem_cgroup *root,
-		struct mem_cgroup *prev,
-		struct mem_cgroup_reclaim_cookie *reclaim)
+/**
+ * mem_cgroup_iter - iterate over memory cgroup hierarchy
+ * @root: hierarchy root
+ * @prev: previously returned memcg, NULL on first invocation
+ * @reclaim: cookie for shared reclaim walks, NULL for full walks
+ *
+ * Returns references to children of the hierarchy below @root, or
+ * @root itself, or %NULL after a full round-trip.
+ *
+ * Caller must pass the return value in @prev on subsequent
+ * invocations for reference counting, or use mem_cgroup_iter_break()
+ * to cancel a hierarchy walk before the round-trip is complete.
+ *
+ * Reclaimers can specify a zone and a priority level in @reclaim to
+ * divide up the memcgs in the hierarchy among all concurrent
+ * reclaimers operating on the same zone and priority.
+ */
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim)
 {
 	struct mem_cgroup *memcg = NULL;
 	int id = 0;
 
+	if (mem_cgroup_disabled())
+		return NULL;
+
 	if (!root)
 		root = root_mem_cgroup;
 
@@ -926,8 +937,13 @@ mem_cgroup_iter(struct mem_cgroup *root,
 	return memcg;
 }
 
-static void mem_cgroup_iter_break(struct mem_cgroup *root,
-				  struct mem_cgroup *prev)
+/**
+ * mem_cgroup_iter_break - abort a hierarchy walk prematurely
+ * @root: hierarchy root
+ * @prev: last visited hierarchy member as returned by mem_cgroup_iter()
+ */
+void mem_cgroup_iter_break(struct mem_cgroup *root,
+			   struct mem_cgroup *prev)
 {
 	if (!root)
 		root = root_mem_cgroup;
```
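
The kerneldoc above describes a calling convention whose reference counting is easy to get wrong. A minimal sketch of a full hierarchy walk with an early exit; should_stop() is a hypothetical stand-in for whatever termination condition the caller checks:

```c
/*
 * Sketch of the mem_cgroup_iter() usage pattern from the kerneldoc:
 * pass the previous return value back in @prev, and break off with
 * mem_cgroup_iter_break() so the reference still held on the last
 * visited memcg is dropped.  should_stop() is hypothetical.
 */
struct mem_cgroup *iter;

for (iter = mem_cgroup_iter(root, NULL, NULL);
     iter;
     iter = mem_cgroup_iter(root, iter, NULL)) {
	if (should_stop(iter)) {
		mem_cgroup_iter_break(root, iter);
		break;
	}
	/* ... operate on iter ... */
}
```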
```diff
@@ -1555,6 +1571,42 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 	return min(limit, memsw);
 }
 
+static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
+					gfp_t gfp_mask,
+					unsigned long flags)
+{
+	unsigned long total = 0;
+	bool noswap = false;
+	int loop;
+
+	if (flags & MEM_CGROUP_RECLAIM_NOSWAP)
+		noswap = true;
+	if (!(flags & MEM_CGROUP_RECLAIM_SHRINK) && memcg->memsw_is_minimum)
+		noswap = true;
+
+	for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) {
+		if (loop)
+			drain_all_stock_async(memcg);
+		total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
+		/*
+		 * Allow limit shrinkers, which are triggered directly
+		 * by userspace, to catch signals and stop reclaim
+		 * after minimal progress, regardless of the margin.
+		 */
+		if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
+			break;
+		if (mem_cgroup_margin(memcg))
+			break;
+		/*
+		 * If nothing was reclaimed after two attempts, there
+		 * may be no reclaimable pages in this hierarchy.
+		 */
+		if (loop && !total)
+			break;
+	}
+	return total;
+}
+
 /**
  * test_mem_cgroup_node_reclaimable
  * @mem: the target memcg
```
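
The flags argument replaces the old reclaim_options; the surviving combinations, mirroring the call sites converted further down in this patch, are:

```c
/* charge path (mem_cgroup_do_charge): flags may carry NOSWAP
 * when the memsw counter is the one over its limit */
ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);

/* shrinking memory.limit_in_bytes from userspace */
mem_cgroup_reclaim(memcg, GFP_KERNEL, MEM_CGROUP_RECLAIM_SHRINK);

/* shrinking memory.memsw.limit_in_bytes: additionally avoid swap-out */
mem_cgroup_reclaim(memcg, GFP_KERNEL,
		   MEM_CGROUP_RECLAIM_NOSWAP |
		   MEM_CGROUP_RECLAIM_SHRINK);
```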
```diff
@@ -1692,30 +1744,14 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 }
 #endif
 
-/*
- * Scan the hierarchy if needed to reclaim memory. We remember the last child
- * we reclaimed from, so that we don't end up penalizing one child extensively
- * based on its position in the children list.
- *
- * root_memcg is the original ancestor that we've been reclaim from.
- *
- * We give up and return to the caller when we visit root_memcg twice.
- * (other groups can be removed while we're walking....)
- *
- * If shrink==true, for avoiding to free too much, this returns immedieately.
- */
-static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
-						struct zone *zone,
-						gfp_t gfp_mask,
-						unsigned long reclaim_options,
-						unsigned long *total_scanned)
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+				   struct zone *zone,
+				   gfp_t gfp_mask,
+				   unsigned long *total_scanned)
 {
 	struct mem_cgroup *victim = NULL;
-	int ret, total = 0;
+	int total = 0;
 	int loop = 0;
-	bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
-	bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
-	bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
 	unsigned long excess;
 	unsigned long nr_scanned;
 	struct mem_cgroup_reclaim_cookie reclaim = {
@@ -1725,29 +1761,17 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 
 	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
 
-	/* If memsw_is_minimum==1, swap-out is of-no-use. */
-	if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
-		noswap = true;
-
 	while (1) {
 		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
 		if (!victim) {
 			loop++;
-			/*
-			 * We are not draining per cpu cached charges during
-			 * soft limit reclaim because global reclaim doesn't
-			 * care about charges. It tries to free some memory and
-			 * charges will not give any.
-			 */
-			if (!check_soft && loop >= 1)
-				drain_all_stock_async(root_memcg);
 			if (loop >= 2) {
 				/*
 				 * If we have not been able to reclaim
 				 * anything, it might because there are
 				 * no reclaimable pages under this hierarchy
 				 */
-				if (!check_soft || !total)
+				if (!total)
 					break;
 				/*
 				 * We want to do more targeted reclaim.
@@ -1761,30 +1785,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 			}
 			continue;
 		}
-		if (!mem_cgroup_reclaimable(victim, noswap)) {
-			/* this cgroup's local usage == 0 */
-			continue;
-		}
-		/* we use swappiness of local cgroup */
-		if (check_soft) {
-			ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
-				noswap, zone, &nr_scanned);
-			*total_scanned += nr_scanned;
-		} else
-			ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
-						noswap);
-		total += ret;
-		/*
-		 * At shrinking usage, we can't check we should stop here or
-		 * reclaim more. It's depends on callers. last_scanned_child
-		 * will work enough for keeping fairness under tree.
-		 */
-		if (shrink)
-			break;
-		if (check_soft) {
-			if (!res_counter_soft_limit_excess(&root_memcg->res))
-				break;
-		} else if (mem_cgroup_margin(root_memcg))
+		if (!mem_cgroup_reclaimable(victim, false))
+			continue;
+		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+						     zone, &nr_scanned);
+		*total_scanned += nr_scanned;
+		if (!res_counter_soft_limit_excess(&root_memcg->res))
 			break;
 	}
 	mem_cgroup_iter_break(root_memcg, victim);
@@ -2281,8 +2287,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	if (!(gfp_mask & __GFP_WAIT))
 		return CHARGE_WOULDBLOCK;
 
-	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
-					      gfp_mask, flags, NULL);
+	ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
 		return CHARGE_RETRY;
 	/*
@@ -3559,9 +3564,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3619,10 +3623,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_NOSWAP |
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_NOSWAP |
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3665,10 +3668,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 			break;
 
 		nr_scanned = 0;
-		reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
-						gfp_mask,
-						MEM_CGROUP_RECLAIM_SOFT,
-						&nr_scanned);
+		reclaimed = mem_cgroup_soft_reclaim(mz->mem, zone,
+						    gfp_mask, &nr_scanned);
 		nr_reclaimed += reclaimed;
 		*total_scanned += nr_scanned;
 		spin_lock(&mctz->lock);
```
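
The counterpart to this cleanup lives outside the diffstat shown here: with mem_cgroup_iter() now exported, the generic reclaim loop in mm/vmscan.c takes over the hierarchy walk for limit reclaim. Roughly, and only as a sketch of the shape rather than the verbatim vmscan.c hunk, shrink_zone() becomes:

```c
static void shrink_zone(int priority, struct zone *zone,
			struct scan_control *sc)
{
	struct mem_cgroup *root = sc->target_mem_cgroup;
	struct mem_cgroup_reclaim_cookie reclaim = {
		.zone = zone,
		.priority = priority,
	};
	struct mem_cgroup *memcg;

	/* shared walk: the cookie divides the memcgs among concurrent
	 * reclaimers on the same zone and priority */
	memcg = mem_cgroup_iter(root, NULL, &reclaim);
	do {
		struct mem_cgroup_zone mz = {
			.mem_cgroup = memcg,
			.zone = zone,
		};

		shrink_mem_cgroup_zone(priority, &mz, sc);
		memcg = mem_cgroup_iter(root, memcg, &reclaim);
	} while (memcg);
}
```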