-rw-r--r--  include/linux/memcontrol.h   24
-rw-r--r--  mm/memcontrol.c             169
-rw-r--r--  mm/vmscan.c                  43
3 files changed, 148 insertions, 88 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3558a5e268cf..3b99dce85293 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -40,6 +40,12 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 						struct mem_cgroup *mem_cont,
 						int active, int file);
 
+struct mem_cgroup_reclaim_cookie {
+	struct zone *zone;
+	int priority;
+	unsigned int generation;
+};
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 /*
  * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -106,6 +112,11 @@ mem_cgroup_prepare_migration(struct page *page,
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	struct page *oldpage, struct page *newpage, bool migration_ok);
 
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
+				   struct mem_cgroup *,
+				   struct mem_cgroup_reclaim_cookie *);
+void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
+
 /*
  * For memory reclaim.
  */
@@ -281,6 +292,19 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 {
 }
 
+static inline struct mem_cgroup *
+mem_cgroup_iter(struct mem_cgroup *root,
+		struct mem_cgroup *prev,
+		struct mem_cgroup_reclaim_cookie *reclaim)
+{
+	return NULL;
+}
+
+static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
+					 struct mem_cgroup *prev)
+{
+}
+
 static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *memcg)
 {
 	return 0;
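
For reference (not part of the patch), a minimal sketch of the full-walk calling convention for the iterator declared above: start with @prev == NULL, feed each returned memcg back in as @prev, and use mem_cgroup_iter_break() when bailing out early. visit() is a hypothetical stand-in for whatever the caller does per memcg.

/* Editor's sketch, not from the patch: visit() is hypothetical. */
static void walk_hierarchy_example(struct mem_cgroup *root)
{
	struct mem_cgroup *iter;

	for (iter = mem_cgroup_iter(root, NULL, NULL);
	     iter;
	     iter = mem_cgroup_iter(root, iter, NULL)) {
		if (visit(iter)) {
			/* drop the reference held on iter before stopping early */
			mem_cgroup_iter_break(root, iter);
			break;
		}
	}
}
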
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bec451da7def..750ed1449955 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -370,8 +370,6 @@ enum charge_type {
 #define MEM_CGROUP_RECLAIM_NOSWAP	(1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
 #define MEM_CGROUP_RECLAIM_SHRINK_BIT	0x1
 #define MEM_CGROUP_RECLAIM_SHRINK	(1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
-#define MEM_CGROUP_RECLAIM_SOFT_BIT	0x2
-#define MEM_CGROUP_RECLAIM_SOFT	(1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
 
 static void mem_cgroup_get(struct mem_cgroup *memcg);
 static void mem_cgroup_put(struct mem_cgroup *memcg);
@@ -857,20 +855,33 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return memcg;
 }
 
-struct mem_cgroup_reclaim_cookie {
-	struct zone *zone;
-	int priority;
-	unsigned int generation;
-};
-
-static struct mem_cgroup *
-mem_cgroup_iter(struct mem_cgroup *root,
-		struct mem_cgroup *prev,
-		struct mem_cgroup_reclaim_cookie *reclaim)
+/**
+ * mem_cgroup_iter - iterate over memory cgroup hierarchy
+ * @root: hierarchy root
+ * @prev: previously returned memcg, NULL on first invocation
+ * @reclaim: cookie for shared reclaim walks, NULL for full walks
+ *
+ * Returns references to children of the hierarchy below @root, or
+ * @root itself, or %NULL after a full round-trip.
+ *
+ * Caller must pass the return value in @prev on subsequent
+ * invocations for reference counting, or use mem_cgroup_iter_break()
+ * to cancel a hierarchy walk before the round-trip is complete.
+ *
+ * Reclaimers can specify a zone and a priority level in @reclaim to
+ * divide up the memcgs in the hierarchy among all concurrent
+ * reclaimers operating on the same zone and priority.
+ */
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim)
 {
 	struct mem_cgroup *memcg = NULL;
 	int id = 0;
 
+	if (mem_cgroup_disabled())
+		return NULL;
+
 	if (!root)
 		root = root_mem_cgroup;
 
@@ -926,8 +937,13 @@ mem_cgroup_iter(struct mem_cgroup *root,
 	return memcg;
 }
 
-static void mem_cgroup_iter_break(struct mem_cgroup *root,
-				  struct mem_cgroup *prev)
+/**
+ * mem_cgroup_iter_break - abort a hierarchy walk prematurely
+ * @root: hierarchy root
+ * @prev: last visited hierarchy member as returned by mem_cgroup_iter()
+ */
+void mem_cgroup_iter_break(struct mem_cgroup *root,
+			   struct mem_cgroup *prev)
 {
 	if (!root)
 		root = root_mem_cgroup;
@@ -1555,6 +1571,42 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 	return min(limit, memsw);
 }
 
+static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
+					gfp_t gfp_mask,
+					unsigned long flags)
+{
+	unsigned long total = 0;
+	bool noswap = false;
+	int loop;
+
+	if (flags & MEM_CGROUP_RECLAIM_NOSWAP)
+		noswap = true;
+	if (!(flags & MEM_CGROUP_RECLAIM_SHRINK) && memcg->memsw_is_minimum)
+		noswap = true;
+
+	for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) {
+		if (loop)
+			drain_all_stock_async(memcg);
+		total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
+		/*
+		 * Allow limit shrinkers, which are triggered directly
+		 * by userspace, to catch signals and stop reclaim
+		 * after minimal progress, regardless of the margin.
+		 */
+		if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
+			break;
+		if (mem_cgroup_margin(memcg))
+			break;
+		/*
+		 * If nothing was reclaimed after two attempts, there
+		 * may be no reclaimable pages in this hierarchy.
+		 */
+		if (loop && !total)
+			break;
+	}
+	return total;
+}
+
 /**
  * test_mem_cgroup_node_reclaimable
  * @mem: the target memcg
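
For illustration (not part of the patch), a sketch of how a userspace-triggered limit shrinker would call the new helper, mirroring the mem_cgroup_resize_memsw_limit() hunk further down in this patch:

/* Editor's sketch, not from the patch. */
static void shrink_memsw_usage_example(struct mem_cgroup *memcg)
{
	/* don't swap while shrinking, and stop after minimal progress */
	mem_cgroup_reclaim(memcg, GFP_KERNEL,
			   MEM_CGROUP_RECLAIM_NOSWAP |
			   MEM_CGROUP_RECLAIM_SHRINK);
}
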
@@ -1692,30 +1744,14 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 }
 #endif
 
-/*
- * Scan the hierarchy if needed to reclaim memory. We remember the last child
- * we reclaimed from, so that we don't end up penalizing one child extensively
- * based on its position in the children list.
- *
- * root_memcg is the original ancestor that we've been reclaim from.
- *
- * We give up and return to the caller when we visit root_memcg twice.
- * (other groups can be removed while we're walking....)
- *
- * If shrink==true, for avoiding to free too much, this returns immedieately.
- */
-static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
-						struct zone *zone,
-						gfp_t gfp_mask,
-						unsigned long reclaim_options,
-						unsigned long *total_scanned)
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+				   struct zone *zone,
+				   gfp_t gfp_mask,
+				   unsigned long *total_scanned)
 {
 	struct mem_cgroup *victim = NULL;
-	int ret, total = 0;
+	int total = 0;
 	int loop = 0;
-	bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
-	bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
-	bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
 	unsigned long excess;
 	unsigned long nr_scanned;
 	struct mem_cgroup_reclaim_cookie reclaim = {
@@ -1725,29 +1761,17 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 
 	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
 
-	/* If memsw_is_minimum==1, swap-out is of-no-use. */
-	if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
-		noswap = true;
-
 	while (1) {
 		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
 		if (!victim) {
 			loop++;
-			/*
-			 * We are not draining per cpu cached charges during
-			 * soft limit reclaim because global reclaim doesn't
-			 * care about charges. It tries to free some memory and
-			 * charges will not give any.
-			 */
-			if (!check_soft && loop >= 1)
-				drain_all_stock_async(root_memcg);
 			if (loop >= 2) {
 				/*
 				 * If we have not been able to reclaim
 				 * anything, it might because there are
 				 * no reclaimable pages under this hierarchy
 				 */
-				if (!check_soft || !total)
+				if (!total)
 					break;
 				/*
 				 * We want to do more targeted reclaim.
@@ -1761,30 +1785,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 			}
 			continue;
 		}
-		if (!mem_cgroup_reclaimable(victim, noswap)) {
-			/* this cgroup's local usage == 0 */
+		if (!mem_cgroup_reclaimable(victim, false))
 			continue;
-		}
-		/* we use swappiness of local cgroup */
-		if (check_soft) {
-			ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
-				noswap, zone, &nr_scanned);
-			*total_scanned += nr_scanned;
-		} else
-			ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
-						noswap);
-		total += ret;
-		/*
-		 * At shrinking usage, we can't check we should stop here or
-		 * reclaim more. It's depends on callers. last_scanned_child
-		 * will work enough for keeping fairness under tree.
-		 */
-		if (shrink)
-			break;
-		if (check_soft) {
-			if (!res_counter_soft_limit_excess(&root_memcg->res))
-				break;
-		} else if (mem_cgroup_margin(root_memcg))
+		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+						     zone, &nr_scanned);
+		*total_scanned += nr_scanned;
+		if (!res_counter_soft_limit_excess(&root_memcg->res))
 			break;
 	}
 	mem_cgroup_iter_break(root_memcg, victim);
@@ -2281,8 +2287,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	if (!(gfp_mask & __GFP_WAIT))
 		return CHARGE_WOULDBLOCK;
 
-	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
-					      gfp_mask, flags, NULL);
+	ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
 		return CHARGE_RETRY;
 	/*
@@ -3559,9 +3564,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3619,10 +3623,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_NOSWAP |
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_NOSWAP |
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3665,10 +3668,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 			break;
 
 		nr_scanned = 0;
-		reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
-						gfp_mask,
-						MEM_CGROUP_RECLAIM_SOFT,
-						&nr_scanned);
+		reclaimed = mem_cgroup_soft_reclaim(mz->mem, zone,
+						gfp_mask, &nr_scanned);
 		nr_reclaimed += reclaimed;
 		*total_scanned += nr_scanned;
 		spin_lock(&mctz->lock);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e0627d07c3ac..136c7eb0ad88 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2104,12 +2104,43 @@ restart:
 static void shrink_zone(int priority, struct zone *zone,
 			struct scan_control *sc)
 {
-	struct mem_cgroup_zone mz = {
-		.mem_cgroup = sc->target_mem_cgroup,
+	struct mem_cgroup *root = sc->target_mem_cgroup;
+	struct mem_cgroup_reclaim_cookie reclaim = {
 		.zone = zone,
+		.priority = priority,
 	};
+	struct mem_cgroup *memcg;
+
+	if (global_reclaim(sc)) {
+		struct mem_cgroup_zone mz = {
+			.mem_cgroup = NULL,
+			.zone = zone,
+		};
+
+		shrink_mem_cgroup_zone(priority, &mz, sc);
+		return;
+	}
+
+	memcg = mem_cgroup_iter(root, NULL, &reclaim);
+	do {
+		struct mem_cgroup_zone mz = {
+			.mem_cgroup = memcg,
+			.zone = zone,
+		};
 
-	shrink_mem_cgroup_zone(priority, &mz, sc);
+		shrink_mem_cgroup_zone(priority, &mz, sc);
+		/*
+		 * Limit reclaim has historically picked one memcg and
+		 * scanned it with decreasing priority levels until
+		 * nr_to_reclaim had been reclaimed. This priority
+		 * cycle is thus over after a single memcg.
+		 */
+		if (!global_reclaim(sc)) {
+			mem_cgroup_iter_break(root, memcg);
+			break;
+		}
+		memcg = mem_cgroup_iter(root, memcg, &reclaim);
+	} while (memcg);
 }
 
 /*
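
For reference (not part of the patch), a condensed sketch of the shared-walk pattern the new shrink_zone() above follows: reclaimers that pass the same zone and priority in the cookie divide the hierarchy among themselves instead of all starting from the same memcg. scan_one_memcg() is a hypothetical placeholder for the per-memcg work.

/* Editor's sketch, not from the patch: scan_one_memcg() is hypothetical. */
static void shared_reclaim_walk_example(struct mem_cgroup *root,
					struct zone *zone, int priority)
{
	struct mem_cgroup_reclaim_cookie reclaim = {
		.zone = zone,
		.priority = priority,
	};
	struct mem_cgroup *memcg;

	memcg = mem_cgroup_iter(root, NULL, &reclaim);
	do {
		scan_one_memcg(memcg, zone);
		memcg = mem_cgroup_iter(root, memcg, &reclaim);
	} while (memcg);
}
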
@@ -2374,6 +2405,10 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 		.order = 0,
 		.target_mem_cgroup = mem,
 	};
+	struct mem_cgroup_zone mz = {
+		.mem_cgroup = mem,
+		.zone = zone,
+	};
 
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2389,7 +2424,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 	 * will pick up pages from other mem cgroup's as well. We hack
 	 * the priority and make it zero.
 	 */
-	shrink_zone(0, zone, &sc);
+	shrink_mem_cgroup_zone(0, &mz, &sc);
 
 	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 