-rw-r--r--  include/linux/memcontrol.h |  24
-rw-r--r--  mm/memcontrol.c            | 169
-rw-r--r--  mm/vmscan.c                |  43
3 files changed, 148 insertions(+), 88 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3558a5e268cf..3b99dce85293 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -40,6 +40,12 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct mem_cgroup *mem_cont,
 					int active, int file);
 
+struct mem_cgroup_reclaim_cookie {
+	struct zone *zone;
+	int priority;
+	unsigned int generation;
+};
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 /*
  * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -106,6 +112,11 @@ mem_cgroup_prepare_migration(struct page *page,
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	struct page *oldpage, struct page *newpage, bool migration_ok);
 
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
+				   struct mem_cgroup *,
+				   struct mem_cgroup_reclaim_cookie *);
+void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
+
 /*
  * For memory reclaim.
  */
@@ -281,6 +292,19 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 {
 }
 
+static inline struct mem_cgroup *
+mem_cgroup_iter(struct mem_cgroup *root,
+		struct mem_cgroup *prev,
+		struct mem_cgroup_reclaim_cookie *reclaim)
+{
+	return NULL;
+}
+
+static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
+					 struct mem_cgroup *prev)
+{
+}
+
 static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *memcg)
 {
 	return 0;
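The kerneldoc added to mm/memcontrol.c below spells out the calling convention for these two declarations. As a rough sketch only -- not part of the patch, assuming <linux/memcontrol.h> is included and with visit_memcg() as a hypothetical per-memcg callback -- a full hierarchy walk with an early exit looks like this:

	/*
	 * Illustrative sketch, not part of this patch.  A NULL cookie
	 * requests a full walk of the hierarchy below (and including) root.
	 */
	static void walk_hierarchy_example(struct mem_cgroup *root)
	{
		struct mem_cgroup *memcg;

		memcg = mem_cgroup_iter(root, NULL, NULL);
		while (memcg) {
			if (!visit_memcg(memcg)) {	/* hypothetical callback */
				/* abort early: drop the reference held by the iterator */
				mem_cgroup_iter_break(root, memcg);
				break;
			}
			/* pass the previous memcg back to keep refcounting balanced */
			memcg = mem_cgroup_iter(root, memcg, NULL);
		}
	}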
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bec451da7def..750ed1449955 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -370,8 +370,6 @@ enum charge_type {
 #define MEM_CGROUP_RECLAIM_NOSWAP	(1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
 #define MEM_CGROUP_RECLAIM_SHRINK_BIT	0x1
 #define MEM_CGROUP_RECLAIM_SHRINK	(1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
-#define MEM_CGROUP_RECLAIM_SOFT_BIT	0x2
-#define MEM_CGROUP_RECLAIM_SOFT	(1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
 
 static void mem_cgroup_get(struct mem_cgroup *memcg);
 static void mem_cgroup_put(struct mem_cgroup *memcg);
@@ -857,20 +855,33 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return memcg;
 }
 
-struct mem_cgroup_reclaim_cookie {
-	struct zone *zone;
-	int priority;
-	unsigned int generation;
-};
-
-static struct mem_cgroup *
-mem_cgroup_iter(struct mem_cgroup *root,
-		struct mem_cgroup *prev,
-		struct mem_cgroup_reclaim_cookie *reclaim)
+/**
+ * mem_cgroup_iter - iterate over memory cgroup hierarchy
+ * @root: hierarchy root
+ * @prev: previously returned memcg, NULL on first invocation
+ * @reclaim: cookie for shared reclaim walks, NULL for full walks
+ *
+ * Returns references to children of the hierarchy below @root, or
+ * @root itself, or %NULL after a full round-trip.
+ *
+ * Caller must pass the return value in @prev on subsequent
+ * invocations for reference counting, or use mem_cgroup_iter_break()
+ * to cancel a hierarchy walk before the round-trip is complete.
+ *
+ * Reclaimers can specify a zone and a priority level in @reclaim to
+ * divide up the memcgs in the hierarchy among all concurrent
+ * reclaimers operating on the same zone and priority.
+ */
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim)
 {
 	struct mem_cgroup *memcg = NULL;
 	int id = 0;
 
+	if (mem_cgroup_disabled())
+		return NULL;
+
 	if (!root)
 		root = root_mem_cgroup;
 
@@ -926,8 +937,13 @@ mem_cgroup_iter(struct mem_cgroup *root,
 	return memcg;
 }
 
-static void mem_cgroup_iter_break(struct mem_cgroup *root,
-				  struct mem_cgroup *prev)
+/**
+ * mem_cgroup_iter_break - abort a hierarchy walk prematurely
+ * @root: hierarchy root
+ * @prev: last visited hierarchy member as returned by mem_cgroup_iter()
+ */
+void mem_cgroup_iter_break(struct mem_cgroup *root,
+			   struct mem_cgroup *prev)
 {
 	if (!root)
 		root = root_mem_cgroup;
@@ -1555,6 +1571,42 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 	return min(limit, memsw);
 }
 
+static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
+					gfp_t gfp_mask,
+					unsigned long flags)
+{
+	unsigned long total = 0;
+	bool noswap = false;
+	int loop;
+
+	if (flags & MEM_CGROUP_RECLAIM_NOSWAP)
+		noswap = true;
+	if (!(flags & MEM_CGROUP_RECLAIM_SHRINK) && memcg->memsw_is_minimum)
+		noswap = true;
+
+	for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) {
+		if (loop)
+			drain_all_stock_async(memcg);
+		total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
+		/*
+		 * Allow limit shrinkers, which are triggered directly
+		 * by userspace, to catch signals and stop reclaim
+		 * after minimal progress, regardless of the margin.
+		 */
+		if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
+			break;
+		if (mem_cgroup_margin(memcg))
+			break;
+		/*
+		 * If nothing was reclaimed after two attempts, there
+		 * may be no reclaimable pages in this hierarchy.
+		 */
+		if (loop && !total)
+			break;
+	}
+	return total;
+}
+
 /**
  * test_mem_cgroup_node_reclaimable
  * @mem: the target memcg
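For orientation only (not part of the patch text itself): the two surviving flag bits correspond to the callers converted further down in this file. The charge path forwards whatever flags it already had, while the userspace-triggered limit-resize paths pass MEM_CGROUP_RECLAIM_SHRINK (plus NOSWAP for the memsw limit) so a single pass of progress, or a pending signal, is enough to stop:

	/* Charge path (mem_cgroup_do_charge): reclaim until the margin returns. */
	ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);

	/* Limit resize (mem_cgroup_resize_limit): stop after minimal progress. */
	mem_cgroup_reclaim(memcg, GFP_KERNEL, MEM_CGROUP_RECLAIM_SHRINK);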
@@ -1692,30 +1744,14 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 }
 #endif
 
-/*
- * Scan the hierarchy if needed to reclaim memory. We remember the last child
- * we reclaimed from, so that we don't end up penalizing one child extensively
- * based on its position in the children list.
- *
- * root_memcg is the original ancestor that we've been reclaim from.
- *
- * We give up and return to the caller when we visit root_memcg twice.
- * (other groups can be removed while we're walking....)
- *
- * If shrink==true, for avoiding to free too much, this returns immedieately.
- */
-static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
-						struct zone *zone,
-						gfp_t gfp_mask,
-						unsigned long reclaim_options,
-						unsigned long *total_scanned)
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+				   struct zone *zone,
+				   gfp_t gfp_mask,
+				   unsigned long *total_scanned)
 {
 	struct mem_cgroup *victim = NULL;
-	int ret, total = 0;
+	int total = 0;
 	int loop = 0;
-	bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
-	bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
-	bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
 	unsigned long excess;
 	unsigned long nr_scanned;
 	struct mem_cgroup_reclaim_cookie reclaim = {
@@ -1725,29 +1761,17 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 
 	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
 
-	/* If memsw_is_minimum==1, swap-out is of-no-use. */
-	if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
-		noswap = true;
-
 	while (1) {
 		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
 		if (!victim) {
 			loop++;
-			/*
-			 * We are not draining per cpu cached charges during
-			 * soft limit reclaim because global reclaim doesn't
-			 * care about charges. It tries to free some memory and
-			 * charges will not give any.
-			 */
-			if (!check_soft && loop >= 1)
-				drain_all_stock_async(root_memcg);
 			if (loop >= 2) {
 				/*
 				 * If we have not been able to reclaim
 				 * anything, it might because there are
 				 * no reclaimable pages under this hierarchy
 				 */
-				if (!check_soft || !total)
+				if (!total)
 					break;
 				/*
 				 * We want to do more targeted reclaim.
@@ -1761,30 +1785,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 			}
 			continue;
 		}
-		if (!mem_cgroup_reclaimable(victim, noswap)) {
-			/* this cgroup's local usage == 0 */
+		if (!mem_cgroup_reclaimable(victim, false))
 			continue;
-		}
-		/* we use swappiness of local cgroup */
-		if (check_soft) {
-			ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
-				noswap, zone, &nr_scanned);
-			*total_scanned += nr_scanned;
-		} else
-			ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
-							noswap);
-		total += ret;
-		/*
-		 * At shrinking usage, we can't check we should stop here or
-		 * reclaim more. It's depends on callers. last_scanned_child
-		 * will work enough for keeping fairness under tree.
-		 */
-		if (shrink)
-			break;
-		if (check_soft) {
-			if (!res_counter_soft_limit_excess(&root_memcg->res))
-				break;
-		} else if (mem_cgroup_margin(root_memcg))
+		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+						     zone, &nr_scanned);
+		*total_scanned += nr_scanned;
+		if (!res_counter_soft_limit_excess(&root_memcg->res))
 			break;
 	}
 	mem_cgroup_iter_break(root_memcg, victim);
@@ -2281,8 +2287,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	if (!(gfp_mask & __GFP_WAIT))
 		return CHARGE_WOULDBLOCK;
 
-	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
-					      gfp_mask, flags, NULL);
+	ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
 		return CHARGE_RETRY;
 	/*
@@ -3559,9 +3564,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3619,10 +3623,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_NOSWAP |
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_NOSWAP |
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3665,10 +3668,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 			break;
 
 		nr_scanned = 0;
-		reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
-						gfp_mask,
-						MEM_CGROUP_RECLAIM_SOFT,
-						&nr_scanned);
+		reclaimed = mem_cgroup_soft_reclaim(mz->mem, zone,
+						    gfp_mask, &nr_scanned);
 		nr_reclaimed += reclaimed;
 		*total_scanned += nr_scanned;
 		spin_lock(&mctz->lock);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e0627d07c3ac..136c7eb0ad88 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2104,12 +2104,43 @@ restart:
 static void shrink_zone(int priority, struct zone *zone,
 			struct scan_control *sc)
 {
-	struct mem_cgroup_zone mz = {
-		.mem_cgroup = sc->target_mem_cgroup,
+	struct mem_cgroup *root = sc->target_mem_cgroup;
+	struct mem_cgroup_reclaim_cookie reclaim = {
 		.zone = zone,
+		.priority = priority,
 	};
+	struct mem_cgroup *memcg;
+
+	if (global_reclaim(sc)) {
+		struct mem_cgroup_zone mz = {
+			.mem_cgroup = NULL,
+			.zone = zone,
+		};
+
+		shrink_mem_cgroup_zone(priority, &mz, sc);
+		return;
+	}
+
+	memcg = mem_cgroup_iter(root, NULL, &reclaim);
+	do {
+		struct mem_cgroup_zone mz = {
+			.mem_cgroup = memcg,
+			.zone = zone,
+		};
 
 	shrink_mem_cgroup_zone(priority, &mz, sc);
+		/*
+		 * Limit reclaim has historically picked one memcg and
+		 * scanned it with decreasing priority levels until
+		 * nr_to_reclaim had been reclaimed. This priority
+		 * cycle is thus over after a single memcg.
+		 */
+		if (!global_reclaim(sc)) {
+			mem_cgroup_iter_break(root, memcg);
+			break;
+		}
+		memcg = mem_cgroup_iter(root, memcg, &reclaim);
+	} while (memcg);
 }
 
 /*
@@ -2374,6 +2405,10 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 		.order = 0,
 		.target_mem_cgroup = mem,
 	};
+	struct mem_cgroup_zone mz = {
+		.mem_cgroup = mem,
+		.zone = zone,
+	};
 
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2389,7 +2424,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 	 * will pick up pages from other mem cgroup's as well. We hack
 	 * the priority and make it zero.
 	 */
-	shrink_zone(0, zone, &sc);
+	shrink_mem_cgroup_zone(0, &mz, &sc);
 
 	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 