Diffstat (limited to 'mm/memcontrol.c')
 -rw-r--r--  mm/memcontrol.c | 266
 1 file changed, 172 insertions(+), 94 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8ab841031436..da53a252b259 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -600,23 +600,24 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
 }
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
-					 struct page_cgroup *pc,
-					 bool charge)
+					 bool file, int nr_pages)
 {
-	int val = (charge) ? 1 : -1;
-
 	preempt_disable();
 
-	if (PageCgroupCache(pc))
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], val);
+	if (file)
+		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
 	else
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], val);
+		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
 
-	if (charge)
+	/* pagein of a big page is an event. So, ignore page size */
+	if (nr_pages > 0)
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]);
-	else
+	else {
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]);
-	__this_cpu_inc(mem->stat->count[MEM_CGROUP_EVENTS]);
+		nr_pages = -nr_pages; /* for event */
+	}
+
+	__this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages);
 
 	preempt_enable();
 }
@@ -815,7 +816,8 @@ void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
 	 * removed from global LRU.
 	 */
 	mz = page_cgroup_zoneinfo(pc);
-	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
 	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
 	VM_BUG_ON(list_empty(&pc->lru));
@@ -836,13 +838,12 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
 		return;
 
 	pc = lookup_page_cgroup(page);
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	/* unused or root page is not rotated. */
-	if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
+	if (!PageCgroupUsed(pc))
+		return;
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
+	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
 	mz = page_cgroup_zoneinfo(pc);
 	list_move(&pc->lru, &mz->lists[lru]);
@@ -857,16 +858,13 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 		return;
 	pc = lookup_page_cgroup(page);
 	VM_BUG_ON(PageCgroupAcctLRU(pc));
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	if (!PageCgroupUsed(pc))
 		return;
-
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc);
-	MEM_CGROUP_ZSTAT(mz, lru) += 1;
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
 	SetPageCgroupAcctLRU(pc);
 	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
@@ -1030,14 +1028,10 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 		return NULL;
 
 	pc = lookup_page_cgroup(page);
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	if (!PageCgroupUsed(pc))
 		return NULL;
-
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc);
 	if (!mz)
 		return NULL;
@@ -1119,6 +1113,23 @@ static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
 	return false;
 }
 
+/**
+ * mem_cgroup_check_margin - check if the memory cgroup allows charging
+ * @mem: memory cgroup to check
+ * @bytes: the number of bytes the caller intends to charge
+ *
+ * Returns a boolean value on whether @mem can be charged @bytes or
+ * whether this would exceed the limit.
+ */
+static bool mem_cgroup_check_margin(struct mem_cgroup *mem, unsigned long bytes)
+{
+	if (!res_counter_check_margin(&mem->res, bytes))
+		return false;
+	if (do_swap_account && !res_counter_check_margin(&mem->memsw, bytes))
+		return false;
+	return true;
+}
+
 static unsigned int get_swappiness(struct mem_cgroup *memcg)
 {
 	struct cgroup *cgrp = memcg->css.cgroup;
@@ -1615,7 +1626,7 @@ void mem_cgroup_update_page_stat(struct page *page,
 	if (unlikely(!mem || !PageCgroupUsed(pc)))
 		goto out;
 	/* pc->mem_cgroup is unstable ? */
-	if (unlikely(mem_cgroup_stealed(mem))) {
+	if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) {
 		/* take a lock against to access pc->mem_cgroup */
 		move_lock_page_cgroup(pc, &flags);
 		need_unlock = true;
@@ -1840,27 +1851,39 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
 		if (likely(!ret))
 			return CHARGE_OK;
 
+		res_counter_uncharge(&mem->res, csize);
 		mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
 		flags |= MEM_CGROUP_RECLAIM_NOSWAP;
 	} else
 		mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
-
-	if (csize > PAGE_SIZE) /* change csize and retry */
+	/*
+	 * csize can be either a huge page (HPAGE_SIZE), a batch of
+	 * regular pages (CHARGE_SIZE), or a single regular page
+	 * (PAGE_SIZE).
+	 *
+	 * Never reclaim on behalf of optional batching, retry with a
+	 * single page instead.
+	 */
+	if (csize == CHARGE_SIZE)
 		return CHARGE_RETRY;
 
 	if (!(gfp_mask & __GFP_WAIT))
 		return CHARGE_WOULDBLOCK;
 
 	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
 					      gfp_mask, flags);
+	if (mem_cgroup_check_margin(mem_over_limit, csize))
+		return CHARGE_RETRY;
 	/*
-	 * try_to_free_mem_cgroup_pages() might not give us a full
-	 * picture of reclaim. Some pages are reclaimed and might be
-	 * moved to swap cache or just unmapped from the cgroup.
-	 * Check the limit again to see if the reclaim reduced the
-	 * current usage of the cgroup before giving up
+	 * Even though the limit is exceeded at this point, reclaim
+	 * may have been able to free some pages. Retry the charge
+	 * before killing the task.
+	 *
+	 * Only for regular pages, though: huge pages are rather
+	 * unlikely to succeed so close to the limit, and we fall back
+	 * to regular pages anyway in case of failure.
 	 */
-	if (ret || mem_cgroup_check_under_limit(mem_over_limit))
+	if (csize == PAGE_SIZE && ret)
 		return CHARGE_RETRY;
 
 	/*
@@ -2084,14 +2107,27 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 	return mem;
 }
 
-/*
- * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be
- * USED state. If already USED, uncharge and return.
- */
-static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
-					 struct page_cgroup *pc,
-					 enum charge_type ctype)
+static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+				       struct page_cgroup *pc,
+				       enum charge_type ctype,
+				       int page_size)
 {
+	int nr_pages = page_size >> PAGE_SHIFT;
+
+	/* try_charge() can return NULL to *memcg, taking care of it. */
+	if (!mem)
+		return;
+
+	lock_page_cgroup(pc);
+	if (unlikely(PageCgroupUsed(pc))) {
+		unlock_page_cgroup(pc);
+		mem_cgroup_cancel_charge(mem, page_size);
+		return;
+	}
+	/*
+	 * we don't need page_cgroup_lock about tail pages, becase they are not
+	 * accessed by any other context at this point.
+	 */
 	pc->mem_cgroup = mem;
 	/*
 	 * We access a page_cgroup asynchronously without lock_page_cgroup().
@@ -2115,43 +2151,57 @@ static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
 		break;
 	}
 
-	mem_cgroup_charge_statistics(mem, pc, true);
+	mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages);
+	unlock_page_cgroup(pc);
+	/*
+	 * "charge_statistics" updated event counter. Then, check it.
+	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
+	 * if they exceeds softlimit.
+	 */
+	memcg_check_events(mem, pc->page);
 }
 
-static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
-				       struct page_cgroup *pc,
-				       enum charge_type ctype,
-				       int page_size)
-{
-	int i;
-	int count = page_size >> PAGE_SHIFT;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
-	/* try_charge() can return NULL to *memcg, taking care of it. */
-	if (!mem)
-		return;
+#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
+			(1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
+/*
+ * Because tail pages are not marked as "used", set it. We're under
+ * zone->lru_lock, 'splitting on pmd' and compund_lock.
+ */
+void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
+{
+	struct page_cgroup *head_pc = lookup_page_cgroup(head);
+	struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
+	unsigned long flags;
 
-	lock_page_cgroup(pc);
-	if (unlikely(PageCgroupUsed(pc))) {
-		unlock_page_cgroup(pc);
-		mem_cgroup_cancel_charge(mem, page_size);
+	if (mem_cgroup_disabled())
 		return;
-	}
-
 	/*
-	 * we don't need page_cgroup_lock about tail pages, becase they are not
-	 * accessed by any other context at this point.
+	 * We have no races with charge/uncharge but will have races with
+	 * page state accounting.
 	 */
-	for (i = 0; i < count; i++)
-		____mem_cgroup_commit_charge(mem, pc + i, ctype);
+	move_lock_page_cgroup(head_pc, &flags);
 
-	unlock_page_cgroup(pc);
-	/*
-	 * "charge_statistics" updated event counter. Then, check it.
-	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
-	 * if they exceeds softlimit.
-	 */
-	memcg_check_events(mem, pc->page);
+	tail_pc->mem_cgroup = head_pc->mem_cgroup;
+	smp_wmb(); /* see __commit_charge() */
+	if (PageCgroupAcctLRU(head_pc)) {
+		enum lru_list lru;
+		struct mem_cgroup_per_zone *mz;
+
+		/*
+		 * LRU flags cannot be copied because we need to add tail
+		 *.page to LRU by generic call and our hook will be called.
+		 * We hold lru_lock, then, reduce counter directly.
+		 */
+		lru = page_lru(head);
+		mz = page_cgroup_zoneinfo(head_pc);
+		MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	}
+	tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+	move_unlock_page_cgroup(head_pc, &flags);
 }
+#endif
 
 /**
  * __mem_cgroup_move_account - move account of the page
@@ -2171,8 +2221,11 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
  */
 
 static void __mem_cgroup_move_account(struct page_cgroup *pc,
-	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge,
+	int charge_size)
 {
+	int nr_pages = charge_size >> PAGE_SHIFT;
+
 	VM_BUG_ON(from == to);
 	VM_BUG_ON(PageLRU(pc->page));
 	VM_BUG_ON(!page_is_cgroup_locked(pc));
@@ -2186,14 +2239,14 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
 		__this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
 		preempt_enable();
 	}
-	mem_cgroup_charge_statistics(from, pc, false);
+	mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
 	if (uncharge)
 		/* This is not "cancel", but cancel_charge does all we need. */
-		mem_cgroup_cancel_charge(from, PAGE_SIZE);
+		mem_cgroup_cancel_charge(from, charge_size);
 
 	/* caller should have done css_get */
 	pc->mem_cgroup = to;
-	mem_cgroup_charge_statistics(to, pc, true);
+	mem_cgroup_charge_statistics(to, PageCgroupCache(pc), nr_pages);
 	/*
 	 * We charges against "to" which may not have any tasks. Then, "to"
 	 * can be under rmdir(). But in current implementation, caller of
@@ -2208,15 +2261,24 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
  * __mem_cgroup_move_account()
  */
 static int mem_cgroup_move_account(struct page_cgroup *pc,
-	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+	struct mem_cgroup *from, struct mem_cgroup *to,
+	bool uncharge, int charge_size)
 {
 	int ret = -EINVAL;
 	unsigned long flags;
+	/*
+	 * The page is isolated from LRU. So, collapse function
+	 * will not handle this page. But page splitting can happen.
+	 * Do this check under compound_page_lock(). The caller should
+	 * hold it.
+	 */
+	if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page))
+		return -EBUSY;
 
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
 		move_lock_page_cgroup(pc, &flags);
-		__mem_cgroup_move_account(pc, from, to, uncharge);
+		__mem_cgroup_move_account(pc, from, to, uncharge, charge_size);
 		move_unlock_page_cgroup(pc, &flags);
 		ret = 0;
 	}
@@ -2241,6 +2303,8 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 	struct cgroup *cg = child->css.cgroup;
 	struct cgroup *pcg = cg->parent;
 	struct mem_cgroup *parent;
+	int page_size = PAGE_SIZE;
+	unsigned long flags;
 	int ret;
 
 	/* Is ROOT ? */
@@ -2253,15 +2317,24 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 	if (isolate_lru_page(page))
 		goto put;
 
+	if (PageTransHuge(page))
+		page_size = HPAGE_SIZE;
+
 	parent = mem_cgroup_from_cont(pcg);
-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false,
-				      PAGE_SIZE);
+	ret = __mem_cgroup_try_charge(NULL, gfp_mask,
+				      &parent, false, page_size);
 	if (ret || !parent)
 		goto put_back;
 
-	ret = mem_cgroup_move_account(pc, child, parent, true);
+	if (page_size > PAGE_SIZE)
+		flags = compound_lock_irqsave(page);
+
+	ret = mem_cgroup_move_account(pc, child, parent, true, page_size);
 	if (ret)
-		mem_cgroup_cancel_charge(parent, PAGE_SIZE);
+		mem_cgroup_cancel_charge(parent, page_size);
+
+	if (page_size > PAGE_SIZE)
+		compound_unlock_irqrestore(page, flags);
 put_back:
 	putback_lru_page(page);
 put:
@@ -2280,13 +2353,19 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask, enum charge_type ctype)
 {
 	struct mem_cgroup *mem = NULL;
+	int page_size = PAGE_SIZE;
 	struct page_cgroup *pc;
+	bool oom = true;
 	int ret;
-	int page_size = PAGE_SIZE;
 
 	if (PageTransHuge(page)) {
 		page_size <<= compound_order(page);
 		VM_BUG_ON(!PageTransHuge(page));
+		/*
+		 * Never OOM-kill a process for a huge page. The
+		 * fault handler will fall back to regular pages.
+		 */
+		oom = false;
 	}
 
 	pc = lookup_page_cgroup(page);
@@ -2295,7 +2374,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 		return 0;
 	prefetchw(pc);
 
-	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page_size);
+	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, oom, page_size);
 	if (ret || !mem)
 		return ret;
 
@@ -2546,7 +2625,6 @@ direct_uncharge:
 static struct mem_cgroup *
 __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
-	int i;
 	int count;
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
@@ -2596,8 +2674,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		break;
 	}
 
-	for (i = 0; i < count; i++)
-		mem_cgroup_charge_statistics(mem, pc + i, false);
+	mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -count);
 
 	ClearPageCgroupUsed(pc);
 	/*
@@ -4844,7 +4921,7 @@ retry:
 			goto put;
 		pc = lookup_page_cgroup(page);
 		if (!mem_cgroup_move_account(pc,
-					mc.from, mc.to, false)) {
+					mc.from, mc.to, false, PAGE_SIZE)) {
 			mc.precharge--;
 			/* we uncharge from mc.from later. */
 			mc.moved_charge++;
@@ -4983,9 +5060,9 @@ struct cgroup_subsys mem_cgroup_subsys = {
 static int __init enable_swap_account(char *s)
 {
 	/* consider enabled if no parameter or 1 is given */
-	if (!s || !strcmp(s, "1"))
+	if (!(*s) || !strcmp(s, "=1"))
 		really_do_swap_account = 1;
-	else if (!strcmp(s, "0"))
+	else if (!strcmp(s, "=0"))
 		really_do_swap_account = 0;
 	return 1;
 }
@@ -4993,7 +5070,8 @@ __setup("swapaccount", enable_swap_account);
 
 static int __init disable_swap_account(char *s)
 {
-	enable_swap_account("0");
+	printk_once("noswapaccount is deprecated and will be removed in 2.6.40. Use swapaccount=0 instead\n");
+	enable_swap_account("=0");
 	return 1;
 }
 __setup("noswapaccount", disable_swap_account);