Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--    mm/memcontrol.c    105
1 file changed, 57 insertions(+), 48 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 23976fd885fd..d6ac0e33e150 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1536,12 +1536,8 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg)
  * start move here.
  */
 
-/* for quick checking without looking up memcg */
-atomic_t memcg_moving __read_mostly;
-
 static void mem_cgroup_start_move(struct mem_cgroup *memcg)
 {
-        atomic_inc(&memcg_moving);
         atomic_inc(&memcg->moving_account);
         synchronize_rcu();
 }
@@ -1552,10 +1548,8 @@ static void mem_cgroup_end_move(struct mem_cgroup *memcg)
          * Now, mem_cgroup_clear_mc() may call this function with NULL.
          * We check NULL in callee rather than caller.
          */
-        if (memcg) {
-                atomic_dec(&memcg_moving);
+        if (memcg)
                 atomic_dec(&memcg->moving_account);
-        }
 }
 
 /*
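The two hunks above drop the global memcg_moving counter; what remains is the per-memcg moving_account counter paired with an RCU grace period: the mover bumps the counter and calls synchronize_rcu(), which guarantees that every lock-free reader that sampled the counter as zero has left its RCU read-side section before the move proceeds. A minimal userspace sketch of that idea (all names are hypothetical, and grace_period() merely stands in for synchronize_rcu()):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct memcg_sketch {
        atomic_int moving_account;      /* > 0 while a charge move is in flight */
};

static void grace_period(void)
{
        /* Placeholder: in the kernel this is synchronize_rcu(). */
}

static void start_move(struct memcg_sketch *memcg)
{
        atomic_fetch_add(&memcg->moving_account, 1);
        grace_period();                 /* flush out readers on the lock-free fast path */
}

static void end_move(struct memcg_sketch *memcg)
{
        if (memcg)                      /* callers may pass NULL, as in the patch */
                atomic_fetch_sub(&memcg->moving_account, 1);
}

/* Reader-side fast-path test, mirroring mem_cgroup_begin_page_stat() below. */
static bool need_move_lock(struct memcg_sketch *memcg)
{
        return atomic_load(&memcg->moving_account) > 0;
}

int main(void)
{
        struct memcg_sketch m = { .moving_account = 0 };

        printf("before move: need lock? %d\n", need_move_lock(&m));
        start_move(&m);
        printf("during move: need lock? %d\n", need_move_lock(&m));
        end_move(&m);
        return 0;
}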
@@ -2204,41 +2198,52 @@ cleanup:
         return true;
 }
 
-/*
- * Used to update mapped file or writeback or other statistics.
+/**
+ * mem_cgroup_begin_page_stat - begin a page state statistics transaction
+ * @page: page that is going to change accounted state
+ * @locked: &memcg->move_lock slowpath was taken
+ * @flags: IRQ-state flags for &memcg->move_lock
  *
- * Notes: Race condition
+ * This function must mark the beginning of an accounted page state
+ * change to prevent double accounting when the page is concurrently
+ * being moved to another memcg:
  *
- * Charging occurs during page instantiation, while the page is
- * unmapped and locked in page migration, or while the page table is
- * locked in THP migration.  No race is possible.
+ *   memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
+ *   if (TestClearPageState(page))
+ *     mem_cgroup_update_page_stat(memcg, state, -1);
+ *   mem_cgroup_end_page_stat(memcg, locked, flags);
  *
- * Uncharge happens to pages with zero references, no race possible.
+ * The RCU lock is held throughout the transaction.  The fast path can
+ * get away without acquiring the memcg->move_lock (@locked is false)
+ * because page moving starts with an RCU grace period.
  *
- * Charge moving between groups is protected by checking mm->moving
- * account and taking the move_lock in the slowpath.
+ * The RCU lock also protects the memcg from being freed when the page
+ * state that is going to change is the only thing preventing the page
+ * from being uncharged.  E.g. end-writeback clearing PageWriteback(),
+ * which allows migration to go ahead and uncharge the page before the
+ * account transaction might be complete.
  */
-
-void __mem_cgroup_begin_update_page_stat(struct page *page,
-                                bool *locked, unsigned long *flags)
+struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
+                                              bool *locked,
+                                              unsigned long *flags)
 {
         struct mem_cgroup *memcg;
         struct page_cgroup *pc;
 
+        rcu_read_lock();
+
+        if (mem_cgroup_disabled())
+                return NULL;
+
         pc = lookup_page_cgroup(page);
 again:
         memcg = pc->mem_cgroup;
         if (unlikely(!memcg || !PageCgroupUsed(pc)))
-                return;
-        /*
-         * If this memory cgroup is not under account moving, we don't
-         * need to take move_lock_mem_cgroup(). Because we already hold
-         * rcu_read_lock(), any calls to move_account will be delayed until
-         * rcu_read_unlock().
-         */
-        VM_BUG_ON(!rcu_read_lock_held());
+                return NULL;
+
+        *locked = false;
         if (atomic_read(&memcg->moving_account) <= 0)
-                return;
+                return memcg;
 
         move_lock_mem_cgroup(memcg, flags);
         if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
@@ -2246,36 +2251,40 @@ again:
                 goto again;
         }
         *locked = true;
+
+        return memcg;
 }
 
-void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
+/**
+ * mem_cgroup_end_page_stat - finish a page state statistics transaction
+ * @memcg: the memcg that was accounted against
+ * @locked: value received from mem_cgroup_begin_page_stat()
+ * @flags: value received from mem_cgroup_begin_page_stat()
+ */
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked,
+                              unsigned long flags)
 {
-        struct page_cgroup *pc = lookup_page_cgroup(page);
+        if (memcg && locked)
+                move_unlock_mem_cgroup(memcg, &flags);
 
-        /*
-         * It's guaranteed that pc->mem_cgroup never changes while
-         * lock is held because a routine modifies pc->mem_cgroup
-         * should take move_lock_mem_cgroup().
-         */
-        move_unlock_mem_cgroup(pc->mem_cgroup, flags);
+        rcu_read_unlock();
 }
 
-void mem_cgroup_update_page_stat(struct page *page,
+/**
+ * mem_cgroup_update_page_stat - update page state statistics
+ * @memcg: memcg to account against
+ * @idx: page state item to account
+ * @val: number of pages (positive or negative)
+ *
+ * See mem_cgroup_begin_page_stat() for locking requirements.
+ */
+void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
                                  enum mem_cgroup_stat_index idx, int val)
 {
-        struct mem_cgroup *memcg;
-        struct page_cgroup *pc = lookup_page_cgroup(page);
-        unsigned long uninitialized_var(flags);
-
-        if (mem_cgroup_disabled())
-                return;
-
         VM_BUG_ON(!rcu_read_lock_held());
-        memcg = pc->mem_cgroup;
-        if (unlikely(!memcg || !PageCgroupUsed(pc)))
-                return;
 
-        this_cpu_add(memcg->stat->count[idx], val);
+        if (memcg)
+                this_cpu_add(memcg->stat->count[idx], val);
 }
 
 /*
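Putting the new API together: below is a fleshed-out version of the transaction shown in the mem_cgroup_begin_page_stat() kernel-doc above. TestClearPageState() and the `state` index are the placeholders used in that comment, not real kernel symbols.

void example_clear_page_state(struct page *page)
{
        struct mem_cgroup *memcg;
        unsigned long flags;
        bool locked;

        /*
         * Pin the page's memcg: takes rcu_read_lock(), returns NULL if
         * memcg is disabled or the page is uncharged, and falls back to
         * memcg->move_lock (setting *locked) only while a charge move
         * is in flight.
         */
        memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);

        if (TestClearPageState(page))
                mem_cgroup_update_page_stat(memcg, state, -1);

        /* Drops move_lock if the slowpath took it, then rcu_read_unlock(). */
        mem_cgroup_end_page_stat(memcg, locked, flags);
}

Note that both mem_cgroup_update_page_stat() and mem_cgroup_end_page_stat() tolerate a NULL memcg after this patch, so the caller needs no NULL check between begin and end.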