diff options
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 127 |
1 files changed, 57 insertions, 70 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e2b98a6875c0..f99f5991d6bb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -313,7 +313,8 @@ soft_limit_tree_from_page(struct page *page) | |||
313 | static void | 313 | static void |
314 | __mem_cgroup_insert_exceeded(struct mem_cgroup *mem, | 314 | __mem_cgroup_insert_exceeded(struct mem_cgroup *mem, |
315 | struct mem_cgroup_per_zone *mz, | 315 | struct mem_cgroup_per_zone *mz, |
316 | struct mem_cgroup_tree_per_zone *mctz) | 316 | struct mem_cgroup_tree_per_zone *mctz, |
317 | unsigned long long new_usage_in_excess) | ||
317 | { | 318 | { |
318 | struct rb_node **p = &mctz->rb_root.rb_node; | 319 | struct rb_node **p = &mctz->rb_root.rb_node; |
319 | struct rb_node *parent = NULL; | 320 | struct rb_node *parent = NULL; |
@@ -322,7 +323,9 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *mem, | |||
322 | if (mz->on_tree) | 323 | if (mz->on_tree) |
323 | return; | 324 | return; |
324 | 325 | ||
325 | mz->usage_in_excess = res_counter_soft_limit_excess(&mem->res); | 326 | mz->usage_in_excess = new_usage_in_excess; |
327 | if (!mz->usage_in_excess) | ||
328 | return; | ||
326 | while (*p) { | 329 | while (*p) { |
327 | parent = *p; | 330 | parent = *p; |
328 | mz_node = rb_entry(parent, struct mem_cgroup_per_zone, | 331 | mz_node = rb_entry(parent, struct mem_cgroup_per_zone, |
@@ -353,16 +356,6 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *mem, | |||
353 | } | 356 | } |
354 | 357 | ||
355 | static void | 358 | static void |
356 | mem_cgroup_insert_exceeded(struct mem_cgroup *mem, | ||
357 | struct mem_cgroup_per_zone *mz, | ||
358 | struct mem_cgroup_tree_per_zone *mctz) | ||
359 | { | ||
360 | spin_lock(&mctz->lock); | ||
361 | __mem_cgroup_insert_exceeded(mem, mz, mctz); | ||
362 | spin_unlock(&mctz->lock); | ||
363 | } | ||
364 | |||
365 | static void | ||
366 | mem_cgroup_remove_exceeded(struct mem_cgroup *mem, | 359 | mem_cgroup_remove_exceeded(struct mem_cgroup *mem, |
367 | struct mem_cgroup_per_zone *mz, | 360 | struct mem_cgroup_per_zone *mz, |
368 | struct mem_cgroup_tree_per_zone *mctz) | 361 | struct mem_cgroup_tree_per_zone *mctz) |
@@ -392,34 +385,36 @@ static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem) | |||
392 | 385 | ||
393 | static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) | 386 | static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) |
394 | { | 387 | { |
395 | unsigned long long prev_usage_in_excess, new_usage_in_excess; | 388 | unsigned long long excess; |
396 | bool updated_tree = false; | ||
397 | struct mem_cgroup_per_zone *mz; | 389 | struct mem_cgroup_per_zone *mz; |
398 | struct mem_cgroup_tree_per_zone *mctz; | 390 | struct mem_cgroup_tree_per_zone *mctz; |
399 | 391 | int nid = page_to_nid(page); | |
400 | mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page)); | 392 | int zid = page_zonenum(page); |
401 | mctz = soft_limit_tree_from_page(page); | 393 | mctz = soft_limit_tree_from_page(page); |
402 | 394 | ||
403 | /* | 395 | /* |
404 | * We do updates in lazy mode, mem's are removed | 396 | * Necessary to update all ancestors when hierarchy is used. |
405 | * lazily from the per-zone, per-node rb tree | 397 | * because their event counter is not touched. |
406 | */ | 398 | */ |
407 | prev_usage_in_excess = mz->usage_in_excess; | 399 | for (; mem; mem = parent_mem_cgroup(mem)) { |
408 | 400 | mz = mem_cgroup_zoneinfo(mem, nid, zid); | |
409 | new_usage_in_excess = res_counter_soft_limit_excess(&mem->res); | 401 | excess = res_counter_soft_limit_excess(&mem->res); |
410 | if (prev_usage_in_excess) { | 402 | /* |
411 | mem_cgroup_remove_exceeded(mem, mz, mctz); | 403 | * We have to update the tree if mz is on RB-tree or |
412 | updated_tree = true; | 404 | * mem is over its softlimit. |
413 | } | 405 | */ |
414 | if (!new_usage_in_excess) | 406 | if (excess || mz->on_tree) { |
415 | goto done; | 407 | spin_lock(&mctz->lock); |
416 | mem_cgroup_insert_exceeded(mem, mz, mctz); | 408 | /* if on-tree, remove it */ |
417 | 409 | if (mz->on_tree) | |
418 | done: | 410 | __mem_cgroup_remove_exceeded(mem, mz, mctz); |
419 | if (updated_tree) { | 411 | /* |
420 | spin_lock(&mctz->lock); | 412 | * Insert again. mz->usage_in_excess will be updated. |
421 | mz->usage_in_excess = new_usage_in_excess; | 413 | * If excess is 0, no tree ops. |
422 | spin_unlock(&mctz->lock); | 414 | */ |
415 | __mem_cgroup_insert_exceeded(mem, mz, mctz, excess); | ||
416 | spin_unlock(&mctz->lock); | ||
417 | } | ||
423 | } | 418 | } |
424 | } | 419 | } |
425 | 420 | ||
@@ -447,9 +442,10 @@ static struct mem_cgroup_per_zone * | |||
447 | __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | 442 | __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) |
448 | { | 443 | { |
449 | struct rb_node *rightmost = NULL; | 444 | struct rb_node *rightmost = NULL; |
450 | struct mem_cgroup_per_zone *mz = NULL; | 445 | struct mem_cgroup_per_zone *mz; |
451 | 446 | ||
452 | retry: | 447 | retry: |
448 | mz = NULL; | ||
453 | rightmost = rb_last(&mctz->rb_root); | 449 | rightmost = rb_last(&mctz->rb_root); |
454 | if (!rightmost) | 450 | if (!rightmost) |
455 | goto done; /* Nothing to reclaim from */ | 451 | goto done; /* Nothing to reclaim from */ |
@@ -1270,9 +1266,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1270 | gfp_t gfp_mask, struct mem_cgroup **memcg, | 1266 | gfp_t gfp_mask, struct mem_cgroup **memcg, |
1271 | bool oom, struct page *page) | 1267 | bool oom, struct page *page) |
1272 | { | 1268 | { |
1273 | struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit; | 1269 | struct mem_cgroup *mem, *mem_over_limit; |
1274 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | 1270 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
1275 | struct res_counter *fail_res, *soft_fail_res = NULL; | 1271 | struct res_counter *fail_res; |
1276 | 1272 | ||
1277 | if (unlikely(test_thread_flag(TIF_MEMDIE))) { | 1273 | if (unlikely(test_thread_flag(TIF_MEMDIE))) { |
1278 | /* Don't account this! */ | 1274 | /* Don't account this! */ |
@@ -1304,17 +1300,16 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1304 | 1300 | ||
1305 | if (mem_cgroup_is_root(mem)) | 1301 | if (mem_cgroup_is_root(mem)) |
1306 | goto done; | 1302 | goto done; |
1307 | ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res, | 1303 | ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res); |
1308 | &soft_fail_res); | ||
1309 | if (likely(!ret)) { | 1304 | if (likely(!ret)) { |
1310 | if (!do_swap_account) | 1305 | if (!do_swap_account) |
1311 | break; | 1306 | break; |
1312 | ret = res_counter_charge(&mem->memsw, PAGE_SIZE, | 1307 | ret = res_counter_charge(&mem->memsw, PAGE_SIZE, |
1313 | &fail_res, NULL); | 1308 | &fail_res); |
1314 | if (likely(!ret)) | 1309 | if (likely(!ret)) |
1315 | break; | 1310 | break; |
1316 | /* mem+swap counter fails */ | 1311 | /* mem+swap counter fails */ |
1317 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); | 1312 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
1318 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; | 1313 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; |
1319 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, | 1314 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, |
1320 | memsw); | 1315 | memsw); |
@@ -1353,16 +1348,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1353 | } | 1348 | } |
1354 | } | 1349 | } |
1355 | /* | 1350 | /* |
1356 | * Insert just the ancestor, we should trickle down to the correct | 1351 | * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. |
1357 | * cgroup for reclaim, since the other nodes will be below their | 1352 | * if they exceeds softlimit. |
1358 | * soft limit | ||
1359 | */ | 1353 | */ |
1360 | if (soft_fail_res) { | 1354 | if (mem_cgroup_soft_limit_check(mem)) |
1361 | mem_over_soft_limit = | 1355 | mem_cgroup_update_tree(mem, page); |
1362 | mem_cgroup_from_res_counter(soft_fail_res, res); | ||
1363 | if (mem_cgroup_soft_limit_check(mem_over_soft_limit)) | ||
1364 | mem_cgroup_update_tree(mem_over_soft_limit, page); | ||
1365 | } | ||
1366 | done: | 1356 | done: |
1367 | return 0; | 1357 | return 0; |
1368 | nomem: | 1358 | nomem: |
@@ -1437,10 +1427,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | |||
1437 | if (unlikely(PageCgroupUsed(pc))) { | 1427 | if (unlikely(PageCgroupUsed(pc))) { |
1438 | unlock_page_cgroup(pc); | 1428 | unlock_page_cgroup(pc); |
1439 | if (!mem_cgroup_is_root(mem)) { | 1429 | if (!mem_cgroup_is_root(mem)) { |
1440 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); | 1430 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
1441 | if (do_swap_account) | 1431 | if (do_swap_account) |
1442 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, | 1432 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); |
1443 | NULL); | ||
1444 | } | 1433 | } |
1445 | css_put(&mem->css); | 1434 | css_put(&mem->css); |
1446 | return; | 1435 | return; |
@@ -1519,7 +1508,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
1519 | goto out; | 1508 | goto out; |
1520 | 1509 | ||
1521 | if (!mem_cgroup_is_root(from)) | 1510 | if (!mem_cgroup_is_root(from)) |
1522 | res_counter_uncharge(&from->res, PAGE_SIZE, NULL); | 1511 | res_counter_uncharge(&from->res, PAGE_SIZE); |
1523 | mem_cgroup_charge_statistics(from, pc, false); | 1512 | mem_cgroup_charge_statistics(from, pc, false); |
1524 | 1513 | ||
1525 | page = pc->page; | 1514 | page = pc->page; |
@@ -1539,7 +1528,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
1539 | } | 1528 | } |
1540 | 1529 | ||
1541 | if (do_swap_account && !mem_cgroup_is_root(from)) | 1530 | if (do_swap_account && !mem_cgroup_is_root(from)) |
1542 | res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL); | 1531 | res_counter_uncharge(&from->memsw, PAGE_SIZE); |
1543 | css_put(&from->css); | 1532 | css_put(&from->css); |
1544 | 1533 | ||
1545 | css_get(&to->css); | 1534 | css_get(&to->css); |
@@ -1610,9 +1599,9 @@ uncharge: | |||
1610 | css_put(&parent->css); | 1599 | css_put(&parent->css); |
1611 | /* uncharge if move fails */ | 1600 | /* uncharge if move fails */ |
1612 | if (!mem_cgroup_is_root(parent)) { | 1601 | if (!mem_cgroup_is_root(parent)) { |
1613 | res_counter_uncharge(&parent->res, PAGE_SIZE, NULL); | 1602 | res_counter_uncharge(&parent->res, PAGE_SIZE); |
1614 | if (do_swap_account) | 1603 | if (do_swap_account) |
1615 | res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL); | 1604 | res_counter_uncharge(&parent->memsw, PAGE_SIZE); |
1616 | } | 1605 | } |
1617 | return ret; | 1606 | return ret; |
1618 | } | 1607 | } |
@@ -1803,8 +1792,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, | |||
1803 | * calling css_tryget | 1792 | * calling css_tryget |
1804 | */ | 1793 | */ |
1805 | if (!mem_cgroup_is_root(memcg)) | 1794 | if (!mem_cgroup_is_root(memcg)) |
1806 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE, | 1795 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); |
1807 | NULL); | ||
1808 | mem_cgroup_swap_statistics(memcg, false); | 1796 | mem_cgroup_swap_statistics(memcg, false); |
1809 | mem_cgroup_put(memcg); | 1797 | mem_cgroup_put(memcg); |
1810 | } | 1798 | } |
@@ -1831,9 +1819,9 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) | |||
1831 | if (!mem) | 1819 | if (!mem) |
1832 | return; | 1820 | return; |
1833 | if (!mem_cgroup_is_root(mem)) { | 1821 | if (!mem_cgroup_is_root(mem)) { |
1834 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); | 1822 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
1835 | if (do_swap_account) | 1823 | if (do_swap_account) |
1836 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | 1824 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); |
1837 | } | 1825 | } |
1838 | css_put(&mem->css); | 1826 | css_put(&mem->css); |
1839 | } | 1827 | } |
@@ -1848,7 +1836,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
1848 | struct page_cgroup *pc; | 1836 | struct page_cgroup *pc; |
1849 | struct mem_cgroup *mem = NULL; | 1837 | struct mem_cgroup *mem = NULL; |
1850 | struct mem_cgroup_per_zone *mz; | 1838 | struct mem_cgroup_per_zone *mz; |
1851 | bool soft_limit_excess = false; | ||
1852 | 1839 | ||
1853 | if (mem_cgroup_disabled()) | 1840 | if (mem_cgroup_disabled()) |
1854 | return NULL; | 1841 | return NULL; |
@@ -1888,10 +1875,10 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
1888 | } | 1875 | } |
1889 | 1876 | ||
1890 | if (!mem_cgroup_is_root(mem)) { | 1877 | if (!mem_cgroup_is_root(mem)) { |
1891 | res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess); | 1878 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
1892 | if (do_swap_account && | 1879 | if (do_swap_account && |
1893 | (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) | 1880 | (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) |
1894 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | 1881 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); |
1895 | } | 1882 | } |
1896 | if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | 1883 | if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) |
1897 | mem_cgroup_swap_statistics(mem, true); | 1884 | mem_cgroup_swap_statistics(mem, true); |
@@ -1908,7 +1895,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
1908 | mz = page_cgroup_zoneinfo(pc); | 1895 | mz = page_cgroup_zoneinfo(pc); |
1909 | unlock_page_cgroup(pc); | 1896 | unlock_page_cgroup(pc); |
1910 | 1897 | ||
1911 | if (soft_limit_excess && mem_cgroup_soft_limit_check(mem)) | 1898 | if (mem_cgroup_soft_limit_check(mem)) |
1912 | mem_cgroup_update_tree(mem, page); | 1899 | mem_cgroup_update_tree(mem, page); |
1913 | /* at swapout, this memcg will be accessed to record to swap */ | 1900 | /* at swapout, this memcg will be accessed to record to swap */ |
1914 | if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | 1901 | if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) |
@@ -1986,7 +1973,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) | |||
1986 | * This memcg can be obsolete one. We avoid calling css_tryget | 1973 | * This memcg can be obsolete one. We avoid calling css_tryget |
1987 | */ | 1974 | */ |
1988 | if (!mem_cgroup_is_root(memcg)) | 1975 | if (!mem_cgroup_is_root(memcg)) |
1989 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL); | 1976 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); |
1990 | mem_cgroup_swap_statistics(memcg, false); | 1977 | mem_cgroup_swap_statistics(memcg, false); |
1991 | mem_cgroup_put(memcg); | 1978 | mem_cgroup_put(memcg); |
1992 | } | 1979 | } |
@@ -2233,6 +2220,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
2233 | unsigned long reclaimed; | 2220 | unsigned long reclaimed; |
2234 | int loop = 0; | 2221 | int loop = 0; |
2235 | struct mem_cgroup_tree_per_zone *mctz; | 2222 | struct mem_cgroup_tree_per_zone *mctz; |
2223 | unsigned long long excess; | ||
2236 | 2224 | ||
2237 | if (order > 0) | 2225 | if (order > 0) |
2238 | return 0; | 2226 | return 0; |
@@ -2284,9 +2272,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
2284 | break; | 2272 | break; |
2285 | } while (1); | 2273 | } while (1); |
2286 | } | 2274 | } |
2287 | mz->usage_in_excess = | ||
2288 | res_counter_soft_limit_excess(&mz->mem->res); | ||
2289 | __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); | 2275 | __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); |
2276 | excess = res_counter_soft_limit_excess(&mz->mem->res); | ||
2290 | /* | 2277 | /* |
2291 | * One school of thought says that we should not add | 2278 | * One school of thought says that we should not add |
2292 | * back the node to the tree if reclaim returns 0. | 2279 | * back the node to the tree if reclaim returns 0. |
@@ -2295,8 +2282,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
2295 | * memory to reclaim from. Consider this as a longer | 2282 | * memory to reclaim from. Consider this as a longer |
2296 | * term TODO. | 2283 | * term TODO. |
2297 | */ | 2284 | */ |
2298 | if (mz->usage_in_excess) | 2285 | /* If excess == 0, no tree ops */ |
2299 | __mem_cgroup_insert_exceeded(mz->mem, mz, mctz); | 2286 | __mem_cgroup_insert_exceeded(mz->mem, mz, mctz, excess); |
2300 | spin_unlock(&mctz->lock); | 2287 | spin_unlock(&mctz->lock); |
2301 | css_put(&mz->mem->css); | 2288 | css_put(&mz->mem->css); |
2302 | loop++; | 2289 | loop++; |