diff options
Diffstat (limited to 'mm/memcontrol.c')
| -rw-r--r-- | mm/memcontrol.c | 127 |
1 files changed, 57 insertions, 70 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e2b98a6875c0..f99f5991d6bb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -313,7 +313,8 @@ soft_limit_tree_from_page(struct page *page) | |||
| 313 | static void | 313 | static void |
| 314 | __mem_cgroup_insert_exceeded(struct mem_cgroup *mem, | 314 | __mem_cgroup_insert_exceeded(struct mem_cgroup *mem, |
| 315 | struct mem_cgroup_per_zone *mz, | 315 | struct mem_cgroup_per_zone *mz, |
| 316 | struct mem_cgroup_tree_per_zone *mctz) | 316 | struct mem_cgroup_tree_per_zone *mctz, |
| 317 | unsigned long long new_usage_in_excess) | ||
| 317 | { | 318 | { |
| 318 | struct rb_node **p = &mctz->rb_root.rb_node; | 319 | struct rb_node **p = &mctz->rb_root.rb_node; |
| 319 | struct rb_node *parent = NULL; | 320 | struct rb_node *parent = NULL; |
| @@ -322,7 +323,9 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *mem, | |||
| 322 | if (mz->on_tree) | 323 | if (mz->on_tree) |
| 323 | return; | 324 | return; |
| 324 | 325 | ||
| 325 | mz->usage_in_excess = res_counter_soft_limit_excess(&mem->res); | 326 | mz->usage_in_excess = new_usage_in_excess; |
| 327 | if (!mz->usage_in_excess) | ||
| 328 | return; | ||
| 326 | while (*p) { | 329 | while (*p) { |
| 327 | parent = *p; | 330 | parent = *p; |
| 328 | mz_node = rb_entry(parent, struct mem_cgroup_per_zone, | 331 | mz_node = rb_entry(parent, struct mem_cgroup_per_zone, |
| @@ -353,16 +356,6 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *mem, | |||
| 353 | } | 356 | } |
| 354 | 357 | ||
| 355 | static void | 358 | static void |
| 356 | mem_cgroup_insert_exceeded(struct mem_cgroup *mem, | ||
| 357 | struct mem_cgroup_per_zone *mz, | ||
| 358 | struct mem_cgroup_tree_per_zone *mctz) | ||
| 359 | { | ||
| 360 | spin_lock(&mctz->lock); | ||
| 361 | __mem_cgroup_insert_exceeded(mem, mz, mctz); | ||
| 362 | spin_unlock(&mctz->lock); | ||
| 363 | } | ||
| 364 | |||
| 365 | static void | ||
| 366 | mem_cgroup_remove_exceeded(struct mem_cgroup *mem, | 359 | mem_cgroup_remove_exceeded(struct mem_cgroup *mem, |
| 367 | struct mem_cgroup_per_zone *mz, | 360 | struct mem_cgroup_per_zone *mz, |
| 368 | struct mem_cgroup_tree_per_zone *mctz) | 361 | struct mem_cgroup_tree_per_zone *mctz) |
| @@ -392,34 +385,36 @@ static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem) | |||
| 392 | 385 | ||
| 393 | static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) | 386 | static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) |
| 394 | { | 387 | { |
| 395 | unsigned long long prev_usage_in_excess, new_usage_in_excess; | 388 | unsigned long long excess; |
| 396 | bool updated_tree = false; | ||
| 397 | struct mem_cgroup_per_zone *mz; | 389 | struct mem_cgroup_per_zone *mz; |
| 398 | struct mem_cgroup_tree_per_zone *mctz; | 390 | struct mem_cgroup_tree_per_zone *mctz; |
| 399 | 391 | int nid = page_to_nid(page); | |
| 400 | mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page)); | 392 | int zid = page_zonenum(page); |
| 401 | mctz = soft_limit_tree_from_page(page); | 393 | mctz = soft_limit_tree_from_page(page); |
| 402 | 394 | ||
| 403 | /* | 395 | /* |
| 404 | * We do updates in lazy mode, mem's are removed | 396 | * Necessary to update all ancestors when hierarchy is used, |
| 405 | * lazily from the per-zone, per-node rb tree | 397 | * because their event counter is not touched. |
| 406 | */ | 398 | */ |
| 407 | prev_usage_in_excess = mz->usage_in_excess; | 399 | for (; mem; mem = parent_mem_cgroup(mem)) { |
| 408 | 400 | mz = mem_cgroup_zoneinfo(mem, nid, zid); | |
| 409 | new_usage_in_excess = res_counter_soft_limit_excess(&mem->res); | 401 | excess = res_counter_soft_limit_excess(&mem->res); |
| 410 | if (prev_usage_in_excess) { | 402 | /* |
| 411 | mem_cgroup_remove_exceeded(mem, mz, mctz); | 403 | * We have to update the tree if mz is on RB-tree or |
| 412 | updated_tree = true; | 404 | * mem is over its softlimit. |
| 413 | } | 405 | */ |
| 414 | if (!new_usage_in_excess) | 406 | if (excess || mz->on_tree) { |
| 415 | goto done; | 407 | spin_lock(&mctz->lock); |
| 416 | mem_cgroup_insert_exceeded(mem, mz, mctz); | 408 | /* if on-tree, remove it */ |
| 417 | 409 | if (mz->on_tree) | |
| 418 | done: | 410 | __mem_cgroup_remove_exceeded(mem, mz, mctz); |
| 419 | if (updated_tree) { | 411 | /* |
| 420 | spin_lock(&mctz->lock); | 412 | * Insert again. mz->usage_in_excess will be updated. |
| 421 | mz->usage_in_excess = new_usage_in_excess; | 413 | * If excess is 0, no tree ops. |
| 422 | spin_unlock(&mctz->lock); | 414 | */ |
| 415 | __mem_cgroup_insert_exceeded(mem, mz, mctz, excess); | ||
| 416 | spin_unlock(&mctz->lock); | ||
| 417 | } | ||
| 423 | } | 418 | } |
| 424 | } | 419 | } |
| 425 | 420 | ||
| @@ -447,9 +442,10 @@ static struct mem_cgroup_per_zone * | |||
| 447 | __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | 442 | __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) |
| 448 | { | 443 | { |
| 449 | struct rb_node *rightmost = NULL; | 444 | struct rb_node *rightmost = NULL; |
| 450 | struct mem_cgroup_per_zone *mz = NULL; | 445 | struct mem_cgroup_per_zone *mz; |
| 451 | 446 | ||
| 452 | retry: | 447 | retry: |
| 448 | mz = NULL; | ||
| 453 | rightmost = rb_last(&mctz->rb_root); | 449 | rightmost = rb_last(&mctz->rb_root); |
| 454 | if (!rightmost) | 450 | if (!rightmost) |
| 455 | goto done; /* Nothing to reclaim from */ | 451 | goto done; /* Nothing to reclaim from */ |
| @@ -1270,9 +1266,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
| 1270 | gfp_t gfp_mask, struct mem_cgroup **memcg, | 1266 | gfp_t gfp_mask, struct mem_cgroup **memcg, |
| 1271 | bool oom, struct page *page) | 1267 | bool oom, struct page *page) |
| 1272 | { | 1268 | { |
| 1273 | struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit; | 1269 | struct mem_cgroup *mem, *mem_over_limit; |
| 1274 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | 1270 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
| 1275 | struct res_counter *fail_res, *soft_fail_res = NULL; | 1271 | struct res_counter *fail_res; |
| 1276 | 1272 | ||
| 1277 | if (unlikely(test_thread_flag(TIF_MEMDIE))) { | 1273 | if (unlikely(test_thread_flag(TIF_MEMDIE))) { |
| 1278 | /* Don't account this! */ | 1274 | /* Don't account this! */ |
| @@ -1304,17 +1300,16 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
| 1304 | 1300 | ||
| 1305 | if (mem_cgroup_is_root(mem)) | 1301 | if (mem_cgroup_is_root(mem)) |
| 1306 | goto done; | 1302 | goto done; |
| 1307 | ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res, | 1303 | ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res); |
| 1308 | &soft_fail_res); | ||
| 1309 | if (likely(!ret)) { | 1304 | if (likely(!ret)) { |
| 1310 | if (!do_swap_account) | 1305 | if (!do_swap_account) |
| 1311 | break; | 1306 | break; |
| 1312 | ret = res_counter_charge(&mem->memsw, PAGE_SIZE, | 1307 | ret = res_counter_charge(&mem->memsw, PAGE_SIZE, |
| 1313 | &fail_res, NULL); | 1308 | &fail_res); |
| 1314 | if (likely(!ret)) | 1309 | if (likely(!ret)) |
| 1315 | break; | 1310 | break; |
| 1316 | /* mem+swap counter fails */ | 1311 | /* mem+swap counter fails */ |
| 1317 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); | 1312 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
| 1318 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; | 1313 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; |
| 1319 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, | 1314 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, |
| 1320 | memsw); | 1315 | memsw); |
| @@ -1353,16 +1348,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
| 1353 | } | 1348 | } |
| 1354 | } | 1349 | } |
| 1355 | /* | 1350 | /* |
| 1356 | * Insert just the ancestor, we should trickle down to the correct | 1351 | * Insert ancestor (and ancestor's ancestors) into the softlimit RB-tree |
| 1357 | * cgroup for reclaim, since the other nodes will be below their | 1352 | * if they exceed their softlimit. |
| 1358 | * soft limit | ||
| 1359 | */ | 1353 | */ |
| 1360 | if (soft_fail_res) { | 1354 | if (mem_cgroup_soft_limit_check(mem)) |
| 1361 | mem_over_soft_limit = | 1355 | mem_cgroup_update_tree(mem, page); |
| 1362 | mem_cgroup_from_res_counter(soft_fail_res, res); | ||
| 1363 | if (mem_cgroup_soft_limit_check(mem_over_soft_limit)) | ||
| 1364 | mem_cgroup_update_tree(mem_over_soft_limit, page); | ||
| 1365 | } | ||
| 1366 | done: | 1356 | done: |
| 1367 | return 0; | 1357 | return 0; |
| 1368 | nomem: | 1358 | nomem: |
| @@ -1437,10 +1427,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | |||
| 1437 | if (unlikely(PageCgroupUsed(pc))) { | 1427 | if (unlikely(PageCgroupUsed(pc))) { |
| 1438 | unlock_page_cgroup(pc); | 1428 | unlock_page_cgroup(pc); |
| 1439 | if (!mem_cgroup_is_root(mem)) { | 1429 | if (!mem_cgroup_is_root(mem)) { |
| 1440 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); | 1430 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
| 1441 | if (do_swap_account) | 1431 | if (do_swap_account) |
| 1442 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, | 1432 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); |
| 1443 | NULL); | ||
| 1444 | } | 1433 | } |
| 1445 | css_put(&mem->css); | 1434 | css_put(&mem->css); |
| 1446 | return; | 1435 | return; |
| @@ -1519,7 +1508,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
| 1519 | goto out; | 1508 | goto out; |
| 1520 | 1509 | ||
| 1521 | if (!mem_cgroup_is_root(from)) | 1510 | if (!mem_cgroup_is_root(from)) |
| 1522 | res_counter_uncharge(&from->res, PAGE_SIZE, NULL); | 1511 | res_counter_uncharge(&from->res, PAGE_SIZE); |
| 1523 | mem_cgroup_charge_statistics(from, pc, false); | 1512 | mem_cgroup_charge_statistics(from, pc, false); |
| 1524 | 1513 | ||
| 1525 | page = pc->page; | 1514 | page = pc->page; |
| @@ -1539,7 +1528,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
| 1539 | } | 1528 | } |
| 1540 | 1529 | ||
| 1541 | if (do_swap_account && !mem_cgroup_is_root(from)) | 1530 | if (do_swap_account && !mem_cgroup_is_root(from)) |
| 1542 | res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL); | 1531 | res_counter_uncharge(&from->memsw, PAGE_SIZE); |
| 1543 | css_put(&from->css); | 1532 | css_put(&from->css); |
| 1544 | 1533 | ||
| 1545 | css_get(&to->css); | 1534 | css_get(&to->css); |
| @@ -1610,9 +1599,9 @@ uncharge: | |||
| 1610 | css_put(&parent->css); | 1599 | css_put(&parent->css); |
| 1611 | /* uncharge if move fails */ | 1600 | /* uncharge if move fails */ |
| 1612 | if (!mem_cgroup_is_root(parent)) { | 1601 | if (!mem_cgroup_is_root(parent)) { |
| 1613 | res_counter_uncharge(&parent->res, PAGE_SIZE, NULL); | 1602 | res_counter_uncharge(&parent->res, PAGE_SIZE); |
| 1614 | if (do_swap_account) | 1603 | if (do_swap_account) |
| 1615 | res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL); | 1604 | res_counter_uncharge(&parent->memsw, PAGE_SIZE); |
| 1616 | } | 1605 | } |
| 1617 | return ret; | 1606 | return ret; |
| 1618 | } | 1607 | } |
| @@ -1803,8 +1792,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, | |||
| 1803 | * calling css_tryget | 1792 | * calling css_tryget |
| 1804 | */ | 1793 | */ |
| 1805 | if (!mem_cgroup_is_root(memcg)) | 1794 | if (!mem_cgroup_is_root(memcg)) |
| 1806 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE, | 1795 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); |
| 1807 | NULL); | ||
| 1808 | mem_cgroup_swap_statistics(memcg, false); | 1796 | mem_cgroup_swap_statistics(memcg, false); |
| 1809 | mem_cgroup_put(memcg); | 1797 | mem_cgroup_put(memcg); |
| 1810 | } | 1798 | } |
| @@ -1831,9 +1819,9 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) | |||
| 1831 | if (!mem) | 1819 | if (!mem) |
| 1832 | return; | 1820 | return; |
| 1833 | if (!mem_cgroup_is_root(mem)) { | 1821 | if (!mem_cgroup_is_root(mem)) { |
| 1834 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); | 1822 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
| 1835 | if (do_swap_account) | 1823 | if (do_swap_account) |
| 1836 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | 1824 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); |
| 1837 | } | 1825 | } |
| 1838 | css_put(&mem->css); | 1826 | css_put(&mem->css); |
| 1839 | } | 1827 | } |
| @@ -1848,7 +1836,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
| 1848 | struct page_cgroup *pc; | 1836 | struct page_cgroup *pc; |
| 1849 | struct mem_cgroup *mem = NULL; | 1837 | struct mem_cgroup *mem = NULL; |
| 1850 | struct mem_cgroup_per_zone *mz; | 1838 | struct mem_cgroup_per_zone *mz; |
| 1851 | bool soft_limit_excess = false; | ||
| 1852 | 1839 | ||
| 1853 | if (mem_cgroup_disabled()) | 1840 | if (mem_cgroup_disabled()) |
| 1854 | return NULL; | 1841 | return NULL; |
| @@ -1888,10 +1875,10 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
| 1888 | } | 1875 | } |
| 1889 | 1876 | ||
| 1890 | if (!mem_cgroup_is_root(mem)) { | 1877 | if (!mem_cgroup_is_root(mem)) { |
| 1891 | res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess); | 1878 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
| 1892 | if (do_swap_account && | 1879 | if (do_swap_account && |
| 1893 | (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) | 1880 | (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) |
| 1894 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | 1881 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); |
| 1895 | } | 1882 | } |
| 1896 | if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | 1883 | if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) |
| 1897 | mem_cgroup_swap_statistics(mem, true); | 1884 | mem_cgroup_swap_statistics(mem, true); |
| @@ -1908,7 +1895,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
| 1908 | mz = page_cgroup_zoneinfo(pc); | 1895 | mz = page_cgroup_zoneinfo(pc); |
| 1909 | unlock_page_cgroup(pc); | 1896 | unlock_page_cgroup(pc); |
| 1910 | 1897 | ||
| 1911 | if (soft_limit_excess && mem_cgroup_soft_limit_check(mem)) | 1898 | if (mem_cgroup_soft_limit_check(mem)) |
| 1912 | mem_cgroup_update_tree(mem, page); | 1899 | mem_cgroup_update_tree(mem, page); |
| 1913 | /* at swapout, this memcg will be accessed to record to swap */ | 1900 | /* at swapout, this memcg will be accessed to record to swap */ |
| 1914 | if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | 1901 | if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) |
| @@ -1986,7 +1973,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) | |||
| 1986 | * This memcg can be obsolete one. We avoid calling css_tryget | 1973 | * This memcg can be obsolete one. We avoid calling css_tryget |
| 1987 | */ | 1974 | */ |
| 1988 | if (!mem_cgroup_is_root(memcg)) | 1975 | if (!mem_cgroup_is_root(memcg)) |
| 1989 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL); | 1976 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); |
| 1990 | mem_cgroup_swap_statistics(memcg, false); | 1977 | mem_cgroup_swap_statistics(memcg, false); |
| 1991 | mem_cgroup_put(memcg); | 1978 | mem_cgroup_put(memcg); |
| 1992 | } | 1979 | } |
| @@ -2233,6 +2220,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
| 2233 | unsigned long reclaimed; | 2220 | unsigned long reclaimed; |
| 2234 | int loop = 0; | 2221 | int loop = 0; |
| 2235 | struct mem_cgroup_tree_per_zone *mctz; | 2222 | struct mem_cgroup_tree_per_zone *mctz; |
| 2223 | unsigned long long excess; | ||
| 2236 | 2224 | ||
| 2237 | if (order > 0) | 2225 | if (order > 0) |
| 2238 | return 0; | 2226 | return 0; |
| @@ -2284,9 +2272,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
| 2284 | break; | 2272 | break; |
| 2285 | } while (1); | 2273 | } while (1); |
| 2286 | } | 2274 | } |
| 2287 | mz->usage_in_excess = | ||
| 2288 | res_counter_soft_limit_excess(&mz->mem->res); | ||
| 2289 | __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); | 2275 | __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); |
| 2276 | excess = res_counter_soft_limit_excess(&mz->mem->res); | ||
| 2290 | /* | 2277 | /* |
| 2291 | * One school of thought says that we should not add | 2278 | * One school of thought says that we should not add |
| 2292 | * back the node to the tree if reclaim returns 0. | 2279 | * back the node to the tree if reclaim returns 0. |
| @@ -2295,8 +2282,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
| 2295 | * memory to reclaim from. Consider this as a longer | 2282 | * memory to reclaim from. Consider this as a longer |
| 2296 | * term TODO. | 2283 | * term TODO. |
| 2297 | */ | 2284 | */ |
| 2298 | if (mz->usage_in_excess) | 2285 | /* If excess == 0, no tree ops */ |
| 2299 | __mem_cgroup_insert_exceeded(mz->mem, mz, mctz); | 2286 | __mem_cgroup_insert_exceeded(mz->mem, mz, mctz, excess); |
| 2300 | spin_unlock(&mctz->lock); | 2287 | spin_unlock(&mctz->lock); |
| 2301 | css_put(&mz->mem->css); | 2288 | css_put(&mz->mem->css); |
| 2302 | loop++; | 2289 | loop++; |
