 include/linux/res_counter.h |   6
 kernel/res_counter.c        |  18
 mm/memcontrol.c             | 113
 3 files changed, 54 insertions(+), 83 deletions(-)
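
In short: res_counter_charge() loses its soft_limit_at out-parameter and res_counter_uncharge() its was_soft_limit_excess flag; soft-limit bookkeeping moves into memcg, where mem_cgroup_update_tree() now walks the charged group and all its ancestors and fixes up the per-zone soft-limit RB-tree at each level. Condensed from the header hunk below (for orientation only; the diff is authoritative), the resulting prototypes are:

	int __must_check res_counter_charge(struct res_counter *counter,
			unsigned long val, struct res_counter **limit_fail_at);
	void res_counter_uncharge(struct res_counter *counter, unsigned long val);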
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 731af71cddc9..fcb9884df618 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -114,8 +114,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent);
 int __must_check res_counter_charge_locked(struct res_counter *counter,
 		unsigned long val);
 int __must_check res_counter_charge(struct res_counter *counter,
-		unsigned long val, struct res_counter **limit_fail_at,
-		struct res_counter **soft_limit_at);
+		unsigned long val, struct res_counter **limit_fail_at);
 
 /*
  * uncharge - tell that some portion of the resource is released
@@ -128,8 +127,7 @@ int __must_check res_counter_charge(struct res_counter *counter,
  */
 
 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
-void res_counter_uncharge(struct res_counter *counter, unsigned long val,
-				bool *was_soft_limit_excess);
+void res_counter_uncharge(struct res_counter *counter, unsigned long val);
 
 static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
 {
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 88faec23e833..bcdabf37c40b 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -37,27 +37,17 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
 }
 
 int res_counter_charge(struct res_counter *counter, unsigned long val,
-			struct res_counter **limit_fail_at,
-			struct res_counter **soft_limit_fail_at)
+			struct res_counter **limit_fail_at)
 {
 	int ret;
 	unsigned long flags;
 	struct res_counter *c, *u;
 
 	*limit_fail_at = NULL;
-	if (soft_limit_fail_at)
-		*soft_limit_fail_at = NULL;
 	local_irq_save(flags);
 	for (c = counter; c != NULL; c = c->parent) {
 		spin_lock(&c->lock);
 		ret = res_counter_charge_locked(c, val);
-		/*
-		 * With soft limits, we return the highest ancestor
-		 * that exceeds its soft limit
-		 */
-		if (soft_limit_fail_at &&
-		    !res_counter_soft_limit_check_locked(c))
-			*soft_limit_fail_at = c;
 		spin_unlock(&c->lock);
 		if (ret < 0) {
 			*limit_fail_at = c;
@@ -85,8 +75,7 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 	counter->usage -= val;
 }
 
-void res_counter_uncharge(struct res_counter *counter, unsigned long val,
-				bool *was_soft_limit_excess)
+void res_counter_uncharge(struct res_counter *counter, unsigned long val)
 {
 	unsigned long flags;
 	struct res_counter *c;
@@ -94,9 +83,6 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val,
 	local_irq_save(flags);
 	for (c = counter; c != NULL; c = c->parent) {
 		spin_lock(&c->lock);
-		if (was_soft_limit_excess)
-			*was_soft_limit_excess =
-				!res_counter_soft_limit_check_locked(c);
 		res_counter_uncharge_locked(c, val);
 		spin_unlock(&c->lock);
 	}
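
A minimal caller sketch of the simplified API (hypothetical code, not part of the patch; example_charge_page and its error handling are illustrative only):

	/*
	 * Hypothetical caller: with the soft-limit out-parameters gone,
	 * a charge reports only the counter that hit its hard limit, and
	 * an uncharge takes no out-parameter at all.
	 */
	static int example_charge_page(struct res_counter *cnt)
	{
		struct res_counter *fail_at;
		int ret;

		ret = res_counter_charge(cnt, PAGE_SIZE, &fail_at);
		if (ret)	/* fail_at names the ancestor that refused the charge */
			return ret;
		/* ... use the resource ... */
		res_counter_uncharge(cnt, PAGE_SIZE);
		return 0;
	}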
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 21a30629ca80..1ae8c439584a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -353,16 +353,6 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
 }
 
 static void
-mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
-				struct mem_cgroup_per_zone *mz,
-				struct mem_cgroup_tree_per_zone *mctz)
-{
-	spin_lock(&mctz->lock);
-	__mem_cgroup_insert_exceeded(mem, mz, mctz);
-	spin_unlock(&mctz->lock);
-}
-
-static void
 mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
 				struct mem_cgroup_per_zone *mz,
 				struct mem_cgroup_tree_per_zone *mctz)
@@ -392,34 +382,40 @@ static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
 
 static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
 {
-	unsigned long long prev_usage_in_excess, new_usage_in_excess;
-	bool updated_tree = false;
+	unsigned long long new_usage_in_excess;
 	struct mem_cgroup_per_zone *mz;
 	struct mem_cgroup_tree_per_zone *mctz;
-
-	mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page));
+	int nid = page_to_nid(page);
+	int zid = page_zonenum(page);
 	mctz = soft_limit_tree_from_page(page);
 
 	/*
-	 * We do updates in lazy mode, mem's are removed
-	 * lazily from the per-zone, per-node rb tree
+	 * Necessary to update all ancestors when hierarchy is used.
+	 * because their event counter is not touched.
 	 */
-	prev_usage_in_excess = mz->usage_in_excess;
-
-	new_usage_in_excess = res_counter_soft_limit_excess(&mem->res);
-	if (prev_usage_in_excess) {
-		mem_cgroup_remove_exceeded(mem, mz, mctz);
-		updated_tree = true;
-	}
-	if (!new_usage_in_excess)
-		goto done;
-	mem_cgroup_insert_exceeded(mem, mz, mctz);
-
-done:
-	if (updated_tree) {
-		spin_lock(&mctz->lock);
-		mz->usage_in_excess = new_usage_in_excess;
-		spin_unlock(&mctz->lock);
+	for (; mem; mem = parent_mem_cgroup(mem)) {
+		mz = mem_cgroup_zoneinfo(mem, nid, zid);
+		new_usage_in_excess =
+			res_counter_soft_limit_excess(&mem->res);
+		/*
+		 * We have to update the tree if mz is on RB-tree or
+		 * mem is over its softlimit.
+		 */
+		if (new_usage_in_excess || mz->on_tree) {
+			spin_lock(&mctz->lock);
+			/* if on-tree, remove it */
+			if (mz->on_tree)
+				__mem_cgroup_remove_exceeded(mem, mz, mctz);
+			/*
+			 * if over soft limit, insert again. mz->usage_in_excess
+			 * will be updated properly.
+			 */
+			if (new_usage_in_excess)
+				__mem_cgroup_insert_exceeded(mem, mz, mctz);
+			else
+				mz->usage_in_excess = 0;
+			spin_unlock(&mctz->lock);
+		}
 	}
 }
 
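Note: the rewritten mem_cgroup_update_tree() relies on parent_mem_cgroup() returning NULL at the root, so the for-loop re-evaluates res_counter_soft_limit_excess() for the charged group and every ancestor, adjusting each level's RB-tree membership under mctz->lock. Taking that lock once per level and calling the __-prefixed helpers directly is what makes the locking wrapper mem_cgroup_insert_exceeded() above removable.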
@@ -1271,9 +1267,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 			gfp_t gfp_mask, struct mem_cgroup **memcg,
 			bool oom, struct page *page)
 {
-	struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit;
+	struct mem_cgroup *mem, *mem_over_limit;
 	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-	struct res_counter *fail_res, *soft_fail_res = NULL;
+	struct res_counter *fail_res;
 
 	if (unlikely(test_thread_flag(TIF_MEMDIE))) {
 		/* Don't account this! */
@@ -1305,17 +1301,16 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 
 		if (mem_cgroup_is_root(mem))
 			goto done;
-		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res,
-						&soft_fail_res);
+		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
 		if (likely(!ret)) {
 			if (!do_swap_account)
 				break;
 			ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
-							&fail_res, NULL);
+							&fail_res);
 			if (likely(!ret))
 				break;
 			/* mem+swap counter fails */
-			res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->res, PAGE_SIZE);
 			flags |= MEM_CGROUP_RECLAIM_NOSWAP;
 			mem_over_limit = mem_cgroup_from_res_counter(fail_res,
 									memsw);
@@ -1354,16 +1349,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 		}
 	}
 	/*
-	 * Insert just the ancestor, we should trickle down to the correct
-	 * cgroup for reclaim, since the other nodes will be below their
-	 * soft limit
+	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
+	 * if they exceeds softlimit.
 	 */
-	if (soft_fail_res) {
-		mem_over_soft_limit =
-			mem_cgroup_from_res_counter(soft_fail_res, res);
-		if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
-			mem_cgroup_update_tree(mem_over_soft_limit, page);
-	}
+	if (mem_cgroup_soft_limit_check(mem))
+		mem_cgroup_update_tree(mem, page);
 done:
 	return 0;
 nomem:
@@ -1438,10 +1428,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 	if (unlikely(PageCgroupUsed(pc))) {
 		unlock_page_cgroup(pc);
 		if (!mem_cgroup_is_root(mem)) {
-			res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->res, PAGE_SIZE);
 			if (do_swap_account)
-				res_counter_uncharge(&mem->memsw, PAGE_SIZE,
-							NULL);
+				res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 		}
 		css_put(&mem->css);
 		return;
@@ -1520,7 +1509,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 		goto out;
 
 	if (!mem_cgroup_is_root(from))
-		res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
+		res_counter_uncharge(&from->res, PAGE_SIZE);
 	mem_cgroup_charge_statistics(from, pc, false);
 
 	page = pc->page;
@@ -1540,7 +1529,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 	}
 
 	if (do_swap_account && !mem_cgroup_is_root(from))
-		res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL);
+		res_counter_uncharge(&from->memsw, PAGE_SIZE);
 	css_put(&from->css);
 
 	css_get(&to->css);
@@ -1611,9 +1600,9 @@ uncharge:
 	css_put(&parent->css);
 	/* uncharge if move fails */
 	if (!mem_cgroup_is_root(parent)) {
-		res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
+		res_counter_uncharge(&parent->res, PAGE_SIZE);
 		if (do_swap_account)
-			res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
+			res_counter_uncharge(&parent->memsw, PAGE_SIZE);
 	}
 	return ret;
 }
@@ -1804,8 +1793,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 		 * calling css_tryget
 		 */
		if (!mem_cgroup_is_root(memcg))
-			res_counter_uncharge(&memcg->memsw, PAGE_SIZE,
-						NULL);
+			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 		mem_cgroup_swap_statistics(memcg, false);
 		mem_cgroup_put(memcg);
 	}
@@ -1832,9 +1820,9 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
 	if (!mem)
 		return;
 	if (!mem_cgroup_is_root(mem)) {
-		res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		if (do_swap_account)
-			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 	}
 	css_put(&mem->css);
 }
@@ -1849,7 +1837,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
 	struct mem_cgroup_per_zone *mz;
-	bool soft_limit_excess = false;
 
 	if (mem_cgroup_disabled())
 		return NULL;
@@ -1889,10 +1876,10 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	}
 
 	if (!mem_cgroup_is_root(mem)) {
-		res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		if (do_swap_account &&
 				(ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
-			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 	}
 	if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
 		mem_cgroup_swap_statistics(mem, true);
@@ -1909,7 +1896,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	mz = page_cgroup_zoneinfo(pc);
 	unlock_page_cgroup(pc);
 
-	if (soft_limit_excess && mem_cgroup_soft_limit_check(mem))
+	if (mem_cgroup_soft_limit_check(mem))
 		mem_cgroup_update_tree(mem, page);
 	/* at swapout, this memcg will be accessed to record to swap */
 	if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
@@ -1987,7 +1974,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
 	 * This memcg can be obsolete one. We avoid calling css_tryget
 	 */
 	if (!mem_cgroup_is_root(memcg))
-		res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 	mem_cgroup_swap_statistics(memcg, false);
 	mem_cgroup_put(memcg);
 }
