 include/linux/res_counter.h |   6
 kernel/res_counter.c        |  18
 mm/memcontrol.c             | 113
 3 files changed, 54 insertions(+), 83 deletions(-)
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 731af71cddc9..fcb9884df618 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -114,8 +114,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent);
 int __must_check res_counter_charge_locked(struct res_counter *counter,
 		unsigned long val);
 int __must_check res_counter_charge(struct res_counter *counter,
-		unsigned long val, struct res_counter **limit_fail_at,
-		struct res_counter **soft_limit_at);
+		unsigned long val, struct res_counter **limit_fail_at);
 
 /*
  * uncharge - tell that some portion of the resource is released
@@ -128,8 +127,7 @@ int __must_check res_counter_charge(struct res_counter *counter,
  */
 
 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
-void res_counter_uncharge(struct res_counter *counter, unsigned long val,
-				bool *was_soft_limit_excess);
+void res_counter_uncharge(struct res_counter *counter, unsigned long val);
 
 static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
 {
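
For illustration only, not part of the patch: a minimal caller sketch against the
new prototypes above. charge_one_page is a hypothetical helper; the point is that
a charge now reports only the counter whose hard limit failed, with no
soft-limit out-parameter.

	/* hypothetical caller, sketched against the new API */
	static int charge_one_page(struct res_counter *cnt)
	{
		struct res_counter *fail_at;

		/* charges cnt and every ancestor; on failure, fail_at
		 * points at the level whose hard limit was exceeded */
		if (res_counter_charge(cnt, PAGE_SIZE, &fail_at))
			return -ENOMEM;

		/* ... use the page ... */

		res_counter_uncharge(cnt, PAGE_SIZE);
		return 0;
	}
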
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 88faec23e833..bcdabf37c40b 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -37,27 +37,17 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
 }
 
 int res_counter_charge(struct res_counter *counter, unsigned long val,
-			struct res_counter **limit_fail_at,
-			struct res_counter **soft_limit_fail_at)
+			struct res_counter **limit_fail_at)
 {
 	int ret;
 	unsigned long flags;
 	struct res_counter *c, *u;
 
 	*limit_fail_at = NULL;
-	if (soft_limit_fail_at)
-		*soft_limit_fail_at = NULL;
 	local_irq_save(flags);
 	for (c = counter; c != NULL; c = c->parent) {
 		spin_lock(&c->lock);
 		ret = res_counter_charge_locked(c, val);
-		/*
-		 * With soft limits, we return the highest ancestor
-		 * that exceeds its soft limit
-		 */
-		if (soft_limit_fail_at &&
-		    !res_counter_soft_limit_check_locked(c))
-			*soft_limit_fail_at = c;
 		spin_unlock(&c->lock);
 		if (ret < 0) {
 			*limit_fail_at = c;
@@ -85,8 +75,7 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 	counter->usage -= val;
 }
 
-void res_counter_uncharge(struct res_counter *counter, unsigned long val,
-				bool *was_soft_limit_excess)
+void res_counter_uncharge(struct res_counter *counter, unsigned long val)
 {
 	unsigned long flags;
 	struct res_counter *c;
@@ -94,9 +83,6 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val,
 	local_irq_save(flags);
 	for (c = counter; c != NULL; c = c->parent) {
 		spin_lock(&c->lock);
-		if (was_soft_limit_excess)
-			*was_soft_limit_excess =
-				!res_counter_soft_limit_check_locked(c);
 		res_counter_uncharge_locked(c, val);
 		spin_unlock(&c->lock);
 	}
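
The *u declared in res_counter_charge() is used on the failure path, just past
the visible hunk, to unwind partial charges. A condensed sketch of that
charge-then-unwind pattern (per-counter locking and irq handling elided):

	for (c = counter; c != NULL; c = c->parent) {
		if (res_counter_charge_locked(c, val) < 0) {
			*limit_fail_at = c;
			/* roll back the levels already charged */
			for (u = counter; u != c; u = u->parent)
				res_counter_uncharge_locked(u, val);
			return -ENOMEM;
		}
	}
	return 0;	/* every level accepted the charge */
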
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 21a30629ca80..1ae8c439584a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -353,16 +353,6 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
 }
 
 static void
-mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
-				struct mem_cgroup_per_zone *mz,
-				struct mem_cgroup_tree_per_zone *mctz)
-{
-	spin_lock(&mctz->lock);
-	__mem_cgroup_insert_exceeded(mem, mz, mctz);
-	spin_unlock(&mctz->lock);
-}
-
-static void
 mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
 				struct mem_cgroup_per_zone *mz,
 				struct mem_cgroup_tree_per_zone *mctz)
@@ -392,34 +382,40 @@ static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
 
 static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
 {
-	unsigned long long prev_usage_in_excess, new_usage_in_excess;
-	bool updated_tree = false;
+	unsigned long long new_usage_in_excess;
 	struct mem_cgroup_per_zone *mz;
 	struct mem_cgroup_tree_per_zone *mctz;
-
-	mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page));
+	int nid = page_to_nid(page);
+	int zid = page_zonenum(page);
 	mctz = soft_limit_tree_from_page(page);
 
 	/*
-	 * We do updates in lazy mode, mem's are removed
-	 * lazily from the per-zone, per-node rb tree
+	 * Necessary to update all ancestors when hierarchy is used.
+	 * because their event counter is not touched.
 	 */
-	prev_usage_in_excess = mz->usage_in_excess;
-
-	new_usage_in_excess = res_counter_soft_limit_excess(&mem->res);
-	if (prev_usage_in_excess) {
-		mem_cgroup_remove_exceeded(mem, mz, mctz);
-		updated_tree = true;
-	}
-	if (!new_usage_in_excess)
-		goto done;
-	mem_cgroup_insert_exceeded(mem, mz, mctz);
-
-done:
-	if (updated_tree) {
-		spin_lock(&mctz->lock);
-		mz->usage_in_excess = new_usage_in_excess;
-		spin_unlock(&mctz->lock);
+	for (; mem; mem = parent_mem_cgroup(mem)) {
+		mz = mem_cgroup_zoneinfo(mem, nid, zid);
+		new_usage_in_excess =
+			res_counter_soft_limit_excess(&mem->res);
+		/*
+		 * We have to update the tree if mz is on RB-tree or
+		 * mem is over its softlimit.
+		 */
+		if (new_usage_in_excess || mz->on_tree) {
+			spin_lock(&mctz->lock);
+			/* if on-tree, remove it */
+			if (mz->on_tree)
+				__mem_cgroup_remove_exceeded(mem, mz, mctz);
+			/*
+			 * if over soft limit, insert again. mz->usage_in_excess
+			 * will be updated properly.
+			 */
+			if (new_usage_in_excess)
+				__mem_cgroup_insert_exceeded(mem, mz, mctz);
+			else
+				mz->usage_in_excess = 0;
+			spin_unlock(&mctz->lock);
+		}
 	}
 }
 
@@ -1271,9 +1267,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 			gfp_t gfp_mask, struct mem_cgroup **memcg,
 			bool oom, struct page *page)
 {
-	struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit;
+	struct mem_cgroup *mem, *mem_over_limit;
 	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-	struct res_counter *fail_res, *soft_fail_res = NULL;
+	struct res_counter *fail_res;
 
 	if (unlikely(test_thread_flag(TIF_MEMDIE))) {
 		/* Don't account this! */
@@ -1305,17 +1301,16 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 
 		if (mem_cgroup_is_root(mem))
 			goto done;
-		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res,
-						&soft_fail_res);
+		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
 		if (likely(!ret)) {
 			if (!do_swap_account)
 				break;
 			ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
-							&fail_res, NULL);
+							&fail_res);
 			if (likely(!ret))
 				break;
 			/* mem+swap counter fails */
-			res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->res, PAGE_SIZE);
 			flags |= MEM_CGROUP_RECLAIM_NOSWAP;
 			mem_over_limit = mem_cgroup_from_res_counter(fail_res,
 									memsw);
@@ -1354,16 +1349,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 		}
 	}
 	/*
-	 * Insert just the ancestor, we should trickle down to the correct
-	 * cgroup for reclaim, since the other nodes will be below their
-	 * soft limit
+	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
+	 * if they exceeds softlimit.
 	 */
-	if (soft_fail_res) {
-		mem_over_soft_limit =
-			mem_cgroup_from_res_counter(soft_fail_res, res);
-		if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
-			mem_cgroup_update_tree(mem_over_soft_limit, page);
-	}
+	if (mem_cgroup_soft_limit_check(mem))
+		mem_cgroup_update_tree(mem, page);
 done:
 	return 0;
 nomem:
@@ -1438,10 +1428,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 	if (unlikely(PageCgroupUsed(pc))) {
 		unlock_page_cgroup(pc);
 		if (!mem_cgroup_is_root(mem)) {
-			res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->res, PAGE_SIZE);
 			if (do_swap_account)
-				res_counter_uncharge(&mem->memsw, PAGE_SIZE,
-							NULL);
+				res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 		}
 		css_put(&mem->css);
 		return;
@@ -1520,7 +1509,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 		goto out;
 
 	if (!mem_cgroup_is_root(from))
-		res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
+		res_counter_uncharge(&from->res, PAGE_SIZE);
 	mem_cgroup_charge_statistics(from, pc, false);
 
 	page = pc->page;
@@ -1540,7 +1529,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 	}
 
 	if (do_swap_account && !mem_cgroup_is_root(from))
-		res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL);
+		res_counter_uncharge(&from->memsw, PAGE_SIZE);
 	css_put(&from->css);
 
 	css_get(&to->css);
@@ -1611,9 +1600,9 @@ uncharge:
 	css_put(&parent->css);
 	/* uncharge if move fails */
 	if (!mem_cgroup_is_root(parent)) {
-		res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
+		res_counter_uncharge(&parent->res, PAGE_SIZE);
 		if (do_swap_account)
-			res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
+			res_counter_uncharge(&parent->memsw, PAGE_SIZE);
 	}
 	return ret;
 }
@@ -1804,8 +1793,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 		 * calling css_tryget
 		 */
 		if (!mem_cgroup_is_root(memcg))
-			res_counter_uncharge(&memcg->memsw, PAGE_SIZE,
-						NULL);
+			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 		mem_cgroup_swap_statistics(memcg, false);
 		mem_cgroup_put(memcg);
 	}
@@ -1832,9 +1820,9 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
 	if (!mem)
 		return;
 	if (!mem_cgroup_is_root(mem)) {
-		res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		if (do_swap_account)
-			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 	}
 	css_put(&mem->css);
 }
@@ -1849,7 +1837,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
 	struct mem_cgroup_per_zone *mz;
-	bool soft_limit_excess = false;
 
 	if (mem_cgroup_disabled())
 		return NULL;
@@ -1889,10 +1876,10 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	}
 
 	if (!mem_cgroup_is_root(mem)) {
-		res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		if (do_swap_account &&
 				(ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
-			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 	}
 	if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
 		mem_cgroup_swap_statistics(mem, true);
@@ -1909,7 +1896,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	mz = page_cgroup_zoneinfo(pc);
 	unlock_page_cgroup(pc);
 
-	if (soft_limit_excess && mem_cgroup_soft_limit_check(mem))
+	if (mem_cgroup_soft_limit_check(mem))
 		mem_cgroup_update_tree(mem, page);
 	/* at swapout, this memcg will be accessed to record to swap */
 	if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
@@ -1987,7 +1974,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
 	 * This memcg can be obsolete one. We avoid calling css_tryget
 	 */
 	if (!mem_cgroup_is_root(memcg))
-		res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 	mem_cgroup_swap_statistics(memcg, false);
 	mem_cgroup_put(memcg);
 }
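
Taken together, the charge and uncharge paths now share one shape: the
res_counter calls only adjust usage, and the soft-limit RB-tree is
resynchronized afterwards by the event-rate check plus the ancestor walk in
mem_cgroup_update_tree(). A condensed sketch of the uncharge-side pattern as it
stands after this patch (root-cgroup and swap-account checks elided):

	res_counter_uncharge(&mem->res, PAGE_SIZE);

	/* rate-limited by the per-cpu event counter; when it fires,
	 * mem and each of its ancestors is removed from / reinserted
	 * into the per-zone soft-limit tree as needed */
	if (mem_cgroup_soft_limit_check(mem))
		mem_cgroup_update_tree(mem, page);
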