-rw-r--r--	include/linux/res_counter.h	  6
-rw-r--r--	kernel/res_counter.c	 18
-rw-r--r--	mm/memcontrol.c	113
3 files changed, 54 insertions, 83 deletions
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 731af71cddc9..fcb9884df618 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -114,8 +114,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent);
 int __must_check res_counter_charge_locked(struct res_counter *counter,
 		unsigned long val);
 int __must_check res_counter_charge(struct res_counter *counter,
-		unsigned long val, struct res_counter **limit_fail_at,
-		struct res_counter **soft_limit_at);
+		unsigned long val, struct res_counter **limit_fail_at);
 
 /*
  * uncharge - tell that some portion of the resource is released
@@ -128,8 +127,7 @@ int __must_check res_counter_charge(struct res_counter *counter,
  */
 
 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
-void res_counter_uncharge(struct res_counter *counter, unsigned long val,
-		bool *was_soft_limit_excess);
+void res_counter_uncharge(struct res_counter *counter, unsigned long val);
 
 static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
 {
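After this change the header exports only the hard-limit out-parameter; the soft-limit bookkeeping moves entirely into memcontrol.c. A minimal user-space sketch (simplified stand-in types, not the kernel code) of the call pattern the new prototypes support: charge the memory counter, then the mem+swap counter, and roll the first charge back if the second one fails, as __mem_cgroup_try_charge() does later in this patch.

/*
 * Illustrative sketch only; struct res_counter here is a simplified
 * stand-in, not the kernel definition.
 */
#include <stdio.h>

struct res_counter {
	unsigned long long usage, limit;
};

static int res_counter_charge(struct res_counter *c, unsigned long val,
			      struct res_counter **limit_fail_at)
{
	if (c->usage + val > c->limit) {
		*limit_fail_at = c;	/* report the counter that hit its limit */
		return -1;
	}
	c->usage += val;
	return 0;
}

static void res_counter_uncharge(struct res_counter *c, unsigned long val)
{
	c->usage -= val;
}

int main(void)
{
	struct res_counter res   = { .usage = 0, .limit = 4096 };
	struct res_counter memsw = { .usage = 0, .limit = 0 };	/* force a failure */
	struct res_counter *fail_res;

	if (res_counter_charge(&res, 4096, &fail_res) == 0) {
		if (res_counter_charge(&memsw, 4096, &fail_res)) {
			/* mem+swap counter fails: undo the first charge */
			res_counter_uncharge(&res, 4096);
			printf("memsw charge failed, rolled back\n");
		}
	}
	return 0;
}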
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 88faec23e833..bcdabf37c40b 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -37,27 +37,17 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
 }
 
 int res_counter_charge(struct res_counter *counter, unsigned long val,
-			struct res_counter **limit_fail_at,
-			struct res_counter **soft_limit_fail_at)
+			struct res_counter **limit_fail_at)
 {
 	int ret;
 	unsigned long flags;
 	struct res_counter *c, *u;
 
 	*limit_fail_at = NULL;
-	if (soft_limit_fail_at)
-		*soft_limit_fail_at = NULL;
 	local_irq_save(flags);
 	for (c = counter; c != NULL; c = c->parent) {
 		spin_lock(&c->lock);
 		ret = res_counter_charge_locked(c, val);
-		/*
-		 * With soft limits, we return the highest ancestor
-		 * that exceeds its soft limit
-		 */
-		if (soft_limit_fail_at &&
-		    !res_counter_soft_limit_check_locked(c))
-			*soft_limit_fail_at = c;
 		spin_unlock(&c->lock);
 		if (ret < 0) {
 			*limit_fail_at = c;
@@ -85,8 +75,7 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 	counter->usage -= val;
 }
 
-void res_counter_uncharge(struct res_counter *counter, unsigned long val,
-				bool *was_soft_limit_excess)
+void res_counter_uncharge(struct res_counter *counter, unsigned long val)
 {
 	unsigned long flags;
 	struct res_counter *c;
@@ -94,9 +83,6 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val,
 	local_irq_save(flags);
 	for (c = counter; c != NULL; c = c->parent) {
 		spin_lock(&c->lock);
-		if (was_soft_limit_excess)
-			*was_soft_limit_excess =
-				!res_counter_soft_limit_check_locked(c);
 		res_counter_uncharge_locked(c, val);
 		spin_unlock(&c->lock);
 	}
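The hierarchical behaviour itself does not change: charging still walks the counter and every ancestor, and a failed charge is unwound before the failing level is reported. A user-space sketch of that walk, with the locking and the kernel types omitted (illustrative only, not the kernel source).

#include <stdio.h>

struct res_counter {
	unsigned long long usage;
	unsigned long long limit;
	struct res_counter *parent;	/* next level up in the hierarchy */
};

static int res_counter_charge(struct res_counter *counter, unsigned long val,
			      struct res_counter **limit_fail_at)
{
	struct res_counter *c, *u;

	*limit_fail_at = NULL;
	for (c = counter; c != NULL; c = c->parent) {
		if (c->usage + val > c->limit) {
			*limit_fail_at = c;
			/* roll back the levels already charged */
			for (u = counter; u != c; u = u->parent)
				u->usage -= val;
			return -1;
		}
		c->usage += val;
	}
	return 0;
}

static void res_counter_uncharge(struct res_counter *counter, unsigned long val)
{
	struct res_counter *c;

	for (c = counter; c != NULL; c = c->parent)
		c->usage -= val;
}

int main(void)
{
	struct res_counter parent = { .usage = 0, .limit = 8192, .parent = NULL };
	struct res_counter child  = { .usage = 0, .limit = 8192, .parent = &parent };
	struct res_counter *fail;

	res_counter_charge(&child, 4096, &fail);	/* succeeds on both levels */
	if (res_counter_charge(&child, 8192, &fail))
		printf("charge failed at counter with limit %llu\n", fail->limit);
	res_counter_uncharge(&child, 4096);
	printf("after uncharge: child=%llu parent=%llu\n",
	       child.usage, parent.usage);
	return 0;
}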
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 21a30629ca80..1ae8c439584a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -353,16 +353,6 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
 }
 
 static void
-mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
-				struct mem_cgroup_per_zone *mz,
-				struct mem_cgroup_tree_per_zone *mctz)
-{
-	spin_lock(&mctz->lock);
-	__mem_cgroup_insert_exceeded(mem, mz, mctz);
-	spin_unlock(&mctz->lock);
-}
-
-static void
 mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
 				struct mem_cgroup_per_zone *mz,
 				struct mem_cgroup_tree_per_zone *mctz)
@@ -392,34 +382,40 @@ static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
 
 static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
 {
-	unsigned long long prev_usage_in_excess, new_usage_in_excess;
-	bool updated_tree = false;
+	unsigned long long new_usage_in_excess;
 	struct mem_cgroup_per_zone *mz;
 	struct mem_cgroup_tree_per_zone *mctz;
-
-	mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page));
+	int nid = page_to_nid(page);
+	int zid = page_zonenum(page);
 	mctz = soft_limit_tree_from_page(page);
 
 	/*
-	 * We do updates in lazy mode, mem's are removed
-	 * lazily from the per-zone, per-node rb tree
+	 * Necessary to update all ancestors when hierarchy is used.
+	 * because their event counter is not touched.
 	 */
-	prev_usage_in_excess = mz->usage_in_excess;
-
-	new_usage_in_excess = res_counter_soft_limit_excess(&mem->res);
-	if (prev_usage_in_excess) {
-		mem_cgroup_remove_exceeded(mem, mz, mctz);
-		updated_tree = true;
-	}
-	if (!new_usage_in_excess)
-		goto done;
-	mem_cgroup_insert_exceeded(mem, mz, mctz);
-
-done:
-	if (updated_tree) {
-		spin_lock(&mctz->lock);
-		mz->usage_in_excess = new_usage_in_excess;
-		spin_unlock(&mctz->lock);
+	for (; mem; mem = parent_mem_cgroup(mem)) {
+		mz = mem_cgroup_zoneinfo(mem, nid, zid);
+		new_usage_in_excess =
+			res_counter_soft_limit_excess(&mem->res);
+		/*
+		 * We have to update the tree if mz is on RB-tree or
+		 * mem is over its softlimit.
+		 */
+		if (new_usage_in_excess || mz->on_tree) {
+			spin_lock(&mctz->lock);
+			/* if on-tree, remove it */
+			if (mz->on_tree)
+				__mem_cgroup_remove_exceeded(mem, mz, mctz);
+			/*
+			 * if over soft limit, insert again. mz->usage_in_excess
+			 * will be updated properly.
+			 */
+			if (new_usage_in_excess)
+				__mem_cgroup_insert_exceeded(mem, mz, mctz);
+			else
+				mz->usage_in_excess = 0;
+			spin_unlock(&mctz->lock);
+		}
 	}
 }
 
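The rewritten mem_cgroup_update_tree() now fixes up the soft-limit RB-tree for the charged group and all of its ancestors in one pass: each level is dropped from the tree if it is queued there and re-inserted only while it still exceeds its soft limit. A user-space sketch of that rule, with the per-zone RB-tree reduced to an on_tree flag (illustrative stand-in types, not the kernel code).

#include <stdbool.h>
#include <stdio.h>

struct group {
	const char *name;
	unsigned long long usage, soft_limit;
	bool on_tree;			/* stands in for the per-zone RB-tree node */
	unsigned long long usage_in_excess;
	struct group *parent;
};

static unsigned long long soft_limit_excess(struct group *g)
{
	return g->usage > g->soft_limit ? g->usage - g->soft_limit : 0;
}

static void update_tree(struct group *g)
{
	unsigned long long excess;

	for (; g; g = g->parent) {
		excess = soft_limit_excess(g);
		/* only touch the tree if the node is queued or over its limit */
		if (!excess && !g->on_tree)
			continue;
		if (g->on_tree)
			g->on_tree = false;	/* "remove" from the tree */
		if (excess) {
			g->usage_in_excess = excess;
			g->on_tree = true;	/* "re-insert" with a fresh key */
		} else {
			g->usage_in_excess = 0;
		}
	}
}

int main(void)
{
	struct group root  = { "root",  9000, 4096, false, 0, NULL };
	struct group child = { "child", 5000, 8192, false, 0, &root };

	update_tree(&child);
	printf("%s: on_tree=%d excess=%llu\n", child.name,
	       child.on_tree, child.usage_in_excess);
	printf("%s: on_tree=%d excess=%llu\n", root.name,
	       root.on_tree, root.usage_in_excess);
	return 0;
}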
@@ -1271,9 +1267,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 			gfp_t gfp_mask, struct mem_cgroup **memcg,
 			bool oom, struct page *page)
 {
-	struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit;
+	struct mem_cgroup *mem, *mem_over_limit;
 	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-	struct res_counter *fail_res, *soft_fail_res = NULL;
+	struct res_counter *fail_res;
 
 	if (unlikely(test_thread_flag(TIF_MEMDIE))) {
 		/* Don't account this! */
@@ -1305,17 +1301,16 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 
 		if (mem_cgroup_is_root(mem))
 			goto done;
-		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res,
-						&soft_fail_res);
+		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
 		if (likely(!ret)) {
 			if (!do_swap_account)
 				break;
 			ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
-							&fail_res, NULL);
+							&fail_res);
 			if (likely(!ret))
 				break;
 			/* mem+swap counter fails */
-			res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->res, PAGE_SIZE);
 			flags |= MEM_CGROUP_RECLAIM_NOSWAP;
 			mem_over_limit = mem_cgroup_from_res_counter(fail_res,
 									memsw);
@@ -1354,16 +1349,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 		}
 	}
 	/*
-	 * Insert just the ancestor, we should trickle down to the correct
-	 * cgroup for reclaim, since the other nodes will be below their
-	 * soft limit
+	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
+	 * if they exceeds softlimit.
 	 */
-	if (soft_fail_res) {
-		mem_over_soft_limit =
-			mem_cgroup_from_res_counter(soft_fail_res, res);
-		if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
-			mem_cgroup_update_tree(mem_over_soft_limit, page);
-	}
+	if (mem_cgroup_soft_limit_check(mem))
+		mem_cgroup_update_tree(mem, page);
 done:
 	return 0;
 nomem:
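With soft_fail_res gone, the decision to touch the soft-limit tree at charge time now comes from mem_cgroup_soft_limit_check(), which rate-limits the update by counting charge events rather than inspecting the res_counter on every page. A sketch of that throttling idea; the threshold value and the per-cpu counters below are simplified assumptions, not the kernel implementation.

#include <stdbool.h>
#include <stdio.h>

#define SOFTLIMIT_EVENTS_THRESH 1000	/* illustrative value */

struct event_counter {
	unsigned long events;
};

/* count one charge/uncharge event; allow a tree update only every
 * SOFTLIMIT_EVENTS_THRESH-th call */
static bool soft_limit_check(struct event_counter *ec)
{
	ec->events++;
	if (ec->events >= SOFTLIMIT_EVENTS_THRESH) {
		ec->events = 0;
		return true;
	}
	return false;
}

int main(void)
{
	struct event_counter ec = { 0 };
	unsigned long updates = 0;

	for (int i = 0; i < 10000; i++)
		if (soft_limit_check(&ec))
			updates++;
	printf("10000 charges -> %lu tree updates\n", updates);
	return 0;
}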
@@ -1438,10 +1428,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 	if (unlikely(PageCgroupUsed(pc))) {
 		unlock_page_cgroup(pc);
 		if (!mem_cgroup_is_root(mem)) {
-			res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->res, PAGE_SIZE);
 			if (do_swap_account)
-				res_counter_uncharge(&mem->memsw, PAGE_SIZE,
-							NULL);
+				res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 		}
 		css_put(&mem->css);
 		return;
@@ -1520,7 +1509,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 		goto out;
 
 	if (!mem_cgroup_is_root(from))
-		res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
+		res_counter_uncharge(&from->res, PAGE_SIZE);
 	mem_cgroup_charge_statistics(from, pc, false);
 
 	page = pc->page;
@@ -1540,7 +1529,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 	}
 
 	if (do_swap_account && !mem_cgroup_is_root(from))
-		res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL);
+		res_counter_uncharge(&from->memsw, PAGE_SIZE);
 	css_put(&from->css);
 
 	css_get(&to->css);
@@ -1611,9 +1600,9 @@ uncharge:
 	css_put(&parent->css);
 	/* uncharge if move fails */
 	if (!mem_cgroup_is_root(parent)) {
-		res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
+		res_counter_uncharge(&parent->res, PAGE_SIZE);
 		if (do_swap_account)
-			res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
+			res_counter_uncharge(&parent->memsw, PAGE_SIZE);
 	}
 	return ret;
 }
@@ -1804,8 +1793,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 		 * calling css_tryget
 		 */
 		if (!mem_cgroup_is_root(memcg))
-			res_counter_uncharge(&memcg->memsw, PAGE_SIZE,
-						NULL);
+			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 		mem_cgroup_swap_statistics(memcg, false);
 		mem_cgroup_put(memcg);
 	}
@@ -1832,9 +1820,9 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
 	if (!mem)
 		return;
 	if (!mem_cgroup_is_root(mem)) {
-		res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		if (do_swap_account)
-			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 	}
 	css_put(&mem->css);
 }
@@ -1849,7 +1837,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
 	struct mem_cgroup_per_zone *mz;
-	bool soft_limit_excess = false;
 
 	if (mem_cgroup_disabled())
 		return NULL;
@@ -1889,10 +1876,10 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	}
 
 	if (!mem_cgroup_is_root(mem)) {
-		res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		if (do_swap_account &&
 				(ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
-			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 	}
 	if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
 		mem_cgroup_swap_statistics(mem, true);
@@ -1909,7 +1896,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	mz = page_cgroup_zoneinfo(pc);
 	unlock_page_cgroup(pc);
 
-	if (soft_limit_excess && mem_cgroup_soft_limit_check(mem))
+	if (mem_cgroup_soft_limit_check(mem))
 		mem_cgroup_update_tree(mem, page);
 	/* at swapout, this memcg will be accessed to record to swap */
 	if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
@@ -1987,7 +1974,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
 	 * This memcg can be obsolete one. We avoid calling css_tryget
 	 */
 	if (!mem_cgroup_is_root(memcg))
-		res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 	mem_cgroup_swap_statistics(memcg, false);
 	mem_cgroup_put(memcg);
 }