aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c127
1 files changed, 57 insertions, 70 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e2b98a6875c0..f99f5991d6bb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -313,7 +313,8 @@ soft_limit_tree_from_page(struct page *page)
313static void 313static void
314__mem_cgroup_insert_exceeded(struct mem_cgroup *mem, 314__mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
315 struct mem_cgroup_per_zone *mz, 315 struct mem_cgroup_per_zone *mz,
316 struct mem_cgroup_tree_per_zone *mctz) 316 struct mem_cgroup_tree_per_zone *mctz,
317 unsigned long long new_usage_in_excess)
317{ 318{
318 struct rb_node **p = &mctz->rb_root.rb_node; 319 struct rb_node **p = &mctz->rb_root.rb_node;
319 struct rb_node *parent = NULL; 320 struct rb_node *parent = NULL;
@@ -322,7 +323,9 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
322 if (mz->on_tree) 323 if (mz->on_tree)
323 return; 324 return;
324 325
325 mz->usage_in_excess = res_counter_soft_limit_excess(&mem->res); 326 mz->usage_in_excess = new_usage_in_excess;
327 if (!mz->usage_in_excess)
328 return;
326 while (*p) { 329 while (*p) {
327 parent = *p; 330 parent = *p;
328 mz_node = rb_entry(parent, struct mem_cgroup_per_zone, 331 mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
@@ -353,16 +356,6 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
353} 356}
354 357
355static void 358static void
356mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
357 struct mem_cgroup_per_zone *mz,
358 struct mem_cgroup_tree_per_zone *mctz)
359{
360 spin_lock(&mctz->lock);
361 __mem_cgroup_insert_exceeded(mem, mz, mctz);
362 spin_unlock(&mctz->lock);
363}
364
365static void
366mem_cgroup_remove_exceeded(struct mem_cgroup *mem, 359mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
367 struct mem_cgroup_per_zone *mz, 360 struct mem_cgroup_per_zone *mz,
368 struct mem_cgroup_tree_per_zone *mctz) 361 struct mem_cgroup_tree_per_zone *mctz)
@@ -392,34 +385,36 @@ static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
392 385
393static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) 386static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
394{ 387{
395 unsigned long long prev_usage_in_excess, new_usage_in_excess; 388 unsigned long long excess;
396 bool updated_tree = false;
397 struct mem_cgroup_per_zone *mz; 389 struct mem_cgroup_per_zone *mz;
398 struct mem_cgroup_tree_per_zone *mctz; 390 struct mem_cgroup_tree_per_zone *mctz;
399 391 int nid = page_to_nid(page);
400 mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page)); 392 int zid = page_zonenum(page);
401 mctz = soft_limit_tree_from_page(page); 393 mctz = soft_limit_tree_from_page(page);
402 394
403 /* 395 /*
404 * We do updates in lazy mode, mem's are removed 396 * Necessary to update all ancestors when hierarchy is used.
405 * lazily from the per-zone, per-node rb tree 397 * because their event counter is not touched.
406 */ 398 */
407 prev_usage_in_excess = mz->usage_in_excess; 399 for (; mem; mem = parent_mem_cgroup(mem)) {
408 400 mz = mem_cgroup_zoneinfo(mem, nid, zid);
409 new_usage_in_excess = res_counter_soft_limit_excess(&mem->res); 401 excess = res_counter_soft_limit_excess(&mem->res);
410 if (prev_usage_in_excess) { 402 /*
411 mem_cgroup_remove_exceeded(mem, mz, mctz); 403 * We have to update the tree if mz is on RB-tree or
412 updated_tree = true; 404 * mem is over its softlimit.
413 } 405 */
414 if (!new_usage_in_excess) 406 if (excess || mz->on_tree) {
415 goto done; 407 spin_lock(&mctz->lock);
416 mem_cgroup_insert_exceeded(mem, mz, mctz); 408 /* if on-tree, remove it */
417 409 if (mz->on_tree)
418done: 410 __mem_cgroup_remove_exceeded(mem, mz, mctz);
419 if (updated_tree) { 411 /*
420 spin_lock(&mctz->lock); 412 * Insert again. mz->usage_in_excess will be updated.
421 mz->usage_in_excess = new_usage_in_excess; 413 * If excess is 0, no tree ops.
422 spin_unlock(&mctz->lock); 414 */
415 __mem_cgroup_insert_exceeded(mem, mz, mctz, excess);
416 spin_unlock(&mctz->lock);
417 }
423 } 418 }
424} 419}
425 420
@@ -447,9 +442,10 @@ static struct mem_cgroup_per_zone *
447__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) 442__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
448{ 443{
449 struct rb_node *rightmost = NULL; 444 struct rb_node *rightmost = NULL;
450 struct mem_cgroup_per_zone *mz = NULL; 445 struct mem_cgroup_per_zone *mz;
451 446
452retry: 447retry:
448 mz = NULL;
453 rightmost = rb_last(&mctz->rb_root); 449 rightmost = rb_last(&mctz->rb_root);
454 if (!rightmost) 450 if (!rightmost)
455 goto done; /* Nothing to reclaim from */ 451 goto done; /* Nothing to reclaim from */
@@ -1270,9 +1266,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
1270 gfp_t gfp_mask, struct mem_cgroup **memcg, 1266 gfp_t gfp_mask, struct mem_cgroup **memcg,
1271 bool oom, struct page *page) 1267 bool oom, struct page *page)
1272{ 1268{
1273 struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit; 1269 struct mem_cgroup *mem, *mem_over_limit;
1274 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; 1270 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
1275 struct res_counter *fail_res, *soft_fail_res = NULL; 1271 struct res_counter *fail_res;
1276 1272
1277 if (unlikely(test_thread_flag(TIF_MEMDIE))) { 1273 if (unlikely(test_thread_flag(TIF_MEMDIE))) {
1278 /* Don't account this! */ 1274 /* Don't account this! */
@@ -1304,17 +1300,16 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
1304 1300
1305 if (mem_cgroup_is_root(mem)) 1301 if (mem_cgroup_is_root(mem))
1306 goto done; 1302 goto done;
1307 ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res, 1303 ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
1308 &soft_fail_res);
1309 if (likely(!ret)) { 1304 if (likely(!ret)) {
1310 if (!do_swap_account) 1305 if (!do_swap_account)
1311 break; 1306 break;
1312 ret = res_counter_charge(&mem->memsw, PAGE_SIZE, 1307 ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
1313 &fail_res, NULL); 1308 &fail_res);
1314 if (likely(!ret)) 1309 if (likely(!ret))
1315 break; 1310 break;
1316 /* mem+swap counter fails */ 1311 /* mem+swap counter fails */
1317 res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); 1312 res_counter_uncharge(&mem->res, PAGE_SIZE);
1318 flags |= MEM_CGROUP_RECLAIM_NOSWAP; 1313 flags |= MEM_CGROUP_RECLAIM_NOSWAP;
1319 mem_over_limit = mem_cgroup_from_res_counter(fail_res, 1314 mem_over_limit = mem_cgroup_from_res_counter(fail_res,
1320 memsw); 1315 memsw);
@@ -1353,16 +1348,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
1353 } 1348 }
1354 } 1349 }
1355 /* 1350 /*
1356 * Insert just the ancestor, we should trickle down to the correct 1351 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
1357 * cgroup for reclaim, since the other nodes will be below their 1352 * if they exceeds softlimit.
1358 * soft limit
1359 */ 1353 */
1360 if (soft_fail_res) { 1354 if (mem_cgroup_soft_limit_check(mem))
1361 mem_over_soft_limit = 1355 mem_cgroup_update_tree(mem, page);
1362 mem_cgroup_from_res_counter(soft_fail_res, res);
1363 if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
1364 mem_cgroup_update_tree(mem_over_soft_limit, page);
1365 }
1366done: 1356done:
1367 return 0; 1357 return 0;
1368nomem: 1358nomem:
@@ -1437,10 +1427,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
1437 if (unlikely(PageCgroupUsed(pc))) { 1427 if (unlikely(PageCgroupUsed(pc))) {
1438 unlock_page_cgroup(pc); 1428 unlock_page_cgroup(pc);
1439 if (!mem_cgroup_is_root(mem)) { 1429 if (!mem_cgroup_is_root(mem)) {
1440 res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); 1430 res_counter_uncharge(&mem->res, PAGE_SIZE);
1441 if (do_swap_account) 1431 if (do_swap_account)
1442 res_counter_uncharge(&mem->memsw, PAGE_SIZE, 1432 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1443 NULL);
1444 } 1433 }
1445 css_put(&mem->css); 1434 css_put(&mem->css);
1446 return; 1435 return;
@@ -1519,7 +1508,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
1519 goto out; 1508 goto out;
1520 1509
1521 if (!mem_cgroup_is_root(from)) 1510 if (!mem_cgroup_is_root(from))
1522 res_counter_uncharge(&from->res, PAGE_SIZE, NULL); 1511 res_counter_uncharge(&from->res, PAGE_SIZE);
1523 mem_cgroup_charge_statistics(from, pc, false); 1512 mem_cgroup_charge_statistics(from, pc, false);
1524 1513
1525 page = pc->page; 1514 page = pc->page;
@@ -1539,7 +1528,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
1539 } 1528 }
1540 1529
1541 if (do_swap_account && !mem_cgroup_is_root(from)) 1530 if (do_swap_account && !mem_cgroup_is_root(from))
1542 res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL); 1531 res_counter_uncharge(&from->memsw, PAGE_SIZE);
1543 css_put(&from->css); 1532 css_put(&from->css);
1544 1533
1545 css_get(&to->css); 1534 css_get(&to->css);
@@ -1610,9 +1599,9 @@ uncharge:
1610 css_put(&parent->css); 1599 css_put(&parent->css);
1611 /* uncharge if move fails */ 1600 /* uncharge if move fails */
1612 if (!mem_cgroup_is_root(parent)) { 1601 if (!mem_cgroup_is_root(parent)) {
1613 res_counter_uncharge(&parent->res, PAGE_SIZE, NULL); 1602 res_counter_uncharge(&parent->res, PAGE_SIZE);
1614 if (do_swap_account) 1603 if (do_swap_account)
1615 res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL); 1604 res_counter_uncharge(&parent->memsw, PAGE_SIZE);
1616 } 1605 }
1617 return ret; 1606 return ret;
1618} 1607}
@@ -1803,8 +1792,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
1803 * calling css_tryget 1792 * calling css_tryget
1804 */ 1793 */
1805 if (!mem_cgroup_is_root(memcg)) 1794 if (!mem_cgroup_is_root(memcg))
1806 res_counter_uncharge(&memcg->memsw, PAGE_SIZE, 1795 res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
1807 NULL);
1808 mem_cgroup_swap_statistics(memcg, false); 1796 mem_cgroup_swap_statistics(memcg, false);
1809 mem_cgroup_put(memcg); 1797 mem_cgroup_put(memcg);
1810 } 1798 }
@@ -1831,9 +1819,9 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
1831 if (!mem) 1819 if (!mem)
1832 return; 1820 return;
1833 if (!mem_cgroup_is_root(mem)) { 1821 if (!mem_cgroup_is_root(mem)) {
1834 res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); 1822 res_counter_uncharge(&mem->res, PAGE_SIZE);
1835 if (do_swap_account) 1823 if (do_swap_account)
1836 res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); 1824 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1837 } 1825 }
1838 css_put(&mem->css); 1826 css_put(&mem->css);
1839} 1827}
@@ -1848,7 +1836,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
1848 struct page_cgroup *pc; 1836 struct page_cgroup *pc;
1849 struct mem_cgroup *mem = NULL; 1837 struct mem_cgroup *mem = NULL;
1850 struct mem_cgroup_per_zone *mz; 1838 struct mem_cgroup_per_zone *mz;
1851 bool soft_limit_excess = false;
1852 1839
1853 if (mem_cgroup_disabled()) 1840 if (mem_cgroup_disabled())
1854 return NULL; 1841 return NULL;
@@ -1888,10 +1875,10 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
1888 } 1875 }
1889 1876
1890 if (!mem_cgroup_is_root(mem)) { 1877 if (!mem_cgroup_is_root(mem)) {
1891 res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess); 1878 res_counter_uncharge(&mem->res, PAGE_SIZE);
1892 if (do_swap_account && 1879 if (do_swap_account &&
1893 (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) 1880 (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
1894 res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); 1881 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1895 } 1882 }
1896 if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) 1883 if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
1897 mem_cgroup_swap_statistics(mem, true); 1884 mem_cgroup_swap_statistics(mem, true);
@@ -1908,7 +1895,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
1908 mz = page_cgroup_zoneinfo(pc); 1895 mz = page_cgroup_zoneinfo(pc);
1909 unlock_page_cgroup(pc); 1896 unlock_page_cgroup(pc);
1910 1897
1911 if (soft_limit_excess && mem_cgroup_soft_limit_check(mem)) 1898 if (mem_cgroup_soft_limit_check(mem))
1912 mem_cgroup_update_tree(mem, page); 1899 mem_cgroup_update_tree(mem, page);
1913 /* at swapout, this memcg will be accessed to record to swap */ 1900 /* at swapout, this memcg will be accessed to record to swap */
1914 if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) 1901 if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
@@ -1986,7 +1973,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
1986 * This memcg can be obsolete one. We avoid calling css_tryget 1973 * This memcg can be obsolete one. We avoid calling css_tryget
1987 */ 1974 */
1988 if (!mem_cgroup_is_root(memcg)) 1975 if (!mem_cgroup_is_root(memcg))
1989 res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL); 1976 res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
1990 mem_cgroup_swap_statistics(memcg, false); 1977 mem_cgroup_swap_statistics(memcg, false);
1991 mem_cgroup_put(memcg); 1978 mem_cgroup_put(memcg);
1992 } 1979 }
@@ -2233,6 +2220,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
2233 unsigned long reclaimed; 2220 unsigned long reclaimed;
2234 int loop = 0; 2221 int loop = 0;
2235 struct mem_cgroup_tree_per_zone *mctz; 2222 struct mem_cgroup_tree_per_zone *mctz;
2223 unsigned long long excess;
2236 2224
2237 if (order > 0) 2225 if (order > 0)
2238 return 0; 2226 return 0;
@@ -2284,9 +2272,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
2284 break; 2272 break;
2285 } while (1); 2273 } while (1);
2286 } 2274 }
2287 mz->usage_in_excess =
2288 res_counter_soft_limit_excess(&mz->mem->res);
2289 __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); 2275 __mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
2276 excess = res_counter_soft_limit_excess(&mz->mem->res);
2290 /* 2277 /*
2291 * One school of thought says that we should not add 2278 * One school of thought says that we should not add
2292 * back the node to the tree if reclaim returns 0. 2279 * back the node to the tree if reclaim returns 0.
@@ -2295,8 +2282,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
2295 * memory to reclaim from. Consider this as a longer 2282 * memory to reclaim from. Consider this as a longer
2296 * term TODO. 2283 * term TODO.
2297 */ 2284 */
2298 if (mz->usage_in_excess) 2285 /* If excess == 0, no tree ops */
2299 __mem_cgroup_insert_exceeded(mz->mem, mz, mctz); 2286 __mem_cgroup_insert_exceeded(mz->mem, mz, mctz, excess);
2300 spin_unlock(&mctz->lock); 2287 spin_unlock(&mctz->lock);
2301 css_put(&mz->mem->css); 2288 css_put(&mz->mem->css);
2302 loop++; 2289 loop++;