diff options
-rw-r--r-- | mm/memcontrol.c | 121 |
1 files changed, 100 insertions, 21 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 011aba6cad70..2011f15278fd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -70,6 +70,7 @@ enum mem_cgroup_stat_index { | |||
70 | MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ | 70 | MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ |
71 | MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ | 71 | MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ |
72 | MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */ | 72 | MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */ |
73 | MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ | ||
73 | 74 | ||
74 | MEM_CGROUP_STAT_NSTATS, | 75 | MEM_CGROUP_STAT_NSTATS, |
75 | }; | 76 | }; |
@@ -478,11 +479,24 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | |||
478 | return mz; | 479 | return mz; |
479 | } | 480 | } |
480 | 481 | ||
482 | static void mem_cgroup_swap_statistics(struct mem_cgroup *mem, | ||
483 | bool charge) | ||
484 | { | ||
485 | int val = (charge) ? 1 : -1; | ||
486 | struct mem_cgroup_stat *stat = &mem->stat; | ||
487 | struct mem_cgroup_stat_cpu *cpustat; | ||
488 | int cpu = get_cpu(); | ||
489 | |||
490 | cpustat = &stat->cpustat[cpu]; | ||
491 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val); | ||
492 | put_cpu(); | ||
493 | } | ||
494 | |||
481 | static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, | 495 | static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, |
482 | struct page_cgroup *pc, | 496 | struct page_cgroup *pc, |
483 | bool charge) | 497 | bool charge) |
484 | { | 498 | { |
485 | int val = (charge)? 1 : -1; | 499 | int val = (charge) ? 1 : -1; |
486 | struct mem_cgroup_stat *stat = &mem->stat; | 500 | struct mem_cgroup_stat *stat = &mem->stat; |
487 | struct mem_cgroup_stat_cpu *cpustat; | 501 | struct mem_cgroup_stat_cpu *cpustat; |
488 | int cpu = get_cpu(); | 502 | int cpu = get_cpu(); |
@@ -1285,9 +1299,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1285 | VM_BUG_ON(css_is_removed(&mem->css)); | 1299 | VM_BUG_ON(css_is_removed(&mem->css)); |
1286 | 1300 | ||
1287 | while (1) { | 1301 | while (1) { |
1288 | int ret; | 1302 | int ret = 0; |
1289 | unsigned long flags = 0; | 1303 | unsigned long flags = 0; |
1290 | 1304 | ||
1305 | if (mem_cgroup_is_root(mem)) | ||
1306 | goto done; | ||
1291 | ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res, | 1307 | ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res, |
1292 | &soft_fail_res); | 1308 | &soft_fail_res); |
1293 | if (likely(!ret)) { | 1309 | if (likely(!ret)) { |
@@ -1347,6 +1363,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1347 | if (mem_cgroup_soft_limit_check(mem_over_soft_limit)) | 1363 | if (mem_cgroup_soft_limit_check(mem_over_soft_limit)) |
1348 | mem_cgroup_update_tree(mem_over_soft_limit, page); | 1364 | mem_cgroup_update_tree(mem_over_soft_limit, page); |
1349 | } | 1365 | } |
1366 | done: | ||
1350 | return 0; | 1367 | return 0; |
1351 | nomem: | 1368 | nomem: |
1352 | css_put(&mem->css); | 1369 | css_put(&mem->css); |
@@ -1419,9 +1436,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | |||
1419 | lock_page_cgroup(pc); | 1436 | lock_page_cgroup(pc); |
1420 | if (unlikely(PageCgroupUsed(pc))) { | 1437 | if (unlikely(PageCgroupUsed(pc))) { |
1421 | unlock_page_cgroup(pc); | 1438 | unlock_page_cgroup(pc); |
1422 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); | 1439 | if (!mem_cgroup_is_root(mem)) { |
1423 | if (do_swap_account) | 1440 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); |
1424 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | 1441 | if (do_swap_account) |
1442 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, | ||
1443 | NULL); | ||
1444 | } | ||
1425 | css_put(&mem->css); | 1445 | css_put(&mem->css); |
1426 | return; | 1446 | return; |
1427 | } | 1447 | } |
@@ -1498,7 +1518,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
1498 | if (pc->mem_cgroup != from) | 1518 | if (pc->mem_cgroup != from) |
1499 | goto out; | 1519 | goto out; |
1500 | 1520 | ||
1501 | res_counter_uncharge(&from->res, PAGE_SIZE, NULL); | 1521 | if (!mem_cgroup_is_root(from)) |
1522 | res_counter_uncharge(&from->res, PAGE_SIZE, NULL); | ||
1502 | mem_cgroup_charge_statistics(from, pc, false); | 1523 | mem_cgroup_charge_statistics(from, pc, false); |
1503 | 1524 | ||
1504 | page = pc->page; | 1525 | page = pc->page; |
@@ -1517,7 +1538,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
1517 | 1); | 1538 | 1); |
1518 | } | 1539 | } |
1519 | 1540 | ||
1520 | if (do_swap_account) | 1541 | if (do_swap_account && !mem_cgroup_is_root(from)) |
1521 | res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL); | 1542 | res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL); |
1522 | css_put(&from->css); | 1543 | css_put(&from->css); |
1523 | 1544 | ||
@@ -1588,9 +1609,11 @@ uncharge: | |||
1588 | /* drop extra refcnt by try_charge() */ | 1609 | /* drop extra refcnt by try_charge() */ |
1589 | css_put(&parent->css); | 1610 | css_put(&parent->css); |
1590 | /* uncharge if move fails */ | 1611 | /* uncharge if move fails */ |
1591 | res_counter_uncharge(&parent->res, PAGE_SIZE, NULL); | 1612 | if (!mem_cgroup_is_root(parent)) { |
1592 | if (do_swap_account) | 1613 | res_counter_uncharge(&parent->res, PAGE_SIZE, NULL); |
1593 | res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL); | 1614 | if (do_swap_account) |
1615 | res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL); | ||
1616 | } | ||
1594 | return ret; | 1617 | return ret; |
1595 | } | 1618 | } |
1596 | 1619 | ||
@@ -1779,7 +1802,10 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, | |||
1779 | * This recorded memcg can be obsolete one. So, avoid | 1802 | * This recorded memcg can be obsolete one. So, avoid |
1780 | * calling css_tryget | 1803 | * calling css_tryget |
1781 | */ | 1804 | */ |
1782 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL); | 1805 | if (!mem_cgroup_is_root(memcg)) |
1806 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE, | ||
1807 | NULL); | ||
1808 | mem_cgroup_swap_statistics(memcg, false); | ||
1783 | mem_cgroup_put(memcg); | 1809 | mem_cgroup_put(memcg); |
1784 | } | 1810 | } |
1785 | rcu_read_unlock(); | 1811 | rcu_read_unlock(); |
@@ -1804,9 +1830,11 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) | |||
1804 | return; | 1830 | return; |
1805 | if (!mem) | 1831 | if (!mem) |
1806 | return; | 1832 | return; |
1807 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); | 1833 | if (!mem_cgroup_is_root(mem)) { |
1808 | if (do_swap_account) | 1834 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); |
1809 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | 1835 | if (do_swap_account) |
1836 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | ||
1837 | } | ||
1810 | css_put(&mem->css); | 1838 | css_put(&mem->css); |
1811 | } | 1839 | } |
1812 | 1840 | ||
@@ -1859,9 +1887,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
1859 | break; | 1887 | break; |
1860 | } | 1888 | } |
1861 | 1889 | ||
1862 | res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess); | 1890 | if (!mem_cgroup_is_root(mem)) { |
1863 | if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) | 1891 | res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess); |
1864 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | 1892 | if (do_swap_account && |
1893 | (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) | ||
1894 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | ||
1895 | } | ||
1896 | if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | ||
1897 | mem_cgroup_swap_statistics(mem, true); | ||
1865 | mem_cgroup_charge_statistics(mem, pc, false); | 1898 | mem_cgroup_charge_statistics(mem, pc, false); |
1866 | 1899 | ||
1867 | ClearPageCgroupUsed(pc); | 1900 | ClearPageCgroupUsed(pc); |
@@ -1952,7 +1985,9 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) | |||
1952 | * We uncharge this because swap is freed. | 1985 | * We uncharge this because swap is freed. |
1953 | * This memcg can be obsolete one. We avoid calling css_tryget | 1986 | * This memcg can be obsolete one. We avoid calling css_tryget |
1954 | */ | 1987 | */ |
1955 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL); | 1988 | if (!mem_cgroup_is_root(memcg)) |
1989 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL); | ||
1990 | mem_cgroup_swap_statistics(memcg, false); | ||
1956 | mem_cgroup_put(memcg); | 1991 | mem_cgroup_put(memcg); |
1957 | } | 1992 | } |
1958 | rcu_read_unlock(); | 1993 | rcu_read_unlock(); |
@@ -2464,20 +2499,64 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, | |||
2464 | return retval; | 2499 | return retval; |
2465 | } | 2500 | } |
2466 | 2501 | ||
2502 | struct mem_cgroup_idx_data { | ||
2503 | s64 val; | ||
2504 | enum mem_cgroup_stat_index idx; | ||
2505 | }; | ||
2506 | |||
2507 | static int | ||
2508 | mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data) | ||
2509 | { | ||
2510 | struct mem_cgroup_idx_data *d = data; | ||
2511 | d->val += mem_cgroup_read_stat(&mem->stat, d->idx); | ||
2512 | return 0; | ||
2513 | } | ||
2514 | |||
2515 | static void | ||
2516 | mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem, | ||
2517 | enum mem_cgroup_stat_index idx, s64 *val) | ||
2518 | { | ||
2519 | struct mem_cgroup_idx_data d; | ||
2520 | d.idx = idx; | ||
2521 | d.val = 0; | ||
2522 | mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat); | ||
2523 | *val = d.val; | ||
2524 | } | ||
2525 | |||
2467 | static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) | 2526 | static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) |
2468 | { | 2527 | { |
2469 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); | 2528 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); |
2470 | u64 val = 0; | 2529 | u64 idx_val, val; |
2471 | int type, name; | 2530 | int type, name; |
2472 | 2531 | ||
2473 | type = MEMFILE_TYPE(cft->private); | 2532 | type = MEMFILE_TYPE(cft->private); |
2474 | name = MEMFILE_ATTR(cft->private); | 2533 | name = MEMFILE_ATTR(cft->private); |
2475 | switch (type) { | 2534 | switch (type) { |
2476 | case _MEM: | 2535 | case _MEM: |
2477 | val = res_counter_read_u64(&mem->res, name); | 2536 | if (name == RES_USAGE && mem_cgroup_is_root(mem)) { |
2537 | mem_cgroup_get_recursive_idx_stat(mem, | ||
2538 | MEM_CGROUP_STAT_CACHE, &idx_val); | ||
2539 | val = idx_val; | ||
2540 | mem_cgroup_get_recursive_idx_stat(mem, | ||
2541 | MEM_CGROUP_STAT_RSS, &idx_val); | ||
2542 | val += idx_val; | ||
2543 | val <<= PAGE_SHIFT; | ||
2544 | } else | ||
2545 | val = res_counter_read_u64(&mem->res, name); | ||
2478 | break; | 2546 | break; |
2479 | case _MEMSWAP: | 2547 | case _MEMSWAP: |
2480 | val = res_counter_read_u64(&mem->memsw, name); | 2548 | if (name == RES_USAGE && mem_cgroup_is_root(mem)) { |
2549 | mem_cgroup_get_recursive_idx_stat(mem, | ||
2550 | MEM_CGROUP_STAT_CACHE, &idx_val); | ||
2551 | val = idx_val; | ||
2552 | mem_cgroup_get_recursive_idx_stat(mem, | ||
2553 | MEM_CGROUP_STAT_RSS, &idx_val); | ||
2554 | val += idx_val; | ||
2555 | mem_cgroup_get_recursive_idx_stat(mem, | ||
2556 | MEM_CGROUP_STAT_SWAPOUT, &idx_val); | ||
2557 | val <<= PAGE_SHIFT; | ||
2558 | } else | ||
2559 | val = res_counter_read_u64(&mem->memsw, name); | ||
2481 | break; | 2560 | break; |
2482 | default: | 2561 | default: |
2483 | BUG(); | 2562 | BUG(); |