author     KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>   2009-04-02 19:57:36 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>       2009-04-02 22:04:55 -0400
commit     81d39c20f5ee2437d71709beb82597e2a38efbbc (patch)
tree       56297eb00cfa2bc251b2c5e0e0330becafd39e33 /mm/memcontrol.c
parent     14067bb3e24b96d92e22d19c18c0119edf5575e5 (diff)
memcg: fix shrinking memory to return -EBUSY by fixing retry algorithm
As pointed out, shrinking a memcg's limit should return -EBUSY after a
reasonable number of retries.  This patch fixes the current behavior of
shrink_usage.

Before addressing the "shrink should return -EBUSY" problem, we should fix
the hierarchical reclaim code.  It compares current usage against the
current limit, but that check only makes sense when the kernel reclaims
memory because a limit was hit.  This is also a problem.
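
For illustration, the stop-check in question looks roughly like this
(paraphrased from the mem_cgroup_hierarchical_reclaim hunk below; not the
verbatim source):

	total += ret;
	if (mem_cgroup_check_under_limit(root_mem))
		return 1 + total;	/* sensible when a limit was hit... */
	/*
	 * ...but while a limit is being lowered, the smaller limit is not
	 * set yet, so usage is still under the current limit and the
	 * check succeeds trivially.
	 */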
What this patch does:

1. Add a new argument "shrink" to hierarchical reclaim.  If "shrink==true",
hierarchical reclaim returns immediately after one reclaim pass, and the
caller checks whether the kernel should shrink more.
(When shrinking memory, usage is always smaller than the limit, so the
usage < limit check is useless.)

2. To adjust to the above change, make two changes in "shrink"'s retry
path (see the simplified sketch after this list):

2-a. retry_count now depends on the number of children, because the kernel
visits the children under the hierarchy one by one.

2-b. Rather than checking the return value of hierarchical_reclaim for
progress, compare usage-before-shrink with usage-after-shrink.
If usage-before-shrink <= usage-after-shrink, retry_count is decremented.
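
Putting 1, 2-a and 2-b together, the new retry loop in
mem_cgroup_resize_limit() behaves roughly like the sketch below.  This is a
simplified illustration, not the patch itself: read_usage() and
try_set_limit() are hypothetical stand-ins for res_counter_read_u64() and
the mutex-protected res_counter_set_limit() path, and the signal_pending()
check is omitted.

static int resize_retry_sketch(struct mem_cgroup *memcg, unsigned long long val)
{
	/* 2-a: retries scale with the number of memcgs in the hierarchy */
	int retry_count = MEM_CGROUP_RECLAIM_RETRIES *
				mem_cgroup_count_children(memcg);
	u64 curusage, oldusage = read_usage(memcg);

	while (retry_count) {
		if (try_set_limit(memcg, val) == 0)
			return 0;	/* usage now fits under the new limit */
		/* shrink==true: reclaim from one victim and return at once */
		mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, false, true);
		curusage = read_usage(memcg);
		/* 2-b: burn a retry only when usage did not go down */
		if (curusage >= oldusage)
			retry_count--;
		else
			oldusage = curusage;
	}
	return -EBUSY;	/* reasonable retries exhausted */
}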
Reported-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
 -rw-r--r--  mm/memcontrol.c | 71
 1 file changed, 59 insertions(+), 12 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 33fc0302e29e..6f6a575e77ad 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -702,6 +702,23 @@ static unsigned int get_swappiness(struct mem_cgroup *memcg)
 	return swappiness;
 }
 
+static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data)
+{
+	int *val = data;
+	(*val)++;
+	return 0;
+}
+/*
+ * This function returns the number of memcgs under the hierarchy tree.
+ * Returns 1 (self count) if there are no children.
+ */
+static int mem_cgroup_count_children(struct mem_cgroup *mem)
+{
+	int num = 0;
+	mem_cgroup_walk_tree(mem, &num, mem_cgroup_count_children_cb);
+	return num;
+}
+
 /*
  * Visit the first child (need not be the first child as per the ordering
  * of the cgroup list, since we track last_scanned_child) of @mem and use
@@ -750,9 +767,11 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
  *
  * We give up and return to the caller when we visit root_mem twice.
  * (other groups can be removed while we're walking....)
+ *
+ * If shrink==true, this returns immediately, to avoid freeing too much.
  */
 static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
-						gfp_t gfp_mask, bool noswap)
+					gfp_t gfp_mask, bool noswap, bool shrink)
 {
 	struct mem_cgroup *victim;
 	int ret, total = 0;
@@ -771,6 +790,13 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
 		ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, noswap,
 						   get_swappiness(victim));
 		css_put(&victim->css);
+		/*
+		 * When shrinking usage, we can't check whether to stop here
+		 * or reclaim more; that depends on the caller.
+		 * last_scanned_child is enough to keep fairness under the tree.
+		 */
+		if (shrink)
+			return ret;
 		total += ret;
 		if (mem_cgroup_check_under_limit(root_mem))
 			return 1 + total;
@@ -856,7 +882,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 			goto nomem;
 
 		ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
-							noswap);
+							noswap, false);
 		if (ret)
 			continue;
 
@@ -1489,7 +1515,8 @@ int mem_cgroup_shrink_usage(struct page *page,
 		return 0;
 
 	do {
-		progress = mem_cgroup_hierarchical_reclaim(mem, gfp_mask, true);
+		progress = mem_cgroup_hierarchical_reclaim(mem,
+					gfp_mask, true, false);
 		progress += mem_cgroup_check_under_limit(mem);
 	} while (!progress && --retry);
 
@@ -1504,11 +1531,21 @@ static DEFINE_MUTEX(set_limit_mutex);
 static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 				unsigned long long val)
 {
-
-	int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
+	int retry_count;
 	int progress;
 	u64 memswlimit;
 	int ret = 0;
+	int children = mem_cgroup_count_children(memcg);
+	u64 curusage, oldusage;
+
+	/*
+	 * To keep hierarchical_reclaim simple, how long we should retry
+	 * depends on the caller.  We set the retry count to be a function
+	 * of the number of children we visit in this loop.
+	 */
+	retry_count = MEM_CGROUP_RECLAIM_RETRIES * children;
+
+	oldusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 
 	while (retry_count) {
 		if (signal_pending(current)) {
@@ -1534,8 +1571,13 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 			break;
 
 		progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL,
-						   false);
-		if (!progress)		retry_count--;
+						   false, true);
+		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
+		/* Usage is reduced? */
+		if (curusage >= oldusage)
+			retry_count--;
+		else
+			oldusage = curusage;
 	}
 
 	return ret;
@@ -1544,13 +1586,16 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 				unsigned long long val)
 {
-	int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
+	int retry_count;
 	u64 memlimit, oldusage, curusage;
-	int ret;
+	int children = mem_cgroup_count_children(memcg);
+	int ret = -EBUSY;
 
 	if (!do_swap_account)
 		return -EINVAL;
-
+	/* see mem_cgroup_resize_limit */
+	retry_count = children * MEM_CGROUP_RECLAIM_RETRIES;
+	oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 	while (retry_count) {
 		if (signal_pending(current)) {
 			ret = -EINTR;
@@ -1574,11 +1619,13 @@ int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
-		mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true);
+		mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true, true);
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+		/* Usage is reduced? */
 		if (curusage >= oldusage)
 			retry_count--;
+		else
+			oldusage = curusage;
 	}
 	return ret;
 }