author		KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2009-04-02 19:57:36 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-04-02 22:04:55 -0400
commit		81d39c20f5ee2437d71709beb82597e2a38efbbc
tree		56297eb00cfa2bc251b2c5e0e0330becafd39e33	/mm/memcontrol.c
parent		14067bb3e24b96d92e22d19c18c0119edf5575e5
memcg: fix shrinking memory to return -EBUSY by fixing retry algorithm
As pointed out, shrinking a memcg's limit should return -EBUSY after a reasonable number of retries. This patch fixes the current behavior of shrink_usage.

Before tackling the "shrink should return -EBUSY" problem, the hierarchical reclaim code has to be fixed: it compares current usage against the current limit, which only makes sense when the kernel reclaims memory because a limit was hit. That is a problem in itself.

What this patch does:

1. Add a new argument "shrink" to hierarchical reclaim. If "shrink==true", hierarchical reclaim returns immediately and the caller decides whether the kernel should shrink more or not. (While shrinking, usage is always smaller than the limit, so a "usage < limit" check is useless.)

2. To adjust to the above change, make two changes in the "shrink" retry path.
   2-a. retry_count now depends on the number of children, because the kernel visits the children under the hierarchy one by one.
   2-b. Rather than checking the return value of hierarchical reclaim for progress, compare usage-before-shrink with usage-after-shrink. If usage-before-shrink <= usage-after-shrink, retry_count is decremented.

Reported-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
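To make the retry arithmetic above concrete, here is a minimal userspace sketch of the new algorithm, not kernel code: resize_limit(), fake_reclaim(), the usage/limit numbers and the retry constant's value are made-up stand-ins for mem_cgroup_resize_limit(), hierarchical reclaim with shrink==true, and the res_counter reads. It shows the retry budget scaling with the number of children and a retry being consumed only when a reclaim pass fails to reduce usage.

#include <errno.h>
#include <stdio.h>

#define MEM_CGROUP_RECLAIM_RETRIES	5	/* illustrative value */

static unsigned long long usage = 1000;		/* made-up current charge */

/*
 * Stand-in for one hierarchical reclaim pass with shrink==true: pretend
 * everything below 700 is unreclaimable, so shrinking past that point
 * can never make progress.
 */
static void fake_reclaim(void)
{
	if (usage > 700)
		usage -= 50;
}

static int resize_limit(unsigned long long new_limit, int children)
{
	/* retry budget scales with the number of memcgs to visit */
	int retry_count = MEM_CGROUP_RECLAIM_RETRIES * children;
	unsigned long long oldusage = usage, curusage;

	while (retry_count) {
		if (usage <= new_limit)
			return 0;		/* new limit fits now, done */

		fake_reclaim();
		curusage = usage;
		/* Usage is reduced ? */
		if (curusage >= oldusage)
			retry_count--;		/* no progress: burn one retry */
		else
			oldusage = curusage;
	}
	return -EBUSY;				/* give up after reasonable retries */
}

int main(void)
{
	int ret;

	ret = resize_limit(800, 3);
	printf("shrink to 800: %d (usage now %llu)\n", ret, usage);
	ret = resize_limit(600, 3);
	printf("shrink to 600: %d (usage now %llu)\n", ret, usage);
	return 0;
}

Scaling the budget by the child count keeps the per-child retry behaviour roughly unchanged now that each hierarchical reclaim call with shrink==true visits only one memcg before returning.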
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c | 71
1 file changed, 59 insertions(+), 12 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 33fc0302e29e..6f6a575e77ad 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -702,6 +702,23 @@ static unsigned int get_swappiness(struct mem_cgroup *memcg)
 	return swappiness;
 }
 
+static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data)
+{
+	int *val = data;
+	(*val)++;
+	return 0;
+}
+/*
+ * This function returns the number of memcg under hierarchy tree. Returns
+ * 1(self count) if no children.
+ */
+static int mem_cgroup_count_children(struct mem_cgroup *mem)
+{
+	int num = 0;
+	mem_cgroup_walk_tree(mem, &num, mem_cgroup_count_children_cb);
+	return num;
+}
+
 /*
  * Visit the first child (need not be the first child as per the ordering
  * of the cgroup list, since we track last_scanned_child) of @mem and use
@@ -750,9 +767,11 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
  *
  * We give up and return to the caller when we visit root_mem twice.
  * (other groups can be removed while we're walking....)
+ *
+ * If shrink==true, for avoiding to free too much, this returns immedieately.
  */
 static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
-						gfp_t gfp_mask, bool noswap)
+				   gfp_t gfp_mask, bool noswap, bool shrink)
 {
 	struct mem_cgroup *victim;
 	int ret, total = 0;
@@ -771,6 +790,13 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
 		ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, noswap,
 						   get_swappiness(victim));
 		css_put(&victim->css);
+		/*
+		 * At shrinking usage, we can't check we should stop here or
+		 * reclaim more. It's depends on callers. last_scanned_child
+		 * will work enough for keeping fairness under tree.
+		 */
+		if (shrink)
+			return ret;
 		total += ret;
 		if (mem_cgroup_check_under_limit(root_mem))
 			return 1 + total;
@@ -856,7 +882,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 			goto nomem;
 
 		ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
-							noswap);
+							noswap, false);
 		if (ret)
 			continue;
 
@@ -1489,7 +1515,8 @@ int mem_cgroup_shrink_usage(struct page *page,
 		return 0;
 
 	do {
-		progress = mem_cgroup_hierarchical_reclaim(mem, gfp_mask, true);
+		progress = mem_cgroup_hierarchical_reclaim(mem,
+					gfp_mask, true, false);
 		progress += mem_cgroup_check_under_limit(mem);
 	} while (!progress && --retry);
 
@@ -1504,11 +1531,21 @@ static DEFINE_MUTEX(set_limit_mutex);
 static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 				unsigned long long val)
 {
-
-	int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
+	int retry_count;
 	int progress;
 	u64 memswlimit;
 	int ret = 0;
+	int children = mem_cgroup_count_children(memcg);
+	u64 curusage, oldusage;
+
+	/*
+	 * For keeping hierarchical_reclaim simple, how long we should retry
+	 * is depends on callers. We set our retry-count to be function
+	 * of # of children which we should visit in this loop.
+	 */
+	retry_count = MEM_CGROUP_RECLAIM_RETRIES * children;
+
+	oldusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 
 	while (retry_count) {
 		if (signal_pending(current)) {
@@ -1534,8 +1571,13 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 			break;
 
 		progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL,
-						   false);
-		if (!progress)			retry_count--;
+						   false, true);
+		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
+		/* Usage is reduced ? */
+		if (curusage >= oldusage)
+			retry_count--;
+		else
+			oldusage = curusage;
 	}
 
 	return ret;
@@ -1544,13 +1586,16 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 				unsigned long long val)
 {
-	int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
+	int retry_count;
 	u64 memlimit, oldusage, curusage;
-	int ret;
+	int children = mem_cgroup_count_children(memcg);
+	int ret = -EBUSY;
 
 	if (!do_swap_account)
 		return -EINVAL;
-
+	/* see mem_cgroup_resize_res_limit */
+	retry_count = children * MEM_CGROUP_RECLAIM_RETRIES;
+	oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 	while (retry_count) {
 		if (signal_pending(current)) {
 			ret = -EINTR;
@@ -1574,11 +1619,13 @@ int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
-		mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true);
+		mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true, true);
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+		/* Usage is reduced ? */
 		if (curusage >= oldusage)
 			retry_count--;
+		else
+			oldusage = curusage;
 	}
 	return ret;
 }