aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBalbir Singh <balbir@linux.vnet.ibm.com>2009-01-07 21:08:05 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-01-08 11:31:05 -0500
commit28dbc4b6a01fb579a9441c7b81e3d3413dc452df (patch)
treec45d94be6cd243f3e677ec8279bfec87855802d0
parent52bc0d82100cd896213a9a25ec01c1ba87b939db (diff)
memcg: memory cgroup resource counters for hierarchy
Add support for building hierarchies in resource counters. Cgroups allow us to build a deep hierarchy, but we currently don't link the resource counters belonging to the memory controller control groups in the same fashion as the corresponding cgroup entries in the cgroup hierarchy. This patch provides the infrastructure for resource counters that have the same hierarchy as their cgroup counterparts. This set of patches is based on the resource counter hierarchy patches posted by Pavel Emelianov. NOTE: Building hierarchies is expensive: deeper hierarchies imply charging all the way up to the root. It is known that hierarchies are expensive, so the user needs to be careful and aware of the trade-offs before creating very deep ones. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp> Cc: Paul Menage <menage@google.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: David Rientjes <rientjes@google.com> Cc: Pavel Emelianov <xemul@openvz.org> Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/res_counter.h8
-rw-r--r--kernel/res_counter.c44
-rw-r--r--mm/memcontrol.c20
3 files changed, 54 insertions, 18 deletions
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 271c1c2c9f6f..dede0a2cfc45 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -43,6 +43,10 @@ struct res_counter {
43 * the routines below consider this to be IRQ-safe 43 * the routines below consider this to be IRQ-safe
44 */ 44 */
45 spinlock_t lock; 45 spinlock_t lock;
46 /*
47 * Parent counter, used for hierarchical resource accounting
48 */
49 struct res_counter *parent;
46}; 50};
47 51
48/** 52/**
@@ -87,7 +91,7 @@ enum {
87 * helpers for accounting 91 * helpers for accounting
88 */ 92 */
89 93
90void res_counter_init(struct res_counter *counter); 94void res_counter_init(struct res_counter *counter, struct res_counter *parent);
91 95
92/* 96/*
93 * charge - try to consume more resource. 97 * charge - try to consume more resource.
@@ -103,7 +107,7 @@ void res_counter_init(struct res_counter *counter);
103int __must_check res_counter_charge_locked(struct res_counter *counter, 107int __must_check res_counter_charge_locked(struct res_counter *counter,
104 unsigned long val); 108 unsigned long val);
105int __must_check res_counter_charge(struct res_counter *counter, 109int __must_check res_counter_charge(struct res_counter *counter,
106 unsigned long val); 110 unsigned long val, struct res_counter **limit_fail_at);
107 111
108/* 112/*
109 * uncharge - tell that some portion of the resource is released 113 * uncharge - tell that some portion of the resource is released
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index f275c8eca772..bf8e7534c803 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -15,10 +15,11 @@
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17 17
18void res_counter_init(struct res_counter *counter) 18void res_counter_init(struct res_counter *counter, struct res_counter *parent)
19{ 19{
20 spin_lock_init(&counter->lock); 20 spin_lock_init(&counter->lock);
21 counter->limit = (unsigned long long)LLONG_MAX; 21 counter->limit = (unsigned long long)LLONG_MAX;
22 counter->parent = parent;
22} 23}
23 24
24int res_counter_charge_locked(struct res_counter *counter, unsigned long val) 25int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
@@ -34,14 +35,34 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
34 return 0; 35 return 0;
35} 36}
36 37
37int res_counter_charge(struct res_counter *counter, unsigned long val) 38int res_counter_charge(struct res_counter *counter, unsigned long val,
39 struct res_counter **limit_fail_at)
38{ 40{
39 int ret; 41 int ret;
40 unsigned long flags; 42 unsigned long flags;
41 43 struct res_counter *c, *u;
42 spin_lock_irqsave(&counter->lock, flags); 44
43 ret = res_counter_charge_locked(counter, val); 45 *limit_fail_at = NULL;
44 spin_unlock_irqrestore(&counter->lock, flags); 46 local_irq_save(flags);
47 for (c = counter; c != NULL; c = c->parent) {
48 spin_lock(&c->lock);
49 ret = res_counter_charge_locked(c, val);
50 spin_unlock(&c->lock);
51 if (ret < 0) {
52 *limit_fail_at = c;
53 goto undo;
54 }
55 }
56 ret = 0;
57 goto done;
58undo:
59 for (u = counter; u != c; u = u->parent) {
60 spin_lock(&u->lock);
61 res_counter_uncharge_locked(u, val);
62 spin_unlock(&u->lock);
63 }
64done:
65 local_irq_restore(flags);
45 return ret; 66 return ret;
46} 67}
47 68
@@ -56,10 +77,15 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
56void res_counter_uncharge(struct res_counter *counter, unsigned long val) 77void res_counter_uncharge(struct res_counter *counter, unsigned long val)
57{ 78{
58 unsigned long flags; 79 unsigned long flags;
80 struct res_counter *c;
59 81
60 spin_lock_irqsave(&counter->lock, flags); 82 local_irq_save(flags);
61 res_counter_uncharge_locked(counter, val); 83 for (c = counter; c != NULL; c = c->parent) {
62 spin_unlock_irqrestore(&counter->lock, flags); 84 spin_lock(&c->lock);
85 res_counter_uncharge_locked(c, val);
86 spin_unlock(&c->lock);
87 }
88 local_irq_restore(flags);
63} 89}
64 90
65 91
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9846f617115d..e72fb2b4a7d8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -471,6 +471,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
471{ 471{
472 struct mem_cgroup *mem; 472 struct mem_cgroup *mem;
473 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; 473 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
474 struct res_counter *fail_res;
474 /* 475 /*
475 * We always charge the cgroup the mm_struct belongs to. 476 * We always charge the cgroup the mm_struct belongs to.
476 * The mm_struct's mem_cgroup changes on task migration if the 477 * The mm_struct's mem_cgroup changes on task migration if the
@@ -499,11 +500,12 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
499 int ret; 500 int ret;
500 bool noswap = false; 501 bool noswap = false;
501 502
502 ret = res_counter_charge(&mem->res, PAGE_SIZE); 503 ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
503 if (likely(!ret)) { 504 if (likely(!ret)) {
504 if (!do_swap_account) 505 if (!do_swap_account)
505 break; 506 break;
506 ret = res_counter_charge(&mem->memsw, PAGE_SIZE); 507 ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
508 &fail_res);
507 if (likely(!ret)) 509 if (likely(!ret))
508 break; 510 break;
509 /* mem+swap counter fails */ 511 /* mem+swap counter fails */
@@ -1709,22 +1711,26 @@ static void __init enable_swap_cgroup(void)
1709static struct cgroup_subsys_state * 1711static struct cgroup_subsys_state *
1710mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) 1712mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
1711{ 1713{
1712 struct mem_cgroup *mem; 1714 struct mem_cgroup *mem, *parent;
1713 int node; 1715 int node;
1714 1716
1715 mem = mem_cgroup_alloc(); 1717 mem = mem_cgroup_alloc();
1716 if (!mem) 1718 if (!mem)
1717 return ERR_PTR(-ENOMEM); 1719 return ERR_PTR(-ENOMEM);
1718 1720
1719 res_counter_init(&mem->res);
1720 res_counter_init(&mem->memsw);
1721
1722 for_each_node_state(node, N_POSSIBLE) 1721 for_each_node_state(node, N_POSSIBLE)
1723 if (alloc_mem_cgroup_per_zone_info(mem, node)) 1722 if (alloc_mem_cgroup_per_zone_info(mem, node))
1724 goto free_out; 1723 goto free_out;
1725 /* root ? */ 1724 /* root ? */
1726 if (cont->parent == NULL) 1725 if (cont->parent == NULL) {
1727 enable_swap_cgroup(); 1726 enable_swap_cgroup();
1727 parent = NULL;
1728 } else
1729 parent = mem_cgroup_from_cont(cont->parent);
1730
1731 res_counter_init(&mem->res, parent ? &parent->res : NULL);
1732 res_counter_init(&mem->memsw, parent ? &parent->memsw : NULL);
1733
1728 1734
1729 return &mem->css; 1735 return &mem->css;
1730free_out: 1736free_out: