aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-06-09 18:03:33 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-06-09 18:03:33 -0400
commit14208b0ec56919f5333dd654b1a7d10765d0ad05 (patch)
tree474b46c351efced45925d15dc2e0049c49784716 /mm/memcontrol.c
parent6ea4fa70e4af0da8b133b246458fb789d8cb3985 (diff)
parentc731ae1d0f02a300697a8b1564780ad28a6c2013 (diff)
Merge branch 'for-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: "A lot of activities on cgroup side. Heavy restructuring including locking simplification took place to improve the code base and enable implementation of the unified hierarchy, which currently exists behind a __DEVEL__ mount option. The core support is mostly complete but individual controllers need further work. To explain the design and rationales of the unified hierarchy Documentation/cgroups/unified-hierarchy.txt is added. Another notable change is css (cgroup_subsys_state - what each controller uses to identify and interact with a cgroup) iteration update. This is part of continuing updates on css object lifetime and visibility. cgroup started with reference count draining on removal way back and is now reaching a point where csses behave and are iterated like normal refcnted objects albeit with some complexities to allow distinguishing the state where they're being deleted. The css iteration update isn't taken advantage of yet but is planned to be used to simplify memcg significantly" * 'for-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (77 commits) cgroup: disallow disabled controllers on the default hierarchy cgroup: don't destroy the default root cgroup: disallow debug controller on the default hierarchy cgroup: clean up MAINTAINERS entries cgroup: implement css_tryget() device_cgroup: use css_has_online_children() instead of has_children() cgroup: convert cgroup_has_live_children() into css_has_online_children() cgroup: use CSS_ONLINE instead of CGRP_DEAD cgroup: iterate cgroup_subsys_states directly cgroup: introduce CSS_RELEASED and reduce css iteration fallback window cgroup: move cgroup->serial_nr into cgroup_subsys_state cgroup: link all cgroup_subsys_states in their sibling lists cgroup: move cgroup->sibling and ->children into cgroup_subsys_state cgroup: remove cgroup->parent device_cgroup: remove direct access to cgroup->children memcg: update memcg_has_children() to use css_next_child() 
memcg: remove tasks/children test from mem_cgroup_force_empty() cgroup: remove css_parent() cgroup: skip refcnting on normal root csses and cgrp_dfl_root self css cgroup: use cgroup->self.refcnt for cgroup refcnting ...
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c188
1 files changed, 98 insertions, 90 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a9559b91603c..a2c7bcb0e6eb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -526,18 +526,14 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
526 526
527static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) 527static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
528{ 528{
529 /* 529 return memcg->css.id;
530 * The ID of the root cgroup is 0, but memcg treat 0 as an
531 * invalid ID, so we return (cgroup_id + 1).
532 */
533 return memcg->css.cgroup->id + 1;
534} 530}
535 531
536static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) 532static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
537{ 533{
538 struct cgroup_subsys_state *css; 534 struct cgroup_subsys_state *css;
539 535
540 css = css_from_id(id - 1, &memory_cgrp_subsys); 536 css = css_from_id(id, &memory_cgrp_subsys);
541 return mem_cgroup_from_css(css); 537 return mem_cgroup_from_css(css);
542} 538}
543 539
@@ -570,7 +566,8 @@ void sock_update_memcg(struct sock *sk)
570 memcg = mem_cgroup_from_task(current); 566 memcg = mem_cgroup_from_task(current);
571 cg_proto = sk->sk_prot->proto_cgroup(memcg); 567 cg_proto = sk->sk_prot->proto_cgroup(memcg);
572 if (!mem_cgroup_is_root(memcg) && 568 if (!mem_cgroup_is_root(memcg) &&
573 memcg_proto_active(cg_proto) && css_tryget(&memcg->css)) { 569 memcg_proto_active(cg_proto) &&
570 css_tryget_online(&memcg->css)) {
574 sk->sk_cgrp = cg_proto; 571 sk->sk_cgrp = cg_proto;
575 } 572 }
576 rcu_read_unlock(); 573 rcu_read_unlock();
@@ -831,7 +828,7 @@ retry:
831 */ 828 */
832 __mem_cgroup_remove_exceeded(mz, mctz); 829 __mem_cgroup_remove_exceeded(mz, mctz);
833 if (!res_counter_soft_limit_excess(&mz->memcg->res) || 830 if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
834 !css_tryget(&mz->memcg->css)) 831 !css_tryget_online(&mz->memcg->css))
835 goto retry; 832 goto retry;
836done: 833done:
837 return mz; 834 return mz;
@@ -1073,7 +1070,7 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
1073 if (unlikely(!memcg)) 1070 if (unlikely(!memcg))
1074 memcg = root_mem_cgroup; 1071 memcg = root_mem_cgroup;
1075 } 1072 }
1076 } while (!css_tryget(&memcg->css)); 1073 } while (!css_tryget_online(&memcg->css));
1077 rcu_read_unlock(); 1074 rcu_read_unlock();
1078 return memcg; 1075 return memcg;
1079} 1076}
@@ -1110,7 +1107,8 @@ skip_node:
1110 */ 1107 */
1111 if (next_css) { 1108 if (next_css) {
1112 if ((next_css == &root->css) || 1109 if ((next_css == &root->css) ||
1113 ((next_css->flags & CSS_ONLINE) && css_tryget(next_css))) 1110 ((next_css->flags & CSS_ONLINE) &&
1111 css_tryget_online(next_css)))
1114 return mem_cgroup_from_css(next_css); 1112 return mem_cgroup_from_css(next_css);
1115 1113
1116 prev_css = next_css; 1114 prev_css = next_css;
@@ -1156,7 +1154,7 @@ mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter,
1156 * would be returned all the time. 1154 * would be returned all the time.
1157 */ 1155 */
1158 if (position && position != root && 1156 if (position && position != root &&
1159 !css_tryget(&position->css)) 1157 !css_tryget_online(&position->css))
1160 position = NULL; 1158 position = NULL;
1161 } 1159 }
1162 return position; 1160 return position;
@@ -1533,7 +1531,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
1533int mem_cgroup_swappiness(struct mem_cgroup *memcg) 1531int mem_cgroup_swappiness(struct mem_cgroup *memcg)
1534{ 1532{
1535 /* root ? */ 1533 /* root ? */
1536 if (mem_cgroup_disabled() || !css_parent(&memcg->css)) 1534 if (mem_cgroup_disabled() || !memcg->css.parent)
1537 return vm_swappiness; 1535 return vm_swappiness;
1538 1536
1539 return memcg->swappiness; 1537 return memcg->swappiness;
@@ -2769,9 +2767,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
2769 2767
2770/* 2768/*
2771 * A helper function to get mem_cgroup from ID. must be called under 2769 * A helper function to get mem_cgroup from ID. must be called under
2772 * rcu_read_lock(). The caller is responsible for calling css_tryget if 2770 * rcu_read_lock(). The caller is responsible for calling
2773 * the mem_cgroup is used for charging. (dropping refcnt from swap can be 2771 * css_tryget_online() if the mem_cgroup is used for charging. (dropping
2774 * called against removed memcg.) 2772 * refcnt from swap can be called against removed memcg.)
2775 */ 2773 */
2776static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) 2774static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
2777{ 2775{
@@ -2794,14 +2792,14 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
2794 lock_page_cgroup(pc); 2792 lock_page_cgroup(pc);
2795 if (PageCgroupUsed(pc)) { 2793 if (PageCgroupUsed(pc)) {
2796 memcg = pc->mem_cgroup; 2794 memcg = pc->mem_cgroup;
2797 if (memcg && !css_tryget(&memcg->css)) 2795 if (memcg && !css_tryget_online(&memcg->css))
2798 memcg = NULL; 2796 memcg = NULL;
2799 } else if (PageSwapCache(page)) { 2797 } else if (PageSwapCache(page)) {
2800 ent.val = page_private(page); 2798 ent.val = page_private(page);
2801 id = lookup_swap_cgroup_id(ent); 2799 id = lookup_swap_cgroup_id(ent);
2802 rcu_read_lock(); 2800 rcu_read_lock();
2803 memcg = mem_cgroup_lookup(id); 2801 memcg = mem_cgroup_lookup(id);
2804 if (memcg && !css_tryget(&memcg->css)) 2802 if (memcg && !css_tryget_online(&memcg->css))
2805 memcg = NULL; 2803 memcg = NULL;
2806 rcu_read_unlock(); 2804 rcu_read_unlock();
2807 } 2805 }
@@ -3365,7 +3363,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
3365 } 3363 }
3366 3364
3367 /* The corresponding put will be done in the workqueue. */ 3365 /* The corresponding put will be done in the workqueue. */
3368 if (!css_tryget(&memcg->css)) 3366 if (!css_tryget_online(&memcg->css))
3369 goto out; 3367 goto out;
3370 rcu_read_unlock(); 3368 rcu_read_unlock();
3371 3369
@@ -4125,8 +4123,8 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
4125 memcg = mem_cgroup_lookup(id); 4123 memcg = mem_cgroup_lookup(id);
4126 if (memcg) { 4124 if (memcg) {
4127 /* 4125 /*
4128 * We uncharge this because swap is freed. 4126 * We uncharge this because swap is freed. This memcg can
4129 * This memcg can be obsolete one. We avoid calling css_tryget 4127 * be obsolete one. We avoid calling css_tryget_online().
4130 */ 4128 */
4131 if (!mem_cgroup_is_root(memcg)) 4129 if (!mem_cgroup_is_root(memcg))
4132 res_counter_uncharge(&memcg->memsw, PAGE_SIZE); 4130 res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
@@ -4711,18 +4709,28 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
4711 } while (usage > 0); 4709 } while (usage > 0);
4712} 4710}
4713 4711
4712/*
4713 * Test whether @memcg has children, dead or alive. Note that this
4714 * function doesn't care whether @memcg has use_hierarchy enabled and
4715 * returns %true if there are child csses according to the cgroup
4716 * hierarchy. Testing use_hierarchy is the caller's responsiblity.
4717 */
4714static inline bool memcg_has_children(struct mem_cgroup *memcg) 4718static inline bool memcg_has_children(struct mem_cgroup *memcg)
4715{ 4719{
4716 lockdep_assert_held(&memcg_create_mutex); 4720 bool ret;
4721
4717 /* 4722 /*
4718 * The lock does not prevent addition or deletion to the list 4723 * The lock does not prevent addition or deletion of children, but
4719 * of children, but it prevents a new child from being 4724 * it prevents a new child from being initialized based on this
4720 * initialized based on this parent in css_online(), so it's 4725 * parent in css_online(), so it's enough to decide whether
4721 * enough to decide whether hierarchically inherited 4726 * hierarchically inherited attributes can still be changed or not.
4722 * attributes can still be changed or not.
4723 */ 4727 */
4724 return memcg->use_hierarchy && 4728 lockdep_assert_held(&memcg_create_mutex);
4725 !list_empty(&memcg->css.cgroup->children); 4729
4730 rcu_read_lock();
4731 ret = css_next_child(NULL, &memcg->css);
4732 rcu_read_unlock();
4733 return ret;
4726} 4734}
4727 4735
4728/* 4736/*
@@ -4734,11 +4742,6 @@ static inline bool memcg_has_children(struct mem_cgroup *memcg)
4734static int mem_cgroup_force_empty(struct mem_cgroup *memcg) 4742static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
4735{ 4743{
4736 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; 4744 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
4737 struct cgroup *cgrp = memcg->css.cgroup;
4738
4739 /* returns EBUSY if there is a task or if we come here twice. */
4740 if (cgroup_has_tasks(cgrp) || !list_empty(&cgrp->children))
4741 return -EBUSY;
4742 4745
4743 /* we call try-to-free pages for make this cgroup empty */ 4746 /* we call try-to-free pages for make this cgroup empty */
4744 lru_add_drain_all(); 4747 lru_add_drain_all();
@@ -4758,20 +4761,19 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
4758 } 4761 }
4759 4762
4760 } 4763 }
4761 lru_add_drain();
4762 mem_cgroup_reparent_charges(memcg);
4763 4764
4764 return 0; 4765 return 0;
4765} 4766}
4766 4767
4767static int mem_cgroup_force_empty_write(struct cgroup_subsys_state *css, 4768static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of,
4768 unsigned int event) 4769 char *buf, size_t nbytes,
4770 loff_t off)
4769{ 4771{
4770 struct mem_cgroup *memcg = mem_cgroup_from_css(css); 4772 struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
4771 4773
4772 if (mem_cgroup_is_root(memcg)) 4774 if (mem_cgroup_is_root(memcg))
4773 return -EINVAL; 4775 return -EINVAL;
4774 return mem_cgroup_force_empty(memcg); 4776 return mem_cgroup_force_empty(memcg) ?: nbytes;
4775} 4777}
4776 4778
4777static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css, 4779static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css,
@@ -4785,7 +4787,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
4785{ 4787{
4786 int retval = 0; 4788 int retval = 0;
4787 struct mem_cgroup *memcg = mem_cgroup_from_css(css); 4789 struct mem_cgroup *memcg = mem_cgroup_from_css(css);
4788 struct mem_cgroup *parent_memcg = mem_cgroup_from_css(css_parent(&memcg->css)); 4790 struct mem_cgroup *parent_memcg = mem_cgroup_from_css(memcg->css.parent);
4789 4791
4790 mutex_lock(&memcg_create_mutex); 4792 mutex_lock(&memcg_create_mutex);
4791 4793
@@ -4802,7 +4804,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
4802 */ 4804 */
4803 if ((!parent_memcg || !parent_memcg->use_hierarchy) && 4805 if ((!parent_memcg || !parent_memcg->use_hierarchy) &&
4804 (val == 1 || val == 0)) { 4806 (val == 1 || val == 0)) {
4805 if (list_empty(&memcg->css.cgroup->children)) 4807 if (!memcg_has_children(memcg))
4806 memcg->use_hierarchy = val; 4808 memcg->use_hierarchy = val;
4807 else 4809 else
4808 retval = -EBUSY; 4810 retval = -EBUSY;
@@ -4919,7 +4921,8 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg,
4919 * of course permitted. 4921 * of course permitted.
4920 */ 4922 */
4921 mutex_lock(&memcg_create_mutex); 4923 mutex_lock(&memcg_create_mutex);
4922 if (cgroup_has_tasks(memcg->css.cgroup) || memcg_has_children(memcg)) 4924 if (cgroup_has_tasks(memcg->css.cgroup) ||
4925 (memcg->use_hierarchy && memcg_has_children(memcg)))
4923 err = -EBUSY; 4926 err = -EBUSY;
4924 mutex_unlock(&memcg_create_mutex); 4927 mutex_unlock(&memcg_create_mutex);
4925 if (err) 4928 if (err)
@@ -5021,17 +5024,18 @@ static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
5021 * The user of this function is... 5024 * The user of this function is...
5022 * RES_LIMIT. 5025 * RES_LIMIT.
5023 */ 5026 */
5024static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, 5027static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
5025 char *buffer) 5028 char *buf, size_t nbytes, loff_t off)
5026{ 5029{
5027 struct mem_cgroup *memcg = mem_cgroup_from_css(css); 5030 struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
5028 enum res_type type; 5031 enum res_type type;
5029 int name; 5032 int name;
5030 unsigned long long val; 5033 unsigned long long val;
5031 int ret; 5034 int ret;
5032 5035
5033 type = MEMFILE_TYPE(cft->private); 5036 buf = strstrip(buf);
5034 name = MEMFILE_ATTR(cft->private); 5037 type = MEMFILE_TYPE(of_cft(of)->private);
5038 name = MEMFILE_ATTR(of_cft(of)->private);
5035 5039
5036 switch (name) { 5040 switch (name) {
5037 case RES_LIMIT: 5041 case RES_LIMIT:
@@ -5040,7 +5044,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
5040 break; 5044 break;
5041 } 5045 }
5042 /* This function does all necessary parse...reuse it */ 5046 /* This function does all necessary parse...reuse it */
5043 ret = res_counter_memparse_write_strategy(buffer, &val); 5047 ret = res_counter_memparse_write_strategy(buf, &val);
5044 if (ret) 5048 if (ret)
5045 break; 5049 break;
5046 if (type == _MEM) 5050 if (type == _MEM)
@@ -5053,7 +5057,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
5053 return -EINVAL; 5057 return -EINVAL;
5054 break; 5058 break;
5055 case RES_SOFT_LIMIT: 5059 case RES_SOFT_LIMIT:
5056 ret = res_counter_memparse_write_strategy(buffer, &val); 5060 ret = res_counter_memparse_write_strategy(buf, &val);
5057 if (ret) 5061 if (ret)
5058 break; 5062 break;
5059 /* 5063 /*
@@ -5070,7 +5074,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
5070 ret = -EINVAL; /* should be BUG() ? */ 5074 ret = -EINVAL; /* should be BUG() ? */
5071 break; 5075 break;
5072 } 5076 }
5073 return ret; 5077 return ret ?: nbytes;
5074} 5078}
5075 5079
5076static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg, 5080static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
@@ -5083,8 +5087,8 @@ static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
5083 if (!memcg->use_hierarchy) 5087 if (!memcg->use_hierarchy)
5084 goto out; 5088 goto out;
5085 5089
5086 while (css_parent(&memcg->css)) { 5090 while (memcg->css.parent) {
5087 memcg = mem_cgroup_from_css(css_parent(&memcg->css)); 5091 memcg = mem_cgroup_from_css(memcg->css.parent);
5088 if (!memcg->use_hierarchy) 5092 if (!memcg->use_hierarchy)
5089 break; 5093 break;
5090 tmp = res_counter_read_u64(&memcg->res, RES_LIMIT); 5094 tmp = res_counter_read_u64(&memcg->res, RES_LIMIT);
@@ -5097,14 +5101,15 @@ out:
5097 *memsw_limit = min_memsw_limit; 5101 *memsw_limit = min_memsw_limit;
5098} 5102}
5099 5103
5100static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) 5104static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf,
5105 size_t nbytes, loff_t off)
5101{ 5106{
5102 struct mem_cgroup *memcg = mem_cgroup_from_css(css); 5107 struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
5103 int name; 5108 int name;
5104 enum res_type type; 5109 enum res_type type;
5105 5110
5106 type = MEMFILE_TYPE(event); 5111 type = MEMFILE_TYPE(of_cft(of)->private);
5107 name = MEMFILE_ATTR(event); 5112 name = MEMFILE_ATTR(of_cft(of)->private);
5108 5113
5109 switch (name) { 5114 switch (name) {
5110 case RES_MAX_USAGE: 5115 case RES_MAX_USAGE:
@@ -5129,7 +5134,7 @@ static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
5129 break; 5134 break;
5130 } 5135 }
5131 5136
5132 return 0; 5137 return nbytes;
5133} 5138}
5134 5139
5135static u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css, 5140static u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css,
@@ -5322,7 +5327,7 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
5322 if (val > 100) 5327 if (val > 100)
5323 return -EINVAL; 5328 return -EINVAL;
5324 5329
5325 if (css_parent(css)) 5330 if (css->parent)
5326 memcg->swappiness = val; 5331 memcg->swappiness = val;
5327 else 5332 else
5328 vm_swappiness = val; 5333 vm_swappiness = val;
@@ -5659,7 +5664,7 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
5659 struct mem_cgroup *memcg = mem_cgroup_from_css(css); 5664 struct mem_cgroup *memcg = mem_cgroup_from_css(css);
5660 5665
5661 /* cannot set to root cgroup and only 0 and 1 are allowed */ 5666 /* cannot set to root cgroup and only 0 and 1 are allowed */
5662 if (!css_parent(css) || !((val == 0) || (val == 1))) 5667 if (!css->parent || !((val == 0) || (val == 1)))
5663 return -EINVAL; 5668 return -EINVAL;
5664 5669
5665 memcg->oom_kill_disable = val; 5670 memcg->oom_kill_disable = val;
@@ -5705,10 +5710,10 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg)
5705 * which is then paired with css_put during uncharge resp. here. 5710 * which is then paired with css_put during uncharge resp. here.
5706 * 5711 *
5707 * Although this might sound strange as this path is called from 5712 * Although this might sound strange as this path is called from
5708 * css_offline() when the referencemight have dropped down to 0 5713 * css_offline() when the referencemight have dropped down to 0 and
5709 * and shouldn't be incremented anymore (css_tryget would fail) 5714 * shouldn't be incremented anymore (css_tryget_online() would
5710 * we do not have other options because of the kmem allocations 5715 * fail) we do not have other options because of the kmem
5711 * lifetime. 5716 * allocations lifetime.
5712 */ 5717 */
5713 css_get(&memcg->css); 5718 css_get(&memcg->css);
5714 5719
@@ -5827,9 +5832,10 @@ static void memcg_event_ptable_queue_proc(struct file *file,
5827 * Input must be in format '<event_fd> <control_fd> <args>'. 5832 * Input must be in format '<event_fd> <control_fd> <args>'.
5828 * Interpretation of args is defined by control file implementation. 5833 * Interpretation of args is defined by control file implementation.
5829 */ 5834 */
5830static int memcg_write_event_control(struct cgroup_subsys_state *css, 5835static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
5831 struct cftype *cft, char *buffer) 5836 char *buf, size_t nbytes, loff_t off)
5832{ 5837{
5838 struct cgroup_subsys_state *css = of_css(of);
5833 struct mem_cgroup *memcg = mem_cgroup_from_css(css); 5839 struct mem_cgroup *memcg = mem_cgroup_from_css(css);
5834 struct mem_cgroup_event *event; 5840 struct mem_cgroup_event *event;
5835 struct cgroup_subsys_state *cfile_css; 5841 struct cgroup_subsys_state *cfile_css;
@@ -5840,15 +5846,17 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
5840 char *endp; 5846 char *endp;
5841 int ret; 5847 int ret;
5842 5848
5843 efd = simple_strtoul(buffer, &endp, 10); 5849 buf = strstrip(buf);
5850
5851 efd = simple_strtoul(buf, &endp, 10);
5844 if (*endp != ' ') 5852 if (*endp != ' ')
5845 return -EINVAL; 5853 return -EINVAL;
5846 buffer = endp + 1; 5854 buf = endp + 1;
5847 5855
5848 cfd = simple_strtoul(buffer, &endp, 10); 5856 cfd = simple_strtoul(buf, &endp, 10);
5849 if ((*endp != ' ') && (*endp != '\0')) 5857 if ((*endp != ' ') && (*endp != '\0'))
5850 return -EINVAL; 5858 return -EINVAL;
5851 buffer = endp + 1; 5859 buf = endp + 1;
5852 5860
5853 event = kzalloc(sizeof(*event), GFP_KERNEL); 5861 event = kzalloc(sizeof(*event), GFP_KERNEL);
5854 if (!event) 5862 if (!event)
@@ -5916,8 +5924,8 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
5916 * automatically removed on cgroup destruction but the removal is 5924 * automatically removed on cgroup destruction but the removal is
5917 * asynchronous, so take an extra ref on @css. 5925 * asynchronous, so take an extra ref on @css.
5918 */ 5926 */
5919 cfile_css = css_tryget_from_dir(cfile.file->f_dentry->d_parent, 5927 cfile_css = css_tryget_online_from_dir(cfile.file->f_dentry->d_parent,
5920 &memory_cgrp_subsys); 5928 &memory_cgrp_subsys);
5921 ret = -EINVAL; 5929 ret = -EINVAL;
5922 if (IS_ERR(cfile_css)) 5930 if (IS_ERR(cfile_css))
5923 goto out_put_cfile; 5931 goto out_put_cfile;
@@ -5926,7 +5934,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
5926 goto out_put_cfile; 5934 goto out_put_cfile;
5927 } 5935 }
5928 5936
5929 ret = event->register_event(memcg, event->eventfd, buffer); 5937 ret = event->register_event(memcg, event->eventfd, buf);
5930 if (ret) 5938 if (ret)
5931 goto out_put_css; 5939 goto out_put_css;
5932 5940
@@ -5939,7 +5947,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
5939 fdput(cfile); 5947 fdput(cfile);
5940 fdput(efile); 5948 fdput(efile);
5941 5949
5942 return 0; 5950 return nbytes;
5943 5951
5944out_put_css: 5952out_put_css:
5945 css_put(css); 5953 css_put(css);
@@ -5964,25 +5972,25 @@ static struct cftype mem_cgroup_files[] = {
5964 { 5972 {
5965 .name = "max_usage_in_bytes", 5973 .name = "max_usage_in_bytes",
5966 .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), 5974 .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE),
5967 .trigger = mem_cgroup_reset, 5975 .write = mem_cgroup_reset,
5968 .read_u64 = mem_cgroup_read_u64, 5976 .read_u64 = mem_cgroup_read_u64,
5969 }, 5977 },
5970 { 5978 {
5971 .name = "limit_in_bytes", 5979 .name = "limit_in_bytes",
5972 .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), 5980 .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT),
5973 .write_string = mem_cgroup_write, 5981 .write = mem_cgroup_write,
5974 .read_u64 = mem_cgroup_read_u64, 5982 .read_u64 = mem_cgroup_read_u64,
5975 }, 5983 },
5976 { 5984 {
5977 .name = "soft_limit_in_bytes", 5985 .name = "soft_limit_in_bytes",
5978 .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), 5986 .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
5979 .write_string = mem_cgroup_write, 5987 .write = mem_cgroup_write,
5980 .read_u64 = mem_cgroup_read_u64, 5988 .read_u64 = mem_cgroup_read_u64,
5981 }, 5989 },
5982 { 5990 {
5983 .name = "failcnt", 5991 .name = "failcnt",
5984 .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), 5992 .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
5985 .trigger = mem_cgroup_reset, 5993 .write = mem_cgroup_reset,
5986 .read_u64 = mem_cgroup_read_u64, 5994 .read_u64 = mem_cgroup_read_u64,
5987 }, 5995 },
5988 { 5996 {
@@ -5991,7 +5999,7 @@ static struct cftype mem_cgroup_files[] = {
5991 }, 5999 },
5992 { 6000 {
5993 .name = "force_empty", 6001 .name = "force_empty",
5994 .trigger = mem_cgroup_force_empty_write, 6002 .write = mem_cgroup_force_empty_write,
5995 }, 6003 },
5996 { 6004 {
5997 .name = "use_hierarchy", 6005 .name = "use_hierarchy",
@@ -6001,7 +6009,7 @@ static struct cftype mem_cgroup_files[] = {
6001 }, 6009 },
6002 { 6010 {
6003 .name = "cgroup.event_control", /* XXX: for compat */ 6011 .name = "cgroup.event_control", /* XXX: for compat */
6004 .write_string = memcg_write_event_control, 6012 .write = memcg_write_event_control,
6005 .flags = CFTYPE_NO_PREFIX, 6013 .flags = CFTYPE_NO_PREFIX,
6006 .mode = S_IWUGO, 6014 .mode = S_IWUGO,
6007 }, 6015 },
@@ -6034,7 +6042,7 @@ static struct cftype mem_cgroup_files[] = {
6034 { 6042 {
6035 .name = "kmem.limit_in_bytes", 6043 .name = "kmem.limit_in_bytes",
6036 .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), 6044 .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
6037 .write_string = mem_cgroup_write, 6045 .write = mem_cgroup_write,
6038 .read_u64 = mem_cgroup_read_u64, 6046 .read_u64 = mem_cgroup_read_u64,
6039 }, 6047 },
6040 { 6048 {
@@ -6045,13 +6053,13 @@ static struct cftype mem_cgroup_files[] = {
6045 { 6053 {
6046 .name = "kmem.failcnt", 6054 .name = "kmem.failcnt",
6047 .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT), 6055 .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT),
6048 .trigger = mem_cgroup_reset, 6056 .write = mem_cgroup_reset,
6049 .read_u64 = mem_cgroup_read_u64, 6057 .read_u64 = mem_cgroup_read_u64,
6050 }, 6058 },
6051 { 6059 {
6052 .name = "kmem.max_usage_in_bytes", 6060 .name = "kmem.max_usage_in_bytes",
6053 .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE), 6061 .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE),
6054 .trigger = mem_cgroup_reset, 6062 .write = mem_cgroup_reset,
6055 .read_u64 = mem_cgroup_read_u64, 6063 .read_u64 = mem_cgroup_read_u64,
6056 }, 6064 },
6057#ifdef CONFIG_SLABINFO 6065#ifdef CONFIG_SLABINFO
@@ -6074,19 +6082,19 @@ static struct cftype memsw_cgroup_files[] = {
6074 { 6082 {
6075 .name = "memsw.max_usage_in_bytes", 6083 .name = "memsw.max_usage_in_bytes",
6076 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), 6084 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE),
6077 .trigger = mem_cgroup_reset, 6085 .write = mem_cgroup_reset,
6078 .read_u64 = mem_cgroup_read_u64, 6086 .read_u64 = mem_cgroup_read_u64,
6079 }, 6087 },
6080 { 6088 {
6081 .name = "memsw.limit_in_bytes", 6089 .name = "memsw.limit_in_bytes",
6082 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), 6090 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT),
6083 .write_string = mem_cgroup_write, 6091 .write = mem_cgroup_write,
6084 .read_u64 = mem_cgroup_read_u64, 6092 .read_u64 = mem_cgroup_read_u64,
6085 }, 6093 },
6086 { 6094 {
6087 .name = "memsw.failcnt", 6095 .name = "memsw.failcnt",
6088 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), 6096 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT),
6089 .trigger = mem_cgroup_reset, 6097 .write = mem_cgroup_reset,
6090 .read_u64 = mem_cgroup_read_u64, 6098 .read_u64 = mem_cgroup_read_u64,
6091 }, 6099 },
6092 { }, /* terminate */ 6100 { }, /* terminate */
@@ -6264,9 +6272,9 @@ static int
6264mem_cgroup_css_online(struct cgroup_subsys_state *css) 6272mem_cgroup_css_online(struct cgroup_subsys_state *css)
6265{ 6273{
6266 struct mem_cgroup *memcg = mem_cgroup_from_css(css); 6274 struct mem_cgroup *memcg = mem_cgroup_from_css(css);
6267 struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css)); 6275 struct mem_cgroup *parent = mem_cgroup_from_css(css->parent);
6268 6276
6269 if (css->cgroup->id > MEM_CGROUP_ID_MAX) 6277 if (css->id > MEM_CGROUP_ID_MAX)
6270 return -ENOSPC; 6278 return -ENOSPC;
6271 6279
6272 if (!parent) 6280 if (!parent)
@@ -6361,7 +6369,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
6361 /* 6369 /*
6362 * XXX: css_offline() would be where we should reparent all 6370 * XXX: css_offline() would be where we should reparent all
6363 * memory to prepare the cgroup for destruction. However, 6371 * memory to prepare the cgroup for destruction. However,
6364 * memcg does not do css_tryget() and res_counter charging 6372 * memcg does not do css_tryget_online() and res_counter charging
6365 * under the same RCU lock region, which means that charging 6373 * under the same RCU lock region, which means that charging
6366 * could race with offlining. Offlining only happens to 6374 * could race with offlining. Offlining only happens to
6367 * cgroups with no tasks in them but charges can show up 6375 * cgroups with no tasks in them but charges can show up
@@ -6375,9 +6383,9 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
6375 * lookup_swap_cgroup_id() 6383 * lookup_swap_cgroup_id()
6376 * rcu_read_lock() 6384 * rcu_read_lock()
6377 * mem_cgroup_lookup() 6385 * mem_cgroup_lookup()
6378 * css_tryget() 6386 * css_tryget_online()
6379 * rcu_read_unlock() 6387 * rcu_read_unlock()
6380 * disable css_tryget() 6388 * disable css_tryget_online()
6381 * call_rcu() 6389 * call_rcu()
6382 * offline_css() 6390 * offline_css()
6383 * reparent_charges() 6391 * reparent_charges()