author     Johannes Weiner <hannes@cmpxchg.org>            2014-12-10 18:42:42 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-12-10 20:41:05 -0500
commit     e8ea14cc6eadfe2ea63e9989e16e62625a2619f8 (patch)
tree       8109a731e199928c2fd87d3633a56a3251d85245
parent     5ac8fb31ad2ebd6492d1c5e8f31846b532f03945 (diff)
mm: memcontrol: take a css reference for each charged page
Charges currently pin the css indirectly by playing tricks during
css_offline(): user pages stall the offlining process until all of them
have been reparented, whereas kmemcg acquires a keep-alive reference if
outstanding kernel pages are detected at that point.

In preparation for removing all this complexity, make the pinning
explicit and acquire a css reference for every charged page.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
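To make the counting scheme concrete, here is a minimal userspace sketch of
the batched-reference idea this patch relies on: the charge path takes nr
references in one call instead of looping css_get() once per page, and the
uncharge path drops them the same way. The names below (pagecount_ref,
pagecount_ref_get_many(), pagecount_ref_put_many()) are illustrative
stand-ins, not the kernel's css or percpu_ref API, which additionally keeps a
per-CPU fast path under rcu_read_lock_sched().

/* Minimal userspace sketch (not kernel code) of batched refcounting. */
#include <stdatomic.h>
#include <stdio.h>

struct pagecount_ref {
        atomic_long count;                      /* one ref per charged page + a base ref */
        void (*release)(struct pagecount_ref *ref);
};

static void pagecount_ref_get_many(struct pagecount_ref *ref, long nr)
{
        atomic_fetch_add(&ref->count, nr);      /* charge: take nr refs in one shot */
}

static void pagecount_ref_put_many(struct pagecount_ref *ref, long nr)
{
        /* uncharge: drop nr refs; release when the last one goes away */
        if (atomic_fetch_sub(&ref->count, nr) == nr)
                ref->release(ref);
}

static void group_release(struct pagecount_ref *ref)
{
        (void)ref;
        printf("no charged pages left, group can be freed\n");
}

int main(void)
{
        struct pagecount_ref css = { .release = group_release };

        atomic_init(&css.count, 1);             /* base reference held while online */

        pagecount_ref_get_many(&css, 32);       /* charge a 32-page batch */
        pagecount_ref_put_many(&css, 32);       /* uncharge it */
        pagecount_ref_put_many(&css, 1);        /* offline: drop the base reference */
        return 0;
}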
-rw-r--r--  include/linux/cgroup.h            26
-rw-r--r--  include/linux/percpu-refcount.h   47
-rw-r--r--  mm/memcontrol.c                   21
3 files changed, 81 insertions, 13 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1d5196889048..9f96b25965c2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -113,6 +113,19 @@ static inline void css_get(struct cgroup_subsys_state *css)
 }
 
 /**
+ * css_get_many - obtain references on the specified css
+ * @css: target css
+ * @n: number of references to get
+ *
+ * The caller must already have a reference.
+ */
+static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+        if (!(css->flags & CSS_NO_REF))
+                percpu_ref_get_many(&css->refcnt, n);
+}
+
+/**
  * css_tryget - try to obtain a reference on the specified css
  * @css: target css
  *
@@ -159,6 +172,19 @@ static inline void css_put(struct cgroup_subsys_state *css)
         percpu_ref_put(&css->refcnt);
 }
 
+/**
+ * css_put_many - put css references
+ * @css: target css
+ * @n: number of references to put
+ *
+ * Put references obtained via css_get() and css_tryget_online().
+ */
+static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+        if (!(css->flags & CSS_NO_REF))
+                percpu_ref_put_many(&css->refcnt, n);
+}
+
 /* bits in struct cgroup flags field */
 enum {
         /* Control Group requires release notifications to userspace */
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 51ce60c35f4c..530b249f7ea4 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -147,28 +147,42 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
 }
 
 /**
- * percpu_ref_get - increment a percpu refcount
+ * percpu_ref_get_many - increment a percpu refcount
  * @ref: percpu_ref to get
+ * @nr: number of references to get
  *
- * Analagous to atomic_long_inc().
+ * Analogous to atomic_long_add().
  *
  * This function is safe to call as long as @ref is between init and exit.
  */
-static inline void percpu_ref_get(struct percpu_ref *ref)
+static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr)
 {
         unsigned long __percpu *percpu_count;
 
         rcu_read_lock_sched();
 
         if (__ref_is_percpu(ref, &percpu_count))
-                this_cpu_inc(*percpu_count);
+                this_cpu_add(*percpu_count, nr);
         else
-                atomic_long_inc(&ref->count);
+                atomic_long_add(nr, &ref->count);
 
         rcu_read_unlock_sched();
 }
 
 /**
+ * percpu_ref_get - increment a percpu refcount
+ * @ref: percpu_ref to get
+ *
+ * Analagous to atomic_long_inc().
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_get(struct percpu_ref *ref)
+{
+        percpu_ref_get_many(ref, 1);
+}
+
+/**
  * percpu_ref_tryget - try to increment a percpu refcount
  * @ref: percpu_ref to try-get
  *
@@ -231,29 +245,44 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 }
 
 /**
- * percpu_ref_put - decrement a percpu refcount
+ * percpu_ref_put_many - decrement a percpu refcount
  * @ref: percpu_ref to put
+ * @nr: number of references to put
  *
  * Decrement the refcount, and if 0, call the release function (which was passed
  * to percpu_ref_init())
  *
  * This function is safe to call as long as @ref is between init and exit.
  */
-static inline void percpu_ref_put(struct percpu_ref *ref)
+static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
 {
         unsigned long __percpu *percpu_count;
 
         rcu_read_lock_sched();
 
         if (__ref_is_percpu(ref, &percpu_count))
-                this_cpu_dec(*percpu_count);
-        else if (unlikely(atomic_long_dec_and_test(&ref->count)))
+                this_cpu_sub(*percpu_count, nr);
+        else if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
                 ref->release(ref);
 
         rcu_read_unlock_sched();
 }
 
 /**
+ * percpu_ref_put - decrement a percpu refcount
+ * @ref: percpu_ref to put
+ *
+ * Decrement the refcount, and if 0, call the release function (which was passed
+ * to percpu_ref_init())
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_put(struct percpu_ref *ref)
+{
+        percpu_ref_put_many(ref, 1);
+}
+
+/**
  * percpu_ref_is_zero - test whether a percpu refcount reached zero
  * @ref: percpu_ref to test
  *
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c3cd3bb77dd9..f69da2ac6323 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2273,6 +2273,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
                 page_counter_uncharge(&old->memory, stock->nr_pages);
                 if (do_swap_account)
                         page_counter_uncharge(&old->memsw, stock->nr_pages);
+                css_put_many(&old->css, stock->nr_pages);
                 stock->nr_pages = 0;
         }
         stock->cached = NULL;
@@ -2530,6 +2531,7 @@ bypass:
         return -EINTR;
 
 done_restock:
+        css_get_many(&memcg->css, batch);
         if (batch > nr_pages)
                 refill_stock(memcg, batch - nr_pages);
 done:
@@ -2544,6 +2546,8 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
         page_counter_uncharge(&memcg->memory, nr_pages);
         if (do_swap_account)
                 page_counter_uncharge(&memcg->memsw, nr_pages);
+
+        css_put_many(&memcg->css, nr_pages);
 }
 
 /*
@@ -2739,6 +2743,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
                 page_counter_charge(&memcg->memory, nr_pages);
                 if (do_swap_account)
                         page_counter_charge(&memcg->memsw, nr_pages);
+                css_get_many(&memcg->css, nr_pages);
                 ret = 0;
         } else if (ret)
                 page_counter_uncharge(&memcg->kmem, nr_pages);
@@ -2754,8 +2759,10 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg,
                 page_counter_uncharge(&memcg->memsw, nr_pages);
 
         /* Not down to 0 */
-        if (page_counter_uncharge(&memcg->kmem, nr_pages))
+        if (page_counter_uncharge(&memcg->kmem, nr_pages)) {
+                css_put_many(&memcg->css, nr_pages);
                 return;
+        }
 
         /*
          * Releases a reference taken in kmem_cgroup_css_offline in case
@@ -2767,6 +2774,8 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg,
          */
         if (memcg_kmem_test_and_clear_dead(memcg))
                 css_put(&memcg->css);
+
+        css_put_many(&memcg->css, nr_pages);
 }
 
 /*
@@ -3394,10 +3403,13 @@ static int mem_cgroup_move_parent(struct page *page,
         ret = mem_cgroup_move_account(page, nr_pages,
                                 pc, child, parent);
         if (!ret) {
+                if (!mem_cgroup_is_root(parent))
+                        css_get_many(&parent->css, nr_pages);
                 /* Take charge off the local counters */
                 page_counter_cancel(&child->memory, nr_pages);
                 if (do_swap_account)
                         page_counter_cancel(&child->memsw, nr_pages);
+                css_put_many(&child->css, nr_pages);
         }
 
         if (nr_pages > 1)
@@ -5767,7 +5779,6 @@ static void __mem_cgroup_clear_mc(void)
 {
         struct mem_cgroup *from = mc.from;
         struct mem_cgroup *to = mc.to;
-        int i;
 
         /* we must uncharge all the leftover precharges from mc.to */
         if (mc.precharge) {
@@ -5795,8 +5806,7 @@ static void __mem_cgroup_clear_mc(void)
                 if (!mem_cgroup_is_root(mc.to))
                         page_counter_uncharge(&mc.to->memory, mc.moved_swap);
 
-                for (i = 0; i < mc.moved_swap; i++)
-                        css_put(&mc.from->css);
+                css_put_many(&mc.from->css, mc.moved_swap);
 
                 /* we've already done css_get(mc.to) */
                 mc.moved_swap = 0;
@@ -6343,6 +6353,9 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
         __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
         memcg_check_events(memcg, dummy_page);
         local_irq_restore(flags);
+
+        if (!mem_cgroup_is_root(memcg))
+                css_put_many(&memcg->css, max(nr_mem, nr_memsw));
 }
 
 static void uncharge_list(struct list_head *page_list)