author     Johannes Weiner <hannes@cmpxchg.org>            2014-12-10 18:42:42 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-12-10 20:41:05 -0500
commit     e8ea14cc6eadfe2ea63e9989e16e62625a2619f8
tree       8109a731e199928c2fd87d3633a56a3251d85245
parent     5ac8fb31ad2ebd6492d1c5e8f31846b532f03945
mm: memcontrol: take a css reference for each charged page
Charges currently pin the css indirectly by playing tricks during
css_offline(): user pages stall the offlining process until all of them
have been reparented, whereas kmemcg acquires a keep-alive reference if
outstanding kernel pages are detected at that point.
In preparation for removing all this complexity, make the pinning explicit
and acquire a css reference for every charged page.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  include/linux/cgroup.h          | 26
-rw-r--r--  include/linux/percpu-refcount.h | 47
-rw-r--r--  mm/memcontrol.c                 | 21

3 files changed, 81 insertions(+), 13 deletions(-)
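[Editor's note] Before the diff itself, a minimal userspace sketch of the
invariant the changelog describes: every charged page holds exactly one
reference on its group's css, taken at charge time and dropped at uncharge
time. Plain C with illustrative names (css_refs, charge_pages); this models
the scheme, it is not kernel code.

#include <assert.h>

static long css_refs = 1;	/* base reference from group creation */
static long charged_pages;	/* pages currently charged to the group */

static void charge_pages(long nr)
{
	charged_pages += nr;
	css_refs += nr;		/* models css_get_many(&memcg->css, nr) */
}

static void uncharge_pages(long nr)
{
	charged_pages -= nr;
	css_refs -= nr;		/* models css_put_many(&memcg->css, nr) */
}

int main(void)
{
	charge_pages(64);
	uncharge_pages(64);
	/* the group stays pinned while pages are charged, and only then */
	assert(css_refs == 1 && charged_pages == 0);
	return 0;
}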
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1d5196889048..9f96b25965c2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -113,6 +113,19 @@ static inline void css_get(struct cgroup_subsys_state *css)
 }
 
 /**
+ * css_get_many - obtain references on the specified css
+ * @css: target css
+ * @n: number of references to get
+ *
+ * The caller must already have a reference.
+ */
+static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+	if (!(css->flags & CSS_NO_REF))
+		percpu_ref_get_many(&css->refcnt, n);
+}
+
+/**
  * css_tryget - try to obtain a reference on the specified css
  * @css: target css
  *
@@ -159,6 +172,19 @@ static inline void css_put(struct cgroup_subsys_state *css)
 	percpu_ref_put(&css->refcnt);
 }
 
+/**
+ * css_put_many - put css references
+ * @css: target css
+ * @n: number of references to put
+ *
+ * Put references obtained via css_get() and css_tryget_online().
+ */
+static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+	if (!(css->flags & CSS_NO_REF))
+		percpu_ref_put_many(&css->refcnt, n);
+}
+
 /* bits in struct cgroup flags field */
 enum {
 	/* Control Group requires release notifications to userspace */
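[Editor's note] The CSS_NO_REF check above is what makes these helpers safe
to call unconditionally: permanent csses such as the root opt out of
refcounting, so batched gets and puts on them are no-ops. A compilable
userspace model of that guard, simplified to a plain counter (the kernel
uses a percpu_ref):

#include <assert.h>

#define CSS_NO_REF 0x1		/* css is never refcounted (e.g. the root) */

struct css {
	unsigned int flags;
	long refcnt;		/* stands in for the kernel's percpu_ref */
};

static void css_get_many(struct css *css, unsigned int n)
{
	if (!(css->flags & CSS_NO_REF))
		css->refcnt += n;	/* percpu_ref_get_many() in the kernel */
}

static void css_put_many(struct css *css, unsigned int n)
{
	if (!(css->flags & CSS_NO_REF))
		css->refcnt -= n;	/* percpu_ref_put_many() in the kernel */
}

int main(void)
{
	struct css root = { .flags = CSS_NO_REF };
	struct css child = { .flags = 0, .refcnt = 1 };

	css_get_many(&root, 128);	/* no-op: the root is never pinned */
	assert(root.refcnt == 0);

	css_get_many(&child, 128);	/* one reference per charged page */
	css_put_many(&child, 128);
	assert(child.refcnt == 1);	/* back to the base reference */
	return 0;
}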
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 51ce60c35f4c..530b249f7ea4 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -147,28 +147,42 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
 }
 
 /**
- * percpu_ref_get - increment a percpu refcount
+ * percpu_ref_get_many - increment a percpu refcount
  * @ref: percpu_ref to get
+ * @nr: number of references to get
  *
- * Analagous to atomic_long_inc().
+ * Analogous to atomic_long_add().
  *
  * This function is safe to call as long as @ref is between init and exit.
  */
-static inline void percpu_ref_get(struct percpu_ref *ref)
+static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr)
 {
 	unsigned long __percpu *percpu_count;
 
 	rcu_read_lock_sched();
 
 	if (__ref_is_percpu(ref, &percpu_count))
-		this_cpu_inc(*percpu_count);
+		this_cpu_add(*percpu_count, nr);
 	else
-		atomic_long_inc(&ref->count);
+		atomic_long_add(nr, &ref->count);
 
 	rcu_read_unlock_sched();
 }
 
 /**
+ * percpu_ref_get - increment a percpu refcount
+ * @ref: percpu_ref to get
+ *
+ * Analagous to atomic_long_inc().
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_get(struct percpu_ref *ref)
+{
+	percpu_ref_get_many(ref, 1);
+}
+
+/**
  * percpu_ref_tryget - try to increment a percpu refcount
  * @ref: percpu_ref to try-get
  *
@@ -231,29 +245,44 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 }
 
 /**
- * percpu_ref_put - decrement a percpu refcount
+ * percpu_ref_put_many - decrement a percpu refcount
  * @ref: percpu_ref to put
+ * @nr: number of references to put
  *
  * Decrement the refcount, and if 0, call the release function (which was passed
  * to percpu_ref_init())
  *
  * This function is safe to call as long as @ref is between init and exit.
  */
-static inline void percpu_ref_put(struct percpu_ref *ref)
+static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
 {
 	unsigned long __percpu *percpu_count;
 
 	rcu_read_lock_sched();
 
 	if (__ref_is_percpu(ref, &percpu_count))
-		this_cpu_dec(*percpu_count);
-	else if (unlikely(atomic_long_dec_and_test(&ref->count)))
+		this_cpu_sub(*percpu_count, nr);
+	else if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
 		ref->release(ref);
 
 	rcu_read_unlock_sched();
 }
 
 /**
+ * percpu_ref_put - decrement a percpu refcount
+ * @ref: percpu_ref to put
+ *
+ * Decrement the refcount, and if 0, call the release function (which was passed
+ * to percpu_ref_init())
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_put(struct percpu_ref *ref)
+{
+	percpu_ref_put_many(ref, 1);
+}
+
+/**
  * percpu_ref_is_zero - test whether a percpu refcount reached zero
  * @ref: percpu_ref to test
  *
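[Editor's note] To see why the *_many variants behave like n repeated gets
or puts, here is the atomic (non-percpu) slow path mirrored in plain C11 so
it can be compiled and tested outside the kernel; 'released' stands in for
ref->release() firing:

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

struct ref {
	atomic_long count;
	bool released;
};

static void ref_get_many(struct ref *r, long nr)
{
	atomic_fetch_add(&r->count, nr);	/* atomic_long_add() */
}

static void ref_put_many(struct ref *r, long nr)
{
	/* atomic_long_sub_and_test(): old value == nr means we hit zero */
	if (atomic_fetch_sub(&r->count, nr) == nr)
		r->released = true;		/* ref->release(ref) */
}

int main(void)
{
	struct ref r = { .count = 1, .released = false };

	ref_get_many(&r, 7);	/* same effect as seven single gets */
	ref_put_many(&r, 7);	/* same effect as seven single puts */
	assert(!r.released);	/* the initial reference still pins it */
	ref_put_many(&r, 1);
	assert(r.released);	/* release fires exactly once, at zero */
	return 0;
}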
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c3cd3bb77dd9..f69da2ac6323 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2273,6 +2273,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
 		page_counter_uncharge(&old->memory, stock->nr_pages);
 		if (do_swap_account)
 			page_counter_uncharge(&old->memsw, stock->nr_pages);
+		css_put_many(&old->css, stock->nr_pages);
 		stock->nr_pages = 0;
 	}
 	stock->cached = NULL;
@@ -2530,6 +2531,7 @@ bypass:
 	return -EINTR;
 
 done_restock:
+	css_get_many(&memcg->css, batch);
 	if (batch > nr_pages)
 		refill_stock(memcg, batch - nr_pages);
 done:
@@ -2544,6 +2546,8 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 	page_counter_uncharge(&memcg->memory, nr_pages);
 	if (do_swap_account)
 		page_counter_uncharge(&memcg->memsw, nr_pages);
+
+	css_put_many(&memcg->css, nr_pages);
 }
 
 /*
@@ -2739,6 +2743,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
 		page_counter_charge(&memcg->memory, nr_pages);
 		if (do_swap_account)
 			page_counter_charge(&memcg->memsw, nr_pages);
+		css_get_many(&memcg->css, nr_pages);
 		ret = 0;
 	} else if (ret)
 		page_counter_uncharge(&memcg->kmem, nr_pages);
@@ -2754,8 +2759,10 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg,
 		page_counter_uncharge(&memcg->memsw, nr_pages);
 
 	/* Not down to 0 */
-	if (page_counter_uncharge(&memcg->kmem, nr_pages))
+	if (page_counter_uncharge(&memcg->kmem, nr_pages)) {
+		css_put_many(&memcg->css, nr_pages);
 		return;
+	}
 
 	/*
 	 * Releases a reference taken in kmem_cgroup_css_offline in case
@@ -2767,6 +2774,8 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg,
 	 */
 	if (memcg_kmem_test_and_clear_dead(memcg))
 		css_put(&memcg->css);
+
+	css_put_many(&memcg->css, nr_pages);
 }
 
 /*
@@ -3394,10 +3403,13 @@ static int mem_cgroup_move_parent(struct page *page,
 	ret = mem_cgroup_move_account(page, nr_pages,
 				pc, child, parent);
 	if (!ret) {
+		if (!mem_cgroup_is_root(parent))
+			css_get_many(&parent->css, nr_pages);
 		/* Take charge off the local counters */
 		page_counter_cancel(&child->memory, nr_pages);
 		if (do_swap_account)
 			page_counter_cancel(&child->memsw, nr_pages);
+		css_put_many(&child->css, nr_pages);
 	}
 
 	if (nr_pages > 1)
@@ -5767,7 +5779,6 @@ static void __mem_cgroup_clear_mc(void)
 {
 	struct mem_cgroup *from = mc.from;
 	struct mem_cgroup *to = mc.to;
-	int i;
 
 	/* we must uncharge all the leftover precharges from mc.to */
 	if (mc.precharge) {
@@ -5795,8 +5806,7 @@ static void __mem_cgroup_clear_mc(void)
 		if (!mem_cgroup_is_root(mc.to))
 			page_counter_uncharge(&mc.to->memory, mc.moved_swap);
 
-		for (i = 0; i < mc.moved_swap; i++)
-			css_put(&mc.from->css);
+		css_put_many(&mc.from->css, mc.moved_swap);
 
 		/* we've already done css_get(mc.to) */
 		mc.moved_swap = 0;
@@ -6343,6 +6353,9 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
 	__this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
 	memcg_check_events(memcg, dummy_page);
 	local_irq_restore(flags);
+
+	if (!mem_cgroup_is_root(memcg))
+		css_put_many(&memcg->css, max(nr_mem, nr_memsw));
 }
 
 static void uncharge_list(struct list_head *page_list)
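[Editor's note] Finally, a sketch of why the batching in try_charge's
done_restock stays balanced with drain_stock: the whole batch takes
references up front, the overcharge parks in the stock, and draining the
stock puts back exactly the parked references. A single global stock and
plain counters for simplicity; the kernel's stocks are per-cpu, and the
names here are illustrative.

#include <assert.h>

static long css_refs;		/* references held on the memcg's css */
static long stock_pages;	/* precharged pages parked in the stock */

static void try_charge(long nr_pages, long batch)
{
	/* done_restock: one reference for every page in the batch */
	css_refs += batch;			/* css_get_many() */
	if (batch > nr_pages)
		stock_pages += batch - nr_pages;	/* refill_stock() */
}

static void drain_stock(void)
{
	/* returning the stock drops its page references in one go */
	css_refs -= stock_pages;		/* css_put_many() */
	stock_pages = 0;
}

int main(void)
{
	try_charge(1, 32);	/* charge one page, precharge a batch of 32 */
	assert(css_refs == 32 && stock_pages == 31);

	drain_stock();
	assert(css_refs == 1);	/* only the actually charged page stays pinned */
	return 0;
}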