aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author Vladimir Davydov <vdavydov@parallels.com> 2015-02-12 17:59:10 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-02-12 21:54:09 -0500
commit 60d3fd32a7a9da4c8c93a9f89cfda22a0b4c65ce (patch)
tree 47de9f3f0e48ecc9f416b5a40fac6f6e1c97395e /mm
parent c0a5b560938a0f2fd2fbf66ddc446c7c2b41383a (diff)
list_lru: introduce per-memcg lists
There are several FS shrinkers, including super_block::s_shrink, that keep reclaimable objects in the list_lru structure. Hence to turn them into memcg-aware shrinkers, it is enough to make list_lru per-memcg.

This patch does the trick. It adds an array of lru lists to the list_lru_node structure (per-node part of the list_lru), one for each kmem-active memcg, and dispatches every item addition or removal to the list corresponding to the memcg which the item is accounted to. So now the list_lru structure is not just per node, but per node and per memcg.

Not all list_lrus need this feature, so this patch also adds a new method, list_lru_init_memcg, which initializes a list_lru as memcg aware. Otherwise (i.e. if initialized with the old list_lru_init), the list_lru won't have per-memcg lists.

Just like per-memcg caches arrays, the arrays of per-memcg lists are indexed by memcg_cache_id, so we must grow them whenever memcg_nr_cache_ids is increased. So we introduce a callback, memcg_update_all_list_lrus, invoked by memcg_alloc_cache_id if the id space is full.

The locking is implemented in a manner similar to lruvecs, i.e. we have one lock per node that protects all lists (both global and per cgroup) on the node.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/list_lru.c 374
-rw-r--r-- mm/memcontrol.c 20
2 files changed, 370 insertions, 24 deletions
diff --git a/mm/list_lru.c b/mm/list_lru.c
index a9021cb3ccde..79aee70c3b9d 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -10,6 +10,7 @@
10#include <linux/list_lru.h> 10#include <linux/list_lru.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/mutex.h> 12#include <linux/mutex.h>
13#include <linux/memcontrol.h>
13 14
14#ifdef CONFIG_MEMCG_KMEM 15#ifdef CONFIG_MEMCG_KMEM
15static LIST_HEAD(list_lrus); 16static LIST_HEAD(list_lrus);
@@ -38,16 +39,71 @@ static void list_lru_unregister(struct list_lru *lru)
38} 39}
39#endif /* CONFIG_MEMCG_KMEM */ 40#endif /* CONFIG_MEMCG_KMEM */
40 41
42#ifdef CONFIG_MEMCG_KMEM
43static inline bool list_lru_memcg_aware(struct list_lru *lru)
44{
45 return !!lru->node[0].memcg_lrus;
46}
47
48static inline struct list_lru_one *
49list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
50{
51 /*
52 * The lock protects the array of per cgroup lists from relocation
53 * (see memcg_update_list_lru_node).
54 */
55 lockdep_assert_held(&nlru->lock);
56 if (nlru->memcg_lrus && idx >= 0)
57 return nlru->memcg_lrus->lru[idx];
58
59 return &nlru->lru;
60}
61
62static inline struct list_lru_one *
63list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
64{
65 struct mem_cgroup *memcg;
66
67 if (!nlru->memcg_lrus)
68 return &nlru->lru;
69
70 memcg = mem_cgroup_from_kmem(ptr);
71 if (!memcg)
72 return &nlru->lru;
73
74 return list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
75}
76#else
/* Without CONFIG_MEMCG_KMEM no lru ever has per-memcg lists. */
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}
81
82static inline struct list_lru_one *
83list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
84{
85 return &nlru->lru;
86}
87
88static inline struct list_lru_one *
89list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
90{
91 return &nlru->lru;
92}
93#endif /* CONFIG_MEMCG_KMEM */
94
41bool list_lru_add(struct list_lru *lru, struct list_head *item) 95bool list_lru_add(struct list_lru *lru, struct list_head *item)
42{ 96{
43 int nid = page_to_nid(virt_to_page(item)); 97 int nid = page_to_nid(virt_to_page(item));
44 struct list_lru_node *nlru = &lru->node[nid]; 98 struct list_lru_node *nlru = &lru->node[nid];
99 struct list_lru_one *l;
45 100
46 spin_lock(&nlru->lock); 101 spin_lock(&nlru->lock);
47 WARN_ON_ONCE(nlru->nr_items < 0); 102 l = list_lru_from_kmem(nlru, item);
103 WARN_ON_ONCE(l->nr_items < 0);
48 if (list_empty(item)) { 104 if (list_empty(item)) {
49 list_add_tail(item, &nlru->list); 105 list_add_tail(item, &l->list);
50 nlru->nr_items++; 106 l->nr_items++;
51 spin_unlock(&nlru->lock); 107 spin_unlock(&nlru->lock);
52 return true; 108 return true;
53 } 109 }
@@ -60,12 +116,14 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
60{ 116{
61 int nid = page_to_nid(virt_to_page(item)); 117 int nid = page_to_nid(virt_to_page(item));
62 struct list_lru_node *nlru = &lru->node[nid]; 118 struct list_lru_node *nlru = &lru->node[nid];
119 struct list_lru_one *l;
63 120
64 spin_lock(&nlru->lock); 121 spin_lock(&nlru->lock);
122 l = list_lru_from_kmem(nlru, item);
65 if (!list_empty(item)) { 123 if (!list_empty(item)) {
66 list_del_init(item); 124 list_del_init(item);
67 nlru->nr_items--; 125 l->nr_items--;
68 WARN_ON_ONCE(nlru->nr_items < 0); 126 WARN_ON_ONCE(l->nr_items < 0);
69 spin_unlock(&nlru->lock); 127 spin_unlock(&nlru->lock);
70 return true; 128 return true;
71 } 129 }
@@ -74,33 +132,58 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
74} 132}
75EXPORT_SYMBOL_GPL(list_lru_del); 133EXPORT_SYMBOL_GPL(list_lru_del);
76 134
77unsigned long 135static unsigned long __list_lru_count_one(struct list_lru *lru,
78list_lru_count_node(struct list_lru *lru, int nid) 136 int nid, int memcg_idx)
79{ 137{
80 unsigned long count = 0;
81 struct list_lru_node *nlru = &lru->node[nid]; 138 struct list_lru_node *nlru = &lru->node[nid];
139 struct list_lru_one *l;
140 unsigned long count;
82 141
83 spin_lock(&nlru->lock); 142 spin_lock(&nlru->lock);
84 WARN_ON_ONCE(nlru->nr_items < 0); 143 l = list_lru_from_memcg_idx(nlru, memcg_idx);
85 count += nlru->nr_items; 144 WARN_ON_ONCE(l->nr_items < 0);
145 count = l->nr_items;
86 spin_unlock(&nlru->lock); 146 spin_unlock(&nlru->lock);
87 147
88 return count; 148 return count;
89} 149}
150
/*
 * Count the items of @lru that sit on node @nid in the list selected by
 * @memcg's cache id.
 */
unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	int idx = memcg_cache_id(memcg);

	return __list_lru_count_one(lru, nid, idx);
}
156EXPORT_SYMBOL_GPL(list_lru_count_one);
157
158unsigned long list_lru_count_node(struct list_lru *lru, int nid)
159{
160 long count = 0;
161 int memcg_idx;
162
163 count += __list_lru_count_one(lru, nid, -1);
164 if (list_lru_memcg_aware(lru)) {
165 for_each_memcg_cache_index(memcg_idx)
166 count += __list_lru_count_one(lru, nid, memcg_idx);
167 }
168 return count;
169}
90EXPORT_SYMBOL_GPL(list_lru_count_node); 170EXPORT_SYMBOL_GPL(list_lru_count_node);
91 171
92unsigned long 172static unsigned long
93list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate, 173__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
94 void *cb_arg, unsigned long *nr_to_walk) 174 list_lru_walk_cb isolate, void *cb_arg,
175 unsigned long *nr_to_walk)
95{ 176{
96 177
97 struct list_lru_node *nlru = &lru->node[nid]; 178 struct list_lru_node *nlru = &lru->node[nid];
179 struct list_lru_one *l;
98 struct list_head *item, *n; 180 struct list_head *item, *n;
99 unsigned long isolated = 0; 181 unsigned long isolated = 0;
100 182
101 spin_lock(&nlru->lock); 183 spin_lock(&nlru->lock);
184 l = list_lru_from_memcg_idx(nlru, memcg_idx);
102restart: 185restart:
103 list_for_each_safe(item, n, &nlru->list) { 186 list_for_each_safe(item, n, &l->list) {
104 enum lru_status ret; 187 enum lru_status ret;
105 188
106 /* 189 /*
@@ -116,8 +199,8 @@ restart:
116 case LRU_REMOVED_RETRY: 199 case LRU_REMOVED_RETRY:
117 assert_spin_locked(&nlru->lock); 200 assert_spin_locked(&nlru->lock);
118 case LRU_REMOVED: 201 case LRU_REMOVED:
119 nlru->nr_items--; 202 l->nr_items--;
120 WARN_ON_ONCE(nlru->nr_items < 0); 203 WARN_ON_ONCE(l->nr_items < 0);
121 isolated++; 204 isolated++;
122 /* 205 /*
123 * If the lru lock has been dropped, our list 206 * If the lru lock has been dropped, our list
@@ -128,7 +211,7 @@ restart:
128 goto restart; 211 goto restart;
129 break; 212 break;
130 case LRU_ROTATE: 213 case LRU_ROTATE:
131 list_move_tail(item, &nlru->list); 214 list_move_tail(item, &l->list);
132 break; 215 break;
133 case LRU_SKIP: 216 case LRU_SKIP:
134 break; 217 break;
@@ -147,36 +230,279 @@ restart:
147 spin_unlock(&nlru->lock); 230 spin_unlock(&nlru->lock);
148 return isolated; 231 return isolated;
149} 232}
233
234unsigned long
235list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
236 list_lru_walk_cb isolate, void *cb_arg,
237 unsigned long *nr_to_walk)
238{
239 return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
240 isolate, cb_arg, nr_to_walk);
241}
242EXPORT_SYMBOL_GPL(list_lru_walk_one);
243
244unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
245 list_lru_walk_cb isolate, void *cb_arg,
246 unsigned long *nr_to_walk)
247{
248 long isolated = 0;
249 int memcg_idx;
250
251 isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg,
252 nr_to_walk);
253 if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
254 for_each_memcg_cache_index(memcg_idx) {
255 isolated += __list_lru_walk_one(lru, nid, memcg_idx,
256 isolate, cb_arg, nr_to_walk);
257 if (*nr_to_walk <= 0)
258 break;
259 }
260 }
261 return isolated;
262}
150EXPORT_SYMBOL_GPL(list_lru_walk_node); 263EXPORT_SYMBOL_GPL(list_lru_walk_node);
151 264
152int list_lru_init_key(struct list_lru *lru, struct lock_class_key *key) 265static void init_one_lru(struct list_lru_one *l)
266{
267 INIT_LIST_HEAD(&l->list);
268 l->nr_items = 0;
269}
270
271#ifdef CONFIG_MEMCG_KMEM
272static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
273 int begin, int end)
274{
275 int i;
276
277 for (i = begin; i < end; i++)
278 kfree(memcg_lrus->lru[i]);
279}
280
281static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
282 int begin, int end)
283{
284 int i;
285
286 for (i = begin; i < end; i++) {
287 struct list_lru_one *l;
288
289 l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
290 if (!l)
291 goto fail;
292
293 init_one_lru(l);
294 memcg_lrus->lru[i] = l;
295 }
296 return 0;
297fail:
298 __memcg_destroy_list_lru_node(memcg_lrus, begin, i - 1);
299 return -ENOMEM;
300}
301
302static int memcg_init_list_lru_node(struct list_lru_node *nlru)
303{
304 int size = memcg_nr_cache_ids;
305
306 nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL);
307 if (!nlru->memcg_lrus)
308 return -ENOMEM;
309
310 if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) {
311 kfree(nlru->memcg_lrus);
312 return -ENOMEM;
313 }
314
315 return 0;
316}
317
318static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
319{
320 __memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids);
321 kfree(nlru->memcg_lrus);
322}
323
324static int memcg_update_list_lru_node(struct list_lru_node *nlru,
325 int old_size, int new_size)
326{
327 struct list_lru_memcg *old, *new;
328
329 BUG_ON(old_size > new_size);
330
331 old = nlru->memcg_lrus;
332 new = kmalloc(new_size * sizeof(void *), GFP_KERNEL);
333 if (!new)
334 return -ENOMEM;
335
336 if (__memcg_init_list_lru_node(new, old_size, new_size)) {
337 kfree(new);
338 return -ENOMEM;
339 }
340
341 memcpy(new, old, old_size * sizeof(void *));
342
343 /*
344 * The lock guarantees that we won't race with a reader
345 * (see list_lru_from_memcg_idx).
346 *
347 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
348 * we have to use IRQ-safe primitives here to avoid deadlock.
349 */
350 spin_lock_irq(&nlru->lock);
351 nlru->memcg_lrus = new;
352 spin_unlock_irq(&nlru->lock);
353
354 kfree(old);
355 return 0;
356}
357
358static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
359 int old_size, int new_size)
360{
361 /* do not bother shrinking the array back to the old size, because we
362 * cannot handle allocation failures here */
363 __memcg_destroy_list_lru_node(nlru->memcg_lrus, old_size, new_size);
364}
365
366static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
367{
368 int i;
369
370 for (i = 0; i < nr_node_ids; i++) {
371 if (!memcg_aware)
372 lru->node[i].memcg_lrus = NULL;
373 else if (memcg_init_list_lru_node(&lru->node[i]))
374 goto fail;
375 }
376 return 0;
377fail:
378 for (i = i - 1; i >= 0; i--)
379 memcg_destroy_list_lru_node(&lru->node[i]);
380 return -ENOMEM;
381}
382
383static void memcg_destroy_list_lru(struct list_lru *lru)
384{
385 int i;
386
387 if (!list_lru_memcg_aware(lru))
388 return;
389
390 for (i = 0; i < nr_node_ids; i++)
391 memcg_destroy_list_lru_node(&lru->node[i]);
392}
393
394static int memcg_update_list_lru(struct list_lru *lru,
395 int old_size, int new_size)
396{
397 int i;
398
399 if (!list_lru_memcg_aware(lru))
400 return 0;
401
402 for (i = 0; i < nr_node_ids; i++) {
403 if (memcg_update_list_lru_node(&lru->node[i],
404 old_size, new_size))
405 goto fail;
406 }
407 return 0;
408fail:
409 for (i = i - 1; i >= 0; i--)
410 memcg_cancel_update_list_lru_node(&lru->node[i],
411 old_size, new_size);
412 return -ENOMEM;
413}
414
415static void memcg_cancel_update_list_lru(struct list_lru *lru,
416 int old_size, int new_size)
417{
418 int i;
419
420 if (!list_lru_memcg_aware(lru))
421 return;
422
423 for (i = 0; i < nr_node_ids; i++)
424 memcg_cancel_update_list_lru_node(&lru->node[i],
425 old_size, new_size);
426}
427
428int memcg_update_all_list_lrus(int new_size)
429{
430 int ret = 0;
431 struct list_lru *lru;
432 int old_size = memcg_nr_cache_ids;
433
434 mutex_lock(&list_lrus_mutex);
435 list_for_each_entry(lru, &list_lrus, list) {
436 ret = memcg_update_list_lru(lru, old_size, new_size);
437 if (ret)
438 goto fail;
439 }
440out:
441 mutex_unlock(&list_lrus_mutex);
442 return ret;
443fail:
444 list_for_each_entry_continue_reverse(lru, &list_lrus, list)
445 memcg_cancel_update_list_lru(lru, old_size, new_size);
446 goto out;
447}
448#else
/* Without CONFIG_MEMCG_KMEM there is nothing to set up; always succeeds. */
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}
453
/* Without CONFIG_MEMCG_KMEM there are no per-memcg lists to free. */
static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
457#endif /* CONFIG_MEMCG_KMEM */
458
459int __list_lru_init(struct list_lru *lru, bool memcg_aware,
460 struct lock_class_key *key)
153{ 461{
154 int i; 462 int i;
155 size_t size = sizeof(*lru->node) * nr_node_ids; 463 size_t size = sizeof(*lru->node) * nr_node_ids;
464 int err = -ENOMEM;
465
466 memcg_get_cache_ids();
156 467
157 lru->node = kzalloc(size, GFP_KERNEL); 468 lru->node = kzalloc(size, GFP_KERNEL);
158 if (!lru->node) 469 if (!lru->node)
159 return -ENOMEM; 470 goto out;
160 471
161 for (i = 0; i < nr_node_ids; i++) { 472 for (i = 0; i < nr_node_ids; i++) {
162 spin_lock_init(&lru->node[i].lock); 473 spin_lock_init(&lru->node[i].lock);
163 if (key) 474 if (key)
164 lockdep_set_class(&lru->node[i].lock, key); 475 lockdep_set_class(&lru->node[i].lock, key);
165 INIT_LIST_HEAD(&lru->node[i].list); 476 init_one_lru(&lru->node[i].lru);
166 lru->node[i].nr_items = 0; 477 }
478
479 err = memcg_init_list_lru(lru, memcg_aware);
480 if (err) {
481 kfree(lru->node);
482 goto out;
167 } 483 }
484
168 list_lru_register(lru); 485 list_lru_register(lru);
169 return 0; 486out:
487 memcg_put_cache_ids();
488 return err;
170} 489}
171EXPORT_SYMBOL_GPL(list_lru_init_key); 490EXPORT_SYMBOL_GPL(__list_lru_init);
172 491
173void list_lru_destroy(struct list_lru *lru) 492void list_lru_destroy(struct list_lru *lru)
174{ 493{
175 /* Already destroyed or not yet initialized? */ 494 /* Already destroyed or not yet initialized? */
176 if (!lru->node) 495 if (!lru->node)
177 return; 496 return;
497
498 memcg_get_cache_ids();
499
178 list_lru_unregister(lru); 500 list_lru_unregister(lru);
501
502 memcg_destroy_list_lru(lru);
179 kfree(lru->node); 503 kfree(lru->node);
180 lru->node = NULL; 504 lru->node = NULL;
505
506 memcg_put_cache_ids();
181} 507}
182EXPORT_SYMBOL_GPL(list_lru_destroy); 508EXPORT_SYMBOL_GPL(list_lru_destroy);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6706e5fa5ac0..afa55bb38cbd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2572,6 +2572,8 @@ static int memcg_alloc_cache_id(void)
2572 2572
2573 err = memcg_update_all_caches(size); 2573 err = memcg_update_all_caches(size);
2574 if (!err) 2574 if (!err)
2575 err = memcg_update_all_list_lrus(size);
2576 if (!err)
2575 memcg_nr_cache_ids = size; 2577 memcg_nr_cache_ids = size;
2576 2578
2577 up_write(&memcg_cache_ids_sem); 2579 up_write(&memcg_cache_ids_sem);
@@ -2765,6 +2767,24 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
2765 memcg_uncharge_kmem(memcg, 1 << order); 2767 memcg_uncharge_kmem(memcg, 1 << order);
2766 page->mem_cgroup = NULL; 2768 page->mem_cgroup = NULL;
2767} 2769}
2770
2771struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr)
2772{
2773 struct mem_cgroup *memcg = NULL;
2774 struct kmem_cache *cachep;
2775 struct page *page;
2776
2777 page = virt_to_head_page(ptr);
2778 if (PageSlab(page)) {
2779 cachep = page->slab_cache;
2780 if (!is_root_cache(cachep))
2781 memcg = cachep->memcg_params->memcg;
2782 } else
2783 /* page allocated by alloc_kmem_pages */
2784 memcg = page->mem_cgroup;
2785
2786 return memcg;
2787}
2768#endif /* CONFIG_MEMCG_KMEM */ 2788#endif /* CONFIG_MEMCG_KMEM */
2769 2789
2770#ifdef CONFIG_TRANSPARENT_HUGEPAGE 2790#ifdef CONFIG_TRANSPARENT_HUGEPAGE