author     Vladimir Davydov <vdavydov@parallels.com>        2015-02-12 17:59:10 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-02-12 21:54:09 -0500
commit     60d3fd32a7a9da4c8c93a9f89cfda22a0b4c65ce
tree       47de9f3f0e48ecc9f416b5a40fac6f6e1c97395e
parent     c0a5b560938a0f2fd2fbf66ddc446c7c2b41383a
list_lru: introduce per-memcg lists
There are several FS shrinkers, including super_block::s_shrink, that
keep reclaimable objects in the list_lru structure. Hence, to turn them
into memcg-aware shrinkers, it is enough to make list_lru per-memcg.
This patch does the trick. It adds an array of lru lists to the
list_lru_node structure (the per-node part of the list_lru), one for each
kmem-active memcg, and dispatches every item addition or removal to the
list corresponding to the memcg to which the item is accounted. So now
the list_lru structure is not just per node, but per node and per memcg.
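
Concretely, item dispatch after this patch looks roughly like this
(condensed from the list_lru_add() hunk below; the WARN_ON_ONCE and the
"already on a list" path are omitted):

	nlru = &lru->node[page_to_nid(virt_to_page(item))];

	spin_lock(&nlru->lock);
	/* per-memcg list if the item is accounted to a kmem-active
	 * memcg, otherwise the global &nlru->lru */
	l = list_lru_from_kmem(nlru, item);
	if (list_empty(item)) {
		list_add_tail(item, &l->list);
		l->nr_items++;
	}
	spin_unlock(&nlru->lock);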
Not all list_lrus need this feature, so this patch also adds a new
method, list_lru_init_memcg, which initializes a list_lru as memcg
aware. Otherwise (i.e. if initialized with the old list_lru_init), the
list_lru won't have per-memcg lists.
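
For illustration, a user that wants per-memcg lists only has to change
its init call; a minimal sketch (the my_lru/my_init/my_exit names are
made up for the example):

	static struct list_lru my_lru;

	static int __init my_init(void)
	{
		/* memcg aware: one list per node and per kmem-active memcg */
		return list_lru_init_memcg(&my_lru);
	}

	static void __exit my_exit(void)
	{
		list_lru_destroy(&my_lru);
	}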
Just like the per-memcg cache arrays, the arrays of per-memcg lists are
indexed by memcg_cache_id, so we must grow them whenever
memcg_nr_cache_ids is increased. So we introduce a callback,
memcg_update_all_list_lrus, invoked by memcg_alloc_cache_id if the id
space is full.
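
The hook point in memcg_alloc_cache_id() then reads (abridged from the
mm/memcontrol.c hunk below):

	err = memcg_update_all_caches(size);
	if (!err)
		err = memcg_update_all_list_lrus(size);
	if (!err)
		memcg_nr_cache_ids = size;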
The locking is implemented in a manner similar to lruvecs, i.e. we have
one lock per node that protects all lists (both global and per cgroup) on
the node.
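
Note that growing a per-memcg array relies on the same per-node lock:
the new array is filled and copied first, and only the pointer swap is
done under the lock, with IRQ-safe primitives because list_lru_add/del
may be called under an IRQ-safe lock (see memcg_update_list_lru_node()
below):

	memcpy(new, old, old_size * sizeof(void *));

	spin_lock_irq(&nlru->lock);
	nlru->memcg_lrus = new;
	spin_unlock_irq(&nlru->lock);

	kfree(old);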
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  include/linux/list_lru.h   |  52
-rw-r--r--  include/linux/memcontrol.h |  14
-rw-r--r--  mm/list_lru.c              | 374
-rw-r--r--  mm/memcontrol.c            |  20
4 files changed, 424 insertions(+), 36 deletions(-)
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index ee9486ac0621..305b598abac2 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -11,6 +11,8 @@
 #include <linux/nodemask.h>
 #include <linux/shrinker.h>
 
+struct mem_cgroup;
+
 /* list_lru_walk_cb has to always return one of those */
 enum lru_status {
 	LRU_REMOVED,		/* item removed from list */
@@ -22,11 +24,26 @@ enum lru_status {
 				   internally, but has to return locked. */
 };
 
-struct list_lru_node {
-	spinlock_t		lock;
+struct list_lru_one {
 	struct list_head	list;
 	/* kept as signed so we can catch imbalance bugs */
 	long			nr_items;
+};
+
+struct list_lru_memcg {
+	/* array of per cgroup lists, indexed by memcg_cache_id */
+	struct list_lru_one	*lru[0];
+};
+
+struct list_lru_node {
+	/* protects all lists on the node, including per cgroup */
+	spinlock_t		lock;
+	/* global list, used for the root cgroup in cgroup aware lrus */
+	struct list_lru_one	lru;
+#ifdef CONFIG_MEMCG_KMEM
+	/* for cgroup aware lrus points to per cgroup lists, otherwise NULL */
+	struct list_lru_memcg	*memcg_lrus;
+#endif
 } ____cacheline_aligned_in_smp;
 
 struct list_lru {
@@ -37,11 +54,14 @@ struct list_lru {
 };
 
 void list_lru_destroy(struct list_lru *lru);
-int list_lru_init_key(struct list_lru *lru, struct lock_class_key *key);
-static inline int list_lru_init(struct list_lru *lru)
-{
-	return list_lru_init_key(lru, NULL);
-}
+int __list_lru_init(struct list_lru *lru, bool memcg_aware,
+		    struct lock_class_key *key);
+
+#define list_lru_init(lru)		__list_lru_init((lru), false, NULL)
+#define list_lru_init_key(lru, key)	__list_lru_init((lru), false, (key))
+#define list_lru_init_memcg(lru)	__list_lru_init((lru), true, NULL)
+
+int memcg_update_all_list_lrus(int num_memcgs);
 
 /**
  * list_lru_add: add an element to the lru list's tail
@@ -75,20 +95,23 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item);
 bool list_lru_del(struct list_lru *lru, struct list_head *item);
 
 /**
- * list_lru_count_node: return the number of objects currently held by @lru
+ * list_lru_count_one: return the number of objects currently held by @lru
  * @lru: the lru pointer.
  * @nid: the node id to count from.
+ * @memcg: the cgroup to count from.
  *
  * Always return a non-negative number, 0 for empty lists. There is no
  * guarantee that the list is not updated while the count is being computed.
  * Callers that want such a guarantee need to provide an outer lock.
  */
+unsigned long list_lru_count_one(struct list_lru *lru,
+				 int nid, struct mem_cgroup *memcg);
 unsigned long list_lru_count_node(struct list_lru *lru, int nid);
 
 static inline unsigned long list_lru_shrink_count(struct list_lru *lru,
 						  struct shrink_control *sc)
 {
-	return list_lru_count_node(lru, sc->nid);
+	return list_lru_count_one(lru, sc->nid, sc->memcg);
 }
 
 static inline unsigned long list_lru_count(struct list_lru *lru)
@@ -105,9 +128,10 @@ static inline unsigned long list_lru_count(struct list_lru *lru)
 typedef enum lru_status
 (*list_lru_walk_cb)(struct list_head *item, spinlock_t *lock, void *cb_arg);
 /**
- * list_lru_walk_node: walk a list_lru, isolating and disposing freeable items.
+ * list_lru_walk_one: walk a list_lru, isolating and disposing freeable items.
  * @lru: the lru pointer.
  * @nid: the node id to scan from.
+ * @memcg: the cgroup to scan from.
  * @isolate: callback function that is resposible for deciding what to do with
  *	the item currently being scanned
  * @cb_arg: opaque type that will be passed to @isolate
@@ -125,6 +149,10 @@ typedef enum lru_status
  *
  * Return value: the number of objects effectively removed from the LRU.
  */
+unsigned long list_lru_walk_one(struct list_lru *lru,
+				int nid, struct mem_cgroup *memcg,
+				list_lru_walk_cb isolate, void *cb_arg,
+				unsigned long *nr_to_walk);
 unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
 				 list_lru_walk_cb isolate, void *cb_arg,
 				 unsigned long *nr_to_walk);
@@ -133,8 +161,8 @@ static inline unsigned long
 list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
 		     list_lru_walk_cb isolate, void *cb_arg)
 {
-	return list_lru_walk_node(lru, sc->nid, isolate, cb_arg,
-				  &sc->nr_to_scan);
+	return list_lru_walk_one(lru, sc->nid, sc->memcg, isolate, cb_arg,
+				 &sc->nr_to_scan);
 }
 
 static inline unsigned long
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index dbc4baa3619c..72dff5fb0d0c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -439,6 +439,8 @@ int memcg_cache_id(struct mem_cgroup *memcg);
 struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep);
 void __memcg_kmem_put_cache(struct kmem_cache *cachep);
 
+struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr);
+
 int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
 		      unsigned long nr_pages);
 void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages);
@@ -535,6 +537,13 @@ static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep)
 	if (memcg_kmem_enabled())
 		__memcg_kmem_put_cache(cachep);
 }
+
+static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
+{
+	if (!memcg_kmem_enabled())
+		return NULL;
+	return __mem_cgroup_from_kmem(ptr);
+}
 #else
 #define for_each_memcg_cache_index(_idx)	\
 	for (; NULL; )
@@ -586,6 +595,11 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 static inline void memcg_kmem_put_cache(struct kmem_cache *cachep)
 {
 }
+
+static inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
+{
+	return NULL;
+}
 #endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/mm/list_lru.c b/mm/list_lru.c
index a9021cb3ccde..79aee70c3b9d 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -10,6 +10,7 @@
 #include <linux/list_lru.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
+#include <linux/memcontrol.h>
 
 #ifdef CONFIG_MEMCG_KMEM
 static LIST_HEAD(list_lrus);
@@ -38,16 +39,71 @@ static void list_lru_unregister(struct list_lru *lru)
 }
 #endif /* CONFIG_MEMCG_KMEM */
 
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool list_lru_memcg_aware(struct list_lru *lru)
+{
+	return !!lru->node[0].memcg_lrus;
+}
+
+static inline struct list_lru_one *
+list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
+{
+	/*
+	 * The lock protects the array of per cgroup lists from relocation
+	 * (see memcg_update_list_lru_node).
+	 */
+	lockdep_assert_held(&nlru->lock);
+	if (nlru->memcg_lrus && idx >= 0)
+		return nlru->memcg_lrus->lru[idx];
+
+	return &nlru->lru;
+}
+
+static inline struct list_lru_one *
+list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
+{
+	struct mem_cgroup *memcg;
+
+	if (!nlru->memcg_lrus)
+		return &nlru->lru;
+
+	memcg = mem_cgroup_from_kmem(ptr);
+	if (!memcg)
+		return &nlru->lru;
+
+	return list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
+}
+#else
+static inline bool list_lru_memcg_aware(struct list_lru *lru)
+{
+	return false;
+}
+
+static inline struct list_lru_one *
+list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
+{
+	return &nlru->lru;
+}
+
+static inline struct list_lru_one *
+list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
+{
+	return &nlru->lru;
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
 bool list_lru_add(struct list_lru *lru, struct list_head *item)
 {
 	int nid = page_to_nid(virt_to_page(item));
 	struct list_lru_node *nlru = &lru->node[nid];
+	struct list_lru_one *l;
 
 	spin_lock(&nlru->lock);
-	WARN_ON_ONCE(nlru->nr_items < 0);
+	l = list_lru_from_kmem(nlru, item);
+	WARN_ON_ONCE(l->nr_items < 0);
 	if (list_empty(item)) {
-		list_add_tail(item, &nlru->list);
-		nlru->nr_items++;
+		list_add_tail(item, &l->list);
+		l->nr_items++;
 		spin_unlock(&nlru->lock);
 		return true;
 	}
@@ -60,12 +116,14 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
 {
 	int nid = page_to_nid(virt_to_page(item));
 	struct list_lru_node *nlru = &lru->node[nid];
+	struct list_lru_one *l;
 
 	spin_lock(&nlru->lock);
+	l = list_lru_from_kmem(nlru, item);
 	if (!list_empty(item)) {
 		list_del_init(item);
-		nlru->nr_items--;
-		WARN_ON_ONCE(nlru->nr_items < 0);
+		l->nr_items--;
+		WARN_ON_ONCE(l->nr_items < 0);
 		spin_unlock(&nlru->lock);
 		return true;
 	}
@@ -74,33 +132,58 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
 }
 EXPORT_SYMBOL_GPL(list_lru_del);
 
-unsigned long
-list_lru_count_node(struct list_lru *lru, int nid)
+static unsigned long __list_lru_count_one(struct list_lru *lru,
+					  int nid, int memcg_idx)
 {
-	unsigned long count = 0;
 	struct list_lru_node *nlru = &lru->node[nid];
+	struct list_lru_one *l;
+	unsigned long count;
 
 	spin_lock(&nlru->lock);
-	WARN_ON_ONCE(nlru->nr_items < 0);
-	count += nlru->nr_items;
+	l = list_lru_from_memcg_idx(nlru, memcg_idx);
+	WARN_ON_ONCE(l->nr_items < 0);
+	count = l->nr_items;
 	spin_unlock(&nlru->lock);
 
 	return count;
 }
+
+unsigned long list_lru_count_one(struct list_lru *lru,
+				 int nid, struct mem_cgroup *memcg)
+{
+	return __list_lru_count_one(lru, nid, memcg_cache_id(memcg));
+}
+EXPORT_SYMBOL_GPL(list_lru_count_one);
+
+unsigned long list_lru_count_node(struct list_lru *lru, int nid)
+{
+	long count = 0;
+	int memcg_idx;
+
+	count += __list_lru_count_one(lru, nid, -1);
+	if (list_lru_memcg_aware(lru)) {
+		for_each_memcg_cache_index(memcg_idx)
+			count += __list_lru_count_one(lru, nid, memcg_idx);
+	}
+	return count;
+}
 EXPORT_SYMBOL_GPL(list_lru_count_node);
 
-unsigned long
-list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate,
-		   void *cb_arg, unsigned long *nr_to_walk)
+static unsigned long
+__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
+		    list_lru_walk_cb isolate, void *cb_arg,
+		    unsigned long *nr_to_walk)
 {
 
 	struct list_lru_node *nlru = &lru->node[nid];
+	struct list_lru_one *l;
 	struct list_head *item, *n;
 	unsigned long isolated = 0;
 
 	spin_lock(&nlru->lock);
+	l = list_lru_from_memcg_idx(nlru, memcg_idx);
 restart:
-	list_for_each_safe(item, n, &nlru->list) {
+	list_for_each_safe(item, n, &l->list) {
 		enum lru_status ret;
 
 		/*
@@ -116,8 +199,8 @@ restart:
 		case LRU_REMOVED_RETRY:
 			assert_spin_locked(&nlru->lock);
 		case LRU_REMOVED:
-			nlru->nr_items--;
-			WARN_ON_ONCE(nlru->nr_items < 0);
+			l->nr_items--;
+			WARN_ON_ONCE(l->nr_items < 0);
 			isolated++;
 			/*
 			 * If the lru lock has been dropped, our list
@@ -128,7 +211,7 @@ restart:
 				goto restart;
 			break;
 		case LRU_ROTATE:
-			list_move_tail(item, &nlru->list);
+			list_move_tail(item, &l->list);
 			break;
 		case LRU_SKIP:
 			break;
@@ -147,36 +230,279 @@ restart:
 	spin_unlock(&nlru->lock);
 	return isolated;
 }
+
+unsigned long
+list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
+		  list_lru_walk_cb isolate, void *cb_arg,
+		  unsigned long *nr_to_walk)
+{
+	return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
+				   isolate, cb_arg, nr_to_walk);
+}
+EXPORT_SYMBOL_GPL(list_lru_walk_one);
+
+unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
+				 list_lru_walk_cb isolate, void *cb_arg,
+				 unsigned long *nr_to_walk)
+{
+	long isolated = 0;
+	int memcg_idx;
+
+	isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg,
+					nr_to_walk);
+	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
+		for_each_memcg_cache_index(memcg_idx) {
+			isolated += __list_lru_walk_one(lru, nid, memcg_idx,
+						isolate, cb_arg, nr_to_walk);
+			if (*nr_to_walk <= 0)
+				break;
+		}
+	}
+	return isolated;
+}
 EXPORT_SYMBOL_GPL(list_lru_walk_node);
 
-int list_lru_init_key(struct list_lru *lru, struct lock_class_key *key)
+static void init_one_lru(struct list_lru_one *l)
+{
+	INIT_LIST_HEAD(&l->list);
+	l->nr_items = 0;
+}
+
+#ifdef CONFIG_MEMCG_KMEM
+static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
+					  int begin, int end)
+{
+	int i;
+
+	for (i = begin; i < end; i++)
+		kfree(memcg_lrus->lru[i]);
+}
+
+static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
+				      int begin, int end)
+{
+	int i;
+
+	for (i = begin; i < end; i++) {
+		struct list_lru_one *l;
+
+		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
+		if (!l)
+			goto fail;
+
+		init_one_lru(l);
+		memcg_lrus->lru[i] = l;
+	}
+	return 0;
+fail:
+	__memcg_destroy_list_lru_node(memcg_lrus, begin, i - 1);
+	return -ENOMEM;
+}
+
+static int memcg_init_list_lru_node(struct list_lru_node *nlru)
+{
+	int size = memcg_nr_cache_ids;
+
+	nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL);
+	if (!nlru->memcg_lrus)
+		return -ENOMEM;
+
+	if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) {
+		kfree(nlru->memcg_lrus);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
+{
+	__memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids);
+	kfree(nlru->memcg_lrus);
+}
+
+static int memcg_update_list_lru_node(struct list_lru_node *nlru,
+				      int old_size, int new_size)
+{
+	struct list_lru_memcg *old, *new;
+
+	BUG_ON(old_size > new_size);
+
+	old = nlru->memcg_lrus;
+	new = kmalloc(new_size * sizeof(void *), GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
+		kfree(new);
+		return -ENOMEM;
+	}
+
+	memcpy(new, old, old_size * sizeof(void *));
+
+	/*
+	 * The lock guarantees that we won't race with a reader
+	 * (see list_lru_from_memcg_idx).
+	 *
+	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
+	 * we have to use IRQ-safe primitives here to avoid deadlock.
+	 */
+	spin_lock_irq(&nlru->lock);
+	nlru->memcg_lrus = new;
+	spin_unlock_irq(&nlru->lock);
+
+	kfree(old);
+	return 0;
+}
+
+static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
+					      int old_size, int new_size)
+{
+	/* do not bother shrinking the array back to the old size, because we
+	 * cannot handle allocation failures here */
+	__memcg_destroy_list_lru_node(nlru->memcg_lrus, old_size, new_size);
+}
+
+static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
+{
+	int i;
+
+	for (i = 0; i < nr_node_ids; i++) {
+		if (!memcg_aware)
+			lru->node[i].memcg_lrus = NULL;
+		else if (memcg_init_list_lru_node(&lru->node[i]))
+			goto fail;
+	}
+	return 0;
+fail:
+	for (i = i - 1; i >= 0; i--)
+		memcg_destroy_list_lru_node(&lru->node[i]);
+	return -ENOMEM;
+}
+
+static void memcg_destroy_list_lru(struct list_lru *lru)
+{
+	int i;
+
+	if (!list_lru_memcg_aware(lru))
+		return;
+
+	for (i = 0; i < nr_node_ids; i++)
+		memcg_destroy_list_lru_node(&lru->node[i]);
+}
+
+static int memcg_update_list_lru(struct list_lru *lru,
+				 int old_size, int new_size)
+{
+	int i;
+
+	if (!list_lru_memcg_aware(lru))
+		return 0;
+
+	for (i = 0; i < nr_node_ids; i++) {
+		if (memcg_update_list_lru_node(&lru->node[i],
+					       old_size, new_size))
+			goto fail;
+	}
+	return 0;
+fail:
+	for (i = i - 1; i >= 0; i--)
+		memcg_cancel_update_list_lru_node(&lru->node[i],
+						  old_size, new_size);
+	return -ENOMEM;
+}
+
+static void memcg_cancel_update_list_lru(struct list_lru *lru,
+					 int old_size, int new_size)
+{
+	int i;
+
+	if (!list_lru_memcg_aware(lru))
+		return;
+
+	for (i = 0; i < nr_node_ids; i++)
+		memcg_cancel_update_list_lru_node(&lru->node[i],
+						  old_size, new_size);
+}
+
+int memcg_update_all_list_lrus(int new_size)
+{
+	int ret = 0;
+	struct list_lru *lru;
+	int old_size = memcg_nr_cache_ids;
+
+	mutex_lock(&list_lrus_mutex);
+	list_for_each_entry(lru, &list_lrus, list) {
+		ret = memcg_update_list_lru(lru, old_size, new_size);
+		if (ret)
+			goto fail;
+	}
+out:
+	mutex_unlock(&list_lrus_mutex);
+	return ret;
+fail:
+	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
+		memcg_cancel_update_list_lru(lru, old_size, new_size);
+	goto out;
+}
+#else
+static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
+{
+	return 0;
+}
+
+static void memcg_destroy_list_lru(struct list_lru *lru)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
+int __list_lru_init(struct list_lru *lru, bool memcg_aware,
+		    struct lock_class_key *key)
 {
 	int i;
 	size_t size = sizeof(*lru->node) * nr_node_ids;
+	int err = -ENOMEM;
+
+	memcg_get_cache_ids();
 
 	lru->node = kzalloc(size, GFP_KERNEL);
 	if (!lru->node)
-		return -ENOMEM;
+		goto out;
 
 	for (i = 0; i < nr_node_ids; i++) {
 		spin_lock_init(&lru->node[i].lock);
 		if (key)
 			lockdep_set_class(&lru->node[i].lock, key);
-		INIT_LIST_HEAD(&lru->node[i].list);
-		lru->node[i].nr_items = 0;
+		init_one_lru(&lru->node[i].lru);
+	}
+
+	err = memcg_init_list_lru(lru, memcg_aware);
+	if (err) {
+		kfree(lru->node);
+		goto out;
 	}
+
 	list_lru_register(lru);
-	return 0;
+out:
+	memcg_put_cache_ids();
+	return err;
 }
-EXPORT_SYMBOL_GPL(list_lru_init_key);
+EXPORT_SYMBOL_GPL(__list_lru_init);
 
 void list_lru_destroy(struct list_lru *lru)
 {
 	/* Already destroyed or not yet initialized? */
 	if (!lru->node)
 		return;
+
+	memcg_get_cache_ids();
+
 	list_lru_unregister(lru);
+
+	memcg_destroy_list_lru(lru);
 	kfree(lru->node);
 	lru->node = NULL;
+
+	memcg_put_cache_ids();
 }
 EXPORT_SYMBOL_GPL(list_lru_destroy);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6706e5fa5ac0..afa55bb38cbd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2572,6 +2572,8 @@ static int memcg_alloc_cache_id(void)
 
 	err = memcg_update_all_caches(size);
 	if (!err)
+		err = memcg_update_all_list_lrus(size);
+	if (!err)
 		memcg_nr_cache_ids = size;
 
 	up_write(&memcg_cache_ids_sem);
@@ -2765,6 +2767,24 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
 	memcg_uncharge_kmem(memcg, 1 << order);
 	page->mem_cgroup = NULL;
 }
+
+struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr)
+{
+	struct mem_cgroup *memcg = NULL;
+	struct kmem_cache *cachep;
+	struct page *page;
+
+	page = virt_to_head_page(ptr);
+	if (PageSlab(page)) {
+		cachep = page->slab_cache;
+		if (!is_root_cache(cachep))
+			memcg = cachep->memcg_params->memcg;
+	} else
+		/* page allocated by alloc_kmem_pages */
+		memcg = page->mem_cgroup;
+
+	return memcg;
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE