Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 978
1 file changed, 625 insertions(+), 353 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7f1a356153c0..5b6b0039f725 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -45,16 +45,17 @@ | |||
45 | #include <linux/swapops.h> | 45 | #include <linux/swapops.h> |
46 | #include <linux/spinlock.h> | 46 | #include <linux/spinlock.h> |
47 | #include <linux/eventfd.h> | 47 | #include <linux/eventfd.h> |
48 | #include <linux/poll.h> | ||
48 | #include <linux/sort.h> | 49 | #include <linux/sort.h> |
49 | #include <linux/fs.h> | 50 | #include <linux/fs.h> |
50 | #include <linux/seq_file.h> | 51 | #include <linux/seq_file.h> |
51 | #include <linux/vmalloc.h> | ||
52 | #include <linux/vmpressure.h> | 52 | #include <linux/vmpressure.h> |
53 | #include <linux/mm_inline.h> | 53 | #include <linux/mm_inline.h> |
54 | #include <linux/page_cgroup.h> | 54 | #include <linux/page_cgroup.h> |
55 | #include <linux/cpu.h> | 55 | #include <linux/cpu.h> |
56 | #include <linux/oom.h> | 56 | #include <linux/oom.h> |
57 | #include <linux/lockdep.h> | 57 | #include <linux/lockdep.h> |
58 | #include <linux/file.h> | ||
58 | #include "internal.h" | 59 | #include "internal.h" |
59 | #include <net/sock.h> | 60 | #include <net/sock.h> |
60 | #include <net/ip.h> | 61 | #include <net/ip.h> |
@@ -148,7 +149,7 @@ struct mem_cgroup_reclaim_iter { | |||
148 | * matches memcg->dead_count of the hierarchy root group. | 149 | * matches memcg->dead_count of the hierarchy root group. |
149 | */ | 150 | */ |
150 | struct mem_cgroup *last_visited; | 151 | struct mem_cgroup *last_visited; |
151 | unsigned long last_dead_count; | 152 | int last_dead_count; |
152 | 153 | ||
153 | /* scan generation, increased every round-trip */ | 154 | /* scan generation, increased every round-trip */ |
154 | unsigned int generation; | 155 | unsigned int generation; |
@@ -227,6 +228,46 @@ struct mem_cgroup_eventfd_list { | |||
227 | struct eventfd_ctx *eventfd; | 228 | struct eventfd_ctx *eventfd; |
228 | }; | 229 | }; |
229 | 230 | ||
231 | /* | ||
232 | * cgroup_event represents events which userspace want to receive. | ||
233 | */ | ||
234 | struct mem_cgroup_event { | ||
235 | /* | ||
236 | * memcg which the event belongs to. | ||
237 | */ | ||
238 | struct mem_cgroup *memcg; | ||
239 | /* | ||
240 | * eventfd to signal userspace about the event. | ||
241 | */ | ||
242 | struct eventfd_ctx *eventfd; | ||
243 | /* | ||
244 | * Each of these stored in a list by the cgroup. | ||
245 | */ | ||
246 | struct list_head list; | ||
247 | /* | ||
248 | * register_event() callback will be used to add new userspace | ||
249 | * waiter for changes related to this event. Use eventfd_signal() | ||
250 | * on eventfd to send notification to userspace. | ||
251 | */ | ||
252 | int (*register_event)(struct mem_cgroup *memcg, | ||
253 | struct eventfd_ctx *eventfd, const char *args); | ||
254 | /* | ||
255 | * unregister_event() callback will be called when userspace closes | ||
256 | * the eventfd or on cgroup removing. This callback must be set, | ||
257 | * if you want provide notification functionality. | ||
258 | */ | ||
259 | void (*unregister_event)(struct mem_cgroup *memcg, | ||
260 | struct eventfd_ctx *eventfd); | ||
261 | /* | ||
262 | * All fields below needed to unregister event when | ||
263 | * userspace closes eventfd. | ||
264 | */ | ||
265 | poll_table pt; | ||
266 | wait_queue_head_t *wqh; | ||
267 | wait_queue_t wait; | ||
268 | struct work_struct remove; | ||
269 | }; | ||
270 | |||
230 | static void mem_cgroup_threshold(struct mem_cgroup *memcg); | 271 | static void mem_cgroup_threshold(struct mem_cgroup *memcg); |
231 | static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); | 272 | static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); |
232 | 273 | ||
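To make the callback contract above concrete, here is a minimal sketch, not part of the patch, of how a handler would signal its waiter once its condition fires; eventfd_signal() bumps the eventfd counter and wakes any poll()/read() userspace has pending on the registered descriptor. The helper name is hypothetical.

    /* Illustrative only -- not in the diff. */
    static void memcg_event_notify(struct mem_cgroup_event *event)
    {
            eventfd_signal(event->eventfd, 1);      /* wake the userspace waiter */
    }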
@@ -331,27 +372,20 @@ struct mem_cgroup { | |||
331 | atomic_t numainfo_updating; | 372 | atomic_t numainfo_updating; |
332 | #endif | 373 | #endif |
333 | 374 | ||
375 | /* List of events which userspace want to receive */ | ||
376 | struct list_head event_list; | ||
377 | spinlock_t event_list_lock; | ||
378 | |||
334 | struct mem_cgroup_per_node *nodeinfo[0]; | 379 | struct mem_cgroup_per_node *nodeinfo[0]; |
335 | /* WARNING: nodeinfo must be the last member here */ | 380 | /* WARNING: nodeinfo must be the last member here */ |
336 | }; | 381 | }; |
337 | 382 | ||
338 | static size_t memcg_size(void) | ||
339 | { | ||
340 | return sizeof(struct mem_cgroup) + | ||
341 | nr_node_ids * sizeof(struct mem_cgroup_per_node *); | ||
342 | } | ||
343 | |||
344 | /* internal only representation about the status of kmem accounting. */ | 383 | /* internal only representation about the status of kmem accounting. */ |
345 | enum { | 384 | enum { |
346 | KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */ | 385 | KMEM_ACCOUNTED_ACTIVE, /* accounted by this cgroup itself */ |
347 | KMEM_ACCOUNTED_ACTIVATED, /* static key enabled. */ | ||
348 | KMEM_ACCOUNTED_DEAD, /* dead memcg with pending kmem charges */ | 386 | KMEM_ACCOUNTED_DEAD, /* dead memcg with pending kmem charges */ |
349 | }; | 387 | }; |
350 | 388 | ||
351 | /* We account when limit is on, but only after call sites are patched */ | ||
352 | #define KMEM_ACCOUNTED_MASK \ | ||
353 | ((1 << KMEM_ACCOUNTED_ACTIVE) | (1 << KMEM_ACCOUNTED_ACTIVATED)) | ||
354 | |||
355 | #ifdef CONFIG_MEMCG_KMEM | 389 | #ifdef CONFIG_MEMCG_KMEM |
356 | static inline void memcg_kmem_set_active(struct mem_cgroup *memcg) | 390 | static inline void memcg_kmem_set_active(struct mem_cgroup *memcg) |
357 | { | 391 | { |
@@ -363,16 +397,6 @@ static bool memcg_kmem_is_active(struct mem_cgroup *memcg) | |||
363 | return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags); | 397 | return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags); |
364 | } | 398 | } |
365 | 399 | ||
366 | static void memcg_kmem_set_activated(struct mem_cgroup *memcg) | ||
367 | { | ||
368 | set_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags); | ||
369 | } | ||
370 | |||
371 | static void memcg_kmem_clear_activated(struct mem_cgroup *memcg) | ||
372 | { | ||
373 | clear_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags); | ||
374 | } | ||
375 | |||
376 | static void memcg_kmem_mark_dead(struct mem_cgroup *memcg) | 400 | static void memcg_kmem_mark_dead(struct mem_cgroup *memcg) |
377 | { | 401 | { |
378 | /* | 402 | /* |
@@ -490,11 +514,6 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr) | |||
490 | return &container_of(vmpr, struct mem_cgroup, vmpressure)->css; | 514 | return &container_of(vmpr, struct mem_cgroup, vmpressure)->css; |
491 | } | 515 | } |
492 | 516 | ||
493 | struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css) | ||
494 | { | ||
495 | return &mem_cgroup_from_css(css)->vmpressure; | ||
496 | } | ||
497 | |||
498 | static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) | 517 | static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) |
499 | { | 518 | { |
500 | return (memcg == root_mem_cgroup); | 519 | return (memcg == root_mem_cgroup); |
@@ -1098,16 +1117,22 @@ skip_node: | |||
1098 | * skipped and we should continue the tree walk. | 1117 | * skipped and we should continue the tree walk. |
1099 | * last_visited css is safe to use because it is | 1118 | * last_visited css is safe to use because it is |
1100 | * protected by css_get and the tree walk is rcu safe. | 1119 | * protected by css_get and the tree walk is rcu safe. |
1120 | * | ||
1121 | * We do not take a reference on the root of the tree walk | ||
1122 | * because we might race with the root removal when it would | ||
1123 | * be the only node in the iterated hierarchy and mem_cgroup_iter | ||
1124 | * would end up in an endless loop because it expects that at | ||
1125 | * least one valid node will be returned. Root cannot disappear | ||
1126 | * because caller of the iterator should hold it already so | ||
1127 | * skipping css reference should be safe. | ||
1101 | */ | 1128 | */ |
1102 | if (next_css) { | 1129 | if (next_css) { |
1103 | struct mem_cgroup *mem = mem_cgroup_from_css(next_css); | 1130 | if ((next_css == &root->css) || |
1131 | ((next_css->flags & CSS_ONLINE) && css_tryget(next_css))) | ||
1132 | return mem_cgroup_from_css(next_css); | ||
1104 | 1133 | ||
1105 | if (css_tryget(&mem->css)) | 1134 | prev_css = next_css; |
1106 | return mem; | 1135 | goto skip_node; |
1107 | else { | ||
1108 | prev_css = next_css; | ||
1109 | goto skip_node; | ||
1110 | } | ||
1111 | } | 1136 | } |
1112 | 1137 | ||
1113 | return NULL; | 1138 | return NULL; |
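The reference rule encoded in the hunk above boils down to: the root of the walk is pinned by the caller and is never tryget'd, while any other candidate must still be online and win a css_tryget(), otherwise it is skipped. A hedged restatement as a helper that does not exist in the patch, mirroring the condition used in __mem_cgroup_iter_next():

    /* Illustrative only. */
    static bool iter_css_usable(struct cgroup_subsys_state *css,
                                struct mem_cgroup *root)
    {
            if (css == &root->css)
                    return true;    /* caller already holds the root */
            return (css->flags & CSS_ONLINE) && css_tryget(css);
    }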
@@ -1141,7 +1166,15 @@ mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter, | |||
1141 | if (iter->last_dead_count == *sequence) { | 1166 | if (iter->last_dead_count == *sequence) { |
1142 | smp_rmb(); | 1167 | smp_rmb(); |
1143 | position = iter->last_visited; | 1168 | position = iter->last_visited; |
1144 | if (position && !css_tryget(&position->css)) | 1169 | |
1170 | /* | ||
1171 | * We cannot take a reference to root because we might race | ||
1172 | * with root removal and returning NULL would end up in | ||
1173 | * an endless loop on the iterator user level when root | ||
1174 | * would be returned all the time. | ||
1175 | */ | ||
1176 | if (position && position != root && | ||
1177 | !css_tryget(&position->css)) | ||
1145 | position = NULL; | 1178 | position = NULL; |
1146 | } | 1179 | } |
1147 | return position; | 1180 | return position; |
@@ -1150,9 +1183,11 @@ mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter, | |||
1150 | static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter, | 1183 | static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter, |
1151 | struct mem_cgroup *last_visited, | 1184 | struct mem_cgroup *last_visited, |
1152 | struct mem_cgroup *new_position, | 1185 | struct mem_cgroup *new_position, |
1186 | struct mem_cgroup *root, | ||
1153 | int sequence) | 1187 | int sequence) |
1154 | { | 1188 | { |
1155 | if (last_visited) | 1189 | /* root reference counting symmetric to mem_cgroup_iter_load */ |
1190 | if (last_visited && last_visited != root) | ||
1156 | css_put(&last_visited->css); | 1191 | css_put(&last_visited->css); |
1157 | /* | 1192 | /* |
1158 | * We store the sequence count from the time @last_visited was | 1193 | * We store the sequence count from the time @last_visited was |
@@ -1227,7 +1262,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, | |||
1227 | memcg = __mem_cgroup_iter_next(root, last_visited); | 1262 | memcg = __mem_cgroup_iter_next(root, last_visited); |
1228 | 1263 | ||
1229 | if (reclaim) { | 1264 | if (reclaim) { |
1230 | mem_cgroup_iter_update(iter, last_visited, memcg, seq); | 1265 | mem_cgroup_iter_update(iter, last_visited, memcg, root, |
1266 | seq); | ||
1231 | 1267 | ||
1232 | if (!memcg) | 1268 | if (!memcg) |
1233 | iter->generation++; | 1269 | iter->generation++; |
@@ -1647,13 +1683,13 @@ static void move_unlock_mem_cgroup(struct mem_cgroup *memcg, | |||
1647 | */ | 1683 | */ |
1648 | void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) | 1684 | void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) |
1649 | { | 1685 | { |
1650 | struct cgroup *task_cgrp; | ||
1651 | struct cgroup *mem_cgrp; | ||
1652 | /* | 1686 | /* |
1653 | * Need a buffer in BSS, can't rely on allocations. The code relies | 1687 | * protects memcg_name and makes sure that parallel ooms do not |
1654 | * on the assumption that OOM is serialized for memory controller. | 1688 | * interleave |
1655 | * If this assumption is broken, revisit this code. | ||
1656 | */ | 1689 | */ |
1690 | static DEFINE_MUTEX(oom_info_lock); | ||
1691 | struct cgroup *task_cgrp; | ||
1692 | struct cgroup *mem_cgrp; | ||
1657 | static char memcg_name[PATH_MAX]; | 1693 | static char memcg_name[PATH_MAX]; |
1658 | int ret; | 1694 | int ret; |
1659 | struct mem_cgroup *iter; | 1695 | struct mem_cgroup *iter; |
@@ -1662,6 +1698,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) | |||
1662 | if (!p) | 1698 | if (!p) |
1663 | return; | 1699 | return; |
1664 | 1700 | ||
1701 | mutex_lock(&oom_info_lock); | ||
1665 | rcu_read_lock(); | 1702 | rcu_read_lock(); |
1666 | 1703 | ||
1667 | mem_cgrp = memcg->css.cgroup; | 1704 | mem_cgrp = memcg->css.cgroup; |
@@ -1730,6 +1767,7 @@ done: | |||
1730 | 1767 | ||
1731 | pr_cont("\n"); | 1768 | pr_cont("\n"); |
1732 | } | 1769 | } |
1770 | mutex_unlock(&oom_info_lock); | ||
1733 | } | 1771 | } |
1734 | 1772 | ||
1735 | /* | 1773 | /* |
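The point of the function-local static DEFINE_MUTEX() added above is that it has static storage, so every invocation of mem_cgroup_print_oom_info() contends on the same lock; the shared memcg_name buffer and the printed report can no longer interleave when OOMs happen in parallel. Generic shape of the pattern, illustrative only:

    static void print_serialized_report(void)
    {
            static DEFINE_MUTEX(report_lock);       /* shared by all callers */
            static char buf[PATH_MAX];              /* safe only under report_lock */

            mutex_lock(&report_lock);
            /* ... fill buf and print it ... */
            mutex_unlock(&report_lock);
    }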
@@ -1822,13 +1860,18 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, | |||
1822 | break; | 1860 | break; |
1823 | }; | 1861 | }; |
1824 | points = oom_badness(task, memcg, NULL, totalpages); | 1862 | points = oom_badness(task, memcg, NULL, totalpages); |
1825 | if (points > chosen_points) { | 1863 | if (!points || points < chosen_points) |
1826 | if (chosen) | 1864 | continue; |
1827 | put_task_struct(chosen); | 1865 | /* Prefer thread group leaders for display purposes */ |
1828 | chosen = task; | 1866 | if (points == chosen_points && |
1829 | chosen_points = points; | 1867 | thread_group_leader(chosen)) |
1830 | get_task_struct(chosen); | 1868 | continue; |
1831 | } | 1869 | |
1870 | if (chosen) | ||
1871 | put_task_struct(chosen); | ||
1872 | chosen = task; | ||
1873 | chosen_points = points; | ||
1874 | get_task_struct(chosen); | ||
1832 | } | 1875 | } |
1833 | css_task_iter_end(&it); | 1876 | css_task_iter_end(&it); |
1834 | } | 1877 | } |
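Restated, the victim scan above now keeps the task with the highest badness score and, on a tie, sticks with an already chosen thread group leader rather than hopping to another thread of the same group. A hedged restatement of just the decision, as a helper that is not in the patch:

    /* Illustrative only: should the currently scanned task replace 'chosen'? */
    static bool oom_prefer_new_victim(unsigned long points,
                                      unsigned long chosen_points,
                                      struct task_struct *chosen)
    {
            if (!points || points < chosen_points)
                    return false;           /* unkillable or strictly worse */
            if (points == chosen_points && thread_group_leader(chosen))
                    return false;           /* keep the leader we already have */
            return true;
    }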
@@ -2861,7 +2904,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) | |||
2861 | unsigned short id; | 2904 | unsigned short id; |
2862 | swp_entry_t ent; | 2905 | swp_entry_t ent; |
2863 | 2906 | ||
2864 | VM_BUG_ON(!PageLocked(page)); | 2907 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
2865 | 2908 | ||
2866 | pc = lookup_page_cgroup(page); | 2909 | pc = lookup_page_cgroup(page); |
2867 | lock_page_cgroup(pc); | 2910 | lock_page_cgroup(pc); |
@@ -2895,7 +2938,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, | |||
2895 | bool anon; | 2938 | bool anon; |
2896 | 2939 | ||
2897 | lock_page_cgroup(pc); | 2940 | lock_page_cgroup(pc); |
2898 | VM_BUG_ON(PageCgroupUsed(pc)); | 2941 | VM_BUG_ON_PAGE(PageCgroupUsed(pc), page); |
2899 | /* | 2942 | /* |
2900 | * we don't need page_cgroup_lock about tail pages, becase they are not | 2943 | * we don't need page_cgroup_lock about tail pages, becase they are not |
2901 | * accessed by any other context at this point. | 2944 | * accessed by any other context at this point. |
@@ -2930,7 +2973,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, | |||
2930 | if (lrucare) { | 2973 | if (lrucare) { |
2931 | if (was_on_lru) { | 2974 | if (was_on_lru) { |
2932 | lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); | 2975 | lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); |
2933 | VM_BUG_ON(PageLRU(page)); | 2976 | VM_BUG_ON_PAGE(PageLRU(page), page); |
2934 | SetPageLRU(page); | 2977 | SetPageLRU(page); |
2935 | add_page_to_lru_list(page, lruvec, page_lru(page)); | 2978 | add_page_to_lru_list(page, lruvec, page_lru(page)); |
2936 | } | 2979 | } |
@@ -2956,10 +2999,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, | |||
2956 | static DEFINE_MUTEX(set_limit_mutex); | 2999 | static DEFINE_MUTEX(set_limit_mutex); |
2957 | 3000 | ||
2958 | #ifdef CONFIG_MEMCG_KMEM | 3001 | #ifdef CONFIG_MEMCG_KMEM |
3002 | static DEFINE_MUTEX(activate_kmem_mutex); | ||
3003 | |||
2959 | static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg) | 3004 | static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg) |
2960 | { | 3005 | { |
2961 | return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) && | 3006 | return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) && |
2962 | (memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK); | 3007 | memcg_kmem_is_active(memcg); |
2963 | } | 3008 | } |
2964 | 3009 | ||
2965 | /* | 3010 | /* |
@@ -2976,10 +3021,9 @@ static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p) | |||
2976 | } | 3021 | } |
2977 | 3022 | ||
2978 | #ifdef CONFIG_SLABINFO | 3023 | #ifdef CONFIG_SLABINFO |
2979 | static int mem_cgroup_slabinfo_read(struct cgroup_subsys_state *css, | 3024 | static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v) |
2980 | struct cftype *cft, struct seq_file *m) | ||
2981 | { | 3025 | { |
2982 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 3026 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
2983 | struct memcg_cache_params *params; | 3027 | struct memcg_cache_params *params; |
2984 | 3028 | ||
2985 | if (!memcg_can_account_kmem(memcg)) | 3029 | if (!memcg_can_account_kmem(memcg)) |
@@ -3059,16 +3103,6 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size) | |||
3059 | css_put(&memcg->css); | 3103 | css_put(&memcg->css); |
3060 | } | 3104 | } |
3061 | 3105 | ||
3062 | void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep) | ||
3063 | { | ||
3064 | if (!memcg) | ||
3065 | return; | ||
3066 | |||
3067 | mutex_lock(&memcg->slab_caches_mutex); | ||
3068 | list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches); | ||
3069 | mutex_unlock(&memcg->slab_caches_mutex); | ||
3070 | } | ||
3071 | |||
3072 | /* | 3106 | /* |
3073 | * helper for acessing a memcg's index. It will be used as an index in the | 3107 | * helper for acessing a memcg's index. It will be used as an index in the |
3074 | * child cache array in kmem_cache, and also to derive its name. This function | 3108 | * child cache array in kmem_cache, and also to derive its name. This function |
@@ -3079,43 +3113,6 @@ int memcg_cache_id(struct mem_cgroup *memcg) | |||
3079 | return memcg ? memcg->kmemcg_id : -1; | 3113 | return memcg ? memcg->kmemcg_id : -1; |
3080 | } | 3114 | } |
3081 | 3115 | ||
3082 | /* | ||
3083 | * This ends up being protected by the set_limit mutex, during normal | ||
3084 | * operation, because that is its main call site. | ||
3085 | * | ||
3086 | * But when we create a new cache, we can call this as well if its parent | ||
3087 | * is kmem-limited. That will have to hold set_limit_mutex as well. | ||
3088 | */ | ||
3089 | int memcg_update_cache_sizes(struct mem_cgroup *memcg) | ||
3090 | { | ||
3091 | int num, ret; | ||
3092 | |||
3093 | num = ida_simple_get(&kmem_limited_groups, | ||
3094 | 0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL); | ||
3095 | if (num < 0) | ||
3096 | return num; | ||
3097 | /* | ||
3098 | * After this point, kmem_accounted (that we test atomically in | ||
3099 | * the beginning of this conditional), is no longer 0. This | ||
3100 | * guarantees only one process will set the following boolean | ||
3101 | * to true. We don't need test_and_set because we're protected | ||
3102 | * by the set_limit_mutex anyway. | ||
3103 | */ | ||
3104 | memcg_kmem_set_activated(memcg); | ||
3105 | |||
3106 | ret = memcg_update_all_caches(num+1); | ||
3107 | if (ret) { | ||
3108 | ida_simple_remove(&kmem_limited_groups, num); | ||
3109 | memcg_kmem_clear_activated(memcg); | ||
3110 | return ret; | ||
3111 | } | ||
3112 | |||
3113 | memcg->kmemcg_id = num; | ||
3114 | INIT_LIST_HEAD(&memcg->memcg_slab_caches); | ||
3115 | mutex_init(&memcg->slab_caches_mutex); | ||
3116 | return 0; | ||
3117 | } | ||
3118 | |||
3119 | static size_t memcg_caches_array_size(int num_groups) | 3116 | static size_t memcg_caches_array_size(int num_groups) |
3120 | { | 3117 | { |
3121 | ssize_t size; | 3118 | ssize_t size; |
@@ -3152,18 +3149,17 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) | |||
3152 | 3149 | ||
3153 | if (num_groups > memcg_limited_groups_array_size) { | 3150 | if (num_groups > memcg_limited_groups_array_size) { |
3154 | int i; | 3151 | int i; |
3152 | struct memcg_cache_params *new_params; | ||
3155 | ssize_t size = memcg_caches_array_size(num_groups); | 3153 | ssize_t size = memcg_caches_array_size(num_groups); |
3156 | 3154 | ||
3157 | size *= sizeof(void *); | 3155 | size *= sizeof(void *); |
3158 | size += offsetof(struct memcg_cache_params, memcg_caches); | 3156 | size += offsetof(struct memcg_cache_params, memcg_caches); |
3159 | 3157 | ||
3160 | s->memcg_params = kzalloc(size, GFP_KERNEL); | 3158 | new_params = kzalloc(size, GFP_KERNEL); |
3161 | if (!s->memcg_params) { | 3159 | if (!new_params) |
3162 | s->memcg_params = cur_params; | ||
3163 | return -ENOMEM; | 3160 | return -ENOMEM; |
3164 | } | ||
3165 | 3161 | ||
3166 | s->memcg_params->is_root_cache = true; | 3162 | new_params->is_root_cache = true; |
3167 | 3163 | ||
3168 | /* | 3164 | /* |
3169 | * There is the chance it will be bigger than | 3165 | * There is the chance it will be bigger than |
@@ -3177,7 +3173,7 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) | |||
3177 | for (i = 0; i < memcg_limited_groups_array_size; i++) { | 3173 | for (i = 0; i < memcg_limited_groups_array_size; i++) { |
3178 | if (!cur_params->memcg_caches[i]) | 3174 | if (!cur_params->memcg_caches[i]) |
3179 | continue; | 3175 | continue; |
3180 | s->memcg_params->memcg_caches[i] = | 3176 | new_params->memcg_caches[i] = |
3181 | cur_params->memcg_caches[i]; | 3177 | cur_params->memcg_caches[i]; |
3182 | } | 3178 | } |
3183 | 3179 | ||
@@ -3190,13 +3186,15 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) | |||
3190 | * bigger than the others. And all updates will reset this | 3186 | * bigger than the others. And all updates will reset this |
3191 | * anyway. | 3187 | * anyway. |
3192 | */ | 3188 | */ |
3193 | kfree(cur_params); | 3189 | rcu_assign_pointer(s->memcg_params, new_params); |
3190 | if (cur_params) | ||
3191 | kfree_rcu(cur_params, rcu_head); | ||
3194 | } | 3192 | } |
3195 | return 0; | 3193 | return 0; |
3196 | } | 3194 | } |
3197 | 3195 | ||
3198 | int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, | 3196 | int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, |
3199 | struct kmem_cache *root_cache) | 3197 | struct kmem_cache *root_cache) |
3200 | { | 3198 | { |
3201 | size_t size; | 3199 | size_t size; |
3202 | 3200 | ||
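The switch from kfree() to rcu_assign_pointer()/kfree_rcu() above implies a lockless reader that follows s->memcg_params under RCU, roughly what the cache_from_memcg_idx() helper is expected to do after this series. A hedged sketch of such a reader; the function name is hypothetical and not in the patch:

    static struct kmem_cache *peek_child_cache(struct kmem_cache *s, int idx)
    {
            struct memcg_cache_params *params;
            struct kmem_cache *cachep;

            rcu_read_lock();
            params = rcu_dereference(s->memcg_params);
            cachep = params ? params->memcg_caches[idx] : NULL;
            rcu_read_unlock();
            return cachep;                  /* may be NULL: fall back to the root cache */
    }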
@@ -3224,35 +3222,85 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, | |||
3224 | return 0; | 3222 | return 0; |
3225 | } | 3223 | } |
3226 | 3224 | ||
3227 | void memcg_release_cache(struct kmem_cache *s) | 3225 | void memcg_free_cache_params(struct kmem_cache *s) |
3226 | { | ||
3227 | kfree(s->memcg_params); | ||
3228 | } | ||
3229 | |||
3230 | void memcg_register_cache(struct kmem_cache *s) | ||
3228 | { | 3231 | { |
3229 | struct kmem_cache *root; | 3232 | struct kmem_cache *root; |
3230 | struct mem_cgroup *memcg; | 3233 | struct mem_cgroup *memcg; |
3231 | int id; | 3234 | int id; |
3232 | 3235 | ||
3233 | /* | 3236 | if (is_root_cache(s)) |
3234 | * This happens, for instance, when a root cache goes away before we | ||
3235 | * add any memcg. | ||
3236 | */ | ||
3237 | if (!s->memcg_params) | ||
3238 | return; | 3237 | return; |
3239 | 3238 | ||
3240 | if (s->memcg_params->is_root_cache) | 3239 | /* |
3241 | goto out; | 3240 | * Holding the slab_mutex assures nobody will touch the memcg_caches |
3241 | * array while we are modifying it. | ||
3242 | */ | ||
3243 | lockdep_assert_held(&slab_mutex); | ||
3242 | 3244 | ||
3245 | root = s->memcg_params->root_cache; | ||
3243 | memcg = s->memcg_params->memcg; | 3246 | memcg = s->memcg_params->memcg; |
3244 | id = memcg_cache_id(memcg); | 3247 | id = memcg_cache_id(memcg); |
3248 | |||
3249 | css_get(&memcg->css); | ||
3250 | |||
3251 | |||
3252 | /* | ||
3253 | * Since readers won't lock (see cache_from_memcg_idx()), we need a | ||
3254 | * barrier here to ensure nobody will see the kmem_cache partially | ||
3255 | * initialized. | ||
3256 | */ | ||
3257 | smp_wmb(); | ||
3258 | |||
3259 | /* | ||
3260 | * Initialize the pointer to this cache in its parent's memcg_params | ||
3261 | * before adding it to the memcg_slab_caches list, otherwise we can | ||
3262 | * fail to convert memcg_params_to_cache() while traversing the list. | ||
3263 | */ | ||
3264 | VM_BUG_ON(root->memcg_params->memcg_caches[id]); | ||
3265 | root->memcg_params->memcg_caches[id] = s; | ||
3266 | |||
3267 | mutex_lock(&memcg->slab_caches_mutex); | ||
3268 | list_add(&s->memcg_params->list, &memcg->memcg_slab_caches); | ||
3269 | mutex_unlock(&memcg->slab_caches_mutex); | ||
3270 | } | ||
3271 | |||
3272 | void memcg_unregister_cache(struct kmem_cache *s) | ||
3273 | { | ||
3274 | struct kmem_cache *root; | ||
3275 | struct mem_cgroup *memcg; | ||
3276 | int id; | ||
3277 | |||
3278 | if (is_root_cache(s)) | ||
3279 | return; | ||
3280 | |||
3281 | /* | ||
3282 | * Holding the slab_mutex assures nobody will touch the memcg_caches | ||
3283 | * array while we are modifying it. | ||
3284 | */ | ||
3285 | lockdep_assert_held(&slab_mutex); | ||
3245 | 3286 | ||
3246 | root = s->memcg_params->root_cache; | 3287 | root = s->memcg_params->root_cache; |
3247 | root->memcg_params->memcg_caches[id] = NULL; | 3288 | memcg = s->memcg_params->memcg; |
3289 | id = memcg_cache_id(memcg); | ||
3248 | 3290 | ||
3249 | mutex_lock(&memcg->slab_caches_mutex); | 3291 | mutex_lock(&memcg->slab_caches_mutex); |
3250 | list_del(&s->memcg_params->list); | 3292 | list_del(&s->memcg_params->list); |
3251 | mutex_unlock(&memcg->slab_caches_mutex); | 3293 | mutex_unlock(&memcg->slab_caches_mutex); |
3252 | 3294 | ||
3295 | /* | ||
3296 | * Clear the pointer to this cache in its parent's memcg_params only | ||
3297 | * after removing it from the memcg_slab_caches list, otherwise we can | ||
3298 | * fail to convert memcg_params_to_cache() while traversing the list. | ||
3299 | */ | ||
3300 | VM_BUG_ON(!root->memcg_params->memcg_caches[id]); | ||
3301 | root->memcg_params->memcg_caches[id] = NULL; | ||
3302 | |||
3253 | css_put(&memcg->css); | 3303 | css_put(&memcg->css); |
3254 | out: | ||
3255 | kfree(s->memcg_params); | ||
3256 | } | 3304 | } |
3257 | 3305 | ||
3258 | /* | 3306 | /* |
@@ -3311,11 +3359,9 @@ static void kmem_cache_destroy_work_func(struct work_struct *w) | |||
3311 | * So if we aren't down to zero, we'll just schedule a worker and try | 3359 | * So if we aren't down to zero, we'll just schedule a worker and try |
3312 | * again | 3360 | * again |
3313 | */ | 3361 | */ |
3314 | if (atomic_read(&cachep->memcg_params->nr_pages) != 0) { | 3362 | if (atomic_read(&cachep->memcg_params->nr_pages) != 0) |
3315 | kmem_cache_shrink(cachep); | 3363 | kmem_cache_shrink(cachep); |
3316 | if (atomic_read(&cachep->memcg_params->nr_pages) == 0) | 3364 | else |
3317 | return; | ||
3318 | } else | ||
3319 | kmem_cache_destroy(cachep); | 3365 | kmem_cache_destroy(cachep); |
3320 | } | 3366 | } |
3321 | 3367 | ||
@@ -3351,27 +3397,16 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep) | |||
3351 | schedule_work(&cachep->memcg_params->destroy); | 3397 | schedule_work(&cachep->memcg_params->destroy); |
3352 | } | 3398 | } |
3353 | 3399 | ||
3354 | /* | 3400 | static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, |
3355 | * This lock protects updaters, not readers. We want readers to be as fast as | 3401 | struct kmem_cache *s) |
3356 | * they can, and they will either see NULL or a valid cache value. Our model | ||
3357 | * allow them to see NULL, in which case the root memcg will be selected. | ||
3358 | * | ||
3359 | * We need this lock because multiple allocations to the same cache from a non | ||
3360 | * will span more than one worker. Only one of them can create the cache. | ||
3361 | */ | ||
3362 | static DEFINE_MUTEX(memcg_cache_mutex); | ||
3363 | |||
3364 | /* | ||
3365 | * Called with memcg_cache_mutex held | ||
3366 | */ | ||
3367 | static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg, | ||
3368 | struct kmem_cache *s) | ||
3369 | { | 3402 | { |
3370 | struct kmem_cache *new; | 3403 | struct kmem_cache *new = NULL; |
3371 | static char *tmp_name = NULL; | 3404 | static char *tmp_name = NULL; |
3405 | static DEFINE_MUTEX(mutex); /* protects tmp_name */ | ||
3372 | 3406 | ||
3373 | lockdep_assert_held(&memcg_cache_mutex); | 3407 | BUG_ON(!memcg_can_account_kmem(memcg)); |
3374 | 3408 | ||
3409 | mutex_lock(&mutex); | ||
3375 | /* | 3410 | /* |
3376 | * kmem_cache_create_memcg duplicates the given name and | 3411 | * kmem_cache_create_memcg duplicates the given name and |
3377 | * cgroup_name for this name requires RCU context. | 3412 | * cgroup_name for this name requires RCU context. |
@@ -3381,7 +3416,7 @@ static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg, | |||
3381 | if (!tmp_name) { | 3416 | if (!tmp_name) { |
3382 | tmp_name = kmalloc(PATH_MAX, GFP_KERNEL); | 3417 | tmp_name = kmalloc(PATH_MAX, GFP_KERNEL); |
3383 | if (!tmp_name) | 3418 | if (!tmp_name) |
3384 | return NULL; | 3419 | goto out; |
3385 | } | 3420 | } |
3386 | 3421 | ||
3387 | rcu_read_lock(); | 3422 | rcu_read_lock(); |
@@ -3391,48 +3426,13 @@ static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg, | |||
3391 | 3426 | ||
3392 | new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align, | 3427 | new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align, |
3393 | (s->flags & ~SLAB_PANIC), s->ctor, s); | 3428 | (s->flags & ~SLAB_PANIC), s->ctor, s); |
3394 | |||
3395 | if (new) | 3429 | if (new) |
3396 | new->allocflags |= __GFP_KMEMCG; | 3430 | new->allocflags |= __GFP_KMEMCG; |
3397 | 3431 | else | |
3398 | return new; | 3432 | new = s; |
3399 | } | ||
3400 | |||
3401 | static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, | ||
3402 | struct kmem_cache *cachep) | ||
3403 | { | ||
3404 | struct kmem_cache *new_cachep; | ||
3405 | int idx; | ||
3406 | |||
3407 | BUG_ON(!memcg_can_account_kmem(memcg)); | ||
3408 | |||
3409 | idx = memcg_cache_id(memcg); | ||
3410 | |||
3411 | mutex_lock(&memcg_cache_mutex); | ||
3412 | new_cachep = cache_from_memcg_idx(cachep, idx); | ||
3413 | if (new_cachep) { | ||
3414 | css_put(&memcg->css); | ||
3415 | goto out; | ||
3416 | } | ||
3417 | |||
3418 | new_cachep = kmem_cache_dup(memcg, cachep); | ||
3419 | if (new_cachep == NULL) { | ||
3420 | new_cachep = cachep; | ||
3421 | css_put(&memcg->css); | ||
3422 | goto out; | ||
3423 | } | ||
3424 | |||
3425 | atomic_set(&new_cachep->memcg_params->nr_pages , 0); | ||
3426 | |||
3427 | cachep->memcg_params->memcg_caches[idx] = new_cachep; | ||
3428 | /* | ||
3429 | * the readers won't lock, make sure everybody sees the updated value, | ||
3430 | * so they won't put stuff in the queue again for no reason | ||
3431 | */ | ||
3432 | wmb(); | ||
3433 | out: | 3433 | out: |
3434 | mutex_unlock(&memcg_cache_mutex); | 3434 | mutex_unlock(&mutex); |
3435 | return new_cachep; | 3435 | return new; |
3436 | } | 3436 | } |
3437 | 3437 | ||
3438 | void kmem_cache_destroy_memcg_children(struct kmem_cache *s) | 3438 | void kmem_cache_destroy_memcg_children(struct kmem_cache *s) |
@@ -3452,9 +3452,10 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s) | |||
3452 | * | 3452 | * |
3453 | * Still, we don't want anyone else freeing memcg_caches under our | 3453 | * Still, we don't want anyone else freeing memcg_caches under our |
3454 | * noses, which can happen if a new memcg comes to life. As usual, | 3454 | * noses, which can happen if a new memcg comes to life. As usual, |
3455 | * we'll take the set_limit_mutex to protect ourselves against this. | 3455 | * we'll take the activate_kmem_mutex to protect ourselves against |
3456 | * this. | ||
3456 | */ | 3457 | */ |
3457 | mutex_lock(&set_limit_mutex); | 3458 | mutex_lock(&activate_kmem_mutex); |
3458 | for_each_memcg_cache_index(i) { | 3459 | for_each_memcg_cache_index(i) { |
3459 | c = cache_from_memcg_idx(s, i); | 3460 | c = cache_from_memcg_idx(s, i); |
3460 | if (!c) | 3461 | if (!c) |
@@ -3477,7 +3478,7 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s) | |||
3477 | cancel_work_sync(&c->memcg_params->destroy); | 3478 | cancel_work_sync(&c->memcg_params->destroy); |
3478 | kmem_cache_destroy(c); | 3479 | kmem_cache_destroy(c); |
3479 | } | 3480 | } |
3480 | mutex_unlock(&set_limit_mutex); | 3481 | mutex_unlock(&activate_kmem_mutex); |
3481 | } | 3482 | } |
3482 | 3483 | ||
3483 | struct create_work { | 3484 | struct create_work { |
@@ -3509,6 +3510,7 @@ static void memcg_create_cache_work_func(struct work_struct *w) | |||
3509 | 3510 | ||
3510 | cw = container_of(w, struct create_work, work); | 3511 | cw = container_of(w, struct create_work, work); |
3511 | memcg_create_kmem_cache(cw->memcg, cw->cachep); | 3512 | memcg_create_kmem_cache(cw->memcg, cw->cachep); |
3513 | css_put(&cw->memcg->css); | ||
3512 | kfree(cw); | 3514 | kfree(cw); |
3513 | } | 3515 | } |
3514 | 3516 | ||
@@ -3568,7 +3570,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, | |||
3568 | gfp_t gfp) | 3570 | gfp_t gfp) |
3569 | { | 3571 | { |
3570 | struct mem_cgroup *memcg; | 3572 | struct mem_cgroup *memcg; |
3571 | int idx; | 3573 | struct kmem_cache *memcg_cachep; |
3572 | 3574 | ||
3573 | VM_BUG_ON(!cachep->memcg_params); | 3575 | VM_BUG_ON(!cachep->memcg_params); |
3574 | VM_BUG_ON(!cachep->memcg_params->is_root_cache); | 3576 | VM_BUG_ON(!cachep->memcg_params->is_root_cache); |
@@ -3582,15 +3584,9 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, | |||
3582 | if (!memcg_can_account_kmem(memcg)) | 3584 | if (!memcg_can_account_kmem(memcg)) |
3583 | goto out; | 3585 | goto out; |
3584 | 3586 | ||
3585 | idx = memcg_cache_id(memcg); | 3587 | memcg_cachep = cache_from_memcg_idx(cachep, memcg_cache_id(memcg)); |
3586 | 3588 | if (likely(memcg_cachep)) { | |
3587 | /* | 3589 | cachep = memcg_cachep; |
3588 | * barrier to mare sure we're always seeing the up to date value. The | ||
3589 | * code updating memcg_caches will issue a write barrier to match this. | ||
3590 | */ | ||
3591 | read_barrier_depends(); | ||
3592 | if (likely(cache_from_memcg_idx(cachep, idx))) { | ||
3593 | cachep = cache_from_memcg_idx(cachep, idx); | ||
3594 | goto out; | 3590 | goto out; |
3595 | } | 3591 | } |
3596 | 3592 | ||
@@ -3744,7 +3740,7 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order) | |||
3744 | if (!memcg) | 3740 | if (!memcg) |
3745 | return; | 3741 | return; |
3746 | 3742 | ||
3747 | VM_BUG_ON(mem_cgroup_is_root(memcg)); | 3743 | VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page); |
3748 | memcg_uncharge_kmem(memcg, PAGE_SIZE << order); | 3744 | memcg_uncharge_kmem(memcg, PAGE_SIZE << order); |
3749 | } | 3745 | } |
3750 | #else | 3746 | #else |
@@ -3823,7 +3819,7 @@ static int mem_cgroup_move_account(struct page *page, | |||
3823 | bool anon = PageAnon(page); | 3819 | bool anon = PageAnon(page); |
3824 | 3820 | ||
3825 | VM_BUG_ON(from == to); | 3821 | VM_BUG_ON(from == to); |
3826 | VM_BUG_ON(PageLRU(page)); | 3822 | VM_BUG_ON_PAGE(PageLRU(page), page); |
3827 | /* | 3823 | /* |
3828 | * The page is isolated from LRU. So, collapse function | 3824 | * The page is isolated from LRU. So, collapse function |
3829 | * will not handle this page. But page splitting can happen. | 3825 | * will not handle this page. But page splitting can happen. |
@@ -3916,7 +3912,7 @@ static int mem_cgroup_move_parent(struct page *page, | |||
3916 | parent = root_mem_cgroup; | 3912 | parent = root_mem_cgroup; |
3917 | 3913 | ||
3918 | if (nr_pages > 1) { | 3914 | if (nr_pages > 1) { |
3919 | VM_BUG_ON(!PageTransHuge(page)); | 3915 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); |
3920 | flags = compound_lock_irqsave(page); | 3916 | flags = compound_lock_irqsave(page); |
3921 | } | 3917 | } |
3922 | 3918 | ||
@@ -3950,7 +3946,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | |||
3950 | 3946 | ||
3951 | if (PageTransHuge(page)) { | 3947 | if (PageTransHuge(page)) { |
3952 | nr_pages <<= compound_order(page); | 3948 | nr_pages <<= compound_order(page); |
3953 | VM_BUG_ON(!PageTransHuge(page)); | 3949 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); |
3954 | /* | 3950 | /* |
3955 | * Never OOM-kill a process for a huge page. The | 3951 | * Never OOM-kill a process for a huge page. The |
3956 | * fault handler will fall back to regular pages. | 3952 | * fault handler will fall back to regular pages. |
@@ -3970,8 +3966,8 @@ int mem_cgroup_newpage_charge(struct page *page, | |||
3970 | { | 3966 | { |
3971 | if (mem_cgroup_disabled()) | 3967 | if (mem_cgroup_disabled()) |
3972 | return 0; | 3968 | return 0; |
3973 | VM_BUG_ON(page_mapped(page)); | 3969 | VM_BUG_ON_PAGE(page_mapped(page), page); |
3974 | VM_BUG_ON(page->mapping && !PageAnon(page)); | 3970 | VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page); |
3975 | VM_BUG_ON(!mm); | 3971 | VM_BUG_ON(!mm); |
3976 | return mem_cgroup_charge_common(page, mm, gfp_mask, | 3972 | return mem_cgroup_charge_common(page, mm, gfp_mask, |
3977 | MEM_CGROUP_CHARGE_TYPE_ANON); | 3973 | MEM_CGROUP_CHARGE_TYPE_ANON); |
@@ -4175,7 +4171,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype, | |||
4175 | 4171 | ||
4176 | if (PageTransHuge(page)) { | 4172 | if (PageTransHuge(page)) { |
4177 | nr_pages <<= compound_order(page); | 4173 | nr_pages <<= compound_order(page); |
4178 | VM_BUG_ON(!PageTransHuge(page)); | 4174 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); |
4179 | } | 4175 | } |
4180 | /* | 4176 | /* |
4181 | * Check if our page_cgroup is valid | 4177 | * Check if our page_cgroup is valid |
@@ -4267,7 +4263,7 @@ void mem_cgroup_uncharge_page(struct page *page) | |||
4267 | /* early check. */ | 4263 | /* early check. */ |
4268 | if (page_mapped(page)) | 4264 | if (page_mapped(page)) |
4269 | return; | 4265 | return; |
4270 | VM_BUG_ON(page->mapping && !PageAnon(page)); | 4266 | VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page); |
4271 | /* | 4267 | /* |
4272 | * If the page is in swap cache, uncharge should be deferred | 4268 | * If the page is in swap cache, uncharge should be deferred |
4273 | * to the swap path, which also properly accounts swap usage | 4269 | * to the swap path, which also properly accounts swap usage |
@@ -4287,8 +4283,8 @@ void mem_cgroup_uncharge_page(struct page *page) | |||
4287 | 4283 | ||
4288 | void mem_cgroup_uncharge_cache_page(struct page *page) | 4284 | void mem_cgroup_uncharge_cache_page(struct page *page) |
4289 | { | 4285 | { |
4290 | VM_BUG_ON(page_mapped(page)); | 4286 | VM_BUG_ON_PAGE(page_mapped(page), page); |
4291 | VM_BUG_ON(page->mapping); | 4287 | VM_BUG_ON_PAGE(page->mapping, page); |
4292 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false); | 4288 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false); |
4293 | } | 4289 | } |
4294 | 4290 | ||
@@ -5112,14 +5108,12 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) | |||
5112 | return val << PAGE_SHIFT; | 5108 | return val << PAGE_SHIFT; |
5113 | } | 5109 | } |
5114 | 5110 | ||
5115 | static ssize_t mem_cgroup_read(struct cgroup_subsys_state *css, | 5111 | static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, |
5116 | struct cftype *cft, struct file *file, | 5112 | struct cftype *cft) |
5117 | char __user *buf, size_t nbytes, loff_t *ppos) | ||
5118 | { | 5113 | { |
5119 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 5114 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5120 | char str[64]; | ||
5121 | u64 val; | 5115 | u64 val; |
5122 | int name, len; | 5116 | int name; |
5123 | enum res_type type; | 5117 | enum res_type type; |
5124 | 5118 | ||
5125 | type = MEMFILE_TYPE(cft->private); | 5119 | type = MEMFILE_TYPE(cft->private); |
@@ -5145,15 +5139,26 @@ static ssize_t mem_cgroup_read(struct cgroup_subsys_state *css, | |||
5145 | BUG(); | 5139 | BUG(); |
5146 | } | 5140 | } |
5147 | 5141 | ||
5148 | len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val); | 5142 | return val; |
5149 | return simple_read_from_buffer(buf, nbytes, ppos, str, len); | ||
5150 | } | 5143 | } |
5151 | 5144 | ||
5152 | static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val) | ||
5153 | { | ||
5154 | int ret = -EINVAL; | ||
5155 | #ifdef CONFIG_MEMCG_KMEM | 5145 | #ifdef CONFIG_MEMCG_KMEM |
5156 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 5146 | /* should be called with activate_kmem_mutex held */ |
5147 | static int __memcg_activate_kmem(struct mem_cgroup *memcg, | ||
5148 | unsigned long long limit) | ||
5149 | { | ||
5150 | int err = 0; | ||
5151 | int memcg_id; | ||
5152 | |||
5153 | if (memcg_kmem_is_active(memcg)) | ||
5154 | return 0; | ||
5155 | |||
5156 | /* | ||
5157 | * We are going to allocate memory for data shared by all memory | ||
5158 | * cgroups so let's stop accounting here. | ||
5159 | */ | ||
5160 | memcg_stop_kmem_account(); | ||
5161 | |||
5157 | /* | 5162 | /* |
5158 | * For simplicity, we won't allow this to be disabled. It also can't | 5163 | * For simplicity, we won't allow this to be disabled. It also can't |
5159 | * be changed if the cgroup has children already, or if tasks had | 5164 | * be changed if the cgroup has children already, or if tasks had |
@@ -5167,72 +5172,101 @@ static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val) | |||
5167 | * of course permitted. | 5172 | * of course permitted. |
5168 | */ | 5173 | */ |
5169 | mutex_lock(&memcg_create_mutex); | 5174 | mutex_lock(&memcg_create_mutex); |
5170 | mutex_lock(&set_limit_mutex); | 5175 | if (cgroup_task_count(memcg->css.cgroup) || memcg_has_children(memcg)) |
5171 | if (!memcg->kmem_account_flags && val != RES_COUNTER_MAX) { | 5176 | err = -EBUSY; |
5172 | if (cgroup_task_count(css->cgroup) || memcg_has_children(memcg)) { | 5177 | mutex_unlock(&memcg_create_mutex); |
5173 | ret = -EBUSY; | 5178 | if (err) |
5174 | goto out; | 5179 | goto out; |
5175 | } | ||
5176 | ret = res_counter_set_limit(&memcg->kmem, val); | ||
5177 | VM_BUG_ON(ret); | ||
5178 | 5180 | ||
5179 | ret = memcg_update_cache_sizes(memcg); | 5181 | memcg_id = ida_simple_get(&kmem_limited_groups, |
5180 | if (ret) { | 5182 | 0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL); |
5181 | res_counter_set_limit(&memcg->kmem, RES_COUNTER_MAX); | 5183 | if (memcg_id < 0) { |
5182 | goto out; | 5184 | err = memcg_id; |
5183 | } | 5185 | goto out; |
5184 | static_key_slow_inc(&memcg_kmem_enabled_key); | 5186 | } |
5185 | /* | 5187 | |
5186 | * setting the active bit after the inc will guarantee no one | 5188 | /* |
5187 | * starts accounting before all call sites are patched | 5189 | * Make sure we have enough space for this cgroup in each root cache's |
5188 | */ | 5190 | * memcg_params. |
5189 | memcg_kmem_set_active(memcg); | 5191 | */ |
5190 | } else | 5192 | err = memcg_update_all_caches(memcg_id + 1); |
5191 | ret = res_counter_set_limit(&memcg->kmem, val); | 5193 | if (err) |
5194 | goto out_rmid; | ||
5195 | |||
5196 | memcg->kmemcg_id = memcg_id; | ||
5197 | INIT_LIST_HEAD(&memcg->memcg_slab_caches); | ||
5198 | mutex_init(&memcg->slab_caches_mutex); | ||
5199 | |||
5200 | /* | ||
5201 | * We couldn't have accounted to this cgroup, because it hasn't got the | ||
5202 | * active bit set yet, so this should succeed. | ||
5203 | */ | ||
5204 | err = res_counter_set_limit(&memcg->kmem, limit); | ||
5205 | VM_BUG_ON(err); | ||
5206 | |||
5207 | static_key_slow_inc(&memcg_kmem_enabled_key); | ||
5208 | /* | ||
5209 | * Setting the active bit after enabling static branching will | ||
5210 | * guarantee no one starts accounting before all call sites are | ||
5211 | * patched. | ||
5212 | */ | ||
5213 | memcg_kmem_set_active(memcg); | ||
5192 | out: | 5214 | out: |
5193 | mutex_unlock(&set_limit_mutex); | 5215 | memcg_resume_kmem_account(); |
5194 | mutex_unlock(&memcg_create_mutex); | 5216 | return err; |
5195 | #endif | 5217 | |
5218 | out_rmid: | ||
5219 | ida_simple_remove(&kmem_limited_groups, memcg_id); | ||
5220 | goto out; | ||
5221 | } | ||
5222 | |||
5223 | static int memcg_activate_kmem(struct mem_cgroup *memcg, | ||
5224 | unsigned long long limit) | ||
5225 | { | ||
5226 | int ret; | ||
5227 | |||
5228 | mutex_lock(&activate_kmem_mutex); | ||
5229 | ret = __memcg_activate_kmem(memcg, limit); | ||
5230 | mutex_unlock(&activate_kmem_mutex); | ||
5231 | return ret; | ||
5232 | } | ||
5233 | |||
5234 | static int memcg_update_kmem_limit(struct mem_cgroup *memcg, | ||
5235 | unsigned long long val) | ||
5236 | { | ||
5237 | int ret; | ||
5238 | |||
5239 | if (!memcg_kmem_is_active(memcg)) | ||
5240 | ret = memcg_activate_kmem(memcg, val); | ||
5241 | else | ||
5242 | ret = res_counter_set_limit(&memcg->kmem, val); | ||
5196 | return ret; | 5243 | return ret; |
5197 | } | 5244 | } |
5198 | 5245 | ||
5199 | #ifdef CONFIG_MEMCG_KMEM | ||
5200 | static int memcg_propagate_kmem(struct mem_cgroup *memcg) | 5246 | static int memcg_propagate_kmem(struct mem_cgroup *memcg) |
5201 | { | 5247 | { |
5202 | int ret = 0; | 5248 | int ret = 0; |
5203 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); | 5249 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); |
5204 | if (!parent) | ||
5205 | goto out; | ||
5206 | 5250 | ||
5207 | memcg->kmem_account_flags = parent->kmem_account_flags; | 5251 | if (!parent) |
5208 | /* | 5252 | return 0; |
5209 | * When that happen, we need to disable the static branch only on those | ||
5210 | * memcgs that enabled it. To achieve this, we would be forced to | ||
5211 | * complicate the code by keeping track of which memcgs were the ones | ||
5212 | * that actually enabled limits, and which ones got it from its | ||
5213 | * parents. | ||
5214 | * | ||
5215 | * It is a lot simpler just to do static_key_slow_inc() on every child | ||
5216 | * that is accounted. | ||
5217 | */ | ||
5218 | if (!memcg_kmem_is_active(memcg)) | ||
5219 | goto out; | ||
5220 | 5253 | ||
5254 | mutex_lock(&activate_kmem_mutex); | ||
5221 | /* | 5255 | /* |
5222 | * __mem_cgroup_free() will issue static_key_slow_dec() because this | 5256 | * If the parent cgroup is not kmem-active now, it cannot be activated |
5223 | * memcg is active already. If the later initialization fails then the | 5257 | * after this point, because it has at least one child already. |
5224 | * cgroup core triggers the cleanup so we do not have to do it here. | ||
5225 | */ | 5258 | */ |
5226 | static_key_slow_inc(&memcg_kmem_enabled_key); | 5259 | if (memcg_kmem_is_active(parent)) |
5227 | 5260 | ret = __memcg_activate_kmem(memcg, RES_COUNTER_MAX); | |
5228 | mutex_lock(&set_limit_mutex); | 5261 | mutex_unlock(&activate_kmem_mutex); |
5229 | memcg_stop_kmem_account(); | ||
5230 | ret = memcg_update_cache_sizes(memcg); | ||
5231 | memcg_resume_kmem_account(); | ||
5232 | mutex_unlock(&set_limit_mutex); | ||
5233 | out: | ||
5234 | return ret; | 5262 | return ret; |
5235 | } | 5263 | } |
5264 | #else | ||
5265 | static int memcg_update_kmem_limit(struct mem_cgroup *memcg, | ||
5266 | unsigned long long val) | ||
5267 | { | ||
5268 | return -EINVAL; | ||
5269 | } | ||
5236 | #endif /* CONFIG_MEMCG_KMEM */ | 5270 | #endif /* CONFIG_MEMCG_KMEM */ |
5237 | 5271 | ||
5238 | /* | 5272 | /* |
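From userspace nothing changes: kmem accounting for a group is still switched on by the first successful write to the v1 memory.kmem.limit_in_bytes knob, which after this hunk lands in memcg_activate_kmem() under activate_kmem_mutex, while later writes only adjust the limit. A hedged userspace sketch; the cgroup mount point and group name are assumptions:

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/sys/fs/cgroup/memory/test/memory.kmem.limit_in_bytes", "w");

            if (!f)
                    return 1;
            /* First write activates kmem accounting, later writes only change the limit. */
            fprintf(f, "%llu\n", 64ULL << 20);      /* 64M */
            return fclose(f) ? 1 : 0;
    }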
@@ -5266,7 +5300,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, | |||
5266 | else if (type == _MEMSWAP) | 5300 | else if (type == _MEMSWAP) |
5267 | ret = mem_cgroup_resize_memsw_limit(memcg, val); | 5301 | ret = mem_cgroup_resize_memsw_limit(memcg, val); |
5268 | else if (type == _KMEM) | 5302 | else if (type == _KMEM) |
5269 | ret = memcg_update_kmem_limit(css, val); | 5303 | ret = memcg_update_kmem_limit(memcg, val); |
5270 | else | 5304 | else |
5271 | return -EINVAL; | 5305 | return -EINVAL; |
5272 | break; | 5306 | break; |
@@ -5383,8 +5417,7 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, | |||
5383 | #endif | 5417 | #endif |
5384 | 5418 | ||
5385 | #ifdef CONFIG_NUMA | 5419 | #ifdef CONFIG_NUMA |
5386 | static int memcg_numa_stat_show(struct cgroup_subsys_state *css, | 5420 | static int memcg_numa_stat_show(struct seq_file *m, void *v) |
5387 | struct cftype *cft, struct seq_file *m) | ||
5388 | { | 5421 | { |
5389 | struct numa_stat { | 5422 | struct numa_stat { |
5390 | const char *name; | 5423 | const char *name; |
@@ -5400,7 +5433,7 @@ static int memcg_numa_stat_show(struct cgroup_subsys_state *css, | |||
5400 | const struct numa_stat *stat; | 5433 | const struct numa_stat *stat; |
5401 | int nid; | 5434 | int nid; |
5402 | unsigned long nr; | 5435 | unsigned long nr; |
5403 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 5436 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
5404 | 5437 | ||
5405 | for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { | 5438 | for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { |
5406 | nr = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask); | 5439 | nr = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask); |
@@ -5439,10 +5472,9 @@ static inline void mem_cgroup_lru_names_not_uptodate(void) | |||
5439 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); | 5472 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); |
5440 | } | 5473 | } |
5441 | 5474 | ||
5442 | static int memcg_stat_show(struct cgroup_subsys_state *css, struct cftype *cft, | 5475 | static int memcg_stat_show(struct seq_file *m, void *v) |
5443 | struct seq_file *m) | ||
5444 | { | 5476 | { |
5445 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 5477 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
5446 | struct mem_cgroup *mi; | 5478 | struct mem_cgroup *mi; |
5447 | unsigned int i; | 5479 | unsigned int i; |
5448 | 5480 | ||
@@ -5651,13 +5683,11 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg) | |||
5651 | mem_cgroup_oom_notify_cb(iter); | 5683 | mem_cgroup_oom_notify_cb(iter); |
5652 | } | 5684 | } |
5653 | 5685 | ||
5654 | static int mem_cgroup_usage_register_event(struct cgroup_subsys_state *css, | 5686 | static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg, |
5655 | struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) | 5687 | struct eventfd_ctx *eventfd, const char *args, enum res_type type) |
5656 | { | 5688 | { |
5657 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
5658 | struct mem_cgroup_thresholds *thresholds; | 5689 | struct mem_cgroup_thresholds *thresholds; |
5659 | struct mem_cgroup_threshold_ary *new; | 5690 | struct mem_cgroup_threshold_ary *new; |
5660 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
5661 | u64 threshold, usage; | 5691 | u64 threshold, usage; |
5662 | int i, size, ret; | 5692 | int i, size, ret; |
5663 | 5693 | ||
@@ -5734,13 +5764,23 @@ unlock: | |||
5734 | return ret; | 5764 | return ret; |
5735 | } | 5765 | } |
5736 | 5766 | ||
5737 | static void mem_cgroup_usage_unregister_event(struct cgroup_subsys_state *css, | 5767 | static int mem_cgroup_usage_register_event(struct mem_cgroup *memcg, |
5738 | struct cftype *cft, struct eventfd_ctx *eventfd) | 5768 | struct eventfd_ctx *eventfd, const char *args) |
5769 | { | ||
5770 | return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEM); | ||
5771 | } | ||
5772 | |||
5773 | static int memsw_cgroup_usage_register_event(struct mem_cgroup *memcg, | ||
5774 | struct eventfd_ctx *eventfd, const char *args) | ||
5775 | { | ||
5776 | return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEMSWAP); | ||
5777 | } | ||
5778 | |||
5779 | static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, | ||
5780 | struct eventfd_ctx *eventfd, enum res_type type) | ||
5739 | { | 5781 | { |
5740 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
5741 | struct mem_cgroup_thresholds *thresholds; | 5782 | struct mem_cgroup_thresholds *thresholds; |
5742 | struct mem_cgroup_threshold_ary *new; | 5783 | struct mem_cgroup_threshold_ary *new; |
5743 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
5744 | u64 usage; | 5784 | u64 usage; |
5745 | int i, j, size; | 5785 | int i, j, size; |
5746 | 5786 | ||
@@ -5813,14 +5853,23 @@ unlock: | |||
5813 | mutex_unlock(&memcg->thresholds_lock); | 5853 | mutex_unlock(&memcg->thresholds_lock); |
5814 | } | 5854 | } |
5815 | 5855 | ||
5816 | static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css, | 5856 | static void mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, |
5817 | struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) | 5857 | struct eventfd_ctx *eventfd) |
5858 | { | ||
5859 | return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEM); | ||
5860 | } | ||
5861 | |||
5862 | static void memsw_cgroup_usage_unregister_event(struct mem_cgroup *memcg, | ||
5863 | struct eventfd_ctx *eventfd) | ||
5864 | { | ||
5865 | return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEMSWAP); | ||
5866 | } | ||
5867 | |||
5868 | static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg, | ||
5869 | struct eventfd_ctx *eventfd, const char *args) | ||
5818 | { | 5870 | { |
5819 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
5820 | struct mem_cgroup_eventfd_list *event; | 5871 | struct mem_cgroup_eventfd_list *event; |
5821 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
5822 | 5872 | ||
5823 | BUG_ON(type != _OOM_TYPE); | ||
5824 | event = kmalloc(sizeof(*event), GFP_KERNEL); | 5873 | event = kmalloc(sizeof(*event), GFP_KERNEL); |
5825 | if (!event) | 5874 | if (!event) |
5826 | return -ENOMEM; | 5875 | return -ENOMEM; |
@@ -5838,14 +5887,10 @@ static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css, | |||
5838 | return 0; | 5887 | return 0; |
5839 | } | 5888 | } |
5840 | 5889 | ||
5841 | static void mem_cgroup_oom_unregister_event(struct cgroup_subsys_state *css, | 5890 | static void mem_cgroup_oom_unregister_event(struct mem_cgroup *memcg, |
5842 | struct cftype *cft, struct eventfd_ctx *eventfd) | 5891 | struct eventfd_ctx *eventfd) |
5843 | { | 5892 | { |
5844 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
5845 | struct mem_cgroup_eventfd_list *ev, *tmp; | 5893 | struct mem_cgroup_eventfd_list *ev, *tmp; |
5846 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
5847 | |||
5848 | BUG_ON(type != _OOM_TYPE); | ||
5849 | 5894 | ||
5850 | spin_lock(&memcg_oom_lock); | 5895 | spin_lock(&memcg_oom_lock); |
5851 | 5896 | ||
@@ -5859,17 +5904,12 @@ static void mem_cgroup_oom_unregister_event(struct cgroup_subsys_state *css, | |||
5859 | spin_unlock(&memcg_oom_lock); | 5904 | spin_unlock(&memcg_oom_lock); |
5860 | } | 5905 | } |
5861 | 5906 | ||
5862 | static int mem_cgroup_oom_control_read(struct cgroup_subsys_state *css, | 5907 | static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v) |
5863 | struct cftype *cft, struct cgroup_map_cb *cb) | ||
5864 | { | 5908 | { |
5865 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 5909 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(sf)); |
5866 | 5910 | ||
5867 | cb->fill(cb, "oom_kill_disable", memcg->oom_kill_disable); | 5911 | seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable); |
5868 | 5912 | seq_printf(sf, "under_oom %d\n", (bool)atomic_read(&memcg->under_oom)); | |
5869 | if (atomic_read(&memcg->under_oom)) | ||
5870 | cb->fill(cb, "under_oom", 1); | ||
5871 | else | ||
5872 | cb->fill(cb, "under_oom", 0); | ||
5873 | return 0; | 5913 | return 0; |
5874 | } | 5914 | } |
5875 | 5915 | ||
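Dropping the css/cftype arguments gives the threshold and OOM handlers exactly the signatures expected by the register_event/unregister_event members of struct mem_cgroup_event introduced earlier; the actual wiring is presumably done by memcg_write_event_control(), whose tail is not shown in this excerpt. Illustrative binding only, the helper below is hypothetical:

    static void bind_usage_event(struct mem_cgroup_event *event)
    {
            event->register_event = mem_cgroup_usage_register_event;
            event->unregister_event = mem_cgroup_usage_unregister_event;
    }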
@@ -5962,41 +6002,261 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) | |||
5962 | } | 6002 | } |
5963 | #endif | 6003 | #endif |
5964 | 6004 | ||
6005 | /* | ||
6006 | * DO NOT USE IN NEW FILES. | ||
6007 | * | ||
6008 | * "cgroup.event_control" implementation. | ||
6009 | * | ||
6010 | * This is way over-engineered. It tries to support fully configurable | ||
6011 | * events for each user. Such level of flexibility is completely | ||
6012 | * unnecessary especially in the light of the planned unified hierarchy. | ||
6013 | * | ||
6014 | * Please deprecate this and replace with something simpler if at all | ||
6015 | * possible. | ||
6016 | */ | ||
6017 | |||
6018 | /* | ||
6019 | * Unregister event and free resources. | ||
6020 | * | ||
6021 | * Gets called from workqueue. | ||
6022 | */ | ||
6023 | static void memcg_event_remove(struct work_struct *work) | ||
6024 | { | ||
6025 | struct mem_cgroup_event *event = | ||
6026 | container_of(work, struct mem_cgroup_event, remove); | ||
6027 | struct mem_cgroup *memcg = event->memcg; | ||
6028 | |||
6029 | remove_wait_queue(event->wqh, &event->wait); | ||
6030 | |||
6031 | event->unregister_event(memcg, event->eventfd); | ||
6032 | |||
6033 | /* Notify userspace the event is going away. */ | ||
6034 | eventfd_signal(event->eventfd, 1); | ||
6035 | |||
6036 | eventfd_ctx_put(event->eventfd); | ||
6037 | kfree(event); | ||
6038 | css_put(&memcg->css); | ||
6039 | } | ||
6040 | |||
6041 | /* | ||
6042 | * Gets called on POLLHUP on eventfd when user closes it. | ||
6043 | * | ||
6044 | * Called with wqh->lock held and interrupts disabled. | ||
6045 | */ | ||
6046 | static int memcg_event_wake(wait_queue_t *wait, unsigned mode, | ||
6047 | int sync, void *key) | ||
6048 | { | ||
6049 | struct mem_cgroup_event *event = | ||
6050 | container_of(wait, struct mem_cgroup_event, wait); | ||
6051 | struct mem_cgroup *memcg = event->memcg; | ||
6052 | unsigned long flags = (unsigned long)key; | ||
6053 | |||
6054 | if (flags & POLLHUP) { | ||
6055 | /* | ||
6056 | * If the event has been detached at cgroup removal, we | ||
6057 | * can simply return, knowing the other side will clean up | ||
6058 | * for us. | ||
6059 | * | ||
6060 | * We can't race against event freeing since the other | ||
6061 | * side will require wqh->lock via remove_wait_queue(), | ||
6062 | * which we hold. | ||
6063 | */ | ||
6064 | spin_lock(&memcg->event_list_lock); | ||
6065 | if (!list_empty(&event->list)) { | ||
6066 | list_del_init(&event->list); | ||
6067 | /* | ||
6068 | * We are in atomic context, but memcg_event_remove() | ||
6069 | * may sleep, so we have to call it from a workqueue. | ||
6070 | */ | ||
6071 | schedule_work(&event->remove); | ||
6072 | } | ||
6073 | spin_unlock(&memcg->event_list_lock); | ||
6074 | } | ||
6075 | |||
6076 | return 0; | ||
6077 | } | ||
6078 | |||
6079 | static void memcg_event_ptable_queue_proc(struct file *file, | ||
6080 | wait_queue_head_t *wqh, poll_table *pt) | ||
6081 | { | ||
6082 | struct mem_cgroup_event *event = | ||
6083 | container_of(pt, struct mem_cgroup_event, pt); | ||
6084 | |||
6085 | event->wqh = wqh; | ||
6086 | add_wait_queue(wqh, &event->wait); | ||
6087 | } | ||
6088 | |||
6089 | /* | ||
6090 | * DO NOT USE IN NEW FILES. | ||
6091 | * | ||
6092 | * Parse the input and register a new cgroup event handler. | ||
6093 | * | ||
6094 | * Input must be in format '<event_fd> <control_fd> <args>'. | ||
6095 | * Interpretation of args is defined by control file implementation. | ||
6096 | */ | ||
6097 | static int memcg_write_event_control(struct cgroup_subsys_state *css, | ||
6098 | struct cftype *cft, const char *buffer) | ||
6099 | { | ||
6100 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
6101 | struct mem_cgroup_event *event; | ||
6102 | struct cgroup_subsys_state *cfile_css; | ||
6103 | unsigned int efd, cfd; | ||
6104 | struct fd efile; | ||
6105 | struct fd cfile; | ||
6106 | const char *name; | ||
6107 | char *endp; | ||
6108 | int ret; | ||
6109 | |||
6110 | efd = simple_strtoul(buffer, &endp, 10); | ||
6111 | if (*endp != ' ') | ||
6112 | return -EINVAL; | ||
6113 | buffer = endp + 1; | ||
6114 | |||
6115 | cfd = simple_strtoul(buffer, &endp, 10); | ||
6116 | if ((*endp != ' ') && (*endp != '\0')) | ||
6117 | return -EINVAL; | ||
6118 | buffer = endp + 1; | ||
6119 | |||
6120 | event = kzalloc(sizeof(*event), GFP_KERNEL); | ||
6121 | if (!event) | ||
6122 | return -ENOMEM; | ||
6123 | |||
6124 | event->memcg = memcg; | ||
6125 | INIT_LIST_HEAD(&event->list); | ||
6126 | init_poll_funcptr(&event->pt, memcg_event_ptable_queue_proc); | ||
6127 | init_waitqueue_func_entry(&event->wait, memcg_event_wake); | ||
6128 | INIT_WORK(&event->remove, memcg_event_remove); | ||
6129 | |||
6130 | efile = fdget(efd); | ||
6131 | if (!efile.file) { | ||
6132 | ret = -EBADF; | ||
6133 | goto out_kfree; | ||
6134 | } | ||
6135 | |||
6136 | event->eventfd = eventfd_ctx_fileget(efile.file); | ||
6137 | if (IS_ERR(event->eventfd)) { | ||
6138 | ret = PTR_ERR(event->eventfd); | ||
6139 | goto out_put_efile; | ||
6140 | } | ||
6141 | |||
6142 | cfile = fdget(cfd); | ||
6143 | if (!cfile.file) { | ||
6144 | ret = -EBADF; | ||
6145 | goto out_put_eventfd; | ||
6146 | } | ||
6147 | |||
6148 | /* the process needs read permission on the control file */ | ||
6149 | /* AV: shouldn't we check that it's been opened for read instead? */ | ||
6150 | ret = inode_permission(file_inode(cfile.file), MAY_READ); | ||
6151 | if (ret < 0) | ||
6152 | goto out_put_cfile; | ||
6153 | |||
6154 | /* | ||
6155 | * Determine the event callbacks and set them in @event. This used | ||
6156 | * to be done via struct cftype but cgroup core no longer knows | ||
6157 | * about these events. The following is crude but the whole thing | ||
6158 | * is for compatibility anyway. | ||
6159 | * | ||
6160 | * DO NOT ADD NEW FILES. | ||
6161 | */ | ||
6162 | name = cfile.file->f_dentry->d_name.name; | ||
6163 | |||
6164 | if (!strcmp(name, "memory.usage_in_bytes")) { | ||
6165 | event->register_event = mem_cgroup_usage_register_event; | ||
6166 | event->unregister_event = mem_cgroup_usage_unregister_event; | ||
6167 | } else if (!strcmp(name, "memory.oom_control")) { | ||
6168 | event->register_event = mem_cgroup_oom_register_event; | ||
6169 | event->unregister_event = mem_cgroup_oom_unregister_event; | ||
6170 | } else if (!strcmp(name, "memory.pressure_level")) { | ||
6171 | event->register_event = vmpressure_register_event; | ||
6172 | event->unregister_event = vmpressure_unregister_event; | ||
6173 | } else if (!strcmp(name, "memory.memsw.usage_in_bytes")) { | ||
6174 | event->register_event = memsw_cgroup_usage_register_event; | ||
6175 | event->unregister_event = memsw_cgroup_usage_unregister_event; | ||
6176 | } else { | ||
6177 | ret = -EINVAL; | ||
6178 | goto out_put_cfile; | ||
6179 | } | ||
6180 | |||
6181 | /* | ||
6182 | * Verify that @cfile belongs to @css. Also, remaining events are | ||
6183 | * automatically removed on cgroup destruction but the removal is | ||
6184 | * asynchronous, so take an extra ref on @css. | ||
6185 | */ | ||
6186 | rcu_read_lock(); | ||
6187 | |||
6188 | ret = -EINVAL; | ||
6189 | cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, | ||
6190 | &mem_cgroup_subsys); | ||
6191 | if (cfile_css == css && css_tryget(css)) | ||
6192 | ret = 0; | ||
6193 | |||
6194 | rcu_read_unlock(); | ||
6195 | if (ret) | ||
6196 | goto out_put_cfile; | ||
6197 | |||
6198 | ret = event->register_event(memcg, event->eventfd, buffer); | ||
6199 | if (ret) | ||
6200 | goto out_put_css; | ||
6201 | |||
6202 | efile.file->f_op->poll(efile.file, &event->pt); | ||
6203 | |||
6204 | spin_lock(&memcg->event_list_lock); | ||
6205 | list_add(&event->list, &memcg->event_list); | ||
6206 | spin_unlock(&memcg->event_list_lock); | ||
6207 | |||
6208 | fdput(cfile); | ||
6209 | fdput(efile); | ||
6210 | |||
6211 | return 0; | ||
6212 | |||
6213 | out_put_css: | ||
6214 | css_put(css); | ||
6215 | out_put_cfile: | ||
6216 | fdput(cfile); | ||
6217 | out_put_eventfd: | ||
6218 | eventfd_ctx_put(event->eventfd); | ||
6219 | out_put_efile: | ||
6220 | fdput(efile); | ||
6221 | out_kfree: | ||
6222 | kfree(event); | ||
6223 | |||
6224 | return ret; | ||
6225 | } | ||
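For readers unfamiliar with this legacy interface, here is a minimal userspace sketch of the registration sequence that memcg_write_event_control() parses above: create an eventfd, open the control file, and write "<event_fd> <control_fd>" into cgroup.event_control. This is an illustration only, not part of the patch; the cgroup mount path is an assumption and error handling is kept to a bare minimum.

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/eventfd.h>

	int main(void)
	{
		/* Paths below are assumptions for illustration. */
		int efd = eventfd(0, 0);						/* <event_fd>   */
		int cfd = open("/sys/fs/cgroup/memory/foo/memory.oom_control", O_RDONLY); /* <control_fd> */
		int wfd = open("/sys/fs/cgroup/memory/foo/cgroup.event_control", O_WRONLY);
		char buf[32];
		uint64_t cnt;

		if (efd < 0 || cfd < 0 || wfd < 0)
			return 1;

		/* "<event_fd> <control_fd>" -- memory.oom_control takes no extra args. */
		snprintf(buf, sizeof(buf), "%d %d", efd, cfd);
		if (write(wfd, buf, strlen(buf)) < 0)
			return 1;

		/* Blocks until the kernel calls eventfd_signal() for an OOM event. */
		if (read(efd, &cnt, sizeof(cnt)) == sizeof(cnt))
			printf("OOM notifications: %llu\n", (unsigned long long)cnt);
		return 0;
	}

Once registered, closing efd from userspace raises POLLHUP on the eventfd's wait queue, which is the path memcg_event_wake() above handles by scheduling memcg_event_remove().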
6226 | |||
5965 | static struct cftype mem_cgroup_files[] = { | 6227 | static struct cftype mem_cgroup_files[] = { |
5966 | { | 6228 | { |
5967 | .name = "usage_in_bytes", | 6229 | .name = "usage_in_bytes", |
5968 | .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), | 6230 | .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), |
5969 | .read = mem_cgroup_read, | 6231 | .read_u64 = mem_cgroup_read_u64, |
5970 | .register_event = mem_cgroup_usage_register_event, | ||
5971 | .unregister_event = mem_cgroup_usage_unregister_event, | ||
5972 | }, | 6232 | }, |
5973 | { | 6233 | { |
5974 | .name = "max_usage_in_bytes", | 6234 | .name = "max_usage_in_bytes", |
5975 | .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), | 6235 | .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), |
5976 | .trigger = mem_cgroup_reset, | 6236 | .trigger = mem_cgroup_reset, |
5977 | .read = mem_cgroup_read, | 6237 | .read_u64 = mem_cgroup_read_u64, |
5978 | }, | 6238 | }, |
5979 | { | 6239 | { |
5980 | .name = "limit_in_bytes", | 6240 | .name = "limit_in_bytes", |
5981 | .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), | 6241 | .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), |
5982 | .write_string = mem_cgroup_write, | 6242 | .write_string = mem_cgroup_write, |
5983 | .read = mem_cgroup_read, | 6243 | .read_u64 = mem_cgroup_read_u64, |
5984 | }, | 6244 | }, |
5985 | { | 6245 | { |
5986 | .name = "soft_limit_in_bytes", | 6246 | .name = "soft_limit_in_bytes", |
5987 | .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), | 6247 | .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), |
5988 | .write_string = mem_cgroup_write, | 6248 | .write_string = mem_cgroup_write, |
5989 | .read = mem_cgroup_read, | 6249 | .read_u64 = mem_cgroup_read_u64, |
5990 | }, | 6250 | }, |
5991 | { | 6251 | { |
5992 | .name = "failcnt", | 6252 | .name = "failcnt", |
5993 | .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), | 6253 | .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), |
5994 | .trigger = mem_cgroup_reset, | 6254 | .trigger = mem_cgroup_reset, |
5995 | .read = mem_cgroup_read, | 6255 | .read_u64 = mem_cgroup_read_u64, |
5996 | }, | 6256 | }, |
5997 | { | 6257 | { |
5998 | .name = "stat", | 6258 | .name = "stat", |
5999 | .read_seq_string = memcg_stat_show, | 6259 | .seq_show = memcg_stat_show, |
6000 | }, | 6260 | }, |
6001 | { | 6261 | { |
6002 | .name = "force_empty", | 6262 | .name = "force_empty", |
@@ -6009,6 +6269,12 @@ static struct cftype mem_cgroup_files[] = { | |||
6009 | .read_u64 = mem_cgroup_hierarchy_read, | 6269 | .read_u64 = mem_cgroup_hierarchy_read, |
6010 | }, | 6270 | }, |
6011 | { | 6271 | { |
6272 | .name = "cgroup.event_control", /* XXX: for compat */ | ||
6273 | .write_string = memcg_write_event_control, | ||
6274 | .flags = CFTYPE_NO_PREFIX, | ||
6275 | .mode = S_IWUGO, | ||
6276 | }, | ||
6277 | { | ||
6012 | .name = "swappiness", | 6278 | .name = "swappiness", |
6013 | .read_u64 = mem_cgroup_swappiness_read, | 6279 | .read_u64 = mem_cgroup_swappiness_read, |
6014 | .write_u64 = mem_cgroup_swappiness_write, | 6280 | .write_u64 = mem_cgroup_swappiness_write, |
@@ -6020,21 +6286,17 @@ static struct cftype mem_cgroup_files[] = { | |||
6020 | }, | 6286 | }, |
6021 | { | 6287 | { |
6022 | .name = "oom_control", | 6288 | .name = "oom_control", |
6023 | .read_map = mem_cgroup_oom_control_read, | 6289 | .seq_show = mem_cgroup_oom_control_read, |
6024 | .write_u64 = mem_cgroup_oom_control_write, | 6290 | .write_u64 = mem_cgroup_oom_control_write, |
6025 | .register_event = mem_cgroup_oom_register_event, | ||
6026 | .unregister_event = mem_cgroup_oom_unregister_event, | ||
6027 | .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), | 6291 | .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), |
6028 | }, | 6292 | }, |
6029 | { | 6293 | { |
6030 | .name = "pressure_level", | 6294 | .name = "pressure_level", |
6031 | .register_event = vmpressure_register_event, | ||
6032 | .unregister_event = vmpressure_unregister_event, | ||
6033 | }, | 6295 | }, |
6034 | #ifdef CONFIG_NUMA | 6296 | #ifdef CONFIG_NUMA |
6035 | { | 6297 | { |
6036 | .name = "numa_stat", | 6298 | .name = "numa_stat", |
6037 | .read_seq_string = memcg_numa_stat_show, | 6299 | .seq_show = memcg_numa_stat_show, |
6038 | }, | 6300 | }, |
6039 | #endif | 6301 | #endif |
6040 | #ifdef CONFIG_MEMCG_KMEM | 6302 | #ifdef CONFIG_MEMCG_KMEM |
@@ -6042,29 +6304,29 @@ static struct cftype mem_cgroup_files[] = { | |||
6042 | .name = "kmem.limit_in_bytes", | 6304 | .name = "kmem.limit_in_bytes", |
6043 | .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), | 6305 | .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), |
6044 | .write_string = mem_cgroup_write, | 6306 | .write_string = mem_cgroup_write, |
6045 | .read = mem_cgroup_read, | 6307 | .read_u64 = mem_cgroup_read_u64, |
6046 | }, | 6308 | }, |
6047 | { | 6309 | { |
6048 | .name = "kmem.usage_in_bytes", | 6310 | .name = "kmem.usage_in_bytes", |
6049 | .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE), | 6311 | .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE), |
6050 | .read = mem_cgroup_read, | 6312 | .read_u64 = mem_cgroup_read_u64, |
6051 | }, | 6313 | }, |
6052 | { | 6314 | { |
6053 | .name = "kmem.failcnt", | 6315 | .name = "kmem.failcnt", |
6054 | .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT), | 6316 | .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT), |
6055 | .trigger = mem_cgroup_reset, | 6317 | .trigger = mem_cgroup_reset, |
6056 | .read = mem_cgroup_read, | 6318 | .read_u64 = mem_cgroup_read_u64, |
6057 | }, | 6319 | }, |
6058 | { | 6320 | { |
6059 | .name = "kmem.max_usage_in_bytes", | 6321 | .name = "kmem.max_usage_in_bytes", |
6060 | .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE), | 6322 | .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE), |
6061 | .trigger = mem_cgroup_reset, | 6323 | .trigger = mem_cgroup_reset, |
6062 | .read = mem_cgroup_read, | 6324 | .read_u64 = mem_cgroup_read_u64, |
6063 | }, | 6325 | }, |
6064 | #ifdef CONFIG_SLABINFO | 6326 | #ifdef CONFIG_SLABINFO |
6065 | { | 6327 | { |
6066 | .name = "kmem.slabinfo", | 6328 | .name = "kmem.slabinfo", |
6067 | .read_seq_string = mem_cgroup_slabinfo_read, | 6329 | .seq_show = mem_cgroup_slabinfo_read, |
6068 | }, | 6330 | }, |
6069 | #endif | 6331 | #endif |
6070 | #endif | 6332 | #endif |
@@ -6076,27 +6338,25 @@ static struct cftype memsw_cgroup_files[] = { | |||
6076 | { | 6338 | { |
6077 | .name = "memsw.usage_in_bytes", | 6339 | .name = "memsw.usage_in_bytes", |
6078 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), | 6340 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), |
6079 | .read = mem_cgroup_read, | 6341 | .read_u64 = mem_cgroup_read_u64, |
6080 | .register_event = mem_cgroup_usage_register_event, | ||
6081 | .unregister_event = mem_cgroup_usage_unregister_event, | ||
6082 | }, | 6342 | }, |
6083 | { | 6343 | { |
6084 | .name = "memsw.max_usage_in_bytes", | 6344 | .name = "memsw.max_usage_in_bytes", |
6085 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), | 6345 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), |
6086 | .trigger = mem_cgroup_reset, | 6346 | .trigger = mem_cgroup_reset, |
6087 | .read = mem_cgroup_read, | 6347 | .read_u64 = mem_cgroup_read_u64, |
6088 | }, | 6348 | }, |
6089 | { | 6349 | { |
6090 | .name = "memsw.limit_in_bytes", | 6350 | .name = "memsw.limit_in_bytes", |
6091 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), | 6351 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), |
6092 | .write_string = mem_cgroup_write, | 6352 | .write_string = mem_cgroup_write, |
6093 | .read = mem_cgroup_read, | 6353 | .read_u64 = mem_cgroup_read_u64, |
6094 | }, | 6354 | }, |
6095 | { | 6355 | { |
6096 | .name = "memsw.failcnt", | 6356 | .name = "memsw.failcnt", |
6097 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), | 6357 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), |
6098 | .trigger = mem_cgroup_reset, | 6358 | .trigger = mem_cgroup_reset, |
6099 | .read = mem_cgroup_read, | 6359 | .read_u64 = mem_cgroup_read_u64, |
6100 | }, | 6360 | }, |
6101 | { }, /* terminate */ | 6361 | { }, /* terminate */ |
6102 | }; | 6362 | }; |
@@ -6139,14 +6399,12 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) | |||
6139 | static struct mem_cgroup *mem_cgroup_alloc(void) | 6399 | static struct mem_cgroup *mem_cgroup_alloc(void) |
6140 | { | 6400 | { |
6141 | struct mem_cgroup *memcg; | 6401 | struct mem_cgroup *memcg; |
6142 | size_t size = memcg_size(); | 6402 | size_t size; |
6143 | 6403 | ||
6144 | /* Can be very big if nr_node_ids is very big */ | 6404 | size = sizeof(struct mem_cgroup); |
6145 | if (size < PAGE_SIZE) | 6405 | size += nr_node_ids * sizeof(struct mem_cgroup_per_node *); |
6146 | memcg = kzalloc(size, GFP_KERNEL); | ||
6147 | else | ||
6148 | memcg = vzalloc(size); | ||
6149 | 6406 | ||
6407 | memcg = kzalloc(size, GFP_KERNEL); | ||
6150 | if (!memcg) | 6408 | if (!memcg) |
6151 | return NULL; | 6409 | return NULL; |
6152 | 6410 | ||
@@ -6157,10 +6415,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) | |||
6157 | return memcg; | 6415 | return memcg; |
6158 | 6416 | ||
6159 | out_free: | 6417 | out_free: |
6160 | if (size < PAGE_SIZE) | 6418 | kfree(memcg); |
6161 | kfree(memcg); | ||
6162 | else | ||
6163 | vfree(memcg); | ||
6164 | return NULL; | 6419 | return NULL; |
6165 | } | 6420 | } |
6166 | 6421 | ||
@@ -6178,7 +6433,6 @@ out_free: | |||
6178 | static void __mem_cgroup_free(struct mem_cgroup *memcg) | 6433 | static void __mem_cgroup_free(struct mem_cgroup *memcg) |
6179 | { | 6434 | { |
6180 | int node; | 6435 | int node; |
6181 | size_t size = memcg_size(); | ||
6182 | 6436 | ||
6183 | mem_cgroup_remove_from_trees(memcg); | 6437 | mem_cgroup_remove_from_trees(memcg); |
6184 | 6438 | ||
@@ -6199,10 +6453,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) | |||
6199 | * the cgroup_lock. | 6453 | * the cgroup_lock. |
6200 | */ | 6454 | */ |
6201 | disarm_static_keys(memcg); | 6455 | disarm_static_keys(memcg); |
6202 | if (size < PAGE_SIZE) | 6456 | kfree(memcg); |
6203 | kfree(memcg); | ||
6204 | else | ||
6205 | vfree(memcg); | ||
6206 | } | 6457 | } |
6207 | 6458 | ||
6208 | /* | 6459 | /* |
@@ -6268,6 +6519,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | |||
6268 | mutex_init(&memcg->thresholds_lock); | 6519 | mutex_init(&memcg->thresholds_lock); |
6269 | spin_lock_init(&memcg->move_lock); | 6520 | spin_lock_init(&memcg->move_lock); |
6270 | vmpressure_init(&memcg->vmpressure); | 6521 | vmpressure_init(&memcg->vmpressure); |
6522 | INIT_LIST_HEAD(&memcg->event_list); | ||
6523 | spin_lock_init(&memcg->event_list_lock); | ||
6271 | 6524 | ||
6272 | return &memcg->css; | 6525 | return &memcg->css; |
6273 | 6526 | ||
@@ -6281,7 +6534,6 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) | |||
6281 | { | 6534 | { |
6282 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 6535 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
6283 | struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css)); | 6536 | struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css)); |
6284 | int error = 0; | ||
6285 | 6537 | ||
6286 | if (css->cgroup->id > MEM_CGROUP_ID_MAX) | 6538 | if (css->cgroup->id > MEM_CGROUP_ID_MAX) |
6287 | return -ENOSPC; | 6539 | return -ENOSPC; |
@@ -6316,10 +6568,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) | |||
6316 | if (parent != root_mem_cgroup) | 6568 | if (parent != root_mem_cgroup) |
6317 | mem_cgroup_subsys.broken_hierarchy = true; | 6569 | mem_cgroup_subsys.broken_hierarchy = true; |
6318 | } | 6570 | } |
6319 | |||
6320 | error = memcg_init_kmem(memcg, &mem_cgroup_subsys); | ||
6321 | mutex_unlock(&memcg_create_mutex); | 6571 | mutex_unlock(&memcg_create_mutex); |
6322 | return error; | 6572 | |
6573 | return memcg_init_kmem(memcg, &mem_cgroup_subsys); | ||
6323 | } | 6574 | } |
6324 | 6575 | ||
6325 | /* | 6576 | /* |
@@ -6343,11 +6594,32 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg) | |||
6343 | static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) | 6594 | static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) |
6344 | { | 6595 | { |
6345 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 6596 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
6597 | struct mem_cgroup_event *event, *tmp; | ||
6598 | struct cgroup_subsys_state *iter; | ||
6599 | |||
6600 | /* | ||
6601 | * Unregister events and notify userspace. | ||
6602 | * Notify userspace about cgroup removal only after rmdir of the cgroup | ||
6603 | * directory to avoid a race between userspace and kernelspace. | ||
6604 | */ | ||
6605 | spin_lock(&memcg->event_list_lock); | ||
6606 | list_for_each_entry_safe(event, tmp, &memcg->event_list, list) { | ||
6607 | list_del_init(&event->list); | ||
6608 | schedule_work(&event->remove); | ||
6609 | } | ||
6610 | spin_unlock(&memcg->event_list_lock); | ||
6346 | 6611 | ||
6347 | kmem_cgroup_css_offline(memcg); | 6612 | kmem_cgroup_css_offline(memcg); |
6348 | 6613 | ||
6349 | mem_cgroup_invalidate_reclaim_iterators(memcg); | 6614 | mem_cgroup_invalidate_reclaim_iterators(memcg); |
6350 | mem_cgroup_reparent_charges(memcg); | 6615 | |
6616 | /* | ||
6617 | * This requires that offlining is serialized. Right now that is | ||
6618 | * guaranteed because css_killed_work_fn() holds the cgroup_mutex. | ||
6619 | */ | ||
6620 | css_for_each_descendant_post(iter, css) | ||
6621 | mem_cgroup_reparent_charges(mem_cgroup_from_css(iter)); | ||
6622 | |||
6351 | mem_cgroup_destroy_all_caches(memcg); | 6623 | mem_cgroup_destroy_all_caches(memcg); |
6352 | vmpressure_cleanup(&memcg->vmpressure); | 6624 | vmpressure_cleanup(&memcg->vmpressure); |
6353 | } | 6625 | } |
@@ -6615,7 +6887,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma, | |||
6615 | enum mc_target_type ret = MC_TARGET_NONE; | 6887 | enum mc_target_type ret = MC_TARGET_NONE; |
6616 | 6888 | ||
6617 | page = pmd_page(pmd); | 6889 | page = pmd_page(pmd); |
6618 | VM_BUG_ON(!page || !PageHead(page)); | 6890 | VM_BUG_ON_PAGE(!page || !PageHead(page), page); |
6619 | if (!move_anon()) | 6891 | if (!move_anon()) |
6620 | return ret; | 6892 | return ret; |
6621 | pc = lookup_page_cgroup(page); | 6893 | pc = lookup_page_cgroup(page); |