diff options
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 353 |
1 files changed, 321 insertions, 32 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f1a0ae6e11b8..7aa0d405b148 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/swapops.h> | 45 | #include <linux/swapops.h> |
46 | #include <linux/spinlock.h> | 46 | #include <linux/spinlock.h> |
47 | #include <linux/eventfd.h> | 47 | #include <linux/eventfd.h> |
48 | #include <linux/poll.h> | ||
48 | #include <linux/sort.h> | 49 | #include <linux/sort.h> |
49 | #include <linux/fs.h> | 50 | #include <linux/fs.h> |
50 | #include <linux/seq_file.h> | 51 | #include <linux/seq_file.h> |
@@ -55,6 +56,7 @@ | |||
55 | #include <linux/cpu.h> | 56 | #include <linux/cpu.h> |
56 | #include <linux/oom.h> | 57 | #include <linux/oom.h> |
57 | #include <linux/lockdep.h> | 58 | #include <linux/lockdep.h> |
59 | #include <linux/file.h> | ||
58 | #include "internal.h" | 60 | #include "internal.h" |
59 | #include <net/sock.h> | 61 | #include <net/sock.h> |
60 | #include <net/ip.h> | 62 | #include <net/ip.h> |
@@ -227,6 +229,46 @@ struct mem_cgroup_eventfd_list { | |||
227 | struct eventfd_ctx *eventfd; | 229 | struct eventfd_ctx *eventfd; |
228 | }; | 230 | }; |
229 | 231 | ||
232 | /* | ||
233 | * cgroup_event represents events which userspace wants to receive. | ||
234 | */ | ||
235 | struct mem_cgroup_event { | ||
236 | /* | ||
237 | * memcg which the event belongs to. | ||
238 | */ | ||
239 | struct mem_cgroup *memcg; | ||
240 | /* | ||
241 | * eventfd to signal userspace about the event. | ||
242 | */ | ||
243 | struct eventfd_ctx *eventfd; | ||
244 | /* | ||
245 | * Each of these stored in a list by the cgroup. | ||
246 | */ | ||
247 | struct list_head list; | ||
248 | /* | ||
249 | * register_event() callback will be used to add a new userspace | ||
250 | * waiter for changes related to this event. Use eventfd_signal() | ||
251 | * on eventfd to send notification to userspace. | ||
252 | */ | ||
253 | int (*register_event)(struct mem_cgroup *memcg, | ||
254 | struct eventfd_ctx *eventfd, const char *args); | ||
255 | /* | ||
256 | * unregister_event() callback will be called when userspace closes | ||
257 | * the eventfd or when the cgroup is removed. This callback must be | ||
258 | * set if you want to provide notification functionality. | ||
259 | */ | ||
260 | void (*unregister_event)(struct mem_cgroup *memcg, | ||
261 | struct eventfd_ctx *eventfd); | ||
262 | /* | ||
263 | * All fields below are needed to unregister the event when | ||
264 | * userspace closes the eventfd. | ||
265 | */ | ||
266 | poll_table pt; | ||
267 | wait_queue_head_t *wqh; | ||
268 | wait_queue_t wait; | ||
269 | struct work_struct remove; | ||
270 | }; | ||
271 | |||
230 | static void mem_cgroup_threshold(struct mem_cgroup *memcg); | 272 | static void mem_cgroup_threshold(struct mem_cgroup *memcg); |
231 | static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); | 273 | static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); |
232 | 274 | ||
@@ -331,6 +373,10 @@ struct mem_cgroup { | |||
331 | atomic_t numainfo_updating; | 373 | atomic_t numainfo_updating; |
332 | #endif | 374 | #endif |
333 | 375 | ||
376 | /* List of events which userspace wants to receive */ | ||
377 | struct list_head event_list; | ||
378 | spinlock_t event_list_lock; | ||
379 | |||
334 | struct mem_cgroup_per_node *nodeinfo[0]; | 380 | struct mem_cgroup_per_node *nodeinfo[0]; |
335 | /* WARNING: nodeinfo must be the last member here */ | 381 | /* WARNING: nodeinfo must be the last member here */ |
336 | }; | 382 | }; |
@@ -490,11 +536,6 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr) | |||
490 | return &container_of(vmpr, struct mem_cgroup, vmpressure)->css; | 536 | return &container_of(vmpr, struct mem_cgroup, vmpressure)->css; |
491 | } | 537 | } |
492 | 538 | ||
493 | struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css) | ||
494 | { | ||
495 | return &mem_cgroup_from_css(css)->vmpressure; | ||
496 | } | ||
497 | |||
498 | static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) | 539 | static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) |
499 | { | 540 | { |
500 | return (memcg == root_mem_cgroup); | 541 | return (memcg == root_mem_cgroup); |
@@ -5648,13 +5689,11 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg) | |||
5648 | mem_cgroup_oom_notify_cb(iter); | 5689 | mem_cgroup_oom_notify_cb(iter); |
5649 | } | 5690 | } |
5650 | 5691 | ||
5651 | static int mem_cgroup_usage_register_event(struct cgroup_subsys_state *css, | 5692 | static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg, |
5652 | struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) | 5693 | struct eventfd_ctx *eventfd, const char *args, enum res_type type) |
5653 | { | 5694 | { |
5654 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
5655 | struct mem_cgroup_thresholds *thresholds; | 5695 | struct mem_cgroup_thresholds *thresholds; |
5656 | struct mem_cgroup_threshold_ary *new; | 5696 | struct mem_cgroup_threshold_ary *new; |
5657 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
5658 | u64 threshold, usage; | 5697 | u64 threshold, usage; |
5659 | int i, size, ret; | 5698 | int i, size, ret; |
5660 | 5699 | ||
@@ -5731,13 +5770,23 @@ unlock: | |||
5731 | return ret; | 5770 | return ret; |
5732 | } | 5771 | } |
5733 | 5772 | ||
5734 | static void mem_cgroup_usage_unregister_event(struct cgroup_subsys_state *css, | 5773 | static int mem_cgroup_usage_register_event(struct mem_cgroup *memcg, |
5735 | struct cftype *cft, struct eventfd_ctx *eventfd) | 5774 | struct eventfd_ctx *eventfd, const char *args) |
5775 | { | ||
5776 | return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEM); | ||
5777 | } | ||
5778 | |||
5779 | static int memsw_cgroup_usage_register_event(struct mem_cgroup *memcg, | ||
5780 | struct eventfd_ctx *eventfd, const char *args) | ||
5781 | { | ||
5782 | return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEMSWAP); | ||
5783 | } | ||
5784 | |||
5785 | static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, | ||
5786 | struct eventfd_ctx *eventfd, enum res_type type) | ||
5736 | { | 5787 | { |
5737 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
5738 | struct mem_cgroup_thresholds *thresholds; | 5788 | struct mem_cgroup_thresholds *thresholds; |
5739 | struct mem_cgroup_threshold_ary *new; | 5789 | struct mem_cgroup_threshold_ary *new; |
5740 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
5741 | u64 usage; | 5790 | u64 usage; |
5742 | int i, j, size; | 5791 | int i, j, size; |
5743 | 5792 | ||
@@ -5810,14 +5859,23 @@ unlock: | |||
5810 | mutex_unlock(&memcg->thresholds_lock); | 5859 | mutex_unlock(&memcg->thresholds_lock); |
5811 | } | 5860 | } |
5812 | 5861 | ||
5813 | static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css, | 5862 | static void mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, |
5814 | struct cftype *cft, struct eventfd_ctx *eventfd, const char *args) | 5863 | struct eventfd_ctx *eventfd) |
5864 | { | ||
5865 | return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEM); | ||
5866 | } | ||
5867 | |||
5868 | static void memsw_cgroup_usage_unregister_event(struct mem_cgroup *memcg, | ||
5869 | struct eventfd_ctx *eventfd) | ||
5870 | { | ||
5871 | return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEMSWAP); | ||
5872 | } | ||
5873 | |||
5874 | static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg, | ||
5875 | struct eventfd_ctx *eventfd, const char *args) | ||
5815 | { | 5876 | { |
5816 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
5817 | struct mem_cgroup_eventfd_list *event; | 5877 | struct mem_cgroup_eventfd_list *event; |
5818 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
5819 | 5878 | ||
5820 | BUG_ON(type != _OOM_TYPE); | ||
5821 | event = kmalloc(sizeof(*event), GFP_KERNEL); | 5879 | event = kmalloc(sizeof(*event), GFP_KERNEL); |
5822 | if (!event) | 5880 | if (!event) |
5823 | return -ENOMEM; | 5881 | return -ENOMEM; |
@@ -5835,14 +5893,10 @@ static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css, | |||
5835 | return 0; | 5893 | return 0; |
5836 | } | 5894 | } |
5837 | 5895 | ||
5838 | static void mem_cgroup_oom_unregister_event(struct cgroup_subsys_state *css, | 5896 | static void mem_cgroup_oom_unregister_event(struct mem_cgroup *memcg, |
5839 | struct cftype *cft, struct eventfd_ctx *eventfd) | 5897 | struct eventfd_ctx *eventfd) |
5840 | { | 5898 | { |
5841 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
5842 | struct mem_cgroup_eventfd_list *ev, *tmp; | 5899 | struct mem_cgroup_eventfd_list *ev, *tmp; |
5843 | enum res_type type = MEMFILE_TYPE(cft->private); | ||
5844 | |||
5845 | BUG_ON(type != _OOM_TYPE); | ||
5846 | 5900 | ||
5847 | spin_lock(&memcg_oom_lock); | 5901 | spin_lock(&memcg_oom_lock); |
5848 | 5902 | ||
@@ -5959,13 +6013,233 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) | |||
5959 | } | 6013 | } |
5960 | #endif | 6014 | #endif |
5961 | 6015 | ||
6016 | /* | ||
6017 | * DO NOT USE IN NEW FILES. | ||
6018 | * | ||
6019 | * "cgroup.event_control" implementation. | ||
6020 | * | ||
6021 | * This is way over-engineered. It tries to support fully configurable | ||
6022 | * events for each user. Such level of flexibility is completely | ||
6023 | * unnecessary especially in the light of the planned unified hierarchy. | ||
6024 | * | ||
6025 | * Please deprecate this and replace with something simpler if at all | ||
6026 | * possible. | ||
6027 | */ | ||
6028 | |||
6029 | /* | ||
6030 | * Unregister event and free resources. | ||
6031 | * | ||
6032 | * Gets called from workqueue. | ||
6033 | */ | ||
6034 | static void memcg_event_remove(struct work_struct *work) | ||
6035 | { | ||
6036 | struct mem_cgroup_event *event = | ||
6037 | container_of(work, struct mem_cgroup_event, remove); | ||
6038 | struct mem_cgroup *memcg = event->memcg; | ||
6039 | |||
6040 | remove_wait_queue(event->wqh, &event->wait); | ||
6041 | |||
6042 | event->unregister_event(memcg, event->eventfd); | ||
6043 | |||
6044 | /* Notify userspace the event is going away. */ | ||
6045 | eventfd_signal(event->eventfd, 1); | ||
6046 | |||
6047 | eventfd_ctx_put(event->eventfd); | ||
6048 | kfree(event); | ||
6049 | css_put(&memcg->css); | ||
6050 | } | ||
6051 | |||
6052 | /* | ||
6053 | * Gets called on POLLHUP on eventfd when user closes it. | ||
6054 | * | ||
6055 | * Called with wqh->lock held and interrupts disabled. | ||
6056 | */ | ||
6057 | static int memcg_event_wake(wait_queue_t *wait, unsigned mode, | ||
6058 | int sync, void *key) | ||
6059 | { | ||
6060 | struct mem_cgroup_event *event = | ||
6061 | container_of(wait, struct mem_cgroup_event, wait); | ||
6062 | struct mem_cgroup *memcg = event->memcg; | ||
6063 | unsigned long flags = (unsigned long)key; | ||
6064 | |||
6065 | if (flags & POLLHUP) { | ||
6066 | /* | ||
6067 | * If the event has been detached at cgroup removal, we | ||
6068 | * can simply return knowing the other side will cleanup | ||
6069 | * for us. | ||
6070 | * | ||
6071 | * We can't race against event freeing since the other | ||
6072 | * side will require wqh->lock via remove_wait_queue(), | ||
6073 | * which we hold. | ||
6074 | */ | ||
6075 | spin_lock(&memcg->event_list_lock); | ||
6076 | if (!list_empty(&event->list)) { | ||
6077 | list_del_init(&event->list); | ||
6078 | /* | ||
6079 | * We are in atomic context, but memcg_event_remove() | ||
6080 | * may sleep, so we have to call it from a workqueue. | ||
6081 | */ | ||
6082 | schedule_work(&event->remove); | ||
6083 | } | ||
6084 | spin_unlock(&memcg->event_list_lock); | ||
6085 | } | ||
6086 | |||
6087 | return 0; | ||
6088 | } | ||
6089 | |||
6090 | static void memcg_event_ptable_queue_proc(struct file *file, | ||
6091 | wait_queue_head_t *wqh, poll_table *pt) | ||
6092 | { | ||
6093 | struct mem_cgroup_event *event = | ||
6094 | container_of(pt, struct mem_cgroup_event, pt); | ||
6095 | |||
6096 | event->wqh = wqh; | ||
6097 | add_wait_queue(wqh, &event->wait); | ||
6098 | } | ||
6099 | |||
6100 | /* | ||
6101 | * DO NOT USE IN NEW FILES. | ||
6102 | * | ||
6103 | * Parse input and register new cgroup event handler. | ||
6104 | * | ||
6105 | * Input must be in format '<event_fd> <control_fd> <args>'. | ||
6106 | * Interpretation of args is defined by control file implementation. | ||
6107 | */ | ||
6108 | static int memcg_write_event_control(struct cgroup_subsys_state *css, | ||
6109 | struct cftype *cft, const char *buffer) | ||
6110 | { | ||
6111 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
6112 | struct mem_cgroup_event *event; | ||
6113 | struct cgroup_subsys_state *cfile_css; | ||
6114 | unsigned int efd, cfd; | ||
6115 | struct fd efile; | ||
6116 | struct fd cfile; | ||
6117 | const char *name; | ||
6118 | char *endp; | ||
6119 | int ret; | ||
6120 | |||
6121 | efd = simple_strtoul(buffer, &endp, 10); | ||
6122 | if (*endp != ' ') | ||
6123 | return -EINVAL; | ||
6124 | buffer = endp + 1; | ||
6125 | |||
6126 | cfd = simple_strtoul(buffer, &endp, 10); | ||
6127 | if ((*endp != ' ') && (*endp != '\0')) | ||
6128 | return -EINVAL; | ||
6129 | buffer = endp + 1; | ||
6130 | |||
6131 | event = kzalloc(sizeof(*event), GFP_KERNEL); | ||
6132 | if (!event) | ||
6133 | return -ENOMEM; | ||
6134 | |||
6135 | event->memcg = memcg; | ||
6136 | INIT_LIST_HEAD(&event->list); | ||
6137 | init_poll_funcptr(&event->pt, memcg_event_ptable_queue_proc); | ||
6138 | init_waitqueue_func_entry(&event->wait, memcg_event_wake); | ||
6139 | INIT_WORK(&event->remove, memcg_event_remove); | ||
6140 | |||
6141 | efile = fdget(efd); | ||
6142 | if (!efile.file) { | ||
6143 | ret = -EBADF; | ||
6144 | goto out_kfree; | ||
6145 | } | ||
6146 | |||
6147 | event->eventfd = eventfd_ctx_fileget(efile.file); | ||
6148 | if (IS_ERR(event->eventfd)) { | ||
6149 | ret = PTR_ERR(event->eventfd); | ||
6150 | goto out_put_efile; | ||
6151 | } | ||
6152 | |||
6153 | cfile = fdget(cfd); | ||
6154 | if (!cfile.file) { | ||
6155 | ret = -EBADF; | ||
6156 | goto out_put_eventfd; | ||
6157 | } | ||
6158 | |||
6159 | /* the process needs read permission on the control file */ | ||
6160 | /* AV: shouldn't we check that it's been opened for read instead? */ | ||
6161 | ret = inode_permission(file_inode(cfile.file), MAY_READ); | ||
6162 | if (ret < 0) | ||
6163 | goto out_put_cfile; | ||
6164 | |||
6165 | /* | ||
6166 | * Determine the event callbacks and set them in @event. This used | ||
6167 | * to be done via struct cftype but cgroup core no longer knows | ||
6168 | * about these events. The following is crude but the whole thing | ||
6169 | * is for compatibility anyway. | ||
6170 | * | ||
6171 | * DO NOT ADD NEW FILES. | ||
6172 | */ | ||
6173 | name = cfile.file->f_dentry->d_name.name; | ||
6174 | |||
6175 | if (!strcmp(name, "memory.usage_in_bytes")) { | ||
6176 | event->register_event = mem_cgroup_usage_register_event; | ||
6177 | event->unregister_event = mem_cgroup_usage_unregister_event; | ||
6178 | } else if (!strcmp(name, "memory.oom_control")) { | ||
6179 | event->register_event = mem_cgroup_oom_register_event; | ||
6180 | event->unregister_event = mem_cgroup_oom_unregister_event; | ||
6181 | } else if (!strcmp(name, "memory.pressure_level")) { | ||
6182 | event->register_event = vmpressure_register_event; | ||
6183 | event->unregister_event = vmpressure_unregister_event; | ||
6184 | } else if (!strcmp(name, "memory.memsw.usage_in_bytes")) { | ||
6185 | event->register_event = memsw_cgroup_usage_register_event; | ||
6186 | event->unregister_event = memsw_cgroup_usage_unregister_event; | ||
6187 | } else { | ||
6188 | ret = -EINVAL; | ||
6189 | goto out_put_cfile; | ||
6190 | } | ||
6191 | |||
6192 | /* | ||
6193 | * Verify @cfile should belong to @css. Also, remaining events are | ||
6194 | * automatically removed on cgroup destruction but the removal is | ||
6195 | * asynchronous, so take an extra ref on @css. | ||
6196 | */ | ||
6197 | rcu_read_lock(); | ||
6198 | |||
6199 | ret = -EINVAL; | ||
6200 | cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, | ||
6201 | &mem_cgroup_subsys); | ||
6202 | if (cfile_css == css && css_tryget(css)) | ||
6203 | ret = 0; | ||
6204 | |||
6205 | rcu_read_unlock(); | ||
6206 | if (ret) | ||
6207 | goto out_put_cfile; | ||
6208 | |||
6209 | ret = event->register_event(memcg, event->eventfd, buffer); | ||
6210 | if (ret) | ||
6211 | goto out_put_css; | ||
6212 | |||
6213 | efile.file->f_op->poll(efile.file, &event->pt); | ||
6214 | |||
6215 | spin_lock(&memcg->event_list_lock); | ||
6216 | list_add(&event->list, &memcg->event_list); | ||
6217 | spin_unlock(&memcg->event_list_lock); | ||
6218 | |||
6219 | fdput(cfile); | ||
6220 | fdput(efile); | ||
6221 | |||
6222 | return 0; | ||
6223 | |||
6224 | out_put_css: | ||
6225 | css_put(css); | ||
6226 | out_put_cfile: | ||
6227 | fdput(cfile); | ||
6228 | out_put_eventfd: | ||
6229 | eventfd_ctx_put(event->eventfd); | ||
6230 | out_put_efile: | ||
6231 | fdput(efile); | ||
6232 | out_kfree: | ||
6233 | kfree(event); | ||
6234 | |||
6235 | return ret; | ||
6236 | } | ||
6237 | |||
5962 | static struct cftype mem_cgroup_files[] = { | 6238 | static struct cftype mem_cgroup_files[] = { |
5963 | { | 6239 | { |
5964 | .name = "usage_in_bytes", | 6240 | .name = "usage_in_bytes", |
5965 | .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), | 6241 | .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), |
5966 | .read = mem_cgroup_read, | 6242 | .read = mem_cgroup_read, |
5967 | .register_event = mem_cgroup_usage_register_event, | ||
5968 | .unregister_event = mem_cgroup_usage_unregister_event, | ||
5969 | }, | 6243 | }, |
5970 | { | 6244 | { |
5971 | .name = "max_usage_in_bytes", | 6245 | .name = "max_usage_in_bytes", |
@@ -6006,6 +6280,12 @@ static struct cftype mem_cgroup_files[] = { | |||
6006 | .read_u64 = mem_cgroup_hierarchy_read, | 6280 | .read_u64 = mem_cgroup_hierarchy_read, |
6007 | }, | 6281 | }, |
6008 | { | 6282 | { |
6283 | .name = "cgroup.event_control", /* XXX: for compat */ | ||
6284 | .write_string = memcg_write_event_control, | ||
6285 | .flags = CFTYPE_NO_PREFIX, | ||
6286 | .mode = S_IWUGO, | ||
6287 | }, | ||
6288 | { | ||
6009 | .name = "swappiness", | 6289 | .name = "swappiness", |
6010 | .read_u64 = mem_cgroup_swappiness_read, | 6290 | .read_u64 = mem_cgroup_swappiness_read, |
6011 | .write_u64 = mem_cgroup_swappiness_write, | 6291 | .write_u64 = mem_cgroup_swappiness_write, |
@@ -6019,14 +6299,10 @@ static struct cftype mem_cgroup_files[] = { | |||
6019 | .name = "oom_control", | 6299 | .name = "oom_control", |
6020 | .read_map = mem_cgroup_oom_control_read, | 6300 | .read_map = mem_cgroup_oom_control_read, |
6021 | .write_u64 = mem_cgroup_oom_control_write, | 6301 | .write_u64 = mem_cgroup_oom_control_write, |
6022 | .register_event = mem_cgroup_oom_register_event, | ||
6023 | .unregister_event = mem_cgroup_oom_unregister_event, | ||
6024 | .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), | 6302 | .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), |
6025 | }, | 6303 | }, |
6026 | { | 6304 | { |
6027 | .name = "pressure_level", | 6305 | .name = "pressure_level", |
6028 | .register_event = vmpressure_register_event, | ||
6029 | .unregister_event = vmpressure_unregister_event, | ||
6030 | }, | 6306 | }, |
6031 | #ifdef CONFIG_NUMA | 6307 | #ifdef CONFIG_NUMA |
6032 | { | 6308 | { |
@@ -6074,8 +6350,6 @@ static struct cftype memsw_cgroup_files[] = { | |||
6074 | .name = "memsw.usage_in_bytes", | 6350 | .name = "memsw.usage_in_bytes", |
6075 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), | 6351 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), |
6076 | .read = mem_cgroup_read, | 6352 | .read = mem_cgroup_read, |
6077 | .register_event = mem_cgroup_usage_register_event, | ||
6078 | .unregister_event = mem_cgroup_usage_unregister_event, | ||
6079 | }, | 6353 | }, |
6080 | { | 6354 | { |
6081 | .name = "memsw.max_usage_in_bytes", | 6355 | .name = "memsw.max_usage_in_bytes", |
@@ -6265,6 +6539,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | |||
6265 | mutex_init(&memcg->thresholds_lock); | 6539 | mutex_init(&memcg->thresholds_lock); |
6266 | spin_lock_init(&memcg->move_lock); | 6540 | spin_lock_init(&memcg->move_lock); |
6267 | vmpressure_init(&memcg->vmpressure); | 6541 | vmpressure_init(&memcg->vmpressure); |
6542 | INIT_LIST_HEAD(&memcg->event_list); | ||
6543 | spin_lock_init(&memcg->event_list_lock); | ||
6268 | 6544 | ||
6269 | return &memcg->css; | 6545 | return &memcg->css; |
6270 | 6546 | ||
@@ -6340,6 +6616,19 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg) | |||
6340 | static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) | 6616 | static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) |
6341 | { | 6617 | { |
6342 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 6618 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
6619 | struct mem_cgroup_event *event, *tmp; | ||
6620 | |||
6621 | /* | ||
6622 | * Unregister events and notify userspace. | ||
6623 | * Notify userspace about cgroup removing only after rmdir of cgroup | ||
6624 | * directory to avoid race between userspace and kernelspace. | ||
6625 | */ | ||
6626 | spin_lock(&memcg->event_list_lock); | ||
6627 | list_for_each_entry_safe(event, tmp, &memcg->event_list, list) { | ||
6628 | list_del_init(&event->list); | ||
6629 | schedule_work(&event->remove); | ||
6630 | } | ||
6631 | spin_unlock(&memcg->event_list_lock); | ||
6343 | 6632 | ||
6344 | kmem_cgroup_css_offline(memcg); | 6633 | kmem_cgroup_css_offline(memcg); |
6345 | 6634 | ||