summaryrefslogtreecommitdiffstats
path: root/fs/notify/inotify
diff options
context:
space:
mode:
authorShakeel Butt <shakeelb@google.com>2018-08-17 18:46:39 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-17 19:20:30 -0400
commitd46eb14b735b11927d4bdc2d1854c311af19de6d (patch)
treefffe3fa953dec4dde9de89cb985d767fd168c8a6 /fs/notify/inotify
parentdc0b58643aff8b378086f25cce6789ccba68cbcb (diff)
fs: fsnotify: account fsnotify metadata to kmemcg
Patch series "Directed kmem charging", v8. The Linux kernel's memory cgroup allows limiting the memory usage of the jobs running on the system to provide isolation between the jobs. All the kernel memory allocated in the context of the job and marked with __GFP_ACCOUNT will also be included in the memory usage and be limited by the job's limit. The kernel memory can only be charged to the memcg of the process in whose context kernel memory was allocated. However there are cases where the allocated kernel memory should be charged to the memcg different from the current processes's memcg. This patch series contains two such concrete use-cases i.e. fsnotify and buffer_head. The fsnotify event objects can consume a lot of system memory for large or unlimited queues if there is either no or slow listener. The events are allocated in the context of the event producer. However they should be charged to the event consumer. Similarly the buffer_head objects can be allocated in a memcg different from the memcg of the page for which buffer_head objects are being allocated. To solve this issue, this patch series introduces mechanism to charge kernel memory to a given memcg. In case of fsnotify events, the memcg of the consumer can be used for charging and for buffer_head, the memcg of the page can be charged. For directed charging, the caller can use the scope API memalloc_[un]use_memcg() to specify the memcg to charge for all the __GFP_ACCOUNT allocations within the scope. This patch (of 2): A lot of memory can be consumed by the events generated for the huge or unlimited queues if there is either no or slow listener. This can cause system level memory pressure or OOMs. So, it's better to account the fsnotify kmem caches to the memcg of the listener. However the listener can be in a different memcg than the memcg of the producer and these allocations happen in the context of the event producer. 
This patch introduces a remote memcg charging API which the producer can use to charge the allocations to the memcg of the listener. There are seven fsnotify kmem caches and among them allocations from dnotify_struct_cache, dnotify_mark_cache, fanotify_mark_cache and inotify_inode_mark_cachep happen in the context of a syscall from the listener. So, SLAB_ACCOUNT is enough for these caches. The objects from fsnotify_mark_connector_cachep are not accounted as they are small compared to the notification mark or events and it is unclear to whom to account the connector since it is shared by all events attached to the inode. The allocations from the event caches happen in the context of the event producer. For such caches we will need to remote-charge the allocations to the listener's memcg. Thus we save the memcg reference in the fsnotify_group structure of the listener. This patch has also moved the members of fsnotify_group to keep the size the same, at least for a 64 bit build, even with the additional member, by filling the holes. [shakeelb@google.com: use GFP_KERNEL_ACCOUNT rather than open-coding it] Link: http://lkml.kernel.org/r/20180702215439.211597-1-shakeelb@google.com Link: http://lkml.kernel.org/r/20180627191250.209150-2-shakeelb@google.com Signed-off-by: Shakeel Butt <shakeelb@google.com> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@kernel.org> Cc: Jan Kara <jack@suse.cz> Cc: Amir Goldstein <amir73il@gmail.com> Cc: Greg Thelen <gthelen@google.com> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Cc: Roman Gushchin <guro@fb.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/notify/inotify')
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c7
-rw-r--r--fs/notify/inotify/inotify_user.c5
2 files changed, 10 insertions, 2 deletions
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 9ab6dde38a14..f4184b4f3815 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -31,6 +31,7 @@
31#include <linux/types.h> 31#include <linux/types.h>
32#include <linux/sched.h> 32#include <linux/sched.h>
33#include <linux/sched/user.h> 33#include <linux/sched/user.h>
34#include <linux/sched/mm.h>
34 35
35#include "inotify.h" 36#include "inotify.h"
36 37
@@ -98,7 +99,11 @@ int inotify_handle_event(struct fsnotify_group *group,
98 i_mark = container_of(inode_mark, struct inotify_inode_mark, 99 i_mark = container_of(inode_mark, struct inotify_inode_mark,
99 fsn_mark); 100 fsn_mark);
100 101
101 event = kmalloc(alloc_len, GFP_KERNEL); 102 /* Whoever is interested in the event, pays for the allocation. */
103 memalloc_use_memcg(group->memcg);
104 event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT);
105 memalloc_unuse_memcg();
106
102 if (unlikely(!event)) { 107 if (unlikely(!event)) {
103 /* 108 /*
104 * Treat lost event due to ENOMEM the same way as queue 109 * Treat lost event due to ENOMEM the same way as queue
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1cf5b779d862..749c46ababa0 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -38,6 +38,7 @@
38#include <linux/uaccess.h> 38#include <linux/uaccess.h>
39#include <linux/poll.h> 39#include <linux/poll.h>
40#include <linux/wait.h> 40#include <linux/wait.h>
41#include <linux/memcontrol.h>
41 42
42#include "inotify.h" 43#include "inotify.h"
43#include "../fdinfo.h" 44#include "../fdinfo.h"
@@ -636,6 +637,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
636 oevent->name_len = 0; 637 oevent->name_len = 0;
637 638
638 group->max_events = max_events; 639 group->max_events = max_events;
640 group->memcg = get_mem_cgroup_from_mm(current->mm);
639 641
640 spin_lock_init(&group->inotify_data.idr_lock); 642 spin_lock_init(&group->inotify_data.idr_lock);
641 idr_init(&group->inotify_data.idr); 643 idr_init(&group->inotify_data.idr);
@@ -808,7 +810,8 @@ static int __init inotify_user_setup(void)
808 810
809 BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); 811 BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
810 812
811 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); 813 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark,
814 SLAB_PANIC|SLAB_ACCOUNT);
812 815
813 inotify_max_queued_events = 16384; 816 inotify_max_queued_events = 16384;
814 init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128; 817 init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;