summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Nikolay Borisov <n.borisov.lkml@gmail.com> 2016-12-14 08:56:33 -0500
committer: Eric W. Biederman <ebiederm@xmission.com> 2017-01-23 18:03:07 -0500
commit: 1cce1eea0aff51201753fcaca421df825b0813b6 (patch)
tree: 9717a36b5968a179942e2b2f62d21c3c05cc39c6
parent: 880a38547ff08715ce4f1daf9a4bb30c87676e68 (diff)
inotify: Convert to using per-namespace limits
This patchset converts inotify to using the newly introduced per-userns sysctl infrastructure.

Currently the inotify instances/watches are being accounted in the user_struct structure. This means that in setups where multiple users in unprivileged containers map to the same underlying real user (i.e. pointing to the same user_struct) the inotify limits are going to be shared as well, allowing one user (or application) to exhaust all others' limits.

Fix this by switching the inotify sysctls to using the per-namespace/per-user limits. This will allow the server admin to set sensible global limits, which can further be tuned inside every individual user namespace. Additionally, in order to preserve the sysctl ABI, make the existing inotify instances/watches sysctls modify the values of the initial user namespace.

Signed-off-by: Nikolay Borisov <n.borisov.lkml@gmail.com>
Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
-rw-r--r-- fs/notify/inotify/inotify.h 17
-rw-r--r-- fs/notify/inotify/inotify_fsnotify.c 6
-rw-r--r-- fs/notify/inotify/inotify_user.c 34
-rw-r--r-- include/linux/fsnotify_backend.h 3
-rw-r--r-- include/linux/sched.h 4
-rw-r--r-- include/linux/user_namespace.h 4
-rw-r--r-- kernel/ucount.c 6
7 files changed, 47 insertions, 27 deletions
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index a6f5907a3fee..7c461fd49c4c 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -30,3 +30,20 @@ extern int inotify_handle_event(struct fsnotify_group *group,
30 const unsigned char *file_name, u32 cookie); 30 const unsigned char *file_name, u32 cookie);
31 31
32extern const struct fsnotify_ops inotify_fsnotify_ops; 32extern const struct fsnotify_ops inotify_fsnotify_ops;
33
34#ifdef CONFIG_INOTIFY_USER
35static inline void dec_inotify_instances(struct ucounts *ucounts)
36{
37 dec_ucount(ucounts, UCOUNT_INOTIFY_INSTANCES);
38}
39
40static inline struct ucounts *inc_inotify_watches(struct ucounts *ucounts)
41{
42 return inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_INOTIFY_WATCHES);
43}
44
45static inline void dec_inotify_watches(struct ucounts *ucounts)
46{
47 dec_ucount(ucounts, UCOUNT_INOTIFY_WATCHES);
48}
49#endif
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 19e7ec109a75..f36c29398de3 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -165,10 +165,8 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
165 /* ideally the idr is empty and we won't hit the BUG in the callback */ 165 /* ideally the idr is empty and we won't hit the BUG in the callback */
166 idr_for_each(&group->inotify_data.idr, idr_callback, group); 166 idr_for_each(&group->inotify_data.idr, idr_callback, group);
167 idr_destroy(&group->inotify_data.idr); 167 idr_destroy(&group->inotify_data.idr);
168 if (group->inotify_data.user) { 168 if (group->inotify_data.ucounts)
169 atomic_dec(&group->inotify_data.user->inotify_devs); 169 dec_inotify_instances(group->inotify_data.ucounts);
170 free_uid(group->inotify_data.user);
171 }
172} 170}
173 171
174static void inotify_free_event(struct fsnotify_event *fsn_event) 172static void inotify_free_event(struct fsnotify_event *fsn_event)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 69d1ea3d292a..1cf41c623be1 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -44,10 +44,8 @@
44 44
45#include <asm/ioctls.h> 45#include <asm/ioctls.h>
46 46
47/* these are configurable via /proc/sys/fs/inotify/ */ 47/* configurable via /proc/sys/fs/inotify/ */
48static int inotify_max_user_instances __read_mostly;
49static int inotify_max_queued_events __read_mostly; 48static int inotify_max_queued_events __read_mostly;
50static int inotify_max_user_watches __read_mostly;
51 49
52static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; 50static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
53 51
@@ -60,7 +58,7 @@ static int zero;
60struct ctl_table inotify_table[] = { 58struct ctl_table inotify_table[] = {
61 { 59 {
62 .procname = "max_user_instances", 60 .procname = "max_user_instances",
63 .data = &inotify_max_user_instances, 61 .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
64 .maxlen = sizeof(int), 62 .maxlen = sizeof(int),
65 .mode = 0644, 63 .mode = 0644,
66 .proc_handler = proc_dointvec_minmax, 64 .proc_handler = proc_dointvec_minmax,
@@ -68,7 +66,7 @@ struct ctl_table inotify_table[] = {
68 }, 66 },
69 { 67 {
70 .procname = "max_user_watches", 68 .procname = "max_user_watches",
71 .data = &inotify_max_user_watches, 69 .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES],
72 .maxlen = sizeof(int), 70 .maxlen = sizeof(int),
73 .mode = 0644, 71 .mode = 0644,
74 .proc_handler = proc_dointvec_minmax, 72 .proc_handler = proc_dointvec_minmax,
@@ -500,7 +498,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
500 /* remove this mark from the idr */ 498 /* remove this mark from the idr */
501 inotify_remove_from_idr(group, i_mark); 499 inotify_remove_from_idr(group, i_mark);
502 500
503 atomic_dec(&group->inotify_data.user->inotify_watches); 501 dec_inotify_watches(group->inotify_data.ucounts);
504} 502}
505 503
506/* ding dong the mark is dead */ 504/* ding dong the mark is dead */
@@ -584,14 +582,17 @@ static int inotify_new_watch(struct fsnotify_group *group,
584 tmp_i_mark->fsn_mark.mask = mask; 582 tmp_i_mark->fsn_mark.mask = mask;
585 tmp_i_mark->wd = -1; 583 tmp_i_mark->wd = -1;
586 584
587 ret = -ENOSPC;
588 if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
589 goto out_err;
590
591 ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark); 585 ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
592 if (ret) 586 if (ret)
593 goto out_err; 587 goto out_err;
594 588
589 /* increment the number of watches the user has */
590 if (!inc_inotify_watches(group->inotify_data.ucounts)) {
591 inotify_remove_from_idr(group, tmp_i_mark);
592 ret = -ENOSPC;
593 goto out_err;
594 }
595
595 /* we are on the idr, now get on the inode */ 596 /* we are on the idr, now get on the inode */
596 ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode, 597 ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode,
597 NULL, 0); 598 NULL, 0);
@@ -601,8 +602,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
601 goto out_err; 602 goto out_err;
602 } 603 }
603 604
604 /* increment the number of watches the user has */
605 atomic_inc(&group->inotify_data.user->inotify_watches);
606 605
607 /* return the watch descriptor for this new mark */ 606 /* return the watch descriptor for this new mark */
608 ret = tmp_i_mark->wd; 607 ret = tmp_i_mark->wd;
@@ -653,10 +652,11 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
653 652
654 spin_lock_init(&group->inotify_data.idr_lock); 653 spin_lock_init(&group->inotify_data.idr_lock);
655 idr_init(&group->inotify_data.idr); 654 idr_init(&group->inotify_data.idr);
656 group->inotify_data.user = get_current_user(); 655 group->inotify_data.ucounts = inc_ucount(current_user_ns(),
656 current_euid(),
657 UCOUNT_INOTIFY_INSTANCES);
657 658
658 if (atomic_inc_return(&group->inotify_data.user->inotify_devs) > 659 if (!group->inotify_data.ucounts) {
659 inotify_max_user_instances) {
660 fsnotify_destroy_group(group); 660 fsnotify_destroy_group(group);
661 return ERR_PTR(-EMFILE); 661 return ERR_PTR(-EMFILE);
662 } 662 }
@@ -819,8 +819,8 @@ static int __init inotify_user_setup(void)
819 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); 819 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
820 820
821 inotify_max_queued_events = 16384; 821 inotify_max_queued_events = 16384;
822 inotify_max_user_instances = 128; 822 init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
823 inotify_max_user_watches = 8192; 823 init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192;
824 824
825 return 0; 825 return 0;
826} 826}
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 0cf34d6cc253..c8f2738113f4 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -16,6 +16,7 @@
16#include <linux/spinlock.h> 16#include <linux/spinlock.h>
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/atomic.h> 18#include <linux/atomic.h>
19#include <linux/user_namespace.h>
19 20
20/* 21/*
21 * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily 22 * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
@@ -170,7 +171,7 @@ struct fsnotify_group {
170 struct inotify_group_private_data { 171 struct inotify_group_private_data {
171 spinlock_t idr_lock; 172 spinlock_t idr_lock;
172 struct idr idr; 173 struct idr idr;
173 struct user_struct *user; 174 struct ucounts *ucounts;
174 } inotify_data; 175 } inotify_data;
175#endif 176#endif
176#ifdef CONFIG_FANOTIFY 177#ifdef CONFIG_FANOTIFY
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4d1905245c7a..d2334229167f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -868,10 +868,6 @@ struct user_struct {
868 atomic_t __count; /* reference count */ 868 atomic_t __count; /* reference count */
869 atomic_t processes; /* How many processes does this user have? */ 869 atomic_t processes; /* How many processes does this user have? */
870 atomic_t sigpending; /* How many pending signals does this user have? */ 870 atomic_t sigpending; /* How many pending signals does this user have? */
871#ifdef CONFIG_INOTIFY_USER
872 atomic_t inotify_watches; /* How many inotify watches does this user have? */
873 atomic_t inotify_devs; /* How many inotify devs does this user have opened? */
874#endif
875#ifdef CONFIG_FANOTIFY 871#ifdef CONFIG_FANOTIFY
876 atomic_t fanotify_listeners; 872 atomic_t fanotify_listeners;
877#endif 873#endif
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index eb209d4523f5..363e0e8082a9 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -32,6 +32,10 @@ enum ucount_type {
32 UCOUNT_NET_NAMESPACES, 32 UCOUNT_NET_NAMESPACES,
33 UCOUNT_MNT_NAMESPACES, 33 UCOUNT_MNT_NAMESPACES,
34 UCOUNT_CGROUP_NAMESPACES, 34 UCOUNT_CGROUP_NAMESPACES,
35#ifdef CONFIG_INOTIFY_USER
36 UCOUNT_INOTIFY_INSTANCES,
37 UCOUNT_INOTIFY_WATCHES,
38#endif
35 UCOUNT_COUNTS, 39 UCOUNT_COUNTS,
36}; 40};
37 41
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 4bbd38ec3788..68716403b261 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -57,7 +57,7 @@ static struct ctl_table_root set_root = {
57 57
58static int zero = 0; 58static int zero = 0;
59static int int_max = INT_MAX; 59static int int_max = INT_MAX;
60#define UCOUNT_ENTRY(name) \ 60#define UCOUNT_ENTRY(name) \
61 { \ 61 { \
62 .procname = name, \ 62 .procname = name, \
63 .maxlen = sizeof(int), \ 63 .maxlen = sizeof(int), \
@@ -74,6 +74,10 @@ static struct ctl_table user_table[] = {
74 UCOUNT_ENTRY("max_net_namespaces"), 74 UCOUNT_ENTRY("max_net_namespaces"),
75 UCOUNT_ENTRY("max_mnt_namespaces"), 75 UCOUNT_ENTRY("max_mnt_namespaces"),
76 UCOUNT_ENTRY("max_cgroup_namespaces"), 76 UCOUNT_ENTRY("max_cgroup_namespaces"),
77#ifdef CONFIG_INOTIFY_USER
78 UCOUNT_ENTRY("max_inotify_instances"),
79 UCOUNT_ENTRY("max_inotify_watches"),
80#endif
77 { } 81 { }
78}; 82};
79#endif /* CONFIG_SYSCTL */ 83#endif /* CONFIG_SYSCTL */