aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorEric Paris <eparis@redhat.com>2009-05-21 17:01:20 -0400
committerEric Paris <eparis@redhat.com>2009-06-11 14:57:52 -0400
commit90586523eb4b349806887c62ee70685a49415124 (patch)
tree2ba6da637f897bbb2309b141b81801e4151d87b0 /fs
parentc9059598ea8981d02356eead3188bf7fa4d717b8 (diff)
fsnotify: unified filesystem notification backend
fsnotify is a backend for filesystem notification. fsnotify does not provide any userspace interface but does provide the basis needed for other notification schemes such as dnotify. fsnotify can be extended to be the backend for inotify or the upcoming fanotify. fsnotify provides a mechanism for "groups" to register for some set of filesystem events and to then deliver those events to those groups for processing. fsnotify has a number of benefits, the first being actually shrinking the size of an inode. Before fsnotify, to support both dnotify and inotify, an inode had unsigned long i_dnotify_mask; /* Directory notify events */ struct dnotify_struct *i_dnotify; /* for directory notifications */ struct list_head inotify_watches; /* watches on this inode */ struct mutex inotify_mutex; /* protects the watches list */ But with fsnotify this same functionality (and more) is done with just __u32 i_fsnotify_mask; /* all events for this inode */ struct hlist_head i_fsnotify_mark_entries; /* marks on this inode */ That's right, inotify, dnotify, and fanotify all in 64 bits. We used that much space just in inotify_watches alone, before this patch set. fsnotify object lifetime and locking is MUCH better than what we have today. inotify locking is incredibly complex. See 8f7b0ba1c8539 as an example of what's been busted since inception. inotify needs to know internal semantics of superblock destruction and unmounting to function. The inode pinning and vfs contortions are horrible. No fsnotify implementers do allocation under locks. This means things like f04b30de3 which (due to an overabundance of caution) changes GFP_KERNEL to GFP_NOFS can be reverted. There are no longer any allocation rules when using or implementing your own fsnotify listener. fsnotify paves the way for fanotify. In brief, fanotify is a notification mechanism that delivers the listener both an 'event' and an open file descriptor to the object in question. This means that fanotify is pathname agnostic. 
Some on lkml may not care for the original companies or users that pushed for TALPA, but fanotify was designed with flexibility and input for other users in mind. The readahead group expressed interest in fanotify as it could be used to profile disk access on boot without breaking the audit system. The desktop search groups have also expressed interest in fanotify as it solves a number of the race conditions and problems present with managing inotify when more than a limited number of specific files are of interest. fanotify can provide for a userspace access control system which makes it a clean interface for AV vendors to hook without trying to do binary patching on the syscall table, LSM, and everywhere else they do their things today. With this patch series fanotify can be implemented in less than 1200 lines of easy to review code. Almost all of which is the socket based user interface. This patch series builds fsnotify to the point that it can implement dnotify and inotify_user. Patches exist and will be sent soon after acceptance to finish the in kernel inotify conversion (audit) and implement fanotify. Signed-off-by: Eric Paris <eparis@redhat.com> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Cc: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs')
-rw-r--r--fs/notify/Kconfig13
-rw-r--r--fs/notify/Makefile2
-rw-r--r--fs/notify/fsnotify.c79
-rw-r--r--fs/notify/fsnotify.h15
-rw-r--r--fs/notify/group.c198
-rw-r--r--fs/notify/inotify/inotify.c20
-rw-r--r--fs/notify/notification.c121
7 files changed, 448 insertions, 0 deletions
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index 50914d7303c6..31dac7e3b0f1 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -1,2 +1,15 @@
1config FSNOTIFY
2 bool "Filesystem notification backend"
3 default y
4 ---help---
5 fsnotify is a backend for filesystem notification. fsnotify does
6 not provide any userspace interface but does provide the basis
7 needed for other notification schemes such as dnotify, inotify,
8 and fanotify.
9
10 Say Y here to enable fsnotify support.
11
12 If unsure, say Y.
13
1source "fs/notify/dnotify/Kconfig" 14source "fs/notify/dnotify/Kconfig"
2source "fs/notify/inotify/Kconfig" 15source "fs/notify/inotify/Kconfig"
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index 5a95b6010ce7..db5467b5b58d 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,2 +1,4 @@
1obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o
2
1obj-y += dnotify/ 3obj-y += dnotify/
2obj-y += inotify/ 4obj-y += inotify/
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
new file mode 100644
index 000000000000..56bee0f10c38
--- /dev/null
+++ b/fs/notify/fsnotify.c
@@ -0,0 +1,79 @@
1/*
2 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to
16 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19#include <linux/dcache.h>
20#include <linux/fs.h>
21#include <linux/init.h>
22#include <linux/module.h>
23#include <linux/srcu.h>
24
25#include <linux/fsnotify_backend.h>
26#include "fsnotify.h"
27
28/*
29 * This is the main call to fsnotify. The VFS calls into hook specific functions
30 * in linux/fsnotify.h. Those functions then in turn call here. Here will call
31 * out to all of the registered fsnotify_group. Those groups can then use the
32 * notification event in whatever means they feel necessary.
33 */
34void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
35{
36 struct fsnotify_group *group;
37 struct fsnotify_event *event = NULL;
38 int idx;
39
40 if (list_empty(&fsnotify_groups))
41 return;
42
43 if (!(mask & fsnotify_mask))
44 return;
45
46 /*
47 * SRCU!! the groups list is very very much read only and the path is
48 * very hot. The VAST majority of events are not going to need to do
49 * anything other than walk the list so it's crazy to pre-allocate.
50 */
51 idx = srcu_read_lock(&fsnotify_grp_srcu);
52 list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
53 if (mask & group->mask) {
54 if (!event) {
55 event = fsnotify_create_event(to_tell, mask, data, data_is);
56 /* shit, we OOM'd and now we can't tell, maybe
57 * someday someone else will want to do something
58 * here */
59 if (!event)
60 break;
61 }
62 group->ops->handle_event(group, event);
63 }
64 }
65 srcu_read_unlock(&fsnotify_grp_srcu, idx);
66 /*
67 * fsnotify_create_event() took a reference so the event can't be cleaned
68 * up while we are still trying to add it to lists, drop that one.
69 */
70 if (event)
71 fsnotify_put_event(event);
72}
73EXPORT_SYMBOL_GPL(fsnotify);
74
75static __init int fsnotify_init(void)
76{
77 return init_srcu_struct(&fsnotify_grp_srcu);
78}
79subsys_initcall(fsnotify_init);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
new file mode 100644
index 000000000000..c6a8bd476572
--- /dev/null
+++ b/fs/notify/fsnotify.h
@@ -0,0 +1,15 @@
1#ifndef __FS_NOTIFY_FSNOTIFY_H_
2#define __FS_NOTIFY_FSNOTIFY_H_
3
4#include <linux/list.h>
5#include <linux/fsnotify.h>
6#include <linux/srcu.h>
7#include <linux/types.h>
8
9/* protects reads of fsnotify_groups */
10extern struct srcu_struct fsnotify_grp_srcu;
11/* all groups which receive fsnotify events */
12extern struct list_head fsnotify_groups;
13/* bitwise OR of all event types (FS_*) for all fsnotify_groups */
14extern __u32 fsnotify_mask;
15#endif /* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/notify/group.c b/fs/notify/group.c
new file mode 100644
index 000000000000..c6812953b968
--- /dev/null
+++ b/fs/notify/group.c
@@ -0,0 +1,198 @@
1/*
2 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to
16 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19#include <linux/list.h>
20#include <linux/mutex.h>
21#include <linux/slab.h>
22#include <linux/srcu.h>
23#include <linux/rculist.h>
24#include <linux/wait.h>
25
26#include <linux/fsnotify_backend.h>
27#include "fsnotify.h"
28
29#include <asm/atomic.h>
30
31/* protects writes to fsnotify_groups and fsnotify_mask */
32static DEFINE_MUTEX(fsnotify_grp_mutex);
33/* protects reads while running the fsnotify_groups list */
34struct srcu_struct fsnotify_grp_srcu;
35/* all groups registered to receive filesystem notifications */
36LIST_HEAD(fsnotify_groups);
37/* bitwise OR of all events (FS_*) interesting to some group on this system */
38__u32 fsnotify_mask;
39
40/*
41 * When a new group registers or changes it's set of interesting events
42 * this function updates the fsnotify_mask to contain all interesting events
43 */
44void fsnotify_recalc_global_mask(void)
45{
46 struct fsnotify_group *group;
47 __u32 mask = 0;
48 int idx;
49
50 idx = srcu_read_lock(&fsnotify_grp_srcu);
51 list_for_each_entry_rcu(group, &fsnotify_groups, group_list)
52 mask |= group->mask;
53 srcu_read_unlock(&fsnotify_grp_srcu, idx);
54 fsnotify_mask = mask;
55}
56
57/*
58 * Take a reference to a group so things found under the fsnotify_grp_mutex
59 * can't get freed under us
60 */
61static void fsnotify_get_group(struct fsnotify_group *group)
62{
63 atomic_inc(&group->refcnt);
64}
65
66/*
67 * Final freeing of a group
68 */
69static void fsnotify_destroy_group(struct fsnotify_group *group)
70{
71 if (group->ops->free_group_priv)
72 group->ops->free_group_priv(group);
73
74 kfree(group);
75}
76
77/*
78 * Remove this group from the global list of groups that will get events
79 * this can be done even if there are still references and things still using
80 * this group. This just stops the group from getting new events.
81 */
82static void __fsnotify_evict_group(struct fsnotify_group *group)
83{
84 BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
85
86 if (group->on_group_list)
87 list_del_rcu(&group->group_list);
88 group->on_group_list = 0;
89}
90
91/*
92 * Called when a group is no longer interested in getting events. This can be
93 * used if a group is misbehaving or if for some reason a group should no longer
94 * get any filesystem events.
95 */
96void fsnotify_evict_group(struct fsnotify_group *group)
97{
98 mutex_lock(&fsnotify_grp_mutex);
99 __fsnotify_evict_group(group);
100 mutex_unlock(&fsnotify_grp_mutex);
101}
102
103/*
104 * Drop a reference to a group. Free it if it's through.
105 */
106void fsnotify_put_group(struct fsnotify_group *group)
107{
108 if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex))
109 return;
110
111 /*
112 * OK, now we know that there's no other users *and* we hold mutex,
113 * so no new references will appear
114 */
115 __fsnotify_evict_group(group);
116
117 /*
118 * now it's off the list, so the only thing we might care about is
119 * srcu access....
120 */
121 mutex_unlock(&fsnotify_grp_mutex);
122 synchronize_srcu(&fsnotify_grp_srcu);
123
124 /* and now it is really dead. _Nothing_ could be seeing it */
125 fsnotify_recalc_global_mask();
126 fsnotify_destroy_group(group);
127}
128
129/*
130 * Simply run the fsnotify_groups list and find a group which matches
131 * the given parameters. If a group is found we take a reference to that
132 * group.
133 */
134static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask,
135 const struct fsnotify_ops *ops)
136{
137 struct fsnotify_group *group_iter;
138 struct fsnotify_group *group = NULL;
139
140 BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
141
142 list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) {
143 if (group_iter->group_num == group_num) {
144 if ((group_iter->mask == mask) &&
145 (group_iter->ops == ops)) {
146 fsnotify_get_group(group_iter);
147 group = group_iter;
148 } else
149 group = ERR_PTR(-EEXIST);
150 }
151 }
152 return group;
153}
154
155/*
156 * Either finds an existing group which matches the group_num, mask, and ops or
157 * creates a new group and adds it to the global group list. In either case we
158 * take a reference for the group returned.
159 */
160struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
161 const struct fsnotify_ops *ops)
162{
163 struct fsnotify_group *group, *tgroup;
164
165 /* very low use, simpler locking if we just always alloc */
166 group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
167 if (!group)
168 return ERR_PTR(-ENOMEM);
169
170 atomic_set(&group->refcnt, 1);
171
172 group->on_group_list = 0;
173 group->group_num = group_num;
174 group->mask = mask;
175
176 group->ops = ops;
177
178 mutex_lock(&fsnotify_grp_mutex);
179 tgroup = fsnotify_find_group(group_num, mask, ops);
180 if (tgroup) {
181 /* group already exists */
182 mutex_unlock(&fsnotify_grp_mutex);
183 /* destroy the new one we made */
184 fsnotify_put_group(group);
185 return tgroup;
186 }
187
188 /* group not found, add a new one */
189 list_add_rcu(&group->group_list, &fsnotify_groups);
190 group->on_group_list = 1;
191
192 mutex_unlock(&fsnotify_grp_mutex);
193
194 if (mask)
195 fsnotify_recalc_global_mask();
196
197 return group;
198}
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index 220c13f0d73d..40b1cf914ccb 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -32,6 +32,7 @@
32#include <linux/list.h> 32#include <linux/list.h>
33#include <linux/writeback.h> 33#include <linux/writeback.h>
34#include <linux/inotify.h> 34#include <linux/inotify.h>
35#include <linux/fsnotify_backend.h>
35 36
36static atomic_t inotify_cookie; 37static atomic_t inotify_cookie;
37 38
@@ -905,6 +906,25 @@ EXPORT_SYMBOL_GPL(inotify_rm_watch);
905 */ 906 */
906static int __init inotify_setup(void) 907static int __init inotify_setup(void)
907{ 908{
909 BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
910 BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
911 BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
912 BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
913 BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
914 BUILD_BUG_ON(IN_OPEN != FS_OPEN);
915 BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
916 BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
917 BUILD_BUG_ON(IN_CREATE != FS_CREATE);
918 BUILD_BUG_ON(IN_DELETE != FS_DELETE);
919 BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
920 BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
921 BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
922
923 BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
924 BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
925 BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
926 BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
927
908 atomic_set(&inotify_cookie, 0); 928 atomic_set(&inotify_cookie, 0);
909 929
910 return 0; 930 return 0;
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
new file mode 100644
index 000000000000..b8e9a87f8f58
--- /dev/null
+++ b/fs/notify/notification.c
@@ -0,0 +1,121 @@
1/*
2 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to
16 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19#include <linux/fs.h>
20#include <linux/init.h>
21#include <linux/kernel.h>
22#include <linux/list.h>
23#include <linux/mount.h>
24#include <linux/mutex.h>
25#include <linux/namei.h>
26#include <linux/path.h>
27#include <linux/slab.h>
28#include <linux/spinlock.h>
29
30#include <asm/atomic.h>
31
32#include <linux/fsnotify_backend.h>
33#include "fsnotify.h"
34
35static struct kmem_cache *fsnotify_event_cachep;
36
37void fsnotify_get_event(struct fsnotify_event *event)
38{
39 atomic_inc(&event->refcnt);
40}
41
42void fsnotify_put_event(struct fsnotify_event *event)
43{
44 if (!event)
45 return;
46
47 if (atomic_dec_and_test(&event->refcnt)) {
48 if (event->data_type == FSNOTIFY_EVENT_PATH)
49 path_put(&event->path);
50
51 kmem_cache_free(fsnotify_event_cachep, event);
52 }
53}
54
55/*
56 * Allocate a new event which will be sent to each group's handle_event function
57 * if the group was interested in this particular event.
58 */
59struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
60 void *data, int data_type)
61{
62 struct fsnotify_event *event;
63
64 event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
65 if (!event)
66 return NULL;
67
68 atomic_set(&event->refcnt, 1);
69
70 spin_lock_init(&event->lock);
71
72 event->path.dentry = NULL;
73 event->path.mnt = NULL;
74 event->inode = NULL;
75
76 event->to_tell = to_tell;
77
78 switch (data_type) {
79 case FSNOTIFY_EVENT_FILE: {
80 struct file *file = data;
81 struct path *path = &file->f_path;
82 event->path.dentry = path->dentry;
83 event->path.mnt = path->mnt;
84 path_get(&event->path);
85 event->data_type = FSNOTIFY_EVENT_PATH;
86 break;
87 }
88 case FSNOTIFY_EVENT_PATH: {
89 struct path *path = data;
90 event->path.dentry = path->dentry;
91 event->path.mnt = path->mnt;
92 path_get(&event->path);
93 event->data_type = FSNOTIFY_EVENT_PATH;
94 break;
95 }
96 case FSNOTIFY_EVENT_INODE:
97 event->inode = data;
98 event->data_type = FSNOTIFY_EVENT_INODE;
99 break;
100 case FSNOTIFY_EVENT_NONE:
101 event->inode = NULL;
102 event->path.dentry = NULL;
103 event->path.mnt = NULL;
104 break;
105 default:
106 BUG();
107 }
108
109 event->mask = mask;
110
111 return event;
112}
113
114__init int fsnotify_notification_init(void)
115{
116 fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
117
118 return 0;
119}
120subsys_initcall(fsnotify_notification_init);
121