aboutsummaryrefslogtreecommitdiffstats
path: root/fs/notify
diff options
context:
space:
mode:
authorEric Paris <eparis@redhat.com>2008-12-17 13:59:41 -0500
committerAl Viro <viro@zeniv.linux.org.uk>2008-12-31 18:07:43 -0500
commit272eb01485dda98e3b8910c7c1a53d597616b0a0 (patch)
tree6a1dcd34c1dd668b465c166c2d6d2596924eff5f /fs/notify
parentc2acf7b90821785fe812cc0aa05148e5a1f84204 (diff)
filesystem notification: create fs/notify to contain all fs notification
Creating a generic filesystem notification interface, fsnotify, which will be used by inotify, dnotify, and eventually fanotify is really starting to clutter the fs directory. This patch simply moves inotify and dnotify into fs/notify/inotify and fs/notify/dnotify respectively to make both current fs/ and future notification tidier. Signed-off-by: Eric Paris <eparis@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/notify')
-rw-r--r--fs/notify/Kconfig2
-rw-r--r--fs/notify/Makefile2
-rw-r--r--fs/notify/dnotify/Kconfig10
-rw-r--r--fs/notify/dnotify/Makefile1
-rw-r--r--fs/notify/dnotify/dnotify.c191
-rw-r--r--fs/notify/inotify/Kconfig27
-rw-r--r--fs/notify/inotify/Makefile2
-rw-r--r--fs/notify/inotify/inotify.c913
-rw-r--r--fs/notify/inotify/inotify_user.c778
9 files changed, 1926 insertions, 0 deletions
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
new file mode 100644
index 000000000000..50914d7303c6
--- /dev/null
+++ b/fs/notify/Kconfig
@@ -0,0 +1,2 @@
1source "fs/notify/dnotify/Kconfig"
2source "fs/notify/inotify/Kconfig"
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
new file mode 100644
index 000000000000..5a95b6010ce7
--- /dev/null
+++ b/fs/notify/Makefile
@@ -0,0 +1,2 @@
1obj-y += dnotify/
2obj-y += inotify/
diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig
new file mode 100644
index 000000000000..26adf5dfa646
--- /dev/null
+++ b/fs/notify/dnotify/Kconfig
@@ -0,0 +1,10 @@
1config DNOTIFY
2 bool "Dnotify support"
3 default y
4 help
5 Dnotify is a directory-based per-fd file change notification system
6 that uses signals to communicate events to user-space. There exist
7 superior alternatives, but some applications may still rely on
8 dnotify.
9
10 If unsure, say Y.
diff --git a/fs/notify/dnotify/Makefile b/fs/notify/dnotify/Makefile
new file mode 100644
index 000000000000..f145251dcadb
--- /dev/null
+++ b/fs/notify/dnotify/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_DNOTIFY) += dnotify.o
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
new file mode 100644
index 000000000000..b0aa2cde80bd
--- /dev/null
+++ b/fs/notify/dnotify/dnotify.c
@@ -0,0 +1,191 @@
1/*
2 * Directory notifications for Linux.
3 *
4 * Copyright (C) 2000,2001,2002 Stephen Rothwell
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 */
16#include <linux/fs.h>
17#include <linux/module.h>
18#include <linux/sched.h>
19#include <linux/dnotify.h>
20#include <linux/init.h>
21#include <linux/spinlock.h>
22#include <linux/slab.h>
23#include <linux/fdtable.h>
24
25int dir_notify_enable __read_mostly = 1;
26
27static struct kmem_cache *dn_cache __read_mostly;
28
29static void redo_inode_mask(struct inode *inode)
30{
31 unsigned long new_mask;
32 struct dnotify_struct *dn;
33
34 new_mask = 0;
35 for (dn = inode->i_dnotify; dn != NULL; dn = dn->dn_next)
36 new_mask |= dn->dn_mask & ~DN_MULTISHOT;
37 inode->i_dnotify_mask = new_mask;
38}
39
40void dnotify_flush(struct file *filp, fl_owner_t id)
41{
42 struct dnotify_struct *dn;
43 struct dnotify_struct **prev;
44 struct inode *inode;
45
46 inode = filp->f_path.dentry->d_inode;
47 if (!S_ISDIR(inode->i_mode))
48 return;
49 spin_lock(&inode->i_lock);
50 prev = &inode->i_dnotify;
51 while ((dn = *prev) != NULL) {
52 if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
53 *prev = dn->dn_next;
54 redo_inode_mask(inode);
55 kmem_cache_free(dn_cache, dn);
56 break;
57 }
58 prev = &dn->dn_next;
59 }
60 spin_unlock(&inode->i_lock);
61}
62
63int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
64{
65 struct dnotify_struct *dn;
66 struct dnotify_struct *odn;
67 struct dnotify_struct **prev;
68 struct inode *inode;
69 fl_owner_t id = current->files;
70 struct file *f;
71 int error = 0;
72
73 if ((arg & ~DN_MULTISHOT) == 0) {
74 dnotify_flush(filp, id);
75 return 0;
76 }
77 if (!dir_notify_enable)
78 return -EINVAL;
79 inode = filp->f_path.dentry->d_inode;
80 if (!S_ISDIR(inode->i_mode))
81 return -ENOTDIR;
82 dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);
83 if (dn == NULL)
84 return -ENOMEM;
85 spin_lock(&inode->i_lock);
86 prev = &inode->i_dnotify;
87 while ((odn = *prev) != NULL) {
88 if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
89 odn->dn_fd = fd;
90 odn->dn_mask |= arg;
91 inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
92 goto out_free;
93 }
94 prev = &odn->dn_next;
95 }
96
97 rcu_read_lock();
98 f = fcheck(fd);
99 rcu_read_unlock();
100 /* we'd lost the race with close(), sod off silently */
101 /* note that inode->i_lock prevents reordering problems
102 * between accesses to descriptor table and ->i_dnotify */
103 if (f != filp)
104 goto out_free;
105
106 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
107 if (error)
108 goto out_free;
109
110 dn->dn_mask = arg;
111 dn->dn_fd = fd;
112 dn->dn_filp = filp;
113 dn->dn_owner = id;
114 inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
115 dn->dn_next = inode->i_dnotify;
116 inode->i_dnotify = dn;
117 spin_unlock(&inode->i_lock);
118 return 0;
119
120out_free:
121 spin_unlock(&inode->i_lock);
122 kmem_cache_free(dn_cache, dn);
123 return error;
124}
125
126void __inode_dir_notify(struct inode *inode, unsigned long event)
127{
128 struct dnotify_struct * dn;
129 struct dnotify_struct **prev;
130 struct fown_struct * fown;
131 int changed = 0;
132
133 spin_lock(&inode->i_lock);
134 prev = &inode->i_dnotify;
135 while ((dn = *prev) != NULL) {
136 if ((dn->dn_mask & event) == 0) {
137 prev = &dn->dn_next;
138 continue;
139 }
140 fown = &dn->dn_filp->f_owner;
141 send_sigio(fown, dn->dn_fd, POLL_MSG);
142 if (dn->dn_mask & DN_MULTISHOT)
143 prev = &dn->dn_next;
144 else {
145 *prev = dn->dn_next;
146 changed = 1;
147 kmem_cache_free(dn_cache, dn);
148 }
149 }
150 if (changed)
151 redo_inode_mask(inode);
152 spin_unlock(&inode->i_lock);
153}
154
155EXPORT_SYMBOL(__inode_dir_notify);
156
157/*
158 * This is hopelessly wrong, but unfixable without API changes. At
159 * least it doesn't oops the kernel...
160 *
161 * To safely access ->d_parent we need to keep d_move away from it. Use the
162 * dentry's d_lock for this.
163 */
164void dnotify_parent(struct dentry *dentry, unsigned long event)
165{
166 struct dentry *parent;
167
168 if (!dir_notify_enable)
169 return;
170
171 spin_lock(&dentry->d_lock);
172 parent = dentry->d_parent;
173 if (parent->d_inode->i_dnotify_mask & event) {
174 dget(parent);
175 spin_unlock(&dentry->d_lock);
176 __inode_dir_notify(parent->d_inode, event);
177 dput(parent);
178 } else {
179 spin_unlock(&dentry->d_lock);
180 }
181}
182EXPORT_SYMBOL_GPL(dnotify_parent);
183
184static int __init dnotify_init(void)
185{
186 dn_cache = kmem_cache_create("dnotify_cache",
187 sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL);
188 return 0;
189}
190
191module_init(dnotify_init)
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
new file mode 100644
index 000000000000..446792841023
--- /dev/null
+++ b/fs/notify/inotify/Kconfig
@@ -0,0 +1,27 @@
1config INOTIFY
2 bool "Inotify file change notification support"
3 default y
4 ---help---
5 Say Y here to enable inotify support. Inotify is a file change
6 notification system and a replacement for dnotify. Inotify fixes
7 numerous shortcomings in dnotify and introduces several new features
8 including multiple file events, one-shot support, and unmount
9 notification.
10
11 For more information, see <file:Documentation/filesystems/inotify.txt>
12
13 If unsure, say Y.
14
15config INOTIFY_USER
16 bool "Inotify support for userspace"
17 depends on INOTIFY
18 default y
19 ---help---
20 Say Y here to enable inotify support for userspace, including the
21 associated system calls. Inotify allows monitoring of both files and
22 directories via a single open fd. Events are read from the file
23 descriptor, which is also select()- and poll()-able.
24
25 For more information, see <file:Documentation/filesystems/inotify.txt>
26
27 If unsure, say Y.
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile
new file mode 100644
index 000000000000..e290f3bb9d8d
--- /dev/null
+++ b/fs/notify/inotify/Makefile
@@ -0,0 +1,2 @@
1obj-$(CONFIG_INOTIFY) += inotify.o
2obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
new file mode 100644
index 000000000000..dae3f28f30d4
--- /dev/null
+++ b/fs/notify/inotify/inotify.c
@@ -0,0 +1,913 @@
1/*
2 * fs/inotify.c - inode-based file event notifications
3 *
4 * Authors:
5 * John McCutchan <ttb@tentacle.dhs.org>
6 * Robert Love <rml@novell.com>
7 *
8 * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
9 *
10 * Copyright (C) 2005 John McCutchan
11 * Copyright 2006 Hewlett-Packard Development Company, L.P.
12 *
13 * This program is free software; you can redistribute it and/or modify it
14 * under the terms of the GNU General Public License as published by the
15 * Free Software Foundation; either version 2, or (at your option) any
16 * later version.
17 *
18 * This program is distributed in the hope that it will be useful, but
19 * WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * General Public License for more details.
22 */
23
24#include <linux/module.h>
25#include <linux/kernel.h>
26#include <linux/spinlock.h>
27#include <linux/idr.h>
28#include <linux/slab.h>
29#include <linux/fs.h>
30#include <linux/sched.h>
31#include <linux/init.h>
32#include <linux/list.h>
33#include <linux/writeback.h>
34#include <linux/inotify.h>
35
36static atomic_t inotify_cookie;
37
38/*
39 * Lock ordering:
40 *
41 * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
42 * iprune_mutex (synchronize shrink_icache_memory())
43 * inode_lock (protects the super_block->s_inodes list)
44 * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
45 * inotify_handle->mutex (protects inotify_handle and watches->h_list)
46 *
47 * The inode->inotify_mutex and inotify_handle->mutex are held during execution
48 * of a caller's event handler. Thus, the caller must not hold any locks
49 * taken in their event handler while calling any of the published inotify
50 * interfaces.
51 */
52
53/*
54 * Lifetimes of the three main data structures--inotify_handle, inode, and
55 * inotify_watch--are managed by reference count.
56 *
57 * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
58 * Additional references can bump the count via get_inotify_handle() and drop
59 * the count via put_inotify_handle().
60 *
61 * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
62 * to remove_watch_no_event(). Additional references can bump the count via
63 * get_inotify_watch() and drop the count via put_inotify_watch(). The caller
64 * is responsible for the final put after receiving IN_IGNORED, or when using
65 * IN_ONESHOT after receiving the first event. Inotify does the final put if
66 * inotify_destroy() is called.
67 *
68 * inode: Pinned so long as the inode is associated with a watch, from
69 * inotify_add_watch() to the final put_inotify_watch().
70 */
71
72/*
73 * struct inotify_handle - represents an inotify instance
74 *
75 * This structure is protected by the mutex 'mutex'.
76 */
77struct inotify_handle {
78 struct idr idr; /* idr mapping wd -> watch */
79 struct mutex mutex; /* protects this bad boy */
80 struct list_head watches; /* list of watches */
81 atomic_t count; /* reference count */
82 u32 last_wd; /* the last wd allocated */
83 const struct inotify_operations *in_ops; /* inotify caller operations */
84};
85
86static inline void get_inotify_handle(struct inotify_handle *ih)
87{
88 atomic_inc(&ih->count);
89}
90
91static inline void put_inotify_handle(struct inotify_handle *ih)
92{
93 if (atomic_dec_and_test(&ih->count)) {
94 idr_destroy(&ih->idr);
95 kfree(ih);
96 }
97}
98
99/**
100 * get_inotify_watch - grab a reference to an inotify_watch
101 * @watch: watch to grab
102 */
103void get_inotify_watch(struct inotify_watch *watch)
104{
105 atomic_inc(&watch->count);
106}
107EXPORT_SYMBOL_GPL(get_inotify_watch);
108
109int pin_inotify_watch(struct inotify_watch *watch)
110{
111 struct super_block *sb = watch->inode->i_sb;
112 spin_lock(&sb_lock);
113 if (sb->s_count >= S_BIAS) {
114 atomic_inc(&sb->s_active);
115 spin_unlock(&sb_lock);
116 atomic_inc(&watch->count);
117 return 1;
118 }
119 spin_unlock(&sb_lock);
120 return 0;
121}
122
123/**
124 * put_inotify_watch - decrements the ref count on a given watch. cleans up
125 * watch references if the count reaches zero. inotify_watch is freed by
126 * inotify callers via the destroy_watch() op.
127 * @watch: watch to release
128 */
129void put_inotify_watch(struct inotify_watch *watch)
130{
131 if (atomic_dec_and_test(&watch->count)) {
132 struct inotify_handle *ih = watch->ih;
133
134 iput(watch->inode);
135 ih->in_ops->destroy_watch(watch);
136 put_inotify_handle(ih);
137 }
138}
139EXPORT_SYMBOL_GPL(put_inotify_watch);
140
141void unpin_inotify_watch(struct inotify_watch *watch)
142{
143 struct super_block *sb = watch->inode->i_sb;
144 put_inotify_watch(watch);
145 deactivate_super(sb);
146}
147
148/*
149 * inotify_handle_get_wd - returns the next WD for use by the given handle
150 *
151 * Callers must hold ih->mutex. This function can sleep.
152 */
153static int inotify_handle_get_wd(struct inotify_handle *ih,
154 struct inotify_watch *watch)
155{
156 int ret;
157
158 do {
159 if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL)))
160 return -ENOSPC;
161 ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
162 } while (ret == -EAGAIN);
163
164 if (likely(!ret))
165 ih->last_wd = watch->wd;
166
167 return ret;
168}
169
170/*
171 * inotify_inode_watched - returns nonzero if there are watches on this inode
172 * and zero otherwise. We call this lockless, we do not care if we race.
173 */
174static inline int inotify_inode_watched(struct inode *inode)
175{
176 return !list_empty(&inode->inotify_watches);
177}
178
179/*
180 * Get child dentry flag into synch with parent inode.
181 * Flag should always be clear for negative dentrys.
182 */
183static void set_dentry_child_flags(struct inode *inode, int watched)
184{
185 struct dentry *alias;
186
187 spin_lock(&dcache_lock);
188 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
189 struct dentry *child;
190
191 list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
192 if (!child->d_inode)
193 continue;
194
195 spin_lock(&child->d_lock);
196 if (watched)
197 child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
198 else
199 child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED;
200 spin_unlock(&child->d_lock);
201 }
202 }
203 spin_unlock(&dcache_lock);
204}
205
206/*
207 * inotify_find_handle - find the watch associated with the given inode and
208 * handle
209 *
210 * Callers must hold inode->inotify_mutex.
211 */
212static struct inotify_watch *inode_find_handle(struct inode *inode,
213 struct inotify_handle *ih)
214{
215 struct inotify_watch *watch;
216
217 list_for_each_entry(watch, &inode->inotify_watches, i_list) {
218 if (watch->ih == ih)
219 return watch;
220 }
221
222 return NULL;
223}
224
225/*
226 * remove_watch_no_event - remove watch without the IN_IGNORED event.
227 *
228 * Callers must hold both inode->inotify_mutex and ih->mutex.
229 */
230static void remove_watch_no_event(struct inotify_watch *watch,
231 struct inotify_handle *ih)
232{
233 list_del(&watch->i_list);
234 list_del(&watch->h_list);
235
236 if (!inotify_inode_watched(watch->inode))
237 set_dentry_child_flags(watch->inode, 0);
238
239 idr_remove(&ih->idr, watch->wd);
240}
241
242/**
243 * inotify_remove_watch_locked - Remove a watch from both the handle and the
244 * inode. Sends the IN_IGNORED event signifying that the inode is no longer
245 * watched. May be invoked from a caller's event handler.
246 * @ih: inotify handle associated with watch
247 * @watch: watch to remove
248 *
249 * Callers must hold both inode->inotify_mutex and ih->mutex.
250 */
251void inotify_remove_watch_locked(struct inotify_handle *ih,
252 struct inotify_watch *watch)
253{
254 remove_watch_no_event(watch, ih);
255 ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
256}
257EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
258
259/* Kernel API for producing events */
260
261/*
262 * inotify_d_instantiate - instantiate dcache entry for inode
263 */
264void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
265{
266 struct dentry *parent;
267
268 if (!inode)
269 return;
270
271 spin_lock(&entry->d_lock);
272 parent = entry->d_parent;
273 if (parent->d_inode && inotify_inode_watched(parent->d_inode))
274 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
275 spin_unlock(&entry->d_lock);
276}
277
278/*
279 * inotify_d_move - dcache entry has been moved
280 */
281void inotify_d_move(struct dentry *entry)
282{
283 struct dentry *parent;
284
285 parent = entry->d_parent;
286 if (inotify_inode_watched(parent->d_inode))
287 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
288 else
289 entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
290}
291
292/**
293 * inotify_inode_queue_event - queue an event to all watches on this inode
294 * @inode: inode event is originating from
295 * @mask: event mask describing this event
296 * @cookie: cookie for synchronization, or zero
297 * @name: filename, if any
298 * @n_inode: inode associated with name
299 */
300void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
301 const char *name, struct inode *n_inode)
302{
303 struct inotify_watch *watch, *next;
304
305 if (!inotify_inode_watched(inode))
306 return;
307
308 mutex_lock(&inode->inotify_mutex);
309 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
310 u32 watch_mask = watch->mask;
311 if (watch_mask & mask) {
312 struct inotify_handle *ih= watch->ih;
313 mutex_lock(&ih->mutex);
314 if (watch_mask & IN_ONESHOT)
315 remove_watch_no_event(watch, ih);
316 ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
317 name, n_inode);
318 mutex_unlock(&ih->mutex);
319 }
320 }
321 mutex_unlock(&inode->inotify_mutex);
322}
323EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
324
325/**
326 * inotify_dentry_parent_queue_event - queue an event to a dentry's parent
327 * @dentry: the dentry in question, we queue against this dentry's parent
328 * @mask: event mask describing this event
329 * @cookie: cookie for synchronization, or zero
330 * @name: filename, if any
331 */
332void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
333 u32 cookie, const char *name)
334{
335 struct dentry *parent;
336 struct inode *inode;
337
338 if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
339 return;
340
341 spin_lock(&dentry->d_lock);
342 parent = dentry->d_parent;
343 inode = parent->d_inode;
344
345 if (inotify_inode_watched(inode)) {
346 dget(parent);
347 spin_unlock(&dentry->d_lock);
348 inotify_inode_queue_event(inode, mask, cookie, name,
349 dentry->d_inode);
350 dput(parent);
351 } else
352 spin_unlock(&dentry->d_lock);
353}
354EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
355
356/**
357 * inotify_get_cookie - return a unique cookie for use in synchronizing events.
358 */
359u32 inotify_get_cookie(void)
360{
361 return atomic_inc_return(&inotify_cookie);
362}
363EXPORT_SYMBOL_GPL(inotify_get_cookie);
364
365/**
366 * inotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
367 * @list: list of inodes being unmounted (sb->s_inodes)
368 *
369 * Called with inode_lock held, protecting the unmounting super block's list
370 * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
371 * We temporarily drop inode_lock, however, and CAN block.
372 */
373void inotify_unmount_inodes(struct list_head *list)
374{
375 struct inode *inode, *next_i, *need_iput = NULL;
376
377 list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
378 struct inotify_watch *watch, *next_w;
379 struct inode *need_iput_tmp;
380 struct list_head *watches;
381
382 /*
383 * If i_count is zero, the inode cannot have any watches and
384 * doing an __iget/iput with MS_ACTIVE clear would actually
385 * evict all inodes with zero i_count from icache which is
386 * unnecessarily violent and may in fact be illegal to do.
387 */
388 if (!atomic_read(&inode->i_count))
389 continue;
390
391 /*
392 * We cannot __iget() an inode in state I_CLEAR, I_FREEING, or
393 * I_WILL_FREE which is fine because by that point the inode
394 * cannot have any associated watches.
395 */
396 if (inode->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))
397 continue;
398
399 need_iput_tmp = need_iput;
400 need_iput = NULL;
401 /* In case inotify_remove_watch_locked() drops a reference. */
402 if (inode != need_iput_tmp)
403 __iget(inode);
404 else
405 need_iput_tmp = NULL;
406 /* In case the dropping of a reference would nuke next_i. */
407 if ((&next_i->i_sb_list != list) &&
408 atomic_read(&next_i->i_count) &&
409 !(next_i->i_state & (I_CLEAR | I_FREEING |
410 I_WILL_FREE))) {
411 __iget(next_i);
412 need_iput = next_i;
413 }
414
415 /*
416 * We can safely drop inode_lock here because we hold
417 * references on both inode and next_i. Also no new inodes
418 * will be added since the umount has begun. Finally,
419 * iprune_mutex keeps shrink_icache_memory() away.
420 */
421 spin_unlock(&inode_lock);
422
423 if (need_iput_tmp)
424 iput(need_iput_tmp);
425
426 /* for each watch, send IN_UNMOUNT and then remove it */
427 mutex_lock(&inode->inotify_mutex);
428 watches = &inode->inotify_watches;
429 list_for_each_entry_safe(watch, next_w, watches, i_list) {
430 struct inotify_handle *ih= watch->ih;
431 get_inotify_watch(watch);
432 mutex_lock(&ih->mutex);
433 ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
434 NULL, NULL);
435 inotify_remove_watch_locked(ih, watch);
436 mutex_unlock(&ih->mutex);
437 put_inotify_watch(watch);
438 }
439 mutex_unlock(&inode->inotify_mutex);
440 iput(inode);
441
442 spin_lock(&inode_lock);
443 }
444}
445EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
446
447/**
448 * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
449 * @inode: inode that is about to be removed
450 */
451void inotify_inode_is_dead(struct inode *inode)
452{
453 struct inotify_watch *watch, *next;
454
455 mutex_lock(&inode->inotify_mutex);
456 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
457 struct inotify_handle *ih = watch->ih;
458 mutex_lock(&ih->mutex);
459 inotify_remove_watch_locked(ih, watch);
460 mutex_unlock(&ih->mutex);
461 }
462 mutex_unlock(&inode->inotify_mutex);
463}
464EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
465
466/* Kernel Consumer API */
467
468/**
469 * inotify_init - allocate and initialize an inotify instance
470 * @ops: caller's inotify operations
471 */
472struct inotify_handle *inotify_init(const struct inotify_operations *ops)
473{
474 struct inotify_handle *ih;
475
476 ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
477 if (unlikely(!ih))
478 return ERR_PTR(-ENOMEM);
479
480 idr_init(&ih->idr);
481 INIT_LIST_HEAD(&ih->watches);
482 mutex_init(&ih->mutex);
483 ih->last_wd = 0;
484 ih->in_ops = ops;
485 atomic_set(&ih->count, 0);
486 get_inotify_handle(ih);
487
488 return ih;
489}
490EXPORT_SYMBOL_GPL(inotify_init);
491
492/**
493 * inotify_init_watch - initialize an inotify watch
494 * @watch: watch to initialize
495 */
496void inotify_init_watch(struct inotify_watch *watch)
497{
498 INIT_LIST_HEAD(&watch->h_list);
499 INIT_LIST_HEAD(&watch->i_list);
500 atomic_set(&watch->count, 0);
501 get_inotify_watch(watch); /* initial get */
502}
503EXPORT_SYMBOL_GPL(inotify_init_watch);
504
505/*
506 * Watch removals suck violently. To kick the watch out we need (in this
507 * order) inode->inotify_mutex and ih->mutex. That's fine if we have
508 * a hold on inode; however, for all other cases we need to make damn sure
509 * we don't race with umount. We can *NOT* just grab a reference to a
510 * watch - inotify_unmount_inodes() will happily sail past it and we'll end
511 * with reference to inode potentially outliving its superblock. Ideally
512 * we just want to grab an active reference to superblock if we can; that
513 * will make sure we won't go into inotify_unmount_inodes() until we are
514 * done. Cleanup is just deactivate_super(). However, that leaves a messy
515 * case - what if we *are* racing with umount() and active references to
516 * superblock can't be acquired anymore? We can bump ->s_count, grab
517 * ->s_umount, which will almost certainly wait until the superblock is shut
518 * down and the watch in question is pining for fjords. That's fine, but
519 * there is a problem - we might have hit the window between ->s_active
520 * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock
521 * is past the point of no return and is heading for shutdown) and the
522 * moment when deactivate_super() acquires ->s_umount. We could just do
523 * drop_super() yield() and retry, but that's rather antisocial and this
524 * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having
525 * found that we'd got there first (i.e. that ->s_root is non-NULL) we know
526 * that we won't race with inotify_unmount_inodes(). So we could grab a
527 * reference to watch and do the rest as above, just with drop_super() instead
528 * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we
529 * could grab ->s_umount. So the watch could've been gone already.
530 *
531 * That still can be dealt with - we need to save watch->wd, do idr_find()
532 * and compare its result with our pointer. If they match, we either have
533 * the damn thing still alive or we'd lost not one but two races at once,
534 * the watch had been killed and a new one got created with the same ->wd
535 * at the same address. That couldn't have happened in inotify_destroy(),
536 * but inotify_rm_wd() could run into that. Still, "new one got created"
537 * is not a problem - we have every right to kill it or leave it alone,
538 * whatever's more convenient.
539 *
540 * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as
541 * "grab it and kill it" check. If it's been our original watch, we are
542 * fine, if it's a newcomer - nevermind, just pretend that we'd won the
543 * race and kill the fscker anyway; we are safe since we know that its
544 * superblock won't be going away.
545 *
546 * And yes, this is far beyond mere "not very pretty"; so's the entire
547 * concept of inotify to start with.
548 */
549
550/**
551 * pin_to_kill - pin the watch down for removal
552 * @ih: inotify handle
553 * @watch: watch to kill
554 *
555 * Called with ih->mutex held, drops it. Possible return values:
556 * 0 - nothing to do, it has died
557 * 1 - remove it, drop the reference and deactivate_super()
558 * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid
559 * that variant, since it involved a lot of PITA, but that's the best that
560 * could've been done.
561 */
562static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
563{
564 struct super_block *sb = watch->inode->i_sb;
565 s32 wd = watch->wd;
566
567 spin_lock(&sb_lock);
568 if (sb->s_count >= S_BIAS) {
569 atomic_inc(&sb->s_active);
570 spin_unlock(&sb_lock);
571 get_inotify_watch(watch);
572 mutex_unlock(&ih->mutex);
573 return 1; /* the best outcome */
574 }
575 sb->s_count++;
576 spin_unlock(&sb_lock);
577 mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
578 down_read(&sb->s_umount);
579 if (likely(!sb->s_root)) {
580 /* fs is already shut down; the watch is dead */
581 drop_super(sb);
582 return 0;
583 }
584 /* raced with the final deactivate_super() */
585 mutex_lock(&ih->mutex);
586 if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) {
587 /* the watch is dead */
588 mutex_unlock(&ih->mutex);
589 drop_super(sb);
590 return 0;
591 }
592 /* still alive or freed and reused with the same sb and wd; kill */
593 get_inotify_watch(watch);
594 mutex_unlock(&ih->mutex);
595 return 2;
596}
597
598static void unpin_and_kill(struct inotify_watch *watch, int how)
599{
600 struct super_block *sb = watch->inode->i_sb;
601 put_inotify_watch(watch);
602 switch (how) {
603 case 1:
604 deactivate_super(sb);
605 break;
606 case 2:
607 drop_super(sb);
608 }
609}
610
611/**
612 * inotify_destroy - clean up and destroy an inotify instance
613 * @ih: inotify handle
614 */
615void inotify_destroy(struct inotify_handle *ih)
616{
617 /*
618 * Destroy all of the watches for this handle. Unfortunately, not very
619 * pretty. We cannot do a simple iteration over the list, because we
620 * do not know the inode until we iterate to the watch. But we need to
621 * hold inode->inotify_mutex before ih->mutex. The following works.
622 *
623 * AV: it had to become even uglier to start working ;-/
624 */
625 while (1) {
626 struct inotify_watch *watch;
627 struct list_head *watches;
628 struct super_block *sb;
629 struct inode *inode;
630 int how;
631
632 mutex_lock(&ih->mutex);
633 watches = &ih->watches;
634 if (list_empty(watches)) {
635 mutex_unlock(&ih->mutex);
636 break;
637 }
638 watch = list_first_entry(watches, struct inotify_watch, h_list);
639 sb = watch->inode->i_sb;
640 how = pin_to_kill(ih, watch);
641 if (!how)
642 continue;
643
644 inode = watch->inode;
645 mutex_lock(&inode->inotify_mutex);
646 mutex_lock(&ih->mutex);
647
648 /* make sure we didn't race with another list removal */
649 if (likely(idr_find(&ih->idr, watch->wd))) {
650 remove_watch_no_event(watch, ih);
651 put_inotify_watch(watch);
652 }
653
654 mutex_unlock(&ih->mutex);
655 mutex_unlock(&inode->inotify_mutex);
656 unpin_and_kill(watch, how);
657 }
658
659 /* free this handle: the put matching the get in inotify_init() */
660 put_inotify_handle(ih);
661}
662EXPORT_SYMBOL_GPL(inotify_destroy);
663
664/**
665 * inotify_find_watch - find an existing watch for an (ih,inode) pair
666 * @ih: inotify handle
667 * @inode: inode to watch
668 * @watchp: pointer to existing inotify_watch
669 *
670 * Caller must pin given inode (via nameidata).
671 */
672s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
673 struct inotify_watch **watchp)
674{
675 struct inotify_watch *old;
676 int ret = -ENOENT;
677
678 mutex_lock(&inode->inotify_mutex);
679 mutex_lock(&ih->mutex);
680
681 old = inode_find_handle(inode, ih);
682 if (unlikely(old)) {
683 get_inotify_watch(old); /* caller must put watch */
684 *watchp = old;
685 ret = old->wd;
686 }
687
688 mutex_unlock(&ih->mutex);
689 mutex_unlock(&inode->inotify_mutex);
690
691 return ret;
692}
693EXPORT_SYMBOL_GPL(inotify_find_watch);
694
695/**
696 * inotify_find_update_watch - find and update the mask of an existing watch
697 * @ih: inotify handle
698 * @inode: inode's watch to update
699 * @mask: mask of events to watch
700 *
701 * Caller must pin given inode (via nameidata).
702 */
703s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
704 u32 mask)
705{
706 struct inotify_watch *old;
707 int mask_add = 0;
708 int ret;
709
710 if (mask & IN_MASK_ADD)
711 mask_add = 1;
712
713 /* don't allow invalid bits: we don't want flags set */
714 mask &= IN_ALL_EVENTS | IN_ONESHOT;
715 if (unlikely(!mask))
716 return -EINVAL;
717
718 mutex_lock(&inode->inotify_mutex);
719 mutex_lock(&ih->mutex);
720
721 /*
722 * Handle the case of re-adding a watch on an (inode,ih) pair that we
723 * are already watching. We just update the mask and return its wd.
724 */
725 old = inode_find_handle(inode, ih);
726 if (unlikely(!old)) {
727 ret = -ENOENT;
728 goto out;
729 }
730
731 if (mask_add)
732 old->mask |= mask;
733 else
734 old->mask = mask;
735 ret = old->wd;
736out:
737 mutex_unlock(&ih->mutex);
738 mutex_unlock(&inode->inotify_mutex);
739 return ret;
740}
741EXPORT_SYMBOL_GPL(inotify_find_update_watch);
742
743/**
744 * inotify_add_watch - add a watch to an inotify instance
745 * @ih: inotify handle
746 * @watch: caller allocated watch structure
747 * @inode: inode to watch
748 * @mask: mask of events to watch
749 *
750 * Caller must pin given inode (via nameidata).
751 * Caller must ensure it only calls inotify_add_watch() once per watch.
752 * Calls inotify_handle_get_wd() so may sleep.
753 */
754s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
755 struct inode *inode, u32 mask)
756{
757 int ret = 0;
758 int newly_watched;
759
760 /* don't allow invalid bits: we don't want flags set */
761 mask &= IN_ALL_EVENTS | IN_ONESHOT;
762 if (unlikely(!mask))
763 return -EINVAL;
764 watch->mask = mask;
765
766 mutex_lock(&inode->inotify_mutex);
767 mutex_lock(&ih->mutex);
768
769 /* Initialize a new watch */
770 ret = inotify_handle_get_wd(ih, watch);
771 if (unlikely(ret))
772 goto out;
773 ret = watch->wd;
774
775 /* save a reference to handle and bump the count to make it official */
776 get_inotify_handle(ih);
777 watch->ih = ih;
778
779 /*
780 * Save a reference to the inode and bump the ref count to make it
781 * official. We hold a reference to nameidata, which makes this safe.
782 */
783 watch->inode = igrab(inode);
784
785 /* Add the watch to the handle's and the inode's list */
786 newly_watched = !inotify_inode_watched(inode);
787 list_add(&watch->h_list, &ih->watches);
788 list_add(&watch->i_list, &inode->inotify_watches);
789 /*
790 * Set child flags _after_ adding the watch, so there is no race
791 * windows where newly instantiated children could miss their parent's
792 * watched flag.
793 */
794 if (newly_watched)
795 set_dentry_child_flags(inode, 1);
796
797out:
798 mutex_unlock(&ih->mutex);
799 mutex_unlock(&inode->inotify_mutex);
800 return ret;
801}
802EXPORT_SYMBOL_GPL(inotify_add_watch);
803
804/**
805 * inotify_clone_watch - put the watch next to existing one
806 * @old: already installed watch
807 * @new: new watch
808 *
809 * Caller must hold the inotify_mutex of inode we are dealing with;
810 * it is expected to remove the old watch before unlocking the inode.
811 */
812s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new)
813{
814 struct inotify_handle *ih = old->ih;
815 int ret = 0;
816
817 new->mask = old->mask;
818 new->ih = ih;
819
820 mutex_lock(&ih->mutex);
821
822 /* Initialize a new watch */
823 ret = inotify_handle_get_wd(ih, new);
824 if (unlikely(ret))
825 goto out;
826 ret = new->wd;
827
828 get_inotify_handle(ih);
829
830 new->inode = igrab(old->inode);
831
832 list_add(&new->h_list, &ih->watches);
833 list_add(&new->i_list, &old->inode->inotify_watches);
834out:
835 mutex_unlock(&ih->mutex);
836 return ret;
837}
838
839void inotify_evict_watch(struct inotify_watch *watch)
840{
841 get_inotify_watch(watch);
842 mutex_lock(&watch->ih->mutex);
843 inotify_remove_watch_locked(watch->ih, watch);
844 mutex_unlock(&watch->ih->mutex);
845}
846
847/**
848 * inotify_rm_wd - remove a watch from an inotify instance
849 * @ih: inotify handle
850 * @wd: watch descriptor to remove
851 *
852 * Can sleep.
853 */
854int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
855{
856 struct inotify_watch *watch;
857 struct super_block *sb;
858 struct inode *inode;
859 int how;
860
861 mutex_lock(&ih->mutex);
862 watch = idr_find(&ih->idr, wd);
863 if (unlikely(!watch)) {
864 mutex_unlock(&ih->mutex);
865 return -EINVAL;
866 }
867 sb = watch->inode->i_sb;
868 how = pin_to_kill(ih, watch);
869 if (!how)
870 return 0;
871
872 inode = watch->inode;
873
874 mutex_lock(&inode->inotify_mutex);
875 mutex_lock(&ih->mutex);
876
877 /* make sure that we did not race */
878 if (likely(idr_find(&ih->idr, wd) == watch))
879 inotify_remove_watch_locked(ih, watch);
880
881 mutex_unlock(&ih->mutex);
882 mutex_unlock(&inode->inotify_mutex);
883 unpin_and_kill(watch, how);
884
885 return 0;
886}
887EXPORT_SYMBOL_GPL(inotify_rm_wd);
888
889/**
890 * inotify_rm_watch - remove a watch from an inotify instance
891 * @ih: inotify handle
892 * @watch: watch to remove
893 *
894 * Can sleep.
895 */
896int inotify_rm_watch(struct inotify_handle *ih,
897 struct inotify_watch *watch)
898{
899 return inotify_rm_wd(ih, watch->wd);
900}
901EXPORT_SYMBOL_GPL(inotify_rm_watch);
902
903/*
904 * inotify_setup - core initialization function
905 */
906static int __init inotify_setup(void)
907{
908 atomic_set(&inotify_cookie, 0);
909
910 return 0;
911}
912
913module_init(inotify_setup);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
new file mode 100644
index 000000000000..400f8064a548
--- /dev/null
+++ b/fs/notify/inotify/inotify_user.c
@@ -0,0 +1,778 @@
1/*
2 * fs/inotify_user.c - inotify support for userspace
3 *
4 * Authors:
5 * John McCutchan <ttb@tentacle.dhs.org>
6 * Robert Love <rml@novell.com>
7 *
8 * Copyright (C) 2005 John McCutchan
9 * Copyright 2006 Hewlett-Packard Development Company, L.P.
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2, or (at your option) any
14 * later version.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 */
21
22#include <linux/kernel.h>
23#include <linux/sched.h>
24#include <linux/slab.h>
25#include <linux/fs.h>
26#include <linux/file.h>
27#include <linux/mount.h>
28#include <linux/namei.h>
29#include <linux/poll.h>
30#include <linux/init.h>
31#include <linux/list.h>
32#include <linux/inotify.h>
33#include <linux/syscalls.h>
34#include <linux/magic.h>
35
36#include <asm/ioctls.h>
37
38static struct kmem_cache *watch_cachep __read_mostly;
39static struct kmem_cache *event_cachep __read_mostly;
40
41static struct vfsmount *inotify_mnt __read_mostly;
42
43/* these are configurable via /proc/sys/fs/inotify/ */
44static int inotify_max_user_instances __read_mostly;
45static int inotify_max_user_watches __read_mostly;
46static int inotify_max_queued_events __read_mostly;
47
48/*
49 * Lock ordering:
50 *
51 * inotify_dev->up_mutex (ensures we don't re-add the same watch)
52 * inode->inotify_mutex (protects inode's watch list)
53 * inotify_handle->mutex (protects inotify_handle's watch list)
54 * inotify_dev->ev_mutex (protects device's event queue)
55 */
56
57/*
58 * Lifetimes of the main data structures:
59 *
60 * inotify_device: Lifetime is managed by reference count, from
61 * sys_inotify_init() until release. Additional references can bump the count
62 * via get_inotify_dev() and drop the count via put_inotify_dev().
63 *
64 * inotify_user_watch: Lifetime is from create_watch() to the receipt of an
65 * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the
66 * first event, or to inotify_destroy().
67 */
68
69/*
70 * struct inotify_device - represents an inotify instance
71 *
72 * This structure is protected by the mutex 'mutex'.
73 */
74struct inotify_device {
75 wait_queue_head_t wq; /* wait queue for i/o */
76 struct mutex ev_mutex; /* protects event queue */
77 struct mutex up_mutex; /* synchronizes watch updates */
78 struct list_head events; /* list of queued events */
79 struct user_struct *user; /* user who opened this dev */
80 struct inotify_handle *ih; /* inotify handle */
81 struct fasync_struct *fa; /* async notification */
82 atomic_t count; /* reference count */
83 unsigned int queue_size; /* size of the queue (bytes) */
84 unsigned int event_count; /* number of pending events */
85 unsigned int max_events; /* maximum number of events */
86};
87
88/*
89 * struct inotify_kernel_event - An inotify event, originating from a watch and
90 * queued for user-space. A list of these is attached to each instance of the
91 * device. In read(), this list is walked and all events that can fit in the
92 * buffer are returned.
93 *
94 * Protected by dev->ev_mutex of the device in which we are queued.
95 */
96struct inotify_kernel_event {
97 struct inotify_event event; /* the user-space event */
98 struct list_head list; /* entry in inotify_device's list */
99 char *name; /* filename, if any */
100};
101
102/*
103 * struct inotify_user_watch - our version of an inotify_watch, we add
104 * a reference to the associated inotify_device.
105 */
106struct inotify_user_watch {
107 struct inotify_device *dev; /* associated device */
108 struct inotify_watch wdata; /* inotify watch data */
109};
110
#ifdef CONFIG_SYSCTL

#include <linux/sysctl.h>

/* lower bound handed to every entry below through .extra1 */
static int zero;

/*
 * The three inotify limits, each an int with mode 0644 that is handled by
 * proc_dointvec_minmax with 'zero' as its minimum.
 */
ctl_table inotify_table[] = {
	{
		.ctl_name	= INOTIFY_MAX_USER_INSTANCES,
		.procname	= "max_user_instances",
		.data		= &inotify_max_user_instances,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.strategy	= sysctl_intvec,
		.extra1		= &zero,
	},
	{
		.ctl_name	= INOTIFY_MAX_USER_WATCHES,
		.procname	= "max_user_watches",
		.data		= &inotify_max_user_watches,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.strategy	= sysctl_intvec,
		.extra1		= &zero,
	},
	{
		.ctl_name	= INOTIFY_MAX_QUEUED_EVENTS,
		.procname	= "max_queued_events",
		.data		= &inotify_max_queued_events,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.strategy	= sysctl_intvec,
		.extra1		= &zero,
	},
	{ .ctl_name = 0 }	/* terminator */
};
#endif /* CONFIG_SYSCTL */
151
152static inline void get_inotify_dev(struct inotify_device *dev)
153{
154 atomic_inc(&dev->count);
155}
156
157static inline void put_inotify_dev(struct inotify_device *dev)
158{
159 if (atomic_dec_and_test(&dev->count)) {
160 atomic_dec(&dev->user->inotify_devs);
161 free_uid(dev->user);
162 kfree(dev);
163 }
164}
165
166/*
167 * free_inotify_user_watch - cleans up the watch and its references
168 */
169static void free_inotify_user_watch(struct inotify_watch *w)
170{
171 struct inotify_user_watch *watch;
172 struct inotify_device *dev;
173
174 watch = container_of(w, struct inotify_user_watch, wdata);
175 dev = watch->dev;
176
177 atomic_dec(&dev->user->inotify_watches);
178 put_inotify_dev(dev);
179 kmem_cache_free(watch_cachep, watch);
180}
181
182/*
183 * kernel_event - create a new kernel event with the given parameters
184 *
185 * This function can sleep.
186 */
187static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
188 const char *name)
189{
190 struct inotify_kernel_event *kevent;
191
192 kevent = kmem_cache_alloc(event_cachep, GFP_NOFS);
193 if (unlikely(!kevent))
194 return NULL;
195
196 /* we hand this out to user-space, so zero it just in case */
197 memset(&kevent->event, 0, sizeof(struct inotify_event));
198
199 kevent->event.wd = wd;
200 kevent->event.mask = mask;
201 kevent->event.cookie = cookie;
202
203 INIT_LIST_HEAD(&kevent->list);
204
205 if (name) {
206 size_t len, rem, event_size = sizeof(struct inotify_event);
207
208 /*
209 * We need to pad the filename so as to properly align an
210 * array of inotify_event structures. Because the structure is
211 * small and the common case is a small filename, we just round
212 * up to the next multiple of the structure's sizeof. This is
213 * simple and safe for all architectures.
214 */
215 len = strlen(name) + 1;
216 rem = event_size - len;
217 if (len > event_size) {
218 rem = event_size - (len % event_size);
219 if (len % event_size == 0)
220 rem = 0;
221 }
222
223 kevent->name = kmalloc(len + rem, GFP_KERNEL);
224 if (unlikely(!kevent->name)) {
225 kmem_cache_free(event_cachep, kevent);
226 return NULL;
227 }
228 memcpy(kevent->name, name, len);
229 if (rem)
230 memset(kevent->name + len, 0, rem);
231 kevent->event.len = len + rem;
232 } else {
233 kevent->event.len = 0;
234 kevent->name = NULL;
235 }
236
237 return kevent;
238}
239
240/*
241 * inotify_dev_get_event - return the next event in the given dev's queue
242 *
243 * Caller must hold dev->ev_mutex.
244 */
245static inline struct inotify_kernel_event *
246inotify_dev_get_event(struct inotify_device *dev)
247{
248 return list_entry(dev->events.next, struct inotify_kernel_event, list);
249}
250
251/*
252 * inotify_dev_get_last_event - return the last event in the given dev's queue
253 *
254 * Caller must hold dev->ev_mutex.
255 */
256static inline struct inotify_kernel_event *
257inotify_dev_get_last_event(struct inotify_device *dev)
258{
259 if (list_empty(&dev->events))
260 return NULL;
261 return list_entry(dev->events.prev, struct inotify_kernel_event, list);
262}
263
264/*
265 * inotify_dev_queue_event - event handler registered with core inotify, adds
266 * a new event to the given device
267 *
268 * Can sleep (calls kernel_event()).
269 */
270static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
271 u32 cookie, const char *name,
272 struct inode *ignored)
273{
274 struct inotify_user_watch *watch;
275 struct inotify_device *dev;
276 struct inotify_kernel_event *kevent, *last;
277
278 watch = container_of(w, struct inotify_user_watch, wdata);
279 dev = watch->dev;
280
281 mutex_lock(&dev->ev_mutex);
282
283 /* we can safely put the watch as we don't reference it while
284 * generating the event
285 */
286 if (mask & IN_IGNORED || w->mask & IN_ONESHOT)
287 put_inotify_watch(w); /* final put */
288
289 /* coalescing: drop this event if it is a dupe of the previous */
290 last = inotify_dev_get_last_event(dev);
291 if (last && last->event.mask == mask && last->event.wd == wd &&
292 last->event.cookie == cookie) {
293 const char *lastname = last->name;
294
295 if (!name && !lastname)
296 goto out;
297 if (name && lastname && !strcmp(lastname, name))
298 goto out;
299 }
300
301 /* the queue overflowed and we already sent the Q_OVERFLOW event */
302 if (unlikely(dev->event_count > dev->max_events))
303 goto out;
304
305 /* if the queue overflows, we need to notify user space */
306 if (unlikely(dev->event_count == dev->max_events))
307 kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
308 else
309 kevent = kernel_event(wd, mask, cookie, name);
310
311 if (unlikely(!kevent))
312 goto out;
313
314 /* queue the event and wake up anyone waiting */
315 dev->event_count++;
316 dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
317 list_add_tail(&kevent->list, &dev->events);
318 wake_up_interruptible(&dev->wq);
319 kill_fasync(&dev->fa, SIGIO, POLL_IN);
320
321out:
322 mutex_unlock(&dev->ev_mutex);
323}
324
325/*
326 * remove_kevent - cleans up the given kevent
327 *
328 * Caller must hold dev->ev_mutex.
329 */
330static void remove_kevent(struct inotify_device *dev,
331 struct inotify_kernel_event *kevent)
332{
333 list_del(&kevent->list);
334
335 dev->event_count--;
336 dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
337}
338
339/*
340 * free_kevent - frees the given kevent.
341 */
342static void free_kevent(struct inotify_kernel_event *kevent)
343{
344 kfree(kevent->name);
345 kmem_cache_free(event_cachep, kevent);
346}
347
348/*
349 * inotify_dev_event_dequeue - destroy an event on the given device
350 *
351 * Caller must hold dev->ev_mutex.
352 */
353static void inotify_dev_event_dequeue(struct inotify_device *dev)
354{
355 if (!list_empty(&dev->events)) {
356 struct inotify_kernel_event *kevent;
357 kevent = inotify_dev_get_event(dev);
358 remove_kevent(dev, kevent);
359 free_kevent(kevent);
360 }
361}
362
363/*
364 * find_inode - resolve a user-given path to a specific inode
365 */
366static int find_inode(const char __user *dirname, struct path *path,
367 unsigned flags)
368{
369 int error;
370
371 error = user_path_at(AT_FDCWD, dirname, flags, path);
372 if (error)
373 return error;
374 /* you can only watch an inode if you have read permissions on it */
375 error = inode_permission(path->dentry->d_inode, MAY_READ);
376 if (error)
377 path_put(path);
378 return error;
379}
380
381/*
382 * create_watch - creates a watch on the given device.
383 *
384 * Callers must hold dev->up_mutex.
385 */
386static int create_watch(struct inotify_device *dev, struct inode *inode,
387 u32 mask)
388{
389 struct inotify_user_watch *watch;
390 int ret;
391
392 if (atomic_read(&dev->user->inotify_watches) >=
393 inotify_max_user_watches)
394 return -ENOSPC;
395
396 watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
397 if (unlikely(!watch))
398 return -ENOMEM;
399
400 /* save a reference to device and bump the count to make it official */
401 get_inotify_dev(dev);
402 watch->dev = dev;
403
404 atomic_inc(&dev->user->inotify_watches);
405
406 inotify_init_watch(&watch->wdata);
407 ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
408 if (ret < 0)
409 free_inotify_user_watch(&watch->wdata);
410
411 return ret;
412}
413
414/* Device Interface */
415
416static unsigned int inotify_poll(struct file *file, poll_table *wait)
417{
418 struct inotify_device *dev = file->private_data;
419 int ret = 0;
420
421 poll_wait(file, &dev->wq, wait);
422 mutex_lock(&dev->ev_mutex);
423 if (!list_empty(&dev->events))
424 ret = POLLIN | POLLRDNORM;
425 mutex_unlock(&dev->ev_mutex);
426
427 return ret;
428}
429
430static ssize_t inotify_read(struct file *file, char __user *buf,
431 size_t count, loff_t *pos)
432{
433 size_t event_size = sizeof (struct inotify_event);
434 struct inotify_device *dev;
435 char __user *start;
436 int ret;
437 DEFINE_WAIT(wait);
438
439 start = buf;
440 dev = file->private_data;
441
442 while (1) {
443
444 prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
445
446 mutex_lock(&dev->ev_mutex);
447 if (!list_empty(&dev->events)) {
448 ret = 0;
449 break;
450 }
451 mutex_unlock(&dev->ev_mutex);
452
453 if (file->f_flags & O_NONBLOCK) {
454 ret = -EAGAIN;
455 break;
456 }
457
458 if (signal_pending(current)) {
459 ret = -EINTR;
460 break;
461 }
462
463 schedule();
464 }
465
466 finish_wait(&dev->wq, &wait);
467 if (ret)
468 return ret;
469
470 while (1) {
471 struct inotify_kernel_event *kevent;
472
473 ret = buf - start;
474 if (list_empty(&dev->events))
475 break;
476
477 kevent = inotify_dev_get_event(dev);
478 if (event_size + kevent->event.len > count) {
479 if (ret == 0 && count > 0) {
480 /*
481 * could not get a single event because we
482 * didn't have enough buffer space.
483 */
484 ret = -EINVAL;
485 }
486 break;
487 }
488 remove_kevent(dev, kevent);
489
490 /*
491 * Must perform the copy_to_user outside the mutex in order
492 * to avoid a lock order reversal with mmap_sem.
493 */
494 mutex_unlock(&dev->ev_mutex);
495
496 if (copy_to_user(buf, &kevent->event, event_size)) {
497 ret = -EFAULT;
498 break;
499 }
500 buf += event_size;
501 count -= event_size;
502
503 if (kevent->name) {
504 if (copy_to_user(buf, kevent->name, kevent->event.len)){
505 ret = -EFAULT;
506 break;
507 }
508 buf += kevent->event.len;
509 count -= kevent->event.len;
510 }
511
512 free_kevent(kevent);
513
514 mutex_lock(&dev->ev_mutex);
515 }
516 mutex_unlock(&dev->ev_mutex);
517
518 return ret;
519}
520
521static int inotify_fasync(int fd, struct file *file, int on)
522{
523 struct inotify_device *dev = file->private_data;
524
525 return fasync_helper(fd, file, on, &dev->fa) >= 0 ? 0 : -EIO;
526}
527
528static int inotify_release(struct inode *ignored, struct file *file)
529{
530 struct inotify_device *dev = file->private_data;
531
532 inotify_destroy(dev->ih);
533
534 /* destroy all of the events on this device */
535 mutex_lock(&dev->ev_mutex);
536 while (!list_empty(&dev->events))
537 inotify_dev_event_dequeue(dev);
538 mutex_unlock(&dev->ev_mutex);
539
540 /* free this device: the put matching the get in inotify_init() */
541 put_inotify_dev(dev);
542
543 return 0;
544}
545
546static long inotify_ioctl(struct file *file, unsigned int cmd,
547 unsigned long arg)
548{
549 struct inotify_device *dev;
550 void __user *p;
551 int ret = -ENOTTY;
552
553 dev = file->private_data;
554 p = (void __user *) arg;
555
556 switch (cmd) {
557 case FIONREAD:
558 ret = put_user(dev->queue_size, (int __user *) p);
559 break;
560 }
561
562 return ret;
563}
564
565static const struct file_operations inotify_fops = {
566 .poll = inotify_poll,
567 .read = inotify_read,
568 .fasync = inotify_fasync,
569 .release = inotify_release,
570 .unlocked_ioctl = inotify_ioctl,
571 .compat_ioctl = inotify_ioctl,
572};
573
574static const struct inotify_operations inotify_user_ops = {
575 .handle_event = inotify_dev_queue_event,
576 .destroy_watch = free_inotify_user_watch,
577};
578
579asmlinkage long sys_inotify_init1(int flags)
580{
581 struct inotify_device *dev;
582 struct inotify_handle *ih;
583 struct user_struct *user;
584 struct file *filp;
585 int fd, ret;
586
587 /* Check the IN_* constants for consistency. */
588 BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
589 BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
590
591 if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
592 return -EINVAL;
593
594 fd = get_unused_fd_flags(flags & O_CLOEXEC);
595 if (fd < 0)
596 return fd;
597
598 filp = get_empty_filp();
599 if (!filp) {
600 ret = -ENFILE;
601 goto out_put_fd;
602 }
603
604 user = get_current_user();
605 if (unlikely(atomic_read(&user->inotify_devs) >=
606 inotify_max_user_instances)) {
607 ret = -EMFILE;
608 goto out_free_uid;
609 }
610
611 dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
612 if (unlikely(!dev)) {
613 ret = -ENOMEM;
614 goto out_free_uid;
615 }
616
617 ih = inotify_init(&inotify_user_ops);
618 if (IS_ERR(ih)) {
619 ret = PTR_ERR(ih);
620 goto out_free_dev;
621 }
622 dev->ih = ih;
623 dev->fa = NULL;
624
625 filp->f_op = &inotify_fops;
626 filp->f_path.mnt = mntget(inotify_mnt);
627 filp->f_path.dentry = dget(inotify_mnt->mnt_root);
628 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
629 filp->f_mode = FMODE_READ;
630 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
631 filp->private_data = dev;
632
633 INIT_LIST_HEAD(&dev->events);
634 init_waitqueue_head(&dev->wq);
635 mutex_init(&dev->ev_mutex);
636 mutex_init(&dev->up_mutex);
637 dev->event_count = 0;
638 dev->queue_size = 0;
639 dev->max_events = inotify_max_queued_events;
640 dev->user = user;
641 atomic_set(&dev->count, 0);
642
643 get_inotify_dev(dev);
644 atomic_inc(&user->inotify_devs);
645 fd_install(fd, filp);
646
647 return fd;
648out_free_dev:
649 kfree(dev);
650out_free_uid:
651 free_uid(user);
652 put_filp(filp);
653out_put_fd:
654 put_unused_fd(fd);
655 return ret;
656}
657
658asmlinkage long sys_inotify_init(void)
659{
660 return sys_inotify_init1(0);
661}
662
663asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask)
664{
665 struct inode *inode;
666 struct inotify_device *dev;
667 struct path path;
668 struct file *filp;
669 int ret, fput_needed;
670 unsigned flags = 0;
671
672 filp = fget_light(fd, &fput_needed);
673 if (unlikely(!filp))
674 return -EBADF;
675
676 /* verify that this is indeed an inotify instance */
677 if (unlikely(filp->f_op != &inotify_fops)) {
678 ret = -EINVAL;
679 goto fput_and_out;
680 }
681
682 if (!(mask & IN_DONT_FOLLOW))
683 flags |= LOOKUP_FOLLOW;
684 if (mask & IN_ONLYDIR)
685 flags |= LOOKUP_DIRECTORY;
686
687 ret = find_inode(pathname, &path, flags);
688 if (unlikely(ret))
689 goto fput_and_out;
690
691 /* inode held in place by reference to path; dev by fget on fd */
692 inode = path.dentry->d_inode;
693 dev = filp->private_data;
694
695 mutex_lock(&dev->up_mutex);
696 ret = inotify_find_update_watch(dev->ih, inode, mask);
697 if (ret == -ENOENT)
698 ret = create_watch(dev, inode, mask);
699 mutex_unlock(&dev->up_mutex);
700
701 path_put(&path);
702fput_and_out:
703 fput_light(filp, fput_needed);
704 return ret;
705}
706
707asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
708{
709 struct file *filp;
710 struct inotify_device *dev;
711 int ret, fput_needed;
712
713 filp = fget_light(fd, &fput_needed);
714 if (unlikely(!filp))
715 return -EBADF;
716
717 /* verify that this is indeed an inotify instance */
718 if (unlikely(filp->f_op != &inotify_fops)) {
719 ret = -EINVAL;
720 goto out;
721 }
722
723 dev = filp->private_data;
724
725 /* we free our watch data when we get IN_IGNORED */
726 ret = inotify_rm_wd(dev->ih, wd);
727
728out:
729 fput_light(filp, fput_needed);
730 return ret;
731}
732
733static int
734inotify_get_sb(struct file_system_type *fs_type, int flags,
735 const char *dev_name, void *data, struct vfsmount *mnt)
736{
737 return get_sb_pseudo(fs_type, "inotify", NULL,
738 INOTIFYFS_SUPER_MAGIC, mnt);
739}
740
741static struct file_system_type inotify_fs_type = {
742 .name = "inotifyfs",
743 .get_sb = inotify_get_sb,
744 .kill_sb = kill_anon_super,
745};
746
747/*
748 * inotify_user_setup - Our initialization function. Note that we cannnot return
749 * error because we have compiled-in VFS hooks. So an (unlikely) failure here
750 * must result in panic().
751 */
752static int __init inotify_user_setup(void)
753{
754 int ret;
755
756 ret = register_filesystem(&inotify_fs_type);
757 if (unlikely(ret))
758 panic("inotify: register_filesystem returned %d!\n", ret);
759
760 inotify_mnt = kern_mount(&inotify_fs_type);
761 if (IS_ERR(inotify_mnt))
762 panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
763
764 inotify_max_queued_events = 16384;
765 inotify_max_user_instances = 128;
766 inotify_max_user_watches = 8192;
767
768 watch_cachep = kmem_cache_create("inotify_watch_cache",
769 sizeof(struct inotify_user_watch),
770 0, SLAB_PANIC, NULL);
771 event_cachep = kmem_cache_create("inotify_event_cache",
772 sizeof(struct inotify_kernel_event),
773 0, SLAB_PANIC, NULL);
774
775 return 0;
776}
777
778module_init(inotify_user_setup);