author    Amy Griffis <amy.griffis@hp.com>    2006-06-01 16:10:59 -0400
committer Al Viro <viro@zeniv.linux.org.uk>  2006-06-20 05:25:17 -0400
commit    2d9048e201bfb67ba21f05e647b1286b8a4a5667 (patch)
tree      1df2ca6780d403f3209cf445f8b0b27f45098434 /fs
parent    90204e0b7b51e9f2a6905adca12dc331128602c7 (diff)
[PATCH] inotify (1/5): split kernel API from userspace support
The following series of patches introduces a kernel API for inotify, making
it possible for kernel modules to benefit from inotify's mechanism for
watching inodes.  With these patches, inotify will maintain for each caller a
list of watches (via an embedded struct inotify_watch), where each
inotify_watch is associated with a corresponding struct inode.  The caller
registers an event handler and specifies for which filesystem events their
event handler should be called per inotify_watch.

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Acked-by: Robert Love <rml@novell.com>
Acked-by: John McCutchan <john@johnmccutchan.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
-rw-r--r--  fs/Kconfig        |  24
-rw-r--r--  fs/Makefile       |   1
-rw-r--r--  fs/inotify.c      | 941
-rw-r--r--  fs/inotify_user.c | 717
4 files changed, 966 insertions(+), 717 deletions(-)
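The shape of the new kernel API is easiest to see from how fs/inotify_user.c
(added below) consumes it: a caller embeds struct inotify_watch in its own
structure, supplies handle_event and destroy_watch callbacks through a struct
inotify_operations, and drives everything through the inotify_handle returned
by inotify_init().  A minimal, hypothetical in-kernel consumer might look like
the sketch below; the inotify_* calls and the inotify_operations fields are
the ones introduced by this patch, while my_watch, my_handle_event,
my_destroy_watch and my_start_watching are illustrative names only.

/*
 * Hypothetical sketch of an in-kernel inotify consumer using the API
 * introduced by this patch.  Only the inotify_* interfaces and the
 * inotify_operations fields come from the patch; the my_* names are
 * illustrative.
 */
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/inotify.h>

struct my_watch {
	struct inotify_watch wdata;	/* embedded, managed by inotify */
	/* caller-private state would go here */
};

static void my_handle_event(struct inotify_watch *w, u32 wd, u32 mask,
			    u32 cookie, const char *name)
{
	struct my_watch *watch = container_of(w, struct my_watch, wdata);

	/* react to the event here ... */

	/* per the new lifetime rules, the caller does the final put on
	 * IN_IGNORED (or after the first event for IN_ONESHOT watches) */
	if (mask & IN_IGNORED)
		put_inotify_watch(&watch->wdata);
}

static void my_destroy_watch(struct inotify_watch *w)
{
	/* called by inotify once the last reference to the watch is dropped */
	kfree(container_of(w, struct my_watch, wdata));
}

static const struct inotify_operations my_inotify_ops = {
	.handle_event	= my_handle_event,
	.destroy_watch	= my_destroy_watch,
};

static struct inotify_handle *my_ih;

/* inode must already be pinned by the caller; inotify grabs its own ref */
static int my_start_watching(struct inode *inode)
{
	struct my_watch *watch;
	s32 wd;

	my_ih = inotify_init(&my_inotify_ops);
	if (IS_ERR(my_ih))
		return PTR_ERR(my_ih);

	watch = kmalloc(sizeof(*watch), GFP_KERNEL);
	if (unlikely(!watch)) {
		inotify_destroy(my_ih);
		return -ENOMEM;
	}

	wd = inotify_add_watch(my_ih, &watch->wdata, inode, IN_MODIFY | IN_DELETE);
	if (wd < 0) {
		kfree(watch);		/* add failed: watch was never installed */
		inotify_destroy(my_ih);
		return wd;
	}

	return 0;
}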
diff --git a/fs/Kconfig b/fs/Kconfig
index f9b5842c8d2d..74f11a23622d 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -393,18 +393,30 @@ config INOTIFY
 	bool "Inotify file change notification support"
 	default y
 	---help---
-	  Say Y here to enable inotify support and the associated system
-	  calls. Inotify is a file change notification system and a
-	  replacement for dnotify. Inotify fixes numerous shortcomings in
-	  dnotify and introduces several new features. It allows monitoring
-	  of both files and directories via a single open fd. Other features
-	  include multiple file events, one-shot support, and unmount
+	  Say Y here to enable inotify support. Inotify is a file change
+	  notification system and a replacement for dnotify. Inotify fixes
+	  numerous shortcomings in dnotify and introduces several new features
+	  including multiple file events, one-shot support, and unmount
 	  notification.
 
 	  For more information, see Documentation/filesystems/inotify.txt
 
 	  If unsure, say Y.
 
+config INOTIFY_USER
+	bool "Inotify support for userspace"
+	depends on INOTIFY
+	default y
+	---help---
+	  Say Y here to enable inotify support for userspace, including the
+	  associated system calls. Inotify allows monitoring of both files and
+	  directories via a single open fd. Events are read from the file
+	  descriptor, which is also select()- and poll()-able.
+
+	  For more information, see Documentation/filesystems/inotify.txt
+
+	  If unsure, say Y.
+
 config QUOTA
 	bool "Quota support"
 	help
diff --git a/fs/Makefile b/fs/Makefile
index 078d3d1191a5..d0ea6bfccf29 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -13,6 +13,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \
 		ioprio.o pnode.o drop_caches.o splice.o sync.o
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
+obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
 obj-$(CONFIG_COMPAT)		+= compat.o compat_ioctl.o
 
diff --git a/fs/inotify.c b/fs/inotify.c
index 732ec4bd5774..a1bedf3975ca 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -5,7 +5,10 @@
  *	John McCutchan	<ttb@tentacle.dhs.org>
  *	Robert Love	<rml@novell.com>
  *
+ * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
+ *
  * Copyright (C) 2005 John McCutchan
+ * Copyright 2006 Hewlett-Packard Development Company, L.P.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
@@ -20,35 +23,17 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/idr.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <linux/poll.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/writeback.h>
 #include <linux/inotify.h>
-#include <linux/syscalls.h>
-
-#include <asm/ioctls.h>
 
 static atomic_t inotify_cookie;
 
-static kmem_cache_t *watch_cachep __read_mostly;
-static kmem_cache_t *event_cachep __read_mostly;
-
-static struct vfsmount *inotify_mnt __read_mostly;
-
-/* these are configurable via /proc/sys/fs/inotify/ */
-int inotify_max_user_instances __read_mostly;
-int inotify_max_user_watches __read_mostly;
-int inotify_max_queued_events __read_mostly;
-
 /*
  * Lock ordering:
  *
@@ -56,327 +41,108 @@ int inotify_max_queued_events __read_mostly;
56 * iprune_mutex (synchronize shrink_icache_memory()) 41 * iprune_mutex (synchronize shrink_icache_memory())
57 * inode_lock (protects the super_block->s_inodes list) 42 * inode_lock (protects the super_block->s_inodes list)
58 * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list) 43 * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
59 * inotify_dev->mutex (protects inotify_device and watches->d_list) 44 * inotify_handle->mutex (protects inotify_handle and watches->h_list)
45 *
46 * The inode->inotify_mutex and inotify_handle->mutex and held during execution
47 * of a caller's event handler. Thus, the caller must not hold any locks
48 * taken in their event handler while calling any of the published inotify
49 * interfaces.
60 */ 50 */
61 51
62/* 52/*
63 * Lifetimes of the three main data structures--inotify_device, inode, and 53 * Lifetimes of the three main data structures--inotify_handle, inode, and
64 * inotify_watch--are managed by reference count. 54 * inotify_watch--are managed by reference count.
65 * 55 *
66 * inotify_device: Lifetime is from inotify_init() until release. Additional 56 * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
67 * references can bump the count via get_inotify_dev() and drop the count via 57 * Additional references can bump the count via get_inotify_handle() and drop
68 * put_inotify_dev(). 58 * the count via put_inotify_handle().
69 * 59 *
70 * inotify_watch: Lifetime is from create_watch() to destory_watch(). 60 * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
71 * Additional references can bump the count via get_inotify_watch() and drop 61 * to remove_watch_no_event(). Additional references can bump the count via
72 * the count via put_inotify_watch(). 62 * get_inotify_watch() and drop the count via put_inotify_watch(). The caller
63 * is reponsible for the final put after receiving IN_IGNORED, or when using
64 * IN_ONESHOT after receiving the first event. Inotify does the final put if
65 * inotify_destroy() is called.
73 * 66 *
74 * inode: Pinned so long as the inode is associated with a watch, from 67 * inode: Pinned so long as the inode is associated with a watch, from
75 * create_watch() to put_inotify_watch(). 68 * inotify_add_watch() to the final put_inotify_watch().
76 */ 69 */
77 70
78/* 71/*
79 * struct inotify_device - represents an inotify instance 72 * struct inotify_handle - represents an inotify instance
80 * 73 *
81 * This structure is protected by the mutex 'mutex'. 74 * This structure is protected by the mutex 'mutex'.
82 */ 75 */
83struct inotify_device { 76struct inotify_handle {
84 wait_queue_head_t wq; /* wait queue for i/o */
85 struct idr idr; /* idr mapping wd -> watch */ 77 struct idr idr; /* idr mapping wd -> watch */
86 struct mutex mutex; /* protects this bad boy */ 78 struct mutex mutex; /* protects this bad boy */
87 struct list_head events; /* list of queued events */
88 struct list_head watches; /* list of watches */ 79 struct list_head watches; /* list of watches */
89 atomic_t count; /* reference count */ 80 atomic_t count; /* reference count */
90 struct user_struct *user; /* user who opened this dev */
91 unsigned int queue_size; /* size of the queue (bytes) */
92 unsigned int event_count; /* number of pending events */
93 unsigned int max_events; /* maximum number of events */
94 u32 last_wd; /* the last wd allocated */ 81 u32 last_wd; /* the last wd allocated */
82 const struct inotify_operations *in_ops; /* inotify caller operations */
95}; 83};
96 84
97/* 85static inline void get_inotify_handle(struct inotify_handle *ih)
98 * struct inotify_kernel_event - An inotify event, originating from a watch and
99 * queued for user-space. A list of these is attached to each instance of the
100 * device. In read(), this list is walked and all events that can fit in the
101 * buffer are returned.
102 *
103 * Protected by dev->mutex of the device in which we are queued.
104 */
105struct inotify_kernel_event {
106 struct inotify_event event; /* the user-space event */
107 struct list_head list; /* entry in inotify_device's list */
108 char *name; /* filename, if any */
109};
110
111/*
112 * struct inotify_watch - represents a watch request on a specific inode
113 *
114 * d_list is protected by dev->mutex of the associated watch->dev.
115 * i_list and mask are protected by inode->inotify_mutex of the associated inode.
116 * dev, inode, and wd are never written to once the watch is created.
117 */
118struct inotify_watch {
119 struct list_head d_list; /* entry in inotify_device's list */
120 struct list_head i_list; /* entry in inode's list */
121 atomic_t count; /* reference count */
122 struct inotify_device *dev; /* associated device */
123 struct inode *inode; /* associated inode */
124 s32 wd; /* watch descriptor */
125 u32 mask; /* event mask for this watch */
126};
127
128#ifdef CONFIG_SYSCTL
129
130#include <linux/sysctl.h>
131
132static int zero;
133
134ctl_table inotify_table[] = {
135 {
136 .ctl_name = INOTIFY_MAX_USER_INSTANCES,
137 .procname = "max_user_instances",
138 .data = &inotify_max_user_instances,
139 .maxlen = sizeof(int),
140 .mode = 0644,
141 .proc_handler = &proc_dointvec_minmax,
142 .strategy = &sysctl_intvec,
143 .extra1 = &zero,
144 },
145 {
146 .ctl_name = INOTIFY_MAX_USER_WATCHES,
147 .procname = "max_user_watches",
148 .data = &inotify_max_user_watches,
149 .maxlen = sizeof(int),
150 .mode = 0644,
151 .proc_handler = &proc_dointvec_minmax,
152 .strategy = &sysctl_intvec,
153 .extra1 = &zero,
154 },
155 {
156 .ctl_name = INOTIFY_MAX_QUEUED_EVENTS,
157 .procname = "max_queued_events",
158 .data = &inotify_max_queued_events,
159 .maxlen = sizeof(int),
160 .mode = 0644,
161 .proc_handler = &proc_dointvec_minmax,
162 .strategy = &sysctl_intvec,
163 .extra1 = &zero
164 },
165 { .ctl_name = 0 }
166};
167#endif /* CONFIG_SYSCTL */
168
169static inline void get_inotify_dev(struct inotify_device *dev)
170{ 86{
171 atomic_inc(&dev->count); 87 atomic_inc(&ih->count);
172} 88}
173 89
174static inline void put_inotify_dev(struct inotify_device *dev) 90static inline void put_inotify_handle(struct inotify_handle *ih)
175{ 91{
176 if (atomic_dec_and_test(&dev->count)) { 92 if (atomic_dec_and_test(&ih->count)) {
177 atomic_dec(&dev->user->inotify_devs); 93 idr_destroy(&ih->idr);
178 free_uid(dev->user); 94 kfree(ih);
179 idr_destroy(&dev->idr);
180 kfree(dev);
181 } 95 }
182} 96}
183 97
184static inline void get_inotify_watch(struct inotify_watch *watch) 98/**
99 * get_inotify_watch - grab a reference to an inotify_watch
100 * @watch: watch to grab
101 */
102void get_inotify_watch(struct inotify_watch *watch)
185{ 103{
186 atomic_inc(&watch->count); 104 atomic_inc(&watch->count);
187} 105}
106EXPORT_SYMBOL_GPL(get_inotify_watch);
188 107
189/* 108/**
190 * put_inotify_watch - decrements the ref count on a given watch. cleans up 109 * put_inotify_watch - decrements the ref count on a given watch. cleans up
191 * the watch and its references if the count reaches zero. 110 * watch references if the count reaches zero. inotify_watch is freed by
111 * inotify callers via the destroy_watch() op.
112 * @watch: watch to release
192 */ 113 */
193static inline void put_inotify_watch(struct inotify_watch *watch) 114void put_inotify_watch(struct inotify_watch *watch)
194{ 115{
195 if (atomic_dec_and_test(&watch->count)) { 116 if (atomic_dec_and_test(&watch->count)) {
196 put_inotify_dev(watch->dev); 117 struct inotify_handle *ih = watch->ih;
197 iput(watch->inode);
198 kmem_cache_free(watch_cachep, watch);
199 }
200}
201
202/*
203 * kernel_event - create a new kernel event with the given parameters
204 *
205 * This function can sleep.
206 */
207static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
208 const char *name)
209{
210 struct inotify_kernel_event *kevent;
211
212 kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL);
213 if (unlikely(!kevent))
214 return NULL;
215
216 /* we hand this out to user-space, so zero it just in case */
217 memset(&kevent->event, 0, sizeof(struct inotify_event));
218
219 kevent->event.wd = wd;
220 kevent->event.mask = mask;
221 kevent->event.cookie = cookie;
222
223 INIT_LIST_HEAD(&kevent->list);
224
225 if (name) {
226 size_t len, rem, event_size = sizeof(struct inotify_event);
227
228 /*
229 * We need to pad the filename so as to properly align an
230 * array of inotify_event structures. Because the structure is
231 * small and the common case is a small filename, we just round
232 * up to the next multiple of the structure's sizeof. This is
233 * simple and safe for all architectures.
234 */
235 len = strlen(name) + 1;
236 rem = event_size - len;
237 if (len > event_size) {
238 rem = event_size - (len % event_size);
239 if (len % event_size == 0)
240 rem = 0;
241 }
242
243 kevent->name = kmalloc(len + rem, GFP_KERNEL);
244 if (unlikely(!kevent->name)) {
245 kmem_cache_free(event_cachep, kevent);
246 return NULL;
247 }
248 memcpy(kevent->name, name, len);
249 if (rem)
250 memset(kevent->name + len, 0, rem);
251 kevent->event.len = len + rem;
252 } else {
253 kevent->event.len = 0;
254 kevent->name = NULL;
255 }
256
257 return kevent;
258}
259
260/*
261 * inotify_dev_get_event - return the next event in the given dev's queue
262 *
263 * Caller must hold dev->mutex.
264 */
265static inline struct inotify_kernel_event *
266inotify_dev_get_event(struct inotify_device *dev)
267{
268 return list_entry(dev->events.next, struct inotify_kernel_event, list);
269}
270
271/*
272 * inotify_dev_queue_event - add a new event to the given device
273 *
274 * Caller must hold dev->mutex. Can sleep (calls kernel_event()).
275 */
276static void inotify_dev_queue_event(struct inotify_device *dev,
277 struct inotify_watch *watch, u32 mask,
278 u32 cookie, const char *name)
279{
280 struct inotify_kernel_event *kevent, *last;
281
282 /* coalescing: drop this event if it is a dupe of the previous */
283 last = inotify_dev_get_event(dev);
284 if (last && last->event.mask == mask && last->event.wd == watch->wd &&
285 last->event.cookie == cookie) {
286 const char *lastname = last->name;
287
288 if (!name && !lastname)
289 return;
290 if (name && lastname && !strcmp(lastname, name))
291 return;
292 }
293
294 /* the queue overflowed and we already sent the Q_OVERFLOW event */
295 if (unlikely(dev->event_count > dev->max_events))
296 return;
297
298 /* if the queue overflows, we need to notify user space */
299 if (unlikely(dev->event_count == dev->max_events))
300 kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
301 else
302 kevent = kernel_event(watch->wd, mask, cookie, name);
303
304 if (unlikely(!kevent))
305 return;
306
307 /* queue the event and wake up anyone waiting */
308 dev->event_count++;
309 dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
310 list_add_tail(&kevent->list, &dev->events);
311 wake_up_interruptible(&dev->wq);
312}
313
314/*
315 * remove_kevent - cleans up and ultimately frees the given kevent
316 *
317 * Caller must hold dev->mutex.
318 */
319static void remove_kevent(struct inotify_device *dev,
320 struct inotify_kernel_event *kevent)
321{
322 list_del(&kevent->list);
323 118
324 dev->event_count--; 119 iput(watch->inode);
325 dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; 120 ih->in_ops->destroy_watch(watch);
326 121 put_inotify_handle(ih);
327 kfree(kevent->name);
328 kmem_cache_free(event_cachep, kevent);
329}
330
331/*
332 * inotify_dev_event_dequeue - destroy an event on the given device
333 *
334 * Caller must hold dev->mutex.
335 */
336static void inotify_dev_event_dequeue(struct inotify_device *dev)
337{
338 if (!list_empty(&dev->events)) {
339 struct inotify_kernel_event *kevent;
340 kevent = inotify_dev_get_event(dev);
341 remove_kevent(dev, kevent);
342 } 122 }
343} 123}
124EXPORT_SYMBOL_GPL(put_inotify_watch);
344 125
345/* 126/*
346 * inotify_dev_get_wd - returns the next WD for use by the given dev 127 * inotify_handle_get_wd - returns the next WD for use by the given handle
347 * 128 *
348 * Callers must hold dev->mutex. This function can sleep. 129 * Callers must hold ih->mutex. This function can sleep.
349 */ 130 */
350static int inotify_dev_get_wd(struct inotify_device *dev, 131static int inotify_handle_get_wd(struct inotify_handle *ih,
351 struct inotify_watch *watch) 132 struct inotify_watch *watch)
352{ 133{
353 int ret; 134 int ret;
354 135
355 do { 136 do {
356 if (unlikely(!idr_pre_get(&dev->idr, GFP_KERNEL))) 137 if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL)))
357 return -ENOSPC; 138 return -ENOSPC;
358 ret = idr_get_new_above(&dev->idr, watch, dev->last_wd+1, &watch->wd); 139 ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
359 } while (ret == -EAGAIN); 140 } while (ret == -EAGAIN);
360 141
361 return ret; 142 if (likely(!ret))
362} 143 ih->last_wd = watch->wd;
363 144
364/* 145 return ret;
365 * find_inode - resolve a user-given path to a specific inode and return a nd
366 */
367static int find_inode(const char __user *dirname, struct nameidata *nd,
368 unsigned flags)
369{
370 int error;
371
372 error = __user_walk(dirname, flags, nd);
373 if (error)
374 return error;
375 /* you can only watch an inode if you have read permissions on it */
376 error = vfs_permission(nd, MAY_READ);
377 if (error)
378 path_release(nd);
379 return error;
380} 146}
381 147
382/* 148/*
@@ -422,67 +188,18 @@ static void set_dentry_child_flags(struct inode *inode, int watched)
422} 188}
423 189
424/* 190/*
425 * create_watch - creates a watch on the given device. 191 * inotify_find_handle - find the watch associated with the given inode and
426 * 192 * handle
427 * Callers must hold dev->mutex. Calls inotify_dev_get_wd() so may sleep.
428 * Both 'dev' and 'inode' (by way of nameidata) need to be pinned.
429 */
430static struct inotify_watch *create_watch(struct inotify_device *dev,
431 u32 mask, struct inode *inode)
432{
433 struct inotify_watch *watch;
434 int ret;
435
436 if (atomic_read(&dev->user->inotify_watches) >=
437 inotify_max_user_watches)
438 return ERR_PTR(-ENOSPC);
439
440 watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
441 if (unlikely(!watch))
442 return ERR_PTR(-ENOMEM);
443
444 ret = inotify_dev_get_wd(dev, watch);
445 if (unlikely(ret)) {
446 kmem_cache_free(watch_cachep, watch);
447 return ERR_PTR(ret);
448 }
449
450 dev->last_wd = watch->wd;
451 watch->mask = mask;
452 atomic_set(&watch->count, 0);
453 INIT_LIST_HEAD(&watch->d_list);
454 INIT_LIST_HEAD(&watch->i_list);
455
456 /* save a reference to device and bump the count to make it official */
457 get_inotify_dev(dev);
458 watch->dev = dev;
459
460 /*
461 * Save a reference to the inode and bump the ref count to make it
462 * official. We hold a reference to nameidata, which makes this safe.
463 */
464 watch->inode = igrab(inode);
465
466 /* bump our own count, corresponding to our entry in dev->watches */
467 get_inotify_watch(watch);
468
469 atomic_inc(&dev->user->inotify_watches);
470
471 return watch;
472}
473
474/*
475 * inotify_find_dev - find the watch associated with the given inode and dev
476 * 193 *
477 * Callers must hold inode->inotify_mutex. 194 * Callers must hold inode->inotify_mutex.
478 */ 195 */
479static struct inotify_watch *inode_find_dev(struct inode *inode, 196static struct inotify_watch *inode_find_handle(struct inode *inode,
480 struct inotify_device *dev) 197 struct inotify_handle *ih)
481{ 198{
482 struct inotify_watch *watch; 199 struct inotify_watch *watch;
483 200
484 list_for_each_entry(watch, &inode->inotify_watches, i_list) { 201 list_for_each_entry(watch, &inode->inotify_watches, i_list) {
485 if (watch->dev == dev) 202 if (watch->ih == ih)
486 return watch; 203 return watch;
487 } 204 }
488 205
@@ -491,39 +208,34 @@ static struct inotify_watch *inode_find_dev(struct inode *inode,
491 208
492/* 209/*
493 * remove_watch_no_event - remove_watch() without the IN_IGNORED event. 210 * remove_watch_no_event - remove_watch() without the IN_IGNORED event.
211 *
212 * Callers must hold both inode->inotify_mutex and ih->mutex.
494 */ 213 */
495static void remove_watch_no_event(struct inotify_watch *watch, 214static void remove_watch_no_event(struct inotify_watch *watch,
496 struct inotify_device *dev) 215 struct inotify_handle *ih)
497{ 216{
498 list_del(&watch->i_list); 217 list_del(&watch->i_list);
499 list_del(&watch->d_list); 218 list_del(&watch->h_list);
500 219
501 if (!inotify_inode_watched(watch->inode)) 220 if (!inotify_inode_watched(watch->inode))
502 set_dentry_child_flags(watch->inode, 0); 221 set_dentry_child_flags(watch->inode, 0);
503 222
504 atomic_dec(&dev->user->inotify_watches); 223 idr_remove(&ih->idr, watch->wd);
505 idr_remove(&dev->idr, watch->wd);
506 put_inotify_watch(watch);
507} 224}
508 225
509/* 226/*
510 * remove_watch - Remove a watch from both the device and the inode. Sends 227 * remove_watch - Remove a watch from both the handle and the inode. Sends
511 * the IN_IGNORED event to the given device signifying that the inode is no 228 * the IN_IGNORED event signifying that the inode is no longer watched.
512 * longer watched.
513 *
514 * Callers must hold both inode->inotify_mutex and dev->mutex. We drop a
515 * reference to the inode before returning.
516 * 229 *
517 * The inode is not iput() so as to remain atomic. If the inode needs to be 230 * Callers must hold both inode->inotify_mutex and ih->mutex.
518 * iput(), the call returns one. Otherwise, it returns zero.
519 */ 231 */
520static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev) 232static void remove_watch(struct inotify_watch *watch, struct inotify_handle *ih)
521{ 233{
522 inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL); 234 remove_watch_no_event(watch, ih);
523 remove_watch_no_event(watch, dev); 235 ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL);
524} 236}
525 237
526/* Kernel API */ 238/* Kernel API for producing events */
527 239
528/* 240/*
529 * inotify_d_instantiate - instantiate dcache entry for inode 241 * inotify_d_instantiate - instantiate dcache entry for inode
@@ -576,14 +288,12 @@ void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
576 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { 288 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
577 u32 watch_mask = watch->mask; 289 u32 watch_mask = watch->mask;
578 if (watch_mask & mask) { 290 if (watch_mask & mask) {
579 struct inotify_device *dev = watch->dev; 291 struct inotify_handle *ih= watch->ih;
580 get_inotify_watch(watch); 292 mutex_lock(&ih->mutex);
581 mutex_lock(&dev->mutex);
582 inotify_dev_queue_event(dev, watch, mask, cookie, name);
583 if (watch_mask & IN_ONESHOT) 293 if (watch_mask & IN_ONESHOT)
584 remove_watch_no_event(watch, dev); 294 remove_watch_no_event(watch, ih);
585 mutex_unlock(&dev->mutex); 295 ih->in_ops->handle_event(watch, watch->wd, mask, cookie, name);
586 put_inotify_watch(watch); 296 mutex_unlock(&ih->mutex);
587 } 297 }
588 } 298 }
589 mutex_unlock(&inode->inotify_mutex); 299 mutex_unlock(&inode->inotify_mutex);
@@ -694,11 +404,12 @@ void inotify_unmount_inodes(struct list_head *list)
694 mutex_lock(&inode->inotify_mutex); 404 mutex_lock(&inode->inotify_mutex);
695 watches = &inode->inotify_watches; 405 watches = &inode->inotify_watches;
696 list_for_each_entry_safe(watch, next_w, watches, i_list) { 406 list_for_each_entry_safe(watch, next_w, watches, i_list) {
697 struct inotify_device *dev = watch->dev; 407 struct inotify_handle *ih= watch->ih;
698 mutex_lock(&dev->mutex); 408 mutex_lock(&ih->mutex);
699 inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL); 409 ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
700 remove_watch(watch, dev); 410 NULL);
701 mutex_unlock(&dev->mutex); 411 remove_watch(watch, ih);
412 mutex_unlock(&ih->mutex);
702 } 413 }
703 mutex_unlock(&inode->inotify_mutex); 414 mutex_unlock(&inode->inotify_mutex);
704 iput(inode); 415 iput(inode);
@@ -718,432 +429,240 @@ void inotify_inode_is_dead(struct inode *inode)
718 429
719 mutex_lock(&inode->inotify_mutex); 430 mutex_lock(&inode->inotify_mutex);
720 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { 431 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
721 struct inotify_device *dev = watch->dev; 432 struct inotify_handle *ih = watch->ih;
722 mutex_lock(&dev->mutex); 433 mutex_lock(&ih->mutex);
723 remove_watch(watch, dev); 434 remove_watch(watch, ih);
724 mutex_unlock(&dev->mutex); 435 mutex_unlock(&ih->mutex);
725 } 436 }
726 mutex_unlock(&inode->inotify_mutex); 437 mutex_unlock(&inode->inotify_mutex);
727} 438}
728EXPORT_SYMBOL_GPL(inotify_inode_is_dead); 439EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
729 440
730/* Device Interface */ 441/* Kernel Consumer API */
731
732static unsigned int inotify_poll(struct file *file, poll_table *wait)
733{
734 struct inotify_device *dev = file->private_data;
735 int ret = 0;
736
737 poll_wait(file, &dev->wq, wait);
738 mutex_lock(&dev->mutex);
739 if (!list_empty(&dev->events))
740 ret = POLLIN | POLLRDNORM;
741 mutex_unlock(&dev->mutex);
742
743 return ret;
744}
745 442
746static ssize_t inotify_read(struct file *file, char __user *buf, 443/**
747 size_t count, loff_t *pos) 444 * inotify_init - allocate and initialize an inotify instance
445 * @ops: caller's inotify operations
446 */
447struct inotify_handle *inotify_init(const struct inotify_operations *ops)
748{ 448{
749 size_t event_size = sizeof (struct inotify_event); 449 struct inotify_handle *ih;
750 struct inotify_device *dev;
751 char __user *start;
752 int ret;
753 DEFINE_WAIT(wait);
754
755 start = buf;
756 dev = file->private_data;
757
758 while (1) {
759 int events;
760
761 prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
762 450
763 mutex_lock(&dev->mutex); 451 ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
764 events = !list_empty(&dev->events); 452 if (unlikely(!ih))
765 mutex_unlock(&dev->mutex); 453 return ERR_PTR(-ENOMEM);
766 if (events) {
767 ret = 0;
768 break;
769 }
770
771 if (file->f_flags & O_NONBLOCK) {
772 ret = -EAGAIN;
773 break;
774 }
775
776 if (signal_pending(current)) {
777 ret = -EINTR;
778 break;
779 }
780
781 schedule();
782 }
783
784 finish_wait(&dev->wq, &wait);
785 if (ret)
786 return ret;
787
788 mutex_lock(&dev->mutex);
789 while (1) {
790 struct inotify_kernel_event *kevent;
791
792 ret = buf - start;
793 if (list_empty(&dev->events))
794 break;
795
796 kevent = inotify_dev_get_event(dev);
797 if (event_size + kevent->event.len > count)
798 break;
799
800 if (copy_to_user(buf, &kevent->event, event_size)) {
801 ret = -EFAULT;
802 break;
803 }
804 buf += event_size;
805 count -= event_size;
806
807 if (kevent->name) {
808 if (copy_to_user(buf, kevent->name, kevent->event.len)){
809 ret = -EFAULT;
810 break;
811 }
812 buf += kevent->event.len;
813 count -= kevent->event.len;
814 }
815 454
816 remove_kevent(dev, kevent); 455 idr_init(&ih->idr);
817 } 456 INIT_LIST_HEAD(&ih->watches);
818 mutex_unlock(&dev->mutex); 457 mutex_init(&ih->mutex);
458 ih->last_wd = 0;
459 ih->in_ops = ops;
460 atomic_set(&ih->count, 0);
461 get_inotify_handle(ih);
819 462
820 return ret; 463 return ih;
821} 464}
465EXPORT_SYMBOL_GPL(inotify_init);
822 466
823static int inotify_release(struct inode *ignored, struct file *file) 467/**
468 * inotify_destroy - clean up and destroy an inotify instance
469 * @ih: inotify handle
470 */
471void inotify_destroy(struct inotify_handle *ih)
824{ 472{
825 struct inotify_device *dev = file->private_data;
826
827 /* 473 /*
828 * Destroy all of the watches on this device. Unfortunately, not very 474 * Destroy all of the watches for this handle. Unfortunately, not very
829 * pretty. We cannot do a simple iteration over the list, because we 475 * pretty. We cannot do a simple iteration over the list, because we
830 * do not know the inode until we iterate to the watch. But we need to 476 * do not know the inode until we iterate to the watch. But we need to
831 * hold inode->inotify_mutex before dev->mutex. The following works. 477 * hold inode->inotify_mutex before ih->mutex. The following works.
832 */ 478 */
833 while (1) { 479 while (1) {
834 struct inotify_watch *watch; 480 struct inotify_watch *watch;
835 struct list_head *watches; 481 struct list_head *watches;
836 struct inode *inode; 482 struct inode *inode;
837 483
838 mutex_lock(&dev->mutex); 484 mutex_lock(&ih->mutex);
839 watches = &dev->watches; 485 watches = &ih->watches;
840 if (list_empty(watches)) { 486 if (list_empty(watches)) {
841 mutex_unlock(&dev->mutex); 487 mutex_unlock(&ih->mutex);
842 break; 488 break;
843 } 489 }
844 watch = list_entry(watches->next, struct inotify_watch, d_list); 490 watch = list_entry(watches->next, struct inotify_watch, h_list);
845 get_inotify_watch(watch); 491 get_inotify_watch(watch);
846 mutex_unlock(&dev->mutex); 492 mutex_unlock(&ih->mutex);
847 493
848 inode = watch->inode; 494 inode = watch->inode;
849 mutex_lock(&inode->inotify_mutex); 495 mutex_lock(&inode->inotify_mutex);
850 mutex_lock(&dev->mutex); 496 mutex_lock(&ih->mutex);
851 497
852 /* make sure we didn't race with another list removal */ 498 /* make sure we didn't race with another list removal */
853 if (likely(idr_find(&dev->idr, watch->wd))) 499 if (likely(idr_find(&ih->idr, watch->wd))) {
854 remove_watch_no_event(watch, dev); 500 remove_watch_no_event(watch, ih);
501 put_inotify_watch(watch);
502 }
855 503
856 mutex_unlock(&dev->mutex); 504 mutex_unlock(&ih->mutex);
857 mutex_unlock(&inode->inotify_mutex); 505 mutex_unlock(&inode->inotify_mutex);
858 put_inotify_watch(watch); 506 put_inotify_watch(watch);
859 } 507 }
860 508
861 /* destroy all of the events on this device */ 509 /* free this handle: the put matching the get in inotify_init() */
862 mutex_lock(&dev->mutex); 510 put_inotify_handle(ih);
863 while (!list_empty(&dev->events))
864 inotify_dev_event_dequeue(dev);
865 mutex_unlock(&dev->mutex);
866
867 /* free this device: the put matching the get in inotify_init() */
868 put_inotify_dev(dev);
869
870 return 0;
871} 511}
512EXPORT_SYMBOL_GPL(inotify_destroy);
872 513
873/* 514/**
874 * inotify_ignore - remove a given wd from this inotify instance. 515 * inotify_find_update_watch - find and update the mask of an existing watch
516 * @ih: inotify handle
517 * @inode: inode's watch to update
518 * @mask: mask of events to watch
875 * 519 *
876 * Can sleep. 520 * Caller must pin given inode (via nameidata).
877 */ 521 */
878static int inotify_ignore(struct inotify_device *dev, s32 wd) 522s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
523 u32 mask)
879{ 524{
880 struct inotify_watch *watch; 525 struct inotify_watch *old;
881 struct inode *inode; 526 int mask_add = 0;
527 int ret;
882 528
883 mutex_lock(&dev->mutex); 529 if (mask & IN_MASK_ADD)
884 watch = idr_find(&dev->idr, wd); 530 mask_add = 1;
885 if (unlikely(!watch)) { 531
886 mutex_unlock(&dev->mutex); 532 /* don't allow invalid bits: we don't want flags set */
533 mask &= IN_ALL_EVENTS | IN_ONESHOT;
534 if (unlikely(!mask))
887 return -EINVAL; 535 return -EINVAL;
888 }
889 get_inotify_watch(watch);
890 inode = watch->inode;
891 mutex_unlock(&dev->mutex);
892 536
893 mutex_lock(&inode->inotify_mutex); 537 mutex_lock(&inode->inotify_mutex);
894 mutex_lock(&dev->mutex); 538 mutex_lock(&ih->mutex);
895
896 /* make sure that we did not race */
897 if (likely(idr_find(&dev->idr, wd) == watch))
898 remove_watch(watch, dev);
899
900 mutex_unlock(&dev->mutex);
901 mutex_unlock(&inode->inotify_mutex);
902 put_inotify_watch(watch);
903
904 return 0;
905}
906
907static long inotify_ioctl(struct file *file, unsigned int cmd,
908 unsigned long arg)
909{
910 struct inotify_device *dev;
911 void __user *p;
912 int ret = -ENOTTY;
913 539
914 dev = file->private_data; 540 /*
915 p = (void __user *) arg; 541 * Handle the case of re-adding a watch on an (inode,ih) pair that we
916 542 * are already watching. We just update the mask and return its wd.
917 switch (cmd) { 543 */
918 case FIONREAD: 544 old = inode_find_handle(inode, ih);
919 ret = put_user(dev->queue_size, (int __user *) p); 545 if (unlikely(!old)) {
920 break; 546 ret = -ENOENT;
921 } 547 goto out;
922
923 return ret;
924}
925
926static const struct file_operations inotify_fops = {
927 .poll = inotify_poll,
928 .read = inotify_read,
929 .release = inotify_release,
930 .unlocked_ioctl = inotify_ioctl,
931 .compat_ioctl = inotify_ioctl,
932};
933
934asmlinkage long sys_inotify_init(void)
935{
936 struct inotify_device *dev;
937 struct user_struct *user;
938 struct file *filp;
939 int fd, ret;
940
941 fd = get_unused_fd();
942 if (fd < 0)
943 return fd;
944
945 filp = get_empty_filp();
946 if (!filp) {
947 ret = -ENFILE;
948 goto out_put_fd;
949 }
950
951 user = get_uid(current->user);
952 if (unlikely(atomic_read(&user->inotify_devs) >=
953 inotify_max_user_instances)) {
954 ret = -EMFILE;
955 goto out_free_uid;
956 }
957
958 dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
959 if (unlikely(!dev)) {
960 ret = -ENOMEM;
961 goto out_free_uid;
962 } 548 }
963 549
964 filp->f_op = &inotify_fops; 550 if (mask_add)
965 filp->f_vfsmnt = mntget(inotify_mnt); 551 old->mask |= mask;
966 filp->f_dentry = dget(inotify_mnt->mnt_root); 552 else
967 filp->f_mapping = filp->f_dentry->d_inode->i_mapping; 553 old->mask = mask;
968 filp->f_mode = FMODE_READ; 554 ret = old->wd;
969 filp->f_flags = O_RDONLY; 555out:
970 filp->private_data = dev; 556 mutex_unlock(&ih->mutex);
971 557 mutex_unlock(&inode->inotify_mutex);
972 idr_init(&dev->idr);
973 INIT_LIST_HEAD(&dev->events);
974 INIT_LIST_HEAD(&dev->watches);
975 init_waitqueue_head(&dev->wq);
976 mutex_init(&dev->mutex);
977 dev->event_count = 0;
978 dev->queue_size = 0;
979 dev->max_events = inotify_max_queued_events;
980 dev->user = user;
981 dev->last_wd = 0;
982 atomic_set(&dev->count, 0);
983
984 get_inotify_dev(dev);
985 atomic_inc(&user->inotify_devs);
986 fd_install(fd, filp);
987
988 return fd;
989out_free_uid:
990 free_uid(user);
991 put_filp(filp);
992out_put_fd:
993 put_unused_fd(fd);
994 return ret; 558 return ret;
995} 559}
560EXPORT_SYMBOL_GPL(inotify_find_update_watch);
996 561
997asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) 562/**
563 * inotify_add_watch - add a watch to an inotify instance
564 * @ih: inotify handle
565 * @watch: caller allocated watch structure
566 * @inode: inode to watch
567 * @mask: mask of events to watch
568 *
569 * Caller must pin given inode (via nameidata).
570 * Caller must ensure it only calls inotify_add_watch() once per watch.
571 * Calls inotify_handle_get_wd() so may sleep.
572 */
573s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
574 struct inode *inode, u32 mask)
998{ 575{
999 struct inotify_watch *watch, *old; 576 int ret = 0;
1000 struct inode *inode;
1001 struct inotify_device *dev;
1002 struct nameidata nd;
1003 struct file *filp;
1004 int ret, fput_needed;
1005 int mask_add = 0;
1006 unsigned flags = 0;
1007
1008 filp = fget_light(fd, &fput_needed);
1009 if (unlikely(!filp))
1010 return -EBADF;
1011 577
1012 /* verify that this is indeed an inotify instance */ 578 /* don't allow invalid bits: we don't want flags set */
1013 if (unlikely(filp->f_op != &inotify_fops)) { 579 mask &= IN_ALL_EVENTS | IN_ONESHOT;
1014 ret = -EINVAL; 580 if (unlikely(!mask))
1015 goto fput_and_out; 581 return -EINVAL;
1016 } 582 watch->mask = mask;
1017 583
1018 if (!(mask & IN_DONT_FOLLOW)) 584 mutex_lock(&inode->inotify_mutex);
1019 flags |= LOOKUP_FOLLOW; 585 mutex_lock(&ih->mutex);
1020 if (mask & IN_ONLYDIR)
1021 flags |= LOOKUP_DIRECTORY;
1022 586
1023 ret = find_inode(path, &nd, flags); 587 /* Initialize a new watch */
588 ret = inotify_handle_get_wd(ih, watch);
1024 if (unlikely(ret)) 589 if (unlikely(ret))
1025 goto fput_and_out; 590 goto out;
1026 591 ret = watch->wd;
1027 /* inode held in place by reference to nd; dev by fget on fd */
1028 inode = nd.dentry->d_inode;
1029 dev = filp->private_data;
1030
1031 mutex_lock(&inode->inotify_mutex);
1032 mutex_lock(&dev->mutex);
1033 592
1034 if (mask & IN_MASK_ADD) 593 atomic_set(&watch->count, 0);
1035 mask_add = 1; 594 INIT_LIST_HEAD(&watch->h_list);
595 INIT_LIST_HEAD(&watch->i_list);
1036 596
1037 /* don't let user-space set invalid bits: we don't want flags set */ 597 /* save a reference to handle and bump the count to make it official */
1038 mask &= IN_ALL_EVENTS | IN_ONESHOT; 598 get_inotify_handle(ih);
1039 if (unlikely(!mask)) { 599 watch->ih = ih;
1040 ret = -EINVAL;
1041 goto out;
1042 }
1043 600
1044 /* 601 /*
1045 * Handle the case of re-adding a watch on an (inode,dev) pair that we 602 * Save a reference to the inode and bump the ref count to make it
1046 * are already watching. We just update the mask and return its wd. 603 * official. We hold a reference to nameidata, which makes this safe.
1047 */ 604 */
1048 old = inode_find_dev(inode, dev); 605 watch->inode = igrab(inode);
1049 if (unlikely(old)) {
1050 if (mask_add)
1051 old->mask |= mask;
1052 else
1053 old->mask = mask;
1054 ret = old->wd;
1055 goto out;
1056 }
1057 606
1058 watch = create_watch(dev, mask, inode); 607 get_inotify_watch(watch); /* initial get */
1059 if (unlikely(IS_ERR(watch))) {
1060 ret = PTR_ERR(watch);
1061 goto out;
1062 }
1063 608
1064 if (!inotify_inode_watched(inode)) 609 if (!inotify_inode_watched(inode))
1065 set_dentry_child_flags(inode, 1); 610 set_dentry_child_flags(inode, 1);
1066 611
1067 /* Add the watch to the device's and the inode's list */ 612 /* Add the watch to the handle's and the inode's list */
1068 list_add(&watch->d_list, &dev->watches); 613 list_add(&watch->h_list, &ih->watches);
1069 list_add(&watch->i_list, &inode->inotify_watches); 614 list_add(&watch->i_list, &inode->inotify_watches);
1070 ret = watch->wd;
1071out: 615out:
1072 mutex_unlock(&dev->mutex); 616 mutex_unlock(&ih->mutex);
1073 mutex_unlock(&inode->inotify_mutex); 617 mutex_unlock(&inode->inotify_mutex);
1074 path_release(&nd);
1075fput_and_out:
1076 fput_light(filp, fput_needed);
1077 return ret; 618 return ret;
1078} 619}
620EXPORT_SYMBOL_GPL(inotify_add_watch);
1079 621
1080asmlinkage long sys_inotify_rm_watch(int fd, u32 wd) 622/**
623 * inotify_rm_wd - remove a watch from an inotify instance
624 * @ih: inotify handle
625 * @wd: watch descriptor to remove
626 *
627 * Can sleep.
628 */
629int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
1081{ 630{
1082 struct file *filp; 631 struct inotify_watch *watch;
1083 struct inotify_device *dev; 632 struct inode *inode;
1084 int ret, fput_needed;
1085
1086 filp = fget_light(fd, &fput_needed);
1087 if (unlikely(!filp))
1088 return -EBADF;
1089 633
1090 /* verify that this is indeed an inotify instance */ 634 mutex_lock(&ih->mutex);
1091 if (unlikely(filp->f_op != &inotify_fops)) { 635 watch = idr_find(&ih->idr, wd);
1092 ret = -EINVAL; 636 if (unlikely(!watch)) {
1093 goto out; 637 mutex_unlock(&ih->mutex);
638 return -EINVAL;
1094 } 639 }
640 get_inotify_watch(watch);
641 inode = watch->inode;
642 mutex_unlock(&ih->mutex);
1095 643
1096 dev = filp->private_data; 644 mutex_lock(&inode->inotify_mutex);
1097 ret = inotify_ignore(dev, wd); 645 mutex_lock(&ih->mutex);
1098 646
1099out: 647 /* make sure that we did not race */
1100 fput_light(filp, fput_needed); 648 if (likely(idr_find(&ih->idr, wd) == watch))
1101 return ret; 649 remove_watch(watch, ih);
1102}
1103 650
1104static struct super_block * 651 mutex_unlock(&ih->mutex);
1105inotify_get_sb(struct file_system_type *fs_type, int flags, 652 mutex_unlock(&inode->inotify_mutex);
1106 const char *dev_name, void *data) 653 put_inotify_watch(watch);
1107{
1108 return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA);
1109}
1110 654
1111static struct file_system_type inotify_fs_type = { 655 return 0;
1112 .name = "inotifyfs", 656}
1113 .get_sb = inotify_get_sb, 657EXPORT_SYMBOL_GPL(inotify_rm_wd);
1114 .kill_sb = kill_anon_super,
1115};
1116 658
1117/* 659/*
1118 * inotify_setup - Our initialization function. Note that we cannnot return 660 * inotify_setup - core initialization function
1119 * error because we have compiled-in VFS hooks. So an (unlikely) failure here
1120 * must result in panic().
1121 */ 661 */
1122static int __init inotify_setup(void) 662static int __init inotify_setup(void)
1123{ 663{
1124 int ret;
1125
1126 ret = register_filesystem(&inotify_fs_type);
1127 if (unlikely(ret))
1128 panic("inotify: register_filesystem returned %d!\n", ret);
1129
1130 inotify_mnt = kern_mount(&inotify_fs_type);
1131 if (IS_ERR(inotify_mnt))
1132 panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
1133
1134 inotify_max_queued_events = 16384;
1135 inotify_max_user_instances = 128;
1136 inotify_max_user_watches = 8192;
1137
1138 atomic_set(&inotify_cookie, 0); 664 atomic_set(&inotify_cookie, 0);
1139 665
1140 watch_cachep = kmem_cache_create("inotify_watch_cache",
1141 sizeof(struct inotify_watch),
1142 0, SLAB_PANIC, NULL, NULL);
1143 event_cachep = kmem_cache_create("inotify_event_cache",
1144 sizeof(struct inotify_kernel_event),
1145 0, SLAB_PANIC, NULL, NULL);
1146
1147 return 0; 666 return 0;
1148} 667}
1149 668
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
new file mode 100644
index 000000000000..845dc79a4e9c
--- /dev/null
+++ b/fs/inotify_user.c
@@ -0,0 +1,717 @@
1/*
2 * fs/inotify_user.c - inotify support for userspace
3 *
4 * Authors:
5 * John McCutchan <ttb@tentacle.dhs.org>
6 * Robert Love <rml@novell.com>
7 *
8 * Copyright (C) 2005 John McCutchan
9 * Copyright 2006 Hewlett-Packard Development Company, L.P.
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2, or (at your option) any
14 * later version.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 */
21
22#include <linux/kernel.h>
23#include <linux/sched.h>
24#include <linux/slab.h>
25#include <linux/fs.h>
26#include <linux/file.h>
27#include <linux/mount.h>
28#include <linux/namei.h>
29#include <linux/poll.h>
30#include <linux/init.h>
31#include <linux/list.h>
32#include <linux/inotify.h>
33#include <linux/syscalls.h>
34
35#include <asm/ioctls.h>
36
37static kmem_cache_t *watch_cachep __read_mostly;
38static kmem_cache_t *event_cachep __read_mostly;
39
40static struct vfsmount *inotify_mnt __read_mostly;
41
42/* these are configurable via /proc/sys/fs/inotify/ */
43int inotify_max_user_instances __read_mostly;
44int inotify_max_user_watches __read_mostly;
45int inotify_max_queued_events __read_mostly;
46
47/*
48 * Lock ordering:
49 *
50 * inotify_dev->up_mutex (ensures we don't re-add the same watch)
51 * inode->inotify_mutex (protects inode's watch list)
52 * inotify_handle->mutex (protects inotify_handle's watch list)
53 * inotify_dev->ev_mutex (protects device's event queue)
54 */
55
56/*
57 * Lifetimes of the main data structures:
58 *
59 * inotify_device: Lifetime is managed by reference count, from
60 * sys_inotify_init() until release. Additional references can bump the count
61 * via get_inotify_dev() and drop the count via put_inotify_dev().
62 *
63 * inotify_user_watch: Lifetime is from create_watch() to the receipt of an
64 * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the
65 * first event, or to inotify_destroy().
66 */
67
68/*
69 * struct inotify_device - represents an inotify instance
70 *
71 * This structure is protected by the mutex 'mutex'.
72 */
73struct inotify_device {
74 wait_queue_head_t wq; /* wait queue for i/o */
75 struct mutex ev_mutex; /* protects event queue */
76 struct mutex up_mutex; /* synchronizes watch updates */
77 struct list_head events; /* list of queued events */
78 atomic_t count; /* reference count */
79 struct user_struct *user; /* user who opened this dev */
80 struct inotify_handle *ih; /* inotify handle */
81 unsigned int queue_size; /* size of the queue (bytes) */
82 unsigned int event_count; /* number of pending events */
83 unsigned int max_events; /* maximum number of events */
84};
85
86/*
87 * struct inotify_kernel_event - An inotify event, originating from a watch and
88 * queued for user-space. A list of these is attached to each instance of the
89 * device. In read(), this list is walked and all events that can fit in the
90 * buffer are returned.
91 *
92 * Protected by dev->ev_mutex of the device in which we are queued.
93 */
94struct inotify_kernel_event {
95 struct inotify_event event; /* the user-space event */
96 struct list_head list; /* entry in inotify_device's list */
97 char *name; /* filename, if any */
98};
99
100/*
101 * struct inotify_user_watch - our version of an inotify_watch, we add
102 * a reference to the associated inotify_device.
103 */
104struct inotify_user_watch {
105 struct inotify_device *dev; /* associated device */
106 struct inotify_watch wdata; /* inotify watch data */
107};
108
109#ifdef CONFIG_SYSCTL
110
111#include <linux/sysctl.h>
112
113static int zero;
114
115ctl_table inotify_table[] = {
116 {
117 .ctl_name = INOTIFY_MAX_USER_INSTANCES,
118 .procname = "max_user_instances",
119 .data = &inotify_max_user_instances,
120 .maxlen = sizeof(int),
121 .mode = 0644,
122 .proc_handler = &proc_dointvec_minmax,
123 .strategy = &sysctl_intvec,
124 .extra1 = &zero,
125 },
126 {
127 .ctl_name = INOTIFY_MAX_USER_WATCHES,
128 .procname = "max_user_watches",
129 .data = &inotify_max_user_watches,
130 .maxlen = sizeof(int),
131 .mode = 0644,
132 .proc_handler = &proc_dointvec_minmax,
133 .strategy = &sysctl_intvec,
134 .extra1 = &zero,
135 },
136 {
137 .ctl_name = INOTIFY_MAX_QUEUED_EVENTS,
138 .procname = "max_queued_events",
139 .data = &inotify_max_queued_events,
140 .maxlen = sizeof(int),
141 .mode = 0644,
142 .proc_handler = &proc_dointvec_minmax,
143 .strategy = &sysctl_intvec,
144 .extra1 = &zero
145 },
146 { .ctl_name = 0 }
147};
148#endif /* CONFIG_SYSCTL */
149
150static inline void get_inotify_dev(struct inotify_device *dev)
151{
152 atomic_inc(&dev->count);
153}
154
155static inline void put_inotify_dev(struct inotify_device *dev)
156{
157 if (atomic_dec_and_test(&dev->count)) {
158 atomic_dec(&dev->user->inotify_devs);
159 free_uid(dev->user);
160 kfree(dev);
161 }
162}
163
164/*
165 * free_inotify_user_watch - cleans up the watch and its references
166 */
167static void free_inotify_user_watch(struct inotify_watch *w)
168{
169 struct inotify_user_watch *watch;
170 struct inotify_device *dev;
171
172 watch = container_of(w, struct inotify_user_watch, wdata);
173 dev = watch->dev;
174
175 atomic_dec(&dev->user->inotify_watches);
176 put_inotify_dev(dev);
177 kmem_cache_free(watch_cachep, watch);
178}
179
180/*
181 * kernel_event - create a new kernel event with the given parameters
182 *
183 * This function can sleep.
184 */
185static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
186 const char *name)
187{
188 struct inotify_kernel_event *kevent;
189
190 kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL);
191 if (unlikely(!kevent))
192 return NULL;
193
194 /* we hand this out to user-space, so zero it just in case */
195 memset(&kevent->event, 0, sizeof(struct inotify_event));
196
197 kevent->event.wd = wd;
198 kevent->event.mask = mask;
199 kevent->event.cookie = cookie;
200
201 INIT_LIST_HEAD(&kevent->list);
202
203 if (name) {
204 size_t len, rem, event_size = sizeof(struct inotify_event);
205
206 /*
207 * We need to pad the filename so as to properly align an
208 * array of inotify_event structures. Because the structure is
209 * small and the common case is a small filename, we just round
210 * up to the next multiple of the structure's sizeof. This is
211 * simple and safe for all architectures.
212 */
213 len = strlen(name) + 1;
214 rem = event_size - len;
215 if (len > event_size) {
216 rem = event_size - (len % event_size);
217 if (len % event_size == 0)
218 rem = 0;
219 }
220
221 kevent->name = kmalloc(len + rem, GFP_KERNEL);
222 if (unlikely(!kevent->name)) {
223 kmem_cache_free(event_cachep, kevent);
224 return NULL;
225 }
226 memcpy(kevent->name, name, len);
227 if (rem)
228 memset(kevent->name + len, 0, rem);
229 kevent->event.len = len + rem;
230 } else {
231 kevent->event.len = 0;
232 kevent->name = NULL;
233 }
234
235 return kevent;
236}
237
238/*
239 * inotify_dev_get_event - return the next event in the given dev's queue
240 *
241 * Caller must hold dev->ev_mutex.
242 */
243static inline struct inotify_kernel_event *
244inotify_dev_get_event(struct inotify_device *dev)
245{
246 return list_entry(dev->events.next, struct inotify_kernel_event, list);
247}
248
249/*
250 * inotify_dev_queue_event - event handler registered with core inotify, adds
251 * a new event to the given device
252 *
253 * Can sleep (calls kernel_event()).
254 */
255static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
256 u32 cookie, const char *name)
257{
258 struct inotify_user_watch *watch;
259 struct inotify_device *dev;
260 struct inotify_kernel_event *kevent, *last;
261
262 watch = container_of(w, struct inotify_user_watch, wdata);
263 dev = watch->dev;
264
265 mutex_lock(&dev->ev_mutex);
266
267 /* we can safely put the watch as we don't reference it while
268 * generating the event
269 */
270 if (mask & IN_IGNORED || mask & IN_ONESHOT)
271 put_inotify_watch(w); /* final put */
272
273 /* coalescing: drop this event if it is a dupe of the previous */
274 last = inotify_dev_get_event(dev);
275 if (last && last->event.mask == mask && last->event.wd == wd &&
276 last->event.cookie == cookie) {
277 const char *lastname = last->name;
278
279 if (!name && !lastname)
280 goto out;
281 if (name && lastname && !strcmp(lastname, name))
282 goto out;
283 }
284
285 /* the queue overflowed and we already sent the Q_OVERFLOW event */
286 if (unlikely(dev->event_count > dev->max_events))
287 goto out;
288
289 /* if the queue overflows, we need to notify user space */
290 if (unlikely(dev->event_count == dev->max_events))
291 kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
292 else
293 kevent = kernel_event(wd, mask, cookie, name);
294
295 if (unlikely(!kevent))
296 goto out;
297
298 /* queue the event and wake up anyone waiting */
299 dev->event_count++;
300 dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
301 list_add_tail(&kevent->list, &dev->events);
302 wake_up_interruptible(&dev->wq);
303
304out:
305 mutex_unlock(&dev->ev_mutex);
306}
307
308/*
309 * remove_kevent - cleans up and ultimately frees the given kevent
310 *
311 * Caller must hold dev->ev_mutex.
312 */
313static void remove_kevent(struct inotify_device *dev,
314 struct inotify_kernel_event *kevent)
315{
316 list_del(&kevent->list);
317
318 dev->event_count--;
319 dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
320
321 kfree(kevent->name);
322 kmem_cache_free(event_cachep, kevent);
323}
324
325/*
326 * inotify_dev_event_dequeue - destroy an event on the given device
327 *
328 * Caller must hold dev->ev_mutex.
329 */
330static void inotify_dev_event_dequeue(struct inotify_device *dev)
331{
332 if (!list_empty(&dev->events)) {
333 struct inotify_kernel_event *kevent;
334 kevent = inotify_dev_get_event(dev);
335 remove_kevent(dev, kevent);
336 }
337}
338
339/*
340 * find_inode - resolve a user-given path to a specific inode and return a nd
341 */
342static int find_inode(const char __user *dirname, struct nameidata *nd,
343 unsigned flags)
344{
345 int error;
346
347 error = __user_walk(dirname, flags, nd);
348 if (error)
349 return error;
350 /* you can only watch an inode if you have read permissions on it */
351 error = vfs_permission(nd, MAY_READ);
352 if (error)
353 path_release(nd);
354 return error;
355}
356
357/*
358 * create_watch - creates a watch on the given device.
359 *
360 * Callers must hold dev->up_mutex.
361 */
362static int create_watch(struct inotify_device *dev, struct inode *inode,
363 u32 mask)
364{
365 struct inotify_user_watch *watch;
366 int ret;
367
368 if (atomic_read(&dev->user->inotify_watches) >=
369 inotify_max_user_watches)
370 return -ENOSPC;
371
372 watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
373 if (unlikely(!watch))
374 return -ENOMEM;
375
376 /* save a reference to device and bump the count to make it official */
377 get_inotify_dev(dev);
378 watch->dev = dev;
379
380 atomic_inc(&dev->user->inotify_watches);
381
382 ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
383 if (ret < 0)
384 free_inotify_user_watch(&watch->wdata);
385
386 return ret;
387}
388
389/* Device Interface */
390
391static unsigned int inotify_poll(struct file *file, poll_table *wait)
392{
393 struct inotify_device *dev = file->private_data;
394 int ret = 0;
395
396 poll_wait(file, &dev->wq, wait);
397 mutex_lock(&dev->ev_mutex);
398 if (!list_empty(&dev->events))
399 ret = POLLIN | POLLRDNORM;
400 mutex_unlock(&dev->ev_mutex);
401
402 return ret;
403}
404
405static ssize_t inotify_read(struct file *file, char __user *buf,
406 size_t count, loff_t *pos)
407{
408 size_t event_size = sizeof (struct inotify_event);
409 struct inotify_device *dev;
410 char __user *start;
411 int ret;
412 DEFINE_WAIT(wait);
413
414 start = buf;
415 dev = file->private_data;
416
417 while (1) {
418 int events;
419
420 prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
421
422 mutex_lock(&dev->ev_mutex);
423 events = !list_empty(&dev->events);
424 mutex_unlock(&dev->ev_mutex);
425 if (events) {
426 ret = 0;
427 break;
428 }
429
430 if (file->f_flags & O_NONBLOCK) {
431 ret = -EAGAIN;
432 break;
433 }
434
435 if (signal_pending(current)) {
436 ret = -EINTR;
437 break;
438 }
439
440 schedule();
441 }
442
443 finish_wait(&dev->wq, &wait);
444 if (ret)
445 return ret;
446
447 mutex_lock(&dev->ev_mutex);
448 while (1) {
449 struct inotify_kernel_event *kevent;
450
451 ret = buf - start;
452 if (list_empty(&dev->events))
453 break;
454
455 kevent = inotify_dev_get_event(dev);
456 if (event_size + kevent->event.len > count)
457 break;
458
459 if (copy_to_user(buf, &kevent->event, event_size)) {
460 ret = -EFAULT;
461 break;
462 }
463 buf += event_size;
464 count -= event_size;
465
466 if (kevent->name) {
467 if (copy_to_user(buf, kevent->name, kevent->event.len)){
468 ret = -EFAULT;
469 break;
470 }
471 buf += kevent->event.len;
472 count -= kevent->event.len;
473 }
474
475 remove_kevent(dev, kevent);
476 }
477 mutex_unlock(&dev->ev_mutex);
478
479 return ret;
480}
481
482static int inotify_release(struct inode *ignored, struct file *file)
483{
484 struct inotify_device *dev = file->private_data;
485
486 inotify_destroy(dev->ih);
487
488 /* destroy all of the events on this device */
489 mutex_lock(&dev->ev_mutex);
490 while (!list_empty(&dev->events))
491 inotify_dev_event_dequeue(dev);
492 mutex_unlock(&dev->ev_mutex);
493
494 /* free this device: the put matching the get in inotify_init() */
495 put_inotify_dev(dev);
496
497 return 0;
498}
499
500static long inotify_ioctl(struct file *file, unsigned int cmd,
501 unsigned long arg)
502{
503 struct inotify_device *dev;
504 void __user *p;
505 int ret = -ENOTTY;
506
507 dev = file->private_data;
508 p = (void __user *) arg;
509
510 switch (cmd) {
511 case FIONREAD:
512 ret = put_user(dev->queue_size, (int __user *) p);
513 break;
514 }
515
516 return ret;
517}
518
519static const struct file_operations inotify_fops = {
520 .poll = inotify_poll,
521 .read = inotify_read,
522 .release = inotify_release,
523 .unlocked_ioctl = inotify_ioctl,
524 .compat_ioctl = inotify_ioctl,
525};
526
527static const struct inotify_operations inotify_user_ops = {
528 .handle_event = inotify_dev_queue_event,
529 .destroy_watch = free_inotify_user_watch,
530};
531
532asmlinkage long sys_inotify_init(void)
533{
534 struct inotify_device *dev;
535 struct inotify_handle *ih;
536 struct user_struct *user;
537 struct file *filp;
538 int fd, ret;
539
540 fd = get_unused_fd();
541 if (fd < 0)
542 return fd;
543
544 filp = get_empty_filp();
545 if (!filp) {
546 ret = -ENFILE;
547 goto out_put_fd;
548 }
549
550 user = get_uid(current->user);
551 if (unlikely(atomic_read(&user->inotify_devs) >=
552 inotify_max_user_instances)) {
553 ret = -EMFILE;
554 goto out_free_uid;
555 }
556
557 dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
558 if (unlikely(!dev)) {
559 ret = -ENOMEM;
560 goto out_free_uid;
561 }
562
563 ih = inotify_init(&inotify_user_ops);
564 if (unlikely(IS_ERR(ih))) {
565 ret = PTR_ERR(ih);
566 goto out_free_dev;
567 }
568 dev->ih = ih;
569
570 filp->f_op = &inotify_fops;
571 filp->f_vfsmnt = mntget(inotify_mnt);
572 filp->f_dentry = dget(inotify_mnt->mnt_root);
573 filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
574 filp->f_mode = FMODE_READ;
575 filp->f_flags = O_RDONLY;
576 filp->private_data = dev;
577
578 INIT_LIST_HEAD(&dev->events);
579 init_waitqueue_head(&dev->wq);
580 mutex_init(&dev->ev_mutex);
581 mutex_init(&dev->up_mutex);
582 dev->event_count = 0;
583 dev->queue_size = 0;
584 dev->max_events = inotify_max_queued_events;
585 dev->user = user;
586 atomic_set(&dev->count, 0);
587
588 get_inotify_dev(dev);
589 atomic_inc(&user->inotify_devs);
590 fd_install(fd, filp);
591
592 return fd;
593out_free_dev:
594 kfree(dev);
595out_free_uid:
596 free_uid(user);
597 put_filp(filp);
598out_put_fd:
599 put_unused_fd(fd);
600 return ret;
601}
602
603asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
604{
605 struct inode *inode;
606 struct inotify_device *dev;
607 struct nameidata nd;
608 struct file *filp;
609 int ret, fput_needed;
610 unsigned flags = 0;
611
612 filp = fget_light(fd, &fput_needed);
613 if (unlikely(!filp))
614 return -EBADF;
615
616 /* verify that this is indeed an inotify instance */
617 if (unlikely(filp->f_op != &inotify_fops)) {
618 ret = -EINVAL;
619 goto fput_and_out;
620 }
621
622 if (!(mask & IN_DONT_FOLLOW))
623 flags |= LOOKUP_FOLLOW;
624 if (mask & IN_ONLYDIR)
625 flags |= LOOKUP_DIRECTORY;
626
627 ret = find_inode(path, &nd, flags);
628 if (unlikely(ret))
629 goto fput_and_out;
630
631 /* inode held in place by reference to nd; dev by fget on fd */
632 inode = nd.dentry->d_inode;
633 dev = filp->private_data;
634
635 mutex_lock(&dev->up_mutex);
636 ret = inotify_find_update_watch(dev->ih, inode, mask);
637 if (ret == -ENOENT)
638 ret = create_watch(dev, inode, mask);
639 mutex_unlock(&dev->up_mutex);
640
641 path_release(&nd);
642fput_and_out:
643 fput_light(filp, fput_needed);
644 return ret;
645}
646
647asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
648{
649 struct file *filp;
650 struct inotify_device *dev;
651 int ret, fput_needed;
652
653 filp = fget_light(fd, &fput_needed);
654 if (unlikely(!filp))
655 return -EBADF;
656
657 /* verify that this is indeed an inotify instance */
658 if (unlikely(filp->f_op != &inotify_fops)) {
659 ret = -EINVAL;
660 goto out;
661 }
662
663 dev = filp->private_data;
664
665 /* we free our watch data when we get IN_IGNORED */
666 ret = inotify_rm_wd(dev->ih, wd);
667
668out:
669 fput_light(filp, fput_needed);
670 return ret;
671}
672
673static struct super_block *
674inotify_get_sb(struct file_system_type *fs_type, int flags,
675 const char *dev_name, void *data)
676{
677 return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA);
678}
679
680static struct file_system_type inotify_fs_type = {
681 .name = "inotifyfs",
682 .get_sb = inotify_get_sb,
683 .kill_sb = kill_anon_super,
684};
685
686/*
687 * inotify_user_setup - Our initialization function. Note that we cannnot return
688 * error because we have compiled-in VFS hooks. So an (unlikely) failure here
689 * must result in panic().
690 */
691static int __init inotify_user_setup(void)
692{
693 int ret;
694
695 ret = register_filesystem(&inotify_fs_type);
696 if (unlikely(ret))
697 panic("inotify: register_filesystem returned %d!\n", ret);
698
699 inotify_mnt = kern_mount(&inotify_fs_type);
700 if (IS_ERR(inotify_mnt))
701 panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
702
703 inotify_max_queued_events = 16384;
704 inotify_max_user_instances = 128;
705 inotify_max_user_watches = 8192;
706
707 watch_cachep = kmem_cache_create("inotify_watch_cache",
708 sizeof(struct inotify_user_watch),
709 0, SLAB_PANIC, NULL, NULL);
710 event_cachep = kmem_cache_create("inotify_event_cache",
711 sizeof(struct inotify_kernel_event),
712 0, SLAB_PANIC, NULL, NULL);
713
714 return 0;
715}
716
717module_init(inotify_user_setup);
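For completeness, the userspace-facing side implemented above by
sys_inotify_init(), sys_inotify_add_watch() and sys_inotify_rm_watch() is
typically driven as in the hypothetical snippet below (assuming the
<sys/inotify.h> wrappers around these syscalls; error handling omitted).
Events are read from the returned file descriptor as packed struct
inotify_event records, each followed by event.len bytes of padded name.

/* Hypothetical userspace consumer of the syscalls implemented above;
 * assumes the <sys/inotify.h> wrappers, error handling omitted. */
#include <stdio.h>
#include <unistd.h>
#include <sys/inotify.h>

int main(void)
{
	char buf[4096];
	ssize_t len;
	int fd = inotify_init();		/* sys_inotify_init() */
	int wd = inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE);

	len = read(fd, buf, sizeof(buf));	/* blocks until events arrive */
	for (ssize_t i = 0; i < len; ) {
		struct inotify_event *ev = (struct inotify_event *)&buf[i];

		printf("wd=%d mask=%#x name=%s\n", ev->wd, ev->mask,
		       ev->len ? ev->name : "");
		i += sizeof(*ev) + ev->len;	/* events are packed; len includes padding */
	}

	inotify_rm_watch(fd, wd);		/* sys_inotify_rm_watch() */
	close(fd);
	return 0;
}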