diff options
author | Amy Griffis <amy.griffis@hp.com> | 2006-06-01 16:10:59 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2006-06-20 05:25:17 -0400 |
commit | 2d9048e201bfb67ba21f05e647b1286b8a4a5667 (patch) | |
tree | 1df2ca6780d403f3209cf445f8b0b27f45098434 /fs | |
parent | 90204e0b7b51e9f2a6905adca12dc331128602c7 (diff) |
[PATCH] inotify (1/5): split kernel API from userspace support
The following series of patches introduces a kernel API for inotify,
making it possible for kernel modules to benefit from inotify's
mechanism for watching inodes. With these patches, inotify will
maintain for each caller a list of watches (via an embedded struct
inotify_watch), where each inotify_watch is associated with a
corresponding struct inode. The caller registers an event handler and
specifies for which filesystem events their event handler should be
called per inotify_watch.
Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Acked-by: Robert Love <rml@novell.com>
Acked-by: John McCutchan <john@johnmccutchan.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/Kconfig | 24 | ||||
-rw-r--r-- | fs/Makefile | 1 | ||||
-rw-r--r-- | fs/inotify.c | 941 | ||||
-rw-r--r-- | fs/inotify_user.c | 717 |
4 files changed, 966 insertions, 717 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index f9b5842c8d2d..74f11a23622d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -393,18 +393,30 @@ config INOTIFY | |||
393 | bool "Inotify file change notification support" | 393 | bool "Inotify file change notification support" |
394 | default y | 394 | default y |
395 | ---help--- | 395 | ---help--- |
396 | Say Y here to enable inotify support and the associated system | 396 | Say Y here to enable inotify support. Inotify is a file change |
397 | calls. Inotify is a file change notification system and a | 397 | notification system and a replacement for dnotify. Inotify fixes |
398 | replacement for dnotify. Inotify fixes numerous shortcomings in | 398 | numerous shortcomings in dnotify and introduces several new features |
399 | dnotify and introduces several new features. It allows monitoring | 399 | including multiple file events, one-shot support, and unmount |
400 | of both files and directories via a single open fd. Other features | ||
401 | include multiple file events, one-shot support, and unmount | ||
402 | notification. | 400 | notification. |
403 | 401 | ||
404 | For more information, see Documentation/filesystems/inotify.txt | 402 | For more information, see Documentation/filesystems/inotify.txt |
405 | 403 | ||
406 | If unsure, say Y. | 404 | If unsure, say Y. |
407 | 405 | ||
406 | config INOTIFY_USER | ||
407 | bool "Inotify support for userspace" | ||
408 | depends on INOTIFY | ||
409 | default y | ||
410 | ---help--- | ||
411 | Say Y here to enable inotify support for userspace, including the | ||
412 | associated system calls. Inotify allows monitoring of both files and | ||
413 | directories via a single open fd. Events are read from the file | ||
414 | descriptor, which is also select()- and poll()-able. | ||
415 | |||
416 | For more information, see Documentation/filesystems/inotify.txt | ||
417 | |||
418 | If unsure, say Y. | ||
419 | |||
408 | config QUOTA | 420 | config QUOTA |
409 | bool "Quota support" | 421 | bool "Quota support" |
410 | help | 422 | help |
diff --git a/fs/Makefile b/fs/Makefile index 078d3d1191a5..d0ea6bfccf29 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -13,6 +13,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ | |||
13 | ioprio.o pnode.o drop_caches.o splice.o sync.o | 13 | ioprio.o pnode.o drop_caches.o splice.o sync.o |
14 | 14 | ||
15 | obj-$(CONFIG_INOTIFY) += inotify.o | 15 | obj-$(CONFIG_INOTIFY) += inotify.o |
16 | obj-$(CONFIG_INOTIFY_USER) += inotify_user.o | ||
16 | obj-$(CONFIG_EPOLL) += eventpoll.o | 17 | obj-$(CONFIG_EPOLL) += eventpoll.o |
17 | obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o | 18 | obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o |
18 | 19 | ||
diff --git a/fs/inotify.c b/fs/inotify.c index 732ec4bd5774..a1bedf3975ca 100644 --- a/fs/inotify.c +++ b/fs/inotify.c | |||
@@ -5,7 +5,10 @@ | |||
5 | * John McCutchan <ttb@tentacle.dhs.org> | 5 | * John McCutchan <ttb@tentacle.dhs.org> |
6 | * Robert Love <rml@novell.com> | 6 | * Robert Love <rml@novell.com> |
7 | * | 7 | * |
8 | * Kernel API added by: Amy Griffis <amy.griffis@hp.com> | ||
9 | * | ||
8 | * Copyright (C) 2005 John McCutchan | 10 | * Copyright (C) 2005 John McCutchan |
11 | * Copyright 2006 Hewlett-Packard Development Company, L.P. | ||
9 | * | 12 | * |
10 | * This program is free software; you can redistribute it and/or modify it | 13 | * This program is free software; you can redistribute it and/or modify it |
11 | * under the terms of the GNU General Public License as published by the | 14 | * under the terms of the GNU General Public License as published by the |
@@ -20,35 +23,17 @@ | |||
20 | 23 | ||
21 | #include <linux/module.h> | 24 | #include <linux/module.h> |
22 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
23 | #include <linux/sched.h> | ||
24 | #include <linux/spinlock.h> | 26 | #include <linux/spinlock.h> |
25 | #include <linux/idr.h> | 27 | #include <linux/idr.h> |
26 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
27 | #include <linux/fs.h> | 29 | #include <linux/fs.h> |
28 | #include <linux/file.h> | ||
29 | #include <linux/mount.h> | ||
30 | #include <linux/namei.h> | ||
31 | #include <linux/poll.h> | ||
32 | #include <linux/init.h> | 30 | #include <linux/init.h> |
33 | #include <linux/list.h> | 31 | #include <linux/list.h> |
34 | #include <linux/writeback.h> | 32 | #include <linux/writeback.h> |
35 | #include <linux/inotify.h> | 33 | #include <linux/inotify.h> |
36 | #include <linux/syscalls.h> | ||
37 | |||
38 | #include <asm/ioctls.h> | ||
39 | 34 | ||
40 | static atomic_t inotify_cookie; | 35 | static atomic_t inotify_cookie; |
41 | 36 | ||
42 | static kmem_cache_t *watch_cachep __read_mostly; | ||
43 | static kmem_cache_t *event_cachep __read_mostly; | ||
44 | |||
45 | static struct vfsmount *inotify_mnt __read_mostly; | ||
46 | |||
47 | /* these are configurable via /proc/sys/fs/inotify/ */ | ||
48 | int inotify_max_user_instances __read_mostly; | ||
49 | int inotify_max_user_watches __read_mostly; | ||
50 | int inotify_max_queued_events __read_mostly; | ||
51 | |||
52 | /* | 37 | /* |
53 | * Lock ordering: | 38 | * Lock ordering: |
54 | * | 39 | * |
@@ -56,327 +41,108 @@ int inotify_max_queued_events __read_mostly; | |||
56 | * iprune_mutex (synchronize shrink_icache_memory()) | 41 | * iprune_mutex (synchronize shrink_icache_memory()) |
57 | * inode_lock (protects the super_block->s_inodes list) | 42 | * inode_lock (protects the super_block->s_inodes list) |
58 | * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list) | 43 | * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list) |
59 | * inotify_dev->mutex (protects inotify_device and watches->d_list) | 44 | * inotify_handle->mutex (protects inotify_handle and watches->h_list) |
45 | * | ||
46 | * The inode->inotify_mutex and inotify_handle->mutex are held during execution | ||
47 | * of a caller's event handler. Thus, the caller must not hold any locks | ||
48 | * taken in their event handler while calling any of the published inotify | ||
49 | * interfaces. | ||
60 | */ | 50 | */ |
61 | 51 | ||
62 | /* | 52 | /* |
63 | * Lifetimes of the three main data structures--inotify_device, inode, and | 53 | * Lifetimes of the three main data structures--inotify_handle, inode, and |
64 | * inotify_watch--are managed by reference count. | 54 | * inotify_watch--are managed by reference count. |
65 | * | 55 | * |
66 | * inotify_device: Lifetime is from inotify_init() until release. Additional | 56 | * inotify_handle: Lifetime is from inotify_init() to inotify_destroy(). |
67 | * references can bump the count via get_inotify_dev() and drop the count via | 57 | * Additional references can bump the count via get_inotify_handle() and drop |
68 | * put_inotify_dev(). | 58 | * the count via put_inotify_handle(). |
69 | * | 59 | * |
70 | * inotify_watch: Lifetime is from create_watch() to destory_watch(). | 60 | * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch() |
71 | * Additional references can bump the count via get_inotify_watch() and drop | 61 | * to remove_watch_no_event(). Additional references can bump the count via |
72 | * the count via put_inotify_watch(). | 62 | * get_inotify_watch() and drop the count via put_inotify_watch(). The caller |
63 | * is responsible for the final put after receiving IN_IGNORED, or when using | ||
64 | * IN_ONESHOT after receiving the first event. Inotify does the final put if | ||
65 | * inotify_destroy() is called. | ||
73 | * | 66 | * |
74 | * inode: Pinned so long as the inode is associated with a watch, from | 67 | * inode: Pinned so long as the inode is associated with a watch, from |
75 | * create_watch() to put_inotify_watch(). | 68 | * inotify_add_watch() to the final put_inotify_watch(). |
76 | */ | 69 | */ |
77 | 70 | ||
78 | /* | 71 | /* |
79 | * struct inotify_device - represents an inotify instance | 72 | * struct inotify_handle - represents an inotify instance |
80 | * | 73 | * |
81 | * This structure is protected by the mutex 'mutex'. | 74 | * This structure is protected by the mutex 'mutex'. |
82 | */ | 75 | */ |
83 | struct inotify_device { | 76 | struct inotify_handle { |
84 | wait_queue_head_t wq; /* wait queue for i/o */ | ||
85 | struct idr idr; /* idr mapping wd -> watch */ | 77 | struct idr idr; /* idr mapping wd -> watch */ |
86 | struct mutex mutex; /* protects this bad boy */ | 78 | struct mutex mutex; /* protects this bad boy */ |
87 | struct list_head events; /* list of queued events */ | ||
88 | struct list_head watches; /* list of watches */ | 79 | struct list_head watches; /* list of watches */ |
89 | atomic_t count; /* reference count */ | 80 | atomic_t count; /* reference count */ |
90 | struct user_struct *user; /* user who opened this dev */ | ||
91 | unsigned int queue_size; /* size of the queue (bytes) */ | ||
92 | unsigned int event_count; /* number of pending events */ | ||
93 | unsigned int max_events; /* maximum number of events */ | ||
94 | u32 last_wd; /* the last wd allocated */ | 81 | u32 last_wd; /* the last wd allocated */ |
82 | const struct inotify_operations *in_ops; /* inotify caller operations */ | ||
95 | }; | 83 | }; |
96 | 84 | ||
97 | /* | 85 | static inline void get_inotify_handle(struct inotify_handle *ih) |
98 | * struct inotify_kernel_event - An inotify event, originating from a watch and | ||
99 | * queued for user-space. A list of these is attached to each instance of the | ||
100 | * device. In read(), this list is walked and all events that can fit in the | ||
101 | * buffer are returned. | ||
102 | * | ||
103 | * Protected by dev->mutex of the device in which we are queued. | ||
104 | */ | ||
105 | struct inotify_kernel_event { | ||
106 | struct inotify_event event; /* the user-space event */ | ||
107 | struct list_head list; /* entry in inotify_device's list */ | ||
108 | char *name; /* filename, if any */ | ||
109 | }; | ||
110 | |||
111 | /* | ||
112 | * struct inotify_watch - represents a watch request on a specific inode | ||
113 | * | ||
114 | * d_list is protected by dev->mutex of the associated watch->dev. | ||
115 | * i_list and mask are protected by inode->inotify_mutex of the associated inode. | ||
116 | * dev, inode, and wd are never written to once the watch is created. | ||
117 | */ | ||
118 | struct inotify_watch { | ||
119 | struct list_head d_list; /* entry in inotify_device's list */ | ||
120 | struct list_head i_list; /* entry in inode's list */ | ||
121 | atomic_t count; /* reference count */ | ||
122 | struct inotify_device *dev; /* associated device */ | ||
123 | struct inode *inode; /* associated inode */ | ||
124 | s32 wd; /* watch descriptor */ | ||
125 | u32 mask; /* event mask for this watch */ | ||
126 | }; | ||
127 | |||
128 | #ifdef CONFIG_SYSCTL | ||
129 | |||
130 | #include <linux/sysctl.h> | ||
131 | |||
132 | static int zero; | ||
133 | |||
134 | ctl_table inotify_table[] = { | ||
135 | { | ||
136 | .ctl_name = INOTIFY_MAX_USER_INSTANCES, | ||
137 | .procname = "max_user_instances", | ||
138 | .data = &inotify_max_user_instances, | ||
139 | .maxlen = sizeof(int), | ||
140 | .mode = 0644, | ||
141 | .proc_handler = &proc_dointvec_minmax, | ||
142 | .strategy = &sysctl_intvec, | ||
143 | .extra1 = &zero, | ||
144 | }, | ||
145 | { | ||
146 | .ctl_name = INOTIFY_MAX_USER_WATCHES, | ||
147 | .procname = "max_user_watches", | ||
148 | .data = &inotify_max_user_watches, | ||
149 | .maxlen = sizeof(int), | ||
150 | .mode = 0644, | ||
151 | .proc_handler = &proc_dointvec_minmax, | ||
152 | .strategy = &sysctl_intvec, | ||
153 | .extra1 = &zero, | ||
154 | }, | ||
155 | { | ||
156 | .ctl_name = INOTIFY_MAX_QUEUED_EVENTS, | ||
157 | .procname = "max_queued_events", | ||
158 | .data = &inotify_max_queued_events, | ||
159 | .maxlen = sizeof(int), | ||
160 | .mode = 0644, | ||
161 | .proc_handler = &proc_dointvec_minmax, | ||
162 | .strategy = &sysctl_intvec, | ||
163 | .extra1 = &zero | ||
164 | }, | ||
165 | { .ctl_name = 0 } | ||
166 | }; | ||
167 | #endif /* CONFIG_SYSCTL */ | ||
168 | |||
169 | static inline void get_inotify_dev(struct inotify_device *dev) | ||
170 | { | 86 | { |
171 | atomic_inc(&dev->count); | 87 | atomic_inc(&ih->count); |
172 | } | 88 | } |
173 | 89 | ||
174 | static inline void put_inotify_dev(struct inotify_device *dev) | 90 | static inline void put_inotify_handle(struct inotify_handle *ih) |
175 | { | 91 | { |
176 | if (atomic_dec_and_test(&dev->count)) { | 92 | if (atomic_dec_and_test(&ih->count)) { |
177 | atomic_dec(&dev->user->inotify_devs); | 93 | idr_destroy(&ih->idr); |
178 | free_uid(dev->user); | 94 | kfree(ih); |
179 | idr_destroy(&dev->idr); | ||
180 | kfree(dev); | ||
181 | } | 95 | } |
182 | } | 96 | } |
183 | 97 | ||
184 | static inline void get_inotify_watch(struct inotify_watch *watch) | 98 | /** |
99 | * get_inotify_watch - grab a reference to an inotify_watch | ||
100 | * @watch: watch to grab | ||
101 | */ | ||
102 | void get_inotify_watch(struct inotify_watch *watch) | ||
185 | { | 103 | { |
186 | atomic_inc(&watch->count); | 104 | atomic_inc(&watch->count); |
187 | } | 105 | } |
106 | EXPORT_SYMBOL_GPL(get_inotify_watch); | ||
188 | 107 | ||
189 | /* | 108 | /** |
190 | * put_inotify_watch - decrements the ref count on a given watch. cleans up | 109 | * put_inotify_watch - decrements the ref count on a given watch. cleans up |
191 | * the watch and its references if the count reaches zero. | 110 | * watch references if the count reaches zero. inotify_watch is freed by |
111 | * inotify callers via the destroy_watch() op. | ||
112 | * @watch: watch to release | ||
192 | */ | 113 | */ |
193 | static inline void put_inotify_watch(struct inotify_watch *watch) | 114 | void put_inotify_watch(struct inotify_watch *watch) |
194 | { | 115 | { |
195 | if (atomic_dec_and_test(&watch->count)) { | 116 | if (atomic_dec_and_test(&watch->count)) { |
196 | put_inotify_dev(watch->dev); | 117 | struct inotify_handle *ih = watch->ih; |
197 | iput(watch->inode); | ||
198 | kmem_cache_free(watch_cachep, watch); | ||
199 | } | ||
200 | } | ||
201 | |||
202 | /* | ||
203 | * kernel_event - create a new kernel event with the given parameters | ||
204 | * | ||
205 | * This function can sleep. | ||
206 | */ | ||
207 | static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie, | ||
208 | const char *name) | ||
209 | { | ||
210 | struct inotify_kernel_event *kevent; | ||
211 | |||
212 | kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL); | ||
213 | if (unlikely(!kevent)) | ||
214 | return NULL; | ||
215 | |||
216 | /* we hand this out to user-space, so zero it just in case */ | ||
217 | memset(&kevent->event, 0, sizeof(struct inotify_event)); | ||
218 | |||
219 | kevent->event.wd = wd; | ||
220 | kevent->event.mask = mask; | ||
221 | kevent->event.cookie = cookie; | ||
222 | |||
223 | INIT_LIST_HEAD(&kevent->list); | ||
224 | |||
225 | if (name) { | ||
226 | size_t len, rem, event_size = sizeof(struct inotify_event); | ||
227 | |||
228 | /* | ||
229 | * We need to pad the filename so as to properly align an | ||
230 | * array of inotify_event structures. Because the structure is | ||
231 | * small and the common case is a small filename, we just round | ||
232 | * up to the next multiple of the structure's sizeof. This is | ||
233 | * simple and safe for all architectures. | ||
234 | */ | ||
235 | len = strlen(name) + 1; | ||
236 | rem = event_size - len; | ||
237 | if (len > event_size) { | ||
238 | rem = event_size - (len % event_size); | ||
239 | if (len % event_size == 0) | ||
240 | rem = 0; | ||
241 | } | ||
242 | |||
243 | kevent->name = kmalloc(len + rem, GFP_KERNEL); | ||
244 | if (unlikely(!kevent->name)) { | ||
245 | kmem_cache_free(event_cachep, kevent); | ||
246 | return NULL; | ||
247 | } | ||
248 | memcpy(kevent->name, name, len); | ||
249 | if (rem) | ||
250 | memset(kevent->name + len, 0, rem); | ||
251 | kevent->event.len = len + rem; | ||
252 | } else { | ||
253 | kevent->event.len = 0; | ||
254 | kevent->name = NULL; | ||
255 | } | ||
256 | |||
257 | return kevent; | ||
258 | } | ||
259 | |||
260 | /* | ||
261 | * inotify_dev_get_event - return the next event in the given dev's queue | ||
262 | * | ||
263 | * Caller must hold dev->mutex. | ||
264 | */ | ||
265 | static inline struct inotify_kernel_event * | ||
266 | inotify_dev_get_event(struct inotify_device *dev) | ||
267 | { | ||
268 | return list_entry(dev->events.next, struct inotify_kernel_event, list); | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * inotify_dev_queue_event - add a new event to the given device | ||
273 | * | ||
274 | * Caller must hold dev->mutex. Can sleep (calls kernel_event()). | ||
275 | */ | ||
276 | static void inotify_dev_queue_event(struct inotify_device *dev, | ||
277 | struct inotify_watch *watch, u32 mask, | ||
278 | u32 cookie, const char *name) | ||
279 | { | ||
280 | struct inotify_kernel_event *kevent, *last; | ||
281 | |||
282 | /* coalescing: drop this event if it is a dupe of the previous */ | ||
283 | last = inotify_dev_get_event(dev); | ||
284 | if (last && last->event.mask == mask && last->event.wd == watch->wd && | ||
285 | last->event.cookie == cookie) { | ||
286 | const char *lastname = last->name; | ||
287 | |||
288 | if (!name && !lastname) | ||
289 | return; | ||
290 | if (name && lastname && !strcmp(lastname, name)) | ||
291 | return; | ||
292 | } | ||
293 | |||
294 | /* the queue overflowed and we already sent the Q_OVERFLOW event */ | ||
295 | if (unlikely(dev->event_count > dev->max_events)) | ||
296 | return; | ||
297 | |||
298 | /* if the queue overflows, we need to notify user space */ | ||
299 | if (unlikely(dev->event_count == dev->max_events)) | ||
300 | kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL); | ||
301 | else | ||
302 | kevent = kernel_event(watch->wd, mask, cookie, name); | ||
303 | |||
304 | if (unlikely(!kevent)) | ||
305 | return; | ||
306 | |||
307 | /* queue the event and wake up anyone waiting */ | ||
308 | dev->event_count++; | ||
309 | dev->queue_size += sizeof(struct inotify_event) + kevent->event.len; | ||
310 | list_add_tail(&kevent->list, &dev->events); | ||
311 | wake_up_interruptible(&dev->wq); | ||
312 | } | ||
313 | |||
314 | /* | ||
315 | * remove_kevent - cleans up and ultimately frees the given kevent | ||
316 | * | ||
317 | * Caller must hold dev->mutex. | ||
318 | */ | ||
319 | static void remove_kevent(struct inotify_device *dev, | ||
320 | struct inotify_kernel_event *kevent) | ||
321 | { | ||
322 | list_del(&kevent->list); | ||
323 | 118 | ||
324 | dev->event_count--; | 119 | iput(watch->inode); |
325 | dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; | 120 | ih->in_ops->destroy_watch(watch); |
326 | 121 | put_inotify_handle(ih); | |
327 | kfree(kevent->name); | ||
328 | kmem_cache_free(event_cachep, kevent); | ||
329 | } | ||
330 | |||
331 | /* | ||
332 | * inotify_dev_event_dequeue - destroy an event on the given device | ||
333 | * | ||
334 | * Caller must hold dev->mutex. | ||
335 | */ | ||
336 | static void inotify_dev_event_dequeue(struct inotify_device *dev) | ||
337 | { | ||
338 | if (!list_empty(&dev->events)) { | ||
339 | struct inotify_kernel_event *kevent; | ||
340 | kevent = inotify_dev_get_event(dev); | ||
341 | remove_kevent(dev, kevent); | ||
342 | } | 122 | } |
343 | } | 123 | } |
124 | EXPORT_SYMBOL_GPL(put_inotify_watch); | ||
344 | 125 | ||
345 | /* | 126 | /* |
346 | * inotify_dev_get_wd - returns the next WD for use by the given dev | 127 | * inotify_handle_get_wd - returns the next WD for use by the given handle |
347 | * | 128 | * |
348 | * Callers must hold dev->mutex. This function can sleep. | 129 | * Callers must hold ih->mutex. This function can sleep. |
349 | */ | 130 | */ |
350 | static int inotify_dev_get_wd(struct inotify_device *dev, | 131 | static int inotify_handle_get_wd(struct inotify_handle *ih, |
351 | struct inotify_watch *watch) | 132 | struct inotify_watch *watch) |
352 | { | 133 | { |
353 | int ret; | 134 | int ret; |
354 | 135 | ||
355 | do { | 136 | do { |
356 | if (unlikely(!idr_pre_get(&dev->idr, GFP_KERNEL))) | 137 | if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL))) |
357 | return -ENOSPC; | 138 | return -ENOSPC; |
358 | ret = idr_get_new_above(&dev->idr, watch, dev->last_wd+1, &watch->wd); | 139 | ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd); |
359 | } while (ret == -EAGAIN); | 140 | } while (ret == -EAGAIN); |
360 | 141 | ||
361 | return ret; | 142 | if (likely(!ret)) |
362 | } | 143 | ih->last_wd = watch->wd; |
363 | 144 | ||
364 | /* | 145 | return ret; |
365 | * find_inode - resolve a user-given path to a specific inode and return a nd | ||
366 | */ | ||
367 | static int find_inode(const char __user *dirname, struct nameidata *nd, | ||
368 | unsigned flags) | ||
369 | { | ||
370 | int error; | ||
371 | |||
372 | error = __user_walk(dirname, flags, nd); | ||
373 | if (error) | ||
374 | return error; | ||
375 | /* you can only watch an inode if you have read permissions on it */ | ||
376 | error = vfs_permission(nd, MAY_READ); | ||
377 | if (error) | ||
378 | path_release(nd); | ||
379 | return error; | ||
380 | } | 146 | } |
381 | 147 | ||
382 | /* | 148 | /* |
@@ -422,67 +188,18 @@ static void set_dentry_child_flags(struct inode *inode, int watched) | |||
422 | } | 188 | } |
423 | 189 | ||
424 | /* | 190 | /* |
425 | * create_watch - creates a watch on the given device. | 191 | * inode_find_handle - find the watch associated with the given inode and |
426 | * | 192 | * handle |
427 | * Callers must hold dev->mutex. Calls inotify_dev_get_wd() so may sleep. | ||
428 | * Both 'dev' and 'inode' (by way of nameidata) need to be pinned. | ||
429 | */ | ||
430 | static struct inotify_watch *create_watch(struct inotify_device *dev, | ||
431 | u32 mask, struct inode *inode) | ||
432 | { | ||
433 | struct inotify_watch *watch; | ||
434 | int ret; | ||
435 | |||
436 | if (atomic_read(&dev->user->inotify_watches) >= | ||
437 | inotify_max_user_watches) | ||
438 | return ERR_PTR(-ENOSPC); | ||
439 | |||
440 | watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL); | ||
441 | if (unlikely(!watch)) | ||
442 | return ERR_PTR(-ENOMEM); | ||
443 | |||
444 | ret = inotify_dev_get_wd(dev, watch); | ||
445 | if (unlikely(ret)) { | ||
446 | kmem_cache_free(watch_cachep, watch); | ||
447 | return ERR_PTR(ret); | ||
448 | } | ||
449 | |||
450 | dev->last_wd = watch->wd; | ||
451 | watch->mask = mask; | ||
452 | atomic_set(&watch->count, 0); | ||
453 | INIT_LIST_HEAD(&watch->d_list); | ||
454 | INIT_LIST_HEAD(&watch->i_list); | ||
455 | |||
456 | /* save a reference to device and bump the count to make it official */ | ||
457 | get_inotify_dev(dev); | ||
458 | watch->dev = dev; | ||
459 | |||
460 | /* | ||
461 | * Save a reference to the inode and bump the ref count to make it | ||
462 | * official. We hold a reference to nameidata, which makes this safe. | ||
463 | */ | ||
464 | watch->inode = igrab(inode); | ||
465 | |||
466 | /* bump our own count, corresponding to our entry in dev->watches */ | ||
467 | get_inotify_watch(watch); | ||
468 | |||
469 | atomic_inc(&dev->user->inotify_watches); | ||
470 | |||
471 | return watch; | ||
472 | } | ||
473 | |||
474 | /* | ||
475 | * inotify_find_dev - find the watch associated with the given inode and dev | ||
476 | * | 193 | * |
477 | * Callers must hold inode->inotify_mutex. | 194 | * Callers must hold inode->inotify_mutex. |
478 | */ | 195 | */ |
479 | static struct inotify_watch *inode_find_dev(struct inode *inode, | 196 | static struct inotify_watch *inode_find_handle(struct inode *inode, |
480 | struct inotify_device *dev) | 197 | struct inotify_handle *ih) |
481 | { | 198 | { |
482 | struct inotify_watch *watch; | 199 | struct inotify_watch *watch; |
483 | 200 | ||
484 | list_for_each_entry(watch, &inode->inotify_watches, i_list) { | 201 | list_for_each_entry(watch, &inode->inotify_watches, i_list) { |
485 | if (watch->dev == dev) | 202 | if (watch->ih == ih) |
486 | return watch; | 203 | return watch; |
487 | } | 204 | } |
488 | 205 | ||
@@ -491,39 +208,34 @@ static struct inotify_watch *inode_find_dev(struct inode *inode, | |||
491 | 208 | ||
492 | /* | 209 | /* |
493 | * remove_watch_no_event - remove_watch() without the IN_IGNORED event. | 210 | * remove_watch_no_event - remove_watch() without the IN_IGNORED event. |
211 | * | ||
212 | * Callers must hold both inode->inotify_mutex and ih->mutex. | ||
494 | */ | 213 | */ |
495 | static void remove_watch_no_event(struct inotify_watch *watch, | 214 | static void remove_watch_no_event(struct inotify_watch *watch, |
496 | struct inotify_device *dev) | 215 | struct inotify_handle *ih) |
497 | { | 216 | { |
498 | list_del(&watch->i_list); | 217 | list_del(&watch->i_list); |
499 | list_del(&watch->d_list); | 218 | list_del(&watch->h_list); |
500 | 219 | ||
501 | if (!inotify_inode_watched(watch->inode)) | 220 | if (!inotify_inode_watched(watch->inode)) |
502 | set_dentry_child_flags(watch->inode, 0); | 221 | set_dentry_child_flags(watch->inode, 0); |
503 | 222 | ||
504 | atomic_dec(&dev->user->inotify_watches); | 223 | idr_remove(&ih->idr, watch->wd); |
505 | idr_remove(&dev->idr, watch->wd); | ||
506 | put_inotify_watch(watch); | ||
507 | } | 224 | } |
508 | 225 | ||
509 | /* | 226 | /* |
510 | * remove_watch - Remove a watch from both the device and the inode. Sends | 227 | * remove_watch - Remove a watch from both the handle and the inode. Sends |
511 | * the IN_IGNORED event to the given device signifying that the inode is no | 228 | * the IN_IGNORED event signifying that the inode is no longer watched. |
512 | * longer watched. | ||
513 | * | ||
514 | * Callers must hold both inode->inotify_mutex and dev->mutex. We drop a | ||
515 | * reference to the inode before returning. | ||
516 | * | 229 | * |
517 | * The inode is not iput() so as to remain atomic. If the inode needs to be | 230 | * Callers must hold both inode->inotify_mutex and ih->mutex. |
518 | * iput(), the call returns one. Otherwise, it returns zero. | ||
519 | */ | 231 | */ |
520 | static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev) | 232 | static void remove_watch(struct inotify_watch *watch, struct inotify_handle *ih) |
521 | { | 233 | { |
522 | inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL); | 234 | remove_watch_no_event(watch, ih); |
523 | remove_watch_no_event(watch, dev); | 235 | ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL); |
524 | } | 236 | } |
525 | 237 | ||
526 | /* Kernel API */ | 238 | /* Kernel API for producing events */ |
527 | 239 | ||
528 | /* | 240 | /* |
529 | * inotify_d_instantiate - instantiate dcache entry for inode | 241 | * inotify_d_instantiate - instantiate dcache entry for inode |
@@ -576,14 +288,12 @@ void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie, | |||
576 | list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { | 288 | list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { |
577 | u32 watch_mask = watch->mask; | 289 | u32 watch_mask = watch->mask; |
578 | if (watch_mask & mask) { | 290 | if (watch_mask & mask) { |
579 | struct inotify_device *dev = watch->dev; | 291 | struct inotify_handle *ih= watch->ih; |
580 | get_inotify_watch(watch); | 292 | mutex_lock(&ih->mutex); |
581 | mutex_lock(&dev->mutex); | ||
582 | inotify_dev_queue_event(dev, watch, mask, cookie, name); | ||
583 | if (watch_mask & IN_ONESHOT) | 293 | if (watch_mask & IN_ONESHOT) |
584 | remove_watch_no_event(watch, dev); | 294 | remove_watch_no_event(watch, ih); |
585 | mutex_unlock(&dev->mutex); | 295 | ih->in_ops->handle_event(watch, watch->wd, mask, cookie, name); |
586 | put_inotify_watch(watch); | 296 | mutex_unlock(&ih->mutex); |
587 | } | 297 | } |
588 | } | 298 | } |
589 | mutex_unlock(&inode->inotify_mutex); | 299 | mutex_unlock(&inode->inotify_mutex); |
@@ -694,11 +404,12 @@ void inotify_unmount_inodes(struct list_head *list) | |||
694 | mutex_lock(&inode->inotify_mutex); | 404 | mutex_lock(&inode->inotify_mutex); |
695 | watches = &inode->inotify_watches; | 405 | watches = &inode->inotify_watches; |
696 | list_for_each_entry_safe(watch, next_w, watches, i_list) { | 406 | list_for_each_entry_safe(watch, next_w, watches, i_list) { |
697 | struct inotify_device *dev = watch->dev; | 407 | struct inotify_handle *ih= watch->ih; |
698 | mutex_lock(&dev->mutex); | 408 | mutex_lock(&ih->mutex); |
699 | inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL); | 409 | ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, |
700 | remove_watch(watch, dev); | 410 | NULL); |
701 | mutex_unlock(&dev->mutex); | 411 | remove_watch(watch, ih); |
412 | mutex_unlock(&ih->mutex); | ||
702 | } | 413 | } |
703 | mutex_unlock(&inode->inotify_mutex); | 414 | mutex_unlock(&inode->inotify_mutex); |
704 | iput(inode); | 415 | iput(inode); |
@@ -718,432 +429,240 @@ void inotify_inode_is_dead(struct inode *inode) | |||
718 | 429 | ||
719 | mutex_lock(&inode->inotify_mutex); | 430 | mutex_lock(&inode->inotify_mutex); |
720 | list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { | 431 | list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { |
721 | struct inotify_device *dev = watch->dev; | 432 | struct inotify_handle *ih = watch->ih; |
722 | mutex_lock(&dev->mutex); | 433 | mutex_lock(&ih->mutex); |
723 | remove_watch(watch, dev); | 434 | remove_watch(watch, ih); |
724 | mutex_unlock(&dev->mutex); | 435 | mutex_unlock(&ih->mutex); |
725 | } | 436 | } |
726 | mutex_unlock(&inode->inotify_mutex); | 437 | mutex_unlock(&inode->inotify_mutex); |
727 | } | 438 | } |
728 | EXPORT_SYMBOL_GPL(inotify_inode_is_dead); | 439 | EXPORT_SYMBOL_GPL(inotify_inode_is_dead); |
729 | 440 | ||
730 | /* Device Interface */ | 441 | /* Kernel Consumer API */ |
731 | |||
732 | static unsigned int inotify_poll(struct file *file, poll_table *wait) | ||
733 | { | ||
734 | struct inotify_device *dev = file->private_data; | ||
735 | int ret = 0; | ||
736 | |||
737 | poll_wait(file, &dev->wq, wait); | ||
738 | mutex_lock(&dev->mutex); | ||
739 | if (!list_empty(&dev->events)) | ||
740 | ret = POLLIN | POLLRDNORM; | ||
741 | mutex_unlock(&dev->mutex); | ||
742 | |||
743 | return ret; | ||
744 | } | ||
745 | 442 | ||
746 | static ssize_t inotify_read(struct file *file, char __user *buf, | 443 | /** |
747 | size_t count, loff_t *pos) | 444 | * inotify_init - allocate and initialize an inotify instance |
445 | * @ops: caller's inotify operations | ||
446 | */ | ||
447 | struct inotify_handle *inotify_init(const struct inotify_operations *ops) | ||
748 | { | 448 | { |
749 | size_t event_size = sizeof (struct inotify_event); | 449 | struct inotify_handle *ih; |
750 | struct inotify_device *dev; | ||
751 | char __user *start; | ||
752 | int ret; | ||
753 | DEFINE_WAIT(wait); | ||
754 | |||
755 | start = buf; | ||
756 | dev = file->private_data; | ||
757 | |||
758 | while (1) { | ||
759 | int events; | ||
760 | |||
761 | prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); | ||
762 | 450 | ||
763 | mutex_lock(&dev->mutex); | 451 | ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL); |
764 | events = !list_empty(&dev->events); | 452 | if (unlikely(!ih)) |
765 | mutex_unlock(&dev->mutex); | 453 | return ERR_PTR(-ENOMEM); |
766 | if (events) { | ||
767 | ret = 0; | ||
768 | break; | ||
769 | } | ||
770 | |||
771 | if (file->f_flags & O_NONBLOCK) { | ||
772 | ret = -EAGAIN; | ||
773 | break; | ||
774 | } | ||
775 | |||
776 | if (signal_pending(current)) { | ||
777 | ret = -EINTR; | ||
778 | break; | ||
779 | } | ||
780 | |||
781 | schedule(); | ||
782 | } | ||
783 | |||
784 | finish_wait(&dev->wq, &wait); | ||
785 | if (ret) | ||
786 | return ret; | ||
787 | |||
788 | mutex_lock(&dev->mutex); | ||
789 | while (1) { | ||
790 | struct inotify_kernel_event *kevent; | ||
791 | |||
792 | ret = buf - start; | ||
793 | if (list_empty(&dev->events)) | ||
794 | break; | ||
795 | |||
796 | kevent = inotify_dev_get_event(dev); | ||
797 | if (event_size + kevent->event.len > count) | ||
798 | break; | ||
799 | |||
800 | if (copy_to_user(buf, &kevent->event, event_size)) { | ||
801 | ret = -EFAULT; | ||
802 | break; | ||
803 | } | ||
804 | buf += event_size; | ||
805 | count -= event_size; | ||
806 | |||
807 | if (kevent->name) { | ||
808 | if (copy_to_user(buf, kevent->name, kevent->event.len)){ | ||
809 | ret = -EFAULT; | ||
810 | break; | ||
811 | } | ||
812 | buf += kevent->event.len; | ||
813 | count -= kevent->event.len; | ||
814 | } | ||
815 | 454 | ||
816 | remove_kevent(dev, kevent); | 455 | idr_init(&ih->idr); |
817 | } | 456 | INIT_LIST_HEAD(&ih->watches); |
818 | mutex_unlock(&dev->mutex); | 457 | mutex_init(&ih->mutex); |
458 | ih->last_wd = 0; | ||
459 | ih->in_ops = ops; | ||
460 | atomic_set(&ih->count, 0); | ||
461 | get_inotify_handle(ih); | ||
819 | 462 | ||
820 | return ret; | 463 | return ih; |
821 | } | 464 | } |
465 | EXPORT_SYMBOL_GPL(inotify_init); | ||
822 | 466 | ||
823 | static int inotify_release(struct inode *ignored, struct file *file) | 467 | /** |
468 | * inotify_destroy - clean up and destroy an inotify instance | ||
469 | * @ih: inotify handle | ||
470 | */ | ||
471 | void inotify_destroy(struct inotify_handle *ih) | ||
824 | { | 472 | { |
825 | struct inotify_device *dev = file->private_data; | ||
826 | |||
827 | /* | 473 | /* |
828 | * Destroy all of the watches on this device. Unfortunately, not very | 474 | * Destroy all of the watches for this handle. Unfortunately, not very |
829 | * pretty. We cannot do a simple iteration over the list, because we | 475 | * pretty. We cannot do a simple iteration over the list, because we |
830 | * do not know the inode until we iterate to the watch. But we need to | 476 | * do not know the inode until we iterate to the watch. But we need to |
831 | * hold inode->inotify_mutex before dev->mutex. The following works. | 477 | * hold inode->inotify_mutex before ih->mutex. The following works. |
832 | */ | 478 | */ |
833 | while (1) { | 479 | while (1) { |
834 | struct inotify_watch *watch; | 480 | struct inotify_watch *watch; |
835 | struct list_head *watches; | 481 | struct list_head *watches; |
836 | struct inode *inode; | 482 | struct inode *inode; |
837 | 483 | ||
838 | mutex_lock(&dev->mutex); | 484 | mutex_lock(&ih->mutex); |
839 | watches = &dev->watches; | 485 | watches = &ih->watches; |
840 | if (list_empty(watches)) { | 486 | if (list_empty(watches)) { |
841 | mutex_unlock(&dev->mutex); | 487 | mutex_unlock(&ih->mutex); |
842 | break; | 488 | break; |
843 | } | 489 | } |
844 | watch = list_entry(watches->next, struct inotify_watch, d_list); | 490 | watch = list_entry(watches->next, struct inotify_watch, h_list); |
845 | get_inotify_watch(watch); | 491 | get_inotify_watch(watch); |
846 | mutex_unlock(&dev->mutex); | 492 | mutex_unlock(&ih->mutex); |
847 | 493 | ||
848 | inode = watch->inode; | 494 | inode = watch->inode; |
849 | mutex_lock(&inode->inotify_mutex); | 495 | mutex_lock(&inode->inotify_mutex); |
850 | mutex_lock(&dev->mutex); | 496 | mutex_lock(&ih->mutex); |
851 | 497 | ||
852 | /* make sure we didn't race with another list removal */ | 498 | /* make sure we didn't race with another list removal */ |
853 | if (likely(idr_find(&dev->idr, watch->wd))) | 499 | if (likely(idr_find(&ih->idr, watch->wd))) { |
854 | remove_watch_no_event(watch, dev); | 500 | remove_watch_no_event(watch, ih); |
501 | put_inotify_watch(watch); | ||
502 | } | ||
855 | 503 | ||
856 | mutex_unlock(&dev->mutex); | 504 | mutex_unlock(&ih->mutex); |
857 | mutex_unlock(&inode->inotify_mutex); | 505 | mutex_unlock(&inode->inotify_mutex); |
858 | put_inotify_watch(watch); | 506 | put_inotify_watch(watch); |
859 | } | 507 | } |
860 | 508 | ||
861 | /* destroy all of the events on this device */ | 509 | /* free this handle: the put matching the get in inotify_init() */ |
862 | mutex_lock(&dev->mutex); | 510 | put_inotify_handle(ih); |
863 | while (!list_empty(&dev->events)) | ||
864 | inotify_dev_event_dequeue(dev); | ||
865 | mutex_unlock(&dev->mutex); | ||
866 | |||
867 | /* free this device: the put matching the get in inotify_init() */ | ||
868 | put_inotify_dev(dev); | ||
869 | |||
870 | return 0; | ||
871 | } | 511 | } |
512 | EXPORT_SYMBOL_GPL(inotify_destroy); | ||
872 | 513 | ||
873 | /* | 514 | /** |
874 | * inotify_ignore - remove a given wd from this inotify instance. | 515 | * inotify_find_update_watch - find and update the mask of an existing watch |
516 | * @ih: inotify handle | ||
517 | * @inode: inode's watch to update | ||
518 | * @mask: mask of events to watch | ||
875 | * | 519 | * |
876 | * Can sleep. | 520 | * Caller must pin given inode (via nameidata). |
877 | */ | 521 | */ |
878 | static int inotify_ignore(struct inotify_device *dev, s32 wd) | 522 | s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode, |
523 | u32 mask) | ||
879 | { | 524 | { |
880 | struct inotify_watch *watch; | 525 | struct inotify_watch *old; |
881 | struct inode *inode; | 526 | int mask_add = 0; |
527 | int ret; | ||
882 | 528 | ||
883 | mutex_lock(&dev->mutex); | 529 | if (mask & IN_MASK_ADD) |
884 | watch = idr_find(&dev->idr, wd); | 530 | mask_add = 1; |
885 | if (unlikely(!watch)) { | 531 | |
886 | mutex_unlock(&dev->mutex); | 532 | /* don't allow invalid bits: we don't want flags set */ |
533 | mask &= IN_ALL_EVENTS | IN_ONESHOT; | ||
534 | if (unlikely(!mask)) | ||
887 | return -EINVAL; | 535 | return -EINVAL; |
888 | } | ||
889 | get_inotify_watch(watch); | ||
890 | inode = watch->inode; | ||
891 | mutex_unlock(&dev->mutex); | ||
892 | 536 | ||
893 | mutex_lock(&inode->inotify_mutex); | 537 | mutex_lock(&inode->inotify_mutex); |
894 | mutex_lock(&dev->mutex); | 538 | mutex_lock(&ih->mutex); |
895 | |||
896 | /* make sure that we did not race */ | ||
897 | if (likely(idr_find(&dev->idr, wd) == watch)) | ||
898 | remove_watch(watch, dev); | ||
899 | |||
900 | mutex_unlock(&dev->mutex); | ||
901 | mutex_unlock(&inode->inotify_mutex); | ||
902 | put_inotify_watch(watch); | ||
903 | |||
904 | return 0; | ||
905 | } | ||
906 | |||
907 | static long inotify_ioctl(struct file *file, unsigned int cmd, | ||
908 | unsigned long arg) | ||
909 | { | ||
910 | struct inotify_device *dev; | ||
911 | void __user *p; | ||
912 | int ret = -ENOTTY; | ||
913 | 539 | ||
914 | dev = file->private_data; | 540 | /* |
915 | p = (void __user *) arg; | 541 | * Handle the case of re-adding a watch on an (inode,ih) pair that we |
916 | 542 | * are already watching. We just update the mask and return its wd. | |
917 | switch (cmd) { | 543 | */ |
918 | case FIONREAD: | 544 | old = inode_find_handle(inode, ih); |
919 | ret = put_user(dev->queue_size, (int __user *) p); | 545 | if (unlikely(!old)) { |
920 | break; | 546 | ret = -ENOENT; |
921 | } | 547 | goto out; |
922 | |||
923 | return ret; | ||
924 | } | ||
925 | |||
926 | static const struct file_operations inotify_fops = { | ||
927 | .poll = inotify_poll, | ||
928 | .read = inotify_read, | ||
929 | .release = inotify_release, | ||
930 | .unlocked_ioctl = inotify_ioctl, | ||
931 | .compat_ioctl = inotify_ioctl, | ||
932 | }; | ||
933 | |||
934 | asmlinkage long sys_inotify_init(void) | ||
935 | { | ||
936 | struct inotify_device *dev; | ||
937 | struct user_struct *user; | ||
938 | struct file *filp; | ||
939 | int fd, ret; | ||
940 | |||
941 | fd = get_unused_fd(); | ||
942 | if (fd < 0) | ||
943 | return fd; | ||
944 | |||
945 | filp = get_empty_filp(); | ||
946 | if (!filp) { | ||
947 | ret = -ENFILE; | ||
948 | goto out_put_fd; | ||
949 | } | ||
950 | |||
951 | user = get_uid(current->user); | ||
952 | if (unlikely(atomic_read(&user->inotify_devs) >= | ||
953 | inotify_max_user_instances)) { | ||
954 | ret = -EMFILE; | ||
955 | goto out_free_uid; | ||
956 | } | ||
957 | |||
958 | dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL); | ||
959 | if (unlikely(!dev)) { | ||
960 | ret = -ENOMEM; | ||
961 | goto out_free_uid; | ||
962 | } | 548 | } |
963 | 549 | ||
964 | filp->f_op = &inotify_fops; | 550 | if (mask_add) |
965 | filp->f_vfsmnt = mntget(inotify_mnt); | 551 | old->mask |= mask; |
966 | filp->f_dentry = dget(inotify_mnt->mnt_root); | 552 | else |
967 | filp->f_mapping = filp->f_dentry->d_inode->i_mapping; | 553 | old->mask = mask; |
968 | filp->f_mode = FMODE_READ; | 554 | ret = old->wd; |
969 | filp->f_flags = O_RDONLY; | 555 | out: |
970 | filp->private_data = dev; | 556 | mutex_unlock(&ih->mutex); |
971 | 557 | mutex_unlock(&inode->inotify_mutex); | |
972 | idr_init(&dev->idr); | ||
973 | INIT_LIST_HEAD(&dev->events); | ||
974 | INIT_LIST_HEAD(&dev->watches); | ||
975 | init_waitqueue_head(&dev->wq); | ||
976 | mutex_init(&dev->mutex); | ||
977 | dev->event_count = 0; | ||
978 | dev->queue_size = 0; | ||
979 | dev->max_events = inotify_max_queued_events; | ||
980 | dev->user = user; | ||
981 | dev->last_wd = 0; | ||
982 | atomic_set(&dev->count, 0); | ||
983 | |||
984 | get_inotify_dev(dev); | ||
985 | atomic_inc(&user->inotify_devs); | ||
986 | fd_install(fd, filp); | ||
987 | |||
988 | return fd; | ||
989 | out_free_uid: | ||
990 | free_uid(user); | ||
991 | put_filp(filp); | ||
992 | out_put_fd: | ||
993 | put_unused_fd(fd); | ||
994 | return ret; | 558 | return ret; |
995 | } | 559 | } |
560 | EXPORT_SYMBOL_GPL(inotify_find_update_watch); | ||
996 | 561 | ||
997 | asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) | 562 | /** |
563 | * inotify_add_watch - add a watch to an inotify instance | ||
564 | * @ih: inotify handle | ||
565 | * @watch: caller allocated watch structure | ||
566 | * @inode: inode to watch | ||
567 | * @mask: mask of events to watch | ||
568 | * | ||
569 | * Caller must pin given inode (via nameidata). | ||
570 | * Caller must ensure it only calls inotify_add_watch() once per watch. | ||
571 | * Calls inotify_handle_get_wd() so may sleep. | ||
572 | */ | ||
573 | s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch, | ||
574 | struct inode *inode, u32 mask) | ||
998 | { | 575 | { |
999 | struct inotify_watch *watch, *old; | 576 | int ret = 0; |
1000 | struct inode *inode; | ||
1001 | struct inotify_device *dev; | ||
1002 | struct nameidata nd; | ||
1003 | struct file *filp; | ||
1004 | int ret, fput_needed; | ||
1005 | int mask_add = 0; | ||
1006 | unsigned flags = 0; | ||
1007 | |||
1008 | filp = fget_light(fd, &fput_needed); | ||
1009 | if (unlikely(!filp)) | ||
1010 | return -EBADF; | ||
1011 | 577 | ||
1012 | /* verify that this is indeed an inotify instance */ | 578 | /* don't allow invalid bits: we don't want flags set */ |
1013 | if (unlikely(filp->f_op != &inotify_fops)) { | 579 | mask &= IN_ALL_EVENTS | IN_ONESHOT; |
1014 | ret = -EINVAL; | 580 | if (unlikely(!mask)) |
1015 | goto fput_and_out; | 581 | return -EINVAL; |
1016 | } | 582 | watch->mask = mask; |
1017 | 583 | ||
1018 | if (!(mask & IN_DONT_FOLLOW)) | 584 | mutex_lock(&inode->inotify_mutex); |
1019 | flags |= LOOKUP_FOLLOW; | 585 | mutex_lock(&ih->mutex); |
1020 | if (mask & IN_ONLYDIR) | ||
1021 | flags |= LOOKUP_DIRECTORY; | ||
1022 | 586 | ||
1023 | ret = find_inode(path, &nd, flags); | 587 | /* Initialize a new watch */ |
588 | ret = inotify_handle_get_wd(ih, watch); | ||
1024 | if (unlikely(ret)) | 589 | if (unlikely(ret)) |
1025 | goto fput_and_out; | 590 | goto out; |
1026 | 591 | ret = watch->wd; | |
1027 | /* inode held in place by reference to nd; dev by fget on fd */ | ||
1028 | inode = nd.dentry->d_inode; | ||
1029 | dev = filp->private_data; | ||
1030 | |||
1031 | mutex_lock(&inode->inotify_mutex); | ||
1032 | mutex_lock(&dev->mutex); | ||
1033 | 592 | ||
1034 | if (mask & IN_MASK_ADD) | 593 | atomic_set(&watch->count, 0); |
1035 | mask_add = 1; | 594 | INIT_LIST_HEAD(&watch->h_list); |
595 | INIT_LIST_HEAD(&watch->i_list); | ||
1036 | 596 | ||
1037 | /* don't let user-space set invalid bits: we don't want flags set */ | 597 | /* save a reference to handle and bump the count to make it official */ |
1038 | mask &= IN_ALL_EVENTS | IN_ONESHOT; | 598 | get_inotify_handle(ih); |
1039 | if (unlikely(!mask)) { | 599 | watch->ih = ih; |
1040 | ret = -EINVAL; | ||
1041 | goto out; | ||
1042 | } | ||
1043 | 600 | ||
1044 | /* | 601 | /* |
1045 | * Handle the case of re-adding a watch on an (inode,dev) pair that we | 602 | * Save a reference to the inode and bump the ref count to make it |
1046 | * are already watching. We just update the mask and return its wd. | 603 | * official. We hold a reference to nameidata, which makes this safe. |
1047 | */ | 604 | */ |
1048 | old = inode_find_dev(inode, dev); | 605 | watch->inode = igrab(inode); |
1049 | if (unlikely(old)) { | ||
1050 | if (mask_add) | ||
1051 | old->mask |= mask; | ||
1052 | else | ||
1053 | old->mask = mask; | ||
1054 | ret = old->wd; | ||
1055 | goto out; | ||
1056 | } | ||
1057 | 606 | ||
1058 | watch = create_watch(dev, mask, inode); | 607 | get_inotify_watch(watch); /* initial get */ |
1059 | if (unlikely(IS_ERR(watch))) { | ||
1060 | ret = PTR_ERR(watch); | ||
1061 | goto out; | ||
1062 | } | ||
1063 | 608 | ||
1064 | if (!inotify_inode_watched(inode)) | 609 | if (!inotify_inode_watched(inode)) |
1065 | set_dentry_child_flags(inode, 1); | 610 | set_dentry_child_flags(inode, 1); |
1066 | 611 | ||
1067 | /* Add the watch to the device's and the inode's list */ | 612 | /* Add the watch to the handle's and the inode's list */ |
1068 | list_add(&watch->d_list, &dev->watches); | 613 | list_add(&watch->h_list, &ih->watches); |
1069 | list_add(&watch->i_list, &inode->inotify_watches); | 614 | list_add(&watch->i_list, &inode->inotify_watches); |
1070 | ret = watch->wd; | ||
1071 | out: | 615 | out: |
1072 | mutex_unlock(&dev->mutex); | 616 | mutex_unlock(&ih->mutex); |
1073 | mutex_unlock(&inode->inotify_mutex); | 617 | mutex_unlock(&inode->inotify_mutex); |
1074 | path_release(&nd); | ||
1075 | fput_and_out: | ||
1076 | fput_light(filp, fput_needed); | ||
1077 | return ret; | 618 | return ret; |
1078 | } | 619 | } |
620 | EXPORT_SYMBOL_GPL(inotify_add_watch); | ||
1079 | 621 | ||
1080 | asmlinkage long sys_inotify_rm_watch(int fd, u32 wd) | 622 | /** |
623 | * inotify_rm_wd - remove a watch from an inotify instance | ||
624 | * @ih: inotify handle | ||
625 | * @wd: watch descriptor to remove | ||
626 | * | ||
627 | * Can sleep. | ||
628 | */ | ||
629 | int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | ||
1081 | { | 630 | { |
1082 | struct file *filp; | 631 | struct inotify_watch *watch; |
1083 | struct inotify_device *dev; | 632 | struct inode *inode; |
1084 | int ret, fput_needed; | ||
1085 | |||
1086 | filp = fget_light(fd, &fput_needed); | ||
1087 | if (unlikely(!filp)) | ||
1088 | return -EBADF; | ||
1089 | 633 | ||
1090 | /* verify that this is indeed an inotify instance */ | 634 | mutex_lock(&ih->mutex); |
1091 | if (unlikely(filp->f_op != &inotify_fops)) { | 635 | watch = idr_find(&ih->idr, wd); |
1092 | ret = -EINVAL; | 636 | if (unlikely(!watch)) { |
1093 | goto out; | 637 | mutex_unlock(&ih->mutex); |
638 | return -EINVAL; | ||
1094 | } | 639 | } |
640 | get_inotify_watch(watch); | ||
641 | inode = watch->inode; | ||
642 | mutex_unlock(&ih->mutex); | ||
1095 | 643 | ||
1096 | dev = filp->private_data; | 644 | mutex_lock(&inode->inotify_mutex); |
1097 | ret = inotify_ignore(dev, wd); | 645 | mutex_lock(&ih->mutex); |
1098 | 646 | ||
1099 | out: | 647 | /* make sure that we did not race */ |
1100 | fput_light(filp, fput_needed); | 648 | if (likely(idr_find(&ih->idr, wd) == watch)) |
1101 | return ret; | 649 | remove_watch(watch, ih); |
1102 | } | ||
1103 | 650 | ||
1104 | static struct super_block * | 651 | mutex_unlock(&ih->mutex); |
1105 | inotify_get_sb(struct file_system_type *fs_type, int flags, | 652 | mutex_unlock(&inode->inotify_mutex); |
1106 | const char *dev_name, void *data) | 653 | put_inotify_watch(watch); |
1107 | { | ||
1108 | return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA); | ||
1109 | } | ||
1110 | 654 | ||
1111 | static struct file_system_type inotify_fs_type = { | 655 | return 0; |
1112 | .name = "inotifyfs", | 656 | } |
1113 | .get_sb = inotify_get_sb, | 657 | EXPORT_SYMBOL_GPL(inotify_rm_wd); |
1114 | .kill_sb = kill_anon_super, | ||
1115 | }; | ||
1116 | 658 | ||
1117 | /* | 659 | /* |
1118 | * inotify_setup - Our initialization function. Note that we cannot return | 660 | * inotify_setup - core initialization function |
1119 | * error because we have compiled-in VFS hooks. So an (unlikely) failure here | ||
1120 | * must result in panic(). | ||
1121 | */ | 661 | */ |
1122 | static int __init inotify_setup(void) | 662 | static int __init inotify_setup(void) |
1123 | { | 663 | { |
1124 | int ret; | ||
1125 | |||
1126 | ret = register_filesystem(&inotify_fs_type); | ||
1127 | if (unlikely(ret)) | ||
1128 | panic("inotify: register_filesystem returned %d!\n", ret); | ||
1129 | |||
1130 | inotify_mnt = kern_mount(&inotify_fs_type); | ||
1131 | if (IS_ERR(inotify_mnt)) | ||
1132 | panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt)); | ||
1133 | |||
1134 | inotify_max_queued_events = 16384; | ||
1135 | inotify_max_user_instances = 128; | ||
1136 | inotify_max_user_watches = 8192; | ||
1137 | |||
1138 | atomic_set(&inotify_cookie, 0); | 664 | atomic_set(&inotify_cookie, 0); |
1139 | 665 | ||
1140 | watch_cachep = kmem_cache_create("inotify_watch_cache", | ||
1141 | sizeof(struct inotify_watch), | ||
1142 | 0, SLAB_PANIC, NULL, NULL); | ||
1143 | event_cachep = kmem_cache_create("inotify_event_cache", | ||
1144 | sizeof(struct inotify_kernel_event), | ||
1145 | 0, SLAB_PANIC, NULL, NULL); | ||
1146 | |||
1147 | return 0; | 666 | return 0; |
1148 | } | 667 | } |
1149 | 668 | ||
diff --git a/fs/inotify_user.c b/fs/inotify_user.c new file mode 100644 index 000000000000..845dc79a4e9c --- /dev/null +++ b/fs/inotify_user.c | |||
@@ -0,0 +1,717 @@ | |||
1 | /* | ||
2 | * fs/inotify_user.c - inotify support for userspace | ||
3 | * | ||
4 | * Authors: | ||
5 | * John McCutchan <ttb@tentacle.dhs.org> | ||
6 | * Robert Love <rml@novell.com> | ||
7 | * | ||
8 | * Copyright (C) 2005 John McCutchan | ||
9 | * Copyright 2006 Hewlett-Packard Development Company, L.P. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License as published by the | ||
13 | * Free Software Foundation; either version 2, or (at your option) any | ||
14 | * later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, but | ||
17 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
19 | * General Public License for more details. | ||
20 | */ | ||
21 | |||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/sched.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/fs.h> | ||
26 | #include <linux/file.h> | ||
27 | #include <linux/mount.h> | ||
28 | #include <linux/namei.h> | ||
29 | #include <linux/poll.h> | ||
30 | #include <linux/init.h> | ||
31 | #include <linux/list.h> | ||
32 | #include <linux/inotify.h> | ||
33 | #include <linux/syscalls.h> | ||
34 | |||
35 | #include <asm/ioctls.h> | ||
36 | |||
37 | static kmem_cache_t *watch_cachep __read_mostly; | ||
38 | static kmem_cache_t *event_cachep __read_mostly; | ||
39 | |||
40 | static struct vfsmount *inotify_mnt __read_mostly; | ||
41 | |||
42 | /* these are configurable via /proc/sys/fs/inotify/ */ | ||
43 | int inotify_max_user_instances __read_mostly; | ||
44 | int inotify_max_user_watches __read_mostly; | ||
45 | int inotify_max_queued_events __read_mostly; | ||
46 | |||
47 | /* | ||
48 | * Lock ordering: | ||
49 | * | ||
50 | * inotify_dev->up_mutex (ensures we don't re-add the same watch) | ||
51 | * inode->inotify_mutex (protects inode's watch list) | ||
52 | * inotify_handle->mutex (protects inotify_handle's watch list) | ||
53 | * inotify_dev->ev_mutex (protects device's event queue) | ||
54 | */ | ||
55 | |||
56 | /* | ||
57 | * Lifetimes of the main data structures: | ||
58 | * | ||
59 | * inotify_device: Lifetime is managed by reference count, from | ||
60 | * sys_inotify_init() until release. Additional references can bump the count | ||
61 | * via get_inotify_dev() and drop the count via put_inotify_dev(). | ||
62 | * | ||
63 | * inotify_user_watch: Lifetime is from create_watch() to the receipt of an | ||
64 | * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the | ||
65 | * first event, or to inotify_destroy(). | ||
66 | */ | ||
67 | |||
68 | /* | ||
69 | * struct inotify_device - represents an inotify instance | ||
70 | * | ||
71 | * The event queue is protected by 'ev_mutex'; 'up_mutex' synchronizes watch updates. | |
72 | */ | ||
73 | struct inotify_device { | ||
74 | wait_queue_head_t wq; /* wait queue for i/o */ | ||
75 | struct mutex ev_mutex; /* protects event queue */ | ||
76 | struct mutex up_mutex; /* synchronizes watch updates */ | ||
77 | struct list_head events; /* list of queued events */ | ||
78 | atomic_t count; /* reference count */ | ||
79 | struct user_struct *user; /* user who opened this dev */ | ||
80 | struct inotify_handle *ih; /* inotify handle */ | ||
81 | unsigned int queue_size; /* size of the queue (bytes) */ | ||
82 | unsigned int event_count; /* number of pending events */ | ||
83 | unsigned int max_events; /* maximum number of events */ | ||
84 | }; | ||
85 | |||
86 | /* | ||
87 | * struct inotify_kernel_event - An inotify event, originating from a watch and | ||
88 | * queued for user-space. A list of these is attached to each instance of the | ||
89 | * device. In read(), this list is walked and all events that can fit in the | ||
90 | * buffer are returned. | ||
91 | * | ||
92 | * Protected by dev->ev_mutex of the device in which we are queued. | ||
93 | */ | ||
94 | struct inotify_kernel_event { | ||
95 | struct inotify_event event; /* the user-space event */ | ||
96 | struct list_head list; /* entry in inotify_device's list */ | ||
97 | char *name; /* filename, if any */ | ||
98 | }; | ||
99 | |||
100 | /* | ||
101 | * struct inotify_user_watch - our version of an inotify_watch, we add | ||
102 | * a reference to the associated inotify_device. | ||
103 | */ | ||
104 | struct inotify_user_watch { | ||
105 | struct inotify_device *dev; /* associated device */ | ||
106 | struct inotify_watch wdata; /* inotify watch data */ | ||
107 | }; | ||
108 | |||
109 | #ifdef CONFIG_SYSCTL | ||
110 | |||
111 | #include <linux/sysctl.h> | ||
112 | |||
113 | static int zero; | ||
114 | |||
115 | ctl_table inotify_table[] = { | ||
116 | { | ||
117 | .ctl_name = INOTIFY_MAX_USER_INSTANCES, | ||
118 | .procname = "max_user_instances", | ||
119 | .data = &inotify_max_user_instances, | ||
120 | .maxlen = sizeof(int), | ||
121 | .mode = 0644, | ||
122 | .proc_handler = &proc_dointvec_minmax, | ||
123 | .strategy = &sysctl_intvec, | ||
124 | .extra1 = &zero, | ||
125 | }, | ||
126 | { | ||
127 | .ctl_name = INOTIFY_MAX_USER_WATCHES, | ||
128 | .procname = "max_user_watches", | ||
129 | .data = &inotify_max_user_watches, | ||
130 | .maxlen = sizeof(int), | ||
131 | .mode = 0644, | ||
132 | .proc_handler = &proc_dointvec_minmax, | ||
133 | .strategy = &sysctl_intvec, | ||
134 | .extra1 = &zero, | ||
135 | }, | ||
136 | { | ||
137 | .ctl_name = INOTIFY_MAX_QUEUED_EVENTS, | ||
138 | .procname = "max_queued_events", | ||
139 | .data = &inotify_max_queued_events, | ||
140 | .maxlen = sizeof(int), | ||
141 | .mode = 0644, | ||
142 | .proc_handler = &proc_dointvec_minmax, | ||
143 | .strategy = &sysctl_intvec, | ||
144 | .extra1 = &zero | ||
145 | }, | ||
146 | { .ctl_name = 0 } | ||
147 | }; | ||
148 | #endif /* CONFIG_SYSCTL */ | ||
149 | |||
150 | static inline void get_inotify_dev(struct inotify_device *dev) | ||
151 | { | ||
152 | atomic_inc(&dev->count); | ||
153 | } | ||
154 | |||
155 | static inline void put_inotify_dev(struct inotify_device *dev) | ||
156 | { | ||
157 | if (atomic_dec_and_test(&dev->count)) { | ||
158 | atomic_dec(&dev->user->inotify_devs); | ||
159 | free_uid(dev->user); | ||
160 | kfree(dev); | ||
161 | } | ||
162 | } | ||
163 | |||
164 | /* | ||
165 | * free_inotify_user_watch - cleans up the watch and its references | ||
166 | */ | ||
167 | static void free_inotify_user_watch(struct inotify_watch *w) | ||
168 | { | ||
169 | struct inotify_user_watch *watch; | ||
170 | struct inotify_device *dev; | ||
171 | |||
172 | watch = container_of(w, struct inotify_user_watch, wdata); | ||
173 | dev = watch->dev; | ||
174 | |||
175 | atomic_dec(&dev->user->inotify_watches); | ||
176 | put_inotify_dev(dev); | ||
177 | kmem_cache_free(watch_cachep, watch); | ||
178 | } | ||
179 | |||
180 | /* | ||
181 | * kernel_event - create a new kernel event with the given parameters | ||
182 | * | ||
183 | * This function can sleep. | ||
184 | */ | ||
185 | static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie, | ||
186 | const char *name) | ||
187 | { | ||
188 | struct inotify_kernel_event *kevent; | ||
189 | |||
190 | kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL); | ||
191 | if (unlikely(!kevent)) | ||
192 | return NULL; | ||
193 | |||
194 | /* we hand this out to user-space, so zero it just in case */ | ||
195 | memset(&kevent->event, 0, sizeof(struct inotify_event)); | ||
196 | |||
197 | kevent->event.wd = wd; | ||
198 | kevent->event.mask = mask; | ||
199 | kevent->event.cookie = cookie; | ||
200 | |||
201 | INIT_LIST_HEAD(&kevent->list); | ||
202 | |||
203 | if (name) { | ||
204 | size_t len, rem, event_size = sizeof(struct inotify_event); | ||
205 | |||
206 | /* | ||
207 | * We need to pad the filename so as to properly align an | ||
208 | * array of inotify_event structures. Because the structure is | ||
209 | * small and the common case is a small filename, we just round | ||
210 | * up to the next multiple of the structure's sizeof. This is | ||
211 | * simple and safe for all architectures. | ||
212 | */ | ||
213 | len = strlen(name) + 1; | ||
214 | rem = event_size - len; | ||
215 | if (len > event_size) { | ||
216 | rem = event_size - (len % event_size); | ||
217 | if (len % event_size == 0) | ||
218 | rem = 0; | ||
219 | } | ||
220 | |||
221 | kevent->name = kmalloc(len + rem, GFP_KERNEL); | ||
222 | if (unlikely(!kevent->name)) { | ||
223 | kmem_cache_free(event_cachep, kevent); | ||
224 | return NULL; | ||
225 | } | ||
226 | memcpy(kevent->name, name, len); | ||
227 | if (rem) | ||
228 | memset(kevent->name + len, 0, rem); | ||
229 | kevent->event.len = len + rem; | ||
230 | } else { | ||
231 | kevent->event.len = 0; | ||
232 | kevent->name = NULL; | ||
233 | } | ||
234 | |||
235 | return kevent; | ||
236 | } | ||
237 | |||
238 | /* | ||
239 | * inotify_dev_get_event - return the next event in the given dev's queue | ||
240 | * | ||
241 | * Caller must hold dev->ev_mutex. | ||
242 | */ | ||
243 | static inline struct inotify_kernel_event * | ||
244 | inotify_dev_get_event(struct inotify_device *dev) | ||
245 | { | ||
246 | return list_entry(dev->events.next, struct inotify_kernel_event, list); | ||
247 | } | ||
248 | |||
249 | /* | ||
250 | * inotify_dev_queue_event - event handler registered with core inotify, adds | ||
251 | * a new event to the given device | ||
252 | * | ||
253 | * Can sleep (calls kernel_event()). | ||
254 | */ | ||
255 | static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask, | ||
256 | u32 cookie, const char *name) | ||
257 | { | ||
258 | struct inotify_user_watch *watch; | ||
259 | struct inotify_device *dev; | ||
260 | struct inotify_kernel_event *kevent, *last; | ||
261 | |||
262 | watch = container_of(w, struct inotify_user_watch, wdata); | ||
263 | dev = watch->dev; | ||
264 | |||
265 | mutex_lock(&dev->ev_mutex); | ||
266 | |||
267 | /* we can safely put the watch as we don't reference it while | ||
268 | * generating the event | ||
269 | */ | ||
270 | if (mask & IN_IGNORED || mask & IN_ONESHOT) | ||
271 | put_inotify_watch(w); /* final put */ | ||
272 | |||
273 | /* coalescing: drop this event if it is a dupe of the previous */ | ||
274 | last = inotify_dev_get_event(dev); | ||
275 | if (last && last->event.mask == mask && last->event.wd == wd && | ||
276 | last->event.cookie == cookie) { | ||
277 | const char *lastname = last->name; | ||
278 | |||
279 | if (!name && !lastname) | ||
280 | goto out; | ||
281 | if (name && lastname && !strcmp(lastname, name)) | ||
282 | goto out; | ||
283 | } | ||
284 | |||
285 | /* the queue overflowed and we already sent the Q_OVERFLOW event */ | ||
286 | if (unlikely(dev->event_count > dev->max_events)) | ||
287 | goto out; | ||
288 | |||
289 | /* if the queue overflows, we need to notify user space */ | ||
290 | if (unlikely(dev->event_count == dev->max_events)) | ||
291 | kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL); | ||
292 | else | ||
293 | kevent = kernel_event(wd, mask, cookie, name); | ||
294 | |||
295 | if (unlikely(!kevent)) | ||
296 | goto out; | ||
297 | |||
298 | /* queue the event and wake up anyone waiting */ | ||
299 | dev->event_count++; | ||
300 | dev->queue_size += sizeof(struct inotify_event) + kevent->event.len; | ||
301 | list_add_tail(&kevent->list, &dev->events); | ||
302 | wake_up_interruptible(&dev->wq); | ||
303 | |||
304 | out: | ||
305 | mutex_unlock(&dev->ev_mutex); | ||
306 | } | ||
307 | |||
308 | /* | ||
309 | * remove_kevent - cleans up and ultimately frees the given kevent | ||
310 | * | ||
311 | * Caller must hold dev->ev_mutex. | ||
312 | */ | ||
313 | static void remove_kevent(struct inotify_device *dev, | ||
314 | struct inotify_kernel_event *kevent) | ||
315 | { | ||
316 | list_del(&kevent->list); | ||
317 | |||
318 | dev->event_count--; | ||
319 | dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; | ||
320 | |||
321 | kfree(kevent->name); | ||
322 | kmem_cache_free(event_cachep, kevent); | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * inotify_dev_event_dequeue - destroy an event on the given device | ||
327 | * | ||
328 | * Caller must hold dev->ev_mutex. | ||
329 | */ | ||
330 | static void inotify_dev_event_dequeue(struct inotify_device *dev) | ||
331 | { | ||
332 | if (!list_empty(&dev->events)) { | ||
333 | struct inotify_kernel_event *kevent; | ||
334 | kevent = inotify_dev_get_event(dev); | ||
335 | remove_kevent(dev, kevent); | ||
336 | } | ||
337 | } | ||
338 | |||
339 | /* | ||
340 | * find_inode - resolve a user-given path to a specific inode and return a nd | ||
341 | */ | ||
342 | static int find_inode(const char __user *dirname, struct nameidata *nd, | ||
343 | unsigned flags) | ||
344 | { | ||
345 | int error; | ||
346 | |||
347 | error = __user_walk(dirname, flags, nd); | ||
348 | if (error) | ||
349 | return error; | ||
350 | /* you can only watch an inode if you have read permissions on it */ | ||
351 | error = vfs_permission(nd, MAY_READ); | ||
352 | if (error) | ||
353 | path_release(nd); | ||
354 | return error; | ||
355 | } | ||
356 | |||
357 | /* | ||
358 | * create_watch - creates a watch on the given device. | ||
359 | * | ||
360 | * Callers must hold dev->up_mutex. | ||
361 | */ | ||
362 | static int create_watch(struct inotify_device *dev, struct inode *inode, | ||
363 | u32 mask) | ||
364 | { | ||
365 | struct inotify_user_watch *watch; | ||
366 | int ret; | ||
367 | |||
368 | if (atomic_read(&dev->user->inotify_watches) >= | ||
369 | inotify_max_user_watches) | ||
370 | return -ENOSPC; | ||
371 | |||
372 | watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL); | ||
373 | if (unlikely(!watch)) | ||
374 | return -ENOMEM; | ||
375 | |||
376 | /* save a reference to device and bump the count to make it official */ | ||
377 | get_inotify_dev(dev); | ||
378 | watch->dev = dev; | ||
379 | |||
380 | atomic_inc(&dev->user->inotify_watches); | ||
381 | |||
382 | ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask); | ||
383 | if (ret < 0) | ||
384 | free_inotify_user_watch(&watch->wdata); | ||
385 | |||
386 | return ret; | ||
387 | } | ||
388 | |||
389 | /* Device Interface */ | ||
390 | |||
391 | static unsigned int inotify_poll(struct file *file, poll_table *wait) | ||
392 | { | ||
393 | struct inotify_device *dev = file->private_data; | ||
394 | int ret = 0; | ||
395 | |||
396 | poll_wait(file, &dev->wq, wait); | ||
397 | mutex_lock(&dev->ev_mutex); | ||
398 | if (!list_empty(&dev->events)) | ||
399 | ret = POLLIN | POLLRDNORM; | ||
400 | mutex_unlock(&dev->ev_mutex); | ||
401 | |||
402 | return ret; | ||
403 | } | ||
404 | |||
405 | static ssize_t inotify_read(struct file *file, char __user *buf, | ||
406 | size_t count, loff_t *pos) | ||
407 | { | ||
408 | size_t event_size = sizeof (struct inotify_event); | ||
409 | struct inotify_device *dev; | ||
410 | char __user *start; | ||
411 | int ret; | ||
412 | DEFINE_WAIT(wait); | ||
413 | |||
414 | start = buf; | ||
415 | dev = file->private_data; | ||
416 | |||
417 | while (1) { | ||
418 | int events; | ||
419 | |||
420 | prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); | ||
421 | |||
422 | mutex_lock(&dev->ev_mutex); | ||
423 | events = !list_empty(&dev->events); | ||
424 | mutex_unlock(&dev->ev_mutex); | ||
425 | if (events) { | ||
426 | ret = 0; | ||
427 | break; | ||
428 | } | ||
429 | |||
430 | if (file->f_flags & O_NONBLOCK) { | ||
431 | ret = -EAGAIN; | ||
432 | break; | ||
433 | } | ||
434 | |||
435 | if (signal_pending(current)) { | ||
436 | ret = -EINTR; | ||
437 | break; | ||
438 | } | ||
439 | |||
440 | schedule(); | ||
441 | } | ||
442 | |||
443 | finish_wait(&dev->wq, &wait); | ||
444 | if (ret) | ||
445 | return ret; | ||
446 | |||
447 | mutex_lock(&dev->ev_mutex); | ||
448 | while (1) { | ||
449 | struct inotify_kernel_event *kevent; | ||
450 | |||
451 | ret = buf - start; | ||
452 | if (list_empty(&dev->events)) | ||
453 | break; | ||
454 | |||
455 | kevent = inotify_dev_get_event(dev); | ||
456 | if (event_size + kevent->event.len > count) | ||
457 | break; | ||
458 | |||
459 | if (copy_to_user(buf, &kevent->event, event_size)) { | ||
460 | ret = -EFAULT; | ||
461 | break; | ||
462 | } | ||
463 | buf += event_size; | ||
464 | count -= event_size; | ||
465 | |||
466 | if (kevent->name) { | ||
467 | if (copy_to_user(buf, kevent->name, kevent->event.len)){ | ||
468 | ret = -EFAULT; | ||
469 | break; | ||
470 | } | ||
471 | buf += kevent->event.len; | ||
472 | count -= kevent->event.len; | ||
473 | } | ||
474 | |||
475 | remove_kevent(dev, kevent); | ||
476 | } | ||
477 | mutex_unlock(&dev->ev_mutex); | ||
478 | |||
479 | return ret; | ||
480 | } | ||
481 | |||
482 | static int inotify_release(struct inode *ignored, struct file *file) | ||
483 | { | ||
484 | struct inotify_device *dev = file->private_data; | ||
485 | |||
486 | inotify_destroy(dev->ih); | ||
487 | |||
488 | /* destroy all of the events on this device */ | ||
489 | mutex_lock(&dev->ev_mutex); | ||
490 | while (!list_empty(&dev->events)) | ||
491 | inotify_dev_event_dequeue(dev); | ||
492 | mutex_unlock(&dev->ev_mutex); | ||
493 | |||
494 | /* free this device: the put matching the get in inotify_init() */ | ||
495 | put_inotify_dev(dev); | ||
496 | |||
497 | return 0; | ||
498 | } | ||
499 | |||
500 | static long inotify_ioctl(struct file *file, unsigned int cmd, | ||
501 | unsigned long arg) | ||
502 | { | ||
503 | struct inotify_device *dev; | ||
504 | void __user *p; | ||
505 | int ret = -ENOTTY; | ||
506 | |||
507 | dev = file->private_data; | ||
508 | p = (void __user *) arg; | ||
509 | |||
510 | switch (cmd) { | ||
511 | case FIONREAD: | ||
512 | ret = put_user(dev->queue_size, (int __user *) p); | ||
513 | break; | ||
514 | } | ||
515 | |||
516 | return ret; | ||
517 | } | ||
518 | |||
519 | static const struct file_operations inotify_fops = { | ||
520 | .poll = inotify_poll, | ||
521 | .read = inotify_read, | ||
522 | .release = inotify_release, | ||
523 | .unlocked_ioctl = inotify_ioctl, | ||
524 | .compat_ioctl = inotify_ioctl, | ||
525 | }; | ||
526 | |||
527 | static const struct inotify_operations inotify_user_ops = { | ||
528 | .handle_event = inotify_dev_queue_event, | ||
529 | .destroy_watch = free_inotify_user_watch, | ||
530 | }; | ||
531 | |||
532 | asmlinkage long sys_inotify_init(void) | ||
533 | { | ||
534 | struct inotify_device *dev; | ||
535 | struct inotify_handle *ih; | ||
536 | struct user_struct *user; | ||
537 | struct file *filp; | ||
538 | int fd, ret; | ||
539 | |||
540 | fd = get_unused_fd(); | ||
541 | if (fd < 0) | ||
542 | return fd; | ||
543 | |||
544 | filp = get_empty_filp(); | ||
545 | if (!filp) { | ||
546 | ret = -ENFILE; | ||
547 | goto out_put_fd; | ||
548 | } | ||
549 | |||
550 | user = get_uid(current->user); | ||
551 | if (unlikely(atomic_read(&user->inotify_devs) >= | ||
552 | inotify_max_user_instances)) { | ||
553 | ret = -EMFILE; | ||
554 | goto out_free_uid; | ||
555 | } | ||
556 | |||
557 | dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL); | ||
558 | if (unlikely(!dev)) { | ||
559 | ret = -ENOMEM; | ||
560 | goto out_free_uid; | ||
561 | } | ||
562 | |||
563 | ih = inotify_init(&inotify_user_ops); | ||
564 | if (unlikely(IS_ERR(ih))) { | ||
565 | ret = PTR_ERR(ih); | ||
566 | goto out_free_dev; | ||
567 | } | ||
568 | dev->ih = ih; | ||
569 | |||
570 | filp->f_op = &inotify_fops; | ||
571 | filp->f_vfsmnt = mntget(inotify_mnt); | ||
572 | filp->f_dentry = dget(inotify_mnt->mnt_root); | ||
573 | filp->f_mapping = filp->f_dentry->d_inode->i_mapping; | ||
574 | filp->f_mode = FMODE_READ; | ||
575 | filp->f_flags = O_RDONLY; | ||
576 | filp->private_data = dev; | ||
577 | |||
578 | INIT_LIST_HEAD(&dev->events); | ||
579 | init_waitqueue_head(&dev->wq); | ||
580 | mutex_init(&dev->ev_mutex); | ||
581 | mutex_init(&dev->up_mutex); | ||
582 | dev->event_count = 0; | ||
583 | dev->queue_size = 0; | ||
584 | dev->max_events = inotify_max_queued_events; | ||
585 | dev->user = user; | ||
586 | atomic_set(&dev->count, 0); | ||
587 | |||
588 | get_inotify_dev(dev); | ||
589 | atomic_inc(&user->inotify_devs); | ||
590 | fd_install(fd, filp); | ||
591 | |||
592 | return fd; | ||
593 | out_free_dev: | ||
594 | kfree(dev); | ||
595 | out_free_uid: | ||
596 | free_uid(user); | ||
597 | put_filp(filp); | ||
598 | out_put_fd: | ||
599 | put_unused_fd(fd); | ||
600 | return ret; | ||
601 | } | ||
602 | |||
603 | asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) | ||
604 | { | ||
605 | struct inode *inode; | ||
606 | struct inotify_device *dev; | ||
607 | struct nameidata nd; | ||
608 | struct file *filp; | ||
609 | int ret, fput_needed; | ||
610 | unsigned flags = 0; | ||
611 | |||
612 | filp = fget_light(fd, &fput_needed); | ||
613 | if (unlikely(!filp)) | ||
614 | return -EBADF; | ||
615 | |||
616 | /* verify that this is indeed an inotify instance */ | ||
617 | if (unlikely(filp->f_op != &inotify_fops)) { | ||
618 | ret = -EINVAL; | ||
619 | goto fput_and_out; | ||
620 | } | ||
621 | |||
622 | if (!(mask & IN_DONT_FOLLOW)) | ||
623 | flags |= LOOKUP_FOLLOW; | ||
624 | if (mask & IN_ONLYDIR) | ||
625 | flags |= LOOKUP_DIRECTORY; | ||
626 | |||
627 | ret = find_inode(path, &nd, flags); | ||
628 | if (unlikely(ret)) | ||
629 | goto fput_and_out; | ||
630 | |||
631 | /* inode held in place by reference to nd; dev by fget on fd */ | ||
632 | inode = nd.dentry->d_inode; | ||
633 | dev = filp->private_data; | ||
634 | |||
635 | mutex_lock(&dev->up_mutex); | ||
636 | ret = inotify_find_update_watch(dev->ih, inode, mask); | ||
637 | if (ret == -ENOENT) | ||
638 | ret = create_watch(dev, inode, mask); | ||
639 | mutex_unlock(&dev->up_mutex); | ||
640 | |||
641 | path_release(&nd); | ||
642 | fput_and_out: | ||
643 | fput_light(filp, fput_needed); | ||
644 | return ret; | ||
645 | } | ||
646 | |||
647 | asmlinkage long sys_inotify_rm_watch(int fd, u32 wd) | ||
648 | { | ||
649 | struct file *filp; | ||
650 | struct inotify_device *dev; | ||
651 | int ret, fput_needed; | ||
652 | |||
653 | filp = fget_light(fd, &fput_needed); | ||
654 | if (unlikely(!filp)) | ||
655 | return -EBADF; | ||
656 | |||
657 | /* verify that this is indeed an inotify instance */ | ||
658 | if (unlikely(filp->f_op != &inotify_fops)) { | ||
659 | ret = -EINVAL; | ||
660 | goto out; | ||
661 | } | ||
662 | |||
663 | dev = filp->private_data; | ||
664 | |||
665 | /* we free our watch data when we get IN_IGNORED */ | ||
666 | ret = inotify_rm_wd(dev->ih, wd); | ||
667 | |||
668 | out: | ||
669 | fput_light(filp, fput_needed); | ||
670 | return ret; | ||
671 | } | ||
672 | |||
673 | static struct super_block * | ||
674 | inotify_get_sb(struct file_system_type *fs_type, int flags, | ||
675 | const char *dev_name, void *data) | ||
676 | { | ||
677 | return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA); | ||
678 | } | ||
679 | |||
680 | static struct file_system_type inotify_fs_type = { | ||
681 | .name = "inotifyfs", | ||
682 | .get_sb = inotify_get_sb, | ||
683 | .kill_sb = kill_anon_super, | ||
684 | }; | ||
685 | |||
686 | /* | ||
687 | * inotify_user_setup - Our initialization function. Note that we cannnot return | ||
688 | * error because we have compiled-in VFS hooks. So an (unlikely) failure here | ||
689 | * must result in panic(). | ||
690 | */ | ||
691 | static int __init inotify_user_setup(void) | ||
692 | { | ||
693 | int ret; | ||
694 | |||
695 | ret = register_filesystem(&inotify_fs_type); | ||
696 | if (unlikely(ret)) | ||
697 | panic("inotify: register_filesystem returned %d!\n", ret); | ||
698 | |||
699 | inotify_mnt = kern_mount(&inotify_fs_type); | ||
700 | if (IS_ERR(inotify_mnt)) | ||
701 | panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt)); | ||
702 | |||
703 | inotify_max_queued_events = 16384; | ||
704 | inotify_max_user_instances = 128; | ||
705 | inotify_max_user_watches = 8192; | ||
706 | |||
707 | watch_cachep = kmem_cache_create("inotify_watch_cache", | ||
708 | sizeof(struct inotify_user_watch), | ||
709 | 0, SLAB_PANIC, NULL, NULL); | ||
710 | event_cachep = kmem_cache_create("inotify_event_cache", | ||
711 | sizeof(struct inotify_kernel_event), | ||
712 | 0, SLAB_PANIC, NULL, NULL); | ||
713 | |||
714 | return 0; | ||
715 | } | ||
716 | |||
717 | module_init(inotify_user_setup); | ||