1 files changed, 778 insertions, 0 deletions
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
new file mode 100644
index 000000000000..400f8064a548
--- /dev/null
+++ b/fs/notify/inotify/inotify_user.c
@@ -0,0 +1,778 @@
+/*
+ * fs/inotify_user.c - inotify support for userspace
+ *
+ * Authors:
+ *      John McCutchan  <ttb@tentacle.dhs.org>
+ *      Robert Love     <rml@novell.com>
+ *
+ * Copyright (C) 2005 John McCutchan
+ * Copyright 2006 Hewlett-Packard Development Company, L.P.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/inotify.h>
+#include <linux/syscalls.h>
+#include <linux/magic.h>
+#include <asm/ioctls.h>
+/* slab caches for struct inotify_user_watch and struct inotify_kernel_event */
+static struct kmem_cache *watch_cachep __read_mostly;
+static struct kmem_cache *event_cachep __read_mostly;
+/* kernel-internal mount of inotify_fs_type; every inotify file lives on it */
+static struct vfsmount *inotify_mnt __read_mostly;
+/*
+ * these are configurable via /proc/sys/fs/inotify/
+ * (defaults are assigned in inotify_user_setup())
+ */
+static int inotify_max_user_instances __read_mostly;
+static int inotify_max_user_watches __read_mostly;
+static int inotify_max_queued_events __read_mostly;
+/*
+ * Lock ordering:
+ *
+ * inotify_dev->up_mutex (ensures we don't re-add the same watch)
+ *      inode->inotify_mutex (protects inode's watch list)
+ *              inotify_handle->mutex (protects inotify_handle's watch list)
+ *                      inotify_dev->ev_mutex (protects device's event queue)
+ */
+/*
+ * Lifetimes of the main data structures:
+ *
+ * inotify_device: Lifetime is managed by reference count, from
+ * sys_inotify_init() until release.  Additional references can bump the count
+ * via get_inotify_dev() and drop the count via put_inotify_dev().
+ *
+ * inotify_user_watch: Lifetime is from create_watch() to the receipt of an
+ * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the
+ * first event, or to inotify_destroy().
+ */
+/*
+ * struct inotify_device - represents an inotify instance
+ *
+ * The event queue (events, queue_size, event_count) is protected by
+ * 'ev_mutex'.  'up_mutex' serializes watch lookup/creation so the same watch
+ * is not added twice.  The structure itself is reference counted via 'count'
+ * (see get_inotify_dev() / put_inotify_dev()).
+ */
+struct inotify_device {
+        wait_queue_head_t       wq;             /* wait queue for i/o */
+        struct mutex            ev_mutex;       /* protects event queue */
+        struct mutex            up_mutex;       /* synchronizes watch updates */
+        struct list_head        events;         /* list of queued events */
+        struct user_struct      *user;          /* user who opened this dev */
+        struct inotify_handle   *ih;            /* inotify handle */
+        struct fasync_struct    *fa;            /* async notification */
+        atomic_t                count;          /* reference count */
+        unsigned int            queue_size;     /* size of the queue (bytes) */
+        unsigned int            event_count;    /* number of pending events */
+        unsigned int            max_events;     /* maximum number of events */
+};
+/*
+ * struct inotify_kernel_event - An inotify event, originating from a watch and
+ * queued for user-space.  A list of these is attached to each instance of the
+ * device.  In read(), this list is walked and all events that can fit in the
+ * buffer are returned.
+ *
+ * Protected by dev->ev_mutex of the device in which we are queued.
+ */
+struct inotify_kernel_event {
+        struct inotify_event    event;  /* the user-space event */
+        struct list_head        list;   /* entry in inotify_device's list */
+        /*
+         * filename, if any: a kmalloc'ed, zero-padded buffer of event.len
+         * bytes (see kernel_event()); NULL when the event carries no name.
+         * Released by free_kevent().
+         */
+        char                    *name;
+};
+/*
+ * struct inotify_user_watch - our version of an inotify_watch, we add
+ * a reference to the associated inotify_device.
+ */
+struct inotify_user_watch {
+        /* associated device; create_watch() takes a device reference for it */
+        struct inotify_device   *dev;
+        /* inotify watch data; container_of() on this recovers the user watch */
+        struct inotify_watch    wdata;
+};
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+/*
+ * Passed as .extra1 to every entry below; presumably the minimum value
+ * proc_dointvec_minmax will accept - TODO confirm against the sysctl core.
+ */
+static int zero;
+/*
+ * sysctl entries exposing the three inotify limits (max_user_instances,
+ * max_user_watches, max_queued_events).  Not static: the table is expected
+ * to be hooked up from outside this file.
+ */
+ctl_table inotify_table[] = {
+        {
+                .ctl_name       = INOTIFY_MAX_USER_INSTANCES,
+                .procname       = "max_user_instances",
+                .data           = &inotify_max_user_instances,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec_minmax,
+                .strategy       = &sysctl_intvec,
+                .extra1         = &zero,
+        },
+        {
+                .ctl_name       = INOTIFY_MAX_USER_WATCHES,
+                .procname       = "max_user_watches",
+                .data           = &inotify_max_user_watches,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec_minmax,
+                .strategy       = &sysctl_intvec,
+                .extra1         = &zero,
+        },
+        {
+                .ctl_name       = INOTIFY_MAX_QUEUED_EVENTS,
+                .procname       = "max_queued_events",
+                .data           = &inotify_max_queued_events,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec_minmax,
+                .strategy       = &sysctl_intvec,
+                .extra1         = &zero
+        },
+        /* table terminator */
+        { .ctl_name = 0 }
+};
+#endif /* CONFIG_SYSCTL */
+/*
+ * get_inotify_dev - take an additional reference on the given device
+ *
+ * Each call must be balanced by a put_inotify_dev().
+ */
+static inline void get_inotify_dev(struct inotify_device *dev)
+{
+        atomic_inc(&dev->count);
+}
+/*
+ * put_inotify_dev - drop a reference on the given device
+ *
+ * When the last reference goes away the owning user's instance count is
+ * decremented, the reference on the user is released and the device itself
+ * is freed.  The caller must not touch 'dev' afterwards.
+ */
+static inline void put_inotify_dev(struct inotify_device *dev)
+{
+        struct user_struct *owner;
+        if (!atomic_dec_and_test(&dev->count))
+                return;
+        owner = dev->user;
+        atomic_dec(&owner->inotify_devs);
+        free_uid(owner);
+        kfree(dev);
+}
+/*
+ * free_inotify_user_watch - release a watch and everything it pins
+ *
+ * Registered as the destroy_watch callback in inotify_user_ops.  Un-accounts
+ * the watch from its owner's watch count, drops the device reference the
+ * watch was holding and hands the watch back to its slab cache.
+ */
+static void free_inotify_user_watch(struct inotify_watch *w)
+{
+        struct inotify_user_watch *uwatch =
+                container_of(w, struct inotify_user_watch, wdata);
+        struct inotify_device *owner_dev = uwatch->dev;
+        atomic_dec(&owner_dev->user->inotify_watches);
+        put_inotify_dev(owner_dev);
+        kmem_cache_free(watch_cachep, uwatch);
+}
+/*
+ * kernel_event - create a new kernel event with the given parameters
+ *
+ * @wd:     watch descriptor stored in the event
+ * @mask:   event mask stored in the event
+ * @cookie: cookie stored in the event
+ * @name:   NUL-terminated filename to attach, or NULL for none
+ *
+ * Returns the new event, or NULL if either allocation fails.  When a name is
+ * given it is copied into a separate buffer that is zero-padded up to a
+ * multiple of sizeof(struct inotify_event); event.len holds the padded size.
+ *
+ * This function can sleep.  Both allocations use GFP_NOFS so that neither of
+ * them can recurse into filesystem reclaim while an event is being generated.
+ */
+static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
+                                                  const char *name)
+{
+        struct inotify_kernel_event *kevent;
+        kevent = kmem_cache_alloc(event_cachep, GFP_NOFS);
+        if (unlikely(!kevent))
+                return NULL;
+        /* we hand this out to user-space, so zero it just in case */
+        memset(&kevent->event, 0, sizeof(struct inotify_event));
+        kevent->event.wd = wd;
+        kevent->event.mask = mask;
+        kevent->event.cookie = cookie;
+        INIT_LIST_HEAD(&kevent->list);
+        if (name) {
+                size_t len, rem, event_size = sizeof(struct inotify_event);
+                /*
+                 * We need to pad the filename so as to properly align an
+                 * array of inotify_event structures.  Because the structure is
+                 * small and the common case is a small filename, we just round
+                 * up to the next multiple of the structure's sizeof.  This is
+                 * simple and safe for all architectures.
+                 */
+                len = strlen(name) + 1;
+                /* bytes of padding; 0 when len is already a multiple */
+                rem = (event_size - (len % event_size)) % event_size;
+                /* same allocation context as the event itself */
+                kevent->name = kmalloc(len + rem, GFP_NOFS);
+                if (unlikely(!kevent->name)) {
+                        kmem_cache_free(event_cachep, kevent);
+                        return NULL;
+                }
+                memcpy(kevent->name, name, len);
+                if (rem)
+                        memset(kevent->name + len, 0, rem);
+                kevent->event.len = len + rem;
+        } else {
+                kevent->event.len = 0;
+                kevent->name = NULL;
+        }
+        return kevent;
+}
+/*
+ * inotify_dev_get_event - return the oldest event in the given dev's queue
+ *
+ * Caller must hold dev->ev_mutex and must already know that the queue is
+ * not empty; no emptiness check is performed here.
+ */
+static inline struct inotify_kernel_event *
+inotify_dev_get_event(struct inotify_device *dev)
+{
+        return list_first_entry(&dev->events, struct inotify_kernel_event,
+                                list);
+}
+/*
+ * inotify_dev_get_last_event - return the most recently queued event of the
+ * given dev, or NULL if its queue is empty
+ *
+ * Caller must hold dev->ev_mutex.
+ */
+static inline struct inotify_kernel_event *
+inotify_dev_get_last_event(struct inotify_device *dev)
+{
+        if (!list_empty(&dev->events))
+                return list_entry(dev->events.prev,
+                                  struct inotify_kernel_event, list);
+        return NULL;
+}
+/*
+ * inotify_dev_queue_event - event handler registered with core inotify, adds
+ * a new event to the given device
+ *
+ * @w:       the watch the event originated from
+ * @wd:      watch descriptor to report
+ * @mask:    event mask to report
+ * @cookie:  cookie to report
+ * @name:    filename associated with the event, or NULL
+ * @ignored: unused
+ *
+ * The event is silently dropped when it duplicates the last queued event,
+ * when the queue has already overflowed, or when allocation fails.  When the
+ * queue is exactly full, an IN_Q_OVERFLOW event is queued in place of the
+ * real one.
+ *
+ * Can sleep (calls kernel_event()).
+ */
+static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
+                                    u32 cookie, const char *name,
+                                    struct inode *ignored)
+{
+        struct inotify_user_watch *watch;
+        struct inotify_device *dev;
+        struct inotify_kernel_event *kevent, *last;
+        watch = container_of(w, struct inotify_user_watch, wdata);
+        /* fetch dev now: the watch may be gone after the put below */
+        dev = watch->dev;
+        mutex_lock(&dev->ev_mutex);
+        /* we can safely put the watch as we don't reference it while
+         * generating the event
+         */
+        if (mask & IN_IGNORED || w->mask & IN_ONESHOT)
+                put_inotify_watch(w); /* final put */
+        /* coalescing: drop this event if it is a dupe of the previous */
+        last = inotify_dev_get_last_event(dev);
+        if (last && last->event.mask == mask && last->event.wd == wd &&
+                        last->event.cookie == cookie) {
+                const char *lastname = last->name;
+                /* identical if both are nameless or both names match */
+                if (!name && !lastname)
+                        goto out;
+                if (name && lastname && !strcmp(lastname, name))
+                        goto out;
+        }
+        /* the queue overflowed and we already sent the Q_OVERFLOW event */
+        if (unlikely(dev->event_count > dev->max_events))
+                goto out;
+        /* if the queue overflows, we need to notify user space */
+        if (unlikely(dev->event_count == dev->max_events))
+                kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
+        else
+                kevent = kernel_event(wd, mask, cookie, name);
+        /* allocation failed: the event is lost */
+        if (unlikely(!kevent))
+                goto out;
+        /* queue the event and wake up anyone waiting */
+        dev->event_count++;
+        dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
+        list_add_tail(&kevent->list, &dev->events);
+        wake_up_interruptible(&dev->wq);
+        kill_fasync(&dev->fa, SIGIO, POLL_IN);
+out:
+        mutex_unlock(&dev->ev_mutex);
+}
+/*
+ * remove_kevent - unlink the given kevent from the device's queue and update
+ * the queue accounting (event count and byte size)
+ *
+ * The event itself is not freed; see free_kevent().
+ *
+ * Caller must hold dev->ev_mutex.
+ */
+static void remove_kevent(struct inotify_device *dev,
+                          struct inotify_kernel_event *kevent)
+{
+        unsigned int bytes = sizeof(struct inotify_event) + kevent->event.len;
+        dev->queue_size -= bytes;
+        dev->event_count--;
+        list_del(&kevent->list);
+}
+/*
+ * free_kevent - release the given kevent together with its name buffer.
+ *
+ * The event must already be unlinked from any queue.
+ */
+static void free_kevent(struct inotify_kernel_event *kevent)
+{
+        char *name_buf = kevent->name;
+        kmem_cache_free(event_cachep, kevent);
+        /* kfree() of a NULL name is a no-op */
+        kfree(name_buf);
+}
+/*
+ * inotify_dev_event_dequeue - discard the oldest event on the given device,
+ * if there is one
+ *
+ * Caller must hold dev->ev_mutex.
+ */
+static void inotify_dev_event_dequeue(struct inotify_device *dev)
+{
+        struct inotify_kernel_event *head;
+        if (list_empty(&dev->events))
+                return;
+        head = inotify_dev_get_event(dev);
+        remove_kevent(dev, head);
+        free_kevent(head);
+}
+/*
+ * find_inode - resolve a user-given path to a specific inode
+ *
+ * Looks 'dirname' up relative to the current working directory using the
+ * given lookup flags and checks that the caller may read the result.
+ *
+ * Returns 0 with a reference held in 'path' (to be dropped by the caller
+ * with path_put()), or a negative error with no reference held.
+ */
+static int find_inode(const char __user *dirname, struct path *path,
+                      unsigned flags)
+{
+        int err = user_path_at(AT_FDCWD, dirname, flags, path);
+        if (err)
+                return err;
+        /* you can only watch an inode if you have read permissions on it */
+        err = inode_permission(path->dentry->d_inode, MAY_READ);
+        if (!err)
+                return 0;
+        path_put(path);
+        return err;
+}
+/*
+ * create_watch - creates a watch on the given device.
+ *
+ * Returns -ENOSPC when the owning user is already at the watch limit,
+ * -ENOMEM when the watch cannot be allocated, and otherwise whatever
+ * inotify_add_watch() returns (negative on failure).
+ *
+ * Callers must hold dev->up_mutex.
+ */
+static int create_watch(struct inotify_device *dev, struct inode *inode,
+                        u32 mask)
+{
+        struct user_struct *owner = dev->user;
+        struct inotify_user_watch *uwatch;
+        int rc;
+        if (atomic_read(&owner->inotify_watches) >= inotify_max_user_watches)
+                return -ENOSPC;
+        uwatch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
+        if (unlikely(!uwatch))
+                return -ENOMEM;
+        /* the watch pins the device and is charged to the device's user */
+        get_inotify_dev(dev);
+        uwatch->dev = dev;
+        atomic_inc(&owner->inotify_watches);
+        inotify_init_watch(&uwatch->wdata);
+        rc = inotify_add_watch(dev->ih, &uwatch->wdata, inode, mask);
+        if (rc >= 0)
+                return rc;
+        /* undo the accounting, the device reference and the allocation */
+        free_inotify_user_watch(&uwatch->wdata);
+        return rc;
+}
+/* Device Interface */
+/*
+ * inotify_poll - poll() handler: the file is readable whenever at least one
+ * event is queued on the device
+ */
+static unsigned int inotify_poll(struct file *file, poll_table *wait)
+{
+        struct inotify_device *dev = file->private_data;
+        int have_events;
+        poll_wait(file, &dev->wq, wait);
+        mutex_lock(&dev->ev_mutex);
+        have_events = !list_empty(&dev->events);
+        mutex_unlock(&dev->ev_mutex);
+        if (have_events)
+                return POLLIN | POLLRDNORM;
+        return 0;
+}
+/*
+ * inotify_read - read() handler: copy queued events to user-space
+ *
+ * Blocks until at least one event is queued (unless the file is O_NONBLOCK),
+ * then copies as many whole events as fit into the 'count' bytes at 'buf'.
+ *
+ * Returns the number of bytes copied, or:
+ *   -EAGAIN  no event is queued and the file is non-blocking
+ *   -EINTR   a signal arrived while waiting
+ *   -EINVAL  the buffer is non-empty but too small for even the first event
+ *   -EFAULT  copying to user-space failed (the event being copied is lost)
+ *
+ * Locking: dev->ev_mutex protects the queue but is dropped around every
+ * copy_to_user().  On a copy fault the mutex is therefore NOT held, so that
+ * path frees the already-dequeued event and returns directly instead of
+ * falling through to the final mutex_unlock().
+ */
+static ssize_t inotify_read(struct file *file, char __user *buf,
+                            size_t count, loff_t *pos)
+{
+        size_t event_size = sizeof (struct inotify_event);
+        struct inotify_device *dev;
+        char __user *start;
+        int ret;
+        DEFINE_WAIT(wait);
+        start = buf;
+        dev = file->private_data;
+        /* wait for an event; on success we leave the loop holding ev_mutex */
+        while (1) {
+                prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
+                mutex_lock(&dev->ev_mutex);
+                if (!list_empty(&dev->events)) {
+                        ret = 0;
+                        break;
+                }
+                mutex_unlock(&dev->ev_mutex);
+                if (file->f_flags & O_NONBLOCK) {
+                        ret = -EAGAIN;
+                        break;
+                }
+                if (signal_pending(current)) {
+                        ret = -EINTR;
+                        break;
+                }
+                schedule();
+        }
+        finish_wait(&dev->wq, &wait);
+        /* error exits above all happen with ev_mutex already released */
+        if (ret)
+                return ret;
+        while (1) {
+                struct inotify_kernel_event *kevent;
+                size_t name_len;
+                int fault;
+                ret = buf - start;
+                if (list_empty(&dev->events))
+                        break;
+                kevent = inotify_dev_get_event(dev);
+                if (event_size + kevent->event.len > count) {
+                        if (ret == 0 && count > 0) {
+                                /*
+                                 * could not get a single event because we
+                                 * didn't have enough buffer space.
+                                 */
+                                ret = -EINVAL;
+                        }
+                        break;
+                }
+                remove_kevent(dev, kevent);
+                /*
+                 * Must perform the copy_to_user outside the mutex in order
+                 * to avoid a lock order reversal with mmap_sem.
+                 */
+                mutex_unlock(&dev->ev_mutex);
+                name_len = kevent->name ? kevent->event.len : 0;
+                fault = copy_to_user(buf, &kevent->event, event_size) ? 1 : 0;
+                if (!fault && name_len)
+                        fault = copy_to_user(buf + event_size, kevent->name,
+                                             name_len) ? 1 : 0;
+                /* the event is off the queue: we own it, success or not */
+                free_kevent(kevent);
+                /* ev_mutex is not held here, so do not reach the unlock */
+                if (fault)
+                        return -EFAULT;
+                buf += event_size + name_len;
+                count -= event_size + name_len;
+                mutex_lock(&dev->ev_mutex);
+        }
+        mutex_unlock(&dev->ev_mutex);
+        return ret;
+}
+/*
+ * inotify_fasync - fasync() handler: (un)register the file for asynchronous
+ * notification; any fasync_helper() failure is reported as -EIO
+ */
+static int inotify_fasync(int fd, struct file *file, int on)
+{
+        struct inotify_device *dev = file->private_data;
+        if (fasync_helper(fd, file, on, &dev->fa) < 0)
+                return -EIO;
+        return 0;
+}
+/*
+ * inotify_release - release() handler: tear down the inotify instance
+ *
+ * Destroys the handle first (NOTE(review): presumably this removes all
+ * remaining watches and may still queue events - confirm in core inotify),
+ * then discards whatever is left in the event queue and finally drops the
+ * file's reference on the device.  Always returns 0.
+ */
+static int inotify_release(struct inode *ignored, struct file *file)
+{
+        struct inotify_device *dev = file->private_data;
+        inotify_destroy(dev->ih);
+        /* destroy all of the events on this device */
+        mutex_lock(&dev->ev_mutex);
+        while (!list_empty(&dev->events))
+                inotify_dev_event_dequeue(dev);
+        mutex_unlock(&dev->ev_mutex);
+        /* free this device: the put matching the get in inotify_init() */
+        put_inotify_dev(dev);
+        return 0;
+}
+/*
+ * inotify_ioctl - ioctl() handler
+ *
+ * Only FIONREAD is supported: it stores the current size of the event queue
+ * in bytes into the int pointed to by 'arg'.  Any other command yields
+ * -ENOTTY.
+ */
+static long inotify_ioctl(struct file *file, unsigned int cmd,
+                          unsigned long arg)
+{
+        struct inotify_device *dev = file->private_data;
+        void __user *uarg = (void __user *) arg;
+        if (cmd != FIONREAD)
+                return -ENOTTY;
+        return put_user(dev->queue_size, (int __user *) uarg);
+}
+/*
+ * File operations of an inotify instance.  The address of this table is also
+ * what the inotify syscalls compare f_op against to verify that a file
+ * descriptor really refers to an inotify instance.
+ */
+static const struct file_operations inotify_fops = {
+        .poll           = inotify_poll,
+        .read           = inotify_read,
+        .fasync         = inotify_fasync,
+        .release        = inotify_release,
+        .unlocked_ioctl = inotify_ioctl,
+        /* same handler for the compat ioctl path */
+        .compat_ioctl   = inotify_ioctl,
+};
+/* callbacks handed to core inotify via inotify_init() for every instance */
+static const struct inotify_operations inotify_user_ops = {
+        .handle_event   = inotify_dev_queue_event,      /* queue for read() */
+        .destroy_watch  = free_inotify_user_watch,      /* free our watch */
+};
+/*
+ * sys_inotify_init1 - create a new inotify instance
+ *
+ * @flags: any combination of IN_CLOEXEC and IN_NONBLOCK
+ *
+ * Returns a new file descriptor referring to the instance, or:
+ *   -EINVAL  unknown bits set in 'flags'
+ *   -ENFILE  no file structure could be obtained
+ *   -EMFILE  the calling user is at the inotify_max_user_instances limit
+ *   -ENOMEM  the device could not be allocated
+ * or the error returned by get_unused_fd_flags() / inotify_init().
+ */
+asmlinkage long sys_inotify_init1(int flags)
+{
+        struct inotify_device *dev;
+        struct inotify_handle *ih;
+        struct user_struct *user;
+        struct file *filp;
+        int fd, ret;
+        /* Check the IN_* constants for consistency.  */
+        BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
+        BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
+        if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
+                return -EINVAL;
+        fd = get_unused_fd_flags(flags & O_CLOEXEC);
+        if (fd < 0)
+                return fd;
+        filp = get_empty_filp();
+        if (!filp) {
+                ret = -ENFILE;
+                goto out_put_fd;
+        }
+        /* takes a reference on the user; dropped via free_uid() */
+        user = get_current_user();
+        if (unlikely(atomic_read(&user->inotify_devs) >=
+                        inotify_max_user_instances)) {
+                ret = -EMFILE;
+                goto out_free_uid;
+        }
+        dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
+        if (unlikely(!dev)) {
+                ret = -ENOMEM;
+                goto out_free_uid;
+        }
+        ih = inotify_init(&inotify_user_ops);
+        if (IS_ERR(ih)) {
+                ret = PTR_ERR(ih);
+                goto out_free_dev;
+        }
+        /* nothing below can fail: fill in the device and the file */
+        dev->ih = ih;
+        dev->fa = NULL;
+        filp->f_op = &inotify_fops;
+        filp->f_path.mnt = mntget(inotify_mnt);
+        filp->f_path.dentry = dget(inotify_mnt->mnt_root);
+        filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
+        filp->f_mode = FMODE_READ;
+        filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
+        filp->private_data = dev;
+        INIT_LIST_HEAD(&dev->events);
+        init_waitqueue_head(&dev->wq);
+        mutex_init(&dev->ev_mutex);
+        mutex_init(&dev->up_mutex);
+        dev->event_count = 0;
+        dev->queue_size = 0;
+        /* the queue limit is sampled once, at creation time */
+        dev->max_events = inotify_max_queued_events;
+        /* the user reference taken above now belongs to the device */
+        dev->user = user;
+        atomic_set(&dev->count, 0);
+        /* the file's reference; dropped in inotify_release() */
+        get_inotify_dev(dev);
+        atomic_inc(&user->inotify_devs);
+        /* publish the file last, once everything is initialized */
+        fd_install(fd, filp);
+        return fd;
+        /* error unwinding, in reverse order of acquisition */
+out_free_dev:
+        kfree(dev);
+out_free_uid:
+        free_uid(user);
+        put_filp(filp);
+out_put_fd:
+        put_unused_fd(fd);
+        return ret;
+}
+/*
+ * sys_inotify_init - create a new inotify instance with no flags set;
+ * equivalent to sys_inotify_init1(0)
+ */
+asmlinkage long sys_inotify_init(void)
+{
+        return sys_inotify_init1(0);
+}
+/*
+ * sys_inotify_add_watch - add a watch on 'pathname' to the inotify instance
+ * behind 'fd', or update the mask of the watch that already exists there
+ *
+ * IN_DONT_FOLLOW in 'mask' suppresses following a trailing symlink and
+ * IN_ONLYDIR restricts the lookup to directories.
+ *
+ * Returns the result of inotify_find_update_watch() / create_watch(), or:
+ *   -EBADF   'fd' is not an open file descriptor
+ *   -EINVAL  'fd' does not refer to an inotify instance
+ * or the error from resolving and permission-checking 'pathname'.
+ */
+asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask)
+{
+        struct inotify_device *dev;
+        struct inode *target;
+        struct path path;
+        struct file *filp;
+        unsigned lookup_flags;
+        int ret, fput_needed;
+        filp = fget_light(fd, &fput_needed);
+        if (unlikely(!filp))
+                return -EBADF;
+        /* verify that this is indeed an inotify instance */
+        ret = -EINVAL;
+        if (unlikely(filp->f_op != &inotify_fops))
+                goto fput_and_out;
+        lookup_flags = (mask & IN_DONT_FOLLOW) ? 0 : LOOKUP_FOLLOW;
+        if (mask & IN_ONLYDIR)
+                lookup_flags |= LOOKUP_DIRECTORY;
+        ret = find_inode(pathname, &path, lookup_flags);
+        if (unlikely(ret))
+                goto fput_and_out;
+        /* inode held in place by reference to path; dev by fget on fd */
+        target = path.dentry->d_inode;
+        dev = filp->private_data;
+        /* up_mutex keeps lookup-then-create atomic per device */
+        mutex_lock(&dev->up_mutex);
+        ret = inotify_find_update_watch(dev->ih, target, mask);
+        if (ret == -ENOENT)
+                ret = create_watch(dev, target, mask);
+        mutex_unlock(&dev->up_mutex);
+        path_put(&path);
+fput_and_out:
+        fput_light(filp, fput_needed);
+        return ret;
+}
+/*
+ * sys_inotify_rm_watch - remove the watch 'wd' from the inotify instance
+ * behind 'fd'
+ *
+ * Returns the result of inotify_rm_wd(), -EBADF if 'fd' is not an open file
+ * descriptor, or -EINVAL if it does not refer to an inotify instance.
+ */
+asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
+{
+        struct file *filp;
+        int fput_needed;
+        int ret = -EINVAL;
+        filp = fget_light(fd, &fput_needed);
+        if (unlikely(!filp))
+                return -EBADF;
+        /* verify that this is indeed an inotify instance */
+        if (likely(filp->f_op == &inotify_fops)) {
+                struct inotify_device *dev = filp->private_data;
+                /* we free our watch data when we get IN_IGNORED */
+                ret = inotify_rm_wd(dev->ih, wd);
+        }
+        fput_light(filp, fput_needed);
+        return ret;
+}
+/*
+ * inotify_get_sb - get_sb callback of inotify_fs_type: set up the superblock
+ * as a pseudo filesystem named "inotify" with magic INOTIFYFS_SUPER_MAGIC.
+ * The 'flags', 'dev_name' and 'data' arguments are not used.
+ */
+static int
+inotify_get_sb(struct file_system_type *fs_type, int flags,
+               const char *dev_name, void *data, struct vfsmount *mnt)
+{
+        return get_sb_pseudo(fs_type, "inotify", NULL,
+                        INOTIFYFS_SUPER_MAGIC, mnt);
+}
+/*
+ * The filesystem type behind inotify_mnt; it is registered and mounted once
+ * in inotify_user_setup().
+ */
+static struct file_system_type inotify_fs_type = {
+    .name           = "inotifyfs",
+    .get_sb         = inotify_get_sb,
+    .kill_sb        = kill_anon_super,
+};
+/*
+ * inotify_user_setup - Our initialization function.  Note that we cannot
+ * return an error because we have compiled-in VFS hooks, so an (unlikely)
+ * failure here must result in panic().
+ *
+ * Registers and mounts the inotify pseudo filesystem, installs the default
+ * limits and creates the slab caches for watches and events.
+ */
+static int __init inotify_user_setup(void)
+{
+        int err = register_filesystem(&inotify_fs_type);
+        if (unlikely(err))
+                panic("inotify: register_filesystem returned %d!\n", err);
+        inotify_mnt = kern_mount(&inotify_fs_type);
+        if (IS_ERR(inotify_mnt))
+                panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
+        /* default limits; adjustable at run time through sysctl */
+        inotify_max_user_instances = 128;
+        inotify_max_user_watches = 8192;
+        inotify_max_queued_events = 16384;
+        /* SLAB_PANIC: cache creation failures panic, no check needed */
+        watch_cachep = kmem_cache_create("inotify_watch_cache",
+                                         sizeof(struct inotify_user_watch),
+                                         0, SLAB_PANIC, NULL);
+        event_cachep = kmem_cache_create("inotify_event_cache",
+                                         sizeof(struct inotify_kernel_event),
+                                         0, SLAB_PANIC, NULL);
+        return 0;
+}
+module_init(inotify_user_setup);

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c new file mode 100644 index 000000000000..400f8064a548 --- /dev/null +++ b/fs/notify/inotify/inotify_user.c
@@ -0,0 +1,778 @@
	1	/*
	2	* fs/inotify_user.c - inotify support for userspace
	3	*
	4	* Authors:
	5	* John McCutchan <ttb@tentacle.dhs.org>
	6	* Robert Love <rml@novell.com>
	7	*
	8	* Copyright (C) 2005 John McCutchan
	9	* Copyright 2006 Hewlett-Packard Development Company, L.P.
	10	*
	11	* This program is free software; you can redistribute it and/or modify it
	12	* under the terms of the GNU General Public License as published by the
	13	* Free Software Foundation; either version 2, or (at your option) any
	14	* later version.
	15	*
	16	* This program is distributed in the hope that it will be useful, but
	17	* WITHOUT ANY WARRANTY; without even the implied warranty of
	18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	19	* General Public License for more details.
	20	*/
	21
	22	#include <linux/kernel.h>
	23	#include <linux/sched.h>
	24	#include <linux/slab.h>
	25	#include <linux/fs.h>
	26	#include <linux/file.h>
	27	#include <linux/mount.h>
	28	#include <linux/namei.h>
	29	#include <linux/poll.h>
	30	#include <linux/init.h>
	31	#include <linux/list.h>
	32	#include <linux/inotify.h>
	33	#include <linux/syscalls.h>
	34	#include <linux/magic.h>
	35
	36	#include <asm/ioctls.h>
	37
	38	static struct kmem_cache *watch_cachep __read_mostly;
	39	static struct kmem_cache *event_cachep __read_mostly;
	40
	41	static struct vfsmount *inotify_mnt __read_mostly;
	42
	43	/* these are configurable via /proc/sys/fs/inotify/ */
	44	static int inotify_max_user_instances __read_mostly;
	45	static int inotify_max_user_watches __read_mostly;
	46	static int inotify_max_queued_events __read_mostly;
	47
	48	/*
	49	* Lock ordering:
	50	*
	51	* inotify_dev->up_mutex (ensures we don't re-add the same watch)
	52	* inode->inotify_mutex (protects inode's watch list)
	53	* inotify_handle->mutex (protects inotify_handle's watch list)
	54	* inotify_dev->ev_mutex (protects device's event queue)
	55	*/
	56
	57	/*
	58	* Lifetimes of the main data structures:
	59	*
	60	* inotify_device: Lifetime is managed by reference count, from
	61	* sys_inotify_init() until release. Additional references can bump the count
	62	* via get_inotify_dev() and drop the count via put_inotify_dev().
	63	*
	64	* inotify_user_watch: Lifetime is from create_watch() to the receipt of an
	65	* IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the
	66	* first event, or to inotify_destroy().
	67	*/
	68
	69	/*
	70	* struct inotify_device - represents an inotify instance
	71	*
	72	* This structure is protected by the mutex 'mutex'.
	73	*/
	74	struct inotify_device {
	75	wait_queue_head_t wq; /* wait queue for i/o */
	76	struct mutex ev_mutex; /* protects event queue */
	77	struct mutex up_mutex; /* synchronizes watch updates */
	78	struct list_head events; /* list of queued events */
	79	struct user_struct *user; /* user who opened this dev */
	80	struct inotify_handle *ih; /* inotify handle */
	81	struct fasync_struct *fa; /* async notification */
	82	atomic_t count; /* reference count */
	83	unsigned int queue_size; /* size of the queue (bytes) */
	84	unsigned int event_count; /* number of pending events */
	85	unsigned int max_events; /* maximum number of events */
	86	};
	87
	88	/*
	89	* struct inotify_kernel_event - An inotify event, originating from a watch and
	90	* queued for user-space. A list of these is attached to each instance of the
	91	* device. In read(), this list is walked and all events that can fit in the
	92	* buffer are returned.
	93	*
	94	* Protected by dev->ev_mutex of the device in which we are queued.
	95	*/
	96	struct inotify_kernel_event {
	97	struct inotify_event event; /* the user-space event */
	98	struct list_head list; /* entry in inotify_device's list */
	99	char *name; /* filename, if any */
	100	};
	101
	102	/*
	103	* struct inotify_user_watch - our version of an inotify_watch, we add
	104	* a reference to the associated inotify_device.
	105	*/
	106	struct inotify_user_watch {
	107	struct inotify_device *dev; /* associated device */
	108	struct inotify_watch wdata; /* inotify watch data */
	109	};
	110
	111	#ifdef CONFIG_SYSCTL
	112
	113	#include <linux/sysctl.h>
	114
	115	static int zero;
	116
	117	ctl_table inotify_table[] = {
	118	{
	119	.ctl_name = INOTIFY_MAX_USER_INSTANCES,
	120	.procname = "max_user_instances",
	121	.data = &inotify_max_user_instances,
	122	.maxlen = sizeof(int),
	123	.mode = 0644,
	124	.proc_handler = &proc_dointvec_minmax,
	125	.strategy = &sysctl_intvec,
	126	.extra1 = &zero,
	127	},
	128	{
	129	.ctl_name = INOTIFY_MAX_USER_WATCHES,
	130	.procname = "max_user_watches",
	131	.data = &inotify_max_user_watches,
	132	.maxlen = sizeof(int),
	133	.mode = 0644,
	134	.proc_handler = &proc_dointvec_minmax,
	135	.strategy = &sysctl_intvec,
	136	.extra1 = &zero,
	137	},
	138	{
	139	.ctl_name = INOTIFY_MAX_QUEUED_EVENTS,
	140	.procname = "max_queued_events",
	141	.data = &inotify_max_queued_events,
	142	.maxlen = sizeof(int),
	143	.mode = 0644,
	144	.proc_handler = &proc_dointvec_minmax,
	145	.strategy = &sysctl_intvec,
	146	.extra1 = &zero
	147	},
	148	{ .ctl_name = 0 }
	149	};
	150	#endif /* CONFIG_SYSCTL */
	151
	152	static inline void get_inotify_dev(struct inotify_device *dev)
	153	{
	154	atomic_inc(&dev->count);
	155	}
	156
	157	static inline void put_inotify_dev(struct inotify_device *dev)
	158	{
	159	if (atomic_dec_and_test(&dev->count)) {
	160	atomic_dec(&dev->user->inotify_devs);
	161	free_uid(dev->user);
	162	kfree(dev);
	163	}
	164	}
	165
	166	/*
	167	* free_inotify_user_watch - cleans up the watch and its references
	168	*/
	169	static void free_inotify_user_watch(struct inotify_watch *w)
	170	{
	171	struct inotify_user_watch *watch;
	172	struct inotify_device *dev;
	173
	174	watch = container_of(w, struct inotify_user_watch, wdata);
	175	dev = watch->dev;
	176
	177	atomic_dec(&dev->user->inotify_watches);
	178	put_inotify_dev(dev);
	179	kmem_cache_free(watch_cachep, watch);
	180	}
	181
	182	/*
	183	* kernel_event - create a new kernel event with the given parameters
	184	*
	185	* This function can sleep.
	186	*/
	187	static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
	188	const char *name)
	189	{
	190	struct inotify_kernel_event *kevent;
	191
	192	kevent = kmem_cache_alloc(event_cachep, GFP_NOFS);
	193	if (unlikely(!kevent))
	194	return NULL;
	195
	196	/* we hand this out to user-space, so zero it just in case */
	197	memset(&kevent->event, 0, sizeof(struct inotify_event));
	198
	199	kevent->event.wd = wd;
	200	kevent->event.mask = mask;
	201	kevent->event.cookie = cookie;
	202
	203	INIT_LIST_HEAD(&kevent->list);
	204
	205	if (name) {
	206	size_t len, rem, event_size = sizeof(struct inotify_event);
	207
	208	/*
	209	* We need to pad the filename so as to properly align an
	210	* array of inotify_event structures. Because the structure is
	211	* small and the common case is a small filename, we just round
	212	* up to the next multiple of the structure's sizeof. This is
	213	* simple and safe for all architectures.
	214	*/
	215	len = strlen(name) + 1;
	216	rem = event_size - len;
	217	if (len > event_size) {
	218	rem = event_size - (len % event_size);
	219	if (len % event_size == 0)
	220	rem = 0;
	221	}
	222
	223	kevent->name = kmalloc(len + rem, GFP_KERNEL);
	224	if (unlikely(!kevent->name)) {
	225	kmem_cache_free(event_cachep, kevent);
	226	return NULL;
	227	}
	228	memcpy(kevent->name, name, len);
	229	if (rem)
	230	memset(kevent->name + len, 0, rem);
	231	kevent->event.len = len + rem;
	232	} else {
	233	kevent->event.len = 0;
	234	kevent->name = NULL;
	235	}
	236
	237	return kevent;
	238	}
	239
	240	/*
	241	* inotify_dev_get_event - return the next event in the given dev's queue
	242	*
	243	* Caller must hold dev->ev_mutex.
	244	*/
	245	static inline struct inotify_kernel_event *
	246	inotify_dev_get_event(struct inotify_device *dev)
	247	{
	248	return list_entry(dev->events.next, struct inotify_kernel_event, list);
	249	}
	250
	251	/*
	252	* inotify_dev_get_last_event - return the last event in the given dev's queue
	253	*
	254	* Caller must hold dev->ev_mutex.
	255	*/
	256	static inline struct inotify_kernel_event *
	257	inotify_dev_get_last_event(struct inotify_device *dev)
	258	{
	259	if (list_empty(&dev->events))
	260	return NULL;
	261	return list_entry(dev->events.prev, struct inotify_kernel_event, list);
	262	}
	263
	264	/*
	265	* inotify_dev_queue_event - event handler registered with core inotify, adds
	266	* a new event to the given device
	267	*
	268	* Can sleep (calls kernel_event()).
	269	*/
	270	static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
	271	u32 cookie, const char *name,
	272	struct inode *ignored)
	273	{
	274	struct inotify_user_watch *watch;
	275	struct inotify_device *dev;
	276	struct inotify_kernel_event kevent, last;
	277
	278	watch = container_of(w, struct inotify_user_watch, wdata);
	279	dev = watch->dev;
	280
	281	mutex_lock(&dev->ev_mutex);
	282
	283	/* we can safely put the watch as we don't reference it while
	284	* generating the event
	285	*/
	286	if (mask & IN_IGNORED \|\| w->mask & IN_ONESHOT)
	287	put_inotify_watch(w); /* final put */
	288
	289	/* coalescing: drop this event if it is a dupe of the previous */
	290	last = inotify_dev_get_last_event(dev);
	291	if (last && last->event.mask == mask && last->event.wd == wd &&
	292	last->event.cookie == cookie) {
	293	const char *lastname = last->name;
	294
	295	if (!name && !lastname)
	296	goto out;
	297	if (name && lastname && !strcmp(lastname, name))
	298	goto out;
	299	}
	300
	301	/* the queue overflowed and we already sent the Q_OVERFLOW event */
	302	if (unlikely(dev->event_count > dev->max_events))
	303	goto out;
	304
	305	/* if the queue overflows, we need to notify user space */
	306	if (unlikely(dev->event_count == dev->max_events))
	307	kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
	308	else
	309	kevent = kernel_event(wd, mask, cookie, name);
	310
	311	if (unlikely(!kevent))
	312	goto out;
	313
	314	/* queue the event and wake up anyone waiting */
	315	dev->event_count++;
	316	dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
	317	list_add_tail(&kevent->list, &dev->events);
	318	wake_up_interruptible(&dev->wq);
	319	kill_fasync(&dev->fa, SIGIO, POLL_IN);
	320
	321	out:
	322	mutex_unlock(&dev->ev_mutex);
	323	}
	324
	325	/*
	326	* remove_kevent - cleans up the given kevent
	327	*
	328	* Caller must hold dev->ev_mutex.
	329	*/
	330	static void remove_kevent(struct inotify_device *dev,
	331	struct inotify_kernel_event *kevent)
	332	{
	333	list_del(&kevent->list);
	334
	335	dev->event_count--;
	336	dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
	337	}
	338
	339	/*
	340	* free_kevent - frees the given kevent.
	341	*/
	342	static void free_kevent(struct inotify_kernel_event *kevent)
	343	{
	344	kfree(kevent->name);
	345	kmem_cache_free(event_cachep, kevent);
	346	}
	347
	348	/*
	349	* inotify_dev_event_dequeue - destroy an event on the given device
	350	*
	351	* Caller must hold dev->ev_mutex.
	352	*/
	353	static void inotify_dev_event_dequeue(struct inotify_device *dev)
	354	{
	355	if (!list_empty(&dev->events)) {
	356	struct inotify_kernel_event *kevent;
	357	kevent = inotify_dev_get_event(dev);
	358	remove_kevent(dev, kevent);
	359	free_kevent(kevent);
	360	}
	361	}
	362
	363	/*
	364	* find_inode - resolve a user-given path to a specific inode
	365	*/
	366	static int find_inode(const char __user dirname, struct path path,
	367	unsigned flags)
	368	{
	369	int error;
	370
	371	error = user_path_at(AT_FDCWD, dirname, flags, path);
	372	if (error)
	373	return error;
	374	/* you can only watch an inode if you have read permissions on it */
	375	error = inode_permission(path->dentry->d_inode, MAY_READ);
	376	if (error)
	377	path_put(path);
	378	return error;
	379	}
	380
	381	/*
	382	* create_watch - creates a watch on the given device.
	383	*
	384	* Callers must hold dev->up_mutex.
	385	*/
	386	static int create_watch(struct inotify_device dev, struct inode inode,
	387	u32 mask)
	388	{
	389	struct inotify_user_watch *watch;
	390	int ret;
	391
	392	if (atomic_read(&dev->user->inotify_watches) >=
	393	inotify_max_user_watches)
	394	return -ENOSPC;
	395
	396	watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
	397	if (unlikely(!watch))
	398	return -ENOMEM;
	399
	400	/* save a reference to device and bump the count to make it official */
	401	get_inotify_dev(dev);
	402	watch->dev = dev;
	403
	404	atomic_inc(&dev->user->inotify_watches);
	405
	406	inotify_init_watch(&watch->wdata);
	407	ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
	408	if (ret < 0)
	409	free_inotify_user_watch(&watch->wdata);
	410
	411	return ret;
	412	}
	413
	414	/* Device Interface */
	415
	416	static unsigned int inotify_poll(struct file file, poll_table wait)
	417	{
	418	struct inotify_device *dev = file->private_data;
	419	int ret = 0;
	420
	421	poll_wait(file, &dev->wq, wait);
	422	mutex_lock(&dev->ev_mutex);
	423	if (!list_empty(&dev->events))
	424	ret = POLLIN \| POLLRDNORM;
	425	mutex_unlock(&dev->ev_mutex);
	426
	427	return ret;
	428	}
	429
	430	static ssize_t inotify_read(struct file file, char __user buf,
	431	size_t count, loff_t *pos)
	432	{
	433	size_t event_size = sizeof (struct inotify_event);
	434	struct inotify_device *dev;
	435	char __user *start;
	436	int ret;
	437	DEFINE_WAIT(wait);
	438
	439	start = buf;
	440	dev = file->private_data;
	441
	442	while (1) {
	443
	444	prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
	445
	446	mutex_lock(&dev->ev_mutex);
	447	if (!list_empty(&dev->events)) {
	448	ret = 0;
	449	break;
	450	}
	451	mutex_unlock(&dev->ev_mutex);
	452
	453	if (file->f_flags & O_NONBLOCK) {
	454	ret = -EAGAIN;
	455	break;
	456	}
	457
	458	if (signal_pending(current)) {
	459	ret = -EINTR;
	460	break;
	461	}
	462
	463	schedule();
	464	}
	465
	466	finish_wait(&dev->wq, &wait);
	467	if (ret)
	468	return ret;
	469
	470	while (1) {
	471	struct inotify_kernel_event *kevent;
	472
	473	ret = buf - start;
	474	if (list_empty(&dev->events))
	475	break;
	476
	477	kevent = inotify_dev_get_event(dev);
	478	if (event_size + kevent->event.len > count) {
	479	if (ret == 0 && count > 0) {
	480	/*
	481	* could not get a single event because we
	482	* didn't have enough buffer space.
	483	*/
	484	ret = -EINVAL;
	485	}
	486	break;
	487	}
	488	remove_kevent(dev, kevent);
	489
	490	/*
	491	* Must perform the copy_to_user outside the mutex in order
	492	* to avoid a lock order reversal with mmap_sem.
	493	*/
	494	mutex_unlock(&dev->ev_mutex);
	495
	496	if (copy_to_user(buf, &kevent->event, event_size)) {
	497	ret = -EFAULT;
	498	break;
	499	}
	500	buf += event_size;
	501	count -= event_size;
	502
	503	if (kevent->name) {
	504	if (copy_to_user(buf, kevent->name, kevent->event.len)){
	505	ret = -EFAULT;
	506	break;
	507	}
	508	buf += kevent->event.len;
	509	count -= kevent->event.len;
	510	}
	511
	512	free_kevent(kevent);
	513
	514	mutex_lock(&dev->ev_mutex);
	515	}
	516	mutex_unlock(&dev->ev_mutex);
	517
	518	return ret;
	519	}
	520
	521	static int inotify_fasync(int fd, struct file *file, int on)
	522	{
	523	struct inotify_device *dev = file->private_data;
	524
	525	return fasync_helper(fd, file, on, &dev->fa) >= 0 ? 0 : -EIO;
	526	}
	527
	528	static int inotify_release(struct inode ignored, struct file file)
	529	{
	530	struct inotify_device *dev = file->private_data;
	531
	532	inotify_destroy(dev->ih);
	533
	534	/* destroy all of the events on this device */
	535	mutex_lock(&dev->ev_mutex);
	536	while (!list_empty(&dev->events))
	537	inotify_dev_event_dequeue(dev);
	538	mutex_unlock(&dev->ev_mutex);
	539
	540	/* free this device: the put matching the get in inotify_init() */
	541	put_inotify_dev(dev);
	542
	543	return 0;
	544	}
	545
	546	static long inotify_ioctl(struct file *file, unsigned int cmd,
	547	unsigned long arg)
	548	{
	549	struct inotify_device *dev;
	550	void __user *p;
	551	int ret = -ENOTTY;
	552
	553	dev = file->private_data;
	554	p = (void __user *) arg;
	555
	556	switch (cmd) {
	557	case FIONREAD:
	558	ret = put_user(dev->queue_size, (int __user *) p);
	559	break;
	560	}
	561
	562	return ret;
	563	}
	564
	565	static const struct file_operations inotify_fops = {
	566	.poll = inotify_poll,
	567	.read = inotify_read,
	568	.fasync = inotify_fasync,
	569	.release = inotify_release,
	570	.unlocked_ioctl = inotify_ioctl,
	571	.compat_ioctl = inotify_ioctl,
	572	};
	573
	574	static const struct inotify_operations inotify_user_ops = {
	575	.handle_event = inotify_dev_queue_event,
	576	.destroy_watch = free_inotify_user_watch,
	577	};
	578
	579	asmlinkage long sys_inotify_init1(int flags)
	580	{
	581	struct inotify_device *dev;
	582	struct inotify_handle *ih;
	583	struct user_struct *user;
	584	struct file *filp;
	585	int fd, ret;
	586
	587	/* Check the IN_* constants for consistency. */
	588	BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
	589	BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
	590
	591	if (flags & ~(IN_CLOEXEC \| IN_NONBLOCK))
	592	return -EINVAL;
	593
	594	fd = get_unused_fd_flags(flags & O_CLOEXEC);
	595	if (fd < 0)
	596	return fd;
	597
	598	filp = get_empty_filp();
	599	if (!filp) {
	600	ret = -ENFILE;
	601	goto out_put_fd;
	602	}
	603
	604	user = get_current_user();
	605	if (unlikely(atomic_read(&user->inotify_devs) >=
	606	inotify_max_user_instances)) {
	607	ret = -EMFILE;
	608	goto out_free_uid;
	609	}
	610
	611	dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
	612	if (unlikely(!dev)) {
	613	ret = -ENOMEM;
	614	goto out_free_uid;
	615	}
	616
	617	ih = inotify_init(&inotify_user_ops);
	618	if (IS_ERR(ih)) {
	619	ret = PTR_ERR(ih);
	620	goto out_free_dev;
	621	}
	622	dev->ih = ih;
	623	dev->fa = NULL;
	624
	625	filp->f_op = &inotify_fops;
	626	filp->f_path.mnt = mntget(inotify_mnt);
	627	filp->f_path.dentry = dget(inotify_mnt->mnt_root);
	628	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
	629	filp->f_mode = FMODE_READ;
	630	filp->f_flags = O_RDONLY \| (flags & O_NONBLOCK);
	631	filp->private_data = dev;
	632
	633	INIT_LIST_HEAD(&dev->events);
	634	init_waitqueue_head(&dev->wq);
	635	mutex_init(&dev->ev_mutex);
	636	mutex_init(&dev->up_mutex);
	637	dev->event_count = 0;
	638	dev->queue_size = 0;
	639	dev->max_events = inotify_max_queued_events;
	640	dev->user = user;
	641	atomic_set(&dev->count, 0);
	642
	643	get_inotify_dev(dev);
	644	atomic_inc(&user->inotify_devs);
	645	fd_install(fd, filp);
	646
	647	return fd;
	648	out_free_dev:
	649	kfree(dev);
	650	out_free_uid:
	651	free_uid(user);
	652	put_filp(filp);
	653	out_put_fd:
	654	put_unused_fd(fd);
	655	return ret;
	656	}
	657
	658	asmlinkage long sys_inotify_init(void)
	659	{
	660	return sys_inotify_init1(0);
	661	}
	662
	663	asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask)
	664	{
	665	struct inode *inode;
	666	struct inotify_device *dev;
	667	struct path path;
	668	struct file *filp;
	669	int ret, fput_needed;
	670	unsigned flags = 0;
	671
	672	filp = fget_light(fd, &fput_needed);
	673	if (unlikely(!filp))
	674	return -EBADF;
	675
	676	/* verify that this is indeed an inotify instance */
	677	if (unlikely(filp->f_op != &inotify_fops)) {
	678	ret = -EINVAL;
	679	goto fput_and_out;
	680	}
	681
	682	if (!(mask & IN_DONT_FOLLOW))
	683	flags \|= LOOKUP_FOLLOW;
	684	if (mask & IN_ONLYDIR)
	685	flags \|= LOOKUP_DIRECTORY;
	686
	687	ret = find_inode(pathname, &path, flags);
	688	if (unlikely(ret))
	689	goto fput_and_out;
	690
	691	/* inode held in place by reference to path; dev by fget on fd */
	692	inode = path.dentry->d_inode;
	693	dev = filp->private_data;
	694
	695	mutex_lock(&dev->up_mutex);
	696	ret = inotify_find_update_watch(dev->ih, inode, mask);
	697	if (ret == -ENOENT)
	698	ret = create_watch(dev, inode, mask);
	699	mutex_unlock(&dev->up_mutex);
	700
	701	path_put(&path);
	702	fput_and_out:
	703	fput_light(filp, fput_needed);
	704	return ret;
	705	}
	706
	707	asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
	708	{
	709	struct file *filp;
	710	struct inotify_device *dev;
	711	int ret, fput_needed;
	712
	713	filp = fget_light(fd, &fput_needed);
	714	if (unlikely(!filp))
	715	return -EBADF;
	716
	717	/* verify that this is indeed an inotify instance */
	718	if (unlikely(filp->f_op != &inotify_fops)) {
	719	ret = -EINVAL;
	720	goto out;
	721	}
	722
	723	dev = filp->private_data;
	724
	725	/* we free our watch data when we get IN_IGNORED */
	726	ret = inotify_rm_wd(dev->ih, wd);
	727
	728	out:
	729	fput_light(filp, fput_needed);
	730	return ret;
	731	}
	732
	733	static int
	734	inotify_get_sb(struct file_system_type *fs_type, int flags,
	735	const char dev_name, void data, struct vfsmount *mnt)
	736	{
	737	return get_sb_pseudo(fs_type, "inotify", NULL,
	738	INOTIFYFS_SUPER_MAGIC, mnt);
	739	}
	740
	741	static struct file_system_type inotify_fs_type = {
	742	.name = "inotifyfs",
	743	.get_sb = inotify_get_sb,
	744	.kill_sb = kill_anon_super,
	745	};
	746
	747	/*
	748	* inotify_user_setup - Our initialization function. Note that we cannnot return
	749	* error because we have compiled-in VFS hooks. So an (unlikely) failure here
	750	* must result in panic().
	751	*/
	752	static int __init inotify_user_setup(void)
	753	{
	754	int ret;
	755
	756	ret = register_filesystem(&inotify_fs_type);
	757	if (unlikely(ret))
	758	panic("inotify: register_filesystem returned %d!\n", ret);
	759
	760	inotify_mnt = kern_mount(&inotify_fs_type);
	761	if (IS_ERR(inotify_mnt))
	762	panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
	763
	764	inotify_max_queued_events = 16384;
	765	inotify_max_user_instances = 128;
	766	inotify_max_user_watches = 8192;
	767
	768	watch_cachep = kmem_cache_create("inotify_watch_cache",
	769	sizeof(struct inotify_user_watch),
	770	0, SLAB_PANIC, NULL);
	771	event_cachep = kmem_cache_create("inotify_event_cache",
	772	sizeof(struct inotify_kernel_event),
	773	0, SLAB_PANIC, NULL);
	774
	775	return 0;
	776	}
	777
	778	module_init(inotify_user_setup);