dnotify: reimplement dnotify using fsnotify

Reimplement dnotify using fsnotify. Signed-off-by: Eric Paris <eparis@redhat.com> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Cc: Christoph Hellwig <hch@lst.de>
author: Eric Paris <eparis@redhat.com> 2009-05-21 17:01:33 -0400
committer: Eric Paris <eparis@redhat.com> 2009-06-11 14:57:53 -0400
commit: 3c5119c05d624f95f4967d16b38c9624b816bdb9 (patch)
tree: 0b5f66106aea38e52adf62958762b0a975607322 /fs/notify/dnotify/dnotify.c
parent: c28f7e56e9d95fb531dc3be8df2e7f52bee76d21 (diff)
1 files changed, 362 insertions, 107 deletions
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index b0aa2cde80bd..d9d80f502c6f 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -3,6 +3,9 @@
 *
 * Copyright (C) 2000,2001,2002 Stephen Rothwell
 *
+ * Copyright (C) 2009 Eric Paris <Red Hat Inc>
+ * dnotify was largely rewritten to use the new fsnotify infrastructure
+ *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
@@ -21,24 +24,178 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/fdtable.h>
+#include <linux/fsnotify_backend.h>
 int dir_notify_enable __read_mostly = 1;
-static struct kmem_cache *dn_cache __read_mostly;
+static struct kmem_cache *dnotify_struct_cache __read_mostly;
+static struct kmem_cache *dnotify_mark_entry_cache __read_mostly;
+static struct fsnotify_group *dnotify_group __read_mostly;
+static DEFINE_MUTEX(dnotify_mark_mutex);
+/*
+ * dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which
+ * is being watched by dnotify.  If multiple userspace applications are watching
+ * the same directory with dnotify their information is chained in dn
+ */
+struct dnotify_mark_entry {
+        struct fsnotify_mark_entry fsn_entry;
+        struct dnotify_struct *dn;
+};
-static void redo_inode_mask(struct inode *inode)
+/*
+ * When a process starts or stops watching an inode the set of events which
+ * dnotify cares about for that inode may change.  This function runs the
+ * list of everything receiving dnotify events about this directory and calculates
+ * the set of all those events.  After it updates what dnotify is interested in
+ * it calls the fsnotify function so it can update the set of all events relevant
+ * to this inode.
+ */
+static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
 {
-        unsigned long new_mask;
+        __u32 new_mask, old_mask;
        struct dnotify_struct *dn;
+        struct dnotify_mark_entry *dnentry  = container_of(entry,
+                                                           struct dnotify_mark_entry,
+                                                           fsn_entry);
+        assert_spin_locked(&entry->lock);
+        old_mask = entry->mask;
        new_mask = 0;
-        for (dn = inode->i_dnotify; dn != NULL; dn = dn->dn_next)
+        for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next)
-                new_mask |= dn->dn_mask & ~DN_MULTISHOT;
+                new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
-        inode->i_dnotify_mask = new_mask;
+        entry->mask = new_mask;
+        if (old_mask == new_mask)
+                return;
+        if (entry->inode)
+                fsnotify_recalc_inode_mask(entry->inode);
 }
+/*
+ * Main fsnotify call where events are delivered to dnotify.
+ * Find the dnotify mark on the relevant inode, run the list of dnotify structs
+ * on that mark and determine which of them has expressed interest in receiving
+ * events of this type.  When found, send the correct process a signal and
+ * destroy the dnotify struct if it was not registered to receive multiple
+ * events.
+ */
+static int dnotify_handle_event(struct fsnotify_group *group,
+                                struct fsnotify_event *event)
+{
+        struct fsnotify_mark_entry *entry = NULL;
+        struct dnotify_mark_entry *dnentry;
+        struct inode *to_tell;
+        struct dnotify_struct *dn;
+        struct dnotify_struct **prev;
+        struct fown_struct *fown;
+        to_tell = event->to_tell;
+        spin_lock(&to_tell->i_lock);
+        entry = fsnotify_find_mark_entry(group, to_tell);
+        spin_unlock(&to_tell->i_lock);
+        /* unlikely since we already passed dnotify_should_send_event() */
+        if (unlikely(!entry))
+                return 0;
+        dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+        spin_lock(&entry->lock);
+        prev = &dnentry->dn;
+        while ((dn = *prev) != NULL) {
+                if ((dn->dn_mask & event->mask) == 0) {
+                        prev = &dn->dn_next;
+                        continue;
+                }
+                fown = &dn->dn_filp->f_owner;
+                send_sigio(fown, dn->dn_fd, POLL_MSG);
+                if (dn->dn_mask & FS_DN_MULTISHOT)
+                        prev = &dn->dn_next;
+                else {
+                        *prev = dn->dn_next;
+                        kmem_cache_free(dnotify_struct_cache, dn);
+                        dnotify_recalc_inode_mask(entry);
+                }
+        }
+        spin_unlock(&entry->lock);
+        fsnotify_put_mark(entry);
+        return 0;
+}
+/*
+ * Given an inode and mask determine if dnotify would be interested in sending
+ * userspace notification for that pair.
+ */
+static bool dnotify_should_send_event(struct fsnotify_group *group,
+                                      struct inode *inode, __u32 mask)
+{
+        struct fsnotify_mark_entry *entry;
+        bool send;
+        /* !dir_notify_enable should never get here, don't waste time checking
+        if (!dir_notify_enable)
+                return 0; */
+        /* not a dir, dnotify doesn't care */
+        if (!S_ISDIR(inode->i_mode))
+                return false;
+        spin_lock(&inode->i_lock);
+        entry = fsnotify_find_mark_entry(group, inode);
+        spin_unlock(&inode->i_lock);
+        /* no mark means no dnotify watch */
+        if (!entry)
+                return false;
+        spin_lock(&entry->lock);
+        send = (mask & entry->mask) ? true : false;
+        spin_unlock(&entry->lock);
+        fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */
+        return send;
+}
+static void dnotify_freeing_mark(struct fsnotify_mark_entry *entry,
+                                 struct fsnotify_group *group)
+{
+        /* dnotify doesn't care that an inode is on the way out */
+}
+static void dnotify_free_mark(struct fsnotify_mark_entry *entry)
+{
+        struct dnotify_mark_entry *dnentry = container_of(entry,
+                                                          struct dnotify_mark_entry,
+                                                          fsn_entry);
+        BUG_ON(dnentry->dn);
+        kmem_cache_free(dnotify_mark_entry_cache, dnentry);
+}
+static struct fsnotify_ops dnotify_fsnotify_ops = {
+        .handle_event = dnotify_handle_event,
+        .should_send_event = dnotify_should_send_event,
+        .free_group_priv = NULL,
+        .freeing_mark = dnotify_freeing_mark,
+};
+/*
+ * Called every time a file is closed.  Looks first for a dnotify mark on the
+ * inode.  If one is found, search the ->dn entries attached to that
+ * mark for the one belonging to the process closing the file and remove that
+ * dnotify_struct.  If that was the last dnotify_struct also remove the
+ * fsnotify_mark_entry.
+ */
 void dnotify_flush(struct file *filp, fl_owner_t id)
 {
+        struct fsnotify_mark_entry *entry;
+        struct dnotify_mark_entry *dnentry;
        struct dnotify_struct *dn;
        struct dnotify_struct **prev;
        struct inode *inode;
@@ -46,145 +203,243 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
        inode = filp->f_path.dentry->d_inode;
        if (!S_ISDIR(inode->i_mode))
                return;
        spin_lock(&inode->i_lock);
-        prev = &inode->i_dnotify;
+        entry = fsnotify_find_mark_entry(dnotify_group, inode);
+        spin_unlock(&inode->i_lock);
+        if (!entry)
+                return;
+        dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+        mutex_lock(&dnotify_mark_mutex);
+        spin_lock(&entry->lock);
+        prev = &dnentry->dn;
        while ((dn = *prev) != NULL) {
                if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
                        *prev = dn->dn_next;
-                        redo_inode_mask(inode);
+                        kmem_cache_free(dnotify_struct_cache, dn);
-                        kmem_cache_free(dn_cache, dn);
+                        dnotify_recalc_inode_mask(entry);
                        break;
                }
                prev = &dn->dn_next;
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&entry->lock);
+        /* nothing else could have found us thanks to the dnotify_mark_mutex */
+        if (dnentry->dn == NULL)
+                fsnotify_destroy_mark_by_entry(entry);
+        fsnotify_recalc_group_mask(dnotify_group);
+        mutex_unlock(&dnotify_mark_mutex);
+        fsnotify_put_mark(entry);
+}
+/* this conversion is done only at watch creation */
+static __u32 convert_arg(unsigned long arg)
+{
+        __u32 new_mask = FS_EVENT_ON_CHILD;
+        if (arg & DN_MULTISHOT)
+                new_mask |= FS_DN_MULTISHOT;
+        if (arg & DN_DELETE)
+                new_mask |= (FS_DELETE | FS_MOVED_FROM);
+        if (arg & DN_MODIFY)
+                new_mask |= FS_MODIFY;
+        if (arg & DN_ACCESS)
+                new_mask |= FS_ACCESS;
+        if (arg & DN_ATTRIB)
+                new_mask |= FS_ATTRIB;
+        if (arg & DN_RENAME)
+                new_mask |= FS_DN_RENAME;
+        if (arg & DN_CREATE)
+                new_mask |= (FS_CREATE | FS_MOVED_TO);
+        return new_mask;
 }
+/*
+ * If multiple processes watch the same inode with dnotify there is only one
+ * dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct
+ * onto that mark.  This function either attaches the new dnotify_struct onto
+ * that list, or it |= the mask onto an existing dnotify_struct.
+ */
+static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnentry,
+                     fl_owner_t id, int fd, struct file *filp, __u32 mask)
+{
+        struct dnotify_struct *odn;
+        odn = dnentry->dn;
+        while (odn != NULL) {
+                /* adding more events to existing dnotify_struct? */
+                if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
+                        odn->dn_fd = fd;
+                        odn->dn_mask |= mask;
+                        return -EEXIST;
+                }
+                odn = odn->dn_next;
+        }
+        dn->dn_mask = mask;
+        dn->dn_fd = fd;
+        dn->dn_filp = filp;
+        dn->dn_owner = id;
+        dn->dn_next = dnentry->dn;
+        dnentry->dn = dn;
+        return 0;
+}
+/*
+ * When a process calls fcntl to attach a dnotify watch to a directory it ends
+ * up here.  Allocate both a mark for fsnotify to add and a dnotify_struct to be
+ * attached to the fsnotify_mark.
+ */
 int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 {
+        struct dnotify_mark_entry *new_dnentry, *dnentry;
+        struct fsnotify_mark_entry *new_entry, *entry;
        struct dnotify_struct *dn;
-        struct dnotify_struct *odn;
-        struct dnotify_struct **prev;
        struct inode *inode;
        fl_owner_t id = current->files;
        struct file *f;
-        int error = 0;
+        int destroy = 0, error = 0;
+        __u32 mask;
+        /* we use these to tell if we need to kfree */
+        new_entry = NULL;
+        dn = NULL;
+        if (!dir_notify_enable) {
+                error = -EINVAL;
+                goto out_err;
+        }
+        /* a 0 mask means we are explicitly removing the watch */
        if ((arg & ~DN_MULTISHOT) == 0) {
                dnotify_flush(filp, id);
-                return 0;
+                error = 0;
+                goto out_err;
        }
-        if (!dir_notify_enable)
-                return -EINVAL;
+        /* dnotify only works on directories */
        inode = filp->f_path.dentry->d_inode;
-        if (!S_ISDIR(inode->i_mode))
+        if (!S_ISDIR(inode->i_mode)) {
-                return -ENOTDIR;
+                error = -ENOTDIR;
-        dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);
+                goto out_err;
-        if (dn == NULL)
-                return -ENOMEM;
-        spin_lock(&inode->i_lock);
-        prev = &inode->i_dnotify;
-        while ((odn = *prev) != NULL) {
-                if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
-                        odn->dn_fd = fd;
-                        odn->dn_mask |= arg;
-                        inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
-                        goto out_free;
-                }
-                prev = &odn->dn_next;
        }
-        rcu_read_lock();
+        /* expect most fcntl to add new rather than augment old */
-        f = fcheck(fd);
+        dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
-        rcu_read_unlock();
+        if (!dn) {
-        /* we'd lost the race with close(), sod off silently */
+                error = -ENOMEM;
-        /* note that inode->i_lock prevents reordering problems
+                goto out_err;
-         * between accesses to descriptor table and ->i_dnotify */
+        }
-        if (f != filp)
-                goto out_free;
-        error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+        /* new fsnotify mark, we expect most fcntl calls to add a new mark */
-        if (error)
+        new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL);
-                goto out_free;
+        if (!new_dnentry) {
+                error = -ENOMEM;
+                goto out_err;
+        }
-        dn->dn_mask = arg;
+        /* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
-        dn->dn_fd = fd;
+        mask = convert_arg(arg);
-        dn->dn_filp = filp;
-        dn->dn_owner = id;
-        inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
-        dn->dn_next = inode->i_dnotify;
-        inode->i_dnotify = dn;
-        spin_unlock(&inode->i_lock);
-        return 0;
-out_free:
+        /* set up the new_entry and new_dnentry */
-        spin_unlock(&inode->i_lock);
+        new_entry = &new_dnentry->fsn_entry;
-        kmem_cache_free(dn_cache, dn);
+        fsnotify_init_mark(new_entry, dnotify_free_mark);
-        return error;
+        new_entry->mask = mask;
-}
+        new_dnentry->dn = NULL;
-void __inode_dir_notify(struct inode *inode, unsigned long event)
+        /* this is needed to prevent the fcntl/close race described below */
-{
+        mutex_lock(&dnotify_mark_mutex);
-        struct dnotify_struct * dn;
-        struct dnotify_struct **prev;
-        struct fown_struct *    fown;
-        int                     changed = 0;
+        /* add the new_entry or find an old one. */
        spin_lock(&inode->i_lock);
-        prev = &inode->i_dnotify;
+        entry = fsnotify_find_mark_entry(dnotify_group, inode);
-        while ((dn = *prev) != NULL) {
-                if ((dn->dn_mask & event) == 0) {
-                        prev = &dn->dn_next;
-                        continue;
-                }
-                fown = &dn->dn_filp->f_owner;
-                send_sigio(fown, dn->dn_fd, POLL_MSG);
-                if (dn->dn_mask & DN_MULTISHOT)
-                        prev = &dn->dn_next;
-                else {
-                        *prev = dn->dn_next;
-                        changed = 1;
-                        kmem_cache_free(dn_cache, dn);
-                }
-        }
-        if (changed)
-                redo_inode_mask(inode);
        spin_unlock(&inode->i_lock);
-}
+        if (entry) {
+                dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
-EXPORT_SYMBOL(__inode_dir_notify);
+                spin_lock(&entry->lock);
+        } else {
+                fsnotify_add_mark(new_entry, dnotify_group, inode);
+                spin_lock(&new_entry->lock);
+                entry = new_entry;
+                dnentry = new_dnentry;
+                /* we used new_entry, so don't free it */
+                new_entry = NULL;
+        }
-/*
+        rcu_read_lock();
- * This is hopelessly wrong, but unfixable without API changes.  At
+        f = fcheck(fd);
- * least it doesn't oops the kernel...
+        rcu_read_unlock();
- *
- * To safely access ->d_parent we need to keep d_move away from it.  Use the
- * dentry's d_lock for this.
- */
-void dnotify_parent(struct dentry *dentry, unsigned long event)
-{
-        struct dentry *parent;
-        if (!dir_notify_enable)
+        /* if (f != filp) means that we lost a race and another task/thread
-                return;
+         * actually closed the fd we are still playing with before we grabbed
+         * the dnotify_mark_mutex and entry->lock.  Since closing the fd is the
+         * only time we clean up the mark entries we need to get our mark off
+         * the list. */
+        if (f != filp) {
+                /* if we added ourselves, shoot ourselves, it's possible that
+                 * the flush actually did shoot this entry.  That's fine too
+                 * since multiple calls to destroy_mark is perfectly safe, if
+                 * we found a dnentry already attached to the inode, just sod
+                 * off silently as the flush at close time dealt with it.
+                 */
+                if (dnentry == new_dnentry)
+                        destroy = 1;
+                goto out;
+        }
-        spin_lock(&dentry->d_lock);
+        error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-        parent = dentry->d_parent;
+        if (error) {
-        if (parent->d_inode->i_dnotify_mask & event) {
+                /* if we added, we must shoot */
-                dget(parent);
+                if (dnentry == new_dnentry)
-                spin_unlock(&dentry->d_lock);
+                        destroy = 1;
-                __inode_dir_notify(parent->d_inode, event);
+                goto out;
-                dput(parent);
-        } else {
-                spin_unlock(&dentry->d_lock);
        }
+        error = attach_dn(dn, dnentry, id, fd, filp, mask);
+        /* !error means that we attached the dn to the dnentry, so don't free it */
+        if (!error)
+                dn = NULL;
+        /* -EEXIST means that we didn't add this new dn and used an old one.
+         * that isn't an error (and the unused dn should be freed) */
+        else if (error == -EEXIST)
+                error = 0;
+        dnotify_recalc_inode_mask(entry);
+out:
+        spin_unlock(&entry->lock);
+        if (destroy)
+                fsnotify_destroy_mark_by_entry(entry);
+        fsnotify_recalc_group_mask(dnotify_group);
+        mutex_unlock(&dnotify_mark_mutex);
+        fsnotify_put_mark(entry);
+out_err:
+        if (new_entry)
+                fsnotify_put_mark(new_entry);
+        if (dn)
+                kmem_cache_free(dnotify_struct_cache, dn);
+        return error;
 }
-EXPORT_SYMBOL_GPL(dnotify_parent);
 static int __init dnotify_init(void)
 {
-        dn_cache = kmem_cache_create("dnotify_cache",
+        dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
-                sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL);
+        dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC);
+        dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM,
+                                              0, &dnotify_fsnotify_ops);
+        if (IS_ERR(dnotify_group))
+                panic("unable to allocate fsnotify group for dnotify\n");
        return 0;
 }
author	Eric Paris <eparis@redhat.com>	2009-05-21 17:01:33 -0400
committer	Eric Paris <eparis@redhat.com>	2009-06-11 14:57:53 -0400
commit	3c5119c05d624f95f4967d16b38c9624b816bdb9 (patch)
tree	0b5f66106aea38e52adf62958762b0a975607322 /fs/notify/dnotify/dnotify.c
parent	c28f7e56e9d95fb531dc3be8df2e7f52bee76d21 (diff)

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index b0aa2cde80bd..d9d80f502c6f 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c
@@ -3,6 +3,9 @@
3	*	3	*
4	* Copyright (C) 2000,2001,2002 Stephen Rothwell	4	* Copyright (C) 2000,2001,2002 Stephen Rothwell
5	*	5	*
		6	* Copyright (C) 2009 Eric Paris <Red Hat Inc>
		7	* dnotify was largely rewritten to use the new fsnotify infrastructure
		8	*
6	* This program is free software; you can redistribute it and/or modify it	9	* This program is free software; you can redistribute it and/or modify it
7	* under the terms of the GNU General Public License as published by the	10	* under the terms of the GNU General Public License as published by the
8	* Free Software Foundation; either version 2, or (at your option) any	11	* Free Software Foundation; either version 2, or (at your option) any
@@ -21,24 +24,178 @@
21	#include <linux/spinlock.h>	24	#include <linux/spinlock.h>
22	#include <linux/slab.h>	25	#include <linux/slab.h>
23	#include <linux/fdtable.h>	26	#include <linux/fdtable.h>
		27	#include <linux/fsnotify_backend.h>
24		28
25	int dir_notify_enable __read_mostly = 1;	29	int dir_notify_enable __read_mostly = 1;
26		30
27	static struct kmem_cache *dn_cache __read_mostly;	31	static struct kmem_cache *dnotify_struct_cache __read_mostly;
		32	static struct kmem_cache *dnotify_mark_entry_cache __read_mostly;
		33	static struct fsnotify_group *dnotify_group __read_mostly;
		34	static DEFINE_MUTEX(dnotify_mark_mutex);
		35
		36	/*
		37	* dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which
		38	* is being watched by dnotify. If multiple userspace applications are watching
		39	* the same directory with dnotify their information is chained in dn
		40	*/
		41	struct dnotify_mark_entry {
		42	struct fsnotify_mark_entry fsn_entry;
		43	struct dnotify_struct *dn;
		44	};
28		45
29	static void redo_inode_mask(struct inode *inode)	46	/*
		47	* When a process starts or stops watching an inode the set of events which
		48	* dnotify cares about for that inode may change. This function runs the
		49	* list of everything receiving dnotify events about this directory and calculates
		50	* the set of all those events. After it updates what dnotify is interested in
		51	* it calls the fsnotify function so it can update the set of all events relevant
		52	* to this inode.
		53	*/
		54	static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
30	{	55	{
31	unsigned long new_mask;	56	__u32 new_mask, old_mask;
32	struct dnotify_struct *dn;	57	struct dnotify_struct *dn;
		58	struct dnotify_mark_entry *dnentry = container_of(entry,
		59	struct dnotify_mark_entry,
		60	fsn_entry);
		61
		62	assert_spin_locked(&entry->lock);
33		63
		64	old_mask = entry->mask;
34	new_mask = 0;	65	new_mask = 0;
35	for (dn = inode->i_dnotify; dn != NULL; dn = dn->dn_next)	66	for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next)
36	new_mask \|= dn->dn_mask & ~DN_MULTISHOT;	67	new_mask \|= (dn->dn_mask & ~FS_DN_MULTISHOT);
37	inode->i_dnotify_mask = new_mask;	68	entry->mask = new_mask;
		69
		70	if (old_mask == new_mask)
		71	return;
		72
		73	if (entry->inode)
		74	fsnotify_recalc_inode_mask(entry->inode);
38	}	75	}
39		76
		77	/*
		78	* Main fsnotify call where events are delivered to dnotify.
		79	* Find the dnotify mark on the relevant inode, run the list of dnotify structs
		80	* on that mark and determine which of them has expressed interest in receiving
		81	* events of this type. When found send the correct process and signal and
		82	* destroy the dnotify struct if it was not registered to receive multiple
		83	* events.
		84	*/
		85	static int dnotify_handle_event(struct fsnotify_group *group,
		86	struct fsnotify_event *event)
		87	{
		88	struct fsnotify_mark_entry *entry = NULL;
		89	struct dnotify_mark_entry *dnentry;
		90	struct inode *to_tell;
		91	struct dnotify_struct *dn;
		92	struct dnotify_struct **prev;
		93	struct fown_struct *fown;
		94
		95	to_tell = event->to_tell;
		96
		97	spin_lock(&to_tell->i_lock);
		98	entry = fsnotify_find_mark_entry(group, to_tell);
		99	spin_unlock(&to_tell->i_lock);
		100
		101	/* unlikely since we already passed dnotify_should_send_event() */
		102	if (unlikely(!entry))
		103	return 0;
		104	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
		105
		106	spin_lock(&entry->lock);
		107	prev = &dnentry->dn;
		108	while ((dn = *prev) != NULL) {
		109	if ((dn->dn_mask & event->mask) == 0) {
		110	prev = &dn->dn_next;
		111	continue;
		112	}
		113	fown = &dn->dn_filp->f_owner;
		114	send_sigio(fown, dn->dn_fd, POLL_MSG);
		115	if (dn->dn_mask & FS_DN_MULTISHOT)
		116	prev = &dn->dn_next;
		117	else {
		118	*prev = dn->dn_next;
		119	kmem_cache_free(dnotify_struct_cache, dn);
		120	dnotify_recalc_inode_mask(entry);
		121	}
		122	}
		123
		124	spin_unlock(&entry->lock);
		125	fsnotify_put_mark(entry);
		126
		127	return 0;
		128	}
		129
		130	/*
		131	* Given an inode and mask determine if dnotify would be interested in sending
		132	* userspace notification for that pair.
		133	*/
		134	static bool dnotify_should_send_event(struct fsnotify_group *group,
		135	struct inode *inode, __u32 mask)
		136	{
		137	struct fsnotify_mark_entry *entry;
		138	bool send;
		139
		140	/* !dir_notify_enable should never get here, don't waste time checking
		141	if (!dir_notify_enable)
		142	return 0; */
		143
		144	/* not a dir, dnotify doesn't care */
		145	if (!S_ISDIR(inode->i_mode))
		146	return false;
		147
		148	spin_lock(&inode->i_lock);
		149	entry = fsnotify_find_mark_entry(group, inode);
		150	spin_unlock(&inode->i_lock);
		151
		152	/* no mark means no dnotify watch */
		153	if (!entry)
		154	return false;
		155
		156	spin_lock(&entry->lock);
		157	send = (mask & entry->mask) ? true : false;
		158	spin_unlock(&entry->lock);
		159	fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */
		160
		161	return send;
		162	}
		163
		164	static void dnotify_freeing_mark(struct fsnotify_mark_entry *entry,
		165	struct fsnotify_group *group)
		166	{
		167	/* dnotify doesn't care that an inode is on the way out */
		168	}
		169
		170	static void dnotify_free_mark(struct fsnotify_mark_entry *entry)
		171	{
		172	struct dnotify_mark_entry *dnentry = container_of(entry,
		173	struct dnotify_mark_entry,
		174	fsn_entry);
		175
		176	BUG_ON(dnentry->dn);
		177
		178	kmem_cache_free(dnotify_mark_entry_cache, dnentry);
		179	}
		180
		181	static struct fsnotify_ops dnotify_fsnotify_ops = {
		182	.handle_event = dnotify_handle_event,
		183	.should_send_event = dnotify_should_send_event,
		184	.free_group_priv = NULL,
		185	.freeing_mark = dnotify_freeing_mark,
		186	};
		187
		188	/*
		189	* Called every time a file is closed. Looks first for a dnotify mark on the
		190	* inode. If one is found run all of the ->dn entries attached to that
		191	* mark for one relevant to this process closing the file and remove that
		192	* dnotify_struct. If that was the last dnotify_struct also remove the
		193	* fsnotify_mark_entry.
		194	*/
40	void dnotify_flush(struct file *filp, fl_owner_t id)	195	void dnotify_flush(struct file *filp, fl_owner_t id)
41	{	196	{
		197	struct fsnotify_mark_entry *entry;
		198	struct dnotify_mark_entry *dnentry;
42	struct dnotify_struct *dn;	199	struct dnotify_struct *dn;
43	struct dnotify_struct **prev;	200	struct dnotify_struct **prev;
44	struct inode *inode;	201	struct inode *inode;
@@ -46,145 +203,243 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
46	inode = filp->f_path.dentry->d_inode;	203	inode = filp->f_path.dentry->d_inode;
47	if (!S_ISDIR(inode->i_mode))	204	if (!S_ISDIR(inode->i_mode))
48	return;	205	return;
		206
49	spin_lock(&inode->i_lock);	207	spin_lock(&inode->i_lock);
50	prev = &inode->i_dnotify;	208	entry = fsnotify_find_mark_entry(dnotify_group, inode);
		209	spin_unlock(&inode->i_lock);
		210	if (!entry)
		211	return;
		212	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
		213
		214	mutex_lock(&dnotify_mark_mutex);
		215
		216	spin_lock(&entry->lock);
		217	prev = &dnentry->dn;
51	while ((dn = *prev) != NULL) {	218	while ((dn = *prev) != NULL) {
52	if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {	219	if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
53	*prev = dn->dn_next;	220	*prev = dn->dn_next;
54	redo_inode_mask(inode);	221	kmem_cache_free(dnotify_struct_cache, dn);
55	kmem_cache_free(dn_cache, dn);	222	dnotify_recalc_inode_mask(entry);
56	break;	223	break;
57	}	224	}
58	prev = &dn->dn_next;	225	prev = &dn->dn_next;
59	}	226	}
60	spin_unlock(&inode->i_lock);	227
		228	spin_unlock(&entry->lock);
		229
		230	/* nothing else could have found us thanks to the dnotify_mark_mutex */
		231	if (dnentry->dn == NULL)
		232	fsnotify_destroy_mark_by_entry(entry);
		233
		234	fsnotify_recalc_group_mask(dnotify_group);
		235
		236	mutex_unlock(&dnotify_mark_mutex);
		237
		238	fsnotify_put_mark(entry);
		239	}
		240
		241	/* this conversion is done only at watch creation */
		242	static __u32 convert_arg(unsigned long arg)
		243	{
		244	__u32 new_mask = FS_EVENT_ON_CHILD;
		245
		246	if (arg & DN_MULTISHOT)
		247	new_mask \|= FS_DN_MULTISHOT;
		248	if (arg & DN_DELETE)
		249	new_mask \|= (FS_DELETE \| FS_MOVED_FROM);
		250	if (arg & DN_MODIFY)
		251	new_mask \|= FS_MODIFY;
		252	if (arg & DN_ACCESS)
		253	new_mask \|= FS_ACCESS;
		254	if (arg & DN_ATTRIB)
		255	new_mask \|= FS_ATTRIB;
		256	if (arg & DN_RENAME)
		257	new_mask \|= FS_DN_RENAME;
		258	if (arg & DN_CREATE)
		259	new_mask \|= (FS_CREATE \| FS_MOVED_TO);
		260
		261	return new_mask;
61	}	262	}
62		263
		264	/*
		265	* If multiple processes watch the same inode with dnotify there is only one
		266	* dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct
		267	* onto that mark. This function either attaches the new dnotify_struct onto
		268	* that list, or it \|= the mask onto an existing dnofiy_struct.
		269	*/
		270	static int attach_dn(struct dnotify_struct dn, struct dnotify_mark_entry dnentry,
		271	fl_owner_t id, int fd, struct file *filp, __u32 mask)
		272	{
		273	struct dnotify_struct *odn;
		274
		275	odn = dnentry->dn;
		276	while (odn != NULL) {
		277	/* adding more events to existing dnofiy_struct? */
		278	if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
		279	odn->dn_fd = fd;
		280	odn->dn_mask \|= mask;
		281	return -EEXIST;
		282	}
		283	odn = odn->dn_next;
		284	}
		285
		286	dn->dn_mask = mask;
		287	dn->dn_fd = fd;
		288	dn->dn_filp = filp;
		289	dn->dn_owner = id;
		290	dn->dn_next = dnentry->dn;
		291	dnentry->dn = dn;
		292
		293	return 0;
		294	}
		295
		296	/*
		297	* When a process calls fcntl to attach a dnotify watch to a directory it ends
		298	* up here. Allocate both a mark for fsnotify to add and a dnotify_struct to be
		299	* attached to the fsnotify_mark.
		300	*/
63	int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)	301	int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
64	{	302	{
		303	struct dnotify_mark_entry new_dnentry, dnentry;
		304	struct fsnotify_mark_entry new_entry, entry;
65	struct dnotify_struct *dn;	305	struct dnotify_struct *dn;
66	struct dnotify_struct *odn;
67	struct dnotify_struct **prev;
68	struct inode *inode;	306	struct inode *inode;
69	fl_owner_t id = current->files;	307	fl_owner_t id = current->files;
70	struct file *f;	308	struct file *f;
71	int error = 0;	309	int destroy = 0, error = 0;
		310	__u32 mask;
		311
		312	/* we use these to tell if we need to kfree */
		313	new_entry = NULL;
		314	dn = NULL;
72		315
		316	if (!dir_notify_enable) {
		317	error = -EINVAL;
		318	goto out_err;
		319	}
		320
		321	/* a 0 mask means we are explicitly removing the watch */
73	if ((arg & ~DN_MULTISHOT) == 0) {	322	if ((arg & ~DN_MULTISHOT) == 0) {
74	dnotify_flush(filp, id);	323	dnotify_flush(filp, id);
75	return 0;	324	error = 0;
		325	goto out_err;
76	}	326	}
77	if (!dir_notify_enable)	327
78	return -EINVAL;	328	/* dnotify only works on directories */
79	inode = filp->f_path.dentry->d_inode;	329	inode = filp->f_path.dentry->d_inode;
80	if (!S_ISDIR(inode->i_mode))	330	if (!S_ISDIR(inode->i_mode)) {
81	return -ENOTDIR;	331	error = -ENOTDIR;
82	dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);	332	goto out_err;
83	if (dn == NULL)
84	return -ENOMEM;
85	spin_lock(&inode->i_lock);
86	prev = &inode->i_dnotify;
87	while ((odn = *prev) != NULL) {
88	if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
89	odn->dn_fd = fd;
90	odn->dn_mask \|= arg;
91	inode->i_dnotify_mask \|= arg & ~DN_MULTISHOT;
92	goto out_free;
93	}
94	prev = &odn->dn_next;
95	}	333	}
96		334
97	rcu_read_lock();	335	/* expect most fcntl to add new rather than augment old */
98	f = fcheck(fd);	336	dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
99	rcu_read_unlock();	337	if (!dn) {
100	/* we'd lost the race with close(), sod off silently */	338	error = -ENOMEM;
101	/* note that inode->i_lock prevents reordering problems	339	goto out_err;
102	* between accesses to descriptor table and ->i_dnotify */	340	}
103	if (f != filp)
104	goto out_free;
105		341
106	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);	342	/* new fsnotify mark, we expect most fcntl calls to add a new mark */
107	if (error)	343	new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL);
108	goto out_free;	344	if (!new_dnentry) {
		345	error = -ENOMEM;
		346	goto out_err;
		347	}
109		348
110	dn->dn_mask = arg;	349	/* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
111	dn->dn_fd = fd;	350	mask = convert_arg(arg);
112	dn->dn_filp = filp;
113	dn->dn_owner = id;
114	inode->i_dnotify_mask \|= arg & ~DN_MULTISHOT;
115	dn->dn_next = inode->i_dnotify;
116	inode->i_dnotify = dn;
117	spin_unlock(&inode->i_lock);
118	return 0;
119		351
120	out_free:	352	/* set up the new_entry and new_dnentry */
121	spin_unlock(&inode->i_lock);	353	new_entry = &new_dnentry->fsn_entry;
122	kmem_cache_free(dn_cache, dn);	354	fsnotify_init_mark(new_entry, dnotify_free_mark);
123	return error;	355	new_entry->mask = mask;
124	}	356	new_dnentry->dn = NULL;
125		357
126	void __inode_dir_notify(struct inode *inode, unsigned long event)	358	/* this is needed to prevent the fcntl/close race described below */
127	{	359	mutex_lock(&dnotify_mark_mutex);
128	struct dnotify_struct * dn;
129	struct dnotify_struct **prev;
130	struct fown_struct * fown;
131	int changed = 0;
132		360
		361	/* add the new_entry or find an old one. */
133	spin_lock(&inode->i_lock);	362	spin_lock(&inode->i_lock);
134	prev = &inode->i_dnotify;	363	entry = fsnotify_find_mark_entry(dnotify_group, inode);
135	while ((dn = *prev) != NULL) {
136	if ((dn->dn_mask & event) == 0) {
137	prev = &dn->dn_next;
138	continue;
139	}
140	fown = &dn->dn_filp->f_owner;
141	send_sigio(fown, dn->dn_fd, POLL_MSG);
142	if (dn->dn_mask & DN_MULTISHOT)
143	prev = &dn->dn_next;
144	else {
145	*prev = dn->dn_next;
146	changed = 1;
147	kmem_cache_free(dn_cache, dn);
148	}
149	}
150	if (changed)
151	redo_inode_mask(inode);
152	spin_unlock(&inode->i_lock);	364	spin_unlock(&inode->i_lock);
153	}	365	if (entry) {
154		366	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
155	EXPORT_SYMBOL(__inode_dir_notify);	367	spin_lock(&entry->lock);
		368	} else {
		369	fsnotify_add_mark(new_entry, dnotify_group, inode);
		370	spin_lock(&new_entry->lock);
		371	entry = new_entry;
		372	dnentry = new_dnentry;
		373	/* we used new_entry, so don't free it */
		374	new_entry = NULL;
		375	}
156		376
157	/*	377	rcu_read_lock();
158	* This is hopelessly wrong, but unfixable without API changes. At	378	f = fcheck(fd);
159	* least it doesn't oops the kernel...	379	rcu_read_unlock();
160	*
161	* To safely access ->d_parent we need to keep d_move away from it. Use the
162	* dentry's d_lock for this.
163	*/
164	void dnotify_parent(struct dentry *dentry, unsigned long event)
165	{
166	struct dentry *parent;
167		380
168	if (!dir_notify_enable)	381	/* if (f != filp) means that we lost a race and another task/thread
169	return;	382	* actually closed the fd we are still playing with before we grabbed
		383	* the dnotify_mark_mutex and entry->lock. Since closing the fd is the
		384	* only time we clean up the mark entries we need to get our mark off
		385	* the list. */
		386	if (f != filp) {
		387	/* if we added ourselves, shoot ourselves, it's possible that
		388	* the flush actually did shoot this entry. That's fine too
		389	* since multiple calls to destroy_mark is perfectly safe, if
		390	* we found a dnentry already attached to the inode, just sod
		391	* off silently as the flush at close time dealt with it.
		392	*/
		393	if (dnentry == new_dnentry)
		394	destroy = 1;
		395	goto out;
		396	}
170		397
171	spin_lock(&dentry->d_lock);	398	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
172	parent = dentry->d_parent;	399	if (error) {
173	if (parent->d_inode->i_dnotify_mask & event) {	400	/* if we added, we must shoot */
174	dget(parent);	401	if (dnentry == new_dnentry)
175	spin_unlock(&dentry->d_lock);	402	destroy = 1;
176	__inode_dir_notify(parent->d_inode, event);	403	goto out;
177	dput(parent);
178	} else {
179	spin_unlock(&dentry->d_lock);
180	}	404	}
		405
		406	error = attach_dn(dn, dnentry, id, fd, filp, mask);
		407	/* !error means that we attached the dn to the dnentry, so don't free it */
		408	if (!error)
		409	dn = NULL;
		410	/* -EEXIST means that we didn't add this new dn and used an old one.
		411	* that isn't an error (and the unused dn should be freed) */
		412	else if (error == -EEXIST)
		413	error = 0;
		414
		415	dnotify_recalc_inode_mask(entry);
		416	out:
		417	spin_unlock(&entry->lock);
		418
		419	if (destroy)
		420	fsnotify_destroy_mark_by_entry(entry);
		421
		422	fsnotify_recalc_group_mask(dnotify_group);
		423
		424	mutex_unlock(&dnotify_mark_mutex);
		425	fsnotify_put_mark(entry);
		426	out_err:
		427	if (new_entry)
		428	fsnotify_put_mark(new_entry);
		429	if (dn)
		430	kmem_cache_free(dnotify_struct_cache, dn);
		431	return error;
181	}	432	}
182	EXPORT_SYMBOL_GPL(dnotify_parent);
183		433
184	static int __init dnotify_init(void)	434	static int __init dnotify_init(void)
185	{	435	{
186	dn_cache = kmem_cache_create("dnotify_cache",	436	dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
187	sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL);	437	dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC);
		438
		439	dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM,
		440	0, &dnotify_fsnotify_ops);
		441	if (IS_ERR(dnotify_group))
		442	panic("unable to allocate fsnotify group for dnotify\n");
188	return 0;	443	return 0;
189	}	444	}
190		445