ocfs2: Add dentry tracking API

Replace the dentry vote mechanism with a cluster lock which covers a set of dentries. This allows us to force d_delete() only on nodes which actually care about an unlink. Every node that does a ->lookup() gets a read-only lock on the dentry and holds it until an unlink, during which the unlinking node will request an exclusive lock, forcing the other nodes that care about that dentry to d_delete() it. The effect is that we retain a very lightweight ->d_revalidate(), and at the same time get to make large improvements to the average case performance of the ocfs2 unlink and rename operations. This patch adds the higher level API and the dentry manipulation code. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
author: Mark Fasheh <mark.fasheh@oracle.com> 2006-09-08 17:43:18 -0400
committer: Mark Fasheh <mark.fasheh@oracle.com> 2006-09-24 16:50:43 -0400
commit: 80c05846f604bab6d61e9732c262420ee9f5f358 (patch)
tree: 3fcd80cec6e3a3a1e56abaff0a559817dbcb95a2
parent: d680efe9d8fe0eb99d9dd063a4def6b362cdb40d (diff)
3 files changed, 369 insertions, 32 deletions
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index aea457718946..09efe240e652 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -35,15 +35,17 @@
 #include "alloc.h"
 #include "dcache.h"
+#include "dlmglue.h"
 #include "file.h"
 #include "inode.h"
 static int ocfs2_dentry_revalidate(struct dentry *dentry,
                                   struct nameidata *nd)
 {
        struct inode *inode = dentry->d_inode;
        int ret = 0;    /* if all else fails, just return false */
-        struct ocfs2_super *osb;
+        struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
        mlog_entry("(0x%p, '%.*s')\n", dentry,
                   dentry->d_name.len, dentry->d_name.name);
@@ -55,28 +57,31 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
                goto bail;
        }
-        osb = OCFS2_SB(inode->i_sb);
        BUG_ON(!osb);
-        if (inode != osb->root_inode) {
+        if (inode == osb->root_inode || is_bad_inode(inode))
-                spin_lock(&OCFS2_I(inode)->ip_lock);
+                goto bail;
-                /* did we or someone else delete this inode? */
-                if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
+        spin_lock(&OCFS2_I(inode)->ip_lock);
-                        spin_unlock(&OCFS2_I(inode)->ip_lock);
+        /* did we or someone else delete this inode? */
-                        mlog(0, "inode (%llu) deleted, returning false\n",
+        if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
-                             (unsigned long long)OCFS2_I(inode)->ip_blkno);
-                        goto bail;
-                }
                spin_unlock(&OCFS2_I(inode)->ip_lock);
+                mlog(0, "inode (%llu) deleted, returning false\n",
+                     (unsigned long long)OCFS2_I(inode)->ip_blkno);
+                goto bail;
+        }
+        spin_unlock(&OCFS2_I(inode)->ip_lock);
-                if (!inode->i_nlink) {
+        /*
-                        mlog(0, "Inode %llu orphaned, returning false "
+         * We don't need a cluster lock to test this because once an
-                             "dir = %d\n",
+         * inode nlink hits zero, it never goes back.
-                             (unsigned long long)OCFS2_I(inode)->ip_blkno,
+         */
-                             S_ISDIR(inode->i_mode));
+        if (inode->i_nlink == 0) {
-                        goto bail;
+                mlog(0, "Inode %llu orphaned, returning false "
-                }
+                     "dir = %d\n",
+                     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+                     S_ISDIR(inode->i_mode));
+                goto bail;
        }
        ret = 1;
@@ -87,8 +92,340 @@ bail:
        return ret;
 }
+static int ocfs2_match_dentry(struct dentry *dentry,
+                              u64 parent_blkno,
+                              int skip_unhashed)
+{
+        struct inode *parent;
+        /*
+         * ocfs2_lookup() does a d_splice_alias() _before_ attaching
+         * to the lock data, so we skip those here, otherwise
+         * ocfs2_dentry_attach_lock() will get its original dentry
+         * back.
+         */
+        if (!dentry->d_fsdata)
+                return 0;
+        if (!dentry->d_parent)
+                return 0;
+        if (skip_unhashed && d_unhashed(dentry))
+                return 0;
+        parent = dentry->d_parent->d_inode;
+        /* Negative parent dentry? */
+        if (!parent)
+                return 0;
+        /* Name is in a different directory. */
+        if (OCFS2_I(parent)->ip_blkno != parent_blkno)
+                return 0;
+        return 1;
+}
+/*
+ * Walk the inode alias list, and find a dentry which has a given
+ * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
+ * is looking for a dentry_lock reference. The vote thread is looking
+ * to unhash aliases, so we allow it to skip any that already have
+ * that property.
+ */
+struct dentry *ocfs2_find_local_alias(struct inode *inode,
+                                      u64 parent_blkno,
+                                      int skip_unhashed)
+{
+        struct list_head *p;
+        struct dentry *dentry = NULL;
+        spin_lock(&dcache_lock);
+        list_for_each(p, &inode->i_dentry) {
+                dentry = list_entry(p, struct dentry, d_alias);
+                if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
+                        mlog(0, "dentry found: %.*s\n",
+                             dentry->d_name.len, dentry->d_name.name);
+                        dget_locked(dentry);
+                        break;
+                }
+                dentry = NULL;
+        }
+        spin_unlock(&dcache_lock);
+        return dentry;
+}
 DEFINE_SPINLOCK(dentry_attach_lock);
+/*
+ * Attach this dentry to a cluster lock.
+ *
+ * Dentry locks cover all links in a given directory to a particular
+ * inode. We do this so that ocfs2 can build a lock name which all
+ * nodes in the cluster can agree on at all times. Shoving full names
+ * in the cluster lock won't work due to size restrictions. Covering
+ * links inside of a directory is a good compromise because it still
+ * allows us to use the parent directory lock to synchronize
+ * operations.
+ *
+ * Call this function with the parent dir semaphore and the parent dir
+ * cluster lock held.
+ *
+ * The dir semaphore will protect us from having to worry about
+ * concurrent processes on our node trying to attach a lock at the
+ * same time.
+ *
+ * The dir cluster lock (held at either PR or EX mode) protects us
+ * from unlink and rename on other nodes.
+ *
+ * The 'create' flag tells us whether we're doing this as a result of
+ * a file creation.
+ *
+ * A dput() can happen asynchronously due to pruning, so we cover
+ * attaching and detaching the dentry lock with a
+ * dentry_attach_lock.
+ *
+ * A node which has done lookup on a name retains a protected read
+ * lock until final dput. If the user requests an unlink or rename,
+ * the protected read is upgraded to an exclusive lock. Other nodes
+ * who have seen the dentry will then be informed that they need to
+ * downgrade their lock, which will involve d_delete on the
+ * dentry. This happens in ocfs2_dentry_convert_worker().
+ */
+int ocfs2_dentry_attach_lock(struct dentry *dentry,
+                             struct inode *inode,
+                             u64 parent_blkno,
+                             int create)
+{
+        int ret;
+        struct dentry *alias;
+        struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+        mlog(0, "Attach \"%.*s\", parent %llu, create %d, fsdata: %p\n",
+             dentry->d_name.len, dentry->d_name.name,
+             (unsigned long long)parent_blkno, create, dl);
+        /*
+         * Negative dentry. We ignore these for now.
+         *
+         * XXX: Could we improve ocfs2_dentry_revalidate() by
+         * tracking these?
+         */
+        if (!inode)
+                return 0;
+        if (dl) {
+                mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
+                                " \"%.*s\": old parent: %llu, new: %llu\n",
+                                dentry->d_name.len, dentry->d_name.name,
+                                (unsigned long long)parent_blkno,
+                                (unsigned long long)dl->dl_parent_blkno);
+                return 0;
+        }
+        alias = ocfs2_find_local_alias(inode, parent_blkno, 0);
+        if (alias) {
+                /*
+                 * Great, an alias exists, which means we must have a
+                 * dentry lock already. We can just grab the lock off
+                 * the alias and add it to the list.
+                 *
+                 * We're depending here on the fact that this dentry
+                 * was found and exists in the dcache and so must have
+                 * a reference to the dentry_lock because we can't
+                 * race creates. Final dput() cannot happen on it
+                 * since we have it pinned, so our reference is safe.
+                 */
+                dl = alias->d_fsdata;
+                mlog_bug_on_msg(!dl, "parent %llu, ino %llu, create %d\n",
+                                (unsigned long long)parent_blkno,
+                                (unsigned long long)OCFS2_I(inode)->ip_blkno,
+                                create);
+                mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
+                                " \"%.*s\": old parent: %llu, new: %llu\n",
+                                dentry->d_name.len, dentry->d_name.name,
+                                (unsigned long long)parent_blkno,
+                                (unsigned long long)dl->dl_parent_blkno);
+                mlog(0, "Found: %s\n", dl->dl_lockres.l_name);
+                goto out_attach;
+        }
+        /*
+         * There are no other aliases
+         */
+        dl = kmalloc(sizeof(*dl), GFP_NOFS);
+        if (!dl) {
+                ret = -ENOMEM;
+                mlog_errno(ret);
+                return ret;
+        }
+        dl->dl_count = 0;
+        /*
+         * Does this have to happen below, for all attaches, in case
+         * the struct inode gets blown away by votes?
+         */
+        dl->dl_inode = igrab(inode);
+        dl->dl_parent_blkno = parent_blkno;
+        ocfs2_dentry_lock_res_init(dl, parent_blkno, inode);
+out_attach:
+        spin_lock(&dentry_attach_lock);
+        dentry->d_fsdata = dl;
+        dl->dl_count++;
+        spin_unlock(&dentry_attach_lock);
+        /*
+         * Creation of a new file means that nobody can possibly have
+         * this name in the system, which means that acquiry of those
+         * locks can easily be optimized.
+         */
+        if (create) {
+                ret = ocfs2_create_new_lock(OCFS2_SB(inode->i_sb),
+                                            &dl->dl_lockres, 0);
+                if (ret)
+                        mlog_errno(ret);
+                goto out;
+        }
+        /*
+         * This actually gets us our PRMODE level lock. From now on,
+         * we'll have a notification if one of these names is
+         * destroyed on another node.
+         */
+        ret = ocfs2_dentry_lock(dentry, 0);
+        if (ret) {
+                mlog_errno(ret);
+                goto out;
+        }
+        ocfs2_dentry_unlock(dentry, 0);
+out:
+        dput(alias);
+        return ret;
+}
+/*
+ * ocfs2_dentry_iput() and friends.
+ *
+ * At this point, our particular dentry is detached from the inodes
+ * alias list, so there's no way that the locking code can find it.
+ *
+ * The interesting stuff happens when we determine that our lock needs
+ * to go away because this is the last subdir alias in the
+ * system. This function needs to handle a couple things:
+ *
+ * 1) Synchronizing lock shutdown with the downconvert threads. This
+ *    is already handled for us via the lockres release drop function
+ *    called in ocfs2_release_dentry_lock()
+ *
+ * 2) A race may occur when we're doing our lock shutdown and
+ *    another process wants to create a new dentry lock. Right now we
+ *    let them race, which means that for a very short while, this
+ *    node might have two locks on a lock resource. This shouldn't be a
+ *    problem though because one of them is in the process of being
+ *    thrown out.
+ */
+static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
+                                   struct ocfs2_dentry_lock *dl)
+{
+        ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
+        ocfs2_lock_res_free(&dl->dl_lockres);
+        iput(dl->dl_inode);
+        kfree(dl);
+}
+void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
+                           struct ocfs2_dentry_lock *dl)
+{
+        int unlock = 0;
+        BUG_ON(dl->dl_count == 0);
+        spin_lock(&dentry_attach_lock);
+        dl->dl_count--;
+        unlock = !dl->dl_count;
+        spin_unlock(&dentry_attach_lock);
+        if (unlock)
+                ocfs2_drop_dentry_lock(osb, dl);
+}
+static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
+{
+        struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+        mlog_bug_on_msg(!dl && !(dentry->d_flags & DCACHE_DISCONNECTED),
+                        "dentry: %.*s\n", dentry->d_name.len,
+                        dentry->d_name.name);
+        if (!dl)
+                goto out;
+        mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
+                        dentry->d_name.len, dentry->d_name.name,
+                        dl->dl_count);
+        ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
+out:
+        iput(inode);
+}
+/*
+ * d_move(), but keep the locks in sync.
+ *
+ * When we are done, "dentry" will have the parent dir and name of
+ * "target", which will be thrown away.
+ *
+ * We manually update the lock of "dentry" if need be.
+ *
+ * "target" doesn't have its dentry lock touched - we allow the later
+ * dput() to handle this for us.
+ *
+ * This is called during ocfs2_rename(), while holding parent
+ * directory locks. The dentries have already been deleted on other
+ * nodes via ocfs2_remote_dentry_delete().
+ *
+ * Normally, the VFS handles the d_move() for the file system, after
+ * the ->rename() callback. OCFS2 wants to handle this internally, so
+ * the new lock can be created atomically with respect to the cluster.
+ */
+void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
+                       struct inode *old_dir, struct inode *new_dir)
+{
+        int ret;
+        struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
+        struct inode *inode = dentry->d_inode;
+        /*
+         * Move within the same directory, so the actual lock info won't
+         * change.
+         *
+         * XXX: Is there any advantage to dropping the lock here?
+         */
+        if (old_dir == new_dir)
+                return;
+        ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
+        dentry->d_fsdata = NULL;
+        ret = ocfs2_dentry_attach_lock(dentry, inode,
+                                       OCFS2_I(new_dir)->ip_blkno, 0);
+        if (ret)
+                mlog_errno(ret);
+}
 struct dentry_operations ocfs2_dentry_ops = {
        .d_revalidate           = ocfs2_dentry_revalidate,
+        .d_iput                 = ocfs2_dentry_iput,
 };
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index f1423c2134ee..e53abe766cab 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -41,17 +41,17 @@ struct ocfs2_dentry_lock {
        struct ocfs2_lock_res   dl_lockres;
 };
-static inline void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
+int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
-                                         struct ocfs2_dentry_lock *dl)
+                             u64 parent_blkno, int create);
-{
-}
+void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
+                           struct ocfs2_dentry_lock *dl);
-static inline struct dentry *ocfs2_find_local_alias(struct inode *inode,
-                                                    u64 parent_blkno,
+struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
-                                                    int skip_unhashed)
+                                      int skip_unhashed);
-{
-        return NULL;
+void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
-}
+                       struct inode *old_dir, struct inode *new_dir);
 extern spinlock_t dentry_attach_lock;
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index fc29cb7a437d..98435002ac44 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -28,11 +28,11 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
-#include "ocfs2.h"
 #define MLOG_MASK_PREFIX ML_INODE
 #include <cluster/masklog.h>
+#include "ocfs2.h"
 #include "alloc.h"
 #include "dir.h"
 #include "inode.h"
author	Mark Fasheh <mark.fasheh@oracle.com>	2006-09-08 17:43:18 -0400
committer	Mark Fasheh <mark.fasheh@oracle.com>	2006-09-24 16:50:43 -0400
commit	80c05846f604bab6d61e9732c262420ee9f5f358 (patch)
tree	3fcd80cec6e3a3a1e56abaff0a559817dbcb95a2
parent	d680efe9d8fe0eb99d9dd063a4def6b362cdb40d (diff)

diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index aea457718946..09efe240e652 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c
@@ -35,15 +35,17 @@
35		35
36	#include "alloc.h"	36	#include "alloc.h"
37	#include "dcache.h"	37	#include "dcache.h"
		38	#include "dlmglue.h"
38	#include "file.h"	39	#include "file.h"
39	#include "inode.h"	40	#include "inode.h"
40		41
		42
41	static int ocfs2_dentry_revalidate(struct dentry *dentry,	43	static int ocfs2_dentry_revalidate(struct dentry *dentry,
42	struct nameidata *nd)	44	struct nameidata *nd)
43	{	45	{
44	struct inode *inode = dentry->d_inode;	46	struct inode *inode = dentry->d_inode;
45	int ret = 0; /* if all else fails, just return false */	47	int ret = 0; /* if all else fails, just return false */
46	struct ocfs2_super *osb;	48	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
47		49
48	mlog_entry("(0x%p, '%.*s')\n", dentry,	50	mlog_entry("(0x%p, '%.*s')\n", dentry,
49	dentry->d_name.len, dentry->d_name.name);	51	dentry->d_name.len, dentry->d_name.name);
@@ -55,28 +57,31 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
55	goto bail;	57	goto bail;
56	}	58	}
57		59
58	osb = OCFS2_SB(inode->i_sb);
59
60	BUG_ON(!osb);	60	BUG_ON(!osb);
61		61
62	if (inode != osb->root_inode) {	62	if (inode == osb->root_inode || is_bad_inode(inode))
63	spin_lock(&OCFS2_I(inode)->ip_lock);	63	goto bail;
64	/* did we or someone else delete this inode? */	64
65	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {	65	spin_lock(&OCFS2_I(inode)->ip_lock);
66	spin_unlock(&OCFS2_I(inode)->ip_lock);	66	/* did we or someone else delete this inode? */
67	mlog(0, "inode (%llu) deleted, returning false\n",	67	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
68	(unsigned long long)OCFS2_I(inode)->ip_blkno);
69	goto bail;
70	}
71	spin_unlock(&OCFS2_I(inode)->ip_lock);	68	spin_unlock(&OCFS2_I(inode)->ip_lock);
		69	mlog(0, "inode (%llu) deleted, returning false\n",
		70	(unsigned long long)OCFS2_I(inode)->ip_blkno);
		71	goto bail;
		72	}
		73	spin_unlock(&OCFS2_I(inode)->ip_lock);
72		74
73	if (!inode->i_nlink) {	75	/*
74	mlog(0, "Inode %llu orphaned, returning false "	76	* We don't need a cluster lock to test this because once an
75	"dir = %d\n",	77	* inode nlink hits zero, it never goes back.
76	(unsigned long long)OCFS2_I(inode)->ip_blkno,	78	*/
77	S_ISDIR(inode->i_mode));	79	if (inode->i_nlink == 0) {
78	goto bail;	80	mlog(0, "Inode %llu orphaned, returning false "
79	}	81	"dir = %d\n",
		82	(unsigned long long)OCFS2_I(inode)->ip_blkno,
		83	S_ISDIR(inode->i_mode));
		84	goto bail;
80	}	85	}
81		86
82	ret = 1;	87	ret = 1;
@@ -87,8 +92,340 @@ bail:
87	return ret;	92	return ret;
88	}	93	}
89		94
		95	static int ocfs2_match_dentry(struct dentry *dentry,
		96	u64 parent_blkno,
		97	int skip_unhashed)
		98	{
		99	struct inode *parent;
		100
		101	/*
		102	* ocfs2_lookup() does a d_splice_alias() _before_ attaching
		103	* to the lock data, so we skip those here, otherwise
		104	* ocfs2_dentry_attach_lock() will get its original dentry
		105	* back.
		106	*/
		107	if (!dentry->d_fsdata)
		108	return 0;
		109
		110	if (!dentry->d_parent)
		111	return 0;
		112
		113	if (skip_unhashed && d_unhashed(dentry))
		114	return 0;
		115
		116	parent = dentry->d_parent->d_inode;
		117	/* Negative parent dentry? */
		118	if (!parent)
		119	return 0;
		120
		121	/* Name is in a different directory. */
		122	if (OCFS2_I(parent)->ip_blkno != parent_blkno)
		123	return 0;
		124
		125	return 1;
		126	}
		127
		128	/*
		129	* Walk the inode alias list, and find a dentry which has a given
		130	* parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
		131	* is looking for a dentry_lock reference. The vote thread is looking
		132	* to unhash aliases, so we allow it to skip any that already have
		133	* that property.
		134	*/
		135	struct dentry *ocfs2_find_local_alias(struct inode *inode,
		136	u64 parent_blkno,
		137	int skip_unhashed)
		138	{
		139	struct list_head *p;
		140	struct dentry *dentry = NULL;
		141
		142	spin_lock(&dcache_lock);
		143
		144	list_for_each(p, &inode->i_dentry) {
		145	dentry = list_entry(p, struct dentry, d_alias);
		146
		147	if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
		148	mlog(0, "dentry found: %.*s\n",
		149	dentry->d_name.len, dentry->d_name.name);
		150
		151	dget_locked(dentry);
		152	break;
		153	}
		154
		155	dentry = NULL;
		156	}
		157
		158	spin_unlock(&dcache_lock);
		159
		160	return dentry;
		161	}
		162
90	DEFINE_SPINLOCK(dentry_attach_lock);	163	DEFINE_SPINLOCK(dentry_attach_lock);
91		164
		165	/*
		166	* Attach this dentry to a cluster lock.
		167	*
		168	* Dentry locks cover all links in a given directory to a particular
		169	* inode. We do this so that ocfs2 can build a lock name which all
		170	* nodes in the cluster can agree on at all times. Shoving full names
		171	* in the cluster lock won't work due to size restrictions. Covering
		172	* links inside of a directory is a good compromise because it still
		173	* allows us to use the parent directory lock to synchronize
		174	* operations.
		175	*
		176	* Call this function with the parent dir semaphore and the parent dir
		177	* cluster lock held.
		178	*
		179	* The dir semaphore will protect us from having to worry about
		180	* concurrent processes on our node trying to attach a lock at the
		181	* same time.
		182	*
		183	* The dir cluster lock (held at either PR or EX mode) protects us
		184	* from unlink and rename on other nodes.
		185	*
		186	* The 'create' flag tells us whether we're doing this as a result of
		187	* a file creation.
		188	*
		189	* A dput() can happen asynchronously due to pruning, so we cover
		190	* attaching and detaching the dentry lock with a
		191	* dentry_attach_lock.
		192	*
		193	* A node which has done lookup on a name retains a protected read
		194	* lock until final dput. If the user requests an unlink or rename,
		195	* the protected read is upgraded to an exclusive lock. Other nodes
		196	* who have seen the dentry will then be informed that they need to
		197	* downgrade their lock, which will involve d_delete on the
		198	* dentry. This happens in ocfs2_dentry_convert_worker().
		199	*/
		200	int ocfs2_dentry_attach_lock(struct dentry *dentry,
		201	struct inode *inode,
		202	u64 parent_blkno,
		203	int create)
		204	{
		205	int ret;
		206	struct dentry *alias;
		207	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
		208
		209	mlog(0, "Attach \"%.*s\", parent %llu, create %d, fsdata: %p\n",
		210	dentry->d_name.len, dentry->d_name.name,
		211	(unsigned long long)parent_blkno, create, dl);
		212
		213	/*
		214	* Negative dentry. We ignore these for now.
		215	*
		216	* XXX: Could we improve ocfs2_dentry_revalidate() by
		217	* tracking these?
		218	*/
		219	if (!inode)
		220	return 0;
		221
		222	if (dl) {
		223	mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
		224	" \"%.*s\": old parent: %llu, new: %llu\n",
		225	dentry->d_name.len, dentry->d_name.name,
		226	(unsigned long long)parent_blkno,
		227	(unsigned long long)dl->dl_parent_blkno);
		228	return 0;
		229	}
		230
		231	alias = ocfs2_find_local_alias(inode, parent_blkno, 0);
		232	if (alias) {
		233	/*
		234	* Great, an alias exists, which means we must have a
		235	* dentry lock already. We can just grab the lock off
		236	* the alias and add it to the list.
		237	*
		238	* We're depending here on the fact that this dentry
		239	* was found and exists in the dcache and so must have
		240	* a reference to the dentry_lock because we can't
		241	* race creates. Final dput() cannot happen on it
		242	* since we have it pinned, so our reference is safe.
		243	*/
		244	dl = alias->d_fsdata;
		245	mlog_bug_on_msg(!dl, "parent %llu, ino %llu, create %d\n",
		246	(unsigned long long)parent_blkno,
		247	(unsigned long long)OCFS2_I(inode)->ip_blkno,
		248	create);
		249
		250	mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
		251	" \"%.*s\": old parent: %llu, new: %llu\n",
		252	dentry->d_name.len, dentry->d_name.name,
		253	(unsigned long long)parent_blkno,
		254	(unsigned long long)dl->dl_parent_blkno);
		255
		256	mlog(0, "Found: %s\n", dl->dl_lockres.l_name);
		257
		258	goto out_attach;
		259	}
		260
		261	/*
		262	* There are no other aliases
		263	*/
		264	dl = kmalloc(sizeof(*dl), GFP_NOFS);
		265	if (!dl) {
		266	ret = -ENOMEM;
		267	mlog_errno(ret);
		268	return ret;
		269	}
		270
		271	dl->dl_count = 0;
		272	/*
		273	* Does this have to happen below, for all attaches, in case
		274	* the struct inode gets blown away by votes?
		275	*/
		276	dl->dl_inode = igrab(inode);
		277	dl->dl_parent_blkno = parent_blkno;
		278	ocfs2_dentry_lock_res_init(dl, parent_blkno, inode);
		279
		280	out_attach:
		281	spin_lock(&dentry_attach_lock);
		282	dentry->d_fsdata = dl;
		283	dl->dl_count++;
		284	spin_unlock(&dentry_attach_lock);
		285
		286	/*
		287	* Creation of a new file means that nobody can possibly have
		288	* this name in the system, which means that acquiry of those
		289	* locks can easily be optimized.
		290	*/
		291	if (create) {
		292	ret = ocfs2_create_new_lock(OCFS2_SB(inode->i_sb),
		293	&dl->dl_lockres, 0);
		294	if (ret)
		295	mlog_errno(ret);
		296	goto out;
		297	}
		298
		299	/*
		300	* This actually gets us our PRMODE level lock. From now on,
		301	* we'll have a notification if one of these names is
		302	* destroyed on another node.
		303	*/
		304	ret = ocfs2_dentry_lock(dentry, 0);
		305	if (ret) {
		306	mlog_errno(ret);
		307	goto out;
		308	}
		309	ocfs2_dentry_unlock(dentry, 0);
		310
		311	out:
		312	dput(alias);
		313
		314	return ret;
		315	}
		316
		317	/*
		318	* ocfs2_dentry_iput() and friends.
		319	*
		320	* At this point, our particular dentry is detached from the inodes
		321	* alias list, so there's no way that the locking code can find it.
		322	*
		323	* The interesting stuff happens when we determine that our lock needs
		324	* to go away because this is the last subdir alias in the
		325	* system. This function needs to handle a couple things:
		326	*
		327	* 1) Synchronizing lock shutdown with the downconvert threads. This
		328	* is already handled for us via the lockres release drop function
		329	* called in ocfs2_release_dentry_lock()
		330	*
		331	* 2) A race may occur when we're doing our lock shutdown and
		332	* another process wants to create a new dentry lock. Right now we
		333	* let them race, which means that for a very short while, this
		334	* node might have two locks on a lock resource. This shouldn't be a
		335	* problem though because one of them is in the process of being
		336	* thrown out.
		337	*/
		338	static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
		339	struct ocfs2_dentry_lock *dl)
		340	{
		341	ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
		342	ocfs2_lock_res_free(&dl->dl_lockres);
		343	iput(dl->dl_inode);
		344	kfree(dl);
		345	}
		346
		347	void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
		348	struct ocfs2_dentry_lock *dl)
		349	{
		350	int unlock = 0;
		351
		352	BUG_ON(dl->dl_count == 0);
		353
		354	spin_lock(&dentry_attach_lock);
		355	dl->dl_count--;
		356	unlock = !dl->dl_count;
		357	spin_unlock(&dentry_attach_lock);
		358
		359	if (unlock)
		360	ocfs2_drop_dentry_lock(osb, dl);
		361	}
		362
		363	static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
		364	{
		365	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
		366
		367	mlog_bug_on_msg(!dl && !(dentry->d_flags & DCACHE_DISCONNECTED),
		368	"dentry: %.*s\n", dentry->d_name.len,
		369	dentry->d_name.name);
		370
		371	if (!dl)
		372	goto out;
		373
		374	mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
		375	dentry->d_name.len, dentry->d_name.name,
		376	dl->dl_count);
		377
		378	ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
		379
		380	out:
		381	iput(inode);
		382	}
		383
		384	/*
		385	* d_move(), but keep the locks in sync.
		386	*
		387	* When we are done, "dentry" will have the parent dir and name of
		388	* "target", which will be thrown away.
		389	*
		390	* We manually update the lock of "dentry" if need be.
		391	*
		392	* "target" doesn't have its dentry lock touched - we allow the later
		393	* dput() to handle this for us.
		394	*
		395	* This is called during ocfs2_rename(), while holding parent
		396	* directory locks. The dentries have already been deleted on other
		397	* nodes via ocfs2_remote_dentry_delete().
		398	*
		399	* Normally, the VFS handles the d_move() for the file system, after
		400	* the ->rename() callback. OCFS2 wants to handle this internally, so
		401	* the new lock can be created atomically with respect to the cluster.
		402	*/
		403	void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
		404	struct inode *old_dir, struct inode *new_dir)
		405	{
		406	int ret;
		407	struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
		408	struct inode *inode = dentry->d_inode;
		409
		410	/*
		411	* Move within the same directory, so the actual lock info won't
		412	* change.
		413	*
		414	* XXX: Is there any advantage to dropping the lock here?
		415	*/
		416	if (old_dir == new_dir)
		417	return;
		418
		419	ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
		420
		421	dentry->d_fsdata = NULL;
		422	ret = ocfs2_dentry_attach_lock(dentry, inode,
		423	OCFS2_I(new_dir)->ip_blkno, 0);
		424	if (ret)
		425	mlog_errno(ret);
		426	}
		427
92	struct dentry_operations ocfs2_dentry_ops = {	428	struct dentry_operations ocfs2_dentry_ops = {
93	.d_revalidate = ocfs2_dentry_revalidate,	429	.d_revalidate = ocfs2_dentry_revalidate,
		430	.d_iput = ocfs2_dentry_iput,
94	};	431	};


diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h index f1423c2134ee..e53abe766cab 100644 --- a/fs/ocfs2/dcache.h +++ b/fs/ocfs2/dcache.h
@@ -41,17 +41,17 @@ struct ocfs2_dentry_lock {
41	struct ocfs2_lock_res dl_lockres;	41	struct ocfs2_lock_res dl_lockres;
42	};	42	};
43		43
44	static inline void ocfs2_dentry_lock_put(struct ocfs2_super *osb,	44	int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
45	struct ocfs2_dentry_lock *dl)	45	u64 parent_blkno, int create);
46	{	46
47	}	47	void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
48		48	struct ocfs2_dentry_lock *dl);
49	static inline struct dentry *ocfs2_find_local_alias(struct inode *inode,	49
50	u64 parent_blkno,	50	struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
51	int skip_unhashed)	51	int skip_unhashed);
52	{	52
53	return NULL;	53	void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
54	}	54	struct inode *old_dir, struct inode *new_dir);
55		55
56	extern spinlock_t dentry_attach_lock;	56	extern spinlock_t dentry_attach_lock;
57		57


diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index fc29cb7a437d..98435002ac44 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c
@@ -28,11 +28,11 @@
28	#include <linux/slab.h>	28	#include <linux/slab.h>
29	#include <linux/highmem.h>	29	#include <linux/highmem.h>
30		30
31	#include "ocfs2.h"
32
33	#define MLOG_MASK_PREFIX ML_INODE	31	#define MLOG_MASK_PREFIX ML_INODE
34	#include <cluster/masklog.h>	32	#include <cluster/masklog.h>
35		33
		34	#include "ocfs2.h"
		35
36	#include "alloc.h"	36	#include "alloc.h"
37	#include "dir.h"	37	#include "dir.h"
38	#include "inode.h"	38	#include "inode.h"