aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@g5.osdl.org>2006-09-24 18:28:50 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-09-24 18:28:50 -0400
commitb7a818e4fcd2c3ee8c34c2367d345561c4c76a15 (patch)
tree7d05ad4a8eadd18cce315af47ccd91304c4f72c3
parent398477d4bd57cc33792fd93035c2763ad78629c4 (diff)
parent0d5dc6c2dd7a3cd2b2f505b0625c4ec9c0e5b4f0 (diff)
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (28 commits) ocfs2: Teach ocfs2_drop_lock() to use ->set_lvb() callback ocfs2: Remove ->unblock lockres operation ocfs2: move downconvert worker to lockres ops ocfs2: Remove unused dlmglue functions ocfs2: Have the metadata lock use generic dlmglue functions ocfs2: Add ->set_lvb callback in dlmglue ocfs2: Add ->check_downconvert callback in dlmglue ocfs2: Check for refreshing locks in generic unblock function ocfs2: don't unconditionally pass LVB flags ocfs2: combine inode and generic blocking AST functions ocfs2: Add ->get_osb() dlmglue locking operation ocfs2: remove ->unlock_ast() callback from ocfs2_lock_res_ops ocfs2: combine inode and generic AST functions ocfs2: Clean up lock resource refresh flags ocfs2: Remove i_generation from inode lock names ocfs2: Encode i_generation in the meta data lvb ocfs2: Free up some space in the lvb ocfs2: Remove special casing for inode creation in ocfs2_dentry_attach_lock() ocfs2: manually d_move() during ocfs2_rename() [PATCH] Allow file systems to manually d_move() inside of ->rename() ...
-rw-r--r--fs/namei.c6
-rw-r--r--fs/nfs/dir.c3
-rw-r--r--fs/nfs/super.c10
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h8
-rw-r--r--fs/ocfs2/dcache.c359
-rw-r--r--fs/ocfs2/dcache.h27
-rw-r--r--fs/ocfs2/dlm/dlmapi.h1
-rw-r--r--fs/ocfs2/dlm/dlmast.c6
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h1
-rw-r--r--fs/ocfs2/dlm/dlmlock.c10
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c4
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c3
-rw-r--r--fs/ocfs2/dlm/userdlm.c81
-rw-r--r--fs/ocfs2/dlm/userdlm.h1
-rw-r--r--fs/ocfs2/dlmglue.c1094
-rw-r--r--fs/ocfs2/dlmglue.h21
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/inode.c156
-rw-r--r--fs/ocfs2/inode.h8
-rw-r--r--fs/ocfs2/journal.c3
-rw-r--r--fs/ocfs2/namei.c116
-rw-r--r--fs/ocfs2/ocfs2_lockid.h25
-rw-r--r--fs/ocfs2/super.c6
-rw-r--r--fs/ocfs2/sysfile.c6
-rw-r--r--fs/ocfs2/vote.c180
-rw-r--r--fs/ocfs2/vote.h5
-rw-r--r--include/linux/fs.h7
27 files changed, 1245 insertions, 910 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 432d6bc6fab0..6b591c01b09f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2370,7 +2370,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2370 dput(new_dentry); 2370 dput(new_dentry);
2371 } 2371 }
2372 if (!error) 2372 if (!error)
2373 d_move(old_dentry,new_dentry); 2373 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
2374 d_move(old_dentry,new_dentry);
2374 return error; 2375 return error;
2375} 2376}
2376 2377
@@ -2393,8 +2394,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2393 else 2394 else
2394 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2395 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2395 if (!error) { 2396 if (!error) {
2396 /* The following d_move() should become unconditional */ 2397 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
2397 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
2398 d_move(old_dentry, new_dentry); 2398 d_move(old_dentry, new_dentry);
2399 } 2399 }
2400 if (target) 2400 if (target)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3419c2da9ba9..7432f1a43f3d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1669,8 +1669,7 @@ out:
1669 if (rehash) 1669 if (rehash)
1670 d_rehash(rehash); 1670 d_rehash(rehash);
1671 if (!error) { 1671 if (!error) {
1672 if (!S_ISDIR(old_inode->i_mode)) 1672 d_move(old_dentry, new_dentry);
1673 d_move(old_dentry, new_dentry);
1674 nfs_renew_times(new_dentry); 1673 nfs_renew_times(new_dentry);
1675 nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); 1674 nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir));
1676 } 1675 }
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b99113b0f65f..e8d40030cab4 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -71,7 +71,7 @@ static struct file_system_type nfs_fs_type = {
71 .name = "nfs", 71 .name = "nfs",
72 .get_sb = nfs_get_sb, 72 .get_sb = nfs_get_sb,
73 .kill_sb = nfs_kill_super, 73 .kill_sb = nfs_kill_super,
74 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 74 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
75}; 75};
76 76
77struct file_system_type nfs_xdev_fs_type = { 77struct file_system_type nfs_xdev_fs_type = {
@@ -79,7 +79,7 @@ struct file_system_type nfs_xdev_fs_type = {
79 .name = "nfs", 79 .name = "nfs",
80 .get_sb = nfs_xdev_get_sb, 80 .get_sb = nfs_xdev_get_sb,
81 .kill_sb = nfs_kill_super, 81 .kill_sb = nfs_kill_super,
82 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 82 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
83}; 83};
84 84
85static struct super_operations nfs_sops = { 85static struct super_operations nfs_sops = {
@@ -107,7 +107,7 @@ static struct file_system_type nfs4_fs_type = {
107 .name = "nfs4", 107 .name = "nfs4",
108 .get_sb = nfs4_get_sb, 108 .get_sb = nfs4_get_sb,
109 .kill_sb = nfs4_kill_super, 109 .kill_sb = nfs4_kill_super,
110 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 110 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
111}; 111};
112 112
113struct file_system_type nfs4_xdev_fs_type = { 113struct file_system_type nfs4_xdev_fs_type = {
@@ -115,7 +115,7 @@ struct file_system_type nfs4_xdev_fs_type = {
115 .name = "nfs4", 115 .name = "nfs4",
116 .get_sb = nfs4_xdev_get_sb, 116 .get_sb = nfs4_xdev_get_sb,
117 .kill_sb = nfs4_kill_super, 117 .kill_sb = nfs4_kill_super,
118 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 118 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
119}; 119};
120 120
121struct file_system_type nfs4_referral_fs_type = { 121struct file_system_type nfs4_referral_fs_type = {
@@ -123,7 +123,7 @@ struct file_system_type nfs4_referral_fs_type = {
123 .name = "nfs4", 123 .name = "nfs4",
124 .get_sb = nfs4_referral_get_sb, 124 .get_sb = nfs4_referral_get_sb,
125 .kill_sb = nfs4_kill_super, 125 .kill_sb = nfs4_kill_super,
126 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 126 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
127}; 127};
128 128
129static struct super_operations nfs4_sops = { 129static struct super_operations nfs4_sops = {
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index ff9e2e2104c2..4b46aac7d243 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,11 +44,17 @@
44 * locking semantics of the file system using the protocol. It should 44 * locking semantics of the file system using the protocol. It should
45 * be somewhere else, I'm sure, but right now it isn't. 45 * be somewhere else, I'm sure, but right now it isn't.
46 * 46 *
47 * New in version 4:
48 * - Remove i_generation from lock names for better stat performance.
49 *
50 * New in version 3:
51 * - Replace dentry votes with a cluster lock
52 *
47 * New in version 2: 53 * New in version 2:
48 * - full 64 bit i_size in the metadata lock lvbs 54 * - full 64 bit i_size in the metadata lock lvbs
49 * - introduction of "rw" lock and pushing meta/data locking down 55 * - introduction of "rw" lock and pushing meta/data locking down
50 */ 56 */
51#define O2NET_PROTOCOL_VERSION 2ULL 57#define O2NET_PROTOCOL_VERSION 4ULL
52struct o2net_handshake { 58struct o2net_handshake {
53 __be64 protocol_version; 59 __be64 protocol_version;
54 __be64 connector_id; 60 __be64 connector_id;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 1a01380e3878..014e73978dac 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -35,15 +35,17 @@
35 35
36#include "alloc.h" 36#include "alloc.h"
37#include "dcache.h" 37#include "dcache.h"
38#include "dlmglue.h"
38#include "file.h" 39#include "file.h"
39#include "inode.h" 40#include "inode.h"
40 41
42
41static int ocfs2_dentry_revalidate(struct dentry *dentry, 43static int ocfs2_dentry_revalidate(struct dentry *dentry,
42 struct nameidata *nd) 44 struct nameidata *nd)
43{ 45{
44 struct inode *inode = dentry->d_inode; 46 struct inode *inode = dentry->d_inode;
45 int ret = 0; /* if all else fails, just return false */ 47 int ret = 0; /* if all else fails, just return false */
46 struct ocfs2_super *osb; 48 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
47 49
48 mlog_entry("(0x%p, '%.*s')\n", dentry, 50 mlog_entry("(0x%p, '%.*s')\n", dentry,
49 dentry->d_name.len, dentry->d_name.name); 51 dentry->d_name.len, dentry->d_name.name);
@@ -55,28 +57,31 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
55 goto bail; 57 goto bail;
56 } 58 }
57 59
58 osb = OCFS2_SB(inode->i_sb);
59
60 BUG_ON(!osb); 60 BUG_ON(!osb);
61 61
62 if (inode != osb->root_inode) { 62 if (inode == osb->root_inode || is_bad_inode(inode))
63 spin_lock(&OCFS2_I(inode)->ip_lock); 63 goto bail;
64 /* did we or someone else delete this inode? */ 64
65 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { 65 spin_lock(&OCFS2_I(inode)->ip_lock);
66 spin_unlock(&OCFS2_I(inode)->ip_lock); 66 /* did we or someone else delete this inode? */
67 mlog(0, "inode (%llu) deleted, returning false\n", 67 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
68 (unsigned long long)OCFS2_I(inode)->ip_blkno);
69 goto bail;
70 }
71 spin_unlock(&OCFS2_I(inode)->ip_lock); 68 spin_unlock(&OCFS2_I(inode)->ip_lock);
69 mlog(0, "inode (%llu) deleted, returning false\n",
70 (unsigned long long)OCFS2_I(inode)->ip_blkno);
71 goto bail;
72 }
73 spin_unlock(&OCFS2_I(inode)->ip_lock);
72 74
73 if (!inode->i_nlink) { 75 /*
74 mlog(0, "Inode %llu orphaned, returning false " 76 * We don't need a cluster lock to test this because once an
75 "dir = %d\n", 77 * inode nlink hits zero, it never goes back.
76 (unsigned long long)OCFS2_I(inode)->ip_blkno, 78 */
77 S_ISDIR(inode->i_mode)); 79 if (inode->i_nlink == 0) {
78 goto bail; 80 mlog(0, "Inode %llu orphaned, returning false "
79 } 81 "dir = %d\n",
82 (unsigned long long)OCFS2_I(inode)->ip_blkno,
83 S_ISDIR(inode->i_mode));
84 goto bail;
80 } 85 }
81 86
82 ret = 1; 87 ret = 1;
@@ -87,6 +92,322 @@ bail:
87 return ret; 92 return ret;
88} 93}
89 94
95static int ocfs2_match_dentry(struct dentry *dentry,
96 u64 parent_blkno,
97 int skip_unhashed)
98{
99 struct inode *parent;
100
101 /*
102 * ocfs2_lookup() does a d_splice_alias() _before_ attaching
103 * to the lock data, so we skip those here, otherwise
104 * ocfs2_dentry_attach_lock() will get its original dentry
105 * back.
106 */
107 if (!dentry->d_fsdata)
108 return 0;
109
110 if (!dentry->d_parent)
111 return 0;
112
113 if (skip_unhashed && d_unhashed(dentry))
114 return 0;
115
116 parent = dentry->d_parent->d_inode;
117 /* Negative parent dentry? */
118 if (!parent)
119 return 0;
120
121 /* Name is in a different directory. */
122 if (OCFS2_I(parent)->ip_blkno != parent_blkno)
123 return 0;
124
125 return 1;
126}
127
128/*
129 * Walk the inode alias list, and find a dentry which has a given
130 * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
131 * is looking for a dentry_lock reference. The vote thread is looking
132 * to unhash aliases, so we allow it to skip any that already have
133 * that property.
134 */
135struct dentry *ocfs2_find_local_alias(struct inode *inode,
136 u64 parent_blkno,
137 int skip_unhashed)
138{
139 struct list_head *p;
140 struct dentry *dentry = NULL;
141
142 spin_lock(&dcache_lock);
143
144 list_for_each(p, &inode->i_dentry) {
145 dentry = list_entry(p, struct dentry, d_alias);
146
147 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
148 mlog(0, "dentry found: %.*s\n",
149 dentry->d_name.len, dentry->d_name.name);
150
151 dget_locked(dentry);
152 break;
153 }
154
155 dentry = NULL;
156 }
157
158 spin_unlock(&dcache_lock);
159
160 return dentry;
161}
162
163DEFINE_SPINLOCK(dentry_attach_lock);
164
165/*
166 * Attach this dentry to a cluster lock.
167 *
168 * Dentry locks cover all links in a given directory to a particular
169 * inode. We do this so that ocfs2 can build a lock name which all
170 * nodes in the cluster can agree on at all times. Shoving full names
171 * in the cluster lock won't work due to size restrictions. Covering
172 * links inside of a directory is a good compromise because it still
173 * allows us to use the parent directory lock to synchronize
174 * operations.
175 *
176 * Call this function with the parent dir semaphore and the parent dir
177 * cluster lock held.
178 *
179 * The dir semaphore will protect us from having to worry about
180 * concurrent processes on our node trying to attach a lock at the
181 * same time.
182 *
183 * The dir cluster lock (held at either PR or EX mode) protects us
184 * from unlink and rename on other nodes.
185 *
186 * A dput() can happen asynchronously due to pruning, so we cover
187 * attaching and detaching the dentry lock with a
188 * dentry_attach_lock.
189 *
190 * A node which has done lookup on a name retains a protected read
191 * lock until final dput. If the user requests an unlink or rename,
192 * the protected read is upgraded to an exclusive lock. Other nodes
193 * who have seen the dentry will then be informed that they need to
194 * downgrade their lock, which will involve d_delete on the
195 * dentry. This happens in ocfs2_dentry_convert_worker().
196 */
197int ocfs2_dentry_attach_lock(struct dentry *dentry,
198 struct inode *inode,
199 u64 parent_blkno)
200{
201 int ret;
202 struct dentry *alias;
203 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
204
205 mlog(0, "Attach \"%.*s\", parent %llu, fsdata: %p\n",
206 dentry->d_name.len, dentry->d_name.name,
207 (unsigned long long)parent_blkno, dl);
208
209 /*
210 * Negative dentry. We ignore these for now.
211 *
212 * XXX: Could we improve ocfs2_dentry_revalidate() by
213 * tracking these?
214 */
215 if (!inode)
216 return 0;
217
218 if (dl) {
219 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
220 " \"%.*s\": old parent: %llu, new: %llu\n",
221 dentry->d_name.len, dentry->d_name.name,
222 (unsigned long long)parent_blkno,
223 (unsigned long long)dl->dl_parent_blkno);
224 return 0;
225 }
226
227 alias = ocfs2_find_local_alias(inode, parent_blkno, 0);
228 if (alias) {
229 /*
230 * Great, an alias exists, which means we must have a
231 * dentry lock already. We can just grab the lock off
232 * the alias and add it to the list.
233 *
234 * We're depending here on the fact that this dentry
235 * was found and exists in the dcache and so must have
236 * a reference to the dentry_lock because we can't
237 * race creates. Final dput() cannot happen on it
238 * since we have it pinned, so our reference is safe.
239 */
240 dl = alias->d_fsdata;
241 mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n",
242 (unsigned long long)parent_blkno,
243 (unsigned long long)OCFS2_I(inode)->ip_blkno);
244
245 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
246 " \"%.*s\": old parent: %llu, new: %llu\n",
247 dentry->d_name.len, dentry->d_name.name,
248 (unsigned long long)parent_blkno,
249 (unsigned long long)dl->dl_parent_blkno);
250
251 mlog(0, "Found: %s\n", dl->dl_lockres.l_name);
252
253 goto out_attach;
254 }
255
256 /*
257 * There are no other aliases
258 */
259 dl = kmalloc(sizeof(*dl), GFP_NOFS);
260 if (!dl) {
261 ret = -ENOMEM;
262 mlog_errno(ret);
263 return ret;
264 }
265
266 dl->dl_count = 0;
267 /*
268 * Does this have to happen below, for all attaches, in case
269 * the struct inode gets blown away by votes?
270 */
271 dl->dl_inode = igrab(inode);
272 dl->dl_parent_blkno = parent_blkno;
273 ocfs2_dentry_lock_res_init(dl, parent_blkno, inode);
274
275out_attach:
276 spin_lock(&dentry_attach_lock);
277 dentry->d_fsdata = dl;
278 dl->dl_count++;
279 spin_unlock(&dentry_attach_lock);
280
281 /*
282 * This actually gets us our PRMODE level lock. From now on,
283 * we'll have a notification if one of these names is
284 * destroyed on another node.
285 */
286 ret = ocfs2_dentry_lock(dentry, 0);
287 if (!ret)
288 ocfs2_dentry_unlock(dentry, 0);
289 else
290 mlog_errno(ret);
291
292 dput(alias);
293
294 return ret;
295}
296
297/*
298 * ocfs2_dentry_iput() and friends.
299 *
300 * At this point, our particular dentry is detached from the inodes
301 * alias list, so there's no way that the locking code can find it.
302 *
303 * The interesting stuff happens when we determine that our lock needs
304 * to go away because this is the last subdir alias in the
305 * system. This function needs to handle a couple things:
306 *
307 * 1) Synchronizing lock shutdown with the downconvert threads. This
308 * is already handled for us via the lockres release drop function
309 * called in ocfs2_release_dentry_lock()
310 *
311 * 2) A race may occur when we're doing our lock shutdown and
312 * another process wants to create a new dentry lock. Right now we
313 * let them race, which means that for a very short while, this
314 * node might have two locks on a lock resource. This should not be a
315 * problem though because one of them is in the process of being
316 * thrown out.
317 */
318static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
319 struct ocfs2_dentry_lock *dl)
320{
321 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
322 ocfs2_lock_res_free(&dl->dl_lockres);
323 iput(dl->dl_inode);
324 kfree(dl);
325}
326
327void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
328 struct ocfs2_dentry_lock *dl)
329{
330 int unlock = 0;
331
332 BUG_ON(dl->dl_count == 0);
333
334 spin_lock(&dentry_attach_lock);
335 dl->dl_count--;
336 unlock = !dl->dl_count;
337 spin_unlock(&dentry_attach_lock);
338
339 if (unlock)
340 ocfs2_drop_dentry_lock(osb, dl);
341}
342
343static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
344{
345 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
346
347 mlog_bug_on_msg(!dl && !(dentry->d_flags & DCACHE_DISCONNECTED),
348 "dentry: %.*s\n", dentry->d_name.len,
349 dentry->d_name.name);
350
351 if (!dl)
352 goto out;
353
354 mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
355 dentry->d_name.len, dentry->d_name.name,
356 dl->dl_count);
357
358 ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
359
360out:
361 iput(inode);
362}
363
364/*
365 * d_move(), but keep the locks in sync.
366 *
367 * When we are done, "dentry" will have the parent dir and name of
368 * "target", which will be thrown away.
369 *
370 * We manually update the lock of "dentry" if need be.
371 *
372 * "target" doesn't have its dentry lock touched - we allow the later
373 * dput() to handle this for us.
374 *
375 * This is called during ocfs2_rename(), while holding parent
376 * directory locks. The dentries have already been deleted on other
377 * nodes via ocfs2_remote_dentry_delete().
378 *
379 * Normally, the VFS handles the d_move() for the file system, after
380 * the ->rename() callback. OCFS2 wants to handle this internally, so
381 * the new lock can be created atomically with respect to the cluster.
382 */
383void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
384 struct inode *old_dir, struct inode *new_dir)
385{
386 int ret;
387 struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
388 struct inode *inode = dentry->d_inode;
389
390 /*
391 * Move within the same directory, so the actual lock info won't
392 * change.
393 *
394 * XXX: Is there any advantage to dropping the lock here?
395 */
396 if (old_dir == new_dir)
397 goto out_move;
398
399 ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
400
401 dentry->d_fsdata = NULL;
402 ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno);
403 if (ret)
404 mlog_errno(ret);
405
406out_move:
407 d_move(dentry, target);
408}
409
90struct dentry_operations ocfs2_dentry_ops = { 410struct dentry_operations ocfs2_dentry_ops = {
91 .d_revalidate = ocfs2_dentry_revalidate, 411 .d_revalidate = ocfs2_dentry_revalidate,
412 .d_iput = ocfs2_dentry_iput,
92}; 413};
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index 90072771114b..c091c34d9883 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -28,4 +28,31 @@
28 28
29extern struct dentry_operations ocfs2_dentry_ops; 29extern struct dentry_operations ocfs2_dentry_ops;
30 30
31struct ocfs2_dentry_lock {
32 unsigned int dl_count;
33 u64 dl_parent_blkno;
34
35 /*
36 * The ocfs2_dentry_lock keeps an inode reference until
37 * dl_lockres has been destroyed. This is usually done in
38 * ->d_iput() anyway, so there should be minimal impact.
39 */
40 struct inode *dl_inode;
41 struct ocfs2_lock_res dl_lockres;
42};
43
44int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
45 u64 parent_blkno);
46
47void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
48 struct ocfs2_dentry_lock *dl);
49
50struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
51 int skip_unhashed);
52
53void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
54 struct inode *old_dir, struct inode *new_dir);
55
56extern spinlock_t dentry_attach_lock;
57
31#endif /* OCFS2_DCACHE_H */ 58#endif /* OCFS2_DCACHE_H */
diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h
index 53652f51c0e1..cfd5cb65cab0 100644
--- a/fs/ocfs2/dlm/dlmapi.h
+++ b/fs/ocfs2/dlm/dlmapi.h
@@ -182,6 +182,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm,
182 struct dlm_lockstatus *lksb, 182 struct dlm_lockstatus *lksb,
183 int flags, 183 int flags,
184 const char *name, 184 const char *name,
185 int namelen,
185 dlm_astlockfunc_t *ast, 186 dlm_astlockfunc_t *ast,
186 void *data, 187 void *data,
187 dlm_bastlockfunc_t *bast); 188 dlm_bastlockfunc_t *bast);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index f13a4bac41f0..681046d51393 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -320,8 +320,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
320 320
321 res = dlm_lookup_lockres(dlm, name, locklen); 321 res = dlm_lookup_lockres(dlm, name, locklen);
322 if (!res) { 322 if (!res) {
323 mlog(ML_ERROR, "got %sast for unknown lockres! " 323 mlog(0, "got %sast for unknown lockres! "
324 "cookie=%u:%llu, name=%.*s, namelen=%u\n", 324 "cookie=%u:%llu, name=%.*s, namelen=%u\n",
325 past->type == DLM_AST ? "" : "b", 325 past->type == DLM_AST ? "" : "b",
326 dlm_get_lock_cookie_node(cookie), 326 dlm_get_lock_cookie_node(cookie),
327 dlm_get_lock_cookie_seq(cookie), 327 dlm_get_lock_cookie_seq(cookie),
@@ -462,7 +462,7 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
462 mlog(ML_ERROR, "sent AST to node %u, it returned " 462 mlog(ML_ERROR, "sent AST to node %u, it returned "
463 "DLM_MIGRATING!\n", lock->ml.node); 463 "DLM_MIGRATING!\n", lock->ml.node);
464 BUG(); 464 BUG();
465 } else if (status != DLM_NORMAL) { 465 } else if (status != DLM_NORMAL && status != DLM_IVLOCKID) {
466 mlog(ML_ERROR, "AST to node %u returned %d!\n", 466 mlog(ML_ERROR, "AST to node %u returned %d!\n",
467 lock->ml.node, status); 467 lock->ml.node, status);
468 /* ignore it */ 468 /* ignore it */
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 14530ee7e11d..fa968180b072 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -747,6 +747,7 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
747 u8 owner); 747 u8 owner);
748struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, 748struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
749 const char *lockid, 749 const char *lockid,
750 int namelen,
750 int flags); 751 int flags);
751struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, 752struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
752 const char *name, 753 const char *name,
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 5ca57ec650c7..42a1b91979b5 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -540,8 +540,8 @@ static inline void dlm_get_next_cookie(u8 node_num, u64 *cookie)
540 540
541enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode, 541enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
542 struct dlm_lockstatus *lksb, int flags, 542 struct dlm_lockstatus *lksb, int flags,
543 const char *name, dlm_astlockfunc_t *ast, void *data, 543 const char *name, int namelen, dlm_astlockfunc_t *ast,
544 dlm_bastlockfunc_t *bast) 544 void *data, dlm_bastlockfunc_t *bast)
545{ 545{
546 enum dlm_status status; 546 enum dlm_status status;
547 struct dlm_lock_resource *res = NULL; 547 struct dlm_lock_resource *res = NULL;
@@ -571,7 +571,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
571 recovery = (flags & LKM_RECOVERY); 571 recovery = (flags & LKM_RECOVERY);
572 572
573 if (recovery && 573 if (recovery &&
574 (!dlm_is_recovery_lock(name, strlen(name)) || convert) ) { 574 (!dlm_is_recovery_lock(name, namelen) || convert) ) {
575 dlm_error(status); 575 dlm_error(status);
576 goto error; 576 goto error;
577 } 577 }
@@ -643,7 +643,7 @@ retry_convert:
643 } 643 }
644 644
645 status = DLM_IVBUFLEN; 645 status = DLM_IVBUFLEN;
646 if (strlen(name) > DLM_LOCKID_NAME_MAX || strlen(name) < 1) { 646 if (namelen > DLM_LOCKID_NAME_MAX || namelen < 1) {
647 dlm_error(status); 647 dlm_error(status);
648 goto error; 648 goto error;
649 } 649 }
@@ -659,7 +659,7 @@ retry_convert:
659 dlm_wait_for_recovery(dlm); 659 dlm_wait_for_recovery(dlm);
660 660
661 /* find or create the lock resource */ 661 /* find or create the lock resource */
662 res = dlm_get_lock_resource(dlm, name, flags); 662 res = dlm_get_lock_resource(dlm, name, namelen, flags);
663 if (!res) { 663 if (!res) {
664 status = DLM_IVLOCKID; 664 status = DLM_IVLOCKID;
665 dlm_error(status); 665 dlm_error(status);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9503240ef0e5..f784177b6241 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -740,6 +740,7 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
740 */ 740 */
741struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, 741struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
742 const char *lockid, 742 const char *lockid,
743 int namelen,
743 int flags) 744 int flags)
744{ 745{
745 struct dlm_lock_resource *tmpres=NULL, *res=NULL; 746 struct dlm_lock_resource *tmpres=NULL, *res=NULL;
@@ -748,13 +749,12 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
748 int blocked = 0; 749 int blocked = 0;
749 int ret, nodenum; 750 int ret, nodenum;
750 struct dlm_node_iter iter; 751 struct dlm_node_iter iter;
751 unsigned int namelen, hash; 752 unsigned int hash;
752 int tries = 0; 753 int tries = 0;
753 int bit, wait_on_recovery = 0; 754 int bit, wait_on_recovery = 0;
754 755
755 BUG_ON(!lockid); 756 BUG_ON(!lockid);
756 757
757 namelen = strlen(lockid);
758 hash = dlm_lockid_hash(lockid, namelen); 758 hash = dlm_lockid_hash(lockid, namelen);
759 759
760 mlog(0, "get lockres %s (len %d)\n", lockid, namelen); 760 mlog(0, "get lockres %s (len %d)\n", lockid, namelen);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 594745fab0b5..9d950d7cea38 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2285,7 +2285,8 @@ again:
2285 memset(&lksb, 0, sizeof(lksb)); 2285 memset(&lksb, 0, sizeof(lksb));
2286 2286
2287 ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, 2287 ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
2288 DLM_RECOVERY_LOCK_NAME, dlm_reco_ast, dlm, dlm_reco_bast); 2288 DLM_RECOVERY_LOCK_NAME, DLM_RECOVERY_LOCK_NAME_LEN,
2289 dlm_reco_ast, dlm, dlm_reco_bast);
2289 2290
2290 mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n", 2291 mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n",
2291 dlm->name, ret, lksb.status); 2292 dlm->name, ret, lksb.status);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index e641b084b343..eead48bbfac6 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -102,10 +102,10 @@ static inline void user_recover_from_dlm_error(struct user_lock_res *lockres)
102 spin_unlock(&lockres->l_lock); 102 spin_unlock(&lockres->l_lock);
103} 103}
104 104
105#define user_log_dlm_error(_func, _stat, _lockres) do { \ 105#define user_log_dlm_error(_func, _stat, _lockres) do { \
106 mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ 106 mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \
107 "resource %s: %s\n", dlm_errname(_stat), _func, \ 107 "resource %.*s: %s\n", dlm_errname(_stat), _func, \
108 _lockres->l_name, dlm_errmsg(_stat)); \ 108 _lockres->l_namelen, _lockres->l_name, dlm_errmsg(_stat)); \
109} while (0) 109} while (0)
110 110
111/* WARNING: This function lives in a world where the only three lock 111/* WARNING: This function lives in a world where the only three lock
@@ -127,21 +127,22 @@ static void user_ast(void *opaque)
127 struct user_lock_res *lockres = opaque; 127 struct user_lock_res *lockres = opaque;
128 struct dlm_lockstatus *lksb; 128 struct dlm_lockstatus *lksb;
129 129
130 mlog(0, "AST fired for lockres %s\n", lockres->l_name); 130 mlog(0, "AST fired for lockres %.*s\n", lockres->l_namelen,
131 lockres->l_name);
131 132
132 spin_lock(&lockres->l_lock); 133 spin_lock(&lockres->l_lock);
133 134
134 lksb = &(lockres->l_lksb); 135 lksb = &(lockres->l_lksb);
135 if (lksb->status != DLM_NORMAL) { 136 if (lksb->status != DLM_NORMAL) {
136 mlog(ML_ERROR, "lksb status value of %u on lockres %s\n", 137 mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n",
137 lksb->status, lockres->l_name); 138 lksb->status, lockres->l_namelen, lockres->l_name);
138 spin_unlock(&lockres->l_lock); 139 spin_unlock(&lockres->l_lock);
139 return; 140 return;
140 } 141 }
141 142
142 mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE, 143 mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
143 "Lockres %s, requested ivmode. flags 0x%x\n", 144 "Lockres %.*s, requested ivmode. flags 0x%x\n",
144 lockres->l_name, lockres->l_flags); 145 lockres->l_namelen, lockres->l_name, lockres->l_flags);
145 146
146 /* we're downconverting. */ 147 /* we're downconverting. */
147 if (lockres->l_requested < lockres->l_level) { 148 if (lockres->l_requested < lockres->l_level) {
@@ -213,8 +214,8 @@ static void user_bast(void *opaque, int level)
213{ 214{
214 struct user_lock_res *lockres = opaque; 215 struct user_lock_res *lockres = opaque;
215 216
216 mlog(0, "Blocking AST fired for lockres %s. Blocking level %d\n", 217 mlog(0, "Blocking AST fired for lockres %.*s. Blocking level %d\n",
217 lockres->l_name, level); 218 lockres->l_namelen, lockres->l_name, level);
218 219
219 spin_lock(&lockres->l_lock); 220 spin_lock(&lockres->l_lock);
220 lockres->l_flags |= USER_LOCK_BLOCKED; 221 lockres->l_flags |= USER_LOCK_BLOCKED;
@@ -231,7 +232,8 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
231{ 232{
232 struct user_lock_res *lockres = opaque; 233 struct user_lock_res *lockres = opaque;
233 234
234 mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name); 235 mlog(0, "UNLOCK AST called on lock %.*s\n", lockres->l_namelen,
236 lockres->l_name);
235 237
236 if (status != DLM_NORMAL && status != DLM_CANCELGRANT) 238 if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
237 mlog(ML_ERROR, "Dlm returns status %d\n", status); 239 mlog(ML_ERROR, "Dlm returns status %d\n", status);
@@ -244,8 +246,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
244 && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) { 246 && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
245 lockres->l_level = LKM_IVMODE; 247 lockres->l_level = LKM_IVMODE;
246 } else if (status == DLM_CANCELGRANT) { 248 } else if (status == DLM_CANCELGRANT) {
247 mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
248 lockres->l_name, lockres->l_flags);
249 /* We tried to cancel a convert request, but it was 249 /* We tried to cancel a convert request, but it was
250 * already granted. Don't clear the busy flag - the 250 * already granted. Don't clear the busy flag - the
251 * ast should've done this already. */ 251 * ast should've done this already. */
@@ -255,8 +255,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
255 } else { 255 } else {
256 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); 256 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
257 /* Cancel succeeded, we want to re-queue */ 257 /* Cancel succeeded, we want to re-queue */
258 mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
259 lockres->l_name, lockres->l_flags);
260 lockres->l_requested = LKM_IVMODE; /* cancel an 258 lockres->l_requested = LKM_IVMODE; /* cancel an
261 * upconvert 259 * upconvert
262 * request. */ 260 * request. */
@@ -287,13 +285,14 @@ static void user_dlm_unblock_lock(void *opaque)
287 struct user_lock_res *lockres = (struct user_lock_res *) opaque; 285 struct user_lock_res *lockres = (struct user_lock_res *) opaque;
288 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); 286 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
289 287
290 mlog(0, "processing lockres %s\n", lockres->l_name); 288 mlog(0, "processing lockres %.*s\n", lockres->l_namelen,
289 lockres->l_name);
291 290
292 spin_lock(&lockres->l_lock); 291 spin_lock(&lockres->l_lock);
293 292
294 mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED), 293 mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
295 "Lockres %s, flags 0x%x\n", 294 "Lockres %.*s, flags 0x%x\n",
296 lockres->l_name, lockres->l_flags); 295 lockres->l_namelen, lockres->l_name, lockres->l_flags);
297 296
298 /* notice that we don't clear USER_LOCK_BLOCKED here. If it's 297 /* notice that we don't clear USER_LOCK_BLOCKED here. If it's
299 * set, we want user_ast clear it. */ 298 * set, we want user_ast clear it. */
@@ -305,22 +304,16 @@ static void user_dlm_unblock_lock(void *opaque)
305 * flag, and finally we might get another bast which re-queues 304 * flag, and finally we might get another bast which re-queues
306 * us before our ast for the downconvert is called. */ 305 * us before our ast for the downconvert is called. */
307 if (!(lockres->l_flags & USER_LOCK_BLOCKED)) { 306 if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
308 mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
309 lockres->l_name, lockres->l_flags);
310 spin_unlock(&lockres->l_lock); 307 spin_unlock(&lockres->l_lock);
311 goto drop_ref; 308 goto drop_ref;
312 } 309 }
313 310
314 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { 311 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
315 mlog(0, "lock is in teardown so we do nothing\n");
316 spin_unlock(&lockres->l_lock); 312 spin_unlock(&lockres->l_lock);
317 goto drop_ref; 313 goto drop_ref;
318 } 314 }
319 315
320 if (lockres->l_flags & USER_LOCK_BUSY) { 316 if (lockres->l_flags & USER_LOCK_BUSY) {
321 mlog(0, "Cancel lock %s, flags 0x%x\n",
322 lockres->l_name, lockres->l_flags);
323
324 if (lockres->l_flags & USER_LOCK_IN_CANCEL) { 317 if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
325 spin_unlock(&lockres->l_lock); 318 spin_unlock(&lockres->l_lock);
326 goto drop_ref; 319 goto drop_ref;
@@ -372,6 +365,7 @@ static void user_dlm_unblock_lock(void *opaque)
372 &lockres->l_lksb, 365 &lockres->l_lksb,
373 LKM_CONVERT|LKM_VALBLK, 366 LKM_CONVERT|LKM_VALBLK,
374 lockres->l_name, 367 lockres->l_name,
368 lockres->l_namelen,
375 user_ast, 369 user_ast,
376 lockres, 370 lockres,
377 user_bast); 371 user_bast);
@@ -420,16 +414,16 @@ int user_dlm_cluster_lock(struct user_lock_res *lockres,
420 414
421 if (level != LKM_EXMODE && 415 if (level != LKM_EXMODE &&
422 level != LKM_PRMODE) { 416 level != LKM_PRMODE) {
423 mlog(ML_ERROR, "lockres %s: invalid request!\n", 417 mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
424 lockres->l_name); 418 lockres->l_namelen, lockres->l_name);
425 status = -EINVAL; 419 status = -EINVAL;
426 goto bail; 420 goto bail;
427 } 421 }
428 422
429 mlog(0, "lockres %s: asking for %s lock, passed flags = 0x%x\n", 423 mlog(0, "lockres %.*s: asking for %s lock, passed flags = 0x%x\n",
430 lockres->l_name, 424 lockres->l_namelen, lockres->l_name,
431 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE", 425 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE",
432 lkm_flags); 426 lkm_flags);
433 427
434again: 428again:
435 if (signal_pending(current)) { 429 if (signal_pending(current)) {
@@ -474,15 +468,13 @@ again:
474 BUG_ON(level == LKM_IVMODE); 468 BUG_ON(level == LKM_IVMODE);
475 BUG_ON(level == LKM_NLMODE); 469 BUG_ON(level == LKM_NLMODE);
476 470
477 mlog(0, "lock %s, get lock from %d to level = %d\n",
478 lockres->l_name, lockres->l_level, level);
479
480 /* call dlm_lock to upgrade lock now */ 471 /* call dlm_lock to upgrade lock now */
481 status = dlmlock(dlm, 472 status = dlmlock(dlm,
482 level, 473 level,
483 &lockres->l_lksb, 474 &lockres->l_lksb,
484 local_flags, 475 local_flags,
485 lockres->l_name, 476 lockres->l_name,
477 lockres->l_namelen,
486 user_ast, 478 user_ast,
487 lockres, 479 lockres,
488 user_bast); 480 user_bast);
@@ -498,9 +490,6 @@ again:
498 goto bail; 490 goto bail;
499 } 491 }
500 492
501 mlog(0, "lock %s, successfull return from dlmlock\n",
502 lockres->l_name);
503
504 user_wait_on_busy_lock(lockres); 493 user_wait_on_busy_lock(lockres);
505 goto again; 494 goto again;
506 } 495 }
@@ -508,9 +497,6 @@ again:
508 user_dlm_inc_holders(lockres, level); 497 user_dlm_inc_holders(lockres, level);
509 spin_unlock(&lockres->l_lock); 498 spin_unlock(&lockres->l_lock);
510 499
511 mlog(0, "lockres %s: Got %s lock!\n", lockres->l_name,
512 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE");
513
514 status = 0; 500 status = 0;
515bail: 501bail:
516 return status; 502 return status;
@@ -538,13 +524,11 @@ void user_dlm_cluster_unlock(struct user_lock_res *lockres,
538{ 524{
539 if (level != LKM_EXMODE && 525 if (level != LKM_EXMODE &&
540 level != LKM_PRMODE) { 526 level != LKM_PRMODE) {
541 mlog(ML_ERROR, "lockres %s: invalid request!\n", lockres->l_name); 527 mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
528 lockres->l_namelen, lockres->l_name);
542 return; 529 return;
543 } 530 }
544 531
545 mlog(0, "lockres %s: dropping %s lock\n", lockres->l_name,
546 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE");
547
548 spin_lock(&lockres->l_lock); 532 spin_lock(&lockres->l_lock);
549 user_dlm_dec_holders(lockres, level); 533 user_dlm_dec_holders(lockres, level);
550 __user_dlm_cond_queue_lockres(lockres); 534 __user_dlm_cond_queue_lockres(lockres);
@@ -602,6 +586,7 @@ void user_dlm_lock_res_init(struct user_lock_res *lockres,
602 memcpy(lockres->l_name, 586 memcpy(lockres->l_name,
603 dentry->d_name.name, 587 dentry->d_name.name,
604 dentry->d_name.len); 588 dentry->d_name.len);
589 lockres->l_namelen = dentry->d_name.len;
605} 590}
606 591
607int user_dlm_destroy_lock(struct user_lock_res *lockres) 592int user_dlm_destroy_lock(struct user_lock_res *lockres)
@@ -609,11 +594,10 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
609 int status = -EBUSY; 594 int status = -EBUSY;
610 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); 595 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
611 596
612 mlog(0, "asked to destroy %s\n", lockres->l_name); 597 mlog(0, "asked to destroy %.*s\n", lockres->l_namelen, lockres->l_name);
613 598
614 spin_lock(&lockres->l_lock); 599 spin_lock(&lockres->l_lock);
615 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { 600 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
616 mlog(0, "Lock is already torn down\n");
617 spin_unlock(&lockres->l_lock); 601 spin_unlock(&lockres->l_lock);
618 return 0; 602 return 0;
619 } 603 }
@@ -623,8 +607,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
623 while (lockres->l_flags & USER_LOCK_BUSY) { 607 while (lockres->l_flags & USER_LOCK_BUSY) {
624 spin_unlock(&lockres->l_lock); 608 spin_unlock(&lockres->l_lock);
625 609
626 mlog(0, "lock %s is busy\n", lockres->l_name);
627
628 user_wait_on_busy_lock(lockres); 610 user_wait_on_busy_lock(lockres);
629 611
630 spin_lock(&lockres->l_lock); 612 spin_lock(&lockres->l_lock);
@@ -632,14 +614,12 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
632 614
633 if (lockres->l_ro_holders || lockres->l_ex_holders) { 615 if (lockres->l_ro_holders || lockres->l_ex_holders) {
634 spin_unlock(&lockres->l_lock); 616 spin_unlock(&lockres->l_lock);
635 mlog(0, "lock %s has holders\n", lockres->l_name);
636 goto bail; 617 goto bail;
637 } 618 }
638 619
639 status = 0; 620 status = 0;
640 if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { 621 if (!(lockres->l_flags & USER_LOCK_ATTACHED)) {
641 spin_unlock(&lockres->l_lock); 622 spin_unlock(&lockres->l_lock);
642 mlog(0, "lock %s is not attached\n", lockres->l_name);
643 goto bail; 623 goto bail;
644 } 624 }
645 625
@@ -647,7 +627,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
647 lockres->l_flags |= USER_LOCK_BUSY; 627 lockres->l_flags |= USER_LOCK_BUSY;
648 spin_unlock(&lockres->l_lock); 628 spin_unlock(&lockres->l_lock);
649 629
650 mlog(0, "unlocking lockres %s\n", lockres->l_name);
651 status = dlmunlock(dlm, 630 status = dlmunlock(dlm,
652 &lockres->l_lksb, 631 &lockres->l_lksb,
653 LKM_VALBLK, 632 LKM_VALBLK,
diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h
index 04178bc40b76..c400e93bbf79 100644
--- a/fs/ocfs2/dlm/userdlm.h
+++ b/fs/ocfs2/dlm/userdlm.h
@@ -53,6 +53,7 @@ struct user_lock_res {
53 53
54#define USER_DLM_LOCK_ID_MAX_LEN 32 54#define USER_DLM_LOCK_ID_MAX_LEN 32
55 char l_name[USER_DLM_LOCK_ID_MAX_LEN]; 55 char l_name[USER_DLM_LOCK_ID_MAX_LEN];
56 int l_namelen;
56 int l_level; 57 int l_level;
57 unsigned int l_ro_holders; 58 unsigned int l_ro_holders;
58 unsigned int l_ex_holders; 59 unsigned int l_ex_holders;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 151b41781eab..de887063dcfc 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -46,6 +46,7 @@
46#include "ocfs2.h" 46#include "ocfs2.h"
47 47
48#include "alloc.h" 48#include "alloc.h"
49#include "dcache.h"
49#include "dlmglue.h" 50#include "dlmglue.h"
50#include "extent_map.h" 51#include "extent_map.h"
51#include "heartbeat.h" 52#include "heartbeat.h"
@@ -66,78 +67,161 @@ struct ocfs2_mask_waiter {
66 unsigned long mw_goal; 67 unsigned long mw_goal;
67}; 68};
68 69
69static void ocfs2_inode_ast_func(void *opaque); 70static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
70static void ocfs2_inode_bast_func(void *opaque, 71static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
71 int level);
72static void ocfs2_super_ast_func(void *opaque);
73static void ocfs2_super_bast_func(void *opaque,
74 int level);
75static void ocfs2_rename_ast_func(void *opaque);
76static void ocfs2_rename_bast_func(void *opaque,
77 int level);
78
79/* so far, all locks have gotten along with the same unlock ast */
80static void ocfs2_unlock_ast_func(void *opaque,
81 enum dlm_status status);
82static int ocfs2_do_unblock_meta(struct inode *inode,
83 int *requeue);
84static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
85 int *requeue);
86static int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
87 int *requeue);
88static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
89 int *requeue);
90static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
91 int *requeue);
92typedef void (ocfs2_convert_worker_t)(struct ocfs2_lock_res *, int);
93static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
94 struct ocfs2_lock_res *lockres,
95 int *requeue,
96 ocfs2_convert_worker_t *worker);
97 72
73/*
74 * Return value from ->downconvert_worker functions.
75 *
76 * These control the precise actions of ocfs2_unblock_lock()
77 * and ocfs2_process_blocked_lock()
78 *
79 */
80enum ocfs2_unblock_action {
81 UNBLOCK_CONTINUE = 0, /* Continue downconvert */
82 UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire
83 * ->post_unlock callback */
84 UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire
85 * ->post_unlock() callback. */
86};
87
88struct ocfs2_unblock_ctl {
89 int requeue;
90 enum ocfs2_unblock_action unblock_action;
91};
92
93static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
94 int new_level);
95static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
96
97static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
98 int blocking);
99
100static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
101 int blocking);
102
103static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
104 struct ocfs2_lock_res *lockres);
105
106/*
107 * OCFS2 Lock Resource Operations
108 *
109 * These fine tune the behavior of the generic dlmglue locking infrastructure.
110 *
111 * The most basic of lock types can point ->l_priv to their respective
112 * struct ocfs2_super and allow the default actions to manage things.
113 *
114 * Right now, each lock type also needs to implement an init function,
115 * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres()
116 * should be called when the lock is no longer needed (i.e., object
117 * destruction time).
118 */
98struct ocfs2_lock_res_ops { 119struct ocfs2_lock_res_ops {
99 void (*ast)(void *); 120 /*
100 void (*bast)(void *, int); 121 * Translate an ocfs2_lock_res * into an ocfs2_super *. Define
101 void (*unlock_ast)(void *, enum dlm_status); 122 * this callback if ->l_priv is not an ocfs2_super pointer
102 int (*unblock)(struct ocfs2_lock_res *, int *); 123 */
124 struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);
125
126 /*
127 * Optionally called in the downconvert (or "vote") thread
128 * after a successful downconvert. The lockres will not be
129 * referenced after this callback is called, so it is safe to
130 * free memory, etc.
131 *
132 * The exact semantics of when this is called are controlled
133 * by ->downconvert_worker()
134 */
135 void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
136
137 /*
138 * Allow a lock type to add checks to determine whether it is
139 * safe to downconvert a lock. Return 0 to re-queue the
140 * downconvert at a later time, nonzero to continue.
141 *
142 * For most locks, the default checks that there are no
143 * incompatible holders are sufficient.
144 *
145 * Called with the lockres spinlock held.
146 */
147 int (*check_downconvert)(struct ocfs2_lock_res *, int);
148
149 /*
150 * Allows a lock type to populate the lock value block. This
151 * is called on downconvert, and when we drop a lock.
152 *
153 * Locks that want to use this should set LOCK_TYPE_USES_LVB
154 * in the flags field.
155 *
156 * Called with the lockres spinlock held.
157 */
158 void (*set_lvb)(struct ocfs2_lock_res *);
159
160 /*
161 * Called from the downconvert thread when it is determined
162 * that a lock will be downconverted. This is called without
163 * any locks held so the function can do work that might
164 * schedule (syncing out data, etc).
165 *
166 * This should return any one of the ocfs2_unblock_action
167 * values, depending on what it wants the thread to do.
168 */
169 int (*downconvert_worker)(struct ocfs2_lock_res *, int);
170
171 /*
172 * LOCK_TYPE_* flags which describe the specific requirements
173 * of a lock type. Descriptions of each individual flag follow.
174 */
175 int flags;
103}; 176};
104 177
178/*
179 * Some locks want to "refresh" potentially stale data when a
180 * meaningful (PRMODE or EXMODE) lock level is first obtained. If this
181 * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the
182 * individual lockres l_flags member from the ast function. It is
183 * expected that the locking wrapper will clear the
184 * OCFS2_LOCK_NEEDS_REFRESH flag when done.
185 */
186#define LOCK_TYPE_REQUIRES_REFRESH 0x1
187
188/*
189 * Indicate that a lock type makes use of the lock value block. The
190 * ->set_lvb lock type callback must be defined.
191 */
192#define LOCK_TYPE_USES_LVB 0x2
193
105static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { 194static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
106 .ast = ocfs2_inode_ast_func, 195 .get_osb = ocfs2_get_inode_osb,
107 .bast = ocfs2_inode_bast_func, 196 .flags = 0,
108 .unlock_ast = ocfs2_unlock_ast_func,
109 .unblock = ocfs2_unblock_inode_lock,
110}; 197};
111 198
112static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { 199static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
113 .ast = ocfs2_inode_ast_func, 200 .get_osb = ocfs2_get_inode_osb,
114 .bast = ocfs2_inode_bast_func, 201 .check_downconvert = ocfs2_check_meta_downconvert,
115 .unlock_ast = ocfs2_unlock_ast_func, 202 .set_lvb = ocfs2_set_meta_lvb,
116 .unblock = ocfs2_unblock_meta, 203 .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
117}; 204};
118 205
119static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
120 int blocking);
121
122static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { 206static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
123 .ast = ocfs2_inode_ast_func, 207 .get_osb = ocfs2_get_inode_osb,
124 .bast = ocfs2_inode_bast_func, 208 .downconvert_worker = ocfs2_data_convert_worker,
125 .unlock_ast = ocfs2_unlock_ast_func, 209 .flags = 0,
126 .unblock = ocfs2_unblock_data,
127}; 210};
128 211
129static struct ocfs2_lock_res_ops ocfs2_super_lops = { 212static struct ocfs2_lock_res_ops ocfs2_super_lops = {
130 .ast = ocfs2_super_ast_func, 213 .flags = LOCK_TYPE_REQUIRES_REFRESH,
131 .bast = ocfs2_super_bast_func,
132 .unlock_ast = ocfs2_unlock_ast_func,
133 .unblock = ocfs2_unblock_osb_lock,
134}; 214};
135 215
136static struct ocfs2_lock_res_ops ocfs2_rename_lops = { 216static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
137 .ast = ocfs2_rename_ast_func, 217 .flags = 0,
138 .bast = ocfs2_rename_bast_func, 218};
139 .unlock_ast = ocfs2_unlock_ast_func, 219
140 .unblock = ocfs2_unblock_osb_lock, 220static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
221 .get_osb = ocfs2_get_dentry_osb,
222 .post_unlock = ocfs2_dentry_post_unlock,
223 .downconvert_worker = ocfs2_dentry_convert_worker,
224 .flags = 0,
141}; 225};
142 226
143static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 227static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
@@ -147,29 +231,26 @@ static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
147 lockres->l_type == OCFS2_LOCK_TYPE_RW; 231 lockres->l_type == OCFS2_LOCK_TYPE_RW;
148} 232}
149 233
150static inline int ocfs2_is_super_lock(struct ocfs2_lock_res *lockres) 234static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
151{ 235{
152 return lockres->l_type == OCFS2_LOCK_TYPE_SUPER; 236 BUG_ON(!ocfs2_is_inode_lock(lockres));
153}
154 237
155static inline int ocfs2_is_rename_lock(struct ocfs2_lock_res *lockres) 238 return (struct inode *) lockres->l_priv;
156{
157 return lockres->l_type == OCFS2_LOCK_TYPE_RENAME;
158} 239}
159 240
160static inline struct ocfs2_super *ocfs2_lock_res_super(struct ocfs2_lock_res *lockres) 241static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
161{ 242{
162 BUG_ON(!ocfs2_is_super_lock(lockres) 243 BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
163 && !ocfs2_is_rename_lock(lockres));
164 244
165 return (struct ocfs2_super *) lockres->l_priv; 245 return (struct ocfs2_dentry_lock *)lockres->l_priv;
166} 246}
167 247
168static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) 248static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
169{ 249{
170 BUG_ON(!ocfs2_is_inode_lock(lockres)); 250 if (lockres->l_ops->get_osb)
251 return lockres->l_ops->get_osb(lockres);
171 252
172 return (struct inode *) lockres->l_priv; 253 return (struct ocfs2_super *)lockres->l_priv;
173} 254}
174 255
175static int ocfs2_lock_create(struct ocfs2_super *osb, 256static int ocfs2_lock_create(struct ocfs2_super *osb,
@@ -200,25 +281,6 @@ static int ocfs2_meta_lock_update(struct inode *inode,
200 struct buffer_head **bh); 281 struct buffer_head **bh);
201static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 282static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
202static inline int ocfs2_highest_compat_lock_level(int level); 283static inline int ocfs2_highest_compat_lock_level(int level);
203static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode,
204 struct ocfs2_lock_res *lockres,
205 int new_level);
206
207static char *ocfs2_lock_type_strings[] = {
208 [OCFS2_LOCK_TYPE_META] = "Meta",
209 [OCFS2_LOCK_TYPE_DATA] = "Data",
210 [OCFS2_LOCK_TYPE_SUPER] = "Super",
211 [OCFS2_LOCK_TYPE_RENAME] = "Rename",
212 /* Need to differntiate from [R]ename.. serializing writes is the
213 * important job it does, anyway. */
214 [OCFS2_LOCK_TYPE_RW] = "Write/Read",
215};
216
217static char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
218{
219 mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type);
220 return ocfs2_lock_type_strings[type];
221}
222 284
223static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 285static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
224 u64 blkno, 286 u64 blkno,
@@ -265,13 +327,9 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
265static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 327static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
266 struct ocfs2_lock_res *res, 328 struct ocfs2_lock_res *res,
267 enum ocfs2_lock_type type, 329 enum ocfs2_lock_type type,
268 u64 blkno,
269 u32 generation,
270 struct ocfs2_lock_res_ops *ops, 330 struct ocfs2_lock_res_ops *ops,
271 void *priv) 331 void *priv)
272{ 332{
273 ocfs2_build_lock_name(type, blkno, generation, res->l_name);
274
275 res->l_type = type; 333 res->l_type = type;
276 res->l_ops = ops; 334 res->l_ops = ops;
277 res->l_priv = priv; 335 res->l_priv = priv;
@@ -299,6 +357,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
299 357
300void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 358void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
301 enum ocfs2_lock_type type, 359 enum ocfs2_lock_type type,
360 unsigned int generation,
302 struct inode *inode) 361 struct inode *inode)
303{ 362{
304 struct ocfs2_lock_res_ops *ops; 363 struct ocfs2_lock_res_ops *ops;
@@ -319,9 +378,73 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
319 break; 378 break;
320 }; 379 };
321 380
322 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, 381 ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
323 OCFS2_I(inode)->ip_blkno, 382 generation, res->l_name);
324 inode->i_generation, ops, inode); 383 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
384}
385
386static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
387{
388 struct inode *inode = ocfs2_lock_res_inode(lockres);
389
390 return OCFS2_SB(inode->i_sb);
391}
392
393static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
394{
395 __be64 inode_blkno_be;
396
397 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
398 sizeof(__be64));
399
400 return be64_to_cpu(inode_blkno_be);
401}
402
403static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
404{
405 struct ocfs2_dentry_lock *dl = lockres->l_priv;
406
407 return OCFS2_SB(dl->dl_inode->i_sb);
408}
409
410void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
411 u64 parent, struct inode *inode)
412{
413 int len;
414 u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
415 __be64 inode_blkno_be = cpu_to_be64(inode_blkno);
416 struct ocfs2_lock_res *lockres = &dl->dl_lockres;
417
418 ocfs2_lock_res_init_once(lockres);
419
420 /*
421 * Unfortunately, the standard lock naming scheme won't work
422 * here because we have two 16 byte values to use. Instead,
423 * we'll stuff the inode number as a binary value. We still
424 * want error prints to show something without garbling the
425 * display, so drop a null byte in there before the inode
426 * number. A future version of OCFS2 will likely use all
427 * binary lock names. The stringified names have been a
428 * tremendous aid in debugging, but now that the debugfs
429 * interface exists, we can mangle things there if need be.
430 *
431 * NOTE: We also drop the standard "pad" value (the total lock
432 * name size stays the same though - the last part is all
433 * zeros due to the memset in ocfs2_lock_res_init_once()
434 */
435 len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
436 "%c%016llx",
437 ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
438 (long long)parent);
439
440 BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
441
442 memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
443 sizeof(__be64));
444
445 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
446 OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
447 dl);
325} 448}
326 449
327static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 450static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
@@ -330,8 +453,9 @@ static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
330 /* Superblock lockres doesn't come from a slab so we call init 453 /* Superblock lockres doesn't come from a slab so we call init
331 * once on it manually. */ 454 * once on it manually. */
332 ocfs2_lock_res_init_once(res); 455 ocfs2_lock_res_init_once(res);
456 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
457 0, res->l_name);
333 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 458 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
334 OCFS2_SUPER_BLOCK_BLKNO, 0,
335 &ocfs2_super_lops, osb); 459 &ocfs2_super_lops, osb);
336} 460}
337 461
@@ -341,7 +465,8 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
341 /* Rename lockres doesn't come from a slab so we call init 465 /* Rename lockres doesn't come from a slab so we call init
342 * once on it manually. */ 466 * once on it manually. */
343 ocfs2_lock_res_init_once(res); 467 ocfs2_lock_res_init_once(res);
344 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 0, 0, 468 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
469 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
345 &ocfs2_rename_lops, osb); 470 &ocfs2_rename_lops, osb);
346} 471}
347 472
@@ -495,7 +620,8 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
495 * information is already up to data. Convert from NL to 620 * information is already up to data. Convert from NL to
496 * *anything* however should mark ourselves as needing an 621 * *anything* however should mark ourselves as needing an
497 * update */ 622 * update */
498 if (lockres->l_level == LKM_NLMODE) 623 if (lockres->l_level == LKM_NLMODE &&
624 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
499 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 625 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
500 626
501 lockres->l_level = lockres->l_requested; 627 lockres->l_level = lockres->l_requested;
@@ -512,7 +638,8 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc
512 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 638 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
513 639
514 if (lockres->l_requested > LKM_NLMODE && 640 if (lockres->l_requested > LKM_NLMODE &&
515 !(lockres->l_flags & OCFS2_LOCK_LOCAL)) 641 !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
642 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
516 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 643 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
517 644
518 lockres->l_level = lockres->l_requested; 645 lockres->l_level = lockres->l_requested;
@@ -522,68 +649,6 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc
522 mlog_exit_void(); 649 mlog_exit_void();
523} 650}
524 651
525static void ocfs2_inode_ast_func(void *opaque)
526{
527 struct ocfs2_lock_res *lockres = opaque;
528 struct inode *inode;
529 struct dlm_lockstatus *lksb;
530 unsigned long flags;
531
532 mlog_entry_void();
533
534 inode = ocfs2_lock_res_inode(lockres);
535
536 mlog(0, "AST fired for inode %llu, l_action = %u, type = %s\n",
537 (unsigned long long)OCFS2_I(inode)->ip_blkno, lockres->l_action,
538 ocfs2_lock_type_string(lockres->l_type));
539
540 BUG_ON(!ocfs2_is_inode_lock(lockres));
541
542 spin_lock_irqsave(&lockres->l_lock, flags);
543
544 lksb = &(lockres->l_lksb);
545 if (lksb->status != DLM_NORMAL) {
546 mlog(ML_ERROR, "ocfs2_inode_ast_func: lksb status value of %u "
547 "on inode %llu\n", lksb->status,
548 (unsigned long long)OCFS2_I(inode)->ip_blkno);
549 spin_unlock_irqrestore(&lockres->l_lock, flags);
550 mlog_exit_void();
551 return;
552 }
553
554 switch(lockres->l_action) {
555 case OCFS2_AST_ATTACH:
556 ocfs2_generic_handle_attach_action(lockres);
557 lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
558 break;
559 case OCFS2_AST_CONVERT:
560 ocfs2_generic_handle_convert_action(lockres);
561 break;
562 case OCFS2_AST_DOWNCONVERT:
563 ocfs2_generic_handle_downconvert_action(lockres);
564 break;
565 default:
566 mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
567 "lockres flags = 0x%lx, unlock action: %u\n",
568 lockres->l_name, lockres->l_action, lockres->l_flags,
569 lockres->l_unlock_action);
570
571 BUG();
572 }
573
574 /* data and rw locking ignores refresh flag for now. */
575 if (lockres->l_type != OCFS2_LOCK_TYPE_META)
576 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
577
578 /* set it to something invalid so if we get called again we
579 * can catch it. */
580 lockres->l_action = OCFS2_AST_INVALID;
581 spin_unlock_irqrestore(&lockres->l_lock, flags);
582 wake_up(&lockres->l_event);
583
584 mlog_exit_void();
585}
586
587static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 652static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
588 int level) 653 int level)
589{ 654{
@@ -610,54 +675,33 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
610 return needs_downconvert; 675 return needs_downconvert;
611} 676}
612 677
613static void ocfs2_generic_bast_func(struct ocfs2_super *osb, 678static void ocfs2_blocking_ast(void *opaque, int level)
614 struct ocfs2_lock_res *lockres,
615 int level)
616{ 679{
680 struct ocfs2_lock_res *lockres = opaque;
681 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
617 int needs_downconvert; 682 int needs_downconvert;
618 unsigned long flags; 683 unsigned long flags;
619 684
620 mlog_entry_void();
621
622 BUG_ON(level <= LKM_NLMODE); 685 BUG_ON(level <= LKM_NLMODE);
623 686
687 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n",
688 lockres->l_name, level, lockres->l_level,
689 ocfs2_lock_type_string(lockres->l_type));
690
624 spin_lock_irqsave(&lockres->l_lock, flags); 691 spin_lock_irqsave(&lockres->l_lock, flags);
625 needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 692 needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
626 if (needs_downconvert) 693 if (needs_downconvert)
627 ocfs2_schedule_blocked_lock(osb, lockres); 694 ocfs2_schedule_blocked_lock(osb, lockres);
628 spin_unlock_irqrestore(&lockres->l_lock, flags); 695 spin_unlock_irqrestore(&lockres->l_lock, flags);
629 696
630 ocfs2_kick_vote_thread(osb);
631
632 wake_up(&lockres->l_event); 697 wake_up(&lockres->l_event);
633 mlog_exit_void();
634}
635
636static void ocfs2_inode_bast_func(void *opaque, int level)
637{
638 struct ocfs2_lock_res *lockres = opaque;
639 struct inode *inode;
640 struct ocfs2_super *osb;
641 698
642 mlog_entry_void(); 699 ocfs2_kick_vote_thread(osb);
643
644 BUG_ON(!ocfs2_is_inode_lock(lockres));
645
646 inode = ocfs2_lock_res_inode(lockres);
647 osb = OCFS2_SB(inode->i_sb);
648
649 mlog(0, "BAST fired for inode %llu, blocking %d, level %d type %s\n",
650 (unsigned long long)OCFS2_I(inode)->ip_blkno, level,
651 lockres->l_level, ocfs2_lock_type_string(lockres->l_type));
652
653 ocfs2_generic_bast_func(osb, lockres, level);
654
655 mlog_exit_void();
656} 700}
657 701
658static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres, 702static void ocfs2_locking_ast(void *opaque)
659 int ignore_refresh)
660{ 703{
704 struct ocfs2_lock_res *lockres = opaque;
661 struct dlm_lockstatus *lksb = &lockres->l_lksb; 705 struct dlm_lockstatus *lksb = &lockres->l_lksb;
662 unsigned long flags; 706 unsigned long flags;
663 707
@@ -673,6 +717,7 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres,
673 switch(lockres->l_action) { 717 switch(lockres->l_action) {
674 case OCFS2_AST_ATTACH: 718 case OCFS2_AST_ATTACH:
675 ocfs2_generic_handle_attach_action(lockres); 719 ocfs2_generic_handle_attach_action(lockres);
720 lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
676 break; 721 break;
677 case OCFS2_AST_CONVERT: 722 case OCFS2_AST_CONVERT:
678 ocfs2_generic_handle_convert_action(lockres); 723 ocfs2_generic_handle_convert_action(lockres);
@@ -681,80 +726,19 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres,
681 ocfs2_generic_handle_downconvert_action(lockres); 726 ocfs2_generic_handle_downconvert_action(lockres);
682 break; 727 break;
683 default: 728 default:
729 mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
730 "lockres flags = 0x%lx, unlock action: %u\n",
731 lockres->l_name, lockres->l_action, lockres->l_flags,
732 lockres->l_unlock_action);
684 BUG(); 733 BUG();
685 } 734 }
686 735
687 if (ignore_refresh)
688 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
689
690 /* set it to something invalid so if we get called again we 736 /* set it to something invalid so if we get called again we
691 * can catch it. */ 737 * can catch it. */
692 lockres->l_action = OCFS2_AST_INVALID; 738 lockres->l_action = OCFS2_AST_INVALID;
693 spin_unlock_irqrestore(&lockres->l_lock, flags);
694 739
695 wake_up(&lockres->l_event); 740 wake_up(&lockres->l_event);
696} 741 spin_unlock_irqrestore(&lockres->l_lock, flags);
697
698static void ocfs2_super_ast_func(void *opaque)
699{
700 struct ocfs2_lock_res *lockres = opaque;
701
702 mlog_entry_void();
703 mlog(0, "Superblock AST fired\n");
704
705 BUG_ON(!ocfs2_is_super_lock(lockres));
706 ocfs2_generic_ast_func(lockres, 0);
707
708 mlog_exit_void();
709}
710
711static void ocfs2_super_bast_func(void *opaque,
712 int level)
713{
714 struct ocfs2_lock_res *lockres = opaque;
715 struct ocfs2_super *osb;
716
717 mlog_entry_void();
718 mlog(0, "Superblock BAST fired\n");
719
720 BUG_ON(!ocfs2_is_super_lock(lockres));
721 osb = ocfs2_lock_res_super(lockres);
722 ocfs2_generic_bast_func(osb, lockres, level);
723
724 mlog_exit_void();
725}
726
727static void ocfs2_rename_ast_func(void *opaque)
728{
729 struct ocfs2_lock_res *lockres = opaque;
730
731 mlog_entry_void();
732
733 mlog(0, "Rename AST fired\n");
734
735 BUG_ON(!ocfs2_is_rename_lock(lockres));
736
737 ocfs2_generic_ast_func(lockres, 1);
738
739 mlog_exit_void();
740}
741
742static void ocfs2_rename_bast_func(void *opaque,
743 int level)
744{
745 struct ocfs2_lock_res *lockres = opaque;
746 struct ocfs2_super *osb;
747
748 mlog_entry_void();
749
750 mlog(0, "Rename BAST fired\n");
751
752 BUG_ON(!ocfs2_is_rename_lock(lockres));
753
754 osb = ocfs2_lock_res_super(lockres);
755 ocfs2_generic_bast_func(osb, lockres, level);
756
757 mlog_exit_void();
758} 742}
759 743
760static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 744static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
@@ -810,9 +794,10 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
810 &lockres->l_lksb, 794 &lockres->l_lksb,
811 dlm_flags, 795 dlm_flags,
812 lockres->l_name, 796 lockres->l_name,
813 lockres->l_ops->ast, 797 OCFS2_LOCK_ID_MAX_LEN - 1,
798 ocfs2_locking_ast,
814 lockres, 799 lockres,
815 lockres->l_ops->bast); 800 ocfs2_blocking_ast);
816 if (status != DLM_NORMAL) { 801 if (status != DLM_NORMAL) {
817 ocfs2_log_dlm_error("dlmlock", status, lockres); 802 ocfs2_log_dlm_error("dlmlock", status, lockres);
818 ret = -EINVAL; 803 ret = -EINVAL;
@@ -930,6 +915,9 @@ static int ocfs2_cluster_lock(struct ocfs2_super *osb,
930 915
931 ocfs2_init_mask_waiter(&mw); 916 ocfs2_init_mask_waiter(&mw);
932 917
918 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
919 lkm_flags |= LKM_VALBLK;
920
933again: 921again:
934 wait = 0; 922 wait = 0;
935 923
@@ -997,11 +985,12 @@ again:
997 status = dlmlock(osb->dlm, 985 status = dlmlock(osb->dlm,
998 level, 986 level,
999 &lockres->l_lksb, 987 &lockres->l_lksb,
1000 lkm_flags|LKM_CONVERT|LKM_VALBLK, 988 lkm_flags|LKM_CONVERT,
1001 lockres->l_name, 989 lockres->l_name,
1002 lockres->l_ops->ast, 990 OCFS2_LOCK_ID_MAX_LEN - 1,
991 ocfs2_locking_ast,
1003 lockres, 992 lockres,
1004 lockres->l_ops->bast); 993 ocfs2_blocking_ast);
1005 if (status != DLM_NORMAL) { 994 if (status != DLM_NORMAL) {
1006 if ((lkm_flags & LKM_NOQUEUE) && 995 if ((lkm_flags & LKM_NOQUEUE) &&
1007 (status == DLM_NOTQUEUED)) 996 (status == DLM_NOTQUEUED))
@@ -1074,18 +1063,21 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
1074 mlog_exit_void(); 1063 mlog_exit_void();
1075} 1064}
1076 1065
1077static int ocfs2_create_new_inode_lock(struct inode *inode, 1066int ocfs2_create_new_lock(struct ocfs2_super *osb,
1078 struct ocfs2_lock_res *lockres) 1067 struct ocfs2_lock_res *lockres,
1068 int ex,
1069 int local)
1079{ 1070{
1080 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1071 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1081 unsigned long flags; 1072 unsigned long flags;
1073 int lkm_flags = local ? LKM_LOCAL : 0;
1082 1074
1083 spin_lock_irqsave(&lockres->l_lock, flags); 1075 spin_lock_irqsave(&lockres->l_lock, flags);
1084 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1076 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1085 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1077 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1086 spin_unlock_irqrestore(&lockres->l_lock, flags); 1078 spin_unlock_irqrestore(&lockres->l_lock, flags);
1087 1079
1088 return ocfs2_lock_create(osb, lockres, LKM_EXMODE, LKM_LOCAL); 1080 return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1089} 1081}
1090 1082
1091/* Grants us an EX lock on the data and metadata resources, skipping 1083/* Grants us an EX lock on the data and metadata resources, skipping
@@ -1097,6 +1089,7 @@ static int ocfs2_create_new_inode_lock(struct inode *inode,
1097int ocfs2_create_new_inode_locks(struct inode *inode) 1089int ocfs2_create_new_inode_locks(struct inode *inode)
1098{ 1090{
1099 int ret; 1091 int ret;
1092 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1100 1093
1101 BUG_ON(!inode); 1094 BUG_ON(!inode);
1102 BUG_ON(!ocfs2_inode_is_new(inode)); 1095 BUG_ON(!ocfs2_inode_is_new(inode));
@@ -1113,22 +1106,23 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
1113 * on a resource which has an invalid one -- we'll set it 1106 * on a resource which has an invalid one -- we'll set it
1114 * valid when we release the EX. */ 1107 * valid when we release the EX. */
1115 1108
1116 ret = ocfs2_create_new_inode_lock(inode, 1109 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
1117 &OCFS2_I(inode)->ip_rw_lockres);
1118 if (ret) { 1110 if (ret) {
1119 mlog_errno(ret); 1111 mlog_errno(ret);
1120 goto bail; 1112 goto bail;
1121 } 1113 }
1122 1114
1123 ret = ocfs2_create_new_inode_lock(inode, 1115 /*
1124 &OCFS2_I(inode)->ip_meta_lockres); 1116 * We don't want to use LKM_LOCAL on a meta data lock as they
1117 * don't use a generation in their lock names.
1118 */
1119 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0);
1125 if (ret) { 1120 if (ret) {
1126 mlog_errno(ret); 1121 mlog_errno(ret);
1127 goto bail; 1122 goto bail;
1128 } 1123 }
1129 1124
1130 ret = ocfs2_create_new_inode_lock(inode, 1125 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1);
1131 &OCFS2_I(inode)->ip_data_lockres);
1132 if (ret) { 1126 if (ret) {
1133 mlog_errno(ret); 1127 mlog_errno(ret);
1134 goto bail; 1128 goto bail;
@@ -1317,7 +1311,17 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1317 1311
1318 lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 1312 lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
1319 1313
1320 lvb->lvb_version = cpu_to_be32(OCFS2_LVB_VERSION); 1314 /*
1315 * Invalidate the LVB of a deleted inode - this way other
1316 * nodes are forced to go to disk and discover the new inode
1317 * status.
1318 */
1319 if (oi->ip_flags & OCFS2_INODE_DELETED) {
1320 lvb->lvb_version = 0;
1321 goto out;
1322 }
1323
1324 lvb->lvb_version = OCFS2_LVB_VERSION;
1321 lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 1325 lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
1322 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 1326 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
1323 lvb->lvb_iuid = cpu_to_be32(inode->i_uid); 1327 lvb->lvb_iuid = cpu_to_be32(inode->i_uid);
@@ -1331,7 +1335,9 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1331 lvb->lvb_imtime_packed = 1335 lvb->lvb_imtime_packed =
1332 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 1336 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
1333 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 1337 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
1338 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
1334 1339
1340out:
1335 mlog_meta_lvb(0, lockres); 1341 mlog_meta_lvb(0, lockres);
1336 1342
1337 mlog_exit_void(); 1343 mlog_exit_void();
@@ -1386,11 +1392,13 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1386 mlog_exit_void(); 1392 mlog_exit_void();
1387} 1393}
1388 1394
1389static inline int ocfs2_meta_lvb_is_trustable(struct ocfs2_lock_res *lockres) 1395static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
1396 struct ocfs2_lock_res *lockres)
1390{ 1397{
1391 struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 1398 struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
1392 1399
1393 if (be32_to_cpu(lvb->lvb_version) == OCFS2_LVB_VERSION) 1400 if (lvb->lvb_version == OCFS2_LVB_VERSION
1401 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
1394 return 1; 1402 return 1;
1395 return 0; 1403 return 0;
1396} 1404}
@@ -1487,7 +1495,7 @@ static int ocfs2_meta_lock_update(struct inode *inode,
1487 * map (directories, bitmap files, etc) */ 1495 * map (directories, bitmap files, etc) */
1488 ocfs2_extent_map_trunc(inode, 0); 1496 ocfs2_extent_map_trunc(inode, 0);
1489 1497
1490 if (ocfs2_meta_lvb_is_trustable(lockres)) { 1498 if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
1491 mlog(0, "Trusting LVB on inode %llu\n", 1499 mlog(0, "Trusting LVB on inode %llu\n",
1492 (unsigned long long)oi->ip_blkno); 1500 (unsigned long long)oi->ip_blkno);
1493 ocfs2_refresh_inode_from_lvb(inode); 1501 ocfs2_refresh_inode_from_lvb(inode);
@@ -1628,6 +1636,18 @@ int ocfs2_meta_lock_full(struct inode *inode,
1628 wait_event(osb->recovery_event, 1636 wait_event(osb->recovery_event,
1629 ocfs2_node_map_is_empty(osb, &osb->recovery_map)); 1637 ocfs2_node_map_is_empty(osb, &osb->recovery_map));
1630 1638
1639 /*
1640 * We only see this flag if we're being called from
1641 * ocfs2_read_locked_inode(). It means we're locking an inode
1642 * which hasn't been populated yet, so clear the refresh flag
1643 * and let the caller handle it.
1644 */
1645 if (inode->i_state & I_NEW) {
1646 status = 0;
1647 ocfs2_complete_lock_res_refresh(lockres, 0);
1648 goto bail;
1649 }
1650
1631 /* This is fun. The caller may want a bh back, or it may 1651 /* This is fun. The caller may want a bh back, or it may
1632 * not. ocfs2_meta_lock_update definitely wants one in, but 1652 * not. ocfs2_meta_lock_update definitely wants one in, but
1633 * may or may not read one, depending on what's in the 1653 * may or may not read one, depending on what's in the
@@ -1807,6 +1827,34 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb)
1807 ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); 1827 ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE);
1808} 1828}
1809 1829
1830int ocfs2_dentry_lock(struct dentry *dentry, int ex)
1831{
1832 int ret;
1833 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1834 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
1835 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
1836
1837 BUG_ON(!dl);
1838
1839 if (ocfs2_is_hard_readonly(osb))
1840 return -EROFS;
1841
1842 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
1843 if (ret < 0)
1844 mlog_errno(ret);
1845
1846 return ret;
1847}
1848
1849void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
1850{
1851 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1852 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
1853 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
1854
1855 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
1856}
1857
1810/* Reference counting of the dlm debug structure. We want this because 1858/* Reference counting of the dlm debug structure. We want this because
1811 * open references on the debug inodes can live on after a mount, so 1859 * open references on the debug inodes can live on after a mount, so
1812 * we can't rely on the ocfs2_super to always exist. */ 1860 * we can't rely on the ocfs2_super to always exist. */
@@ -1937,9 +1985,16 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
1937 if (!lockres) 1985 if (!lockres)
1938 return -EINVAL; 1986 return -EINVAL;
1939 1987
1940 seq_printf(m, "0x%x\t" 1988 seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
1941 "%.*s\t" 1989
1942 "%d\t" 1990 if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
1991 seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
1992 lockres->l_name,
1993 (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
1994 else
1995 seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
1996
1997 seq_printf(m, "%d\t"
1943 "0x%lx\t" 1998 "0x%lx\t"
1944 "0x%x\t" 1999 "0x%x\t"
1945 "0x%x\t" 2000 "0x%x\t"
@@ -1947,8 +2002,6 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
1947 "%u\t" 2002 "%u\t"
1948 "%d\t" 2003 "%d\t"
1949 "%d\t", 2004 "%d\t",
1950 OCFS2_DLM_DEBUG_STR_VERSION,
1951 OCFS2_LOCK_ID_MAX_LEN, lockres->l_name,
1952 lockres->l_level, 2005 lockres->l_level,
1953 lockres->l_flags, 2006 lockres->l_flags,
1954 lockres->l_action, 2007 lockres->l_action,
@@ -2138,7 +2191,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb)
2138 mlog_exit_void(); 2191 mlog_exit_void();
2139} 2192}
2140 2193
2141static void ocfs2_unlock_ast_func(void *opaque, enum dlm_status status) 2194static void ocfs2_unlock_ast(void *opaque, enum dlm_status status)
2142{ 2195{
2143 struct ocfs2_lock_res *lockres = opaque; 2196 struct ocfs2_lock_res *lockres = opaque;
2144 unsigned long flags; 2197 unsigned long flags;
@@ -2194,24 +2247,20 @@ complete_unlock:
2194 mlog_exit_void(); 2247 mlog_exit_void();
2195} 2248}
2196 2249
2197typedef void (ocfs2_pre_drop_cb_t)(struct ocfs2_lock_res *, void *);
2198
2199struct drop_lock_cb {
2200 ocfs2_pre_drop_cb_t *drop_func;
2201 void *drop_data;
2202};
2203
2204static int ocfs2_drop_lock(struct ocfs2_super *osb, 2250static int ocfs2_drop_lock(struct ocfs2_super *osb,
2205 struct ocfs2_lock_res *lockres, 2251 struct ocfs2_lock_res *lockres)
2206 struct drop_lock_cb *dcb)
2207{ 2252{
2208 enum dlm_status status; 2253 enum dlm_status status;
2209 unsigned long flags; 2254 unsigned long flags;
2255 int lkm_flags = 0;
2210 2256
2211 /* We didn't get anywhere near actually using this lockres. */ 2257 /* We didn't get anywhere near actually using this lockres. */
2212 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 2258 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
2213 goto out; 2259 goto out;
2214 2260
2261 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
2262 lkm_flags |= LKM_VALBLK;
2263
2215 spin_lock_irqsave(&lockres->l_lock, flags); 2264 spin_lock_irqsave(&lockres->l_lock, flags);
2216 2265
2217 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), 2266 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
@@ -2234,8 +2283,12 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
2234 spin_lock_irqsave(&lockres->l_lock, flags); 2283 spin_lock_irqsave(&lockres->l_lock, flags);
2235 } 2284 }
2236 2285
2237 if (dcb) 2286 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
2238 dcb->drop_func(lockres, dcb->drop_data); 2287 if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
2288 lockres->l_level == LKM_EXMODE &&
2289 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
2290 lockres->l_ops->set_lvb(lockres);
2291 }
2239 2292
2240 if (lockres->l_flags & OCFS2_LOCK_BUSY) 2293 if (lockres->l_flags & OCFS2_LOCK_BUSY)
2241 mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", 2294 mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
@@ -2261,8 +2314,8 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
2261 2314
2262 mlog(0, "lock %s\n", lockres->l_name); 2315 mlog(0, "lock %s\n", lockres->l_name);
2263 2316
2264 status = dlmunlock(osb->dlm, &lockres->l_lksb, LKM_VALBLK, 2317 status = dlmunlock(osb->dlm, &lockres->l_lksb, lkm_flags,
2265 lockres->l_ops->unlock_ast, lockres); 2318 ocfs2_unlock_ast, lockres);
2266 if (status != DLM_NORMAL) { 2319 if (status != DLM_NORMAL) {
2267 ocfs2_log_dlm_error("dlmunlock", status, lockres); 2320 ocfs2_log_dlm_error("dlmunlock", status, lockres);
2268 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 2321 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
@@ -2309,43 +2362,26 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
2309 spin_unlock_irqrestore(&lockres->l_lock, flags); 2362 spin_unlock_irqrestore(&lockres->l_lock, flags);
2310} 2363}
2311 2364
2312static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 2365void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
2366 struct ocfs2_lock_res *lockres)
2313{ 2367{
2314 int status; 2368 int ret;
2315
2316 mlog_entry_void();
2317
2318 ocfs2_mark_lockres_freeing(&osb->osb_super_lockres);
2319
2320 status = ocfs2_drop_lock(osb, &osb->osb_super_lockres, NULL);
2321 if (status < 0)
2322 mlog_errno(status);
2323
2324 ocfs2_mark_lockres_freeing(&osb->osb_rename_lockres);
2325
2326 status = ocfs2_drop_lock(osb, &osb->osb_rename_lockres, NULL);
2327 if (status < 0)
2328 mlog_errno(status);
2329 2369
2330 mlog_exit(status); 2370 ocfs2_mark_lockres_freeing(lockres);
2371 ret = ocfs2_drop_lock(osb, lockres);
2372 if (ret)
2373 mlog_errno(ret);
2331} 2374}
2332 2375
2333static void ocfs2_meta_pre_drop(struct ocfs2_lock_res *lockres, void *data) 2376static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
2334{ 2377{
2335 struct inode *inode = data; 2378 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
2336 2379 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
2337 /* the metadata lock requires a bit more work as we have an
2338 * LVB to worry about. */
2339 if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
2340 lockres->l_level == LKM_EXMODE &&
2341 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
2342 __ocfs2_stuff_meta_lvb(inode);
2343} 2380}
2344 2381
2345int ocfs2_drop_inode_locks(struct inode *inode) 2382int ocfs2_drop_inode_locks(struct inode *inode)
2346{ 2383{
2347 int status, err; 2384 int status, err;
2348 struct drop_lock_cb meta_dcb = { ocfs2_meta_pre_drop, inode, };
2349 2385
2350 mlog_entry_void(); 2386 mlog_entry_void();
2351 2387
@@ -2353,24 +2389,21 @@ int ocfs2_drop_inode_locks(struct inode *inode)
2353 * ocfs2_clear_inode has done it for us. */ 2389 * ocfs2_clear_inode has done it for us. */
2354 2390
2355 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 2391 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2356 &OCFS2_I(inode)->ip_data_lockres, 2392 &OCFS2_I(inode)->ip_data_lockres);
2357 NULL);
2358 if (err < 0) 2393 if (err < 0)
2359 mlog_errno(err); 2394 mlog_errno(err);
2360 2395
2361 status = err; 2396 status = err;
2362 2397
2363 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 2398 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2364 &OCFS2_I(inode)->ip_meta_lockres, 2399 &OCFS2_I(inode)->ip_meta_lockres);
2365 &meta_dcb);
2366 if (err < 0) 2400 if (err < 0)
2367 mlog_errno(err); 2401 mlog_errno(err);
2368 if (err < 0 && !status) 2402 if (err < 0 && !status)
2369 status = err; 2403 status = err;
2370 2404
2371 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 2405 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2372 &OCFS2_I(inode)->ip_rw_lockres, 2406 &OCFS2_I(inode)->ip_rw_lockres);
2373 NULL);
2374 if (err < 0) 2407 if (err < 0)
2375 mlog_errno(err); 2408 mlog_errno(err);
2376 if (err < 0 && !status) 2409 if (err < 0 && !status)
@@ -2419,9 +2452,10 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
2419 &lockres->l_lksb, 2452 &lockres->l_lksb,
2420 dlm_flags, 2453 dlm_flags,
2421 lockres->l_name, 2454 lockres->l_name,
2422 lockres->l_ops->ast, 2455 OCFS2_LOCK_ID_MAX_LEN - 1,
2456 ocfs2_locking_ast,
2423 lockres, 2457 lockres,
2424 lockres->l_ops->bast); 2458 ocfs2_blocking_ast);
2425 if (status != DLM_NORMAL) { 2459 if (status != DLM_NORMAL) {
2426 ocfs2_log_dlm_error("dlmlock", status, lockres); 2460 ocfs2_log_dlm_error("dlmlock", status, lockres);
2427 ret = -EINVAL; 2461 ret = -EINVAL;
@@ -2480,7 +2514,7 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
2480 status = dlmunlock(osb->dlm, 2514 status = dlmunlock(osb->dlm,
2481 &lockres->l_lksb, 2515 &lockres->l_lksb,
2482 LKM_CANCEL, 2516 LKM_CANCEL,
2483 lockres->l_ops->unlock_ast, 2517 ocfs2_unlock_ast,
2484 lockres); 2518 lockres);
2485 if (status != DLM_NORMAL) { 2519 if (status != DLM_NORMAL) {
2486 ocfs2_log_dlm_error("dlmunlock", status, lockres); 2520 ocfs2_log_dlm_error("dlmunlock", status, lockres);
@@ -2494,115 +2528,15 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
2494 return ret; 2528 return ret;
2495} 2529}
2496 2530
2497static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode, 2531static int ocfs2_unblock_lock(struct ocfs2_super *osb,
2498 struct ocfs2_lock_res *lockres, 2532 struct ocfs2_lock_res *lockres,
2499 int new_level) 2533 struct ocfs2_unblock_ctl *ctl)
2500{
2501 int ret;
2502
2503 mlog_entry_void();
2504
2505 BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE);
2506
2507 if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2508 ret = 0;
2509 mlog(0, "lockres %s currently being refreshed -- backing "
2510 "off!\n", lockres->l_name);
2511 } else if (new_level == LKM_PRMODE)
2512 ret = !lockres->l_ex_holders &&
2513 ocfs2_inode_fully_checkpointed(inode);
2514 else /* Must be NLMODE we're converting to. */
2515 ret = !lockres->l_ro_holders && !lockres->l_ex_holders &&
2516 ocfs2_inode_fully_checkpointed(inode);
2517
2518 mlog_exit(ret);
2519 return ret;
2520}
2521
2522static int ocfs2_do_unblock_meta(struct inode *inode,
2523 int *requeue)
2524{
2525 int new_level;
2526 int set_lvb = 0;
2527 int ret = 0;
2528 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
2529 unsigned long flags;
2530
2531 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2532
2533 mlog_entry_void();
2534
2535 spin_lock_irqsave(&lockres->l_lock, flags);
2536
2537 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
2538
2539 mlog(0, "l_level=%d, l_blocking=%d\n", lockres->l_level,
2540 lockres->l_blocking);
2541
2542 BUG_ON(lockres->l_level != LKM_EXMODE &&
2543 lockres->l_level != LKM_PRMODE);
2544
2545 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
2546 *requeue = 1;
2547 ret = ocfs2_prepare_cancel_convert(osb, lockres);
2548 spin_unlock_irqrestore(&lockres->l_lock, flags);
2549 if (ret) {
2550 ret = ocfs2_cancel_convert(osb, lockres);
2551 if (ret < 0)
2552 mlog_errno(ret);
2553 }
2554 goto leave;
2555 }
2556
2557 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
2558
2559 mlog(0, "l_level=%d, l_blocking=%d, new_level=%d\n",
2560 lockres->l_level, lockres->l_blocking, new_level);
2561
2562 if (ocfs2_can_downconvert_meta_lock(inode, lockres, new_level)) {
2563 if (lockres->l_level == LKM_EXMODE)
2564 set_lvb = 1;
2565
2566 /* If the lock hasn't been refreshed yet (rare), then
2567 * our memory inode values are old and we skip
2568 * stuffing the lvb. There's no need to actually clear
2569 * out the lvb here as it's value is still valid. */
2570 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2571 if (set_lvb)
2572 __ocfs2_stuff_meta_lvb(inode);
2573 } else
2574 mlog(0, "lockres %s: downconverting stale lock!\n",
2575 lockres->l_name);
2576
2577 mlog(0, "calling ocfs2_downconvert_lock with l_level=%d, "
2578 "l_blocking=%d, new_level=%d\n",
2579 lockres->l_level, lockres->l_blocking, new_level);
2580
2581 ocfs2_prepare_downconvert(lockres, new_level);
2582 spin_unlock_irqrestore(&lockres->l_lock, flags);
2583 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb);
2584 goto leave;
2585 }
2586 if (!ocfs2_inode_fully_checkpointed(inode))
2587 ocfs2_start_checkpoint(osb);
2588
2589 *requeue = 1;
2590 spin_unlock_irqrestore(&lockres->l_lock, flags);
2591 ret = 0;
2592leave:
2593 mlog_exit(ret);
2594 return ret;
2595}
2596
2597static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
2598 struct ocfs2_lock_res *lockres,
2599 int *requeue,
2600 ocfs2_convert_worker_t *worker)
2601{ 2534{
2602 unsigned long flags; 2535 unsigned long flags;
2603 int blocking; 2536 int blocking;
2604 int new_level; 2537 int new_level;
2605 int ret = 0; 2538 int ret = 0;
2539 int set_lvb = 0;
2606 2540
2607 mlog_entry_void(); 2541 mlog_entry_void();
2608 2542
@@ -2612,7 +2546,7 @@ static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
2612 2546
2613recheck: 2547recheck:
2614 if (lockres->l_flags & OCFS2_LOCK_BUSY) { 2548 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
2615 *requeue = 1; 2549 ctl->requeue = 1;
2616 ret = ocfs2_prepare_cancel_convert(osb, lockres); 2550 ret = ocfs2_prepare_cancel_convert(osb, lockres);
2617 spin_unlock_irqrestore(&lockres->l_lock, flags); 2551 spin_unlock_irqrestore(&lockres->l_lock, flags);
2618 if (ret) { 2552 if (ret) {
@@ -2626,27 +2560,33 @@ recheck:
2626 /* if we're blocking an exclusive and we have *any* holders, 2560 /* if we're blocking an exclusive and we have *any* holders,
2627 * then requeue. */ 2561 * then requeue. */
2628 if ((lockres->l_blocking == LKM_EXMODE) 2562 if ((lockres->l_blocking == LKM_EXMODE)
2629 && (lockres->l_ex_holders || lockres->l_ro_holders)) { 2563 && (lockres->l_ex_holders || lockres->l_ro_holders))
2630 spin_unlock_irqrestore(&lockres->l_lock, flags); 2564 goto leave_requeue;
2631 *requeue = 1;
2632 ret = 0;
2633 goto leave;
2634 }
2635 2565
2636 /* If it's a PR we're blocking, then only 2566 /* If it's a PR we're blocking, then only
2637 * requeue if we've got any EX holders */ 2567 * requeue if we've got any EX holders */
2638 if (lockres->l_blocking == LKM_PRMODE && 2568 if (lockres->l_blocking == LKM_PRMODE &&
2639 lockres->l_ex_holders) { 2569 lockres->l_ex_holders)
2640 spin_unlock_irqrestore(&lockres->l_lock, flags); 2570 goto leave_requeue;
2641 *requeue = 1; 2571
2642 ret = 0; 2572 /*
2643 goto leave; 2573 * Can we get a lock in this state if the holder counts are
2644 } 2574 * zero? The meta data unblock code used to check this.
2575 */
2576 if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
2577 && (lockres->l_flags & OCFS2_LOCK_REFRESHING))
2578 goto leave_requeue;
2579
2580 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
2581
2582 if (lockres->l_ops->check_downconvert
2583 && !lockres->l_ops->check_downconvert(lockres, new_level))
2584 goto leave_requeue;
2645 2585
2646 /* If we get here, then we know that there are no more 2586 /* If we get here, then we know that there are no more
2647 * incompatible holders (and anyone asking for an incompatible 2587 * incompatible holders (and anyone asking for an incompatible
2648 * lock is blocked). We can now downconvert the lock */ 2588 * lock is blocked). We can now downconvert the lock */
2649 if (!worker) 2589 if (!lockres->l_ops->downconvert_worker)
2650 goto downconvert; 2590 goto downconvert;
2651 2591
2652 /* Some lockres types want to do a bit of work before 2592 /* Some lockres types want to do a bit of work before
@@ -2656,7 +2596,10 @@ recheck:
2656 blocking = lockres->l_blocking; 2596 blocking = lockres->l_blocking;
2657 spin_unlock_irqrestore(&lockres->l_lock, flags); 2597 spin_unlock_irqrestore(&lockres->l_lock, flags);
2658 2598
2659 worker(lockres, blocking); 2599 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
2600
2601 if (ctl->unblock_action == UNBLOCK_STOP_POST)
2602 goto leave;
2660 2603
2661 spin_lock_irqsave(&lockres->l_lock, flags); 2604 spin_lock_irqsave(&lockres->l_lock, flags);
2662 if (blocking != lockres->l_blocking) { 2605 if (blocking != lockres->l_blocking) {
@@ -2666,25 +2609,43 @@ recheck:
2666 } 2609 }
2667 2610
2668downconvert: 2611downconvert:
2669 *requeue = 0; 2612 ctl->requeue = 0;
2670 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 2613
2614 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
2615 if (lockres->l_level == LKM_EXMODE)
2616 set_lvb = 1;
2617
2618 /*
2619 * We only set the lvb if the lock has been fully
2620 * refreshed - otherwise we risk setting stale
2621 * data. Otherwise, there's no need to actually clear
2622 * out the lvb here as it's value is still valid.
2623 */
2624 if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
2625 lockres->l_ops->set_lvb(lockres);
2626 }
2671 2627
2672 ocfs2_prepare_downconvert(lockres, new_level); 2628 ocfs2_prepare_downconvert(lockres, new_level);
2673 spin_unlock_irqrestore(&lockres->l_lock, flags); 2629 spin_unlock_irqrestore(&lockres->l_lock, flags);
2674 ret = ocfs2_downconvert_lock(osb, lockres, new_level, 0); 2630 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb);
2675leave: 2631leave:
2676 mlog_exit(ret); 2632 mlog_exit(ret);
2677 return ret; 2633 return ret;
2634
2635leave_requeue:
2636 spin_unlock_irqrestore(&lockres->l_lock, flags);
2637 ctl->requeue = 1;
2638
2639 mlog_exit(0);
2640 return 0;
2678} 2641}
2679 2642
2680static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 2643static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
2681 int blocking) 2644 int blocking)
2682{ 2645{
2683 struct inode *inode; 2646 struct inode *inode;
2684 struct address_space *mapping; 2647 struct address_space *mapping;
2685 2648
2686 mlog_entry_void();
2687
2688 inode = ocfs2_lock_res_inode(lockres); 2649 inode = ocfs2_lock_res_inode(lockres);
2689 mapping = inode->i_mapping; 2650 mapping = inode->i_mapping;
2690 2651
@@ -2705,116 +2666,159 @@ static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
2705 filemap_fdatawait(mapping); 2666 filemap_fdatawait(mapping);
2706 } 2667 }
2707 2668
2708 mlog_exit_void(); 2669 return UNBLOCK_CONTINUE;
2709} 2670}
2710 2671
2711int ocfs2_unblock_data(struct ocfs2_lock_res *lockres, 2672static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
2712 int *requeue) 2673 int new_level)
2713{ 2674{
2714 int status; 2675 struct inode *inode = ocfs2_lock_res_inode(lockres);
2715 struct inode *inode; 2676 int checkpointed = ocfs2_inode_fully_checkpointed(inode);
2716 struct ocfs2_super *osb;
2717
2718 mlog_entry_void();
2719
2720 inode = ocfs2_lock_res_inode(lockres);
2721 osb = OCFS2_SB(inode->i_sb);
2722
2723 mlog(0, "unblock inode %llu\n",
2724 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2725 2677
2726 status = ocfs2_generic_unblock_lock(osb, 2678 BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE);
2727 lockres, 2679 BUG_ON(lockres->l_level != LKM_EXMODE && !checkpointed);
2728 requeue,
2729 ocfs2_data_convert_worker);
2730 if (status < 0)
2731 mlog_errno(status);
2732 2680
2733 mlog(0, "inode %llu, requeue = %d\n", 2681 if (checkpointed)
2734 (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue); 2682 return 1;
2735 2683
2736 mlog_exit(status); 2684 ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb));
2737 return status; 2685 return 0;
2738} 2686}
2739 2687
2740static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres, 2688static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
2741 int *requeue)
2742{ 2689{
2743 int status; 2690 struct inode *inode = ocfs2_lock_res_inode(lockres);
2744 struct inode *inode;
2745
2746 mlog_entry_void();
2747
2748 mlog(0, "Unblock lockres %s\n", lockres->l_name);
2749
2750 inode = ocfs2_lock_res_inode(lockres);
2751 2691
2752 status = ocfs2_generic_unblock_lock(OCFS2_SB(inode->i_sb), 2692 __ocfs2_stuff_meta_lvb(inode);
2753 lockres,
2754 requeue,
2755 NULL);
2756 if (status < 0)
2757 mlog_errno(status);
2758
2759 mlog_exit(status);
2760 return status;
2761} 2693}
2762 2694
2763 2695/*
2764int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres, 2696 * Does the final reference drop on our dentry lock. Right now this
2765 int *requeue) 2697 * happens in the vote thread, but we could choose to simplify the
2698 * dlmglue API and push these off to the ocfs2_wq in the future.
2699 */
2700static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
2701 struct ocfs2_lock_res *lockres)
2766{ 2702{
2767 int status; 2703 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
2768 struct inode *inode; 2704 ocfs2_dentry_lock_put(osb, dl);
2769 2705}
2770 mlog_entry_void();
2771 2706
2772 inode = ocfs2_lock_res_inode(lockres); 2707/*
2708 * d_delete() matching dentries before the lock downconvert.
2709 *
2710 * At this point, any process waiting to destroy the
2711 * dentry_lock due to last ref count is stopped by the
2712 * OCFS2_LOCK_QUEUED flag.
2713 *
2714 * We have two potential problems
2715 *
2716 * 1) If we do the last reference drop on our dentry_lock (via dput)
2717 * we'll wind up in ocfs2_release_dentry_lock(), waiting on
2718 * the downconvert to finish. Instead we take an elevated
2719 * reference and push the drop until after we've completed our
2720 * unblock processing.
2721 *
2722 * 2) There might be another process with a final reference,
2723 * waiting on us to finish processing. If this is the case, we
2724 * detect it and exit out - there's no more dentries anyway.
2725 */
2726static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
2727 int blocking)
2728{
2729 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
2730 struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
2731 struct dentry *dentry;
2732 unsigned long flags;
2733 int extra_ref = 0;
2773 2734
2774 mlog(0, "unblock inode %llu\n", 2735 /*
2775 (unsigned long long)OCFS2_I(inode)->ip_blkno); 2736 * This node is blocking another node from getting a read
2737 * lock. This happens when we've renamed within a
2738 * directory. We've forced the other nodes to d_delete(), but
2739 * we never actually dropped our lock because it's still
2740 * valid. The downconvert code will retain a PR for this node,
2741 * so there's no further work to do.
2742 */
2743 if (blocking == LKM_PRMODE)
2744 return UNBLOCK_CONTINUE;
2776 2745
2777 status = ocfs2_do_unblock_meta(inode, requeue); 2746 /*
2778 if (status < 0) 2747 * Mark this inode as potentially orphaned. The code in
2779 mlog_errno(status); 2748 * ocfs2_delete_inode() will figure out whether it actually
2749 * needs to be freed or not.
2750 */
2751 spin_lock(&oi->ip_lock);
2752 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
2753 spin_unlock(&oi->ip_lock);
2780 2754
2781 mlog(0, "inode %llu, requeue = %d\n", 2755 /*
2782 (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue); 2756 * Yuck. We need to make sure however that the check of
2757 * OCFS2_LOCK_FREEING and the extra reference are atomic with
2758 * respect to a reference decrement or the setting of that
2759 * flag.
2760 */
2761 spin_lock_irqsave(&lockres->l_lock, flags);
2762 spin_lock(&dentry_attach_lock);
2763 if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
2764 && dl->dl_count) {
2765 dl->dl_count++;
2766 extra_ref = 1;
2767 }
2768 spin_unlock(&dentry_attach_lock);
2769 spin_unlock_irqrestore(&lockres->l_lock, flags);
2783 2770
2784 mlog_exit(status); 2771 mlog(0, "extra_ref = %d\n", extra_ref);
2785 return status;
2786}
2787 2772
2788/* Generic unblock function for any lockres whose private data is an 2773 /*
2789 * ocfs2_super pointer. */ 2774 * We have a process waiting on us in ocfs2_dentry_iput(),
2790static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres, 2775 * which means we can't have any more outstanding
2791 int *requeue) 2776 * aliases. There's no need to do any more work.
2792{ 2777 */
2793 int status; 2778 if (!extra_ref)
2794 struct ocfs2_super *osb; 2779 return UNBLOCK_CONTINUE;
2780
2781 spin_lock(&dentry_attach_lock);
2782 while (1) {
2783 dentry = ocfs2_find_local_alias(dl->dl_inode,
2784 dl->dl_parent_blkno, 1);
2785 if (!dentry)
2786 break;
2787 spin_unlock(&dentry_attach_lock);
2795 2788
2796 mlog_entry_void(); 2789 mlog(0, "d_delete(%.*s);\n", dentry->d_name.len,
2790 dentry->d_name.name);
2797 2791
2798 mlog(0, "Unblock lockres %s\n", lockres->l_name); 2792 /*
2793 * The following dcache calls may do an
2794 * iput(). Normally we don't want that from the
2795 * downconverting thread, but in this case it's ok
2796 * because the requesting node already has an
2797 * exclusive lock on the inode, so it can't be queued
2798 * for a downconvert.
2799 */
2800 d_delete(dentry);
2801 dput(dentry);
2799 2802
2800 osb = ocfs2_lock_res_super(lockres); 2803 spin_lock(&dentry_attach_lock);
2804 }
2805 spin_unlock(&dentry_attach_lock);
2801 2806
2802 status = ocfs2_generic_unblock_lock(osb, 2807 /*
2803 lockres, 2808 * If we are the last holder of this dentry lock, there is no
2804 requeue, 2809 * reason to downconvert so skip straight to the unlock.
2805 NULL); 2810 */
2806 if (status < 0) 2811 if (dl->dl_count == 1)
2807 mlog_errno(status); 2812 return UNBLOCK_STOP_POST;
2808 2813
2809 mlog_exit(status); 2814 return UNBLOCK_CONTINUE_POST;
2810 return status;
2811} 2815}
2812 2816
2813void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 2817void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
2814 struct ocfs2_lock_res *lockres) 2818 struct ocfs2_lock_res *lockres)
2815{ 2819{
2816 int status; 2820 int status;
2817 int requeue = 0; 2821 struct ocfs2_unblock_ctl ctl = {0, 0,};
2818 unsigned long flags; 2822 unsigned long flags;
2819 2823
2820 /* Our reference to the lockres in this function can be 2824 /* Our reference to the lockres in this function can be
@@ -2825,7 +2829,6 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
2825 2829
2826 BUG_ON(!lockres); 2830 BUG_ON(!lockres);
2827 BUG_ON(!lockres->l_ops); 2831 BUG_ON(!lockres->l_ops);
2828 BUG_ON(!lockres->l_ops->unblock);
2829 2832
2830 mlog(0, "lockres %s blocked.\n", lockres->l_name); 2833 mlog(0, "lockres %s blocked.\n", lockres->l_name);
2831 2834
@@ -2839,21 +2842,25 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
2839 goto unqueue; 2842 goto unqueue;
2840 spin_unlock_irqrestore(&lockres->l_lock, flags); 2843 spin_unlock_irqrestore(&lockres->l_lock, flags);
2841 2844
2842 status = lockres->l_ops->unblock(lockres, &requeue); 2845 status = ocfs2_unblock_lock(osb, lockres, &ctl);
2843 if (status < 0) 2846 if (status < 0)
2844 mlog_errno(status); 2847 mlog_errno(status);
2845 2848
2846 spin_lock_irqsave(&lockres->l_lock, flags); 2849 spin_lock_irqsave(&lockres->l_lock, flags);
2847unqueue: 2850unqueue:
2848 if (lockres->l_flags & OCFS2_LOCK_FREEING || !requeue) { 2851 if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
2849 lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); 2852 lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
2850 } else 2853 } else
2851 ocfs2_schedule_blocked_lock(osb, lockres); 2854 ocfs2_schedule_blocked_lock(osb, lockres);
2852 2855
2853 mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, 2856 mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name,
2854 requeue ? "yes" : "no"); 2857 ctl.requeue ? "yes" : "no");
2855 spin_unlock_irqrestore(&lockres->l_lock, flags); 2858 spin_unlock_irqrestore(&lockres->l_lock, flags);
2856 2859
2860 if (ctl.unblock_action != UNBLOCK_CONTINUE
2861 && lockres->l_ops->post_unlock)
2862 lockres->l_ops->post_unlock(osb, lockres);
2863
2857 mlog_exit_void(); 2864 mlog_exit_void();
2858} 2865}
2859 2866
@@ -2896,8 +2903,9 @@ void ocfs2_dump_meta_lvb_info(u64 level,
2896 2903
2897 mlog(level, "LVB information for %s (called from %s:%u):\n", 2904 mlog(level, "LVB information for %s (called from %s:%u):\n",
2898 lockres->l_name, function, line); 2905 lockres->l_name, function, line);
2899 mlog(level, "version: %u, clusters: %u\n", 2906 mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
2900 be32_to_cpu(lvb->lvb_version), be32_to_cpu(lvb->lvb_iclusters)); 2907 lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
2908 be32_to_cpu(lvb->lvb_igeneration));
2901 mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", 2909 mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
2902 (unsigned long long)be64_to_cpu(lvb->lvb_isize), 2910 (unsigned long long)be64_to_cpu(lvb->lvb_isize),
2903 be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), 2911 be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 243ae862ece5..4a2769387229 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -27,10 +27,14 @@
27#ifndef DLMGLUE_H 27#ifndef DLMGLUE_H
28#define DLMGLUE_H 28#define DLMGLUE_H
29 29
30#define OCFS2_LVB_VERSION 3 30#include "dcache.h"
31
32#define OCFS2_LVB_VERSION 4
31 33
32struct ocfs2_meta_lvb { 34struct ocfs2_meta_lvb {
33 __be32 lvb_version; 35 __u8 lvb_version;
36 __u8 lvb_reserved0;
37 __be16 lvb_reserved1;
34 __be32 lvb_iclusters; 38 __be32 lvb_iclusters;
35 __be32 lvb_iuid; 39 __be32 lvb_iuid;
36 __be32 lvb_igid; 40 __be32 lvb_igid;
@@ -41,7 +45,8 @@ struct ocfs2_meta_lvb {
41 __be16 lvb_imode; 45 __be16 lvb_imode;
42 __be16 lvb_inlink; 46 __be16 lvb_inlink;
43 __be32 lvb_iattr; 47 __be32 lvb_iattr;
44 __be32 lvb_reserved[2]; 48 __be32 lvb_igeneration;
49 __be32 lvb_reserved2;
45}; 50};
46 51
47/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */ 52/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
@@ -57,9 +62,14 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb);
57void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); 62void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
58void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 63void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
59 enum ocfs2_lock_type type, 64 enum ocfs2_lock_type type,
65 unsigned int generation,
60 struct inode *inode); 66 struct inode *inode);
67void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
68 u64 parent, struct inode *inode);
61void ocfs2_lock_res_free(struct ocfs2_lock_res *res); 69void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
62int ocfs2_create_new_inode_locks(struct inode *inode); 70int ocfs2_create_new_inode_locks(struct inode *inode);
71int ocfs2_create_new_lock(struct ocfs2_super *osb,
72 struct ocfs2_lock_res *lockres, int ex, int local);
63int ocfs2_drop_inode_locks(struct inode *inode); 73int ocfs2_drop_inode_locks(struct inode *inode);
64int ocfs2_data_lock_full(struct inode *inode, 74int ocfs2_data_lock_full(struct inode *inode,
65 int write, 75 int write,
@@ -93,7 +103,12 @@ void ocfs2_super_unlock(struct ocfs2_super *osb,
93 int ex); 103 int ex);
94int ocfs2_rename_lock(struct ocfs2_super *osb); 104int ocfs2_rename_lock(struct ocfs2_super *osb);
95void ocfs2_rename_unlock(struct ocfs2_super *osb); 105void ocfs2_rename_unlock(struct ocfs2_super *osb);
106int ocfs2_dentry_lock(struct dentry *dentry, int ex);
107void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
108
96void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); 109void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
110void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
111 struct ocfs2_lock_res *lockres);
97 112
98/* for the vote thread */ 113/* for the vote thread */
99void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 114void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index ec55ab3c1214..fb91089a60a7 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -33,6 +33,7 @@
33 33
34#include "dir.h" 34#include "dir.h"
35#include "dlmglue.h" 35#include "dlmglue.h"
36#include "dcache.h"
36#include "export.h" 37#include "export.h"
37#include "inode.h" 38#include "inode.h"
38 39
@@ -57,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
57 return ERR_PTR(-ESTALE); 58 return ERR_PTR(-ESTALE);
58 } 59 }
59 60
60 inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno); 61 inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0);
61 62
62 if (IS_ERR(inode)) { 63 if (IS_ERR(inode)) {
63 mlog_errno(PTR_ERR(inode)); 64 mlog_errno(PTR_ERR(inode));
@@ -77,6 +78,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
77 mlog_errno(-ENOMEM); 78 mlog_errno(-ENOMEM);
78 return ERR_PTR(-ENOMEM); 79 return ERR_PTR(-ENOMEM);
79 } 80 }
81 result->d_op = &ocfs2_dentry_ops;
80 82
81 mlog_exit_ptr(result); 83 mlog_exit_ptr(result);
82 return result; 84 return result;
@@ -113,7 +115,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
113 goto bail_unlock; 115 goto bail_unlock;
114 } 116 }
115 117
116 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno); 118 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
117 if (IS_ERR(inode)) { 119 if (IS_ERR(inode)) {
118 mlog(ML_ERROR, "Unable to create inode %llu\n", 120 mlog(ML_ERROR, "Unable to create inode %llu\n",
119 (unsigned long long)blkno); 121 (unsigned long long)blkno);
@@ -127,6 +129,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
127 parent = ERR_PTR(-ENOMEM); 129 parent = ERR_PTR(-ENOMEM);
128 } 130 }
129 131
132 parent->d_op = &ocfs2_dentry_ops;
133
130bail_unlock: 134bail_unlock:
131 ocfs2_meta_unlock(dir, 0); 135 ocfs2_meta_unlock(dir, 0);
132 136
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7bcf69154592..69d3db569166 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -54,8 +54,6 @@
54 54
55#include "buffer_head_io.h" 55#include "buffer_head_io.h"
56 56
57#define OCFS2_FI_FLAG_NOWAIT 0x1
58#define OCFS2_FI_FLAG_DELETE 0x2
59struct ocfs2_find_inode_args 57struct ocfs2_find_inode_args
60{ 58{
61 u64 fi_blkno; 59 u64 fi_blkno;
@@ -109,7 +107,7 @@ struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
109 return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args); 107 return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args);
110} 108}
111 109
112struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno) 110struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags)
113{ 111{
114 struct inode *inode = NULL; 112 struct inode *inode = NULL;
115 struct super_block *sb = osb->sb; 113 struct super_block *sb = osb->sb;
@@ -127,7 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
127 } 125 }
128 126
129 args.fi_blkno = blkno; 127 args.fi_blkno = blkno;
130 args.fi_flags = 0; 128 args.fi_flags = flags;
131 args.fi_ino = ino_from_blkno(sb, blkno); 129 args.fi_ino = ino_from_blkno(sb, blkno);
132 130
133 inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, 131 inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor,
@@ -297,15 +295,11 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
297 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; 295 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
298 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); 296 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
299 297
300 if (create_ino)
301 inode->i_ino = ino_from_blkno(inode->i_sb,
302 le64_to_cpu(fe->i_blkno));
303
304 mlog(0, "blkno = %llu, ino = %lu, create_ino = %s\n",
305 (unsigned long long)fe->i_blkno, inode->i_ino, create_ino ? "true" : "false");
306
307 inode->i_nlink = le16_to_cpu(fe->i_links_count); 298 inode->i_nlink = le16_to_cpu(fe->i_links_count);
308 299
300 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
301 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
302
309 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { 303 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
310 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; 304 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
311 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); 305 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
@@ -343,12 +337,28 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
343 break; 337 break;
344 } 338 }
345 339
340 if (create_ino) {
341 inode->i_ino = ino_from_blkno(inode->i_sb,
342 le64_to_cpu(fe->i_blkno));
343
344 /*
345 * If we ever want to create system files from kernel,
346 * the generation argument to
347 * ocfs2_inode_lock_res_init() will have to change.
348 */
349 BUG_ON(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL));
350
351 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
352 OCFS2_LOCK_TYPE_META, 0, inode);
353 }
354
346 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, 355 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres,
347 OCFS2_LOCK_TYPE_RW, inode); 356 OCFS2_LOCK_TYPE_RW, inode->i_generation,
348 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, 357 inode);
349 OCFS2_LOCK_TYPE_META, inode); 358
350 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, 359 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
351 OCFS2_LOCK_TYPE_DATA, inode); 360 OCFS2_LOCK_TYPE_DATA, inode->i_generation,
361 inode);
352 362
353 ocfs2_set_inode_flags(inode); 363 ocfs2_set_inode_flags(inode);
354 inode->i_flags |= S_NOATIME; 364 inode->i_flags |= S_NOATIME;
@@ -366,15 +376,15 @@ static int ocfs2_read_locked_inode(struct inode *inode,
366 struct ocfs2_super *osb; 376 struct ocfs2_super *osb;
367 struct ocfs2_dinode *fe; 377 struct ocfs2_dinode *fe;
368 struct buffer_head *bh = NULL; 378 struct buffer_head *bh = NULL;
369 int status; 379 int status, can_lock;
370 int sysfile = 0; 380 u32 generation = 0;
371 381
372 mlog_entry("(0x%p, 0x%p)\n", inode, args); 382 mlog_entry("(0x%p, 0x%p)\n", inode, args);
373 383
374 status = -EINVAL; 384 status = -EINVAL;
375 if (inode == NULL || inode->i_sb == NULL) { 385 if (inode == NULL || inode->i_sb == NULL) {
376 mlog(ML_ERROR, "bad inode\n"); 386 mlog(ML_ERROR, "bad inode\n");
377 goto bail; 387 return status;
378 } 388 }
379 sb = inode->i_sb; 389 sb = inode->i_sb;
380 osb = OCFS2_SB(sb); 390 osb = OCFS2_SB(sb);
@@ -382,50 +392,110 @@ static int ocfs2_read_locked_inode(struct inode *inode,
382 if (!args) { 392 if (!args) {
383 mlog(ML_ERROR, "bad inode args\n"); 393 mlog(ML_ERROR, "bad inode args\n");
384 make_bad_inode(inode); 394 make_bad_inode(inode);
385 goto bail; 395 return status;
396 }
397
398 /*
399 * To improve performance of cold-cache inode stats, we take
400 * the cluster lock here if possible.
401 *
402 * Generally, OCFS2 never trusts the contents of an inode
403 * unless it's holding a cluster lock, so taking it here isn't
404 * a correctness issue as much as it is a performance
405 * improvement.
406 *
407 * There are three times when taking the lock is not a good idea:
408 *
409 * 1) During startup, before we have initialized the DLM.
410 *
411 * 2) If we are reading certain system files which never get
412 * cluster locks (local alloc, truncate log).
413 *
414 * 3) If the process doing the iget() is responsible for
415 * orphan dir recovery. We're holding the orphan dir lock and
416 * can get into a deadlock with another process on another
417 * node in ->delete_inode().
418 *
419 * #1 and #2 can be simply solved by never taking the lock
420 * here for system files (which are the only type we read
421 * during mount). It's a heavier approach, but our main
422 * concern is user-accessible files anyway.
423 *
424 * #3 works itself out because we'll eventually take the
425 * cluster lock before trusting anything anyway.
426 */
427 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
428 && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK);
429
430 /*
431 * To maintain backwards compatibility with older versions of
432 * ocfs2-tools, we still store the generation value for system
433 * files. The only ones that actually matter to userspace are
434 * the journals, but it's easier and inexpensive to just flag
435 * all system files similarly.
436 */
437 if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
438 generation = osb->fs_generation;
439
440 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
441 OCFS2_LOCK_TYPE_META,
442 generation, inode);
443
444 if (can_lock) {
445 status = ocfs2_meta_lock(inode, NULL, NULL, 0);
446 if (status) {
447 make_bad_inode(inode);
448 mlog_errno(status);
449 return status;
450 }
386 } 451 }
387 452
388 /* Read the FE off disk. This is safe because the kernel only 453 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
389 * does one read_inode2 for a new inode, and if it doesn't 454 can_lock ? inode : NULL);
390 * exist yet then nobody can be working on it! */
391 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, NULL);
392 if (status < 0) { 455 if (status < 0) {
393 mlog_errno(status); 456 mlog_errno(status);
394 make_bad_inode(inode);
395 goto bail; 457 goto bail;
396 } 458 }
397 459
460 status = -EINVAL;
398 fe = (struct ocfs2_dinode *) bh->b_data; 461 fe = (struct ocfs2_dinode *) bh->b_data;
399 if (!OCFS2_IS_VALID_DINODE(fe)) { 462 if (!OCFS2_IS_VALID_DINODE(fe)) {
400 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", 463 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
401 (unsigned long long)fe->i_blkno, 7, fe->i_signature); 464 (unsigned long long)fe->i_blkno, 7, fe->i_signature);
402 make_bad_inode(inode);
403 goto bail; 465 goto bail;
404 } 466 }
405 467
406 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) 468 /*
407 sysfile = 1; 469 * This is a code bug. Right now the caller needs to
470 * understand whether it is asking for a system file inode or
471 * not so the proper lock names can be built.
472 */
473 mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
474 !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
475 "Inode %llu: system file state is ambigous\n",
476 (unsigned long long)args->fi_blkno);
408 477
409 if (S_ISCHR(le16_to_cpu(fe->i_mode)) || 478 if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
410 S_ISBLK(le16_to_cpu(fe->i_mode))) 479 S_ISBLK(le16_to_cpu(fe->i_mode)))
411 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); 480 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
412 481
413 status = -EINVAL;
414 if (ocfs2_populate_inode(inode, fe, 0) < 0) { 482 if (ocfs2_populate_inode(inode, fe, 0) < 0) {
415 mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n", 483 mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n",
416 (unsigned long long)fe->i_blkno, inode->i_ino); 484 (unsigned long long)fe->i_blkno, inode->i_ino);
417 make_bad_inode(inode);
418 goto bail; 485 goto bail;
419 } 486 }
420 487
421 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); 488 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
422 489
423 if (sysfile)
424 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
425
426 status = 0; 490 status = 0;
427 491
428bail: 492bail:
493 if (can_lock)
494 ocfs2_meta_unlock(inode, 0);
495
496 if (status < 0)
497 make_bad_inode(inode);
498
429 if (args && bh) 499 if (args && bh)
430 brelse(bh); 500 brelse(bh);
431 501
@@ -898,9 +968,15 @@ void ocfs2_delete_inode(struct inode *inode)
898 goto bail_unlock_inode; 968 goto bail_unlock_inode;
899 } 969 }
900 970
901 /* Mark the inode as successfully deleted. This is important 971 /*
902 * for ocfs2_clear_inode as it will check this flag and skip 972 * Mark the inode as successfully deleted.
903 * any checkpointing work */ 973 *
974 * This is important for ocfs2_clear_inode() as it will check
975 * this flag and skip any checkpointing work
976 *
977 * ocfs2_stuff_meta_lvb() also uses this flag to invalidate
978 * the LVB for other nodes.
979 */
904 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; 980 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
905 981
906bail_unlock_inode: 982bail_unlock_inode:
@@ -1025,12 +1101,10 @@ void ocfs2_drop_inode(struct inode *inode)
1025 /* Testing ip_orphaned_slot here wouldn't work because we may 1101 /* Testing ip_orphaned_slot here wouldn't work because we may
1026 * not have gotten a delete_inode vote from any other nodes 1102 * not have gotten a delete_inode vote from any other nodes
1027 * yet. */ 1103 * yet. */
1028 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) { 1104 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
1029 mlog(0, "Inode was orphaned on another node, clearing nlink.\n"); 1105 generic_delete_inode(inode);
1030 inode->i_nlink = 0; 1106 else
1031 } 1107 generic_drop_inode(inode);
1032
1033 generic_drop_inode(inode);
1034 1108
1035 mlog_exit_void(); 1109 mlog_exit_void();
1036} 1110}
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 4d1e53992566..9957810fdf85 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -122,7 +122,13 @@ struct buffer_head *ocfs2_bread(struct inode *inode, int block,
122void ocfs2_clear_inode(struct inode *inode); 122void ocfs2_clear_inode(struct inode *inode);
123void ocfs2_delete_inode(struct inode *inode); 123void ocfs2_delete_inode(struct inode *inode);
124void ocfs2_drop_inode(struct inode *inode); 124void ocfs2_drop_inode(struct inode *inode);
125struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff); 125
126/* Flags for ocfs2_iget() */
127#define OCFS2_FI_FLAG_NOWAIT 0x1
128#define OCFS2_FI_FLAG_DELETE 0x2
129#define OCFS2_FI_FLAG_SYSFILE 0x4
130#define OCFS2_FI_FLAG_NOLOCK 0x8
131struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags);
126struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, 132struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
127 u64 blkno, 133 u64 blkno,
128 int delete_vote); 134 int delete_vote);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f92bf1dd379a..fd9734def551 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1493,7 +1493,8 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
1493 if (de->name_len == 2 && !strncmp("..", de->name, 2)) 1493 if (de->name_len == 2 && !strncmp("..", de->name, 2))
1494 continue; 1494 continue;
1495 1495
1496 iter = ocfs2_iget(osb, le64_to_cpu(de->inode)); 1496 iter = ocfs2_iget(osb, le64_to_cpu(de->inode),
1497 OCFS2_FI_FLAG_NOLOCK);
1497 if (IS_ERR(iter)) 1498 if (IS_ERR(iter))
1498 continue; 1499 continue;
1499 1500
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 0d3e939b1f56..849c3b4bb94a 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -179,7 +179,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
179 if (status < 0) 179 if (status < 0)
180 goto bail_add; 180 goto bail_add;
181 181
182 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno); 182 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
183 if (IS_ERR(inode)) { 183 if (IS_ERR(inode)) {
184 mlog(ML_ERROR, "Unable to create inode %llu\n", 184 mlog(ML_ERROR, "Unable to create inode %llu\n",
185 (unsigned long long)blkno); 185 (unsigned long long)blkno);
@@ -199,10 +199,32 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
199 spin_unlock(&oi->ip_lock); 199 spin_unlock(&oi->ip_lock);
200 200
201bail_add: 201bail_add:
202
203 dentry->d_op = &ocfs2_dentry_ops; 202 dentry->d_op = &ocfs2_dentry_ops;
204 ret = d_splice_alias(inode, dentry); 203 ret = d_splice_alias(inode, dentry);
205 204
205 if (inode) {
206 /*
207 * If d_splice_alias() finds a DCACHE_DISCONNECTED
208 * dentry, it will d_move() it on top of ours. The
209 * return value will indicate this however, so in
210 * those cases, we switch them around for the locking
211 * code.
212 *
213 * NOTE: This dentry already has ->d_op set from
214 * ocfs2_get_parent() and ocfs2_get_dentry()
215 */
216 if (ret)
217 dentry = ret;
218
219 status = ocfs2_dentry_attach_lock(dentry, inode,
220 OCFS2_I(dir)->ip_blkno);
221 if (status) {
222 mlog_errno(status);
223 ret = ERR_PTR(status);
224 goto bail_unlock;
225 }
226 }
227
206bail_unlock: 228bail_unlock:
207 /* Don't drop the cluster lock until *after* the d_add -- 229 /* Don't drop the cluster lock until *after* the d_add --
208 * unlink on another node will message us to remove that 230 * unlink on another node will message us to remove that
@@ -418,6 +440,13 @@ static int ocfs2_mknod(struct inode *dir,
418 goto leave; 440 goto leave;
419 } 441 }
420 442
443 status = ocfs2_dentry_attach_lock(dentry, inode,
444 OCFS2_I(dir)->ip_blkno);
445 if (status) {
446 mlog_errno(status);
447 goto leave;
448 }
449
421 insert_inode_hash(inode); 450 insert_inode_hash(inode);
422 dentry->d_op = &ocfs2_dentry_ops; 451 dentry->d_op = &ocfs2_dentry_ops;
423 d_instantiate(dentry, inode); 452 d_instantiate(dentry, inode);
@@ -725,6 +754,12 @@ static int ocfs2_link(struct dentry *old_dentry,
725 goto bail; 754 goto bail;
726 } 755 }
727 756
757 err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
758 if (err) {
759 mlog_errno(err);
760 goto bail;
761 }
762
728 atomic_inc(&inode->i_count); 763 atomic_inc(&inode->i_count);
729 dentry->d_op = &ocfs2_dentry_ops; 764 dentry->d_op = &ocfs2_dentry_ops;
730 d_instantiate(dentry, inode); 765 d_instantiate(dentry, inode);
@@ -743,6 +778,23 @@ bail:
743 return err; 778 return err;
744} 779}
745 780
781/*
782 * Takes and drops an exclusive lock on the given dentry. This will
783 * force other nodes to drop it.
784 */
785static int ocfs2_remote_dentry_delete(struct dentry *dentry)
786{
787 int ret;
788
789 ret = ocfs2_dentry_lock(dentry, 1);
790 if (ret)
791 mlog_errno(ret);
792 else
793 ocfs2_dentry_unlock(dentry, 1);
794
795 return ret;
796}
797
746static int ocfs2_unlink(struct inode *dir, 798static int ocfs2_unlink(struct inode *dir,
747 struct dentry *dentry) 799 struct dentry *dentry)
748{ 800{
@@ -832,8 +884,7 @@ static int ocfs2_unlink(struct inode *dir,
832 else 884 else
833 inode->i_nlink--; 885 inode->i_nlink--;
834 886
835 status = ocfs2_request_unlink_vote(inode, dentry, 887 status = ocfs2_remote_dentry_delete(dentry);
836 (unsigned int) inode->i_nlink);
837 if (status < 0) { 888 if (status < 0) {
838 /* This vote should succeed under all normal 889 /* This vote should succeed under all normal
839 * circumstances. */ 890 * circumstances. */
@@ -1019,7 +1070,6 @@ static int ocfs2_rename(struct inode *old_dir,
1019 struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir, 1070 struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
1020 // this is the 1st dirent bh 1071 // this is the 1st dirent bh
1021 nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink; 1072 nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink;
1022 unsigned int links_count;
1023 1073
1024 /* At some point it might be nice to break this function up a 1074 /* At some point it might be nice to break this function up a
1025 * bit. */ 1075 * bit. */
@@ -1093,23 +1143,26 @@ static int ocfs2_rename(struct inode *old_dir,
1093 } 1143 }
1094 } 1144 }
1095 1145
1096 if (S_ISDIR(old_inode->i_mode)) { 1146 /*
1097 /* Directories actually require metadata updates to 1147 * Though we don't require an inode meta data update if
1098 * the directory info so we can't get away with not 1148 * old_inode is not a directory, we lock anyway here to ensure
1099 * doing node locking on it. */ 1149 * the vote thread on other nodes won't have to concurrently
1100 status = ocfs2_meta_lock(old_inode, handle, NULL, 1); 1150 * downconvert the inode and the dentry locks.
1101 if (status < 0) { 1151 */
1102 if (status != -ENOENT) 1152 status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
1103 mlog_errno(status); 1153 if (status < 0) {
1104 goto bail; 1154 if (status != -ENOENT)
1105 }
1106
1107 status = ocfs2_request_rename_vote(old_inode, old_dentry);
1108 if (status < 0) {
1109 mlog_errno(status); 1155 mlog_errno(status);
1110 goto bail; 1156 goto bail;
1111 } 1157 }
1158
1159 status = ocfs2_remote_dentry_delete(old_dentry);
1160 if (status < 0) {
1161 mlog_errno(status);
1162 goto bail;
1163 }
1112 1164
1165 if (S_ISDIR(old_inode->i_mode)) {
1113 status = -EIO; 1166 status = -EIO;
1114 old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0); 1167 old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0);
1115 if (!old_inode_de_bh) 1168 if (!old_inode_de_bh)
@@ -1123,14 +1176,6 @@ static int ocfs2_rename(struct inode *old_dir,
1123 if (!new_inode && new_dir!=old_dir && 1176 if (!new_inode && new_dir!=old_dir &&
1124 new_dir->i_nlink >= OCFS2_LINK_MAX) 1177 new_dir->i_nlink >= OCFS2_LINK_MAX)
1125 goto bail; 1178 goto bail;
1126 } else {
1127 /* Ah, the simple case - we're a file so just send a
1128 * message. */
1129 status = ocfs2_request_rename_vote(old_inode, old_dentry);
1130 if (status < 0) {
1131 mlog_errno(status);
1132 goto bail;
1133 }
1134 } 1179 }
1135 1180
1136 status = -ENOENT; 1181 status = -ENOENT;
@@ -1202,13 +1247,7 @@ static int ocfs2_rename(struct inode *old_dir,
1202 goto bail; 1247 goto bail;
1203 } 1248 }
1204 1249
1205 if (S_ISDIR(new_inode->i_mode)) 1250 status = ocfs2_remote_dentry_delete(new_dentry);
1206 links_count = 0;
1207 else
1208 links_count = (unsigned int) (new_inode->i_nlink - 1);
1209
1210 status = ocfs2_request_unlink_vote(new_inode, new_dentry,
1211 links_count);
1212 if (status < 0) { 1251 if (status < 0) {
1213 mlog_errno(status); 1252 mlog_errno(status);
1214 goto bail; 1253 goto bail;
@@ -1387,6 +1426,7 @@ static int ocfs2_rename(struct inode *old_dir,
1387 } 1426 }
1388 } 1427 }
1389 1428
1429 ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
1390 status = 0; 1430 status = 0;
1391bail: 1431bail:
1392 if (rename_lock) 1432 if (rename_lock)
@@ -1675,6 +1715,12 @@ static int ocfs2_symlink(struct inode *dir,
1675 goto bail; 1715 goto bail;
1676 } 1716 }
1677 1717
1718 status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
1719 if (status) {
1720 mlog_errno(status);
1721 goto bail;
1722 }
1723
1678 insert_inode_hash(inode); 1724 insert_inode_hash(inode);
1679 dentry->d_op = &ocfs2_dentry_ops; 1725 dentry->d_op = &ocfs2_dentry_ops;
1680 d_instantiate(dentry, inode); 1726 d_instantiate(dentry, inode);
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index 7dd9e1e705b0..4d5d5655c185 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -35,12 +35,15 @@
35#define OCFS2_LOCK_ID_MAX_LEN 32 35#define OCFS2_LOCK_ID_MAX_LEN 32
36#define OCFS2_LOCK_ID_PAD "000000" 36#define OCFS2_LOCK_ID_PAD "000000"
37 37
38#define OCFS2_DENTRY_LOCK_INO_START 18
39
38enum ocfs2_lock_type { 40enum ocfs2_lock_type {
39 OCFS2_LOCK_TYPE_META = 0, 41 OCFS2_LOCK_TYPE_META = 0,
40 OCFS2_LOCK_TYPE_DATA, 42 OCFS2_LOCK_TYPE_DATA,
41 OCFS2_LOCK_TYPE_SUPER, 43 OCFS2_LOCK_TYPE_SUPER,
42 OCFS2_LOCK_TYPE_RENAME, 44 OCFS2_LOCK_TYPE_RENAME,
43 OCFS2_LOCK_TYPE_RW, 45 OCFS2_LOCK_TYPE_RW,
46 OCFS2_LOCK_TYPE_DENTRY,
44 OCFS2_NUM_LOCK_TYPES 47 OCFS2_NUM_LOCK_TYPES
45}; 48};
46 49
@@ -63,6 +66,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
63 case OCFS2_LOCK_TYPE_RW: 66 case OCFS2_LOCK_TYPE_RW:
64 c = 'W'; 67 c = 'W';
65 break; 68 break;
69 case OCFS2_LOCK_TYPE_DENTRY:
70 c = 'N';
71 break;
66 default: 72 default:
67 c = '\0'; 73 c = '\0';
68 } 74 }
@@ -70,4 +76,23 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
70 return c; 76 return c;
71} 77}
72 78
79static char *ocfs2_lock_type_strings[] = {
80 [OCFS2_LOCK_TYPE_META] = "Meta",
81 [OCFS2_LOCK_TYPE_DATA] = "Data",
82 [OCFS2_LOCK_TYPE_SUPER] = "Super",
83 [OCFS2_LOCK_TYPE_RENAME] = "Rename",
84 /* Need to differentiate from [R]ename.. serializing writes is the
85 * important job it does, anyway. */
86 [OCFS2_LOCK_TYPE_RW] = "Write/Read",
87 [OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
88};
89
90static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
91{
92#ifdef __KERNEL__
93 mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type);
94#endif
95 return ocfs2_lock_type_strings[type];
96}
97
73#endif /* OCFS2_LOCKID_H */ 98#endif /* OCFS2_LOCKID_H */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d17e33e66a1e..4c29cd7cc8e6 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -202,7 +202,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
202 202
203 mlog_entry_void(); 203 mlog_entry_void();
204 204
205 new = ocfs2_iget(osb, osb->root_blkno); 205 new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE);
206 if (IS_ERR(new)) { 206 if (IS_ERR(new)) {
207 status = PTR_ERR(new); 207 status = PTR_ERR(new);
208 mlog_errno(status); 208 mlog_errno(status);
@@ -210,7 +210,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
210 } 210 }
211 osb->root_inode = new; 211 osb->root_inode = new;
212 212
213 new = ocfs2_iget(osb, osb->system_dir_blkno); 213 new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE);
214 if (IS_ERR(new)) { 214 if (IS_ERR(new)) {
215 status = PTR_ERR(new); 215 status = PTR_ERR(new);
216 mlog_errno(status); 216 mlog_errno(status);
@@ -682,7 +682,7 @@ static struct file_system_type ocfs2_fs_type = {
682 .kill_sb = kill_block_super, /* set to the generic one 682 .kill_sb = kill_block_super, /* set to the generic one
683 * right now, but do we 683 * right now, but do we
684 * need to change that? */ 684 * need to change that? */
685 .fs_flags = FS_REQUIRES_DEV, 685 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
686 .next = NULL 686 .next = NULL
687}; 687};
688 688
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index fc29cb7a437d..5df6e35d09b1 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -28,11 +28,11 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30 30
31#include "ocfs2.h"
32
33#define MLOG_MASK_PREFIX ML_INODE 31#define MLOG_MASK_PREFIX ML_INODE
34#include <cluster/masklog.h> 32#include <cluster/masklog.h>
35 33
34#include "ocfs2.h"
35
36#include "alloc.h" 36#include "alloc.h"
37#include "dir.h" 37#include "dir.h"
38#include "inode.h" 38#include "inode.h"
@@ -115,7 +115,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
115 goto bail; 115 goto bail;
116 } 116 }
117 117
118 inode = ocfs2_iget(osb, blkno); 118 inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE);
119 if (IS_ERR(inode)) { 119 if (IS_ERR(inode)) {
120 mlog_errno(PTR_ERR(inode)); 120 mlog_errno(PTR_ERR(inode));
121 inode = NULL; 121 inode = NULL;
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index cf70fe2075b8..5b4dca79990b 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -74,9 +74,6 @@ struct ocfs2_vote_msg
74 __be32 v_orphaned_slot; /* Used during delete votes */ 74 __be32 v_orphaned_slot; /* Used during delete votes */
75 __be32 v_nlink; /* Used during unlink votes */ 75 __be32 v_nlink; /* Used during unlink votes */
76 } md1; /* Message type dependent 1 */ 76 } md1; /* Message type dependent 1 */
77 __be32 v_unlink_namelen;
78 __be64 v_unlink_parent;
79 u8 v_unlink_dirent[OCFS2_VOTE_FILENAME_LEN];
80}; 77};
81 78
82/* Responses are given these values to maintain backwards 79/* Responses are given these values to maintain backwards
@@ -100,8 +97,6 @@ struct ocfs2_vote_work {
100enum ocfs2_vote_request { 97enum ocfs2_vote_request {
101 OCFS2_VOTE_REQ_INVALID = 0, 98 OCFS2_VOTE_REQ_INVALID = 0,
102 OCFS2_VOTE_REQ_DELETE, 99 OCFS2_VOTE_REQ_DELETE,
103 OCFS2_VOTE_REQ_UNLINK,
104 OCFS2_VOTE_REQ_RENAME,
105 OCFS2_VOTE_REQ_MOUNT, 100 OCFS2_VOTE_REQ_MOUNT,
106 OCFS2_VOTE_REQ_UMOUNT, 101 OCFS2_VOTE_REQ_UMOUNT,
107 OCFS2_VOTE_REQ_LAST 102 OCFS2_VOTE_REQ_LAST
@@ -261,103 +256,13 @@ done:
261 return response; 256 return response;
262} 257}
263 258
264static int ocfs2_match_dentry(struct dentry *dentry,
265 u64 parent_blkno,
266 unsigned int namelen,
267 const char *name)
268{
269 struct inode *parent;
270
271 if (!dentry->d_parent) {
272 mlog(0, "Detached from parent.\n");
273 return 0;
274 }
275
276 parent = dentry->d_parent->d_inode;
277 /* Negative parent dentry? */
278 if (!parent)
279 return 0;
280
281 /* Name is in a different directory. */
282 if (OCFS2_I(parent)->ip_blkno != parent_blkno)
283 return 0;
284
285 if (dentry->d_name.len != namelen)
286 return 0;
287
288 /* comparison above guarantees this is safe. */
289 if (memcmp(dentry->d_name.name, name, namelen))
290 return 0;
291
292 return 1;
293}
294
295static void ocfs2_process_dentry_request(struct inode *inode,
296 int rename,
297 unsigned int new_nlink,
298 u64 parent_blkno,
299 unsigned int namelen,
300 const char *name)
301{
302 struct dentry *dentry = NULL;
303 struct list_head *p;
304 struct ocfs2_inode_info *oi = OCFS2_I(inode);
305
306 mlog(0, "parent %llu, namelen = %u, name = %.*s\n",
307 (unsigned long long)parent_blkno, namelen, namelen, name);
308
309 spin_lock(&dcache_lock);
310
311 /* Another node is removing this name from the system. It is
312 * up to us to find the corresponding dentry and if it exists,
313 * unhash it from the dcache. */
314 list_for_each(p, &inode->i_dentry) {
315 dentry = list_entry(p, struct dentry, d_alias);
316
317 if (ocfs2_match_dentry(dentry, parent_blkno, namelen, name)) {
318 mlog(0, "dentry found: %.*s\n",
319 dentry->d_name.len, dentry->d_name.name);
320
321 dget_locked(dentry);
322 break;
323 }
324
325 dentry = NULL;
326 }
327
328 spin_unlock(&dcache_lock);
329
330 if (dentry) {
331 d_delete(dentry);
332 dput(dentry);
333 }
334
335 /* rename votes don't send link counts */
336 if (!rename) {
337 mlog(0, "new_nlink = %u\n", new_nlink);
338
339 /* We don't have the proper locks here to directly
340 * change i_nlink and besides, the vote is sent
341 * *before* the operation so it may have failed on the
342 * other node. This passes a hint to ocfs2_drop_inode
343 * to force ocfs2_delete_inode, who will take the
344 * proper cluster locks to sort things out. */
345 if (new_nlink == 0) {
346 spin_lock(&oi->ip_lock);
347 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
348 spin_unlock(&OCFS2_I(inode)->ip_lock);
349 }
350 }
351}
352
353static void ocfs2_process_vote(struct ocfs2_super *osb, 259static void ocfs2_process_vote(struct ocfs2_super *osb,
354 struct ocfs2_vote_msg *msg) 260 struct ocfs2_vote_msg *msg)
355{ 261{
356 int net_status, vote_response; 262 int net_status, vote_response;
357 int orphaned_slot = 0; 263 int orphaned_slot = 0;
358 int rename = 0; 264 unsigned int node_num, generation;
359 unsigned int node_num, generation, new_nlink, namelen; 265 u64 blkno;
360 u64 blkno, parent_blkno;
361 enum ocfs2_vote_request request; 266 enum ocfs2_vote_request request;
362 struct inode *inode = NULL; 267 struct inode *inode = NULL;
363 struct ocfs2_msg_hdr *hdr = &msg->v_hdr; 268 struct ocfs2_msg_hdr *hdr = &msg->v_hdr;
@@ -437,18 +342,6 @@ static void ocfs2_process_vote(struct ocfs2_super *osb,
437 vote_response = ocfs2_process_delete_request(inode, 342 vote_response = ocfs2_process_delete_request(inode,
438 &orphaned_slot); 343 &orphaned_slot);
439 break; 344 break;
440 case OCFS2_VOTE_REQ_RENAME:
441 rename = 1;
442 /* fall through */
443 case OCFS2_VOTE_REQ_UNLINK:
444 parent_blkno = be64_to_cpu(msg->v_unlink_parent);
445 namelen = be32_to_cpu(msg->v_unlink_namelen);
446 /* new_nlink will be ignored in case of a rename vote */
447 new_nlink = be32_to_cpu(msg->md1.v_nlink);
448 ocfs2_process_dentry_request(inode, rename, new_nlink,
449 parent_blkno, namelen,
450 msg->v_unlink_dirent);
451 break;
452 default: 345 default:
453 mlog(ML_ERROR, "node %u, invalid request: %u\n", 346 mlog(ML_ERROR, "node %u, invalid request: %u\n",
454 node_num, request); 347 node_num, request);
@@ -889,75 +782,6 @@ int ocfs2_request_delete_vote(struct inode *inode)
889 return status; 782 return status;
890} 783}
891 784
892static void ocfs2_setup_unlink_vote(struct ocfs2_vote_msg *request,
893 struct dentry *dentry)
894{
895 struct inode *parent = dentry->d_parent->d_inode;
896
897 /* We need some values which will uniquely identify a dentry
898 * on the other nodes so that they can find it and run
899 * d_delete against it. Parent directory block and full name
900 * should suffice. */
901
902 mlog(0, "unlink/rename request: parent: %llu name: %.*s\n",
903 (unsigned long long)OCFS2_I(parent)->ip_blkno, dentry->d_name.len,
904 dentry->d_name.name);
905
906 request->v_unlink_parent = cpu_to_be64(OCFS2_I(parent)->ip_blkno);
907 request->v_unlink_namelen = cpu_to_be32(dentry->d_name.len);
908 memcpy(request->v_unlink_dirent, dentry->d_name.name,
909 dentry->d_name.len);
910}
911
912int ocfs2_request_unlink_vote(struct inode *inode,
913 struct dentry *dentry,
914 unsigned int nlink)
915{
916 int status;
917 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
918 struct ocfs2_vote_msg *request;
919
920 if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN)
921 return -ENAMETOOLONG;
922
923 status = -ENOMEM;
924 request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
925 inode->i_generation,
926 OCFS2_VOTE_REQ_UNLINK, nlink);
927 if (request) {
928 ocfs2_setup_unlink_vote(request, dentry);
929
930 status = ocfs2_request_vote(inode, request, NULL);
931
932 kfree(request);
933 }
934 return status;
935}
936
937int ocfs2_request_rename_vote(struct inode *inode,
938 struct dentry *dentry)
939{
940 int status;
941 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
942 struct ocfs2_vote_msg *request;
943
944 if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN)
945 return -ENAMETOOLONG;
946
947 status = -ENOMEM;
948 request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
949 inode->i_generation,
950 OCFS2_VOTE_REQ_RENAME, 0);
951 if (request) {
952 ocfs2_setup_unlink_vote(request, dentry);
953
954 status = ocfs2_request_vote(inode, request, NULL);
955
956 kfree(request);
957 }
958 return status;
959}
960
961int ocfs2_request_mount_vote(struct ocfs2_super *osb) 785int ocfs2_request_mount_vote(struct ocfs2_super *osb)
962{ 786{
963 int status; 787 int status;
diff --git a/fs/ocfs2/vote.h b/fs/ocfs2/vote.h
index 9cce60703466..53ebc1c69e56 100644
--- a/fs/ocfs2/vote.h
+++ b/fs/ocfs2/vote.h
@@ -39,11 +39,6 @@ static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb)
39} 39}
40 40
41int ocfs2_request_delete_vote(struct inode *inode); 41int ocfs2_request_delete_vote(struct inode *inode);
42int ocfs2_request_unlink_vote(struct inode *inode,
43 struct dentry *dentry,
44 unsigned int nlink);
45int ocfs2_request_rename_vote(struct inode *inode,
46 struct dentry *dentry);
47int ocfs2_request_mount_vote(struct ocfs2_super *osb); 42int ocfs2_request_mount_vote(struct ocfs2_super *osb);
48int ocfs2_request_umount_vote(struct ocfs2_super *osb); 43int ocfs2_request_umount_vote(struct ocfs2_super *osb);
49int ocfs2_register_net_handlers(struct ocfs2_super *osb); 44int ocfs2_register_net_handlers(struct ocfs2_super *osb);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 555bc195c420..1d3e601ece73 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -92,9 +92,10 @@ extern int dir_notify_enable;
92#define FS_REQUIRES_DEV 1 92#define FS_REQUIRES_DEV 1
93#define FS_BINARY_MOUNTDATA 2 93#define FS_BINARY_MOUNTDATA 2
94#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ 94#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
95#define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon 95#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move()
96 * as nfs_rename() will be cleaned up 96 * during rename() internally.
97 */ 97 */
98
98/* 99/*
99 * These are the fs-independent mount-flags: up to 32 flags are supported 100 * These are the fs-independent mount-flags: up to 32 flags are supported
100 */ 101 */