aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Fasheh <mark.fasheh@oracle.com>2006-09-08 17:43:18 -0400
committerMark Fasheh <mark.fasheh@oracle.com>2006-09-24 16:50:43 -0400
commit80c05846f604bab6d61e9732c262420ee9f5f358 (patch)
tree3fcd80cec6e3a3a1e56abaff0a559817dbcb95a2
parentd680efe9d8fe0eb99d9dd063a4def6b362cdb40d (diff)
ocfs2: Add dentry tracking API
Replace the dentry vote mechanism with a cluster lock which covers a set of dentries. This allows us to force d_delete() only on nodes which actually care about an unlink. Every node that does a ->lookup() gets a read-only lock on the dentry, which it holds until an unlink, during which the unlinking node will request an exclusive lock, forcing the other nodes who care about that dentry to d_delete() it. The effect is that we retain a very lightweight ->d_revalidate(), and at the same time get to make large improvements to the average case performance of the ocfs2 unlink and rename operations. This patch adds the higher level API and the dentry manipulation code. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
-rw-r--r--fs/ocfs2/dcache.c375
-rw-r--r--fs/ocfs2/dcache.h22
-rw-r--r--fs/ocfs2/sysfile.c4
3 files changed, 369 insertions, 32 deletions
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index aea457718946..09efe240e652 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -35,15 +35,17 @@
35 35
36#include "alloc.h" 36#include "alloc.h"
37#include "dcache.h" 37#include "dcache.h"
38#include "dlmglue.h"
38#include "file.h" 39#include "file.h"
39#include "inode.h" 40#include "inode.h"
40 41
42
41static int ocfs2_dentry_revalidate(struct dentry *dentry, 43static int ocfs2_dentry_revalidate(struct dentry *dentry,
42 struct nameidata *nd) 44 struct nameidata *nd)
43{ 45{
44 struct inode *inode = dentry->d_inode; 46 struct inode *inode = dentry->d_inode;
45 int ret = 0; /* if all else fails, just return false */ 47 int ret = 0; /* if all else fails, just return false */
46 struct ocfs2_super *osb; 48 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
47 49
48 mlog_entry("(0x%p, '%.*s')\n", dentry, 50 mlog_entry("(0x%p, '%.*s')\n", dentry,
49 dentry->d_name.len, dentry->d_name.name); 51 dentry->d_name.len, dentry->d_name.name);
@@ -55,28 +57,31 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
55 goto bail; 57 goto bail;
56 } 58 }
57 59
58 osb = OCFS2_SB(inode->i_sb);
59
60 BUG_ON(!osb); 60 BUG_ON(!osb);
61 61
62 if (inode != osb->root_inode) { 62 if (inode == osb->root_inode || is_bad_inode(inode))
63 spin_lock(&OCFS2_I(inode)->ip_lock); 63 goto bail;
64 /* did we or someone else delete this inode? */ 64
65 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { 65 spin_lock(&OCFS2_I(inode)->ip_lock);
66 spin_unlock(&OCFS2_I(inode)->ip_lock); 66 /* did we or someone else delete this inode? */
67 mlog(0, "inode (%llu) deleted, returning false\n", 67 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
68 (unsigned long long)OCFS2_I(inode)->ip_blkno);
69 goto bail;
70 }
71 spin_unlock(&OCFS2_I(inode)->ip_lock); 68 spin_unlock(&OCFS2_I(inode)->ip_lock);
69 mlog(0, "inode (%llu) deleted, returning false\n",
70 (unsigned long long)OCFS2_I(inode)->ip_blkno);
71 goto bail;
72 }
73 spin_unlock(&OCFS2_I(inode)->ip_lock);
72 74
73 if (!inode->i_nlink) { 75 /*
74 mlog(0, "Inode %llu orphaned, returning false " 76 * We don't need a cluster lock to test this because once an
75 "dir = %d\n", 77 * inode nlink hits zero, it never goes back.
76 (unsigned long long)OCFS2_I(inode)->ip_blkno, 78 */
77 S_ISDIR(inode->i_mode)); 79 if (inode->i_nlink == 0) {
78 goto bail; 80 mlog(0, "Inode %llu orphaned, returning false "
79 } 81 "dir = %d\n",
82 (unsigned long long)OCFS2_I(inode)->ip_blkno,
83 S_ISDIR(inode->i_mode));
84 goto bail;
80 } 85 }
81 86
82 ret = 1; 87 ret = 1;
@@ -87,8 +92,340 @@ bail:
87 return ret; 92 return ret;
88} 93}
89 94
95static int ocfs2_match_dentry(struct dentry *dentry,
96 u64 parent_blkno,
97 int skip_unhashed)
98{
99 struct inode *parent;
100
101 /*
102 * ocfs2_lookup() does a d_splice_alias() _before_ attaching
103 * to the lock data, so we skip those here, otherwise
104 * ocfs2_dentry_attach_lock() will get its original dentry
105 * back.
106 */
107 if (!dentry->d_fsdata)
108 return 0;
109
110 if (!dentry->d_parent)
111 return 0;
112
113 if (skip_unhashed && d_unhashed(dentry))
114 return 0;
115
116 parent = dentry->d_parent->d_inode;
117 /* Negative parent dentry? */
118 if (!parent)
119 return 0;
120
121 /* Name is in a different directory. */
122 if (OCFS2_I(parent)->ip_blkno != parent_blkno)
123 return 0;
124
125 return 1;
126}
127
128/*
129 * Walk the inode alias list, and find a dentry which has a given
130 * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
131 * is looking for a dentry_lock reference. The vote thread is looking
132 * to unhash aliases, so we allow it to skip any that already have
133 * that property.
134 */
135struct dentry *ocfs2_find_local_alias(struct inode *inode,
136 u64 parent_blkno,
137 int skip_unhashed)
138{
139 struct list_head *p;
140 struct dentry *dentry = NULL;
141
142 spin_lock(&dcache_lock);
143
144 list_for_each(p, &inode->i_dentry) {
145 dentry = list_entry(p, struct dentry, d_alias);
146
147 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
148 mlog(0, "dentry found: %.*s\n",
149 dentry->d_name.len, dentry->d_name.name);
150
151 dget_locked(dentry);
152 break;
153 }
154
155 dentry = NULL;
156 }
157
158 spin_unlock(&dcache_lock);
159
160 return dentry;
161}
162
90DEFINE_SPINLOCK(dentry_attach_lock); 163DEFINE_SPINLOCK(dentry_attach_lock);
91 164
165/*
166 * Attach this dentry to a cluster lock.
167 *
168 * Dentry locks cover all links in a given directory to a particular
169 * inode. We do this so that ocfs2 can build a lock name which all
170 * nodes in the cluster can agree on at all times. Shoving full names
171 * in the cluster lock won't work due to size restrictions. Covering
172 * links inside of a directory is a good compromise because it still
173 * allows us to use the parent directory lock to synchronize
174 * operations.
175 *
176 * Call this function with the parent dir semaphore and the parent dir
177 * cluster lock held.
178 *
179 * The dir semaphore will protect us from having to worry about
180 * concurrent processes on our node trying to attach a lock at the
181 * same time.
182 *
183 * The dir cluster lock (held at either PR or EX mode) protects us
184 * from unlink and rename on other nodes.
185 *
186 * The 'create' flag tells us whether we're doing this as a result of
187 * a file creation.
188 *
189 * A dput() can happen asynchronously due to pruning, so we cover
190 * attaching and detaching the dentry lock with a
191 * dentry_attach_lock.
192 *
193 * A node which has done lookup on a name retains a protected read
194 * lock until final dput. If the user requests and unlink or rename,
195 * the protected read is upgraded to an exclusive lock. Other nodes
196 * who have seen the dentry will then be informed that they need to
197 * downgrade their lock, which will involve d_delete on the
198 * dentry. This happens in ocfs2_dentry_convert_worker().
199 */
200int ocfs2_dentry_attach_lock(struct dentry *dentry,
201 struct inode *inode,
202 u64 parent_blkno,
203 int create)
204{
205 int ret;
206 struct dentry *alias;
207 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
208
209 mlog(0, "Attach \"%.*s\", parent %llu, create %d, fsdata: %p\n",
210 dentry->d_name.len, dentry->d_name.name,
211 (unsigned long long)parent_blkno, create, dl);
212
213 /*
214 * Negative dentry. We ignore these for now.
215 *
216 * XXX: Could we can improve ocfs2_dentry_revalidate() by
217 * tracking these?
218 */
219 if (!inode)
220 return 0;
221
222 if (dl) {
223 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
224 " \"%.*s\": old parent: %llu, new: %llu\n",
225 dentry->d_name.len, dentry->d_name.name,
226 (unsigned long long)parent_blkno,
227 (unsigned long long)dl->dl_parent_blkno);
228 return 0;
229 }
230
231 alias = ocfs2_find_local_alias(inode, parent_blkno, 0);
232 if (alias) {
233 /*
234 * Great, an alias exists, which means we must have a
235 * dentry lock already. We can just grab the lock off
236 * the alias and add it to the list.
237 *
238 * We're depending here on the fact that this dentry
239 * was found and exists in the dcache and so must have
240 * a reference to the dentry_lock because we can't
241 * race creates. Final dput() cannot happen on it
242 * since we have it pinned, so our reference is safe.
243 */
244 dl = alias->d_fsdata;
245 mlog_bug_on_msg(!dl, "parent %llu, ino %llu, create %d\n",
246 (unsigned long long)parent_blkno,
247 (unsigned long long)OCFS2_I(inode)->ip_blkno,
248 create);
249
250 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
251 " \"%.*s\": old parent: %llu, new: %llu\n",
252 dentry->d_name.len, dentry->d_name.name,
253 (unsigned long long)parent_blkno,
254 (unsigned long long)dl->dl_parent_blkno);
255
256 mlog(0, "Found: %s\n", dl->dl_lockres.l_name);
257
258 goto out_attach;
259 }
260
261 /*
262 * There are no other aliases
263 */
264 dl = kmalloc(sizeof(*dl), GFP_NOFS);
265 if (!dl) {
266 ret = -ENOMEM;
267 mlog_errno(ret);
268 return ret;
269 }
270
271 dl->dl_count = 0;
272 /*
273 * Does this have to happen below, for all attaches, in case
274 * the struct inode gets blown away by votes?
275 */
276 dl->dl_inode = igrab(inode);
277 dl->dl_parent_blkno = parent_blkno;
278 ocfs2_dentry_lock_res_init(dl, parent_blkno, inode);
279
280out_attach:
281 spin_lock(&dentry_attach_lock);
282 dentry->d_fsdata = dl;
283 dl->dl_count++;
284 spin_unlock(&dentry_attach_lock);
285
286 /*
287 * Creation of a new file means that nobody can possibly have
288 * this name in the system, which means that acquiry of those
289 * locks can easily be optimized.
290 */
291 if (create) {
292 ret = ocfs2_create_new_lock(OCFS2_SB(inode->i_sb),
293 &dl->dl_lockres, 0);
294 if (ret)
295 mlog_errno(ret);
296 goto out;
297 }
298
299 /*
300 * This actually gets us our PRMODE level lock. From now on,
301 * we'll have a notification if one of these names is
302 * destroyed on another node.
303 */
304 ret = ocfs2_dentry_lock(dentry, 0);
305 if (ret) {
306 mlog_errno(ret);
307 goto out;
308 }
309 ocfs2_dentry_unlock(dentry, 0);
310
311out:
312 dput(alias);
313
314 return ret;
315}
316
317/*
318 * ocfs2_dentry_iput() and friends.
319 *
320 * At this point, our particular dentry is detached from the inodes
321 * alias list, so there's no way that the locking code can find it.
322 *
323 * The interesting stuff happens when we determine that our lock needs
324 * to go away because this is the last subdir alias in the
325 * system. This function needs to handle a couple things:
326 *
327 * 1) Synchronizing lock shutdown with the downconvert threads. This
328 * is already handled for us via the lockres release drop function
329 * called in ocfs2_release_dentry_lock()
330 *
331 * 2) A race may occur when we're doing our lock shutdown and
332 * another process wants to create a new dentry lock. Right now we
333 * let them race, which means that for a very short while, this
334 * node might have two locks on a lock resource. This should be a
335 * problem though because one of them is in the process of being
336 * thrown out.
337 */
338static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
339 struct ocfs2_dentry_lock *dl)
340{
341 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
342 ocfs2_lock_res_free(&dl->dl_lockres);
343 iput(dl->dl_inode);
344 kfree(dl);
345}
346
347void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
348 struct ocfs2_dentry_lock *dl)
349{
350 int unlock = 0;
351
352 BUG_ON(dl->dl_count == 0);
353
354 spin_lock(&dentry_attach_lock);
355 dl->dl_count--;
356 unlock = !dl->dl_count;
357 spin_unlock(&dentry_attach_lock);
358
359 if (unlock)
360 ocfs2_drop_dentry_lock(osb, dl);
361}
362
363static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
364{
365 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
366
367 mlog_bug_on_msg(!dl && !(dentry->d_flags & DCACHE_DISCONNECTED),
368 "dentry: %.*s\n", dentry->d_name.len,
369 dentry->d_name.name);
370
371 if (!dl)
372 goto out;
373
374 mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
375 dentry->d_name.len, dentry->d_name.name,
376 dl->dl_count);
377
378 ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
379
380out:
381 iput(inode);
382}
383
384/*
385 * d_move(), but keep the locks in sync.
386 *
387 * When we are done, "dentry" will have the parent dir and name of
388 * "target", which will be thrown away.
389 *
390 * We manually update the lock of "dentry" if need be.
391 *
392 * "target" doesn't have it's dentry lock touched - we allow the later
393 * dput() to handle this for us.
394 *
395 * This is called during ocfs2_rename(), while holding parent
396 * directory locks. The dentries have already been deleted on other
397 * nodes via ocfs2_remote_dentry_delete().
398 *
399 * Normally, the VFS handles the d_move() for the file sytem, after
400 * the ->rename() callback. OCFS2 wants to handle this internally, so
401 * the new lock can be created atomically with respect to the cluster.
402 */
403void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
404 struct inode *old_dir, struct inode *new_dir)
405{
406 int ret;
407 struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
408 struct inode *inode = dentry->d_inode;
409
410 /*
411 * Move within the same directory, so the actual lock info won't
412 * change.
413 *
414 * XXX: Is there any advantage to dropping the lock here?
415 */
416 if (old_dir == new_dir)
417 return;
418
419 ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
420
421 dentry->d_fsdata = NULL;
422 ret = ocfs2_dentry_attach_lock(dentry, inode,
423 OCFS2_I(new_dir)->ip_blkno, 0);
424 if (ret)
425 mlog_errno(ret);
426}
427
92struct dentry_operations ocfs2_dentry_ops = { 428struct dentry_operations ocfs2_dentry_ops = {
93 .d_revalidate = ocfs2_dentry_revalidate, 429 .d_revalidate = ocfs2_dentry_revalidate,
430 .d_iput = ocfs2_dentry_iput,
94}; 431};
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index f1423c2134ee..e53abe766cab 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -41,17 +41,17 @@ struct ocfs2_dentry_lock {
41 struct ocfs2_lock_res dl_lockres; 41 struct ocfs2_lock_res dl_lockres;
42}; 42};
43 43
44static inline void ocfs2_dentry_lock_put(struct ocfs2_super *osb, 44int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
45 struct ocfs2_dentry_lock *dl) 45 u64 parent_blkno, int create);
46{ 46
47} 47void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
48 48 struct ocfs2_dentry_lock *dl);
49static inline struct dentry *ocfs2_find_local_alias(struct inode *inode, 49
50 u64 parent_blkno, 50struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
51 int skip_unhashed) 51 int skip_unhashed);
52{ 52
53 return NULL; 53void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
54} 54 struct inode *old_dir, struct inode *new_dir);
55 55
56extern spinlock_t dentry_attach_lock; 56extern spinlock_t dentry_attach_lock;
57 57
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index fc29cb7a437d..98435002ac44 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -28,11 +28,11 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30 30
31#include "ocfs2.h"
32
33#define MLOG_MASK_PREFIX ML_INODE 31#define MLOG_MASK_PREFIX ML_INODE
34#include <cluster/masklog.h> 32#include <cluster/masklog.h>
35 33
34#include "ocfs2.h"
35
36#include "alloc.h" 36#include "alloc.h"
37#include "dir.h" 37#include "dir.h"
38#include "inode.h" 38#include "inode.h"