Diffstat (limited to 'fs/ocfs2/dcache.c')
-rw-r--r--	fs/ocfs2/dcache.c	375
1 files changed, 356 insertions, 19 deletions

diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index aea457718946..09efe240e652 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -35,15 +35,17 @@
 
 #include "alloc.h"
 #include "dcache.h"
+#include "dlmglue.h"
 #include "file.h"
 #include "inode.h"
 
+
 static int ocfs2_dentry_revalidate(struct dentry *dentry,
 				   struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;
 	int ret = 0;    /* if all else fails, just return false */
-	struct ocfs2_super *osb;
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 
 	mlog_entry("(0x%p, '%.*s')\n", dentry,
 		   dentry->d_name.len, dentry->d_name.name);
@@ -55,28 +57,31 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
 		goto bail;
 	}
 
-	osb = OCFS2_SB(inode->i_sb);
-
 	BUG_ON(!osb);
 
-	if (inode != osb->root_inode) {
-		spin_lock(&OCFS2_I(inode)->ip_lock);
-		/* did we or someone else delete this inode? */
-		if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
-			spin_unlock(&OCFS2_I(inode)->ip_lock);
-			mlog(0, "inode (%llu) deleted, returning false\n",
-			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
-			goto bail;
-		}
-		spin_unlock(&OCFS2_I(inode)->ip_lock);
+	if (inode == osb->root_inode || is_bad_inode(inode))
+		goto bail;
+
+	spin_lock(&OCFS2_I(inode)->ip_lock);
+	/* did we or someone else delete this inode? */
+	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
+		spin_unlock(&OCFS2_I(inode)->ip_lock);
+		mlog(0, "inode (%llu) deleted, returning false\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
+		goto bail;
+	}
+	spin_unlock(&OCFS2_I(inode)->ip_lock);
 
-		if (!inode->i_nlink) {
-			mlog(0, "Inode %llu orphaned, returning false "
-			     "dir = %d\n",
-			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
-			     S_ISDIR(inode->i_mode));
-			goto bail;
-		}
+	/*
+	 * We don't need a cluster lock to test this because once an
+	 * inode nlink hits zero, it never goes back.
+	 */
+	if (inode->i_nlink == 0) {
+		mlog(0, "Inode %llu orphaned, returning false "
+		     "dir = %d\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		     S_ISDIR(inode->i_mode));
+		goto bail;
+	}
 	}
 
 	ret = 1;
@@ -87,8 +92,340 @@ bail:
 	return ret;
 }
 
+static int ocfs2_match_dentry(struct dentry *dentry,
+			      u64 parent_blkno,
+			      int skip_unhashed)
+{
+	struct inode *parent;
+
+	/*
+	 * ocfs2_lookup() does a d_splice_alias() _before_ attaching
+	 * to the lock data, so we skip those here, otherwise
+	 * ocfs2_dentry_attach_lock() will get its original dentry
+	 * back.
+	 */
+	if (!dentry->d_fsdata)
+		return 0;
+
+	if (!dentry->d_parent)
+		return 0;
+
+	if (skip_unhashed && d_unhashed(dentry))
+		return 0;
+
+	parent = dentry->d_parent->d_inode;
+	/* Negative parent dentry? */
+	if (!parent)
+		return 0;
+
+	/* Name is in a different directory. */
+	if (OCFS2_I(parent)->ip_blkno != parent_blkno)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Walk the inode alias list, and find a dentry which has a given
+ * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
+ * is looking for a dentry_lock reference. The vote thread is looking
+ * to unhash aliases, so we allow it to skip any that already have
+ * that property.
+ */
+struct dentry *ocfs2_find_local_alias(struct inode *inode,
+				      u64 parent_blkno,
+				      int skip_unhashed)
+{
+	struct list_head *p;
+	struct dentry *dentry = NULL;
+
+	spin_lock(&dcache_lock);
+
+	list_for_each(p, &inode->i_dentry) {
+		dentry = list_entry(p, struct dentry, d_alias);
+
+		if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
+			mlog(0, "dentry found: %.*s\n",
+			     dentry->d_name.len, dentry->d_name.name);
+
+			dget_locked(dentry);
+			break;
+		}
+
+		dentry = NULL;
+	}
+
+	spin_unlock(&dcache_lock);
+
+	return dentry;
+}
+
 DEFINE_SPINLOCK(dentry_attach_lock);
 
+/*
+ * Attach this dentry to a cluster lock.
+ *
+ * Dentry locks cover all links in a given directory to a particular
+ * inode. We do this so that ocfs2 can build a lock name which all
+ * nodes in the cluster can agree on at all times. Shoving full names
+ * in the cluster lock won't work due to size restrictions. Covering
+ * links inside of a directory is a good compromise because it still
+ * allows us to use the parent directory lock to synchronize
+ * operations.
+ *
+ * Call this function with the parent dir semaphore and the parent dir
+ * cluster lock held.
+ *
+ * The dir semaphore will protect us from having to worry about
+ * concurrent processes on our node trying to attach a lock at the
+ * same time.
+ *
+ * The dir cluster lock (held at either PR or EX mode) protects us
+ * from unlink and rename on other nodes.
+ *
+ * The 'create' flag tells us whether we're doing this as a result of
+ * a file creation.
+ *
+ * A dput() can happen asynchronously due to pruning, so we cover
+ * attaching and detaching the dentry lock with a
+ * dentry_attach_lock.
+ *
+ * A node which has done lookup on a name retains a protected read
+ * lock until final dput. If the user requests an unlink or rename,
+ * the protected read is upgraded to an exclusive lock. Other nodes
+ * who have seen the dentry will then be informed that they need to
+ * downgrade their lock, which will involve d_delete on the
+ * dentry. This happens in ocfs2_dentry_convert_worker().
+ */
+int ocfs2_dentry_attach_lock(struct dentry *dentry,
+			     struct inode *inode,
+			     u64 parent_blkno,
+			     int create)
+{
+	int ret;
+	struct dentry *alias;
+	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+
+	mlog(0, "Attach \"%.*s\", parent %llu, create %d, fsdata: %p\n",
+	     dentry->d_name.len, dentry->d_name.name,
+	     (unsigned long long)parent_blkno, create, dl);
+
+	/*
+	 * Negative dentry. We ignore these for now.
+	 *
+	 * XXX: Could we improve ocfs2_dentry_revalidate() by
+	 * tracking these?
+	 */
+	if (!inode)
+		return 0;
+
+	if (dl) {
+		mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
+				" \"%.*s\": old parent: %llu, new: %llu\n",
+				dentry->d_name.len, dentry->d_name.name,
+				(unsigned long long)parent_blkno,
+				(unsigned long long)dl->dl_parent_blkno);
+		return 0;
+	}
+
+	alias = ocfs2_find_local_alias(inode, parent_blkno, 0);
+	if (alias) {
+		/*
+		 * Great, an alias exists, which means we must have a
+		 * dentry lock already. We can just grab the lock off
+		 * the alias and add it to the list.
+		 *
+		 * We're depending here on the fact that this dentry
+		 * was found and exists in the dcache and so must have
+		 * a reference to the dentry_lock because we can't
+		 * race creates. Final dput() cannot happen on it
+		 * since we have it pinned, so our reference is safe.
+		 */
+		dl = alias->d_fsdata;
+		mlog_bug_on_msg(!dl, "parent %llu, ino %llu, create %d\n",
+				(unsigned long long)parent_blkno,
+				(unsigned long long)OCFS2_I(inode)->ip_blkno,
+				create);
+
+		mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
+				" \"%.*s\": old parent: %llu, new: %llu\n",
+				dentry->d_name.len, dentry->d_name.name,
+				(unsigned long long)parent_blkno,
+				(unsigned long long)dl->dl_parent_blkno);
+
+		mlog(0, "Found: %s\n", dl->dl_lockres.l_name);
+
+		goto out_attach;
+	}
+
+	/*
+	 * There are no other aliases
+	 */
+	dl = kmalloc(sizeof(*dl), GFP_NOFS);
+	if (!dl) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		return ret;
+	}
+
+	dl->dl_count = 0;
+	/*
+	 * Does this have to happen below, for all attaches, in case
+	 * the struct inode gets blown away by votes?
+	 */
+	dl->dl_inode = igrab(inode);
+	dl->dl_parent_blkno = parent_blkno;
+	ocfs2_dentry_lock_res_init(dl, parent_blkno, inode);
+
+out_attach:
+	spin_lock(&dentry_attach_lock);
+	dentry->d_fsdata = dl;
+	dl->dl_count++;
+	spin_unlock(&dentry_attach_lock);
+
+	/*
+	 * Creation of a new file means that nobody can possibly have
+	 * this name in the system, which means that acquisition of
+	 * those locks can easily be optimized.
+	 */
+	if (create) {
+		ret = ocfs2_create_new_lock(OCFS2_SB(inode->i_sb),
+					    &dl->dl_lockres, 0);
+		if (ret)
+			mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * This actually gets us our PRMODE level lock. From now on,
+	 * we'll have a notification if one of these names is
+	 * destroyed on another node.
+	 */
+	ret = ocfs2_dentry_lock(dentry, 0);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+	ocfs2_dentry_unlock(dentry, 0);
+
+out:
+	dput(alias);
+
+	return ret;
+}
+
+/*
+ * ocfs2_dentry_iput() and friends.
+ *
+ * At this point, our particular dentry is detached from the inode's
+ * alias list, so there's no way that the locking code can find it.
+ *
+ * The interesting stuff happens when we determine that our lock needs
+ * to go away because this is the last subdir alias in the
+ * system. This function needs to handle a couple of things:
+ *
+ * 1) Synchronizing lock shutdown with the downconvert threads. This
+ *    is already handled for us via the lockres release drop function
+ *    called in ocfs2_release_dentry_lock()
+ *
+ * 2) A race may occur when we're doing our lock shutdown and
+ *    another process wants to create a new dentry lock. Right now we
+ *    let them race, which means that for a very short while, this
+ *    node might have two locks on a lock resource. This shouldn't be
+ *    a problem, though, because one of them is in the process of
+ *    being thrown out.
+ */
+static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
+				   struct ocfs2_dentry_lock *dl)
+{
+	ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
+	ocfs2_lock_res_free(&dl->dl_lockres);
+	iput(dl->dl_inode);
+	kfree(dl);
+}
+
+void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
+			   struct ocfs2_dentry_lock *dl)
+{
+	int unlock = 0;
+
+	BUG_ON(dl->dl_count == 0);
+
+	spin_lock(&dentry_attach_lock);
+	dl->dl_count--;
+	unlock = !dl->dl_count;
+	spin_unlock(&dentry_attach_lock);
+
+	if (unlock)
+		ocfs2_drop_dentry_lock(osb, dl);
+}
+
+static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
+{
+	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+
+	mlog_bug_on_msg(!dl && !(dentry->d_flags & DCACHE_DISCONNECTED),
+			"dentry: %.*s\n", dentry->d_name.len,
+			dentry->d_name.name);
+
+	if (!dl)
+		goto out;
+
+	mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
+			dentry->d_name.len, dentry->d_name.name,
+			dl->dl_count);
+
+	ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
+
+out:
+	iput(inode);
+}
+
+/*
+ * d_move(), but keep the locks in sync.
+ *
+ * When we are done, "dentry" will have the parent dir and name of
+ * "target", which will be thrown away.
+ *
+ * We manually update the lock of "dentry" if need be.
+ *
+ * "target" doesn't have its dentry lock touched - we allow the later
+ * dput() to handle this for us.
+ *
+ * This is called during ocfs2_rename(), while holding parent
+ * directory locks. The dentries have already been deleted on other
+ * nodes via ocfs2_remote_dentry_delete().
+ *
+ * Normally, the VFS handles the d_move() for the file system, after
+ * the ->rename() callback. OCFS2 wants to handle this internally, so
+ * the new lock can be created atomically with respect to the cluster.
+ */
+void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
+		       struct inode *old_dir, struct inode *new_dir)
+{
+	int ret;
+	struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
+	struct inode *inode = dentry->d_inode;
+
+	/*
+	 * Move within the same directory, so the actual lock info won't
+	 * change.
+	 *
+	 * XXX: Is there any advantage to dropping the lock here?
+	 */
+	if (old_dir == new_dir)
+		return;
+
+	ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
+
+	dentry->d_fsdata = NULL;
+	ret = ocfs2_dentry_attach_lock(dentry, inode,
+				       OCFS2_I(new_dir)->ip_blkno, 0);
+	if (ret)
+		mlog_errno(ret);
+}
+
 struct dentry_operations ocfs2_dentry_ops = {
 	.d_revalidate = ocfs2_dentry_revalidate,
+	.d_iput = ocfs2_dentry_iput,
 };
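
The core lifecycle introduced by this patch is a per-name, refcounted lock object: ocfs2_dentry_attach_lock() bumps dl_count under the global dentry_attach_lock spinlock, and ocfs2_dentry_lock_put() drops the cluster lock and frees the structure only when the last dentry reference goes away. The following is a minimal, self-contained userspace sketch of that refcounting pattern only - it is not ocfs2 code, all names (demo_dentry_lock, demo_attach, demo_put) are hypothetical, and a pthread mutex stands in for the kernel spinlock.

/*
 * Illustrative sketch of the attach/put refcounting used above.
 * Assumptions: userspace, pthread mutex in place of spin_lock(),
 * printf()/free() in place of dropping the cluster lock and kfree().
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the global dentry_attach_lock. */
static pthread_mutex_t attach_lock = PTHREAD_MUTEX_INITIALIZER;

struct demo_dentry_lock {
	unsigned int dl_count;	/* how many dentries share this lock */
};

/* Mirrors the attach path: publish the lock and bump the count. */
static void demo_attach(struct demo_dentry_lock *dl)
{
	pthread_mutex_lock(&attach_lock);
	dl->dl_count++;
	pthread_mutex_unlock(&attach_lock);
}

/* Mirrors ocfs2_dentry_lock_put(): only the final put tears down. */
static void demo_put(struct demo_dentry_lock *dl)
{
	int unlock;

	pthread_mutex_lock(&attach_lock);
	dl->dl_count--;
	unlock = !dl->dl_count;
	pthread_mutex_unlock(&attach_lock);

	if (unlock) {
		/* Stand-in for dropping the cluster lock and kfree(). */
		printf("last reference dropped, freeing lock\n");
		free(dl);
	}
}

int main(void)
{
	struct demo_dentry_lock *dl = calloc(1, sizeof(*dl));

	if (!dl)
		return 1;

	demo_attach(dl);	/* first link's dentry in a directory */
	demo_attach(dl);	/* a second link in the same directory */
	demo_put(dl);		/* one dput(): the lock survives */
	demo_put(dl);		/* final dput(): the lock is torn down */
	return 0;
}

The point of the shared count is the same as in the patch: all hard links to an inode within one directory share a single lock resource, so the expensive cluster-lock teardown happens once, on the last local reference, rather than per dentry.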