aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dcache.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dcache.c')
-rw-r--r--fs/dcache.c921
1 files changed, 664 insertions, 257 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index 953173a293a9..23a3401af2fb 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,13 +35,34 @@
35#include <linux/hardirq.h> 35#include <linux/hardirq.h>
36#include "internal.h" 36#include "internal.h"
37 37
38/*
39 * Usage:
40 * dcache->d_inode->i_lock protects:
41 * - the inode alias lists, d_inode
42 * dcache_hash_bucket->lock protects:
43 * - the dcache hash table
44 * dcache_lru_lock protects:
45 * - the dcache lru lists and counters
46 * d_lock protects:
47 * - d_flags
48 * - d_name
49 * - d_lru
50 * - d_unhashed
51 * - d_subdirs and children's d_child
52 *
53 * Ordering:
54 * dcache->d_inode->i_lock
55 * dentry->d_lock
56 * dcache_lru_lock
57 * dcache_hash_bucket->lock
58 */
38int sysctl_vfs_cache_pressure __read_mostly = 100; 59int sysctl_vfs_cache_pressure __read_mostly = 100;
39EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); 60EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
40 61
41 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); 62static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
42__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); 63__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
43 64
44EXPORT_SYMBOL(dcache_lock); 65EXPORT_SYMBOL(rename_lock);
45 66
46static struct kmem_cache *dentry_cache __read_mostly; 67static struct kmem_cache *dentry_cache __read_mostly;
47 68
@@ -60,13 +81,27 @@ static struct kmem_cache *dentry_cache __read_mostly;
60 81
61static unsigned int d_hash_mask __read_mostly; 82static unsigned int d_hash_mask __read_mostly;
62static unsigned int d_hash_shift __read_mostly; 83static unsigned int d_hash_shift __read_mostly;
63static struct hlist_head *dentry_hashtable __read_mostly; 84
85struct dcache_hash_bucket {
86 spinlock_t lock;
87 struct hlist_head head;
88};
89static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
64 90
65/* Statistics gathering. */ 91/* Statistics gathering. */
66struct dentry_stat_t dentry_stat = { 92struct dentry_stat_t dentry_stat = {
93 .nr_dentry = 0,
67 .age_limit = 45, 94 .age_limit = 45,
68}; 95};
69 96
97static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
98 unsigned long hash)
99{
100 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
101 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
102 return dentry_hashtable + (hash & D_HASHMASK);
103}
104
70static void __d_free(struct dentry *dentry) 105static void __d_free(struct dentry *dentry)
71{ 106{
72 WARN_ON(!list_empty(&dentry->d_alias)); 107 WARN_ON(!list_empty(&dentry->d_alias));
@@ -82,11 +117,11 @@ static void d_callback(struct rcu_head *head)
82} 117}
83 118
84/* 119/*
85 * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry 120 * no locks, please.
86 * inside dcache_lock.
87 */ 121 */
88static void d_free(struct dentry *dentry) 122static void d_free(struct dentry *dentry)
89{ 123{
124 BUG_ON(atomic_read(&dentry->d_count));
90 if (dentry->d_op && dentry->d_op->d_release) 125 if (dentry->d_op && dentry->d_op->d_release)
91 dentry->d_op->d_release(dentry); 126 dentry->d_op->d_release(dentry);
92 /* if dentry was never inserted into hash, immediate free is OK */ 127 /* if dentry was never inserted into hash, immediate free is OK */
@@ -102,14 +137,13 @@ static void d_free(struct dentry *dentry)
102 */ 137 */
103static void dentry_iput(struct dentry * dentry) 138static void dentry_iput(struct dentry * dentry)
104 __releases(dentry->d_lock) 139 __releases(dentry->d_lock)
105 __releases(dcache_lock)
106{ 140{
107 struct inode *inode = dentry->d_inode; 141 struct inode *inode = dentry->d_inode;
108 if (inode) { 142 if (inode) {
109 dentry->d_inode = NULL; 143 dentry->d_inode = NULL;
110 list_del_init(&dentry->d_alias); 144 list_del_init(&dentry->d_alias);
111 spin_unlock(&dentry->d_lock); 145 spin_unlock(&dentry->d_lock);
112 spin_unlock(&dcache_lock); 146 spin_unlock(&inode->i_lock);
113 if (!inode->i_nlink) 147 if (!inode->i_nlink)
114 fsnotify_inoderemove(inode); 148 fsnotify_inoderemove(inode);
115 if (dentry->d_op && dentry->d_op->d_iput) 149 if (dentry->d_op && dentry->d_op->d_iput)
@@ -118,42 +152,60 @@ static void dentry_iput(struct dentry * dentry)
118 iput(inode); 152 iput(inode);
119 } else { 153 } else {
120 spin_unlock(&dentry->d_lock); 154 spin_unlock(&dentry->d_lock);
121 spin_unlock(&dcache_lock);
122 } 155 }
123} 156}
124 157
125/* 158/*
126 * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held. 159 * dentry_lru_(add|add_tail|del|del_init) must be called with d_lock held
160 * to protect list_empty(d_lru) condition.
127 */ 161 */
128static void dentry_lru_add(struct dentry *dentry) 162static void dentry_lru_add(struct dentry *dentry)
129{ 163{
164 spin_lock(&dcache_lru_lock);
130 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 165 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
131 dentry->d_sb->s_nr_dentry_unused++; 166 dentry->d_sb->s_nr_dentry_unused++;
132 dentry_stat.nr_unused++; 167 dentry_stat.nr_unused++;
168 spin_unlock(&dcache_lru_lock);
133} 169}
134 170
135static void dentry_lru_add_tail(struct dentry *dentry) 171static void dentry_lru_add_tail(struct dentry *dentry)
136{ 172{
173 spin_lock(&dcache_lru_lock);
137 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 174 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
138 dentry->d_sb->s_nr_dentry_unused++; 175 dentry->d_sb->s_nr_dentry_unused++;
139 dentry_stat.nr_unused++; 176 dentry_stat.nr_unused++;
177 spin_unlock(&dcache_lru_lock);
178}
179
180static void __dentry_lru_del(struct dentry *dentry)
181{
182 list_del(&dentry->d_lru);
183 dentry->d_sb->s_nr_dentry_unused--;
184 dentry_stat.nr_unused--;
185}
186
187static void __dentry_lru_del_init(struct dentry *dentry)
188{
189 list_del_init(&dentry->d_lru);
190 dentry->d_sb->s_nr_dentry_unused--;
191 dentry_stat.nr_unused--;
140} 192}
141 193
142static void dentry_lru_del(struct dentry *dentry) 194static void dentry_lru_del(struct dentry *dentry)
143{ 195{
144 if (!list_empty(&dentry->d_lru)) { 196 if (!list_empty(&dentry->d_lru)) {
145 list_del(&dentry->d_lru); 197 spin_lock(&dcache_lru_lock);
146 dentry->d_sb->s_nr_dentry_unused--; 198 __dentry_lru_del(dentry);
147 dentry_stat.nr_unused--; 199 spin_unlock(&dcache_lru_lock);
148 } 200 }
149} 201}
150 202
151static void dentry_lru_del_init(struct dentry *dentry) 203static void dentry_lru_del_init(struct dentry *dentry)
152{ 204{
153 if (likely(!list_empty(&dentry->d_lru))) { 205 if (likely(!list_empty(&dentry->d_lru))) {
154 list_del_init(&dentry->d_lru); 206 spin_lock(&dcache_lru_lock);
155 dentry->d_sb->s_nr_dentry_unused--; 207 __dentry_lru_del_init(dentry);
156 dentry_stat.nr_unused--; 208 spin_unlock(&dcache_lru_lock);
157 } 209 }
158} 210}
159 211
@@ -164,25 +216,87 @@ static void dentry_lru_del_init(struct dentry *dentry)
164 * The dentry must already be unhashed and removed from the LRU. 216 * The dentry must already be unhashed and removed from the LRU.
165 * 217 *
166 * If this is the root of the dentry tree, return NULL. 218 * If this is the root of the dentry tree, return NULL.
219 *
220 * d_lock and d_parent->d_lock must be held by caller, and
221 * are dropped by d_kill.
167 */ 222 */
168static struct dentry *d_kill(struct dentry *dentry) 223static struct dentry *d_kill(struct dentry *dentry)
169 __releases(dentry->d_lock) 224 __releases(dentry->d_lock)
170 __releases(dcache_lock)
171{ 225{
172 struct dentry *parent; 226 struct dentry *parent;
173 227
174 list_del(&dentry->d_u.d_child); 228 list_del(&dentry->d_u.d_child);
175 dentry_stat.nr_dentry--; /* For d_free, below */ 229 if (dentry->d_parent && dentry != dentry->d_parent)
176 /*drops the locks, at that point nobody can reach this dentry */ 230 spin_unlock(&dentry->d_parent->d_lock);
177 dentry_iput(dentry);
178 if (IS_ROOT(dentry)) 231 if (IS_ROOT(dentry))
179 parent = NULL; 232 parent = NULL;
180 else 233 else
181 parent = dentry->d_parent; 234 parent = dentry->d_parent;
235 /*drops the locks, at that point nobody can reach this dentry */
236 dentry_iput(dentry);
182 d_free(dentry); 237 d_free(dentry);
183 return parent; 238 return parent;
184} 239}
185 240
241void __d_drop(struct dentry *dentry)
242{
243 if (!(dentry->d_flags & DCACHE_UNHASHED)) {
244 struct dcache_hash_bucket *b;
245 b = d_hash(dentry->d_parent, dentry->d_name.hash);
246 dentry->d_flags |= DCACHE_UNHASHED;
247 spin_lock(&b->lock);
248 hlist_del_rcu(&dentry->d_hash);
249 spin_unlock(&b->lock);
250 }
251}
252EXPORT_SYMBOL(__d_drop);
253
254void d_drop(struct dentry *dentry)
255{
256 spin_lock(&dentry->d_lock);
257 __d_drop(dentry);
258 spin_unlock(&dentry->d_lock);
259}
260EXPORT_SYMBOL(d_drop);
261
262static inline struct dentry *__dget_dlock(struct dentry *dentry)
263{
264 atomic_inc(&dentry->d_count);
265 return dentry;
266}
267
/*
 * Take an extra reference without d_lock held.  d_count is atomic, so
 * this is currently identical to __dget_dlock().
 */
static inline struct dentry *__dget(struct dentry *dentry)
{
	return __dget_dlock(dentry);
}
273
274struct dentry *dget_parent(struct dentry *dentry)
275{
276 struct dentry *ret;
277
278repeat:
279 spin_lock(&dentry->d_lock);
280 ret = dentry->d_parent;
281 if (!ret)
282 goto out;
283 if (dentry == ret) {
284 atomic_inc(&ret->d_count);
285 goto out;
286 }
287 if (!spin_trylock(&ret->d_lock)) {
288 spin_unlock(&dentry->d_lock);
289 goto repeat;
290 }
291 BUG_ON(!atomic_read(&ret->d_count));
292 atomic_inc(&ret->d_count);
293 spin_unlock(&ret->d_lock);
294out:
295 spin_unlock(&dentry->d_lock);
296 return ret;
297}
298EXPORT_SYMBOL(dget_parent);
299
186/* 300/*
187 * This is dput 301 * This is dput
188 * 302 *
@@ -214,48 +328,68 @@ static struct dentry *d_kill(struct dentry *dentry)
214 328
215void dput(struct dentry *dentry) 329void dput(struct dentry *dentry)
216{ 330{
331 struct dentry *parent;
332 struct inode *inode;
333
217 if (!dentry) 334 if (!dentry)
218 return; 335 return;
219 336
220repeat: 337repeat:
221 if (atomic_read(&dentry->d_count) == 1) 338 if (atomic_read(&dentry->d_count) == 1)
222 might_sleep(); 339 might_sleep();
223 if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
224 return;
225 340
226 spin_lock(&dentry->d_lock); 341 if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
227 if (atomic_read(&dentry->d_count)) {
228 spin_unlock(&dentry->d_lock);
229 spin_unlock(&dcache_lock);
230 return; 342 return;
231 } 343
232 344
233 /* 345 /*
234 * AV: ->d_delete() is _NOT_ allowed to block now. 346 * AV: ->d_delete() is _NOT_ allowed to block now.
235 */ 347 */
236 if (dentry->d_op && dentry->d_op->d_delete) { 348 if (dentry->d_op && dentry->d_op->d_delete) {
237 if (dentry->d_op->d_delete(dentry)) 349 if (dentry->d_op->d_delete(dentry)) {
238 goto unhash_it; 350 __d_drop(dentry);
351 goto kill_it;
352 }
239 } 353 }
240 /* Unreachable? Get rid of it */ 354 /* Unreachable? Get rid of it */
241 if (d_unhashed(dentry)) 355 if (d_unhashed(dentry))
242 goto kill_it; 356 goto kill_it;
243 if (list_empty(&dentry->d_lru)) { 357 if (list_empty(&dentry->d_lru)) {
244 dentry->d_flags |= DCACHE_REFERENCED; 358 dentry->d_flags |= DCACHE_REFERENCED;
245 dentry_lru_add(dentry); 359 dentry_lru_add(dentry);
246 } 360 }
247 spin_unlock(&dentry->d_lock); 361 spin_unlock(&dentry->d_lock);
248 spin_unlock(&dcache_lock);
249 return; 362 return;
250 363
251unhash_it:
252 __d_drop(dentry);
253kill_it: 364kill_it:
365 inode = dentry->d_inode;
366 if (inode && !spin_trylock(&inode->i_lock))
367 goto retry;
368
369 parent = dentry->d_parent;
370 if (parent && parent != dentry && !spin_trylock(&parent->d_lock)) {
371 if (inode)
372 spin_unlock(&inode->i_lock);
373 goto retry;
374 }
375
254 /* if dentry was on the d_lru list delete it from there */ 376 /* if dentry was on the d_lru list delete it from there */
255 dentry_lru_del(dentry); 377 dentry_lru_del(dentry);
256 dentry = d_kill(dentry); 378 dentry = d_kill(dentry);
257 if (dentry) 379 if (dentry)
258 goto repeat; 380 goto repeat;
381 return;
382
383retry:
384 /*
385 * We are about to drop dentry->d_lock. dentry->d_count is 0
386 * so it could be freed by someone else and leave us with a
387 * stale pointer. Prevent this by increasing d_count before
388 * dropping d_lock.
389 */
390 atomic_inc(&dentry->d_count);
391 spin_unlock(&dentry->d_lock);
392 goto repeat;
259} 393}
260 394
261/** 395/**
@@ -275,9 +409,9 @@ int d_invalidate(struct dentry * dentry)
275 /* 409 /*
276 * If it's already been dropped, return OK. 410 * If it's already been dropped, return OK.
277 */ 411 */
278 spin_lock(&dcache_lock); 412 spin_lock(&dentry->d_lock);
279 if (d_unhashed(dentry)) { 413 if (d_unhashed(dentry)) {
280 spin_unlock(&dcache_lock); 414 spin_unlock(&dentry->d_lock);
281 return 0; 415 return 0;
282 } 416 }
283 /* 417 /*
@@ -285,9 +419,9 @@ int d_invalidate(struct dentry * dentry)
285 * to get rid of unused child entries. 419 * to get rid of unused child entries.
286 */ 420 */
287 if (!list_empty(&dentry->d_subdirs)) { 421 if (!list_empty(&dentry->d_subdirs)) {
288 spin_unlock(&dcache_lock); 422 spin_unlock(&dentry->d_lock);
289 shrink_dcache_parent(dentry); 423 shrink_dcache_parent(dentry);
290 spin_lock(&dcache_lock); 424 spin_lock(&dentry->d_lock);
291 } 425 }
292 426
293 /* 427 /*
@@ -300,35 +434,18 @@ int d_invalidate(struct dentry * dentry)
300 * we might still populate it if it was a 434 * we might still populate it if it was a
301 * working directory or similar). 435 * working directory or similar).
302 */ 436 */
303 spin_lock(&dentry->d_lock);
304 if (atomic_read(&dentry->d_count) > 1) { 437 if (atomic_read(&dentry->d_count) > 1) {
305 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { 438 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
306 spin_unlock(&dentry->d_lock); 439 spin_unlock(&dentry->d_lock);
307 spin_unlock(&dcache_lock);
308 return -EBUSY; 440 return -EBUSY;
309 } 441 }
310 } 442 }
311 443
312 __d_drop(dentry); 444 __d_drop(dentry);
313 spin_unlock(&dentry->d_lock); 445 spin_unlock(&dentry->d_lock);
314 spin_unlock(&dcache_lock);
315 return 0; 446 return 0;
316} 447}
317 448
318/* This should be called _only_ with dcache_lock held */
319
320static inline struct dentry * __dget_locked(struct dentry *dentry)
321{
322 atomic_inc(&dentry->d_count);
323 dentry_lru_del_init(dentry);
324 return dentry;
325}
326
327struct dentry * dget_locked(struct dentry *dentry)
328{
329 return __dget_locked(dentry);
330}
331
332/** 449/**
333 * d_find_alias - grab a hashed alias of inode 450 * d_find_alias - grab a hashed alias of inode
334 * @inode: inode in question 451 * @inode: inode in question
@@ -358,18 +475,21 @@ static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
358 next = tmp->next; 475 next = tmp->next;
359 prefetch(next); 476 prefetch(next);
360 alias = list_entry(tmp, struct dentry, d_alias); 477 alias = list_entry(tmp, struct dentry, d_alias);
478 spin_lock(&alias->d_lock);
361 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { 479 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
362 if (IS_ROOT(alias) && 480 if (IS_ROOT(alias) &&
363 (alias->d_flags & DCACHE_DISCONNECTED)) 481 (alias->d_flags & DCACHE_DISCONNECTED))
364 discon_alias = alias; 482 discon_alias = alias;
365 else if (!want_discon) { 483 else if (!want_discon) {
366 __dget_locked(alias); 484 __dget_dlock(alias);
485 spin_unlock(&alias->d_lock);
367 return alias; 486 return alias;
368 } 487 }
369 } 488 }
489 spin_unlock(&alias->d_lock);
370 } 490 }
371 if (discon_alias) 491 if (discon_alias)
372 __dget_locked(discon_alias); 492 __dget(discon_alias);
373 return discon_alias; 493 return discon_alias;
374} 494}
375 495
@@ -378,9 +498,9 @@ struct dentry * d_find_alias(struct inode *inode)
378 struct dentry *de = NULL; 498 struct dentry *de = NULL;
379 499
380 if (!list_empty(&inode->i_dentry)) { 500 if (!list_empty(&inode->i_dentry)) {
381 spin_lock(&dcache_lock); 501 spin_lock(&inode->i_lock);
382 de = __d_find_alias(inode, 0); 502 de = __d_find_alias(inode, 0);
383 spin_unlock(&dcache_lock); 503 spin_unlock(&inode->i_lock);
384 } 504 }
385 return de; 505 return de;
386} 506}
@@ -393,20 +513,20 @@ void d_prune_aliases(struct inode *inode)
393{ 513{
394 struct dentry *dentry; 514 struct dentry *dentry;
395restart: 515restart:
396 spin_lock(&dcache_lock); 516 spin_lock(&inode->i_lock);
397 list_for_each_entry(dentry, &inode->i_dentry, d_alias) { 517 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
398 spin_lock(&dentry->d_lock); 518 spin_lock(&dentry->d_lock);
399 if (!atomic_read(&dentry->d_count)) { 519 if (!atomic_read(&dentry->d_count)) {
400 __dget_locked(dentry); 520 __dget_dlock(dentry);
401 __d_drop(dentry); 521 __d_drop(dentry);
402 spin_unlock(&dentry->d_lock); 522 spin_unlock(&dentry->d_lock);
403 spin_unlock(&dcache_lock); 523 spin_unlock(&inode->i_lock);
404 dput(dentry); 524 dput(dentry);
405 goto restart; 525 goto restart;
406 } 526 }
407 spin_unlock(&dentry->d_lock); 527 spin_unlock(&dentry->d_lock);
408 } 528 }
409 spin_unlock(&dcache_lock); 529 spin_unlock(&inode->i_lock);
410} 530}
411 531
412/* 532/*
@@ -419,27 +539,43 @@ restart:
419 */ 539 */
420static void prune_one_dentry(struct dentry * dentry) 540static void prune_one_dentry(struct dentry * dentry)
421 __releases(dentry->d_lock) 541 __releases(dentry->d_lock)
422 __releases(dcache_lock)
423 __acquires(dcache_lock)
424{ 542{
425 __d_drop(dentry); 543 __d_drop(dentry);
426 dentry = d_kill(dentry); 544 dentry = d_kill(dentry);
427 545
428 /* 546 /*
429 * Prune ancestors. Locking is simpler than in dput(), 547 * Prune ancestors.
430 * because dcache_lock needs to be taken anyway.
431 */ 548 */
432 spin_lock(&dcache_lock);
433 while (dentry) { 549 while (dentry) {
434 if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) 550 struct dentry *parent = NULL;
551 struct inode *inode = dentry->d_inode;
552
553 if (inode)
554 spin_lock(&inode->i_lock);
555again:
556 spin_lock(&dentry->d_lock);
557 if (dentry->d_parent && dentry != dentry->d_parent) {
558 if (!spin_trylock(&dentry->d_parent->d_lock)) {
559 spin_unlock(&dentry->d_lock);
560 goto again;
561 }
562 parent = dentry->d_parent;
563 }
564 atomic_dec(&dentry->d_count);
565 if (atomic_read(&dentry->d_count)) {
566 if (parent)
567 spin_unlock(&parent->d_lock);
568 spin_unlock(&dentry->d_lock);
569 if (inode)
570 spin_unlock(&inode->i_lock);
435 return; 571 return;
572 }
436 573
437 if (dentry->d_op && dentry->d_op->d_delete) 574 if (dentry->d_op && dentry->d_op->d_delete)
438 dentry->d_op->d_delete(dentry); 575 dentry->d_op->d_delete(dentry);
439 dentry_lru_del_init(dentry); 576 dentry_lru_del_init(dentry);
440 __d_drop(dentry); 577 __d_drop(dentry);
441 dentry = d_kill(dentry); 578 dentry = d_kill(dentry);
442 spin_lock(&dcache_lock);
443 } 579 }
444} 580}
445 581
@@ -460,10 +596,11 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
460 596
461 BUG_ON(!sb); 597 BUG_ON(!sb);
462 BUG_ON((flags & DCACHE_REFERENCED) && count == NULL); 598 BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
463 spin_lock(&dcache_lock);
464 if (count != NULL) 599 if (count != NULL)
465 /* called from prune_dcache() and shrink_dcache_parent() */ 600 /* called from prune_dcache() and shrink_dcache_parent() */
466 cnt = *count; 601 cnt = *count;
602relock:
603 spin_lock(&dcache_lru_lock);
467restart: 604restart:
468 if (count == NULL) 605 if (count == NULL)
469 list_splice_init(&sb->s_dentry_lru, &tmp); 606 list_splice_init(&sb->s_dentry_lru, &tmp);
@@ -473,7 +610,10 @@ restart:
473 struct dentry, d_lru); 610 struct dentry, d_lru);
474 BUG_ON(dentry->d_sb != sb); 611 BUG_ON(dentry->d_sb != sb);
475 612
476 spin_lock(&dentry->d_lock); 613 if (!spin_trylock(&dentry->d_lock)) {
614 spin_unlock(&dcache_lru_lock);
615 goto relock;
616 }
477 /* 617 /*
478 * If we are honouring the DCACHE_REFERENCED flag and 618 * If we are honouring the DCACHE_REFERENCED flag and
479 * the dentry has this flag set, don't free it. Clear 619 * the dentry has this flag set, don't free it. Clear
@@ -491,33 +631,61 @@ restart:
491 if (!cnt) 631 if (!cnt)
492 break; 632 break;
493 } 633 }
494 cond_resched_lock(&dcache_lock); 634 cond_resched_lock(&dcache_lru_lock);
495 } 635 }
496 } 636 }
637 spin_unlock(&dcache_lru_lock);
638
639again:
640 spin_lock(&dcache_lru_lock); /* lru_lock also protects tmp list */
497 while (!list_empty(&tmp)) { 641 while (!list_empty(&tmp)) {
642 struct inode *inode;
643
498 dentry = list_entry(tmp.prev, struct dentry, d_lru); 644 dentry = list_entry(tmp.prev, struct dentry, d_lru);
499 dentry_lru_del_init(dentry); 645
500 spin_lock(&dentry->d_lock); 646 if (!spin_trylock(&dentry->d_lock)) {
647again1:
648 spin_unlock(&dcache_lru_lock);
649 goto again;
650 }
501 /* 651 /*
502 * We found an inuse dentry which was not removed from 652 * We found an inuse dentry which was not removed from
503 * the LRU because of laziness during lookup. Do not free 653 * the LRU because of laziness during lookup. Do not free
504 * it - just keep it off the LRU list. 654 * it - just keep it off the LRU list.
505 */ 655 */
506 if (atomic_read(&dentry->d_count)) { 656 if (atomic_read(&dentry->d_count)) {
657 __dentry_lru_del_init(dentry);
507 spin_unlock(&dentry->d_lock); 658 spin_unlock(&dentry->d_lock);
508 continue; 659 continue;
509 } 660 }
661 inode = dentry->d_inode;
662 if (inode && !spin_trylock(&inode->i_lock)) {
663again2:
664 spin_unlock(&dentry->d_lock);
665 goto again1;
666 }
667 if (dentry->d_parent && dentry->d_parent != dentry) {
668 if (!spin_trylock(&dentry->d_parent->d_lock)) {
669 if (inode)
670 spin_unlock(&inode->i_lock);
671 goto again2;
672 }
673 }
674 __dentry_lru_del_init(dentry);
675 spin_unlock(&dcache_lru_lock);
676
510 prune_one_dentry(dentry); 677 prune_one_dentry(dentry);
511 /* dentry->d_lock was dropped in prune_one_dentry() */ 678 /* dentry->d_lock dropped */
512 cond_resched_lock(&dcache_lock); 679 spin_lock(&dcache_lru_lock);
513 } 680 }
681
514 if (count == NULL && !list_empty(&sb->s_dentry_lru)) 682 if (count == NULL && !list_empty(&sb->s_dentry_lru))
515 goto restart; 683 goto restart;
516 if (count != NULL) 684 if (count != NULL)
517 *count = cnt; 685 *count = cnt;
518 if (!list_empty(&referenced)) 686 if (!list_empty(&referenced))
519 list_splice(&referenced, &sb->s_dentry_lru); 687 list_splice(&referenced, &sb->s_dentry_lru);
520 spin_unlock(&dcache_lock); 688 spin_unlock(&dcache_lru_lock);
521} 689}
522 690
523/** 691/**
@@ -539,7 +707,6 @@ static void prune_dcache(int count)
539 707
540 if (unused == 0 || count == 0) 708 if (unused == 0 || count == 0)
541 return; 709 return;
542 spin_lock(&dcache_lock);
543restart: 710restart:
544 if (count >= unused) 711 if (count >= unused)
545 prune_ratio = 1; 712 prune_ratio = 1;
@@ -575,11 +742,9 @@ restart:
575 if (down_read_trylock(&sb->s_umount)) { 742 if (down_read_trylock(&sb->s_umount)) {
576 if ((sb->s_root != NULL) && 743 if ((sb->s_root != NULL) &&
577 (!list_empty(&sb->s_dentry_lru))) { 744 (!list_empty(&sb->s_dentry_lru))) {
578 spin_unlock(&dcache_lock);
579 __shrink_dcache_sb(sb, &w_count, 745 __shrink_dcache_sb(sb, &w_count,
580 DCACHE_REFERENCED); 746 DCACHE_REFERENCED);
581 pruned -= w_count; 747 pruned -= w_count;
582 spin_lock(&dcache_lock);
583 } 748 }
584 up_read(&sb->s_umount); 749 up_read(&sb->s_umount);
585 } 750 }
@@ -595,7 +760,6 @@ restart:
595 } 760 }
596 } 761 }
597 spin_unlock(&sb_lock); 762 spin_unlock(&sb_lock);
598 spin_unlock(&dcache_lock);
599} 763}
600 764
601/** 765/**
@@ -624,10 +788,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
624 BUG_ON(!IS_ROOT(dentry)); 788 BUG_ON(!IS_ROOT(dentry));
625 789
626 /* detach this root from the system */ 790 /* detach this root from the system */
627 spin_lock(&dcache_lock); 791 spin_lock(&dentry->d_lock);
628 dentry_lru_del_init(dentry); 792 dentry_lru_del_init(dentry);
629 __d_drop(dentry); 793 __d_drop(dentry);
630 spin_unlock(&dcache_lock); 794 spin_unlock(&dentry->d_lock);
631 795
632 for (;;) { 796 for (;;) {
633 /* descend to the first leaf in the current subtree */ 797 /* descend to the first leaf in the current subtree */
@@ -636,14 +800,15 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
636 800
637 /* this is a branch with children - detach all of them 801 /* this is a branch with children - detach all of them
638 * from the system in one go */ 802 * from the system in one go */
639 spin_lock(&dcache_lock); 803 spin_lock(&dentry->d_lock);
640 list_for_each_entry(loop, &dentry->d_subdirs, 804 list_for_each_entry(loop, &dentry->d_subdirs,
641 d_u.d_child) { 805 d_u.d_child) {
806 spin_lock_nested(&loop->d_lock, DENTRY_D_LOCK_NESTED);
642 dentry_lru_del_init(loop); 807 dentry_lru_del_init(loop);
643 __d_drop(loop); 808 __d_drop(loop);
644 cond_resched_lock(&dcache_lock); 809 spin_unlock(&loop->d_lock);
645 } 810 }
646 spin_unlock(&dcache_lock); 811 spin_unlock(&dentry->d_lock);
647 812
648 /* move to the first child */ 813 /* move to the first child */
649 dentry = list_entry(dentry->d_subdirs.next, 814 dentry = list_entry(dentry->d_subdirs.next,
@@ -670,14 +835,17 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
670 BUG(); 835 BUG();
671 } 836 }
672 837
673 if (IS_ROOT(dentry)) 838 if (IS_ROOT(dentry)) {
674 parent = NULL; 839 parent = NULL;
675 else { 840 list_del(&dentry->d_u.d_child);
841 } else {
676 parent = dentry->d_parent; 842 parent = dentry->d_parent;
843 spin_lock(&parent->d_lock);
677 atomic_dec(&parent->d_count); 844 atomic_dec(&parent->d_count);
845 list_del(&dentry->d_u.d_child);
846 spin_unlock(&parent->d_lock);
678 } 847 }
679 848
680 list_del(&dentry->d_u.d_child);
681 detached++; 849 detached++;
682 850
683 inode = dentry->d_inode; 851 inode = dentry->d_inode;
@@ -706,16 +874,12 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
706 struct dentry, d_u.d_child); 874 struct dentry, d_u.d_child);
707 } 875 }
708out: 876out:
709 /* several dentries were freed, need to correct nr_dentry */ 877 return;
710 spin_lock(&dcache_lock);
711 dentry_stat.nr_dentry -= detached;
712 spin_unlock(&dcache_lock);
713} 878}
714 879
715/* 880/*
716 * destroy the dentries attached to a superblock on unmounting 881 * destroy the dentries attached to a superblock on unmounting
717 * - we don't need to use dentry->d_lock, and only need dcache_lock when 882 * - we don't need to use dentry->d_lock because:
718 * removing the dentry from the system lists and hashes because:
719 * - the superblock is detached from all mountings and open files, so the 883 * - the superblock is detached from all mountings and open files, so the
720 * dentry trees will not be rearranged by the VFS 884 * dentry trees will not be rearranged by the VFS
721 * - s_umount is write-locked, so the memory pressure shrinker will ignore 885 * - s_umount is write-locked, so the memory pressure shrinker will ignore
@@ -727,12 +891,15 @@ void shrink_dcache_for_umount(struct super_block *sb)
727{ 891{
728 struct dentry *dentry; 892 struct dentry *dentry;
729 893
730 if (down_read_trylock(&sb->s_umount)) 894// -rt: this might succeed there ...
731 BUG(); 895// if (down_read_trylock(&sb->s_umount))
896// BUG();
732 897
733 dentry = sb->s_root; 898 dentry = sb->s_root;
734 sb->s_root = NULL; 899 sb->s_root = NULL;
900 spin_lock(&dentry->d_lock);
735 atomic_dec(&dentry->d_count); 901 atomic_dec(&dentry->d_count);
902 spin_unlock(&dentry->d_lock);
736 shrink_dcache_for_umount_subtree(dentry); 903 shrink_dcache_for_umount_subtree(dentry);
737 904
738 while (!hlist_empty(&sb->s_anon)) { 905 while (!hlist_empty(&sb->s_anon)) {
@@ -754,15 +921,19 @@ void shrink_dcache_for_umount(struct super_block *sb)
754 * Return true if the parent or its subdirectories contain 921 * Return true if the parent or its subdirectories contain
755 * a mount point 922 * a mount point
756 */ 923 */
757
758int have_submounts(struct dentry *parent) 924int have_submounts(struct dentry *parent)
759{ 925{
760 struct dentry *this_parent = parent; 926 struct dentry *this_parent;
761 struct list_head *next; 927 struct list_head *next;
928 unsigned seq;
929
930rename_retry:
931 this_parent = parent;
932 seq = read_seqbegin(&rename_lock);
762 933
763 spin_lock(&dcache_lock);
764 if (d_mountpoint(parent)) 934 if (d_mountpoint(parent))
765 goto positive; 935 goto positive;
936 spin_lock(&this_parent->d_lock);
766repeat: 937repeat:
767 next = this_parent->d_subdirs.next; 938 next = this_parent->d_subdirs.next;
768resume: 939resume:
@@ -770,26 +941,56 @@ resume:
770 struct list_head *tmp = next; 941 struct list_head *tmp = next;
771 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 942 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
772 next = tmp->next; 943 next = tmp->next;
944
945 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
773 /* Have we found a mount point ? */ 946 /* Have we found a mount point ? */
774 if (d_mountpoint(dentry)) 947 if (d_mountpoint(dentry)) {
948 spin_unlock(&dentry->d_lock);
949 spin_unlock(&this_parent->d_lock);
775 goto positive; 950 goto positive;
951 }
776 if (!list_empty(&dentry->d_subdirs)) { 952 if (!list_empty(&dentry->d_subdirs)) {
953 spin_unlock(&this_parent->d_lock);
954 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
777 this_parent = dentry; 955 this_parent = dentry;
956 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
778 goto repeat; 957 goto repeat;
779 } 958 }
959 spin_unlock(&dentry->d_lock);
780 } 960 }
781 /* 961 /*
782 * All done at this level ... ascend and resume the search. 962 * All done at this level ... ascend and resume the search.
783 */ 963 */
784 if (this_parent != parent) { 964 if (this_parent != parent) {
785 next = this_parent->d_u.d_child.next; 965 struct dentry *tmp;
786 this_parent = this_parent->d_parent; 966 struct dentry *child;
967
968 tmp = this_parent->d_parent;
969 rcu_read_lock();
970 spin_unlock(&this_parent->d_lock);
971 child = this_parent;
972 this_parent = tmp;
973 spin_lock(&this_parent->d_lock);
974 /* might go back up the wrong parent if we have had a rename
975 * or deletion */
976 if (this_parent != child->d_parent ||
977 // d_unlinked(this_parent) || XXX
978 read_seqretry(&rename_lock, seq)) {
979 spin_unlock(&this_parent->d_lock);
980 rcu_read_unlock();
981 goto rename_retry;
982 }
983 rcu_read_unlock();
984 next = child->d_u.d_child.next;
787 goto resume; 985 goto resume;
788 } 986 }
789 spin_unlock(&dcache_lock); 987 spin_unlock(&this_parent->d_lock);
988 if (read_seqretry(&rename_lock, seq))
989 goto rename_retry;
790 return 0; /* No mount points found in tree */ 990 return 0; /* No mount points found in tree */
791positive: 991positive:
792 spin_unlock(&dcache_lock); 992 if (read_seqretry(&rename_lock, seq))
993 goto rename_retry;
793 return 1; 994 return 1;
794} 995}
795 996
@@ -809,11 +1010,17 @@ positive:
809 */ 1010 */
810static int select_parent(struct dentry * parent) 1011static int select_parent(struct dentry * parent)
811{ 1012{
812 struct dentry *this_parent = parent; 1013 struct dentry *this_parent;
813 struct list_head *next; 1014 struct list_head *next;
814 int found = 0; 1015 unsigned seq;
1016 int found;
815 1017
816 spin_lock(&dcache_lock); 1018rename_retry:
1019 found = 0;
1020 this_parent = parent;
1021 seq = read_seqbegin(&rename_lock);
1022
1023 spin_lock(&this_parent->d_lock);
817repeat: 1024repeat:
818 next = this_parent->d_subdirs.next; 1025 next = this_parent->d_subdirs.next;
819resume: 1026resume:
@@ -822,6 +1029,7 @@ resume:
822 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 1029 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
823 next = tmp->next; 1030 next = tmp->next;
824 1031
1032 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
825 dentry_lru_del_init(dentry); 1033 dentry_lru_del_init(dentry);
826 /* 1034 /*
827 * move only zero ref count dentries to the end 1035 * move only zero ref count dentries to the end
@@ -837,27 +1045,54 @@ resume:
837 * ensures forward progress). We'll be coming back to find 1045 * ensures forward progress). We'll be coming back to find
838 * the rest. 1046 * the rest.
839 */ 1047 */
840 if (found && need_resched()) 1048 if (found && need_resched()) {
1049 spin_unlock(&dentry->d_lock);
841 goto out; 1050 goto out;
1051 }
842 1052
843 /* 1053 /*
844 * Descend a level if the d_subdirs list is non-empty. 1054 * Descend a level if the d_subdirs list is non-empty.
845 */ 1055 */
846 if (!list_empty(&dentry->d_subdirs)) { 1056 if (!list_empty(&dentry->d_subdirs)) {
1057 spin_unlock(&this_parent->d_lock);
1058 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
847 this_parent = dentry; 1059 this_parent = dentry;
1060 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
848 goto repeat; 1061 goto repeat;
849 } 1062 }
1063
1064 spin_unlock(&dentry->d_lock);
850 } 1065 }
851 /* 1066 /*
852 * All done at this level ... ascend and resume the search. 1067 * All done at this level ... ascend and resume the search.
853 */ 1068 */
854 if (this_parent != parent) { 1069 if (this_parent != parent) {
855 next = this_parent->d_u.d_child.next; 1070 struct dentry *tmp;
856 this_parent = this_parent->d_parent; 1071 struct dentry *child;
1072
1073 tmp = this_parent->d_parent;
1074 rcu_read_lock();
1075 spin_unlock(&this_parent->d_lock);
1076 child = this_parent;
1077 this_parent = tmp;
1078 spin_lock(&this_parent->d_lock);
1079 /* might go back up the wrong parent if we have had a rename
1080 * or deletion */
1081 if (this_parent != child->d_parent ||
1082 // d_unlinked(this_parent) || XXX
1083 read_seqretry(&rename_lock, seq)) {
1084 spin_unlock(&this_parent->d_lock);
1085 rcu_read_unlock();
1086 goto rename_retry;
1087 }
1088 rcu_read_unlock();
1089 next = child->d_u.d_child.next;
857 goto resume; 1090 goto resume;
858 } 1091 }
859out: 1092out:
860 spin_unlock(&dcache_lock); 1093 spin_unlock(&this_parent->d_lock);
1094 if (read_seqretry(&rename_lock, seq))
1095 goto rename_retry;
861 return found; 1096 return found;
862} 1097}
863 1098
@@ -942,6 +1177,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
942 atomic_set(&dentry->d_count, 1); 1177 atomic_set(&dentry->d_count, 1);
943 dentry->d_flags = DCACHE_UNHASHED; 1178 dentry->d_flags = DCACHE_UNHASHED;
944 spin_lock_init(&dentry->d_lock); 1179 spin_lock_init(&dentry->d_lock);
1180 seqcount_init(&dentry->d_seq);
945 dentry->d_inode = NULL; 1181 dentry->d_inode = NULL;
946 dentry->d_parent = NULL; 1182 dentry->d_parent = NULL;
947 dentry->d_sb = NULL; 1183 dentry->d_sb = NULL;
@@ -952,19 +1188,17 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
952 INIT_LIST_HEAD(&dentry->d_lru); 1188 INIT_LIST_HEAD(&dentry->d_lru);
953 INIT_LIST_HEAD(&dentry->d_subdirs); 1189 INIT_LIST_HEAD(&dentry->d_subdirs);
954 INIT_LIST_HEAD(&dentry->d_alias); 1190 INIT_LIST_HEAD(&dentry->d_alias);
1191 INIT_LIST_HEAD(&dentry->d_u.d_child);
955 1192
956 if (parent) { 1193 if (parent) {
957 dentry->d_parent = dget(parent); 1194 spin_lock(&parent->d_lock);
1195 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1196 dentry->d_parent = dget_dlock(parent);
958 dentry->d_sb = parent->d_sb; 1197 dentry->d_sb = parent->d_sb;
959 } else {
960 INIT_LIST_HEAD(&dentry->d_u.d_child);
961 }
962
963 spin_lock(&dcache_lock);
964 if (parent)
965 list_add(&dentry->d_u.d_child, &parent->d_subdirs); 1198 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
966 dentry_stat.nr_dentry++; 1199 spin_unlock(&dentry->d_lock);
967 spin_unlock(&dcache_lock); 1200 spin_unlock(&parent->d_lock);
1201 }
968 1202
969 return dentry; 1203 return dentry;
970} 1204}
@@ -980,7 +1214,6 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
980} 1214}
981EXPORT_SYMBOL(d_alloc_name); 1215EXPORT_SYMBOL(d_alloc_name);
982 1216
983/* the caller must hold dcache_lock */
984static void __d_instantiate(struct dentry *dentry, struct inode *inode) 1217static void __d_instantiate(struct dentry *dentry, struct inode *inode)
985{ 1218{
986 if (inode) 1219 if (inode)
@@ -1007,9 +1240,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1007void d_instantiate(struct dentry *entry, struct inode * inode) 1240void d_instantiate(struct dentry *entry, struct inode * inode)
1008{ 1241{
1009 BUG_ON(!list_empty(&entry->d_alias)); 1242 BUG_ON(!list_empty(&entry->d_alias));
1010 spin_lock(&dcache_lock); 1243 if (inode)
1244 spin_lock(&inode->i_lock);
1011 __d_instantiate(entry, inode); 1245 __d_instantiate(entry, inode);
1012 spin_unlock(&dcache_lock); 1246 if (inode)
1247 spin_unlock(&inode->i_lock);
1013 security_d_instantiate(entry, inode); 1248 security_d_instantiate(entry, inode);
1014} 1249}
1015 1250
@@ -1053,7 +1288,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1053 continue; 1288 continue;
1054 if (memcmp(qstr->name, name, len)) 1289 if (memcmp(qstr->name, name, len))
1055 continue; 1290 continue;
1056 dget_locked(alias); 1291 dget(alias);
1057 return alias; 1292 return alias;
1058 } 1293 }
1059 1294
@@ -1067,9 +1302,11 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1067 1302
1068 BUG_ON(!list_empty(&entry->d_alias)); 1303 BUG_ON(!list_empty(&entry->d_alias));
1069 1304
1070 spin_lock(&dcache_lock); 1305 if (inode)
1306 spin_lock(&inode->i_lock);
1071 result = __d_instantiate_unique(entry, inode); 1307 result = __d_instantiate_unique(entry, inode);
1072 spin_unlock(&dcache_lock); 1308 if (inode)
1309 spin_unlock(&inode->i_lock);
1073 1310
1074 if (!result) { 1311 if (!result) {
1075 security_d_instantiate(entry, inode); 1312 security_d_instantiate(entry, inode);
@@ -1109,14 +1346,6 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1109 return res; 1346 return res;
1110} 1347}
1111 1348
1112static inline struct hlist_head *d_hash(struct dentry *parent,
1113 unsigned long hash)
1114{
1115 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
1116 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
1117 return dentry_hashtable + (hash & D_HASHMASK);
1118}
1119
1120/** 1349/**
1121 * d_obtain_alias - find or allocate a dentry for a given inode 1350 * d_obtain_alias - find or allocate a dentry for a given inode
1122 * @inode: inode to allocate the dentry for 1351 * @inode: inode to allocate the dentry for
@@ -1157,10 +1386,10 @@ struct dentry *d_obtain_alias(struct inode *inode)
1157 } 1386 }
1158 tmp->d_parent = tmp; /* make sure dput doesn't croak */ 1387 tmp->d_parent = tmp; /* make sure dput doesn't croak */
1159 1388
1160 spin_lock(&dcache_lock); 1389 spin_lock(&inode->i_lock);
1161 res = __d_find_alias(inode, 0); 1390 res = __d_find_alias(inode, 0);
1162 if (res) { 1391 if (res) {
1163 spin_unlock(&dcache_lock); 1392 spin_unlock(&inode->i_lock);
1164 dput(tmp); 1393 dput(tmp);
1165 goto out_iput; 1394 goto out_iput;
1166 } 1395 }
@@ -1174,8 +1403,8 @@ struct dentry *d_obtain_alias(struct inode *inode)
1174 list_add(&tmp->d_alias, &inode->i_dentry); 1403 list_add(&tmp->d_alias, &inode->i_dentry);
1175 hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); 1404 hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
1176 spin_unlock(&tmp->d_lock); 1405 spin_unlock(&tmp->d_lock);
1406 spin_unlock(&inode->i_lock);
1177 1407
1178 spin_unlock(&dcache_lock);
1179 return tmp; 1408 return tmp;
1180 1409
1181 out_iput: 1410 out_iput:
@@ -1205,19 +1434,19 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1205 struct dentry *new = NULL; 1434 struct dentry *new = NULL;
1206 1435
1207 if (inode && S_ISDIR(inode->i_mode)) { 1436 if (inode && S_ISDIR(inode->i_mode)) {
1208 spin_lock(&dcache_lock); 1437 spin_lock(&inode->i_lock);
1209 new = __d_find_alias(inode, 1); 1438 new = __d_find_alias(inode, 1);
1210 if (new) { 1439 if (new) {
1211 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); 1440 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1212 spin_unlock(&dcache_lock); 1441 spin_unlock(&inode->i_lock);
1213 security_d_instantiate(new, inode); 1442 security_d_instantiate(new, inode);
1214 d_rehash(dentry); 1443 d_rehash(dentry);
1215 d_move(new, dentry); 1444 d_move(new, dentry);
1216 iput(inode); 1445 iput(inode);
1217 } else { 1446 } else {
1218 /* already taking dcache_lock, so d_add() by hand */ 1447 /* already taken inode->i_lock, d_add() by hand */
1219 __d_instantiate(dentry, inode); 1448 __d_instantiate(dentry, inode);
1220 spin_unlock(&dcache_lock); 1449 spin_unlock(&inode->i_lock);
1221 security_d_instantiate(dentry, inode); 1450 security_d_instantiate(dentry, inode);
1222 d_rehash(dentry); 1451 d_rehash(dentry);
1223 } 1452 }
@@ -1289,10 +1518,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1289 * Negative dentry: instantiate it unless the inode is a directory and 1518 * Negative dentry: instantiate it unless the inode is a directory and
1290 * already has a dentry. 1519 * already has a dentry.
1291 */ 1520 */
1292 spin_lock(&dcache_lock); 1521 spin_lock(&inode->i_lock);
1293 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { 1522 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
1294 __d_instantiate(found, inode); 1523 __d_instantiate(found, inode);
1295 spin_unlock(&dcache_lock); 1524 spin_unlock(&inode->i_lock);
1296 security_d_instantiate(found, inode); 1525 security_d_instantiate(found, inode);
1297 return found; 1526 return found;
1298 } 1527 }
@@ -1302,8 +1531,8 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1302 * reference to it, move it in place and use it. 1531 * reference to it, move it in place and use it.
1303 */ 1532 */
1304 new = list_entry(inode->i_dentry.next, struct dentry, d_alias); 1533 new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
1305 dget_locked(new); 1534 dget(new);
1306 spin_unlock(&dcache_lock); 1535 spin_unlock(&inode->i_lock);
1307 security_d_instantiate(found, inode); 1536 security_d_instantiate(found, inode);
1308 d_move(new, found); 1537 d_move(new, found);
1309 iput(inode); 1538 iput(inode);
@@ -1325,7 +1554,7 @@ err_out:
1325 * is returned. The caller must use dput to free the entry when it has 1554 * is returned. The caller must use dput to free the entry when it has
1326 * finished using it. %NULL is returned on failure. 1555 * finished using it. %NULL is returned on failure.
1327 * 1556 *
1328 * __d_lookup is dcache_lock free. The hash list is protected using RCU. 1557 * __d_lookup is global lock free. The hash list is protected using RCU.
1329 * Memory barriers are used while updating and doing lockless traversal. 1558 * Memory barriers are used while updating and doing lockless traversal.
1330 * To avoid races with d_move while rename is happening, d_lock is used. 1559 * To avoid races with d_move while rename is happening, d_lock is used.
1331 * 1560 *
@@ -1337,33 +1566,18 @@ err_out:
1337 * 1566 *
1338 * The dentry unused LRU is not updated even if lookup finds the required dentry 1567 * The dentry unused LRU is not updated even if lookup finds the required dentry
1339 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb, 1568 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
1340 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock 1569 * select_parent. This laziness saves lookup from LRU lock acquisition.
1341 * acquisition.
1342 * 1570 *
1343 * d_lookup() is protected against the concurrent renames in some unrelated 1571 * d_lookup() is protected against the concurrent renames in some unrelated
1344 * directory using the seqlockt_t rename_lock. 1572 * directory using the seqlockt_t rename_lock.
1345 */ 1573 */
1346
1347struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1348{
1349 struct dentry * dentry = NULL;
1350 unsigned long seq;
1351
1352 do {
1353 seq = read_seqbegin(&rename_lock);
1354 dentry = __d_lookup(parent, name);
1355 if (dentry)
1356 break;
1357 } while (read_seqretry(&rename_lock, seq));
1358 return dentry;
1359}
1360
1361struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) 1574struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1362{ 1575{
1363 unsigned int len = name->len; 1576 unsigned int len = name->len;
1364 unsigned int hash = name->hash; 1577 unsigned int hash = name->hash;
1365 const unsigned char *str = name->name; 1578 const unsigned char *str = name->name;
1366 struct hlist_head *head = d_hash(parent,hash); 1579 struct dcache_hash_bucket *b = d_hash(parent, hash);
1580 struct hlist_head *head = &b->head;
1367 struct dentry *found = NULL; 1581 struct dentry *found = NULL;
1368 struct hlist_node *node; 1582 struct hlist_node *node;
1369 struct dentry *dentry; 1583 struct dentry *dentry;
@@ -1419,6 +1633,78 @@ next:
1419 return found; 1633 return found;
1420} 1634}
1421 1635
1636struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1637{
1638 struct dentry *dentry = NULL;
1639 unsigned seq;
1640
1641 do {
1642 seq = read_seqbegin(&rename_lock);
1643 dentry = __d_lookup(parent, name);
1644 if (dentry)
1645 break;
1646 } while (read_seqretry(&rename_lock, seq));
1647 return dentry;
1648}
1649
1650struct dentry * __d_lookup_rcu(struct dentry * parent, struct qstr * name)
1651{
1652 unsigned int len = name->len;
1653 unsigned int hash = name->hash;
1654 const unsigned char *str = name->name;
1655 struct dcache_hash_bucket *b = d_hash(parent, hash);
1656 struct hlist_head *head = &b->head;
1657 struct hlist_node *node;
1658 struct dentry *dentry;
1659
1660 hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
1661 unsigned seq;
1662 struct dentry *tparent;
1663 const char *tname;
1664 int tlen;
1665
1666 if (unlikely(dentry->d_name.hash != hash))
1667 continue;
1668
1669seqretry:
1670 seq = read_seqcount_begin(&dentry->d_seq);
1671 tparent = dentry->d_parent;
1672 if (unlikely(tparent != parent))
1673 continue;
1674 tlen = dentry->d_name.len;
1675 if (unlikely(tlen != len))
1676 continue;
1677 tname = dentry->d_name.name;
1678 if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
1679 goto seqretry;
1680 if (unlikely(memcmp(tname, str, tlen)))
1681 continue;
1682 if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
1683 goto seqretry;
1684
1685 return dentry;
1686 }
1687 return NULL;
1688}
1689
1690struct dentry *d_lookup_rcu(struct dentry *parent, struct qstr * name)
1691{
1692 struct dentry *dentry = NULL;
1693 unsigned seq;
1694
1695 if (parent->d_op && parent->d_op->d_compare)
1696 goto out;
1697
1698 do {
1699 seq = read_seqbegin(&rename_lock);
1700 dentry = __d_lookup_rcu(parent, name);
1701 if (dentry)
1702 break;
1703 } while (read_seqretry(&rename_lock, seq));
1704out:
1705 return dentry;
1706}
1707
1422/** 1708/**
1423 * d_hash_and_lookup - hash the qstr then search for a dentry 1709 * d_hash_and_lookup - hash the qstr then search for a dentry
1424 * @dir: Directory to search in 1710 * @dir: Directory to search in
@@ -1457,6 +1743,7 @@ out:
1457 1743
1458int d_validate(struct dentry *dentry, struct dentry *dparent) 1744int d_validate(struct dentry *dentry, struct dentry *dparent)
1459{ 1745{
1746 struct dcache_hash_bucket *b;
1460 struct hlist_head *base; 1747 struct hlist_head *base;
1461 struct hlist_node *lhp; 1748 struct hlist_node *lhp;
1462 1749
@@ -1467,19 +1754,23 @@ int d_validate(struct dentry *dentry, struct dentry *dparent)
1467 if (dentry->d_parent != dparent) 1754 if (dentry->d_parent != dparent)
1468 goto out; 1755 goto out;
1469 1756
1470 spin_lock(&dcache_lock); 1757 spin_lock(&dentry->d_lock);
1471 base = d_hash(dparent, dentry->d_name.hash); 1758 b = d_hash(dparent, dentry->d_name.hash);
1472 hlist_for_each(lhp,base) { 1759 base = &b->head;
1760 spin_lock(&b->lock);
1761 hlist_for_each(lhp, base) {
1473 /* hlist_for_each_entry_rcu() not required for d_hash list 1762 /* hlist_for_each_entry_rcu() not required for d_hash list
1474 * as it is parsed under dcache_lock 1763 * as it is parsed under dcache_hash_bucket->lock
1475 */ 1764 */
1476 if (dentry == hlist_entry(lhp, struct dentry, d_hash)) { 1765 if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
1477 __dget_locked(dentry); 1766 spin_unlock(&b->lock);
1478 spin_unlock(&dcache_lock); 1767 __dget_dlock(dentry);
1768 spin_unlock(&dentry->d_lock);
1479 return 1; 1769 return 1;
1480 } 1770 }
1481 } 1771 }
1482 spin_unlock(&dcache_lock); 1772 spin_unlock(&b->lock);
1773 spin_unlock(&dentry->d_lock);
1483out: 1774out:
1484 return 0; 1775 return 0;
1485} 1776}
@@ -1507,14 +1798,20 @@ out:
1507 1798
1508void d_delete(struct dentry * dentry) 1799void d_delete(struct dentry * dentry)
1509{ 1800{
1801 struct inode *inode;
1510 int isdir = 0; 1802 int isdir = 0;
1511 /* 1803 /*
1512 * Are we the only user? 1804 * Are we the only user?
1513 */ 1805 */
1514 spin_lock(&dcache_lock); 1806again:
1515 spin_lock(&dentry->d_lock); 1807 spin_lock(&dentry->d_lock);
1516 isdir = S_ISDIR(dentry->d_inode->i_mode); 1808 inode = dentry->d_inode;
1809 isdir = S_ISDIR(inode->i_mode);
1517 if (atomic_read(&dentry->d_count) == 1) { 1810 if (atomic_read(&dentry->d_count) == 1) {
1811 if (inode && !spin_trylock(&inode->i_lock)) {
1812 spin_unlock(&dentry->d_lock);
1813 goto again;
1814 }
1518 dentry_iput(dentry); 1815 dentry_iput(dentry);
1519 fsnotify_nameremove(dentry, isdir); 1816 fsnotify_nameremove(dentry, isdir);
1520 return; 1817 return;
@@ -1524,16 +1821,16 @@ void d_delete(struct dentry * dentry)
1524 __d_drop(dentry); 1821 __d_drop(dentry);
1525 1822
1526 spin_unlock(&dentry->d_lock); 1823 spin_unlock(&dentry->d_lock);
1527 spin_unlock(&dcache_lock);
1528 1824
1529 fsnotify_nameremove(dentry, isdir); 1825 fsnotify_nameremove(dentry, isdir);
1530} 1826}
1531 1827
1532static void __d_rehash(struct dentry * entry, struct hlist_head *list) 1828static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
1533{ 1829{
1534
1535 entry->d_flags &= ~DCACHE_UNHASHED; 1830 entry->d_flags &= ~DCACHE_UNHASHED;
1536 hlist_add_head_rcu(&entry->d_hash, list); 1831 spin_lock(&b->lock);
1832 hlist_add_head_rcu(&entry->d_hash, &b->head);
1833 spin_unlock(&b->lock);
1537} 1834}
1538 1835
1539static void _d_rehash(struct dentry * entry) 1836static void _d_rehash(struct dentry * entry)
@@ -1550,11 +1847,9 @@ static void _d_rehash(struct dentry * entry)
1550 1847
1551void d_rehash(struct dentry * entry) 1848void d_rehash(struct dentry * entry)
1552{ 1849{
1553 spin_lock(&dcache_lock);
1554 spin_lock(&entry->d_lock); 1850 spin_lock(&entry->d_lock);
1555 _d_rehash(entry); 1851 _d_rehash(entry);
1556 spin_unlock(&entry->d_lock); 1852 spin_unlock(&entry->d_lock);
1557 spin_unlock(&dcache_lock);
1558} 1853}
1559 1854
1560/* 1855/*
@@ -1631,32 +1926,46 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
1631 */ 1926 */
1632static void d_move_locked(struct dentry * dentry, struct dentry * target) 1927static void d_move_locked(struct dentry * dentry, struct dentry * target)
1633{ 1928{
1634 struct hlist_head *list; 1929 struct dcache_hash_bucket *b;
1635
1636 if (!dentry->d_inode) 1930 if (!dentry->d_inode)
1637 printk(KERN_WARNING "VFS: moving negative dcache entry\n"); 1931 printk(KERN_WARNING "VFS: moving negative dcache entry\n");
1638 1932
1639 write_seqlock(&rename_lock); 1933 write_seqlock(&rename_lock);
1640 /* 1934
1641 * XXXX: do we really need to take target->d_lock? 1935 if (target->d_parent != dentry->d_parent) {
1642 */ 1936 if (target->d_parent < dentry->d_parent) {
1643 if (target < dentry) { 1937 spin_lock(&target->d_parent->d_lock);
1644 spin_lock(&target->d_lock); 1938 spin_lock_nested(&dentry->d_parent->d_lock,
1645 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 1939 DENTRY_D_LOCK_NESTED);
1940 } else {
1941 spin_lock(&dentry->d_parent->d_lock);
1942 spin_lock_nested(&target->d_parent->d_lock,
1943 DENTRY_D_LOCK_NESTED);
1944 }
1646 } else { 1945 } else {
1647 spin_lock(&dentry->d_lock); 1946 spin_lock(&target->d_parent->d_lock);
1648 spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
1649 } 1947 }
1650 1948
1651 /* Move the dentry to the target hash queue, if on different bucket */ 1949 if (dentry != dentry->d_parent) {
1652 if (d_unhashed(dentry)) 1950 if (target < dentry) {
1653 goto already_unhashed; 1951 spin_lock_nested(&target->d_lock, 2);
1654 1952 spin_lock_nested(&dentry->d_lock, 3);
1655 hlist_del_rcu(&dentry->d_hash); 1953 } else {
1954 spin_lock_nested(&dentry->d_lock, 2);
1955 spin_lock_nested(&target->d_lock, 3);
1956 }
1957 } else {
1958 spin_lock_nested(&target->d_lock, 2);
1959 }
1656 1960
1657already_unhashed: 1961 /* Move the dentry to the target hash queue, if on different bucket */
1658 list = d_hash(target->d_parent, target->d_name.hash); 1962 if (!d_unhashed(dentry)) {
1659 __d_rehash(dentry, list); 1963 b = d_hash(dentry->d_parent, dentry->d_name.hash);
1964 spin_lock(&b->lock);
1965 hlist_del_rcu(&dentry->d_hash);
1966 spin_unlock(&b->lock);
1967 }
1968 __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
1660 1969
1661 /* Unhash the target: dput() will then get rid of it */ 1970 /* Unhash the target: dput() will then get rid of it */
1662 __d_drop(target); 1971 __d_drop(target);
@@ -1665,6 +1974,8 @@ already_unhashed:
1665 list_del(&target->d_u.d_child); 1974 list_del(&target->d_u.d_child);
1666 1975
1667 /* Switch the names.. */ 1976 /* Switch the names.. */
1977 write_seqcount_begin(&dentry->d_seq);
1978 write_seqcount_begin(&target->d_seq);
1668 switch_names(dentry, target); 1979 switch_names(dentry, target);
1669 swap(dentry->d_name.hash, target->d_name.hash); 1980 swap(dentry->d_name.hash, target->d_name.hash);
1670 1981
@@ -1679,8 +1990,14 @@ already_unhashed:
1679 /* And add them back to the (new) parent lists */ 1990 /* And add them back to the (new) parent lists */
1680 list_add(&target->d_u.d_child, &target->d_parent->d_subdirs); 1991 list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
1681 } 1992 }
1993 write_seqcount_end(&target->d_seq);
1994 write_seqcount_end(&dentry->d_seq);
1682 1995
1683 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); 1996 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
1997 if (target->d_parent != dentry->d_parent)
1998 spin_unlock(&dentry->d_parent->d_lock);
1999 if (target->d_parent != target)
2000 spin_unlock(&target->d_parent->d_lock);
1684 spin_unlock(&target->d_lock); 2001 spin_unlock(&target->d_lock);
1685 fsnotify_d_move(dentry); 2002 fsnotify_d_move(dentry);
1686 spin_unlock(&dentry->d_lock); 2003 spin_unlock(&dentry->d_lock);
@@ -1698,9 +2015,7 @@ already_unhashed:
1698 2015
1699void d_move(struct dentry * dentry, struct dentry * target) 2016void d_move(struct dentry * dentry, struct dentry * target)
1700{ 2017{
1701 spin_lock(&dcache_lock);
1702 d_move_locked(dentry, target); 2018 d_move_locked(dentry, target);
1703 spin_unlock(&dcache_lock);
1704} 2019}
1705 2020
1706/** 2021/**
@@ -1726,16 +2041,16 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
1726 * This helper attempts to cope with remotely renamed directories 2041 * This helper attempts to cope with remotely renamed directories
1727 * 2042 *
1728 * It assumes that the caller is already holding 2043 * It assumes that the caller is already holding
1729 * dentry->d_parent->d_inode->i_mutex and the dcache_lock 2044 * dentry->d_parent->d_inode->i_mutex
1730 * 2045 *
1731 * Note: If ever the locking in lock_rename() changes, then please 2046 * Note: If ever the locking in lock_rename() changes, then please
1732 * remember to update this too... 2047 * remember to update this too...
1733 */ 2048 */
1734static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) 2049static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1735 __releases(dcache_lock)
1736{ 2050{
1737 struct mutex *m1 = NULL, *m2 = NULL; 2051 struct mutex *m1 = NULL, *m2 = NULL;
1738 struct dentry *ret; 2052 struct dentry *ret;
2053 struct inode *inode = NULL;
1739 2054
1740 /* If alias and dentry share a parent, then no extra locks required */ 2055 /* If alias and dentry share a parent, then no extra locks required */
1741 if (alias->d_parent == dentry->d_parent) 2056 if (alias->d_parent == dentry->d_parent)
@@ -1751,14 +2066,15 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1751 if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) 2066 if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
1752 goto out_err; 2067 goto out_err;
1753 m1 = &dentry->d_sb->s_vfs_rename_mutex; 2068 m1 = &dentry->d_sb->s_vfs_rename_mutex;
1754 if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex)) 2069 inode = alias->d_parent->d_inode;
2070 if (!mutex_trylock(&inode->i_mutex))
1755 goto out_err; 2071 goto out_err;
1756 m2 = &alias->d_parent->d_inode->i_mutex; 2072 m2 = &inode->i_mutex;
1757out_unalias: 2073out_unalias:
1758 d_move_locked(alias, dentry); 2074 d_move_locked(alias, dentry);
1759 ret = alias; 2075 ret = alias;
1760out_err: 2076out_err:
1761 spin_unlock(&dcache_lock); 2077 spin_unlock(&inode->i_lock);
1762 if (m2) 2078 if (m2)
1763 mutex_unlock(m2); 2079 mutex_unlock(m2);
1764 if (m1) 2080 if (m1)
@@ -1780,6 +2096,12 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1780 dparent = dentry->d_parent; 2096 dparent = dentry->d_parent;
1781 aparent = anon->d_parent; 2097 aparent = anon->d_parent;
1782 2098
2099 /* XXX: hack */
2100 spin_lock(&aparent->d_lock);
2101 spin_lock(&dparent->d_lock);
2102 spin_lock(&dentry->d_lock);
2103 spin_lock(&anon->d_lock);
2104
1783 dentry->d_parent = (aparent == anon) ? dentry : aparent; 2105 dentry->d_parent = (aparent == anon) ? dentry : aparent;
1784 list_del(&dentry->d_u.d_child); 2106 list_del(&dentry->d_u.d_child);
1785 if (!IS_ROOT(dentry)) 2107 if (!IS_ROOT(dentry))
@@ -1794,6 +2116,11 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1794 else 2116 else
1795 INIT_LIST_HEAD(&anon->d_u.d_child); 2117 INIT_LIST_HEAD(&anon->d_u.d_child);
1796 2118
2119 spin_unlock(&anon->d_lock);
2120 spin_unlock(&dentry->d_lock);
2121 spin_unlock(&dparent->d_lock);
2122 spin_unlock(&aparent->d_lock);
2123
1797 anon->d_flags &= ~DCACHE_DISCONNECTED; 2124 anon->d_flags &= ~DCACHE_DISCONNECTED;
1798} 2125}
1799 2126
@@ -1811,14 +2138,15 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1811 2138
1812 BUG_ON(!d_unhashed(dentry)); 2139 BUG_ON(!d_unhashed(dentry));
1813 2140
1814 spin_lock(&dcache_lock);
1815
1816 if (!inode) { 2141 if (!inode) {
1817 actual = dentry; 2142 actual = dentry;
1818 __d_instantiate(dentry, NULL); 2143 __d_instantiate(dentry, NULL);
1819 goto found_lock; 2144 d_rehash(actual);
2145 goto out_nolock;
1820 } 2146 }
1821 2147
2148 spin_lock(&inode->i_lock);
2149
1822 if (S_ISDIR(inode->i_mode)) { 2150 if (S_ISDIR(inode->i_mode)) {
1823 struct dentry *alias; 2151 struct dentry *alias;
1824 2152
@@ -1846,15 +2174,14 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1846 actual = __d_instantiate_unique(dentry, inode); 2174 actual = __d_instantiate_unique(dentry, inode);
1847 if (!actual) 2175 if (!actual)
1848 actual = dentry; 2176 actual = dentry;
1849 else if (unlikely(!d_unhashed(actual))) 2177 else
1850 goto shouldnt_be_hashed; 2178 BUG_ON(!d_unhashed(actual));
1851 2179
1852found_lock:
1853 spin_lock(&actual->d_lock); 2180 spin_lock(&actual->d_lock);
1854found: 2181found:
1855 _d_rehash(actual); 2182 _d_rehash(actual);
1856 spin_unlock(&actual->d_lock); 2183 spin_unlock(&actual->d_lock);
1857 spin_unlock(&dcache_lock); 2184 spin_unlock(&inode->i_lock);
1858out_nolock: 2185out_nolock:
1859 if (actual == dentry) { 2186 if (actual == dentry) {
1860 security_d_instantiate(dentry, inode); 2187 security_d_instantiate(dentry, inode);
@@ -1863,10 +2190,6 @@ out_nolock:
1863 2190
1864 iput(inode); 2191 iput(inode);
1865 return actual; 2192 return actual;
1866
1867shouldnt_be_hashed:
1868 spin_unlock(&dcache_lock);
1869 BUG();
1870} 2193}
1871 2194
1872static int prepend(char **buffer, int *buflen, const char *str, int namelen) 2195static int prepend(char **buffer, int *buflen, const char *str, int namelen)
@@ -1897,7 +2220,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
1897 * Returns a pointer into the buffer or an error code if the 2220 * Returns a pointer into the buffer or an error code if the
1898 * path was too long. 2221 * path was too long.
1899 * 2222 *
1900 * "buflen" should be positive. Caller holds the dcache_lock. 2223 * "buflen" should be positive. Caller holds the path->dentry->d_lock.
1901 * 2224 *
1902 * If path is not reachable from the supplied root, then the value of 2225 * If path is not reachable from the supplied root, then the value of
1903 * root is changed (without modifying refcounts). 2226 * root is changed (without modifying refcounts).
@@ -1905,13 +2228,22 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
1905char *__d_path(const struct path *path, struct path *root, 2228char *__d_path(const struct path *path, struct path *root,
1906 char *buffer, int buflen) 2229 char *buffer, int buflen)
1907{ 2230{
1908 struct dentry *dentry = path->dentry; 2231 struct dentry *dentry;
1909 struct vfsmount *vfsmnt = path->mnt; 2232 struct vfsmount *vfsmnt;
1910 char *end = buffer + buflen; 2233 char *end;
1911 char *retval; 2234 char *retval;
2235 unsigned seq;
1912 2236
1913 spin_lock(&vfsmount_lock); 2237rename_retry:
2238 dentry = path->dentry;
2239 vfsmnt = path->mnt;
2240 end = buffer + buflen;
1914 prepend(&end, &buflen, "\0", 1); 2241 prepend(&end, &buflen, "\0", 1);
2242
2243 seq = read_seqbegin(&rename_lock);
2244 rcu_read_lock();
2245 spin_lock(&dentry->d_lock);
2246unlinked:
1915 if (d_unlinked(dentry) && 2247 if (d_unlinked(dentry) &&
1916 (prepend(&end, &buflen, " (deleted)", 10) != 0)) 2248 (prepend(&end, &buflen, " (deleted)", 10) != 0))
1917 goto Elong; 2249 goto Elong;
@@ -1923,7 +2255,7 @@ char *__d_path(const struct path *path, struct path *root,
1923 *retval = '/'; 2255 *retval = '/';
1924 2256
1925 for (;;) { 2257 for (;;) {
1926 struct dentry * parent; 2258 struct dentry *parent;
1927 2259
1928 if (dentry == root->dentry && vfsmnt == root->mnt) 2260 if (dentry == root->dentry && vfsmnt == root->mnt)
1929 break; 2261 break;
@@ -1932,8 +2264,10 @@ char *__d_path(const struct path *path, struct path *root,
1932 if (vfsmnt->mnt_parent == vfsmnt) { 2264 if (vfsmnt->mnt_parent == vfsmnt) {
1933 goto global_root; 2265 goto global_root;
1934 } 2266 }
2267 spin_unlock(&dentry->d_lock);
1935 dentry = vfsmnt->mnt_mountpoint; 2268 dentry = vfsmnt->mnt_mountpoint;
1936 vfsmnt = vfsmnt->mnt_parent; 2269 vfsmnt = vfsmnt->mnt_parent;
2270 spin_lock(&dentry->d_lock); /* can't get unlinked because locked vfsmount */
1937 continue; 2271 continue;
1938 } 2272 }
1939 parent = dentry->d_parent; 2273 parent = dentry->d_parent;
@@ -1942,11 +2276,18 @@ char *__d_path(const struct path *path, struct path *root,
1942 (prepend(&end, &buflen, "/", 1) != 0)) 2276 (prepend(&end, &buflen, "/", 1) != 0))
1943 goto Elong; 2277 goto Elong;
1944 retval = end; 2278 retval = end;
2279 spin_unlock(&dentry->d_lock);
1945 dentry = parent; 2280 dentry = parent;
2281 spin_lock(&dentry->d_lock);
2282 if (d_unlinked(dentry))
2283 goto unlinked;
1946 } 2284 }
1947 2285
1948out: 2286out:
1949 spin_unlock(&vfsmount_lock); 2287 spin_unlock(&dentry->d_lock);
2288 rcu_read_unlock();
2289 if (read_seqretry(&rename_lock, seq))
2290 goto rename_retry;
1950 return retval; 2291 return retval;
1951 2292
1952global_root: 2293global_root:
@@ -1955,6 +2296,7 @@ global_root:
1955 goto Elong; 2296 goto Elong;
1956 root->mnt = vfsmnt; 2297 root->mnt = vfsmnt;
1957 root->dentry = dentry; 2298 root->dentry = dentry;
2299 /* XXX: this could wrongly modify root if we rename retry */
1958 goto out; 2300 goto out;
1959 2301
1960Elong: 2302Elong:
@@ -1983,6 +2325,8 @@ char *d_path(const struct path *path, char *buf, int buflen)
1983 char *res; 2325 char *res;
1984 struct path root; 2326 struct path root;
1985 struct path tmp; 2327 struct path tmp;
2328 int cpu = get_cpu();
2329 put_cpu();
1986 2330
1987 /* 2331 /*
1988 * We have various synthetic filesystems that never get mounted. On 2332 * We have various synthetic filesystems that never get mounted. On
@@ -1998,10 +2342,12 @@ char *d_path(const struct path *path, char *buf, int buflen)
1998 root = current->fs->root; 2342 root = current->fs->root;
1999 path_get(&root); 2343 path_get(&root);
2000 read_unlock(&current->fs->lock); 2344 read_unlock(&current->fs->lock);
2001 spin_lock(&dcache_lock); 2345
2346 vfsmount_read_lock(cpu);
2002 tmp = root; 2347 tmp = root;
2003 res = __d_path(path, &tmp, buf, buflen); 2348 res = __d_path(path, &tmp, buf, buflen);
2004 spin_unlock(&dcache_lock); 2349 vfsmount_read_unlock(cpu);
2350
2005 path_put(&root); 2351 path_put(&root);
2006 return res; 2352 return res;
2007} 2353}
@@ -2032,11 +2378,21 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2032 */ 2378 */
2033char *dentry_path(struct dentry *dentry, char *buf, int buflen) 2379char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2034{ 2380{
2035 char *end = buf + buflen; 2381 char *end;
2036 char *retval; 2382 char *retval;
2383 unsigned seq;
2384 int cpu = get_cpu();
2385 put_cpu();
2037 2386
2038 spin_lock(&dcache_lock); 2387rename_retry:
2388 end = buf + buflen;
2039 prepend(&end, &buflen, "\0", 1); 2389 prepend(&end, &buflen, "\0", 1);
2390
2391 seq = read_seqbegin(&rename_lock);
2392 vfsmount_read_lock(cpu);
2393 rcu_read_lock(); /* protect parent */
2394 spin_lock(&dentry->d_lock);
2395unlinked:
2040 if (d_unlinked(dentry) && 2396 if (d_unlinked(dentry) &&
2041 (prepend(&end, &buflen, "//deleted", 9) != 0)) 2397 (prepend(&end, &buflen, "//deleted", 9) != 0))
2042 goto Elong; 2398 goto Elong;
@@ -2055,13 +2411,22 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2055 goto Elong; 2411 goto Elong;
2056 2412
2057 retval = end; 2413 retval = end;
2414 spin_unlock(&dentry->d_lock);
2058 dentry = parent; 2415 dentry = parent;
2416 spin_lock(&dentry->d_lock);
2417 if (d_unlinked(dentry))
2418 goto unlinked;
2059 } 2419 }
2060 spin_unlock(&dcache_lock); 2420out:
2421 spin_unlock(&dentry->d_lock);
2422 rcu_read_unlock();
2423 vfsmount_read_unlock(cpu);
2424 if (read_seqretry(&rename_lock, seq))
2425 goto rename_retry;
2061 return retval; 2426 return retval;
2062Elong: 2427Elong:
2063 spin_unlock(&dcache_lock); 2428 retval = ERR_PTR(-ENAMETOOLONG);
2064 return ERR_PTR(-ENAMETOOLONG); 2429 goto out;
2065} 2430}
2066 2431
2067/* 2432/*
@@ -2087,6 +2452,8 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2087 int error; 2452 int error;
2088 struct path pwd, root; 2453 struct path pwd, root;
2089 char *page = (char *) __get_free_page(GFP_USER); 2454 char *page = (char *) __get_free_page(GFP_USER);
2455 int cpu = get_cpu();
2456 put_cpu();
2090 2457
2091 if (!page) 2458 if (!page)
2092 return -ENOMEM; 2459 return -ENOMEM;
@@ -2099,14 +2466,17 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2099 read_unlock(&current->fs->lock); 2466 read_unlock(&current->fs->lock);
2100 2467
2101 error = -ENOENT; 2468 error = -ENOENT;
2102 spin_lock(&dcache_lock); 2469 vfsmount_read_lock(cpu);
2470 spin_lock(&pwd.dentry->d_lock);
2103 if (!d_unlinked(pwd.dentry)) { 2471 if (!d_unlinked(pwd.dentry)) {
2104 unsigned long len; 2472 unsigned long len;
2105 struct path tmp = root; 2473 struct path tmp = root;
2106 char * cwd; 2474 char * cwd;
2107 2475
2476 spin_unlock(&pwd.dentry->d_lock);
2477 /* XXX: race here, have to close (eg. return unlinked from __d_path) */
2108 cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE); 2478 cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE);
2109 spin_unlock(&dcache_lock); 2479 vfsmount_read_unlock(cpu);
2110 2480
2111 error = PTR_ERR(cwd); 2481 error = PTR_ERR(cwd);
2112 if (IS_ERR(cwd)) 2482 if (IS_ERR(cwd))
@@ -2119,8 +2489,10 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2119 if (copy_to_user(buf, cwd, len)) 2489 if (copy_to_user(buf, cwd, len))
2120 error = -EFAULT; 2490 error = -EFAULT;
2121 } 2491 }
2122 } else 2492 } else {
2123 spin_unlock(&dcache_lock); 2493 spin_unlock(&pwd.dentry->d_lock);
2494 vfsmount_read_unlock(cpu);
2495 }
2124 2496
2125out: 2497out:
2126 path_put(&pwd); 2498 path_put(&pwd);
@@ -2148,35 +2520,39 @@ out:
2148int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) 2520int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2149{ 2521{
2150 int result; 2522 int result;
2151 unsigned long seq; 2523 unsigned seq;
2152 2524
2153 if (new_dentry == old_dentry) 2525 if (new_dentry == old_dentry)
2154 return 1; 2526 return 1;
2155 2527
2156 /*
2157 * Need rcu_readlock to protect against the d_parent trashing
2158 * due to d_move
2159 */
2160 rcu_read_lock();
2161 do { 2528 do {
2162 /* for restarting inner loop in case of seq retry */ 2529 /* for restarting inner loop in case of seq retry */
2163 seq = read_seqbegin(&rename_lock); 2530 seq = read_seqbegin(&rename_lock);
2531 /*
2532 * Need rcu_readlock to protect against the d_parent trashing
2533 * due to d_move
2534 */
2535 rcu_read_lock();
2164 if (d_ancestor(old_dentry, new_dentry)) 2536 if (d_ancestor(old_dentry, new_dentry))
2165 result = 1; 2537 result = 1;
2166 else 2538 else
2167 result = 0; 2539 result = 0;
2540 rcu_read_unlock();
2168 } while (read_seqretry(&rename_lock, seq)); 2541 } while (read_seqretry(&rename_lock, seq));
2169 rcu_read_unlock();
2170 2542
2171 return result; 2543 return result;
2172} 2544}
2173 2545
2174void d_genocide(struct dentry *root) 2546void d_genocide(struct dentry *root)
2175{ 2547{
2176 struct dentry *this_parent = root; 2548 struct dentry *this_parent;
2177 struct list_head *next; 2549 struct list_head *next;
2550 unsigned seq;
2178 2551
2179 spin_lock(&dcache_lock); 2552rename_retry:
2553 this_parent = root;
2554 seq = read_seqbegin(&rename_lock);
2555 spin_lock(&this_parent->d_lock);
2180repeat: 2556repeat:
2181 next = this_parent->d_subdirs.next; 2557 next = this_parent->d_subdirs.next;
2182resume: 2558resume:
@@ -2184,21 +2560,49 @@ resume:
2184 struct list_head *tmp = next; 2560 struct list_head *tmp = next;
2185 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 2561 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2186 next = tmp->next; 2562 next = tmp->next;
2187 if (d_unhashed(dentry)||!dentry->d_inode) 2563
2564 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2565 if (d_unhashed(dentry) || !dentry->d_inode) {
2566 spin_unlock(&dentry->d_lock);
2188 continue; 2567 continue;
2568 }
2189 if (!list_empty(&dentry->d_subdirs)) { 2569 if (!list_empty(&dentry->d_subdirs)) {
2570 spin_unlock(&this_parent->d_lock);
2571 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2190 this_parent = dentry; 2572 this_parent = dentry;
2573 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2191 goto repeat; 2574 goto repeat;
2192 } 2575 }
2193 atomic_dec(&dentry->d_count); 2576 atomic_dec(&dentry->d_count);
2577 spin_unlock(&dentry->d_lock);
2194 } 2578 }
2195 if (this_parent != root) { 2579 if (this_parent != root) {
2196 next = this_parent->d_u.d_child.next; 2580 struct dentry *tmp;
2581 struct dentry *child;
2582
2583 tmp = this_parent->d_parent;
2197 atomic_dec(&this_parent->d_count); 2584 atomic_dec(&this_parent->d_count);
2198 this_parent = this_parent->d_parent; 2585 rcu_read_lock();
2586 spin_unlock(&this_parent->d_lock);
2587 child = this_parent;
2588 this_parent = tmp;
2589 spin_lock(&this_parent->d_lock);
2590 /* might go back up the wrong parent if we have had a rename
2591 * or deletion */
2592 if (this_parent != child->d_parent ||
2593 // d_unlinked(this_parent) || XXX
2594 read_seqretry(&rename_lock, seq)) {
2595 spin_unlock(&this_parent->d_lock);
2596 rcu_read_unlock();
2597 goto rename_retry;
2598 }
2599 rcu_read_unlock();
2600 next = child->d_u.d_child.next;
2199 goto resume; 2601 goto resume;
2200 } 2602 }
2201 spin_unlock(&dcache_lock); 2603 spin_unlock(&this_parent->d_lock);
2604 if (read_seqretry(&rename_lock, seq))
2605 goto rename_retry;
2202} 2606}
2203 2607
2204/** 2608/**
@@ -2251,7 +2655,7 @@ static void __init dcache_init_early(void)
2251 2655
2252 dentry_hashtable = 2656 dentry_hashtable =
2253 alloc_large_system_hash("Dentry cache", 2657 alloc_large_system_hash("Dentry cache",
2254 sizeof(struct hlist_head), 2658 sizeof(struct dcache_hash_bucket),
2255 dhash_entries, 2659 dhash_entries,
2256 13, 2660 13,
2257 HASH_EARLY, 2661 HASH_EARLY,
@@ -2259,8 +2663,10 @@ static void __init dcache_init_early(void)
2259 &d_hash_mask, 2663 &d_hash_mask,
2260 0); 2664 0);
2261 2665
2262 for (loop = 0; loop < (1 << d_hash_shift); loop++) 2666 for (loop = 0; loop < (1 << d_hash_shift); loop++) {
2263 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 2667 spin_lock_init(&dentry_hashtable[loop].lock);
2668 INIT_HLIST_HEAD(&dentry_hashtable[loop].head);
2669 }
2264} 2670}
2265 2671
2266static void __init dcache_init(void) 2672static void __init dcache_init(void)
@@ -2283,7 +2689,7 @@ static void __init dcache_init(void)
2283 2689
2284 dentry_hashtable = 2690 dentry_hashtable =
2285 alloc_large_system_hash("Dentry cache", 2691 alloc_large_system_hash("Dentry cache",
2286 sizeof(struct hlist_head), 2692 sizeof(struct dcache_hash_bucket),
2287 dhash_entries, 2693 dhash_entries,
2288 13, 2694 13,
2289 0, 2695 0,
@@ -2291,8 +2697,10 @@ static void __init dcache_init(void)
2291 &d_hash_mask, 2697 &d_hash_mask,
2292 0); 2698 0);
2293 2699
2294 for (loop = 0; loop < (1 << d_hash_shift); loop++) 2700 for (loop = 0; loop < (1 << d_hash_shift); loop++) {
2295 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 2701 spin_lock_init(&dentry_hashtable[loop].lock);
2702 INIT_HLIST_HEAD(&dentry_hashtable[loop].head);
2703 }
2296} 2704}
2297 2705
2298/* SLAB cache for __getname() consumers */ 2706/* SLAB cache for __getname() consumers */
@@ -2342,7 +2750,6 @@ EXPORT_SYMBOL(d_rehash);
2342EXPORT_SYMBOL(d_splice_alias); 2750EXPORT_SYMBOL(d_splice_alias);
2343EXPORT_SYMBOL(d_add_ci); 2751EXPORT_SYMBOL(d_add_ci);
2344EXPORT_SYMBOL(d_validate); 2752EXPORT_SYMBOL(d_validate);
2345EXPORT_SYMBOL(dget_locked);
2346EXPORT_SYMBOL(dput); 2753EXPORT_SYMBOL(dput);
2347EXPORT_SYMBOL(find_inode_number); 2754EXPORT_SYMBOL(find_inode_number);
2348EXPORT_SYMBOL(have_submounts); 2755EXPORT_SYMBOL(have_submounts);