aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dcache.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dcache.c')
-rw-r--r--fs/dcache.c1375
1 files changed, 981 insertions, 394 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index 23702a9d4e6..5699d4c027c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -33,20 +33,58 @@
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/fs_struct.h> 34#include <linux/fs_struct.h>
35#include <linux/hardirq.h> 35#include <linux/hardirq.h>
36#include <linux/bit_spinlock.h>
37#include <linux/rculist_bl.h>
36#include "internal.h" 38#include "internal.h"
37 39
40/*
41 * Usage:
42 * dcache->d_inode->i_lock protects:
43 * - i_dentry, d_alias, d_inode of aliases
44 * dcache_hash_bucket lock protects:
45 * - the dcache hash table
46 * s_anon bl list spinlock protects:
47 * - the s_anon list (see __d_drop)
48 * dcache_lru_lock protects:
49 * - the dcache lru lists and counters
50 * d_lock protects:
51 * - d_flags
52 * - d_name
53 * - d_lru
54 * - d_count
55 * - d_unhashed()
56 * - d_parent and d_subdirs
57 * - childrens' d_child and d_parent
58 * - d_alias, d_inode
59 *
60 * Ordering:
61 * dentry->d_inode->i_lock
62 * dentry->d_lock
63 * dcache_lru_lock
64 * dcache_hash_bucket lock
65 * s_anon lock
66 *
67 * If there is an ancestor relationship:
68 * dentry->d_parent->...->d_parent->d_lock
69 * ...
70 * dentry->d_parent->d_lock
71 * dentry->d_lock
72 *
73 * If no ancestor relationship:
74 * if (dentry1 < dentry2)
75 * dentry1->d_lock
76 * dentry2->d_lock
77 */
38int sysctl_vfs_cache_pressure __read_mostly = 100; 78int sysctl_vfs_cache_pressure __read_mostly = 100;
39EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); 79EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
40 80
41 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); 81static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
42__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); 82__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
43 83
44EXPORT_SYMBOL(dcache_lock); 84EXPORT_SYMBOL(rename_lock);
45 85
46static struct kmem_cache *dentry_cache __read_mostly; 86static struct kmem_cache *dentry_cache __read_mostly;
47 87
48#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
49
50/* 88/*
51 * This is the single most critical data structure when it comes 89 * This is the single most critical data structure when it comes
52 * to the dcache: the hashtable for lookups. Somebody should try 90 * to the dcache: the hashtable for lookups. Somebody should try
@@ -60,22 +98,51 @@ static struct kmem_cache *dentry_cache __read_mostly;
60 98
61static unsigned int d_hash_mask __read_mostly; 99static unsigned int d_hash_mask __read_mostly;
62static unsigned int d_hash_shift __read_mostly; 100static unsigned int d_hash_shift __read_mostly;
63static struct hlist_head *dentry_hashtable __read_mostly; 101
102struct dcache_hash_bucket {
103 struct hlist_bl_head head;
104};
105static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
106
107static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
108 unsigned long hash)
109{
110 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
111 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
112 return dentry_hashtable + (hash & D_HASHMASK);
113}
114
115static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
116{
117 bit_spin_lock(0, (unsigned long *)&b->head.first);
118}
119
120static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
121{
122 __bit_spin_unlock(0, (unsigned long *)&b->head.first);
123}
64 124
65/* Statistics gathering. */ 125/* Statistics gathering. */
66struct dentry_stat_t dentry_stat = { 126struct dentry_stat_t dentry_stat = {
67 .age_limit = 45, 127 .age_limit = 45,
68}; 128};
69 129
70static struct percpu_counter nr_dentry __cacheline_aligned_in_smp; 130static DEFINE_PER_CPU(unsigned int, nr_dentry);
71static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp;
72 131
73#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) 132#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
133static int get_nr_dentry(void)
134{
135 int i;
136 int sum = 0;
137 for_each_possible_cpu(i)
138 sum += per_cpu(nr_dentry, i);
139 return sum < 0 ? 0 : sum;
140}
141
74int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, 142int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
75 size_t *lenp, loff_t *ppos) 143 size_t *lenp, loff_t *ppos)
76{ 144{
77 dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry); 145 dentry_stat.nr_dentry = get_nr_dentry();
78 dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
79 return proc_dointvec(table, write, buffer, lenp, ppos); 146 return proc_dointvec(table, write, buffer, lenp, ppos);
80} 147}
81#endif 148#endif
@@ -91,35 +158,50 @@ static void __d_free(struct rcu_head *head)
91} 158}
92 159
93/* 160/*
94 * no dcache_lock, please. 161 * no locks, please.
95 */ 162 */
96static void d_free(struct dentry *dentry) 163static void d_free(struct dentry *dentry)
97{ 164{
98 percpu_counter_dec(&nr_dentry); 165 BUG_ON(dentry->d_count);
166 this_cpu_dec(nr_dentry);
99 if (dentry->d_op && dentry->d_op->d_release) 167 if (dentry->d_op && dentry->d_op->d_release)
100 dentry->d_op->d_release(dentry); 168 dentry->d_op->d_release(dentry);
101 169
102 /* if dentry was never inserted into hash, immediate free is OK */ 170 /* if dentry was never inserted into hash, immediate free is OK */
103 if (hlist_unhashed(&dentry->d_hash)) 171 if (hlist_bl_unhashed(&dentry->d_hash))
104 __d_free(&dentry->d_u.d_rcu); 172 __d_free(&dentry->d_u.d_rcu);
105 else 173 else
106 call_rcu(&dentry->d_u.d_rcu, __d_free); 174 call_rcu(&dentry->d_u.d_rcu, __d_free);
107} 175}
108 176
177/**
178 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
179 * After this call, in-progress rcu-walk path lookup will fail. This
180 * should be called after unhashing, and after changing d_inode (if
181 * the dentry has not already been unhashed).
182 */
183static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
184{
185 assert_spin_locked(&dentry->d_lock);
186 /* Go through a barrier */
187 write_seqcount_barrier(&dentry->d_seq);
188}
189
109/* 190/*
110 * Release the dentry's inode, using the filesystem 191 * Release the dentry's inode, using the filesystem
111 * d_iput() operation if defined. 192 * d_iput() operation if defined. Dentry has no refcount
193 * and is unhashed.
112 */ 194 */
113static void dentry_iput(struct dentry * dentry) 195static void dentry_iput(struct dentry * dentry)
114 __releases(dentry->d_lock) 196 __releases(dentry->d_lock)
115 __releases(dcache_lock) 197 __releases(dentry->d_inode->i_lock)
116{ 198{
117 struct inode *inode = dentry->d_inode; 199 struct inode *inode = dentry->d_inode;
118 if (inode) { 200 if (inode) {
119 dentry->d_inode = NULL; 201 dentry->d_inode = NULL;
120 list_del_init(&dentry->d_alias); 202 list_del_init(&dentry->d_alias);
121 spin_unlock(&dentry->d_lock); 203 spin_unlock(&dentry->d_lock);
122 spin_unlock(&dcache_lock); 204 spin_unlock(&inode->i_lock);
123 if (!inode->i_nlink) 205 if (!inode->i_nlink)
124 fsnotify_inoderemove(inode); 206 fsnotify_inoderemove(inode);
125 if (dentry->d_op && dentry->d_op->d_iput) 207 if (dentry->d_op && dentry->d_op->d_iput)
@@ -128,40 +210,72 @@ static void dentry_iput(struct dentry * dentry)
128 iput(inode); 210 iput(inode);
129 } else { 211 } else {
130 spin_unlock(&dentry->d_lock); 212 spin_unlock(&dentry->d_lock);
131 spin_unlock(&dcache_lock);
132 } 213 }
133} 214}
134 215
135/* 216/*
136 * dentry_lru_(add|del|move_tail) must be called with dcache_lock held. 217 * Release the dentry's inode, using the filesystem
218 * d_iput() operation if defined. dentry remains in-use.
219 */
220static void dentry_unlink_inode(struct dentry * dentry)
221 __releases(dentry->d_lock)
222 __releases(dentry->d_inode->i_lock)
223{
224 struct inode *inode = dentry->d_inode;
225 dentry->d_inode = NULL;
226 list_del_init(&dentry->d_alias);
227 dentry_rcuwalk_barrier(dentry);
228 spin_unlock(&dentry->d_lock);
229 spin_unlock(&inode->i_lock);
230 if (!inode->i_nlink)
231 fsnotify_inoderemove(inode);
232 if (dentry->d_op && dentry->d_op->d_iput)
233 dentry->d_op->d_iput(dentry, inode);
234 else
235 iput(inode);
236}
237
238/*
239 * dentry_lru_(add|del|move_tail) must be called with d_lock held.
137 */ 240 */
138static void dentry_lru_add(struct dentry *dentry) 241static void dentry_lru_add(struct dentry *dentry)
139{ 242{
140 if (list_empty(&dentry->d_lru)) { 243 if (list_empty(&dentry->d_lru)) {
244 spin_lock(&dcache_lru_lock);
141 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 245 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
142 dentry->d_sb->s_nr_dentry_unused++; 246 dentry->d_sb->s_nr_dentry_unused++;
143 percpu_counter_inc(&nr_dentry_unused); 247 dentry_stat.nr_unused++;
248 spin_unlock(&dcache_lru_lock);
144 } 249 }
145} 250}
146 251
252static void __dentry_lru_del(struct dentry *dentry)
253{
254 list_del_init(&dentry->d_lru);
255 dentry->d_sb->s_nr_dentry_unused--;
256 dentry_stat.nr_unused--;
257}
258
147static void dentry_lru_del(struct dentry *dentry) 259static void dentry_lru_del(struct dentry *dentry)
148{ 260{
149 if (!list_empty(&dentry->d_lru)) { 261 if (!list_empty(&dentry->d_lru)) {
150 list_del_init(&dentry->d_lru); 262 spin_lock(&dcache_lru_lock);
151 dentry->d_sb->s_nr_dentry_unused--; 263 __dentry_lru_del(dentry);
152 percpu_counter_dec(&nr_dentry_unused); 264 spin_unlock(&dcache_lru_lock);
153 } 265 }
154} 266}
155 267
156static void dentry_lru_move_tail(struct dentry *dentry) 268static void dentry_lru_move_tail(struct dentry *dentry)
157{ 269{
270 spin_lock(&dcache_lru_lock);
158 if (list_empty(&dentry->d_lru)) { 271 if (list_empty(&dentry->d_lru)) {
159 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 272 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
160 dentry->d_sb->s_nr_dentry_unused++; 273 dentry->d_sb->s_nr_dentry_unused++;
161 percpu_counter_inc(&nr_dentry_unused); 274 dentry_stat.nr_unused++;
162 } else { 275 } else {
163 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 276 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
164 } 277 }
278 spin_unlock(&dcache_lru_lock);
165} 279}
166 280
167/** 281/**
@@ -171,22 +285,115 @@ static void dentry_lru_move_tail(struct dentry *dentry)
171 * The dentry must already be unhashed and removed from the LRU. 285 * The dentry must already be unhashed and removed from the LRU.
172 * 286 *
173 * If this is the root of the dentry tree, return NULL. 287 * If this is the root of the dentry tree, return NULL.
288 *
289 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
290 * d_kill.
174 */ 291 */
175static struct dentry *d_kill(struct dentry *dentry) 292static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
176 __releases(dentry->d_lock) 293 __releases(dentry->d_lock)
177 __releases(dcache_lock) 294 __releases(parent->d_lock)
295 __releases(dentry->d_inode->i_lock)
178{ 296{
179 struct dentry *parent; 297 dentry->d_parent = NULL;
180
181 list_del(&dentry->d_u.d_child); 298 list_del(&dentry->d_u.d_child);
182 /*drops the locks, at that point nobody can reach this dentry */ 299 if (parent)
300 spin_unlock(&parent->d_lock);
183 dentry_iput(dentry); 301 dentry_iput(dentry);
302 /*
303 * dentry_iput drops the locks, at which point nobody (except
304 * transient RCU lookups) can reach this dentry.
305 */
306 d_free(dentry);
307 return parent;
308}
309
310/**
311 * d_drop - drop a dentry
312 * @dentry: dentry to drop
313 *
314 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
315 * be found through a VFS lookup any more. Note that this is different from
316 * deleting the dentry - d_delete will try to mark the dentry negative if
317 * possible, giving a successful _negative_ lookup, while d_drop will
318 * just make the cache lookup fail.
319 *
320 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
321 * reason (NFS timeouts or autofs deletes).
322 *
323 * __d_drop requires dentry->d_lock.
324 */
325void __d_drop(struct dentry *dentry)
326{
327 if (!(dentry->d_flags & DCACHE_UNHASHED)) {
328 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) {
329 bit_spin_lock(0,
330 (unsigned long *)&dentry->d_sb->s_anon.first);
331 dentry->d_flags |= DCACHE_UNHASHED;
332 hlist_bl_del_init(&dentry->d_hash);
333 __bit_spin_unlock(0,
334 (unsigned long *)&dentry->d_sb->s_anon.first);
335 } else {
336 struct dcache_hash_bucket *b;
337 b = d_hash(dentry->d_parent, dentry->d_name.hash);
338 spin_lock_bucket(b);
339 /*
340 * We may not actually need to put DCACHE_UNHASHED
341 * manipulations under the hash lock, but follow
342 * the principle of least surprise.
343 */
344 dentry->d_flags |= DCACHE_UNHASHED;
345 hlist_bl_del_rcu(&dentry->d_hash);
346 spin_unlock_bucket(b);
347 dentry_rcuwalk_barrier(dentry);
348 }
349 }
350}
351EXPORT_SYMBOL(__d_drop);
352
353void d_drop(struct dentry *dentry)
354{
355 spin_lock(&dentry->d_lock);
356 __d_drop(dentry);
357 spin_unlock(&dentry->d_lock);
358}
359EXPORT_SYMBOL(d_drop);
360
361/*
362 * Finish off a dentry we've decided to kill.
363 * dentry->d_lock must be held, returns with it unlocked.
364 * If ref is non-zero, then decrement the refcount too.
365 * Returns dentry requiring refcount drop, or NULL if we're done.
366 */
367static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
368 __releases(dentry->d_lock)
369{
370 struct inode *inode;
371 struct dentry *parent;
372
373 inode = dentry->d_inode;
374 if (inode && !spin_trylock(&inode->i_lock)) {
375relock:
376 spin_unlock(&dentry->d_lock);
377 cpu_relax();
378 return dentry; /* try again with same dentry */
379 }
184 if (IS_ROOT(dentry)) 380 if (IS_ROOT(dentry))
185 parent = NULL; 381 parent = NULL;
186 else 382 else
187 parent = dentry->d_parent; 383 parent = dentry->d_parent;
188 d_free(dentry); 384 if (parent && !spin_trylock(&parent->d_lock)) {
189 return parent; 385 if (inode)
386 spin_unlock(&inode->i_lock);
387 goto relock;
388 }
389
390 if (ref)
391 dentry->d_count--;
392 /* if dentry was on the d_lru list delete it from there */
393 dentry_lru_del(dentry);
394 /* if it was on the hash then remove it */
395 __d_drop(dentry);
396 return d_kill(dentry, parent);
190} 397}
191 398
192/* 399/*
@@ -214,34 +421,26 @@ static struct dentry *d_kill(struct dentry *dentry)
214 * call the dentry unlink method as well as removing it from the queues and 421 * call the dentry unlink method as well as removing it from the queues and
215 * releasing its resources. If the parent dentries were scheduled for release 422 * releasing its resources. If the parent dentries were scheduled for release
216 * they too may now get deleted. 423 * they too may now get deleted.
217 *
218 * no dcache lock, please.
219 */ 424 */
220
221void dput(struct dentry *dentry) 425void dput(struct dentry *dentry)
222{ 426{
223 if (!dentry) 427 if (!dentry)
224 return; 428 return;
225 429
226repeat: 430repeat:
227 if (atomic_read(&dentry->d_count) == 1) 431 if (dentry->d_count == 1)
228 might_sleep(); 432 might_sleep();
229 if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
230 return;
231
232 spin_lock(&dentry->d_lock); 433 spin_lock(&dentry->d_lock);
233 if (atomic_read(&dentry->d_count)) { 434 BUG_ON(!dentry->d_count);
435 if (dentry->d_count > 1) {
436 dentry->d_count--;
234 spin_unlock(&dentry->d_lock); 437 spin_unlock(&dentry->d_lock);
235 spin_unlock(&dcache_lock);
236 return; 438 return;
237 } 439 }
238 440
239 /* 441 if (dentry->d_flags & DCACHE_OP_DELETE) {
240 * AV: ->d_delete() is _NOT_ allowed to block now.
241 */
242 if (dentry->d_op && dentry->d_op->d_delete) {
243 if (dentry->d_op->d_delete(dentry)) 442 if (dentry->d_op->d_delete(dentry))
244 goto unhash_it; 443 goto kill_it;
245 } 444 }
246 445
247 /* Unreachable? Get rid of it */ 446 /* Unreachable? Get rid of it */
@@ -252,16 +451,12 @@ repeat:
252 dentry->d_flags |= DCACHE_REFERENCED; 451 dentry->d_flags |= DCACHE_REFERENCED;
253 dentry_lru_add(dentry); 452 dentry_lru_add(dentry);
254 453
255 spin_unlock(&dentry->d_lock); 454 dentry->d_count--;
256 spin_unlock(&dcache_lock); 455 spin_unlock(&dentry->d_lock);
257 return; 456 return;
258 457
259unhash_it:
260 __d_drop(dentry);
261kill_it: 458kill_it:
262 /* if dentry was on the d_lru list delete it from there */ 459 dentry = dentry_kill(dentry, 1);
263 dentry_lru_del(dentry);
264 dentry = d_kill(dentry);
265 if (dentry) 460 if (dentry)
266 goto repeat; 461 goto repeat;
267} 462}
@@ -284,9 +479,9 @@ int d_invalidate(struct dentry * dentry)
284 /* 479 /*
285 * If it's already been dropped, return OK. 480 * If it's already been dropped, return OK.
286 */ 481 */
287 spin_lock(&dcache_lock); 482 spin_lock(&dentry->d_lock);
288 if (d_unhashed(dentry)) { 483 if (d_unhashed(dentry)) {
289 spin_unlock(&dcache_lock); 484 spin_unlock(&dentry->d_lock);
290 return 0; 485 return 0;
291 } 486 }
292 /* 487 /*
@@ -294,9 +489,9 @@ int d_invalidate(struct dentry * dentry)
294 * to get rid of unused child entries. 489 * to get rid of unused child entries.
295 */ 490 */
296 if (!list_empty(&dentry->d_subdirs)) { 491 if (!list_empty(&dentry->d_subdirs)) {
297 spin_unlock(&dcache_lock); 492 spin_unlock(&dentry->d_lock);
298 shrink_dcache_parent(dentry); 493 shrink_dcache_parent(dentry);
299 spin_lock(&dcache_lock); 494 spin_lock(&dentry->d_lock);
300 } 495 }
301 496
302 /* 497 /*
@@ -309,35 +504,61 @@ int d_invalidate(struct dentry * dentry)
309 * we might still populate it if it was a 504 * we might still populate it if it was a
310 * working directory or similar). 505 * working directory or similar).
311 */ 506 */
312 spin_lock(&dentry->d_lock); 507 if (dentry->d_count > 1) {
313 if (atomic_read(&dentry->d_count) > 1) {
314 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { 508 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
315 spin_unlock(&dentry->d_lock); 509 spin_unlock(&dentry->d_lock);
316 spin_unlock(&dcache_lock);
317 return -EBUSY; 510 return -EBUSY;
318 } 511 }
319 } 512 }
320 513
321 __d_drop(dentry); 514 __d_drop(dentry);
322 spin_unlock(&dentry->d_lock); 515 spin_unlock(&dentry->d_lock);
323 spin_unlock(&dcache_lock);
324 return 0; 516 return 0;
325} 517}
326EXPORT_SYMBOL(d_invalidate); 518EXPORT_SYMBOL(d_invalidate);
327 519
328/* This should be called _only_ with dcache_lock held */ 520/* This must be called with d_lock held */
329static inline struct dentry * __dget_locked(struct dentry *dentry) 521static inline void __dget_dlock(struct dentry *dentry)
330{ 522{
331 atomic_inc(&dentry->d_count); 523 dentry->d_count++;
332 dentry_lru_del(dentry);
333 return dentry;
334} 524}
335 525
336struct dentry * dget_locked(struct dentry *dentry) 526static inline void __dget(struct dentry *dentry)
337{ 527{
338 return __dget_locked(dentry); 528 spin_lock(&dentry->d_lock);
529 __dget_dlock(dentry);
530 spin_unlock(&dentry->d_lock);
531}
532
533struct dentry *dget_parent(struct dentry *dentry)
534{
535 struct dentry *ret;
536
537repeat:
538 /*
539 * Don't need rcu_dereference because we re-check it was correct under
540 * the lock.
541 */
542 rcu_read_lock();
543 ret = dentry->d_parent;
544 if (!ret) {
545 rcu_read_unlock();
546 goto out;
547 }
548 spin_lock(&ret->d_lock);
549 if (unlikely(ret != dentry->d_parent)) {
550 spin_unlock(&ret->d_lock);
551 rcu_read_unlock();
552 goto repeat;
553 }
554 rcu_read_unlock();
555 BUG_ON(!ret->d_count);
556 ret->d_count++;
557 spin_unlock(&ret->d_lock);
558out:
559 return ret;
339} 560}
340EXPORT_SYMBOL(dget_locked); 561EXPORT_SYMBOL(dget_parent);
341 562
342/** 563/**
343 * d_find_alias - grab a hashed alias of inode 564 * d_find_alias - grab a hashed alias of inode
@@ -355,42 +576,51 @@ EXPORT_SYMBOL(dget_locked);
355 * any other hashed alias over that one unless @want_discon is set, 576 * any other hashed alias over that one unless @want_discon is set,
356 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. 577 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
357 */ 578 */
358 579static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
359static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
360{ 580{
361 struct list_head *head, *next, *tmp; 581 struct dentry *alias, *discon_alias;
362 struct dentry *alias, *discon_alias=NULL;
363 582
364 head = &inode->i_dentry; 583again:
365 next = inode->i_dentry.next; 584 discon_alias = NULL;
366 while (next != head) { 585 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
367 tmp = next; 586 spin_lock(&alias->d_lock);
368 next = tmp->next;
369 prefetch(next);
370 alias = list_entry(tmp, struct dentry, d_alias);
371 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { 587 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
372 if (IS_ROOT(alias) && 588 if (IS_ROOT(alias) &&
373 (alias->d_flags & DCACHE_DISCONNECTED)) 589 (alias->d_flags & DCACHE_DISCONNECTED)) {
374 discon_alias = alias; 590 discon_alias = alias;
375 else if (!want_discon) { 591 } else if (!want_discon) {
376 __dget_locked(alias); 592 __dget_dlock(alias);
593 spin_unlock(&alias->d_lock);
594 return alias;
595 }
596 }
597 spin_unlock(&alias->d_lock);
598 }
599 if (discon_alias) {
600 alias = discon_alias;
601 spin_lock(&alias->d_lock);
602 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
603 if (IS_ROOT(alias) &&
604 (alias->d_flags & DCACHE_DISCONNECTED)) {
605 __dget_dlock(alias);
606 spin_unlock(&alias->d_lock);
377 return alias; 607 return alias;
378 } 608 }
379 } 609 }
610 spin_unlock(&alias->d_lock);
611 goto again;
380 } 612 }
381 if (discon_alias) 613 return NULL;
382 __dget_locked(discon_alias);
383 return discon_alias;
384} 614}
385 615
386struct dentry * d_find_alias(struct inode *inode) 616struct dentry *d_find_alias(struct inode *inode)
387{ 617{
388 struct dentry *de = NULL; 618 struct dentry *de = NULL;
389 619
390 if (!list_empty(&inode->i_dentry)) { 620 if (!list_empty(&inode->i_dentry)) {
391 spin_lock(&dcache_lock); 621 spin_lock(&inode->i_lock);
392 de = __d_find_alias(inode, 0); 622 de = __d_find_alias(inode, 0);
393 spin_unlock(&dcache_lock); 623 spin_unlock(&inode->i_lock);
394 } 624 }
395 return de; 625 return de;
396} 626}
@@ -404,54 +634,61 @@ void d_prune_aliases(struct inode *inode)
404{ 634{
405 struct dentry *dentry; 635 struct dentry *dentry;
406restart: 636restart:
407 spin_lock(&dcache_lock); 637 spin_lock(&inode->i_lock);
408 list_for_each_entry(dentry, &inode->i_dentry, d_alias) { 638 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
409 spin_lock(&dentry->d_lock); 639 spin_lock(&dentry->d_lock);
410 if (!atomic_read(&dentry->d_count)) { 640 if (!dentry->d_count) {
411 __dget_locked(dentry); 641 __dget_dlock(dentry);
412 __d_drop(dentry); 642 __d_drop(dentry);
413 spin_unlock(&dentry->d_lock); 643 spin_unlock(&dentry->d_lock);
414 spin_unlock(&dcache_lock); 644 spin_unlock(&inode->i_lock);
415 dput(dentry); 645 dput(dentry);
416 goto restart; 646 goto restart;
417 } 647 }
418 spin_unlock(&dentry->d_lock); 648 spin_unlock(&dentry->d_lock);
419 } 649 }
420 spin_unlock(&dcache_lock); 650 spin_unlock(&inode->i_lock);
421} 651}
422EXPORT_SYMBOL(d_prune_aliases); 652EXPORT_SYMBOL(d_prune_aliases);
423 653
424/* 654/*
425 * Throw away a dentry - free the inode, dput the parent. This requires that 655 * Try to throw away a dentry - free the inode, dput the parent.
426 * the LRU list has already been removed. 656 * Requires dentry->d_lock is held, and dentry->d_count == 0.
657 * Releases dentry->d_lock.
427 * 658 *
428 * Try to prune ancestors as well. This is necessary to prevent 659 * This may fail if locks cannot be acquired no problem, just try again.
429 * quadratic behavior of shrink_dcache_parent(), but is also expected
430 * to be beneficial in reducing dentry cache fragmentation.
431 */ 660 */
432static void prune_one_dentry(struct dentry * dentry) 661static void try_prune_one_dentry(struct dentry *dentry)
433 __releases(dentry->d_lock) 662 __releases(dentry->d_lock)
434 __releases(dcache_lock)
435 __acquires(dcache_lock)
436{ 663{
437 __d_drop(dentry); 664 struct dentry *parent;
438 dentry = d_kill(dentry);
439 665
666 parent = dentry_kill(dentry, 0);
440 /* 667 /*
441 * Prune ancestors. Locking is simpler than in dput(), 668 * If dentry_kill returns NULL, we have nothing more to do.
442 * because dcache_lock needs to be taken anyway. 669 * if it returns the same dentry, trylocks failed. In either
670 * case, just loop again.
671 *
672 * Otherwise, we need to prune ancestors too. This is necessary
673 * to prevent quadratic behavior of shrink_dcache_parent(), but
674 * is also expected to be beneficial in reducing dentry cache
675 * fragmentation.
443 */ 676 */
444 spin_lock(&dcache_lock); 677 if (!parent)
678 return;
679 if (parent == dentry)
680 return;
681
682 /* Prune ancestors. */
683 dentry = parent;
445 while (dentry) { 684 while (dentry) {
446 if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) 685 spin_lock(&dentry->d_lock);
686 if (dentry->d_count > 1) {
687 dentry->d_count--;
688 spin_unlock(&dentry->d_lock);
447 return; 689 return;
448 690 }
449 if (dentry->d_op && dentry->d_op->d_delete) 691 dentry = dentry_kill(dentry, 1);
450 dentry->d_op->d_delete(dentry);
451 dentry_lru_del(dentry);
452 __d_drop(dentry);
453 dentry = d_kill(dentry);
454 spin_lock(&dcache_lock);
455 } 692 }
456} 693}
457 694
@@ -459,24 +696,35 @@ static void shrink_dentry_list(struct list_head *list)
459{ 696{
460 struct dentry *dentry; 697 struct dentry *dentry;
461 698
462 while (!list_empty(list)) { 699 rcu_read_lock();
463 dentry = list_entry(list->prev, struct dentry, d_lru); 700 for (;;) {
464 dentry_lru_del(dentry); 701 dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
702 if (&dentry->d_lru == list)
703 break; /* empty */
704 spin_lock(&dentry->d_lock);
705 if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
706 spin_unlock(&dentry->d_lock);
707 continue;
708 }
465 709
466 /* 710 /*
467 * We found an inuse dentry which was not removed from 711 * We found an inuse dentry which was not removed from
468 * the LRU because of laziness during lookup. Do not free 712 * the LRU because of laziness during lookup. Do not free
469 * it - just keep it off the LRU list. 713 * it - just keep it off the LRU list.
470 */ 714 */
471 spin_lock(&dentry->d_lock); 715 if (dentry->d_count) {
472 if (atomic_read(&dentry->d_count)) { 716 dentry_lru_del(dentry);
473 spin_unlock(&dentry->d_lock); 717 spin_unlock(&dentry->d_lock);
474 continue; 718 continue;
475 } 719 }
476 prune_one_dentry(dentry); 720
477 /* dentry->d_lock was dropped in prune_one_dentry() */ 721 rcu_read_unlock();
478 cond_resched_lock(&dcache_lock); 722
723 try_prune_one_dentry(dentry);
724
725 rcu_read_lock();
479 } 726 }
727 rcu_read_unlock();
480} 728}
481 729
482/** 730/**
@@ -495,42 +743,44 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
495 LIST_HEAD(tmp); 743 LIST_HEAD(tmp);
496 int cnt = *count; 744 int cnt = *count;
497 745
498 spin_lock(&dcache_lock); 746relock:
747 spin_lock(&dcache_lru_lock);
499 while (!list_empty(&sb->s_dentry_lru)) { 748 while (!list_empty(&sb->s_dentry_lru)) {
500 dentry = list_entry(sb->s_dentry_lru.prev, 749 dentry = list_entry(sb->s_dentry_lru.prev,
501 struct dentry, d_lru); 750 struct dentry, d_lru);
502 BUG_ON(dentry->d_sb != sb); 751 BUG_ON(dentry->d_sb != sb);
503 752
753 if (!spin_trylock(&dentry->d_lock)) {
754 spin_unlock(&dcache_lru_lock);
755 cpu_relax();
756 goto relock;
757 }
758
504 /* 759 /*
505 * If we are honouring the DCACHE_REFERENCED flag and the 760 * If we are honouring the DCACHE_REFERENCED flag and the
506 * dentry has this flag set, don't free it. Clear the flag 761 * dentry has this flag set, don't free it. Clear the flag
507 * and put it back on the LRU. 762 * and put it back on the LRU.
508 */ 763 */
509 if (flags & DCACHE_REFERENCED) { 764 if (flags & DCACHE_REFERENCED &&
510 spin_lock(&dentry->d_lock); 765 dentry->d_flags & DCACHE_REFERENCED) {
511 if (dentry->d_flags & DCACHE_REFERENCED) { 766 dentry->d_flags &= ~DCACHE_REFERENCED;
512 dentry->d_flags &= ~DCACHE_REFERENCED; 767 list_move(&dentry->d_lru, &referenced);
513 list_move(&dentry->d_lru, &referenced);
514 spin_unlock(&dentry->d_lock);
515 cond_resched_lock(&dcache_lock);
516 continue;
517 }
518 spin_unlock(&dentry->d_lock); 768 spin_unlock(&dentry->d_lock);
769 } else {
770 list_move_tail(&dentry->d_lru, &tmp);
771 spin_unlock(&dentry->d_lock);
772 if (!--cnt)
773 break;
519 } 774 }
520 775 cond_resched_lock(&dcache_lru_lock);
521 list_move_tail(&dentry->d_lru, &tmp);
522 if (!--cnt)
523 break;
524 cond_resched_lock(&dcache_lock);
525 } 776 }
526
527 *count = cnt;
528 shrink_dentry_list(&tmp);
529
530 if (!list_empty(&referenced)) 777 if (!list_empty(&referenced))
531 list_splice(&referenced, &sb->s_dentry_lru); 778 list_splice(&referenced, &sb->s_dentry_lru);
532 spin_unlock(&dcache_lock); 779 spin_unlock(&dcache_lru_lock);
533 780
781 shrink_dentry_list(&tmp);
782
783 *count = cnt;
534} 784}
535 785
536/** 786/**
@@ -546,13 +796,12 @@ static void prune_dcache(int count)
546{ 796{
547 struct super_block *sb, *p = NULL; 797 struct super_block *sb, *p = NULL;
548 int w_count; 798 int w_count;
549 int unused = percpu_counter_sum_positive(&nr_dentry_unused); 799 int unused = dentry_stat.nr_unused;
550 int prune_ratio; 800 int prune_ratio;
551 int pruned; 801 int pruned;
552 802
553 if (unused == 0 || count == 0) 803 if (unused == 0 || count == 0)
554 return; 804 return;
555 spin_lock(&dcache_lock);
556 if (count >= unused) 805 if (count >= unused)
557 prune_ratio = 1; 806 prune_ratio = 1;
558 else 807 else
@@ -589,11 +838,9 @@ static void prune_dcache(int count)
589 if (down_read_trylock(&sb->s_umount)) { 838 if (down_read_trylock(&sb->s_umount)) {
590 if ((sb->s_root != NULL) && 839 if ((sb->s_root != NULL) &&
591 (!list_empty(&sb->s_dentry_lru))) { 840 (!list_empty(&sb->s_dentry_lru))) {
592 spin_unlock(&dcache_lock);
593 __shrink_dcache_sb(sb, &w_count, 841 __shrink_dcache_sb(sb, &w_count,
594 DCACHE_REFERENCED); 842 DCACHE_REFERENCED);
595 pruned -= w_count; 843 pruned -= w_count;
596 spin_lock(&dcache_lock);
597 } 844 }
598 up_read(&sb->s_umount); 845 up_read(&sb->s_umount);
599 } 846 }
@@ -609,7 +856,6 @@ static void prune_dcache(int count)
609 if (p) 856 if (p)
610 __put_super(p); 857 __put_super(p);
611 spin_unlock(&sb_lock); 858 spin_unlock(&sb_lock);
612 spin_unlock(&dcache_lock);
613} 859}
614 860
615/** 861/**
@@ -623,12 +869,14 @@ void shrink_dcache_sb(struct super_block *sb)
623{ 869{
624 LIST_HEAD(tmp); 870 LIST_HEAD(tmp);
625 871
626 spin_lock(&dcache_lock); 872 spin_lock(&dcache_lru_lock);
627 while (!list_empty(&sb->s_dentry_lru)) { 873 while (!list_empty(&sb->s_dentry_lru)) {
628 list_splice_init(&sb->s_dentry_lru, &tmp); 874 list_splice_init(&sb->s_dentry_lru, &tmp);
875 spin_unlock(&dcache_lru_lock);
629 shrink_dentry_list(&tmp); 876 shrink_dentry_list(&tmp);
877 spin_lock(&dcache_lru_lock);
630 } 878 }
631 spin_unlock(&dcache_lock); 879 spin_unlock(&dcache_lru_lock);
632} 880}
633EXPORT_SYMBOL(shrink_dcache_sb); 881EXPORT_SYMBOL(shrink_dcache_sb);
634 882
@@ -645,10 +893,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
645 BUG_ON(!IS_ROOT(dentry)); 893 BUG_ON(!IS_ROOT(dentry));
646 894
647 /* detach this root from the system */ 895 /* detach this root from the system */
648 spin_lock(&dcache_lock); 896 spin_lock(&dentry->d_lock);
649 dentry_lru_del(dentry); 897 dentry_lru_del(dentry);
650 __d_drop(dentry); 898 __d_drop(dentry);
651 spin_unlock(&dcache_lock); 899 spin_unlock(&dentry->d_lock);
652 900
653 for (;;) { 901 for (;;) {
654 /* descend to the first leaf in the current subtree */ 902 /* descend to the first leaf in the current subtree */
@@ -657,14 +905,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
657 905
658 /* this is a branch with children - detach all of them 906 /* this is a branch with children - detach all of them
659 * from the system in one go */ 907 * from the system in one go */
660 spin_lock(&dcache_lock); 908 spin_lock(&dentry->d_lock);
661 list_for_each_entry(loop, &dentry->d_subdirs, 909 list_for_each_entry(loop, &dentry->d_subdirs,
662 d_u.d_child) { 910 d_u.d_child) {
911 spin_lock_nested(&loop->d_lock,
912 DENTRY_D_LOCK_NESTED);
663 dentry_lru_del(loop); 913 dentry_lru_del(loop);
664 __d_drop(loop); 914 __d_drop(loop);
665 cond_resched_lock(&dcache_lock); 915 spin_unlock(&loop->d_lock);
666 } 916 }
667 spin_unlock(&dcache_lock); 917 spin_unlock(&dentry->d_lock);
668 918
669 /* move to the first child */ 919 /* move to the first child */
670 dentry = list_entry(dentry->d_subdirs.next, 920 dentry = list_entry(dentry->d_subdirs.next,
@@ -676,7 +926,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
676 do { 926 do {
677 struct inode *inode; 927 struct inode *inode;
678 928
679 if (atomic_read(&dentry->d_count) != 0) { 929 if (dentry->d_count != 0) {
680 printk(KERN_ERR 930 printk(KERN_ERR
681 "BUG: Dentry %p{i=%lx,n=%s}" 931 "BUG: Dentry %p{i=%lx,n=%s}"
682 " still in use (%d)" 932 " still in use (%d)"
@@ -685,20 +935,23 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
685 dentry->d_inode ? 935 dentry->d_inode ?
686 dentry->d_inode->i_ino : 0UL, 936 dentry->d_inode->i_ino : 0UL,
687 dentry->d_name.name, 937 dentry->d_name.name,
688 atomic_read(&dentry->d_count), 938 dentry->d_count,
689 dentry->d_sb->s_type->name, 939 dentry->d_sb->s_type->name,
690 dentry->d_sb->s_id); 940 dentry->d_sb->s_id);
691 BUG(); 941 BUG();
692 } 942 }
693 943
694 if (IS_ROOT(dentry)) 944 if (IS_ROOT(dentry)) {
695 parent = NULL; 945 parent = NULL;
696 else { 946 list_del(&dentry->d_u.d_child);
947 } else {
697 parent = dentry->d_parent; 948 parent = dentry->d_parent;
698 atomic_dec(&parent->d_count); 949 spin_lock(&parent->d_lock);
950 parent->d_count--;
951 list_del(&dentry->d_u.d_child);
952 spin_unlock(&parent->d_lock);
699 } 953 }
700 954
701 list_del(&dentry->d_u.d_child);
702 detached++; 955 detached++;
703 956
704 inode = dentry->d_inode; 957 inode = dentry->d_inode;
@@ -728,8 +981,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
728 981
729/* 982/*
730 * destroy the dentries attached to a superblock on unmounting 983 * destroy the dentries attached to a superblock on unmounting
731 * - we don't need to use dentry->d_lock, and only need dcache_lock when 984 * - we don't need to use dentry->d_lock because:
732 * removing the dentry from the system lists and hashes because:
733 * - the superblock is detached from all mountings and open files, so the 985 * - the superblock is detached from all mountings and open files, so the
734 * dentry trees will not be rearranged by the VFS 986 * dentry trees will not be rearranged by the VFS
735 * - s_umount is write-locked, so the memory pressure shrinker will ignore 987 * - s_umount is write-locked, so the memory pressure shrinker will ignore
@@ -746,11 +998,13 @@ void shrink_dcache_for_umount(struct super_block *sb)
746 998
747 dentry = sb->s_root; 999 dentry = sb->s_root;
748 sb->s_root = NULL; 1000 sb->s_root = NULL;
749 atomic_dec(&dentry->d_count); 1001 spin_lock(&dentry->d_lock);
1002 dentry->d_count--;
1003 spin_unlock(&dentry->d_lock);
750 shrink_dcache_for_umount_subtree(dentry); 1004 shrink_dcache_for_umount_subtree(dentry);
751 1005
752 while (!hlist_empty(&sb->s_anon)) { 1006 while (!hlist_bl_empty(&sb->s_anon)) {
753 dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); 1007 dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
754 shrink_dcache_for_umount_subtree(dentry); 1008 shrink_dcache_for_umount_subtree(dentry);
755 } 1009 }
756} 1010}
@@ -768,15 +1022,20 @@ void shrink_dcache_for_umount(struct super_block *sb)
768 * Return true if the parent or its subdirectories contain 1022 * Return true if the parent or its subdirectories contain
769 * a mount point 1023 * a mount point
770 */ 1024 */
771
772int have_submounts(struct dentry *parent) 1025int have_submounts(struct dentry *parent)
773{ 1026{
774 struct dentry *this_parent = parent; 1027 struct dentry *this_parent;
775 struct list_head *next; 1028 struct list_head *next;
1029 unsigned seq;
1030 int locked = 0;
1031
1032 seq = read_seqbegin(&rename_lock);
1033again:
1034 this_parent = parent;
776 1035
777 spin_lock(&dcache_lock);
778 if (d_mountpoint(parent)) 1036 if (d_mountpoint(parent))
779 goto positive; 1037 goto positive;
1038 spin_lock(&this_parent->d_lock);
780repeat: 1039repeat:
781 next = this_parent->d_subdirs.next; 1040 next = this_parent->d_subdirs.next;
782resume: 1041resume:
@@ -784,27 +1043,65 @@ resume:
784 struct list_head *tmp = next; 1043 struct list_head *tmp = next;
785 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 1044 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
786 next = tmp->next; 1045 next = tmp->next;
1046
1047 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
787 /* Have we found a mount point ? */ 1048 /* Have we found a mount point ? */
788 if (d_mountpoint(dentry)) 1049 if (d_mountpoint(dentry)) {
1050 spin_unlock(&dentry->d_lock);
1051 spin_unlock(&this_parent->d_lock);
789 goto positive; 1052 goto positive;
1053 }
790 if (!list_empty(&dentry->d_subdirs)) { 1054 if (!list_empty(&dentry->d_subdirs)) {
1055 spin_unlock(&this_parent->d_lock);
1056 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
791 this_parent = dentry; 1057 this_parent = dentry;
1058 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
792 goto repeat; 1059 goto repeat;
793 } 1060 }
1061 spin_unlock(&dentry->d_lock);
794 } 1062 }
795 /* 1063 /*
796 * All done at this level ... ascend and resume the search. 1064 * All done at this level ... ascend and resume the search.
797 */ 1065 */
798 if (this_parent != parent) { 1066 if (this_parent != parent) {
799 next = this_parent->d_u.d_child.next; 1067 struct dentry *tmp;
800 this_parent = this_parent->d_parent; 1068 struct dentry *child;
1069
1070 tmp = this_parent->d_parent;
1071 rcu_read_lock();
1072 spin_unlock(&this_parent->d_lock);
1073 child = this_parent;
1074 this_parent = tmp;
1075 spin_lock(&this_parent->d_lock);
1076 /* might go back up the wrong parent if we have had a rename
1077 * or deletion */
1078 if (this_parent != child->d_parent ||
1079 (!locked && read_seqretry(&rename_lock, seq))) {
1080 spin_unlock(&this_parent->d_lock);
1081 rcu_read_unlock();
1082 goto rename_retry;
1083 }
1084 rcu_read_unlock();
1085 next = child->d_u.d_child.next;
801 goto resume; 1086 goto resume;
802 } 1087 }
803 spin_unlock(&dcache_lock); 1088 spin_unlock(&this_parent->d_lock);
1089 if (!locked && read_seqretry(&rename_lock, seq))
1090 goto rename_retry;
1091 if (locked)
1092 write_sequnlock(&rename_lock);
804 return 0; /* No mount points found in tree */ 1093 return 0; /* No mount points found in tree */
805positive: 1094positive:
806 spin_unlock(&dcache_lock); 1095 if (!locked && read_seqretry(&rename_lock, seq))
1096 goto rename_retry;
1097 if (locked)
1098 write_sequnlock(&rename_lock);
807 return 1; 1099 return 1;
1100
1101rename_retry:
1102 locked = 1;
1103 write_seqlock(&rename_lock);
1104 goto again;
808} 1105}
809EXPORT_SYMBOL(have_submounts); 1106EXPORT_SYMBOL(have_submounts);
810 1107
@@ -824,11 +1121,16 @@ EXPORT_SYMBOL(have_submounts);
824 */ 1121 */
825static int select_parent(struct dentry * parent) 1122static int select_parent(struct dentry * parent)
826{ 1123{
827 struct dentry *this_parent = parent; 1124 struct dentry *this_parent;
828 struct list_head *next; 1125 struct list_head *next;
1126 unsigned seq;
829 int found = 0; 1127 int found = 0;
1128 int locked = 0;
830 1129
831 spin_lock(&dcache_lock); 1130 seq = read_seqbegin(&rename_lock);
1131again:
1132 this_parent = parent;
1133 spin_lock(&this_parent->d_lock);
832repeat: 1134repeat:
833 next = this_parent->d_subdirs.next; 1135 next = this_parent->d_subdirs.next;
834resume: 1136resume:
@@ -837,11 +1139,13 @@ resume:
837 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 1139 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
838 next = tmp->next; 1140 next = tmp->next;
839 1141
1142 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1143
840 /* 1144 /*
841 * move only zero ref count dentries to the end 1145 * move only zero ref count dentries to the end
842 * of the unused list for prune_dcache 1146 * of the unused list for prune_dcache
843 */ 1147 */
844 if (!atomic_read(&dentry->d_count)) { 1148 if (!dentry->d_count) {
845 dentry_lru_move_tail(dentry); 1149 dentry_lru_move_tail(dentry);
846 found++; 1150 found++;
847 } else { 1151 } else {
@@ -853,28 +1157,63 @@ resume:
853 * ensures forward progress). We'll be coming back to find 1157 * ensures forward progress). We'll be coming back to find
854 * the rest. 1158 * the rest.
855 */ 1159 */
856 if (found && need_resched()) 1160 if (found && need_resched()) {
1161 spin_unlock(&dentry->d_lock);
857 goto out; 1162 goto out;
1163 }
858 1164
859 /* 1165 /*
860 * Descend a level if the d_subdirs list is non-empty. 1166 * Descend a level if the d_subdirs list is non-empty.
861 */ 1167 */
862 if (!list_empty(&dentry->d_subdirs)) { 1168 if (!list_empty(&dentry->d_subdirs)) {
1169 spin_unlock(&this_parent->d_lock);
1170 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
863 this_parent = dentry; 1171 this_parent = dentry;
1172 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
864 goto repeat; 1173 goto repeat;
865 } 1174 }
1175
1176 spin_unlock(&dentry->d_lock);
866 } 1177 }
867 /* 1178 /*
868 * All done at this level ... ascend and resume the search. 1179 * All done at this level ... ascend and resume the search.
869 */ 1180 */
870 if (this_parent != parent) { 1181 if (this_parent != parent) {
871 next = this_parent->d_u.d_child.next; 1182 struct dentry *tmp;
872 this_parent = this_parent->d_parent; 1183 struct dentry *child;
1184
1185 tmp = this_parent->d_parent;
1186 rcu_read_lock();
1187 spin_unlock(&this_parent->d_lock);
1188 child = this_parent;
1189 this_parent = tmp;
1190 spin_lock(&this_parent->d_lock);
1191 /* might go back up the wrong parent if we have had a rename
1192 * or deletion */
1193 if (this_parent != child->d_parent ||
1194 (!locked && read_seqretry(&rename_lock, seq))) {
1195 spin_unlock(&this_parent->d_lock);
1196 rcu_read_unlock();
1197 goto rename_retry;
1198 }
1199 rcu_read_unlock();
1200 next = child->d_u.d_child.next;
873 goto resume; 1201 goto resume;
874 } 1202 }
875out: 1203out:
876 spin_unlock(&dcache_lock); 1204 spin_unlock(&this_parent->d_lock);
1205 if (!locked && read_seqretry(&rename_lock, seq))
1206 goto rename_retry;
1207 if (locked)
1208 write_sequnlock(&rename_lock);
877 return found; 1209 return found;
1210
1211rename_retry:
1212 if (found)
1213 return found;
1214 locked = 1;
1215 write_seqlock(&rename_lock);
1216 goto again;
878} 1217}
879 1218
880/** 1219/**
@@ -908,16 +1247,13 @@ EXPORT_SYMBOL(shrink_dcache_parent);
908 */ 1247 */
909static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 1248static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
910{ 1249{
911 int nr_unused;
912
913 if (nr) { 1250 if (nr) {
914 if (!(gfp_mask & __GFP_FS)) 1251 if (!(gfp_mask & __GFP_FS))
915 return -1; 1252 return -1;
916 prune_dcache(nr); 1253 prune_dcache(nr);
917 } 1254 }
918 1255
919 nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); 1256 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
920 return (nr_unused / 100) * sysctl_vfs_cache_pressure;
921} 1257}
922 1258
923static struct shrinker dcache_shrinker = { 1259static struct shrinker dcache_shrinker = {
@@ -960,38 +1296,52 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
960 memcpy(dname, name->name, name->len); 1296 memcpy(dname, name->name, name->len);
961 dname[name->len] = 0; 1297 dname[name->len] = 0;
962 1298
963 atomic_set(&dentry->d_count, 1); 1299 dentry->d_count = 1;
964 dentry->d_flags = DCACHE_UNHASHED; 1300 dentry->d_flags = DCACHE_UNHASHED;
965 spin_lock_init(&dentry->d_lock); 1301 spin_lock_init(&dentry->d_lock);
1302 seqcount_init(&dentry->d_seq);
966 dentry->d_inode = NULL; 1303 dentry->d_inode = NULL;
967 dentry->d_parent = NULL; 1304 dentry->d_parent = NULL;
968 dentry->d_sb = NULL; 1305 dentry->d_sb = NULL;
969 dentry->d_op = NULL; 1306 dentry->d_op = NULL;
970 dentry->d_fsdata = NULL; 1307 dentry->d_fsdata = NULL;
971 dentry->d_mounted = 0; 1308 INIT_HLIST_BL_NODE(&dentry->d_hash);
972 INIT_HLIST_NODE(&dentry->d_hash);
973 INIT_LIST_HEAD(&dentry->d_lru); 1309 INIT_LIST_HEAD(&dentry->d_lru);
974 INIT_LIST_HEAD(&dentry->d_subdirs); 1310 INIT_LIST_HEAD(&dentry->d_subdirs);
975 INIT_LIST_HEAD(&dentry->d_alias); 1311 INIT_LIST_HEAD(&dentry->d_alias);
1312 INIT_LIST_HEAD(&dentry->d_u.d_child);
976 1313
977 if (parent) { 1314 if (parent) {
978 dentry->d_parent = dget(parent); 1315 spin_lock(&parent->d_lock);
1316 /*
1317 * don't need child lock because it is not subject
1318 * to concurrency here
1319 */
1320 __dget_dlock(parent);
1321 dentry->d_parent = parent;
979 dentry->d_sb = parent->d_sb; 1322 dentry->d_sb = parent->d_sb;
980 } else {
981 INIT_LIST_HEAD(&dentry->d_u.d_child);
982 }
983
984 spin_lock(&dcache_lock);
985 if (parent)
986 list_add(&dentry->d_u.d_child, &parent->d_subdirs); 1323 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
987 spin_unlock(&dcache_lock); 1324 spin_unlock(&parent->d_lock);
1325 }
988 1326
989 percpu_counter_inc(&nr_dentry); 1327 this_cpu_inc(nr_dentry);
990 1328
991 return dentry; 1329 return dentry;
992} 1330}
993EXPORT_SYMBOL(d_alloc); 1331EXPORT_SYMBOL(d_alloc);
994 1332
1333struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1334{
1335 struct dentry *dentry = d_alloc(NULL, name);
1336 if (dentry) {
1337 dentry->d_sb = sb;
1338 dentry->d_parent = dentry;
1339 dentry->d_flags |= DCACHE_DISCONNECTED;
1340 }
1341 return dentry;
1342}
1343EXPORT_SYMBOL(d_alloc_pseudo);
1344
995struct dentry *d_alloc_name(struct dentry *parent, const char *name) 1345struct dentry *d_alloc_name(struct dentry *parent, const char *name)
996{ 1346{
997 struct qstr q; 1347 struct qstr q;
@@ -1003,12 +1353,36 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1003} 1353}
1004EXPORT_SYMBOL(d_alloc_name); 1354EXPORT_SYMBOL(d_alloc_name);
1005 1355
1006/* the caller must hold dcache_lock */ 1356void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1357{
1358 BUG_ON(dentry->d_op);
1359 BUG_ON(dentry->d_flags & (DCACHE_OP_HASH |
1360 DCACHE_OP_COMPARE |
1361 DCACHE_OP_REVALIDATE |
1362 DCACHE_OP_DELETE ));
1363 dentry->d_op = op;
1364 if (!op)
1365 return;
1366 if (op->d_hash)
1367 dentry->d_flags |= DCACHE_OP_HASH;
1368 if (op->d_compare)
1369 dentry->d_flags |= DCACHE_OP_COMPARE;
1370 if (op->d_revalidate)
1371 dentry->d_flags |= DCACHE_OP_REVALIDATE;
1372 if (op->d_delete)
1373 dentry->d_flags |= DCACHE_OP_DELETE;
1374
1375}
1376EXPORT_SYMBOL(d_set_d_op);
1377
1007static void __d_instantiate(struct dentry *dentry, struct inode *inode) 1378static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1008{ 1379{
1380 spin_lock(&dentry->d_lock);
1009 if (inode) 1381 if (inode)
1010 list_add(&dentry->d_alias, &inode->i_dentry); 1382 list_add(&dentry->d_alias, &inode->i_dentry);
1011 dentry->d_inode = inode; 1383 dentry->d_inode = inode;
1384 dentry_rcuwalk_barrier(dentry);
1385 spin_unlock(&dentry->d_lock);
1012 fsnotify_d_instantiate(dentry, inode); 1386 fsnotify_d_instantiate(dentry, inode);
1013} 1387}
1014 1388
@@ -1030,9 +1404,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1030void d_instantiate(struct dentry *entry, struct inode * inode) 1404void d_instantiate(struct dentry *entry, struct inode * inode)
1031{ 1405{
1032 BUG_ON(!list_empty(&entry->d_alias)); 1406 BUG_ON(!list_empty(&entry->d_alias));
1033 spin_lock(&dcache_lock); 1407 if (inode)
1408 spin_lock(&inode->i_lock);
1034 __d_instantiate(entry, inode); 1409 __d_instantiate(entry, inode);
1035 spin_unlock(&dcache_lock); 1410 if (inode)
1411 spin_unlock(&inode->i_lock);
1036 security_d_instantiate(entry, inode); 1412 security_d_instantiate(entry, inode);
1037} 1413}
1038EXPORT_SYMBOL(d_instantiate); 1414EXPORT_SYMBOL(d_instantiate);
@@ -1069,15 +1445,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1069 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 1445 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1070 struct qstr *qstr = &alias->d_name; 1446 struct qstr *qstr = &alias->d_name;
1071 1447
1448 /*
1449 * Don't need alias->d_lock here, because aliases with
1450 * d_parent == entry->d_parent are not subject to name or
1451 * parent changes, because the parent inode i_mutex is held.
1452 */
1072 if (qstr->hash != hash) 1453 if (qstr->hash != hash)
1073 continue; 1454 continue;
1074 if (alias->d_parent != entry->d_parent) 1455 if (alias->d_parent != entry->d_parent)
1075 continue; 1456 continue;
1076 if (qstr->len != len) 1457 if (dentry_cmp(qstr->name, qstr->len, name, len))
1077 continue; 1458 continue;
1078 if (memcmp(qstr->name, name, len)) 1459 __dget(alias);
1079 continue;
1080 dget_locked(alias);
1081 return alias; 1460 return alias;
1082 } 1461 }
1083 1462
@@ -1091,9 +1470,11 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1091 1470
1092 BUG_ON(!list_empty(&entry->d_alias)); 1471 BUG_ON(!list_empty(&entry->d_alias));
1093 1472
1094 spin_lock(&dcache_lock); 1473 if (inode)
1474 spin_lock(&inode->i_lock);
1095 result = __d_instantiate_unique(entry, inode); 1475 result = __d_instantiate_unique(entry, inode);
1096 spin_unlock(&dcache_lock); 1476 if (inode)
1477 spin_unlock(&inode->i_lock);
1097 1478
1098 if (!result) { 1479 if (!result) {
1099 security_d_instantiate(entry, inode); 1480 security_d_instantiate(entry, inode);
@@ -1134,14 +1515,6 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1134} 1515}
1135EXPORT_SYMBOL(d_alloc_root); 1516EXPORT_SYMBOL(d_alloc_root);
1136 1517
1137static inline struct hlist_head *d_hash(struct dentry *parent,
1138 unsigned long hash)
1139{
1140 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
1141 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
1142 return dentry_hashtable + (hash & D_HASHMASK);
1143}
1144
1145/** 1518/**
1146 * d_obtain_alias - find or allocate a dentry for a given inode 1519 * d_obtain_alias - find or allocate a dentry for a given inode
1147 * @inode: inode to allocate the dentry for 1520 * @inode: inode to allocate the dentry for
@@ -1182,10 +1555,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
1182 } 1555 }
1183 tmp->d_parent = tmp; /* make sure dput doesn't croak */ 1556 tmp->d_parent = tmp; /* make sure dput doesn't croak */
1184 1557
1185 spin_lock(&dcache_lock); 1558
1559 spin_lock(&inode->i_lock);
1186 res = __d_find_alias(inode, 0); 1560 res = __d_find_alias(inode, 0);
1187 if (res) { 1561 if (res) {
1188 spin_unlock(&dcache_lock); 1562 spin_unlock(&inode->i_lock);
1189 dput(tmp); 1563 dput(tmp);
1190 goto out_iput; 1564 goto out_iput;
1191 } 1565 }
@@ -1195,12 +1569,14 @@ struct dentry *d_obtain_alias(struct inode *inode)
1195 tmp->d_sb = inode->i_sb; 1569 tmp->d_sb = inode->i_sb;
1196 tmp->d_inode = inode; 1570 tmp->d_inode = inode;
1197 tmp->d_flags |= DCACHE_DISCONNECTED; 1571 tmp->d_flags |= DCACHE_DISCONNECTED;
1198 tmp->d_flags &= ~DCACHE_UNHASHED;
1199 list_add(&tmp->d_alias, &inode->i_dentry); 1572 list_add(&tmp->d_alias, &inode->i_dentry);
1200 hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); 1573 bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
1574 tmp->d_flags &= ~DCACHE_UNHASHED;
1575 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1576 __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
1201 spin_unlock(&tmp->d_lock); 1577 spin_unlock(&tmp->d_lock);
1578 spin_unlock(&inode->i_lock);
1202 1579
1203 spin_unlock(&dcache_lock);
1204 return tmp; 1580 return tmp;
1205 1581
1206 out_iput: 1582 out_iput:
@@ -1230,18 +1606,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1230 struct dentry *new = NULL; 1606 struct dentry *new = NULL;
1231 1607
1232 if (inode && S_ISDIR(inode->i_mode)) { 1608 if (inode && S_ISDIR(inode->i_mode)) {
1233 spin_lock(&dcache_lock); 1609 spin_lock(&inode->i_lock);
1234 new = __d_find_alias(inode, 1); 1610 new = __d_find_alias(inode, 1);
1235 if (new) { 1611 if (new) {
1236 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); 1612 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1237 spin_unlock(&dcache_lock); 1613 spin_unlock(&inode->i_lock);
1238 security_d_instantiate(new, inode); 1614 security_d_instantiate(new, inode);
1239 d_move(new, dentry); 1615 d_move(new, dentry);
1240 iput(inode); 1616 iput(inode);
1241 } else { 1617 } else {
1242 /* already taking dcache_lock, so d_add() by hand */ 1618 /* already taking inode->i_lock, so d_add() by hand */
1243 __d_instantiate(dentry, inode); 1619 __d_instantiate(dentry, inode);
1244 spin_unlock(&dcache_lock); 1620 spin_unlock(&inode->i_lock);
1245 security_d_instantiate(dentry, inode); 1621 security_d_instantiate(dentry, inode);
1246 d_rehash(dentry); 1622 d_rehash(dentry);
1247 } 1623 }
@@ -1314,10 +1690,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1314 * Negative dentry: instantiate it unless the inode is a directory and 1690 * Negative dentry: instantiate it unless the inode is a directory and
1315 * already has a dentry. 1691 * already has a dentry.
1316 */ 1692 */
1317 spin_lock(&dcache_lock); 1693 spin_lock(&inode->i_lock);
1318 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { 1694 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
1319 __d_instantiate(found, inode); 1695 __d_instantiate(found, inode);
1320 spin_unlock(&dcache_lock); 1696 spin_unlock(&inode->i_lock);
1321 security_d_instantiate(found, inode); 1697 security_d_instantiate(found, inode);
1322 return found; 1698 return found;
1323 } 1699 }
@@ -1327,8 +1703,8 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1327 * reference to it, move it in place and use it. 1703 * reference to it, move it in place and use it.
1328 */ 1704 */
1329 new = list_entry(inode->i_dentry.next, struct dentry, d_alias); 1705 new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
1330 dget_locked(new); 1706 __dget(new);
1331 spin_unlock(&dcache_lock); 1707 spin_unlock(&inode->i_lock);
1332 security_d_instantiate(found, inode); 1708 security_d_instantiate(found, inode);
1333 d_move(new, found); 1709 d_move(new, found);
1334 iput(inode); 1710 iput(inode);
@@ -1342,6 +1718,112 @@ err_out:
1342EXPORT_SYMBOL(d_add_ci); 1718EXPORT_SYMBOL(d_add_ci);
1343 1719
1344/** 1720/**
1721 * __d_lookup_rcu - search for a dentry (racy, store-free)
1722 * @parent: parent dentry
1723 * @name: qstr of name we wish to find
1724 * @seq: returns d_seq value at the point where the dentry was found
1725 * @inode: returns dentry->d_inode when the inode was found valid.
1726 * Returns: dentry, or NULL
1727 *
1728 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1729 * resolution (store-free path walking) design described in
1730 * Documentation/filesystems/path-lookup.txt.
1731 *
1732 * This is not to be used outside core vfs.
1733 *
1734 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
1735 * held, and rcu_read_lock held. The returned dentry must not be stored into
1736 * without taking d_lock and checking d_seq sequence count against @seq
1737 * returned here.
1738 *
1739 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1740 * function.
1741 *
1742 * Alternatively, __d_lookup_rcu may be called again to look up the child of
1743 * the returned dentry, so long as its parent's seqlock is checked after the
1744 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1745 * is formed, giving integrity down the path walk.
1746 */
1747struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1748 unsigned *seq, struct inode **inode)
1749{
1750 unsigned int len = name->len;
1751 unsigned int hash = name->hash;
1752 const unsigned char *str = name->name;
1753 struct dcache_hash_bucket *b = d_hash(parent, hash);
1754 struct hlist_bl_node *node;
1755 struct dentry *dentry;
1756
1757 /*
1758 * Note: There is significant duplication with __d_lookup_rcu which is
1759 * required to prevent single threaded performance regressions
1760 * especially on architectures where smp_rmb (in seqcounts) are costly.
1761 * Keep the two functions in sync.
1762 */
1763
1764 /*
1765 * The hash list is protected using RCU.
1766 *
1767 * Carefully use d_seq when comparing a candidate dentry, to avoid
1768 * races with d_move().
1769 *
1770 * It is possible that concurrent renames can mess up our list
1771 * walk here and result in missing our dentry, resulting in the
1772 * false-negative result. d_lookup() protects against concurrent
1773 * renames using rename_lock seqlock.
1774 *
1775 * See Documentation/vfs/dcache-locking.txt for more details.
1776 */
1777 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
1778 struct inode *i;
1779 const char *tname;
1780 int tlen;
1781
1782 if (dentry->d_name.hash != hash)
1783 continue;
1784
1785seqretry:
1786 *seq = read_seqcount_begin(&dentry->d_seq);
1787 if (dentry->d_parent != parent)
1788 continue;
1789 if (d_unhashed(dentry))
1790 continue;
1791 tlen = dentry->d_name.len;
1792 tname = dentry->d_name.name;
1793 i = dentry->d_inode;
1794 prefetch(tname);
1795 if (i)
1796 prefetch(i);
1797 /*
1798 * This seqcount check is required to ensure name and
1799 * len are loaded atomically, so as not to walk off the
1800 * edge of memory when walking. If we could load this
1801 * atomically some other way, we could drop this check.
1802 */
1803 if (read_seqcount_retry(&dentry->d_seq, *seq))
1804 goto seqretry;
1805 if (parent->d_flags & DCACHE_OP_COMPARE) {
1806 if (parent->d_op->d_compare(parent, *inode,
1807 dentry, i,
1808 tlen, tname, name))
1809 continue;
1810 } else {
1811 if (dentry_cmp(tname, tlen, str, len))
1812 continue;
1813 }
1814 /*
1815 * No extra seqcount check is required after the name
1816 * compare. The caller must perform a seqcount check in
1817 * order to do anything useful with the returned dentry
1818 * anyway.
1819 */
1820 *inode = i;
1821 return dentry;
1822 }
1823 return NULL;
1824}
1825
1826/**
1345 * d_lookup - search for a dentry 1827 * d_lookup - search for a dentry
1346 * @parent: parent dentry 1828 * @parent: parent dentry
1347 * @name: qstr of name we wish to find 1829 * @name: qstr of name we wish to find
@@ -1352,10 +1834,10 @@ EXPORT_SYMBOL(d_add_ci);
1352 * dentry is returned. The caller must use dput to free the entry when it has 1834 * dentry is returned. The caller must use dput to free the entry when it has
1353 * finished using it. %NULL is returned if the dentry does not exist. 1835 * finished using it. %NULL is returned if the dentry does not exist.
1354 */ 1836 */
1355struct dentry * d_lookup(struct dentry * parent, struct qstr * name) 1837struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
1356{ 1838{
1357 struct dentry * dentry = NULL; 1839 struct dentry *dentry;
1358 unsigned long seq; 1840 unsigned seq;
1359 1841
1360 do { 1842 do {
1361 seq = read_seqbegin(&rename_lock); 1843 seq = read_seqbegin(&rename_lock);
@@ -1367,7 +1849,7 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1367} 1849}
1368EXPORT_SYMBOL(d_lookup); 1850EXPORT_SYMBOL(d_lookup);
1369 1851
1370/* 1852/**
1371 * __d_lookup - search for a dentry (racy) 1853 * __d_lookup - search for a dentry (racy)
1372 * @parent: parent dentry 1854 * @parent: parent dentry
1373 * @name: qstr of name we wish to find 1855 * @name: qstr of name we wish to find
@@ -1382,17 +1864,24 @@ EXPORT_SYMBOL(d_lookup);
1382 * 1864 *
1383 * __d_lookup callers must be commented. 1865 * __d_lookup callers must be commented.
1384 */ 1866 */
1385struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) 1867struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1386{ 1868{
1387 unsigned int len = name->len; 1869 unsigned int len = name->len;
1388 unsigned int hash = name->hash; 1870 unsigned int hash = name->hash;
1389 const unsigned char *str = name->name; 1871 const unsigned char *str = name->name;
1390 struct hlist_head *head = d_hash(parent,hash); 1872 struct dcache_hash_bucket *b = d_hash(parent, hash);
1873 struct hlist_bl_node *node;
1391 struct dentry *found = NULL; 1874 struct dentry *found = NULL;
1392 struct hlist_node *node;
1393 struct dentry *dentry; 1875 struct dentry *dentry;
1394 1876
1395 /* 1877 /*
1878 * Note: There is significant duplication with __d_lookup_rcu which is
1879 * required to prevent single threaded performance regressions
1880 * especially on architectures where smp_rmb (in seqcounts) are costly.
1881 * Keep the two functions in sync.
1882 */
1883
1884 /*
1396 * The hash list is protected using RCU. 1885 * The hash list is protected using RCU.
1397 * 1886 *
1398 * Take d_lock when comparing a candidate dentry, to avoid races 1887 * Take d_lock when comparing a candidate dentry, to avoid races
@@ -1407,25 +1896,16 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1407 */ 1896 */
1408 rcu_read_lock(); 1897 rcu_read_lock();
1409 1898
1410 hlist_for_each_entry_rcu(dentry, node, head, d_hash) { 1899 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
1411 struct qstr *qstr; 1900 const char *tname;
1901 int tlen;
1412 1902
1413 if (dentry->d_name.hash != hash) 1903 if (dentry->d_name.hash != hash)
1414 continue; 1904 continue;
1415 if (dentry->d_parent != parent)
1416 continue;
1417 1905
1418 spin_lock(&dentry->d_lock); 1906 spin_lock(&dentry->d_lock);
1419
1420 /*
1421 * Recheck the dentry after taking the lock - d_move may have
1422 * changed things. Don't bother checking the hash because
1423 * we're about to compare the whole name anyway.
1424 */
1425 if (dentry->d_parent != parent) 1907 if (dentry->d_parent != parent)
1426 goto next; 1908 goto next;
1427
1428 /* non-existing due to RCU? */
1429 if (d_unhashed(dentry)) 1909 if (d_unhashed(dentry))
1430 goto next; 1910 goto next;
1431 1911
@@ -1433,18 +1913,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1433 * It is safe to compare names since d_move() cannot 1913 * It is safe to compare names since d_move() cannot
1434 * change the qstr (protected by d_lock). 1914 * change the qstr (protected by d_lock).
1435 */ 1915 */
1436 qstr = &dentry->d_name; 1916 tlen = dentry->d_name.len;
1437 if (parent->d_op && parent->d_op->d_compare) { 1917 tname = dentry->d_name.name;
1438 if (parent->d_op->d_compare(parent, qstr, name)) 1918 if (parent->d_flags & DCACHE_OP_COMPARE) {
1919 if (parent->d_op->d_compare(parent, parent->d_inode,
1920 dentry, dentry->d_inode,
1921 tlen, tname, name))
1439 goto next; 1922 goto next;
1440 } else { 1923 } else {
1441 if (qstr->len != len) 1924 if (dentry_cmp(tname, tlen, str, len))
1442 goto next;
1443 if (memcmp(qstr->name, str, len))
1444 goto next; 1925 goto next;
1445 } 1926 }
1446 1927
1447 atomic_inc(&dentry->d_count); 1928 dentry->d_count++;
1448 found = dentry; 1929 found = dentry;
1449 spin_unlock(&dentry->d_lock); 1930 spin_unlock(&dentry->d_lock);
1450 break; 1931 break;
@@ -1473,8 +1954,8 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1473 * routine may choose to leave the hash value unchanged. 1954 * routine may choose to leave the hash value unchanged.
1474 */ 1955 */
1475 name->hash = full_name_hash(name->name, name->len); 1956 name->hash = full_name_hash(name->name, name->len);
1476 if (dir->d_op && dir->d_op->d_hash) { 1957 if (dir->d_flags & DCACHE_OP_HASH) {
1477 if (dir->d_op->d_hash(dir, name) < 0) 1958 if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1478 goto out; 1959 goto out;
1479 } 1960 }
1480 dentry = d_lookup(dir, name); 1961 dentry = d_lookup(dir, name);
@@ -1483,34 +1964,32 @@ out:
1483} 1964}
1484 1965
1485/** 1966/**
1486 * d_validate - verify dentry provided from insecure source 1967 * d_validate - verify dentry provided from insecure source (deprecated)
1487 * @dentry: The dentry alleged to be valid child of @dparent 1968 * @dentry: The dentry alleged to be valid child of @dparent
1488 * @dparent: The parent dentry (known to be valid) 1969 * @dparent: The parent dentry (known to be valid)
1489 * 1970 *
1490 * An insecure source has sent us a dentry, here we verify it and dget() it. 1971 * An insecure source has sent us a dentry, here we verify it and dget() it.
1491 * This is used by ncpfs in its readdir implementation. 1972 * This is used by ncpfs in its readdir implementation.
1492 * Zero is returned in the dentry is invalid. 1973 * Zero is returned in the dentry is invalid.
1974 *
1975 * This function is slow for big directories, and deprecated, do not use it.
1493 */ 1976 */
1494int d_validate(struct dentry *dentry, struct dentry *parent) 1977int d_validate(struct dentry *dentry, struct dentry *dparent)
1495{ 1978{
1496 struct hlist_head *head = d_hash(parent, dentry->d_name.hash); 1979 struct dentry *child;
1497 struct hlist_node *node;
1498 struct dentry *d;
1499
1500 /* Check whether the ptr might be valid at all.. */
1501 if (!kmem_ptr_validate(dentry_cache, dentry))
1502 return 0;
1503 if (dentry->d_parent != parent)
1504 return 0;
1505 1980
1506 rcu_read_lock(); 1981 spin_lock(&dparent->d_lock);
1507 hlist_for_each_entry_rcu(d, node, head, d_hash) { 1982 list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
1508 if (d == dentry) { 1983 if (dentry == child) {
1509 dget(dentry); 1984 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1985 __dget_dlock(dentry);
1986 spin_unlock(&dentry->d_lock);
1987 spin_unlock(&dparent->d_lock);
1510 return 1; 1988 return 1;
1511 } 1989 }
1512 } 1990 }
1513 rcu_read_unlock(); 1991 spin_unlock(&dparent->d_lock);
1992
1514 return 0; 1993 return 0;
1515} 1994}
1516EXPORT_SYMBOL(d_validate); 1995EXPORT_SYMBOL(d_validate);
@@ -1538,16 +2017,23 @@ EXPORT_SYMBOL(d_validate);
1538 2017
1539void d_delete(struct dentry * dentry) 2018void d_delete(struct dentry * dentry)
1540{ 2019{
2020 struct inode *inode;
1541 int isdir = 0; 2021 int isdir = 0;
1542 /* 2022 /*
1543 * Are we the only user? 2023 * Are we the only user?
1544 */ 2024 */
1545 spin_lock(&dcache_lock); 2025again:
1546 spin_lock(&dentry->d_lock); 2026 spin_lock(&dentry->d_lock);
1547 isdir = S_ISDIR(dentry->d_inode->i_mode); 2027 inode = dentry->d_inode;
1548 if (atomic_read(&dentry->d_count) == 1) { 2028 isdir = S_ISDIR(inode->i_mode);
2029 if (dentry->d_count == 1) {
2030 if (inode && !spin_trylock(&inode->i_lock)) {
2031 spin_unlock(&dentry->d_lock);
2032 cpu_relax();
2033 goto again;
2034 }
1549 dentry->d_flags &= ~DCACHE_CANT_MOUNT; 2035 dentry->d_flags &= ~DCACHE_CANT_MOUNT;
1550 dentry_iput(dentry); 2036 dentry_unlink_inode(dentry);
1551 fsnotify_nameremove(dentry, isdir); 2037 fsnotify_nameremove(dentry, isdir);
1552 return; 2038 return;
1553 } 2039 }
@@ -1556,17 +2042,18 @@ void d_delete(struct dentry * dentry)
1556 __d_drop(dentry); 2042 __d_drop(dentry);
1557 2043
1558 spin_unlock(&dentry->d_lock); 2044 spin_unlock(&dentry->d_lock);
1559 spin_unlock(&dcache_lock);
1560 2045
1561 fsnotify_nameremove(dentry, isdir); 2046 fsnotify_nameremove(dentry, isdir);
1562} 2047}
1563EXPORT_SYMBOL(d_delete); 2048EXPORT_SYMBOL(d_delete);
1564 2049
1565static void __d_rehash(struct dentry * entry, struct hlist_head *list) 2050static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
1566{ 2051{
1567 2052 BUG_ON(!d_unhashed(entry));
2053 spin_lock_bucket(b);
1568 entry->d_flags &= ~DCACHE_UNHASHED; 2054 entry->d_flags &= ~DCACHE_UNHASHED;
1569 hlist_add_head_rcu(&entry->d_hash, list); 2055 hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
2056 spin_unlock_bucket(b);
1570} 2057}
1571 2058
1572static void _d_rehash(struct dentry * entry) 2059static void _d_rehash(struct dentry * entry)
@@ -1583,25 +2070,39 @@ static void _d_rehash(struct dentry * entry)
1583 2070
1584void d_rehash(struct dentry * entry) 2071void d_rehash(struct dentry * entry)
1585{ 2072{
1586 spin_lock(&dcache_lock);
1587 spin_lock(&entry->d_lock); 2073 spin_lock(&entry->d_lock);
1588 _d_rehash(entry); 2074 _d_rehash(entry);
1589 spin_unlock(&entry->d_lock); 2075 spin_unlock(&entry->d_lock);
1590 spin_unlock(&dcache_lock);
1591} 2076}
1592EXPORT_SYMBOL(d_rehash); 2077EXPORT_SYMBOL(d_rehash);
1593 2078
1594/* 2079/**
1595 * When switching names, the actual string doesn't strictly have to 2080 * dentry_update_name_case - update case insensitive dentry with a new name
1596 * be preserved in the target - because we're dropping the target 2081 * @dentry: dentry to be updated
1597 * anyway. As such, we can just do a simple memcpy() to copy over 2082 * @name: new name
1598 * the new name before we switch.
1599 * 2083 *
1600 * Note that we have to be a lot more careful about getting the hash 2084 * Update a case insensitive dentry with new case of name.
1601 * switched - we have to switch the hash value properly even if it 2085 *
1602 * then no longer matches the actual (corrupted) string of the target. 2086 * dentry must have been returned by d_lookup with name @name. Old and new
1603 * The hash value has to match the hash queue that the dentry is on.. 2087 * name lengths must match (ie. no d_compare which allows mismatched name
2088 * lengths).
2089 *
2090 * Parent inode i_mutex must be held over d_lookup and into this call (to
2091 * keep renames and concurrent inserts, and readdir(2) away).
1604 */ 2092 */
2093void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2094{
2095 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
2096 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2097
2098 spin_lock(&dentry->d_lock);
2099 write_seqcount_begin(&dentry->d_seq);
2100 memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2101 write_seqcount_end(&dentry->d_seq);
2102 spin_unlock(&dentry->d_lock);
2103}
2104EXPORT_SYMBOL(dentry_update_name_case);
2105
1605static void switch_names(struct dentry *dentry, struct dentry *target) 2106static void switch_names(struct dentry *dentry, struct dentry *target)
1606{ 2107{
1607 if (dname_external(target)) { 2108 if (dname_external(target)) {
@@ -1643,54 +2144,84 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
1643 swap(dentry->d_name.len, target->d_name.len); 2144 swap(dentry->d_name.len, target->d_name.len);
1644} 2145}
1645 2146
2147static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2148{
2149 /*
2150 * XXXX: do we really need to take target->d_lock?
2151 */
2152 if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2153 spin_lock(&target->d_parent->d_lock);
2154 else {
2155 if (d_ancestor(dentry->d_parent, target->d_parent)) {
2156 spin_lock(&dentry->d_parent->d_lock);
2157 spin_lock_nested(&target->d_parent->d_lock,
2158 DENTRY_D_LOCK_NESTED);
2159 } else {
2160 spin_lock(&target->d_parent->d_lock);
2161 spin_lock_nested(&dentry->d_parent->d_lock,
2162 DENTRY_D_LOCK_NESTED);
2163 }
2164 }
2165 if (target < dentry) {
2166 spin_lock_nested(&target->d_lock, 2);
2167 spin_lock_nested(&dentry->d_lock, 3);
2168 } else {
2169 spin_lock_nested(&dentry->d_lock, 2);
2170 spin_lock_nested(&target->d_lock, 3);
2171 }
2172}
2173
2174static void dentry_unlock_parents_for_move(struct dentry *dentry,
2175 struct dentry *target)
2176{
2177 if (target->d_parent != dentry->d_parent)
2178 spin_unlock(&dentry->d_parent->d_lock);
2179 if (target->d_parent != target)
2180 spin_unlock(&target->d_parent->d_lock);
2181}
2182
1646/* 2183/*
1647 * We cannibalize "target" when moving dentry on top of it, 2184 * When switching names, the actual string doesn't strictly have to
1648 * because it's going to be thrown away anyway. We could be more 2185 * be preserved in the target - because we're dropping the target
1649 * polite about it, though. 2186 * anyway. As such, we can just do a simple memcpy() to copy over
1650 * 2187 * the new name before we switch.
1651 * This forceful removal will result in ugly /proc output if 2188 *
1652 * somebody holds a file open that got deleted due to a rename. 2189 * Note that we have to be a lot more careful about getting the hash
1653 * We could be nicer about the deleted file, and let it show 2190 * switched - we have to switch the hash value properly even if it
1654 * up under the name it had before it was deleted rather than 2191 * then no longer matches the actual (corrupted) string of the target.
1655 * under the original name of the file that was moved on top of it. 2192 * The hash value has to match the hash queue that the dentry is on..
1656 */ 2193 */
1657
1658/* 2194/*
1659 * d_move_locked - move a dentry 2195 * d_move - move a dentry
1660 * @dentry: entry to move 2196 * @dentry: entry to move
1661 * @target: new dentry 2197 * @target: new dentry
1662 * 2198 *
1663 * Update the dcache to reflect the move of a file name. Negative 2199 * Update the dcache to reflect the move of a file name. Negative
1664 * dcache entries should not be moved in this way. 2200 * dcache entries should not be moved in this way.
1665 */ 2201 */
1666static void d_move_locked(struct dentry * dentry, struct dentry * target) 2202void d_move(struct dentry * dentry, struct dentry * target)
1667{ 2203{
1668 struct hlist_head *list;
1669
1670 if (!dentry->d_inode) 2204 if (!dentry->d_inode)
1671 printk(KERN_WARNING "VFS: moving negative dcache entry\n"); 2205 printk(KERN_WARNING "VFS: moving negative dcache entry\n");
1672 2206
2207 BUG_ON(d_ancestor(dentry, target));
2208 BUG_ON(d_ancestor(target, dentry));
2209
1673 write_seqlock(&rename_lock); 2210 write_seqlock(&rename_lock);
1674 /*
1675 * XXXX: do we really need to take target->d_lock?
1676 */
1677 if (target < dentry) {
1678 spin_lock(&target->d_lock);
1679 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1680 } else {
1681 spin_lock(&dentry->d_lock);
1682 spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
1683 }
1684 2211
1685 /* Move the dentry to the target hash queue, if on different bucket */ 2212 dentry_lock_for_move(dentry, target);
1686 if (d_unhashed(dentry))
1687 goto already_unhashed;
1688 2213
1689 hlist_del_rcu(&dentry->d_hash); 2214 write_seqcount_begin(&dentry->d_seq);
2215 write_seqcount_begin(&target->d_seq);
1690 2216
1691already_unhashed: 2217 /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
1692 list = d_hash(target->d_parent, target->d_name.hash); 2218
1693 __d_rehash(dentry, list); 2219 /*
2220 * Move the dentry to the target hash queue. Don't bother checking
2221 * for the same hash queue because of how unlikely it is.
2222 */
2223 __d_drop(dentry);
2224 __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
1694 2225
1695 /* Unhash the target: dput() will then get rid of it */ 2226 /* Unhash the target: dput() will then get rid of it */
1696 __d_drop(target); 2227 __d_drop(target);
@@ -1715,27 +2246,16 @@ already_unhashed:
1715 } 2246 }
1716 2247
1717 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); 2248 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2249
2250 write_seqcount_end(&target->d_seq);
2251 write_seqcount_end(&dentry->d_seq);
2252
2253 dentry_unlock_parents_for_move(dentry, target);
1718 spin_unlock(&target->d_lock); 2254 spin_unlock(&target->d_lock);
1719 fsnotify_d_move(dentry); 2255 fsnotify_d_move(dentry);
1720 spin_unlock(&dentry->d_lock); 2256 spin_unlock(&dentry->d_lock);
1721 write_sequnlock(&rename_lock); 2257 write_sequnlock(&rename_lock);
1722} 2258}
1723
1724/**
1725 * d_move - move a dentry
1726 * @dentry: entry to move
1727 * @target: new dentry
1728 *
1729 * Update the dcache to reflect the move of a file name. Negative
1730 * dcache entries should not be moved in this way.
1731 */
1732
1733void d_move(struct dentry * dentry, struct dentry * target)
1734{
1735 spin_lock(&dcache_lock);
1736 d_move_locked(dentry, target);
1737 spin_unlock(&dcache_lock);
1738}
1739EXPORT_SYMBOL(d_move); 2259EXPORT_SYMBOL(d_move);
1740 2260
1741/** 2261/**
@@ -1761,13 +2281,13 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
1761 * This helper attempts to cope with remotely renamed directories 2281 * This helper attempts to cope with remotely renamed directories
1762 * 2282 *
1763 * It assumes that the caller is already holding 2283 * It assumes that the caller is already holding
1764 * dentry->d_parent->d_inode->i_mutex and the dcache_lock 2284 * dentry->d_parent->d_inode->i_mutex and the inode->i_lock
1765 * 2285 *
1766 * Note: If ever the locking in lock_rename() changes, then please 2286 * Note: If ever the locking in lock_rename() changes, then please
1767 * remember to update this too... 2287 * remember to update this too...
1768 */ 2288 */
1769static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) 2289static struct dentry *__d_unalias(struct inode *inode,
1770 __releases(dcache_lock) 2290 struct dentry *dentry, struct dentry *alias)
1771{ 2291{
1772 struct mutex *m1 = NULL, *m2 = NULL; 2292 struct mutex *m1 = NULL, *m2 = NULL;
1773 struct dentry *ret; 2293 struct dentry *ret;
@@ -1790,10 +2310,10 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1790 goto out_err; 2310 goto out_err;
1791 m2 = &alias->d_parent->d_inode->i_mutex; 2311 m2 = &alias->d_parent->d_inode->i_mutex;
1792out_unalias: 2312out_unalias:
1793 d_move_locked(alias, dentry); 2313 d_move(alias, dentry);
1794 ret = alias; 2314 ret = alias;
1795out_err: 2315out_err:
1796 spin_unlock(&dcache_lock); 2316 spin_unlock(&inode->i_lock);
1797 if (m2) 2317 if (m2)
1798 mutex_unlock(m2); 2318 mutex_unlock(m2);
1799 if (m1) 2319 if (m1)
@@ -1804,17 +2324,23 @@ out_err:
1804/* 2324/*
1805 * Prepare an anonymous dentry for life in the superblock's dentry tree as a 2325 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
1806 * named dentry in place of the dentry to be replaced. 2326 * named dentry in place of the dentry to be replaced.
2327 * returns with anon->d_lock held!
1807 */ 2328 */
1808static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) 2329static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1809{ 2330{
1810 struct dentry *dparent, *aparent; 2331 struct dentry *dparent, *aparent;
1811 2332
1812 switch_names(dentry, anon); 2333 dentry_lock_for_move(anon, dentry);
1813 swap(dentry->d_name.hash, anon->d_name.hash); 2334
2335 write_seqcount_begin(&dentry->d_seq);
2336 write_seqcount_begin(&anon->d_seq);
1814 2337
1815 dparent = dentry->d_parent; 2338 dparent = dentry->d_parent;
1816 aparent = anon->d_parent; 2339 aparent = anon->d_parent;
1817 2340
2341 switch_names(dentry, anon);
2342 swap(dentry->d_name.hash, anon->d_name.hash);
2343
1818 dentry->d_parent = (aparent == anon) ? dentry : aparent; 2344 dentry->d_parent = (aparent == anon) ? dentry : aparent;
1819 list_del(&dentry->d_u.d_child); 2345 list_del(&dentry->d_u.d_child);
1820 if (!IS_ROOT(dentry)) 2346 if (!IS_ROOT(dentry))
@@ -1829,6 +2355,13 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1829 else 2355 else
1830 INIT_LIST_HEAD(&anon->d_u.d_child); 2356 INIT_LIST_HEAD(&anon->d_u.d_child);
1831 2357
2358 write_seqcount_end(&dentry->d_seq);
2359 write_seqcount_end(&anon->d_seq);
2360
2361 dentry_unlock_parents_for_move(anon, dentry);
2362 spin_unlock(&dentry->d_lock);
2363
2364 /* anon->d_lock still locked, returns locked */
1832 anon->d_flags &= ~DCACHE_DISCONNECTED; 2365 anon->d_flags &= ~DCACHE_DISCONNECTED;
1833} 2366}
1834 2367
@@ -1846,14 +2379,15 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1846 2379
1847 BUG_ON(!d_unhashed(dentry)); 2380 BUG_ON(!d_unhashed(dentry));
1848 2381
1849 spin_lock(&dcache_lock);
1850
1851 if (!inode) { 2382 if (!inode) {
1852 actual = dentry; 2383 actual = dentry;
1853 __d_instantiate(dentry, NULL); 2384 __d_instantiate(dentry, NULL);
1854 goto found_lock; 2385 d_rehash(actual);
2386 goto out_nolock;
1855 } 2387 }
1856 2388
2389 spin_lock(&inode->i_lock);
2390
1857 if (S_ISDIR(inode->i_mode)) { 2391 if (S_ISDIR(inode->i_mode)) {
1858 struct dentry *alias; 2392 struct dentry *alias;
1859 2393
@@ -1864,13 +2398,12 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1864 /* Is this an anonymous mountpoint that we could splice 2398 /* Is this an anonymous mountpoint that we could splice
1865 * into our tree? */ 2399 * into our tree? */
1866 if (IS_ROOT(alias)) { 2400 if (IS_ROOT(alias)) {
1867 spin_lock(&alias->d_lock);
1868 __d_materialise_dentry(dentry, alias); 2401 __d_materialise_dentry(dentry, alias);
1869 __d_drop(alias); 2402 __d_drop(alias);
1870 goto found; 2403 goto found;
1871 } 2404 }
1872 /* Nope, but we must(!) avoid directory aliasing */ 2405 /* Nope, but we must(!) avoid directory aliasing */
1873 actual = __d_unalias(dentry, alias); 2406 actual = __d_unalias(inode, dentry, alias);
1874 if (IS_ERR(actual)) 2407 if (IS_ERR(actual))
1875 dput(alias); 2408 dput(alias);
1876 goto out_nolock; 2409 goto out_nolock;
@@ -1881,15 +2414,14 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1881 actual = __d_instantiate_unique(dentry, inode); 2414 actual = __d_instantiate_unique(dentry, inode);
1882 if (!actual) 2415 if (!actual)
1883 actual = dentry; 2416 actual = dentry;
1884 else if (unlikely(!d_unhashed(actual))) 2417 else
1885 goto shouldnt_be_hashed; 2418 BUG_ON(!d_unhashed(actual));
1886 2419
1887found_lock:
1888 spin_lock(&actual->d_lock); 2420 spin_lock(&actual->d_lock);
1889found: 2421found:
1890 _d_rehash(actual); 2422 _d_rehash(actual);
1891 spin_unlock(&actual->d_lock); 2423 spin_unlock(&actual->d_lock);
1892 spin_unlock(&dcache_lock); 2424 spin_unlock(&inode->i_lock);
1893out_nolock: 2425out_nolock:
1894 if (actual == dentry) { 2426 if (actual == dentry) {
1895 security_d_instantiate(dentry, inode); 2427 security_d_instantiate(dentry, inode);
@@ -1898,10 +2430,6 @@ out_nolock:
1898 2430
1899 iput(inode); 2431 iput(inode);
1900 return actual; 2432 return actual;
1901
1902shouldnt_be_hashed:
1903 spin_unlock(&dcache_lock);
1904 BUG();
1905} 2433}
1906EXPORT_SYMBOL_GPL(d_materialise_unique); 2434EXPORT_SYMBOL_GPL(d_materialise_unique);
1907 2435
@@ -1928,7 +2456,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
1928 * @buffer: pointer to the end of the buffer 2456 * @buffer: pointer to the end of the buffer
1929 * @buflen: pointer to buffer length 2457 * @buflen: pointer to buffer length
1930 * 2458 *
1931 * Caller holds the dcache_lock. 2459 * Caller holds the rename_lock.
1932 * 2460 *
1933 * If path is not reachable from the supplied root, then the value of 2461 * If path is not reachable from the supplied root, then the value of
1934 * root is changed (without modifying refcounts). 2462 * root is changed (without modifying refcounts).
@@ -1956,7 +2484,9 @@ static int prepend_path(const struct path *path, struct path *root,
1956 } 2484 }
1957 parent = dentry->d_parent; 2485 parent = dentry->d_parent;
1958 prefetch(parent); 2486 prefetch(parent);
2487 spin_lock(&dentry->d_lock);
1959 error = prepend_name(buffer, buflen, &dentry->d_name); 2488 error = prepend_name(buffer, buflen, &dentry->d_name);
2489 spin_unlock(&dentry->d_lock);
1960 if (!error) 2490 if (!error)
1961 error = prepend(buffer, buflen, "/", 1); 2491 error = prepend(buffer, buflen, "/", 1);
1962 if (error) 2492 if (error)
@@ -2012,9 +2542,9 @@ char *__d_path(const struct path *path, struct path *root,
2012 int error; 2542 int error;
2013 2543
2014 prepend(&res, &buflen, "\0", 1); 2544 prepend(&res, &buflen, "\0", 1);
2015 spin_lock(&dcache_lock); 2545 write_seqlock(&rename_lock);
2016 error = prepend_path(path, root, &res, &buflen); 2546 error = prepend_path(path, root, &res, &buflen);
2017 spin_unlock(&dcache_lock); 2547 write_sequnlock(&rename_lock);
2018 2548
2019 if (error) 2549 if (error)
2020 return ERR_PTR(error); 2550 return ERR_PTR(error);
@@ -2076,12 +2606,12 @@ char *d_path(const struct path *path, char *buf, int buflen)
2076 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2606 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2077 2607
2078 get_fs_root(current->fs, &root); 2608 get_fs_root(current->fs, &root);
2079 spin_lock(&dcache_lock); 2609 write_seqlock(&rename_lock);
2080 tmp = root; 2610 tmp = root;
2081 error = path_with_deleted(path, &tmp, &res, &buflen); 2611 error = path_with_deleted(path, &tmp, &res, &buflen);
2082 if (error) 2612 if (error)
2083 res = ERR_PTR(error); 2613 res = ERR_PTR(error);
2084 spin_unlock(&dcache_lock); 2614 write_sequnlock(&rename_lock);
2085 path_put(&root); 2615 path_put(&root);
2086 return res; 2616 return res;
2087} 2617}
@@ -2107,12 +2637,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2107 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2637 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2108 2638
2109 get_fs_root(current->fs, &root); 2639 get_fs_root(current->fs, &root);
2110 spin_lock(&dcache_lock); 2640 write_seqlock(&rename_lock);
2111 tmp = root; 2641 tmp = root;
2112 error = path_with_deleted(path, &tmp, &res, &buflen); 2642 error = path_with_deleted(path, &tmp, &res, &buflen);
2113 if (!error && !path_equal(&tmp, &root)) 2643 if (!error && !path_equal(&tmp, &root))
2114 error = prepend_unreachable(&res, &buflen); 2644 error = prepend_unreachable(&res, &buflen);
2115 spin_unlock(&dcache_lock); 2645 write_sequnlock(&rename_lock);
2116 path_put(&root); 2646 path_put(&root);
2117 if (error) 2647 if (error)
2118 res = ERR_PTR(error); 2648 res = ERR_PTR(error);
@@ -2144,7 +2674,7 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2144/* 2674/*
2145 * Write full pathname from the root of the filesystem into the buffer. 2675 * Write full pathname from the root of the filesystem into the buffer.
2146 */ 2676 */
2147char *__dentry_path(struct dentry *dentry, char *buf, int buflen) 2677static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2148{ 2678{
2149 char *end = buf + buflen; 2679 char *end = buf + buflen;
2150 char *retval; 2680 char *retval;
@@ -2158,10 +2688,13 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2158 2688
2159 while (!IS_ROOT(dentry)) { 2689 while (!IS_ROOT(dentry)) {
2160 struct dentry *parent = dentry->d_parent; 2690 struct dentry *parent = dentry->d_parent;
2691 int error;
2161 2692
2162 prefetch(parent); 2693 prefetch(parent);
2163 if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || 2694 spin_lock(&dentry->d_lock);
2164 (prepend(&end, &buflen, "/", 1) != 0)) 2695 error = prepend_name(&end, &buflen, &dentry->d_name);
2696 spin_unlock(&dentry->d_lock);
2697 if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2165 goto Elong; 2698 goto Elong;
2166 2699
2167 retval = end; 2700 retval = end;
@@ -2171,14 +2704,25 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2171Elong: 2704Elong:
2172 return ERR_PTR(-ENAMETOOLONG); 2705 return ERR_PTR(-ENAMETOOLONG);
2173} 2706}
2174EXPORT_SYMBOL(__dentry_path); 2707
2708char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2709{
2710 char *retval;
2711
2712 write_seqlock(&rename_lock);
2713 retval = __dentry_path(dentry, buf, buflen);
2714 write_sequnlock(&rename_lock);
2715
2716 return retval;
2717}
2718EXPORT_SYMBOL(dentry_path_raw);
2175 2719
2176char *dentry_path(struct dentry *dentry, char *buf, int buflen) 2720char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2177{ 2721{
2178 char *p = NULL; 2722 char *p = NULL;
2179 char *retval; 2723 char *retval;
2180 2724
2181 spin_lock(&dcache_lock); 2725 write_seqlock(&rename_lock);
2182 if (d_unlinked(dentry)) { 2726 if (d_unlinked(dentry)) {
2183 p = buf + buflen; 2727 p = buf + buflen;
2184 if (prepend(&p, &buflen, "//deleted", 10) != 0) 2728 if (prepend(&p, &buflen, "//deleted", 10) != 0)
@@ -2186,12 +2730,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2186 buflen++; 2730 buflen++;
2187 } 2731 }
2188 retval = __dentry_path(dentry, buf, buflen); 2732 retval = __dentry_path(dentry, buf, buflen);
2189 spin_unlock(&dcache_lock); 2733 write_sequnlock(&rename_lock);
2190 if (!IS_ERR(retval) && p) 2734 if (!IS_ERR(retval) && p)
2191 *p = '/'; /* restore '/' overriden with '\0' */ 2735 *p = '/'; /* restore '/' overriden with '\0' */
2192 return retval; 2736 return retval;
2193Elong: 2737Elong:
2194 spin_unlock(&dcache_lock);
2195 return ERR_PTR(-ENAMETOOLONG); 2738 return ERR_PTR(-ENAMETOOLONG);
2196} 2739}
2197 2740
@@ -2225,7 +2768,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2225 get_fs_root_and_pwd(current->fs, &root, &pwd); 2768 get_fs_root_and_pwd(current->fs, &root, &pwd);
2226 2769
2227 error = -ENOENT; 2770 error = -ENOENT;
2228 spin_lock(&dcache_lock); 2771 write_seqlock(&rename_lock);
2229 if (!d_unlinked(pwd.dentry)) { 2772 if (!d_unlinked(pwd.dentry)) {
2230 unsigned long len; 2773 unsigned long len;
2231 struct path tmp = root; 2774 struct path tmp = root;
@@ -2234,7 +2777,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2234 2777
2235 prepend(&cwd, &buflen, "\0", 1); 2778 prepend(&cwd, &buflen, "\0", 1);
2236 error = prepend_path(&pwd, &tmp, &cwd, &buflen); 2779 error = prepend_path(&pwd, &tmp, &cwd, &buflen);
2237 spin_unlock(&dcache_lock); 2780 write_sequnlock(&rename_lock);
2238 2781
2239 if (error) 2782 if (error)
2240 goto out; 2783 goto out;
@@ -2253,8 +2796,9 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2253 if (copy_to_user(buf, cwd, len)) 2796 if (copy_to_user(buf, cwd, len))
2254 error = -EFAULT; 2797 error = -EFAULT;
2255 } 2798 }
2256 } else 2799 } else {
2257 spin_unlock(&dcache_lock); 2800 write_sequnlock(&rename_lock);
2801 }
2258 2802
2259out: 2803out:
2260 path_put(&pwd); 2804 path_put(&pwd);
@@ -2282,25 +2826,25 @@ out:
2282int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) 2826int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2283{ 2827{
2284 int result; 2828 int result;
2285 unsigned long seq; 2829 unsigned seq;
2286 2830
2287 if (new_dentry == old_dentry) 2831 if (new_dentry == old_dentry)
2288 return 1; 2832 return 1;
2289 2833
2290 /*
2291 * Need rcu_readlock to protect against the d_parent trashing
2292 * due to d_move
2293 */
2294 rcu_read_lock();
2295 do { 2834 do {
2296 /* for restarting inner loop in case of seq retry */ 2835 /* for restarting inner loop in case of seq retry */
2297 seq = read_seqbegin(&rename_lock); 2836 seq = read_seqbegin(&rename_lock);
2837 /*
2838 * Need rcu_readlock to protect against the d_parent trashing
2839 * due to d_move
2840 */
2841 rcu_read_lock();
2298 if (d_ancestor(old_dentry, new_dentry)) 2842 if (d_ancestor(old_dentry, new_dentry))
2299 result = 1; 2843 result = 1;
2300 else 2844 else
2301 result = 0; 2845 result = 0;
2846 rcu_read_unlock();
2302 } while (read_seqretry(&rename_lock, seq)); 2847 } while (read_seqretry(&rename_lock, seq));
2303 rcu_read_unlock();
2304 2848
2305 return result; 2849 return result;
2306} 2850}
@@ -2332,10 +2876,15 @@ EXPORT_SYMBOL(path_is_under);
2332 2876
2333void d_genocide(struct dentry *root) 2877void d_genocide(struct dentry *root)
2334{ 2878{
2335 struct dentry *this_parent = root; 2879 struct dentry *this_parent;
2336 struct list_head *next; 2880 struct list_head *next;
2881 unsigned seq;
2882 int locked = 0;
2337 2883
2338 spin_lock(&dcache_lock); 2884 seq = read_seqbegin(&rename_lock);
2885again:
2886 this_parent = root;
2887 spin_lock(&this_parent->d_lock);
2339repeat: 2888repeat:
2340 next = this_parent->d_subdirs.next; 2889 next = this_parent->d_subdirs.next;
2341resume: 2890resume:
@@ -2343,21 +2892,62 @@ resume:
2343 struct list_head *tmp = next; 2892 struct list_head *tmp = next;
2344 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 2893 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2345 next = tmp->next; 2894 next = tmp->next;
2346 if (d_unhashed(dentry)||!dentry->d_inode) 2895
2896 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2897 if (d_unhashed(dentry) || !dentry->d_inode) {
2898 spin_unlock(&dentry->d_lock);
2347 continue; 2899 continue;
2900 }
2348 if (!list_empty(&dentry->d_subdirs)) { 2901 if (!list_empty(&dentry->d_subdirs)) {
2902 spin_unlock(&this_parent->d_lock);
2903 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2349 this_parent = dentry; 2904 this_parent = dentry;
2905 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2350 goto repeat; 2906 goto repeat;
2351 } 2907 }
2352 atomic_dec(&dentry->d_count); 2908 if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2909 dentry->d_flags |= DCACHE_GENOCIDE;
2910 dentry->d_count--;
2911 }
2912 spin_unlock(&dentry->d_lock);
2353 } 2913 }
2354 if (this_parent != root) { 2914 if (this_parent != root) {
2355 next = this_parent->d_u.d_child.next; 2915 struct dentry *tmp;
2356 atomic_dec(&this_parent->d_count); 2916 struct dentry *child;
2357 this_parent = this_parent->d_parent; 2917
2918 tmp = this_parent->d_parent;
2919 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2920 this_parent->d_flags |= DCACHE_GENOCIDE;
2921 this_parent->d_count--;
2922 }
2923 rcu_read_lock();
2924 spin_unlock(&this_parent->d_lock);
2925 child = this_parent;
2926 this_parent = tmp;
2927 spin_lock(&this_parent->d_lock);
2928 /* might go back up the wrong parent if we have had a rename
2929 * or deletion */
2930 if (this_parent != child->d_parent ||
2931 (!locked && read_seqretry(&rename_lock, seq))) {
2932 spin_unlock(&this_parent->d_lock);
2933 rcu_read_unlock();
2934 goto rename_retry;
2935 }
2936 rcu_read_unlock();
2937 next = child->d_u.d_child.next;
2358 goto resume; 2938 goto resume;
2359 } 2939 }
2360 spin_unlock(&dcache_lock); 2940 spin_unlock(&this_parent->d_lock);
2941 if (!locked && read_seqretry(&rename_lock, seq))
2942 goto rename_retry;
2943 if (locked)
2944 write_sequnlock(&rename_lock);
2945 return;
2946
2947rename_retry:
2948 locked = 1;
2949 write_seqlock(&rename_lock);
2950 goto again;
2361} 2951}
2362 2952
2363/** 2953/**
@@ -2411,7 +3001,7 @@ static void __init dcache_init_early(void)
2411 3001
2412 dentry_hashtable = 3002 dentry_hashtable =
2413 alloc_large_system_hash("Dentry cache", 3003 alloc_large_system_hash("Dentry cache",
2414 sizeof(struct hlist_head), 3004 sizeof(struct dcache_hash_bucket),
2415 dhash_entries, 3005 dhash_entries,
2416 13, 3006 13,
2417 HASH_EARLY, 3007 HASH_EARLY,
@@ -2420,16 +3010,13 @@ static void __init dcache_init_early(void)
2420 0); 3010 0);
2421 3011
2422 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3012 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2423 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 3013 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
2424} 3014}
2425 3015
2426static void __init dcache_init(void) 3016static void __init dcache_init(void)
2427{ 3017{
2428 int loop; 3018 int loop;
2429 3019
2430 percpu_counter_init(&nr_dentry, 0);
2431 percpu_counter_init(&nr_dentry_unused, 0);
2432
2433 /* 3020 /*
2434 * A constructor could be added for stable state like the lists, 3021 * A constructor could be added for stable state like the lists,
2435 * but it is probably not worth it because of the cache nature 3022 * but it is probably not worth it because of the cache nature
@@ -2446,7 +3033,7 @@ static void __init dcache_init(void)
2446 3033
2447 dentry_hashtable = 3034 dentry_hashtable =
2448 alloc_large_system_hash("Dentry cache", 3035 alloc_large_system_hash("Dentry cache",
2449 sizeof(struct hlist_head), 3036 sizeof(struct dcache_hash_bucket),
2450 dhash_entries, 3037 dhash_entries,
2451 13, 3038 13,
2452 0, 3039 0,
@@ -2455,7 +3042,7 @@ static void __init dcache_init(void)
2455 0); 3042 0);
2456 3043
2457 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3044 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2458 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 3045 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
2459} 3046}
2460 3047
2461/* SLAB cache for __getname() consumers */ 3048/* SLAB cache for __getname() consumers */