about summary refs log tree commit diff stats
path: root/fs/dcache.c
diff options
context:
space:
mode:
author Glenn Elliott <gelliott@cs.unc.edu> 2012-03-04 19:47:13 -0500
committer Glenn Elliott <gelliott@cs.unc.edu> 2012-03-04 19:47:13 -0500
commit c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/dcache.c
parent ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent 6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp [wip-k-fmlp]
Conflicts: litmus/sched_cedf.c
Diffstat (limited to 'fs/dcache.c')
-rw-r--r-- fs/dcache.c 1557
1 file changed, 1088 insertions, 469 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index 83293be48149..fbdcbca40725 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -33,20 +33,59 @@
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/fs_struct.h> 34#include <linux/fs_struct.h>
35#include <linux/hardirq.h> 35#include <linux/hardirq.h>
36#include <linux/bit_spinlock.h>
37#include <linux/rculist_bl.h>
38#include <linux/prefetch.h>
36#include "internal.h" 39#include "internal.h"
37 40
41/*
42 * Usage:
43 * dcache->d_inode->i_lock protects:
44 * - i_dentry, d_alias, d_inode of aliases
45 * dcache_hash_bucket lock protects:
46 * - the dcache hash table
47 * s_anon bl list spinlock protects:
48 * - the s_anon list (see __d_drop)
49 * dcache_lru_lock protects:
50 * - the dcache lru lists and counters
51 * d_lock protects:
52 * - d_flags
53 * - d_name
54 * - d_lru
55 * - d_count
56 * - d_unhashed()
57 * - d_parent and d_subdirs
58 * - children's d_child and d_parent
59 * - d_alias, d_inode
60 *
61 * Ordering:
62 * dentry->d_inode->i_lock
63 * dentry->d_lock
64 * dcache_lru_lock
65 * dcache_hash_bucket lock
66 * s_anon lock
67 *
68 * If there is an ancestor relationship:
69 * dentry->d_parent->...->d_parent->d_lock
70 * ...
71 * dentry->d_parent->d_lock
72 * dentry->d_lock
73 *
74 * If no ancestor relationship:
75 * if (dentry1 < dentry2)
76 * dentry1->d_lock
77 * dentry2->d_lock
78 */
38int sysctl_vfs_cache_pressure __read_mostly = 100; 79int sysctl_vfs_cache_pressure __read_mostly = 100;
39EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); 80EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
40 81
41 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); 82static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
42__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); 83__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
43 84
44EXPORT_SYMBOL(dcache_lock); 85EXPORT_SYMBOL(rename_lock);
45 86
46static struct kmem_cache *dentry_cache __read_mostly; 87static struct kmem_cache *dentry_cache __read_mostly;
47 88
48#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
49
50/* 89/*
51 * This is the single most critical data structure when it comes 90 * This is the single most critical data structure when it comes
52 * to the dcache: the hashtable for lookups. Somebody should try 91 * to the dcache: the hashtable for lookups. Somebody should try
@@ -60,56 +99,98 @@ static struct kmem_cache *dentry_cache __read_mostly;
60 99
61static unsigned int d_hash_mask __read_mostly; 100static unsigned int d_hash_mask __read_mostly;
62static unsigned int d_hash_shift __read_mostly; 101static unsigned int d_hash_shift __read_mostly;
63static struct hlist_head *dentry_hashtable __read_mostly; 102
103static struct hlist_bl_head *dentry_hashtable __read_mostly;
104
105static inline struct hlist_bl_head *d_hash(struct dentry *parent,
106 unsigned long hash)
107{
108 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
109 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
110 return dentry_hashtable + (hash & D_HASHMASK);
111}
64 112
65/* Statistics gathering. */ 113/* Statistics gathering. */
66struct dentry_stat_t dentry_stat = { 114struct dentry_stat_t dentry_stat = {
67 .age_limit = 45, 115 .age_limit = 45,
68}; 116};
69 117
70static void __d_free(struct dentry *dentry) 118static DEFINE_PER_CPU(unsigned int, nr_dentry);
119
120#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
121static int get_nr_dentry(void)
71{ 122{
123 int i;
124 int sum = 0;
125 for_each_possible_cpu(i)
126 sum += per_cpu(nr_dentry, i);
127 return sum < 0 ? 0 : sum;
128}
129
130int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
131 size_t *lenp, loff_t *ppos)
132{
133 dentry_stat.nr_dentry = get_nr_dentry();
134 return proc_dointvec(table, write, buffer, lenp, ppos);
135}
136#endif
137
138static void __d_free(struct rcu_head *head)
139{
140 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
141
72 WARN_ON(!list_empty(&dentry->d_alias)); 142 WARN_ON(!list_empty(&dentry->d_alias));
73 if (dname_external(dentry)) 143 if (dname_external(dentry))
74 kfree(dentry->d_name.name); 144 kfree(dentry->d_name.name);
75 kmem_cache_free(dentry_cache, dentry); 145 kmem_cache_free(dentry_cache, dentry);
76} 146}
77 147
78static void d_callback(struct rcu_head *head)
79{
80 struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
81 __d_free(dentry);
82}
83
84/* 148/*
85 * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry 149 * no locks, please.
86 * inside dcache_lock.
87 */ 150 */
88static void d_free(struct dentry *dentry) 151static void d_free(struct dentry *dentry)
89{ 152{
153 BUG_ON(dentry->d_count);
154 this_cpu_dec(nr_dentry);
90 if (dentry->d_op && dentry->d_op->d_release) 155 if (dentry->d_op && dentry->d_op->d_release)
91 dentry->d_op->d_release(dentry); 156 dentry->d_op->d_release(dentry);
92 /* if dentry was never inserted into hash, immediate free is OK */ 157
93 if (hlist_unhashed(&dentry->d_hash)) 158 /* if dentry was never visible to RCU, immediate free is OK */
94 __d_free(dentry); 159 if (!(dentry->d_flags & DCACHE_RCUACCESS))
160 __d_free(&dentry->d_u.d_rcu);
95 else 161 else
96 call_rcu(&dentry->d_u.d_rcu, d_callback); 162 call_rcu(&dentry->d_u.d_rcu, __d_free);
163}
164
165/**
166 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
167 * @dentry: the target dentry
168 * After this call, in-progress rcu-walk path lookup will fail. This
169 * should be called after unhashing, and after changing d_inode (if
170 * the dentry has not already been unhashed).
171 */
172static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
173{
174 assert_spin_locked(&dentry->d_lock);
175 /* Go through a barrier */
176 write_seqcount_barrier(&dentry->d_seq);
97} 177}
98 178
99/* 179/*
100 * Release the dentry's inode, using the filesystem 180 * Release the dentry's inode, using the filesystem
101 * d_iput() operation if defined. 181 * d_iput() operation if defined. Dentry has no refcount
182 * and is unhashed.
102 */ 183 */
103static void dentry_iput(struct dentry * dentry) 184static void dentry_iput(struct dentry * dentry)
104 __releases(dentry->d_lock) 185 __releases(dentry->d_lock)
105 __releases(dcache_lock) 186 __releases(dentry->d_inode->i_lock)
106{ 187{
107 struct inode *inode = dentry->d_inode; 188 struct inode *inode = dentry->d_inode;
108 if (inode) { 189 if (inode) {
109 dentry->d_inode = NULL; 190 dentry->d_inode = NULL;
110 list_del_init(&dentry->d_alias); 191 list_del_init(&dentry->d_alias);
111 spin_unlock(&dentry->d_lock); 192 spin_unlock(&dentry->d_lock);
112 spin_unlock(&dcache_lock); 193 spin_unlock(&inode->i_lock);
113 if (!inode->i_nlink) 194 if (!inode->i_nlink)
114 fsnotify_inoderemove(inode); 195 fsnotify_inoderemove(inode);
115 if (dentry->d_op && dentry->d_op->d_iput) 196 if (dentry->d_op && dentry->d_op->d_iput)
@@ -118,69 +199,186 @@ static void dentry_iput(struct dentry * dentry)
118 iput(inode); 199 iput(inode);
119 } else { 200 } else {
120 spin_unlock(&dentry->d_lock); 201 spin_unlock(&dentry->d_lock);
121 spin_unlock(&dcache_lock);
122 } 202 }
123} 203}
124 204
125/* 205/*
126 * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held. 206 * Release the dentry's inode, using the filesystem
207 * d_iput() operation if defined. dentry remains in-use.
208 */
209static void dentry_unlink_inode(struct dentry * dentry)
210 __releases(dentry->d_lock)
211 __releases(dentry->d_inode->i_lock)
212{
213 struct inode *inode = dentry->d_inode;
214 dentry->d_inode = NULL;
215 list_del_init(&dentry->d_alias);
216 dentry_rcuwalk_barrier(dentry);
217 spin_unlock(&dentry->d_lock);
218 spin_unlock(&inode->i_lock);
219 if (!inode->i_nlink)
220 fsnotify_inoderemove(inode);
221 if (dentry->d_op && dentry->d_op->d_iput)
222 dentry->d_op->d_iput(dentry, inode);
223 else
224 iput(inode);
225}
226
227/*
228 * dentry_lru_(add|del|move_tail) must be called with d_lock held.
127 */ 229 */
128static void dentry_lru_add(struct dentry *dentry) 230static void dentry_lru_add(struct dentry *dentry)
129{ 231{
130 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 232 if (list_empty(&dentry->d_lru)) {
131 dentry->d_sb->s_nr_dentry_unused++; 233 spin_lock(&dcache_lru_lock);
132 dentry_stat.nr_unused++; 234 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
235 dentry->d_sb->s_nr_dentry_unused++;
236 dentry_stat.nr_unused++;
237 spin_unlock(&dcache_lru_lock);
238 }
133} 239}
134 240
135static void dentry_lru_add_tail(struct dentry *dentry) 241static void __dentry_lru_del(struct dentry *dentry)
136{ 242{
137 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 243 list_del_init(&dentry->d_lru);
138 dentry->d_sb->s_nr_dentry_unused++; 244 dentry->d_sb->s_nr_dentry_unused--;
139 dentry_stat.nr_unused++; 245 dentry_stat.nr_unused--;
140} 246}
141 247
142static void dentry_lru_del(struct dentry *dentry) 248static void dentry_lru_del(struct dentry *dentry)
143{ 249{
144 if (!list_empty(&dentry->d_lru)) { 250 if (!list_empty(&dentry->d_lru)) {
145 list_del(&dentry->d_lru); 251 spin_lock(&dcache_lru_lock);
146 dentry->d_sb->s_nr_dentry_unused--; 252 __dentry_lru_del(dentry);
147 dentry_stat.nr_unused--; 253 spin_unlock(&dcache_lru_lock);
148 } 254 }
149} 255}
150 256
151static void dentry_lru_del_init(struct dentry *dentry) 257static void dentry_lru_move_tail(struct dentry *dentry)
152{ 258{
153 if (likely(!list_empty(&dentry->d_lru))) { 259 spin_lock(&dcache_lru_lock);
154 list_del_init(&dentry->d_lru); 260 if (list_empty(&dentry->d_lru)) {
155 dentry->d_sb->s_nr_dentry_unused--; 261 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
156 dentry_stat.nr_unused--; 262 dentry->d_sb->s_nr_dentry_unused++;
263 dentry_stat.nr_unused++;
264 } else {
265 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
157 } 266 }
267 spin_unlock(&dcache_lru_lock);
158} 268}
159 269
160/** 270/**
161 * d_kill - kill dentry and return parent 271 * d_kill - kill dentry and return parent
162 * @dentry: dentry to kill 272 * @dentry: dentry to kill
273 * @parent: parent dentry
163 * 274 *
164 * The dentry must already be unhashed and removed from the LRU. 275 * The dentry must already be unhashed and removed from the LRU.
165 * 276 *
166 * If this is the root of the dentry tree, return NULL. 277 * If this is the root of the dentry tree, return NULL.
278 *
279 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
280 * d_kill.
167 */ 281 */
168static struct dentry *d_kill(struct dentry *dentry) 282static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
169 __releases(dentry->d_lock) 283 __releases(dentry->d_lock)
170 __releases(dcache_lock) 284 __releases(parent->d_lock)
285 __releases(dentry->d_inode->i_lock)
171{ 286{
172 struct dentry *parent;
173
174 list_del(&dentry->d_u.d_child); 287 list_del(&dentry->d_u.d_child);
175 dentry_stat.nr_dentry--; /* For d_free, below */ 288 /*
176 /*drops the locks, at that point nobody can reach this dentry */ 289 * Inform try_to_ascend() that we are no longer attached to the
290 * dentry tree
291 */
292 dentry->d_flags |= DCACHE_DISCONNECTED;
293 if (parent)
294 spin_unlock(&parent->d_lock);
177 dentry_iput(dentry); 295 dentry_iput(dentry);
296 /*
297 * dentry_iput drops the locks, at which point nobody (except
298 * transient RCU lookups) can reach this dentry.
299 */
300 d_free(dentry);
301 return parent;
302}
303
304/**
305 * d_drop - drop a dentry
306 * @dentry: dentry to drop
307 *
308 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
309 * be found through a VFS lookup any more. Note that this is different from
310 * deleting the dentry - d_delete will try to mark the dentry negative if
311 * possible, giving a successful _negative_ lookup, while d_drop will
312 * just make the cache lookup fail.
313 *
314 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
315 * reason (NFS timeouts or autofs deletes).
316 *
317 * __d_drop requires dentry->d_lock.
318 */
319void __d_drop(struct dentry *dentry)
320{
321 if (!d_unhashed(dentry)) {
322 struct hlist_bl_head *b;
323 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
324 b = &dentry->d_sb->s_anon;
325 else
326 b = d_hash(dentry->d_parent, dentry->d_name.hash);
327
328 hlist_bl_lock(b);
329 __hlist_bl_del(&dentry->d_hash);
330 dentry->d_hash.pprev = NULL;
331 hlist_bl_unlock(b);
332
333 dentry_rcuwalk_barrier(dentry);
334 }
335}
336EXPORT_SYMBOL(__d_drop);
337
338void d_drop(struct dentry *dentry)
339{
340 spin_lock(&dentry->d_lock);
341 __d_drop(dentry);
342 spin_unlock(&dentry->d_lock);
343}
344EXPORT_SYMBOL(d_drop);
345
346/*
347 * Finish off a dentry we've decided to kill.
348 * dentry->d_lock must be held, returns with it unlocked.
349 * If ref is non-zero, then decrement the refcount too.
350 * Returns dentry requiring refcount drop, or NULL if we're done.
351 */
352static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
353 __releases(dentry->d_lock)
354{
355 struct inode *inode;
356 struct dentry *parent;
357
358 inode = dentry->d_inode;
359 if (inode && !spin_trylock(&inode->i_lock)) {
360relock:
361 spin_unlock(&dentry->d_lock);
362 cpu_relax();
363 return dentry; /* try again with same dentry */
364 }
178 if (IS_ROOT(dentry)) 365 if (IS_ROOT(dentry))
179 parent = NULL; 366 parent = NULL;
180 else 367 else
181 parent = dentry->d_parent; 368 parent = dentry->d_parent;
182 d_free(dentry); 369 if (parent && !spin_trylock(&parent->d_lock)) {
183 return parent; 370 if (inode)
371 spin_unlock(&inode->i_lock);
372 goto relock;
373 }
374
375 if (ref)
376 dentry->d_count--;
377 /* if dentry was on the d_lru list delete it from there */
378 dentry_lru_del(dentry);
379 /* if it was on the hash then remove it */
380 __d_drop(dentry);
381 return d_kill(dentry, parent);
184} 382}
185 383
186/* 384/*
@@ -208,52 +406,42 @@ static struct dentry *d_kill(struct dentry *dentry)
208 * call the dentry unlink method as well as removing it from the queues and 406 * call the dentry unlink method as well as removing it from the queues and
209 * releasing its resources. If the parent dentries were scheduled for release 407 * releasing its resources. If the parent dentries were scheduled for release
210 * they too may now get deleted. 408 * they too may now get deleted.
211 *
212 * no dcache lock, please.
213 */ 409 */
214
215void dput(struct dentry *dentry) 410void dput(struct dentry *dentry)
216{ 411{
217 if (!dentry) 412 if (!dentry)
218 return; 413 return;
219 414
220repeat: 415repeat:
221 if (atomic_read(&dentry->d_count) == 1) 416 if (dentry->d_count == 1)
222 might_sleep(); 417 might_sleep();
223 if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
224 return;
225
226 spin_lock(&dentry->d_lock); 418 spin_lock(&dentry->d_lock);
227 if (atomic_read(&dentry->d_count)) { 419 BUG_ON(!dentry->d_count);
420 if (dentry->d_count > 1) {
421 dentry->d_count--;
228 spin_unlock(&dentry->d_lock); 422 spin_unlock(&dentry->d_lock);
229 spin_unlock(&dcache_lock);
230 return; 423 return;
231 } 424 }
232 425
233 /* 426 if (dentry->d_flags & DCACHE_OP_DELETE) {
234 * AV: ->d_delete() is _NOT_ allowed to block now.
235 */
236 if (dentry->d_op && dentry->d_op->d_delete) {
237 if (dentry->d_op->d_delete(dentry)) 427 if (dentry->d_op->d_delete(dentry))
238 goto unhash_it; 428 goto kill_it;
239 } 429 }
430
240 /* Unreachable? Get rid of it */ 431 /* Unreachable? Get rid of it */
241 if (d_unhashed(dentry)) 432 if (d_unhashed(dentry))
242 goto kill_it; 433 goto kill_it;
243 if (list_empty(&dentry->d_lru)) { 434
244 dentry->d_flags |= DCACHE_REFERENCED; 435 /* Otherwise leave it cached and ensure it's on the LRU */
245 dentry_lru_add(dentry); 436 dentry->d_flags |= DCACHE_REFERENCED;
246 } 437 dentry_lru_add(dentry);
247 spin_unlock(&dentry->d_lock); 438
248 spin_unlock(&dcache_lock); 439 dentry->d_count--;
440 spin_unlock(&dentry->d_lock);
249 return; 441 return;
250 442
251unhash_it:
252 __d_drop(dentry);
253kill_it: 443kill_it:
254 /* if dentry was on the d_lru list delete it from there */ 444 dentry = dentry_kill(dentry, 1);
255 dentry_lru_del(dentry);
256 dentry = d_kill(dentry);
257 if (dentry) 445 if (dentry)
258 goto repeat; 446 goto repeat;
259} 447}
@@ -276,9 +464,9 @@ int d_invalidate(struct dentry * dentry)
276 /* 464 /*
277 * If it's already been dropped, return OK. 465 * If it's already been dropped, return OK.
278 */ 466 */
279 spin_lock(&dcache_lock); 467 spin_lock(&dentry->d_lock);
280 if (d_unhashed(dentry)) { 468 if (d_unhashed(dentry)) {
281 spin_unlock(&dcache_lock); 469 spin_unlock(&dentry->d_lock);
282 return 0; 470 return 0;
283 } 471 }
284 /* 472 /*
@@ -286,9 +474,9 @@ int d_invalidate(struct dentry * dentry)
286 * to get rid of unused child entries. 474 * to get rid of unused child entries.
287 */ 475 */
288 if (!list_empty(&dentry->d_subdirs)) { 476 if (!list_empty(&dentry->d_subdirs)) {
289 spin_unlock(&dcache_lock); 477 spin_unlock(&dentry->d_lock);
290 shrink_dcache_parent(dentry); 478 shrink_dcache_parent(dentry);
291 spin_lock(&dcache_lock); 479 spin_lock(&dentry->d_lock);
292 } 480 }
293 481
294 /* 482 /*
@@ -301,36 +489,61 @@ int d_invalidate(struct dentry * dentry)
301 * we might still populate it if it was a 489 * we might still populate it if it was a
302 * working directory or similar). 490 * working directory or similar).
303 */ 491 */
304 spin_lock(&dentry->d_lock); 492 if (dentry->d_count > 1) {
305 if (atomic_read(&dentry->d_count) > 1) {
306 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { 493 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
307 spin_unlock(&dentry->d_lock); 494 spin_unlock(&dentry->d_lock);
308 spin_unlock(&dcache_lock);
309 return -EBUSY; 495 return -EBUSY;
310 } 496 }
311 } 497 }
312 498
313 __d_drop(dentry); 499 __d_drop(dentry);
314 spin_unlock(&dentry->d_lock); 500 spin_unlock(&dentry->d_lock);
315 spin_unlock(&dcache_lock);
316 return 0; 501 return 0;
317} 502}
318EXPORT_SYMBOL(d_invalidate); 503EXPORT_SYMBOL(d_invalidate);
319 504
320/* This should be called _only_ with dcache_lock held */ 505/* This must be called with d_lock held */
506static inline void __dget_dlock(struct dentry *dentry)
507{
508 dentry->d_count++;
509}
321 510
322static inline struct dentry * __dget_locked(struct dentry *dentry) 511static inline void __dget(struct dentry *dentry)
323{ 512{
324 atomic_inc(&dentry->d_count); 513 spin_lock(&dentry->d_lock);
325 dentry_lru_del_init(dentry); 514 __dget_dlock(dentry);
326 return dentry; 515 spin_unlock(&dentry->d_lock);
327} 516}
328 517
329struct dentry * dget_locked(struct dentry *dentry) 518struct dentry *dget_parent(struct dentry *dentry)
330{ 519{
331 return __dget_locked(dentry); 520 struct dentry *ret;
521
522repeat:
523 /*
524 * Don't need rcu_dereference because we re-check it was correct under
525 * the lock.
526 */
527 rcu_read_lock();
528 ret = dentry->d_parent;
529 if (!ret) {
530 rcu_read_unlock();
531 goto out;
532 }
533 spin_lock(&ret->d_lock);
534 if (unlikely(ret != dentry->d_parent)) {
535 spin_unlock(&ret->d_lock);
536 rcu_read_unlock();
537 goto repeat;
538 }
539 rcu_read_unlock();
540 BUG_ON(!ret->d_count);
541 ret->d_count++;
542 spin_unlock(&ret->d_lock);
543out:
544 return ret;
332} 545}
333EXPORT_SYMBOL(dget_locked); 546EXPORT_SYMBOL(dget_parent);
334 547
335/** 548/**
336 * d_find_alias - grab a hashed alias of inode 549 * d_find_alias - grab a hashed alias of inode
@@ -348,42 +561,51 @@ EXPORT_SYMBOL(dget_locked);
348 * any other hashed alias over that one unless @want_discon is set, 561 * any other hashed alias over that one unless @want_discon is set,
349 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. 562 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
350 */ 563 */
351 564static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
352static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
353{ 565{
354 struct list_head *head, *next, *tmp; 566 struct dentry *alias, *discon_alias;
355 struct dentry *alias, *discon_alias=NULL;
356 567
357 head = &inode->i_dentry; 568again:
358 next = inode->i_dentry.next; 569 discon_alias = NULL;
359 while (next != head) { 570 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
360 tmp = next; 571 spin_lock(&alias->d_lock);
361 next = tmp->next;
362 prefetch(next);
363 alias = list_entry(tmp, struct dentry, d_alias);
364 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { 572 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
365 if (IS_ROOT(alias) && 573 if (IS_ROOT(alias) &&
366 (alias->d_flags & DCACHE_DISCONNECTED)) 574 (alias->d_flags & DCACHE_DISCONNECTED)) {
367 discon_alias = alias; 575 discon_alias = alias;
368 else if (!want_discon) { 576 } else if (!want_discon) {
369 __dget_locked(alias); 577 __dget_dlock(alias);
578 spin_unlock(&alias->d_lock);
579 return alias;
580 }
581 }
582 spin_unlock(&alias->d_lock);
583 }
584 if (discon_alias) {
585 alias = discon_alias;
586 spin_lock(&alias->d_lock);
587 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
588 if (IS_ROOT(alias) &&
589 (alias->d_flags & DCACHE_DISCONNECTED)) {
590 __dget_dlock(alias);
591 spin_unlock(&alias->d_lock);
370 return alias; 592 return alias;
371 } 593 }
372 } 594 }
595 spin_unlock(&alias->d_lock);
596 goto again;
373 } 597 }
374 if (discon_alias) 598 return NULL;
375 __dget_locked(discon_alias);
376 return discon_alias;
377} 599}
378 600
379struct dentry * d_find_alias(struct inode *inode) 601struct dentry *d_find_alias(struct inode *inode)
380{ 602{
381 struct dentry *de = NULL; 603 struct dentry *de = NULL;
382 604
383 if (!list_empty(&inode->i_dentry)) { 605 if (!list_empty(&inode->i_dentry)) {
384 spin_lock(&dcache_lock); 606 spin_lock(&inode->i_lock);
385 de = __d_find_alias(inode, 0); 607 de = __d_find_alias(inode, 0);
386 spin_unlock(&dcache_lock); 608 spin_unlock(&inode->i_lock);
387 } 609 }
388 return de; 610 return de;
389} 611}
@@ -397,132 +619,153 @@ void d_prune_aliases(struct inode *inode)
397{ 619{
398 struct dentry *dentry; 620 struct dentry *dentry;
399restart: 621restart:
400 spin_lock(&dcache_lock); 622 spin_lock(&inode->i_lock);
401 list_for_each_entry(dentry, &inode->i_dentry, d_alias) { 623 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
402 spin_lock(&dentry->d_lock); 624 spin_lock(&dentry->d_lock);
403 if (!atomic_read(&dentry->d_count)) { 625 if (!dentry->d_count) {
404 __dget_locked(dentry); 626 __dget_dlock(dentry);
405 __d_drop(dentry); 627 __d_drop(dentry);
406 spin_unlock(&dentry->d_lock); 628 spin_unlock(&dentry->d_lock);
407 spin_unlock(&dcache_lock); 629 spin_unlock(&inode->i_lock);
408 dput(dentry); 630 dput(dentry);
409 goto restart; 631 goto restart;
410 } 632 }
411 spin_unlock(&dentry->d_lock); 633 spin_unlock(&dentry->d_lock);
412 } 634 }
413 spin_unlock(&dcache_lock); 635 spin_unlock(&inode->i_lock);
414} 636}
415EXPORT_SYMBOL(d_prune_aliases); 637EXPORT_SYMBOL(d_prune_aliases);
416 638
417/* 639/*
418 * Throw away a dentry - free the inode, dput the parent. This requires that 640 * Try to throw away a dentry - free the inode, dput the parent.
419 * the LRU list has already been removed. 641 * Requires dentry->d_lock is held, and dentry->d_count == 0.
642 * Releases dentry->d_lock.
420 * 643 *
421 * Try to prune ancestors as well. This is necessary to prevent 644 * This may fail if locks cannot be acquired; no problem, just try again.
422 * quadratic behavior of shrink_dcache_parent(), but is also expected
423 * to be beneficial in reducing dentry cache fragmentation.
424 */ 645 */
425static void prune_one_dentry(struct dentry * dentry) 646static void try_prune_one_dentry(struct dentry *dentry)
426 __releases(dentry->d_lock) 647 __releases(dentry->d_lock)
427 __releases(dcache_lock)
428 __acquires(dcache_lock)
429{ 648{
430 __d_drop(dentry); 649 struct dentry *parent;
431 dentry = d_kill(dentry);
432 650
651 parent = dentry_kill(dentry, 0);
433 /* 652 /*
434 * Prune ancestors. Locking is simpler than in dput(), 653 * If dentry_kill returns NULL, we have nothing more to do.
435 * because dcache_lock needs to be taken anyway. 654 * If it returns the same dentry, trylocks failed. In either
655 * case, just loop again.
656 *
657 * Otherwise, we need to prune ancestors too. This is necessary
658 * to prevent quadratic behavior of shrink_dcache_parent(), but
659 * is also expected to be beneficial in reducing dentry cache
660 * fragmentation.
436 */ 661 */
437 spin_lock(&dcache_lock); 662 if (!parent)
663 return;
664 if (parent == dentry)
665 return;
666
667 /* Prune ancestors. */
668 dentry = parent;
438 while (dentry) { 669 while (dentry) {
439 if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) 670 spin_lock(&dentry->d_lock);
671 if (dentry->d_count > 1) {
672 dentry->d_count--;
673 spin_unlock(&dentry->d_lock);
440 return; 674 return;
441 675 }
442 if (dentry->d_op && dentry->d_op->d_delete) 676 dentry = dentry_kill(dentry, 1);
443 dentry->d_op->d_delete(dentry);
444 dentry_lru_del_init(dentry);
445 __d_drop(dentry);
446 dentry = d_kill(dentry);
447 spin_lock(&dcache_lock);
448 } 677 }
449} 678}
450 679
451/* 680static void shrink_dentry_list(struct list_head *list)
452 * Shrink the dentry LRU on a given superblock.
453 * @sb : superblock to shrink dentry LRU.
454 * @count: If count is NULL, we prune all dentries on superblock.
455 * @flags: If flags is non-zero, we need to do special processing based on
456 * which flags are set. This means we don't need to maintain multiple
457 * similar copies of this loop.
458 */
459static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
460{ 681{
461 LIST_HEAD(referenced);
462 LIST_HEAD(tmp);
463 struct dentry *dentry; 682 struct dentry *dentry;
464 int cnt = 0;
465
466 BUG_ON(!sb);
467 BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
468 spin_lock(&dcache_lock);
469 if (count != NULL)
470 /* called from prune_dcache() and shrink_dcache_parent() */
471 cnt = *count;
472restart:
473 if (count == NULL)
474 list_splice_init(&sb->s_dentry_lru, &tmp);
475 else {
476 while (!list_empty(&sb->s_dentry_lru)) {
477 dentry = list_entry(sb->s_dentry_lru.prev,
478 struct dentry, d_lru);
479 BUG_ON(dentry->d_sb != sb);
480 683
481 spin_lock(&dentry->d_lock); 684 rcu_read_lock();
482 /* 685 for (;;) {
483 * If we are honouring the DCACHE_REFERENCED flag and 686 dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
484 * the dentry has this flag set, don't free it. Clear 687 if (&dentry->d_lru == list)
485 * the flag and put it back on the LRU. 688 break; /* empty */
486 */
487 if ((flags & DCACHE_REFERENCED)
488 && (dentry->d_flags & DCACHE_REFERENCED)) {
489 dentry->d_flags &= ~DCACHE_REFERENCED;
490 list_move(&dentry->d_lru, &referenced);
491 spin_unlock(&dentry->d_lock);
492 } else {
493 list_move_tail(&dentry->d_lru, &tmp);
494 spin_unlock(&dentry->d_lock);
495 cnt--;
496 if (!cnt)
497 break;
498 }
499 cond_resched_lock(&dcache_lock);
500 }
501 }
502 while (!list_empty(&tmp)) {
503 dentry = list_entry(tmp.prev, struct dentry, d_lru);
504 dentry_lru_del_init(dentry);
505 spin_lock(&dentry->d_lock); 689 spin_lock(&dentry->d_lock);
690 if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
691 spin_unlock(&dentry->d_lock);
692 continue;
693 }
694
506 /* 695 /*
507 * We found an inuse dentry which was not removed from 696 * We found an inuse dentry which was not removed from
508 * the LRU because of laziness during lookup. Do not free 697 * the LRU because of laziness during lookup. Do not free
509 * it - just keep it off the LRU list. 698 * it - just keep it off the LRU list.
510 */ 699 */
511 if (atomic_read(&dentry->d_count)) { 700 if (dentry->d_count) {
701 dentry_lru_del(dentry);
512 spin_unlock(&dentry->d_lock); 702 spin_unlock(&dentry->d_lock);
513 continue; 703 continue;
514 } 704 }
515 prune_one_dentry(dentry); 705
516 /* dentry->d_lock was dropped in prune_one_dentry() */ 706 rcu_read_unlock();
517 cond_resched_lock(&dcache_lock); 707
518 } 708 try_prune_one_dentry(dentry);
519 if (count == NULL && !list_empty(&sb->s_dentry_lru)) 709
520 goto restart; 710 rcu_read_lock();
521 if (count != NULL) 711 }
522 *count = cnt; 712 rcu_read_unlock();
713}
714
715/**
716 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
717 * @sb: superblock to shrink dentry LRU.
718 * @count: number of entries to prune
719 * @flags: flags to control the dentry processing
720 *
721 * If flags contains DCACHE_REFERENCED, referenced dentries will not be pruned.
722 */
723static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
724{
725 /* called from prune_dcache() and shrink_dcache_parent() */
726 struct dentry *dentry;
727 LIST_HEAD(referenced);
728 LIST_HEAD(tmp);
729 int cnt = *count;
730
731relock:
732 spin_lock(&dcache_lru_lock);
733 while (!list_empty(&sb->s_dentry_lru)) {
734 dentry = list_entry(sb->s_dentry_lru.prev,
735 struct dentry, d_lru);
736 BUG_ON(dentry->d_sb != sb);
737
738 if (!spin_trylock(&dentry->d_lock)) {
739 spin_unlock(&dcache_lru_lock);
740 cpu_relax();
741 goto relock;
742 }
743
744 /*
745 * If we are honouring the DCACHE_REFERENCED flag and the
746 * dentry has this flag set, don't free it. Clear the flag
747 * and put it back on the LRU.
748 */
749 if (flags & DCACHE_REFERENCED &&
750 dentry->d_flags & DCACHE_REFERENCED) {
751 dentry->d_flags &= ~DCACHE_REFERENCED;
752 list_move(&dentry->d_lru, &referenced);
753 spin_unlock(&dentry->d_lock);
754 } else {
755 list_move_tail(&dentry->d_lru, &tmp);
756 spin_unlock(&dentry->d_lock);
757 if (!--cnt)
758 break;
759 }
760 cond_resched_lock(&dcache_lru_lock);
761 }
523 if (!list_empty(&referenced)) 762 if (!list_empty(&referenced))
524 list_splice(&referenced, &sb->s_dentry_lru); 763 list_splice(&referenced, &sb->s_dentry_lru);
525 spin_unlock(&dcache_lock); 764 spin_unlock(&dcache_lru_lock);
765
766 shrink_dentry_list(&tmp);
767
768 *count = cnt;
526} 769}
527 770
528/** 771/**
@@ -544,7 +787,6 @@ static void prune_dcache(int count)
544 787
545 if (unused == 0 || count == 0) 788 if (unused == 0 || count == 0)
546 return; 789 return;
547 spin_lock(&dcache_lock);
548 if (count >= unused) 790 if (count >= unused)
549 prune_ratio = 1; 791 prune_ratio = 1;
550 else 792 else
@@ -581,11 +823,9 @@ static void prune_dcache(int count)
581 if (down_read_trylock(&sb->s_umount)) { 823 if (down_read_trylock(&sb->s_umount)) {
582 if ((sb->s_root != NULL) && 824 if ((sb->s_root != NULL) &&
583 (!list_empty(&sb->s_dentry_lru))) { 825 (!list_empty(&sb->s_dentry_lru))) {
584 spin_unlock(&dcache_lock);
585 __shrink_dcache_sb(sb, &w_count, 826 __shrink_dcache_sb(sb, &w_count,
586 DCACHE_REFERENCED); 827 DCACHE_REFERENCED);
587 pruned -= w_count; 828 pruned -= w_count;
588 spin_lock(&dcache_lock);
589 } 829 }
590 up_read(&sb->s_umount); 830 up_read(&sb->s_umount);
591 } 831 }
@@ -601,20 +841,27 @@ static void prune_dcache(int count)
601 if (p) 841 if (p)
602 __put_super(p); 842 __put_super(p);
603 spin_unlock(&sb_lock); 843 spin_unlock(&sb_lock);
604 spin_unlock(&dcache_lock);
605} 844}
606 845
607/** 846/**
608 * shrink_dcache_sb - shrink dcache for a superblock 847 * shrink_dcache_sb - shrink dcache for a superblock
609 * @sb: superblock 848 * @sb: superblock
610 * 849 *
611 * Shrink the dcache for the specified super block. This 850 * Shrink the dcache for the specified super block. This is used to free
612 * is used to free the dcache before unmounting a file 851 * the dcache before unmounting a file system.
613 * system
614 */ 852 */
615void shrink_dcache_sb(struct super_block * sb) 853void shrink_dcache_sb(struct super_block *sb)
616{ 854{
617 __shrink_dcache_sb(sb, NULL, 0); 855 LIST_HEAD(tmp);
856
857 spin_lock(&dcache_lru_lock);
858 while (!list_empty(&sb->s_dentry_lru)) {
859 list_splice_init(&sb->s_dentry_lru, &tmp);
860 spin_unlock(&dcache_lru_lock);
861 shrink_dentry_list(&tmp);
862 spin_lock(&dcache_lru_lock);
863 }
864 spin_unlock(&dcache_lru_lock);
618} 865}
619EXPORT_SYMBOL(shrink_dcache_sb); 866EXPORT_SYMBOL(shrink_dcache_sb);
620 867
@@ -631,10 +878,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
631 BUG_ON(!IS_ROOT(dentry)); 878 BUG_ON(!IS_ROOT(dentry));
632 879
633 /* detach this root from the system */ 880 /* detach this root from the system */
634 spin_lock(&dcache_lock); 881 spin_lock(&dentry->d_lock);
635 dentry_lru_del_init(dentry); 882 dentry_lru_del(dentry);
636 __d_drop(dentry); 883 __d_drop(dentry);
637 spin_unlock(&dcache_lock); 884 spin_unlock(&dentry->d_lock);
638 885
639 for (;;) { 886 for (;;) {
640 /* descend to the first leaf in the current subtree */ 887 /* descend to the first leaf in the current subtree */
@@ -643,14 +890,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
643 890
644 /* this is a branch with children - detach all of them 891 /* this is a branch with children - detach all of them
645 * from the system in one go */ 892 * from the system in one go */
646 spin_lock(&dcache_lock); 893 spin_lock(&dentry->d_lock);
647 list_for_each_entry(loop, &dentry->d_subdirs, 894 list_for_each_entry(loop, &dentry->d_subdirs,
648 d_u.d_child) { 895 d_u.d_child) {
649 dentry_lru_del_init(loop); 896 spin_lock_nested(&loop->d_lock,
897 DENTRY_D_LOCK_NESTED);
898 dentry_lru_del(loop);
650 __d_drop(loop); 899 __d_drop(loop);
651 cond_resched_lock(&dcache_lock); 900 spin_unlock(&loop->d_lock);
652 } 901 }
653 spin_unlock(&dcache_lock); 902 spin_unlock(&dentry->d_lock);
654 903
655 /* move to the first child */ 904 /* move to the first child */
656 dentry = list_entry(dentry->d_subdirs.next, 905 dentry = list_entry(dentry->d_subdirs.next,
@@ -662,7 +911,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
662 do { 911 do {
663 struct inode *inode; 912 struct inode *inode;
664 913
665 if (atomic_read(&dentry->d_count) != 0) { 914 if (dentry->d_count != 0) {
666 printk(KERN_ERR 915 printk(KERN_ERR
667 "BUG: Dentry %p{i=%lx,n=%s}" 916 "BUG: Dentry %p{i=%lx,n=%s}"
668 " still in use (%d)" 917 " still in use (%d)"
@@ -671,20 +920,23 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
671 dentry->d_inode ? 920 dentry->d_inode ?
672 dentry->d_inode->i_ino : 0UL, 921 dentry->d_inode->i_ino : 0UL,
673 dentry->d_name.name, 922 dentry->d_name.name,
674 atomic_read(&dentry->d_count), 923 dentry->d_count,
675 dentry->d_sb->s_type->name, 924 dentry->d_sb->s_type->name,
676 dentry->d_sb->s_id); 925 dentry->d_sb->s_id);
677 BUG(); 926 BUG();
678 } 927 }
679 928
680 if (IS_ROOT(dentry)) 929 if (IS_ROOT(dentry)) {
681 parent = NULL; 930 parent = NULL;
682 else { 931 list_del(&dentry->d_u.d_child);
932 } else {
683 parent = dentry->d_parent; 933 parent = dentry->d_parent;
684 atomic_dec(&parent->d_count); 934 spin_lock(&parent->d_lock);
935 parent->d_count--;
936 list_del(&dentry->d_u.d_child);
937 spin_unlock(&parent->d_lock);
685 } 938 }
686 939
687 list_del(&dentry->d_u.d_child);
688 detached++; 940 detached++;
689 941
690 inode = dentry->d_inode; 942 inode = dentry->d_inode;
@@ -703,26 +955,18 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
703 * otherwise we ascend to the parent and move to the 955 * otherwise we ascend to the parent and move to the
704 * next sibling if there is one */ 956 * next sibling if there is one */
705 if (!parent) 957 if (!parent)
706 goto out; 958 return;
707
708 dentry = parent; 959 dentry = parent;
709
710 } while (list_empty(&dentry->d_subdirs)); 960 } while (list_empty(&dentry->d_subdirs));
711 961
712 dentry = list_entry(dentry->d_subdirs.next, 962 dentry = list_entry(dentry->d_subdirs.next,
713 struct dentry, d_u.d_child); 963 struct dentry, d_u.d_child);
714 } 964 }
715out:
716 /* several dentries were freed, need to correct nr_dentry */
717 spin_lock(&dcache_lock);
718 dentry_stat.nr_dentry -= detached;
719 spin_unlock(&dcache_lock);
720} 965}
721 966
722/* 967/*
723 * destroy the dentries attached to a superblock on unmounting 968 * destroy the dentries attached to a superblock on unmounting
724 * - we don't need to use dentry->d_lock, and only need dcache_lock when 969 * - we don't need to use dentry->d_lock because:
725 * removing the dentry from the system lists and hashes because:
726 * - the superblock is detached from all mountings and open files, so the 970 * - the superblock is detached from all mountings and open files, so the
727 * dentry trees will not be rearranged by the VFS 971 * dentry trees will not be rearranged by the VFS
728 * - s_umount is write-locked, so the memory pressure shrinker will ignore 972 * - s_umount is write-locked, so the memory pressure shrinker will ignore
@@ -739,16 +983,47 @@ void shrink_dcache_for_umount(struct super_block *sb)
739 983
740 dentry = sb->s_root; 984 dentry = sb->s_root;
741 sb->s_root = NULL; 985 sb->s_root = NULL;
742 atomic_dec(&dentry->d_count); 986 spin_lock(&dentry->d_lock);
987 dentry->d_count--;
988 spin_unlock(&dentry->d_lock);
743 shrink_dcache_for_umount_subtree(dentry); 989 shrink_dcache_for_umount_subtree(dentry);
744 990
745 while (!hlist_empty(&sb->s_anon)) { 991 while (!hlist_bl_empty(&sb->s_anon)) {
746 dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); 992 dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
747 shrink_dcache_for_umount_subtree(dentry); 993 shrink_dcache_for_umount_subtree(dentry);
748 } 994 }
749} 995}
750 996
751/* 997/*
998 * This tries to ascend one level of parenthood, but
999 * we can race with renaming, so we need to re-check
1000 * the parenthood after dropping the lock and check
1001 * that the sequence number still matches.
1002 */
1003static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
1004{
1005 struct dentry *new = old->d_parent;
1006
1007 rcu_read_lock();
1008 spin_unlock(&old->d_lock);
1009 spin_lock(&new->d_lock);
1010
1011 /*
1012 * might go back up the wrong parent if we have had a rename
1013 * or deletion
1014 */
1015 if (new != old->d_parent ||
1016 (old->d_flags & DCACHE_DISCONNECTED) ||
1017 (!locked && read_seqretry(&rename_lock, seq))) {
1018 spin_unlock(&new->d_lock);
1019 new = NULL;
1020 }
1021 rcu_read_unlock();
1022 return new;
1023}
1024
1025
1026/*
752 * Search for at least 1 mount point in the dentry's subdirs. 1027 * Search for at least 1 mount point in the dentry's subdirs.
753 * We descend to the next level whenever the d_subdirs 1028 * We descend to the next level whenever the d_subdirs
754 * list is non-empty and continue searching. 1029 * list is non-empty and continue searching.
@@ -761,15 +1036,20 @@ void shrink_dcache_for_umount(struct super_block *sb)
761 * Return true if the parent or its subdirectories contain 1036 * Return true if the parent or its subdirectories contain
762 * a mount point 1037 * a mount point
763 */ 1038 */
764
765int have_submounts(struct dentry *parent) 1039int have_submounts(struct dentry *parent)
766{ 1040{
767 struct dentry *this_parent = parent; 1041 struct dentry *this_parent;
768 struct list_head *next; 1042 struct list_head *next;
1043 unsigned seq;
1044 int locked = 0;
1045
1046 seq = read_seqbegin(&rename_lock);
1047again:
1048 this_parent = parent;
769 1049
770 spin_lock(&dcache_lock);
771 if (d_mountpoint(parent)) 1050 if (d_mountpoint(parent))
772 goto positive; 1051 goto positive;
1052 spin_lock(&this_parent->d_lock);
773repeat: 1053repeat:
774 next = this_parent->d_subdirs.next; 1054 next = this_parent->d_subdirs.next;
775resume: 1055resume:
@@ -777,27 +1057,51 @@ resume:
777 struct list_head *tmp = next; 1057 struct list_head *tmp = next;
778 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 1058 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
779 next = tmp->next; 1059 next = tmp->next;
1060
1061 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
780 /* Have we found a mount point ? */ 1062 /* Have we found a mount point ? */
781 if (d_mountpoint(dentry)) 1063 if (d_mountpoint(dentry)) {
1064 spin_unlock(&dentry->d_lock);
1065 spin_unlock(&this_parent->d_lock);
782 goto positive; 1066 goto positive;
1067 }
783 if (!list_empty(&dentry->d_subdirs)) { 1068 if (!list_empty(&dentry->d_subdirs)) {
1069 spin_unlock(&this_parent->d_lock);
1070 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
784 this_parent = dentry; 1071 this_parent = dentry;
1072 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
785 goto repeat; 1073 goto repeat;
786 } 1074 }
1075 spin_unlock(&dentry->d_lock);
787 } 1076 }
788 /* 1077 /*
789 * All done at this level ... ascend and resume the search. 1078 * All done at this level ... ascend and resume the search.
790 */ 1079 */
791 if (this_parent != parent) { 1080 if (this_parent != parent) {
792 next = this_parent->d_u.d_child.next; 1081 struct dentry *child = this_parent;
793 this_parent = this_parent->d_parent; 1082 this_parent = try_to_ascend(this_parent, locked, seq);
1083 if (!this_parent)
1084 goto rename_retry;
1085 next = child->d_u.d_child.next;
794 goto resume; 1086 goto resume;
795 } 1087 }
796 spin_unlock(&dcache_lock); 1088 spin_unlock(&this_parent->d_lock);
1089 if (!locked && read_seqretry(&rename_lock, seq))
1090 goto rename_retry;
1091 if (locked)
1092 write_sequnlock(&rename_lock);
797 return 0; /* No mount points found in tree */ 1093 return 0; /* No mount points found in tree */
798positive: 1094positive:
799 spin_unlock(&dcache_lock); 1095 if (!locked && read_seqretry(&rename_lock, seq))
1096 goto rename_retry;
1097 if (locked)
1098 write_sequnlock(&rename_lock);
800 return 1; 1099 return 1;
1100
1101rename_retry:
1102 locked = 1;
1103 write_seqlock(&rename_lock);
1104 goto again;
801} 1105}
802EXPORT_SYMBOL(have_submounts); 1106EXPORT_SYMBOL(have_submounts);
803 1107
@@ -817,11 +1121,16 @@ EXPORT_SYMBOL(have_submounts);
817 */ 1121 */
818static int select_parent(struct dentry * parent) 1122static int select_parent(struct dentry * parent)
819{ 1123{
820 struct dentry *this_parent = parent; 1124 struct dentry *this_parent;
821 struct list_head *next; 1125 struct list_head *next;
1126 unsigned seq;
822 int found = 0; 1127 int found = 0;
1128 int locked = 0;
823 1129
824 spin_lock(&dcache_lock); 1130 seq = read_seqbegin(&rename_lock);
1131again:
1132 this_parent = parent;
1133 spin_lock(&this_parent->d_lock);
825repeat: 1134repeat:
826 next = this_parent->d_subdirs.next; 1135 next = this_parent->d_subdirs.next;
827resume: 1136resume:
@@ -830,14 +1139,17 @@ resume:
830 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 1139 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
831 next = tmp->next; 1140 next = tmp->next;
832 1141
833 dentry_lru_del_init(dentry); 1142 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1143
834 /* 1144 /*
835 * move only zero ref count dentries to the end 1145 * move only zero ref count dentries to the end
836 * of the unused list for prune_dcache 1146 * of the unused list for prune_dcache
837 */ 1147 */
838 if (!atomic_read(&dentry->d_count)) { 1148 if (!dentry->d_count) {
839 dentry_lru_add_tail(dentry); 1149 dentry_lru_move_tail(dentry);
840 found++; 1150 found++;
1151 } else {
1152 dentry_lru_del(dentry);
841 } 1153 }
842 1154
843 /* 1155 /*
@@ -845,28 +1157,49 @@ resume:
845 * ensures forward progress). We'll be coming back to find 1157 * ensures forward progress). We'll be coming back to find
846 * the rest. 1158 * the rest.
847 */ 1159 */
848 if (found && need_resched()) 1160 if (found && need_resched()) {
1161 spin_unlock(&dentry->d_lock);
849 goto out; 1162 goto out;
1163 }
850 1164
851 /* 1165 /*
852 * Descend a level if the d_subdirs list is non-empty. 1166 * Descend a level if the d_subdirs list is non-empty.
853 */ 1167 */
854 if (!list_empty(&dentry->d_subdirs)) { 1168 if (!list_empty(&dentry->d_subdirs)) {
1169 spin_unlock(&this_parent->d_lock);
1170 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
855 this_parent = dentry; 1171 this_parent = dentry;
1172 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
856 goto repeat; 1173 goto repeat;
857 } 1174 }
1175
1176 spin_unlock(&dentry->d_lock);
858 } 1177 }
859 /* 1178 /*
860 * All done at this level ... ascend and resume the search. 1179 * All done at this level ... ascend and resume the search.
861 */ 1180 */
862 if (this_parent != parent) { 1181 if (this_parent != parent) {
863 next = this_parent->d_u.d_child.next; 1182 struct dentry *child = this_parent;
864 this_parent = this_parent->d_parent; 1183 this_parent = try_to_ascend(this_parent, locked, seq);
1184 if (!this_parent)
1185 goto rename_retry;
1186 next = child->d_u.d_child.next;
865 goto resume; 1187 goto resume;
866 } 1188 }
867out: 1189out:
868 spin_unlock(&dcache_lock); 1190 spin_unlock(&this_parent->d_lock);
1191 if (!locked && read_seqretry(&rename_lock, seq))
1192 goto rename_retry;
1193 if (locked)
1194 write_sequnlock(&rename_lock);
869 return found; 1195 return found;
1196
1197rename_retry:
1198 if (found)
1199 return found;
1200 locked = 1;
1201 write_seqlock(&rename_lock);
1202 goto again;
870} 1203}
871 1204
872/** 1205/**
@@ -887,7 +1220,7 @@ void shrink_dcache_parent(struct dentry * parent)
887EXPORT_SYMBOL(shrink_dcache_parent); 1220EXPORT_SYMBOL(shrink_dcache_parent);
888 1221
889/* 1222/*
890 * Scan `nr' dentries and return the number which remain. 1223 * Scan `sc->nr_to_scan' dentries and return the number which remain.
891 * 1224 *
892 * We need to avoid reentering the filesystem if the caller is performing a 1225 * We need to avoid reentering the filesystem if the caller is performing a
893 * GFP_NOFS allocation attempt. One example deadlock is: 1226 * GFP_NOFS allocation attempt. One example deadlock is:
@@ -898,13 +1231,18 @@ EXPORT_SYMBOL(shrink_dcache_parent);
898 * 1231 *
899 * In this case we return -1 to tell the caller that we baled. 1232 * In this case we return -1 to tell the caller that we baled.
900 */ 1233 */
901static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 1234static int shrink_dcache_memory(struct shrinker *shrink,
1235 struct shrink_control *sc)
902{ 1236{
1237 int nr = sc->nr_to_scan;
1238 gfp_t gfp_mask = sc->gfp_mask;
1239
903 if (nr) { 1240 if (nr) {
904 if (!(gfp_mask & __GFP_FS)) 1241 if (!(gfp_mask & __GFP_FS))
905 return -1; 1242 return -1;
906 prune_dcache(nr); 1243 prune_dcache(nr);
907 } 1244 }
1245
908 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 1246 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
909} 1247}
910 1248
@@ -948,37 +1286,54 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
948 memcpy(dname, name->name, name->len); 1286 memcpy(dname, name->name, name->len);
949 dname[name->len] = 0; 1287 dname[name->len] = 0;
950 1288
951 atomic_set(&dentry->d_count, 1); 1289 dentry->d_count = 1;
952 dentry->d_flags = DCACHE_UNHASHED; 1290 dentry->d_flags = 0;
953 spin_lock_init(&dentry->d_lock); 1291 spin_lock_init(&dentry->d_lock);
1292 seqcount_init(&dentry->d_seq);
954 dentry->d_inode = NULL; 1293 dentry->d_inode = NULL;
955 dentry->d_parent = NULL; 1294 dentry->d_parent = NULL;
956 dentry->d_sb = NULL; 1295 dentry->d_sb = NULL;
957 dentry->d_op = NULL; 1296 dentry->d_op = NULL;
958 dentry->d_fsdata = NULL; 1297 dentry->d_fsdata = NULL;
959 dentry->d_mounted = 0; 1298 INIT_HLIST_BL_NODE(&dentry->d_hash);
960 INIT_HLIST_NODE(&dentry->d_hash);
961 INIT_LIST_HEAD(&dentry->d_lru); 1299 INIT_LIST_HEAD(&dentry->d_lru);
962 INIT_LIST_HEAD(&dentry->d_subdirs); 1300 INIT_LIST_HEAD(&dentry->d_subdirs);
963 INIT_LIST_HEAD(&dentry->d_alias); 1301 INIT_LIST_HEAD(&dentry->d_alias);
1302 INIT_LIST_HEAD(&dentry->d_u.d_child);
964 1303
965 if (parent) { 1304 if (parent) {
966 dentry->d_parent = dget(parent); 1305 spin_lock(&parent->d_lock);
1306 /*
1307 * don't need child lock because it is not subject
1308 * to concurrency here
1309 */
1310 __dget_dlock(parent);
1311 dentry->d_parent = parent;
967 dentry->d_sb = parent->d_sb; 1312 dentry->d_sb = parent->d_sb;
968 } else { 1313 d_set_d_op(dentry, dentry->d_sb->s_d_op);
969 INIT_LIST_HEAD(&dentry->d_u.d_child); 1314 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1315 spin_unlock(&parent->d_lock);
970 } 1316 }
971 1317
972 spin_lock(&dcache_lock); 1318 this_cpu_inc(nr_dentry);
973 if (parent)
974 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
975 dentry_stat.nr_dentry++;
976 spin_unlock(&dcache_lock);
977 1319
978 return dentry; 1320 return dentry;
979} 1321}
980EXPORT_SYMBOL(d_alloc); 1322EXPORT_SYMBOL(d_alloc);
981 1323
1324struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1325{
1326 struct dentry *dentry = d_alloc(NULL, name);
1327 if (dentry) {
1328 dentry->d_sb = sb;
1329 d_set_d_op(dentry, dentry->d_sb->s_d_op);
1330 dentry->d_parent = dentry;
1331 dentry->d_flags |= DCACHE_DISCONNECTED;
1332 }
1333 return dentry;
1334}
1335EXPORT_SYMBOL(d_alloc_pseudo);
1336
982struct dentry *d_alloc_name(struct dentry *parent, const char *name) 1337struct dentry *d_alloc_name(struct dentry *parent, const char *name)
983{ 1338{
984 struct qstr q; 1339 struct qstr q;
@@ -990,12 +1345,39 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
990} 1345}
991EXPORT_SYMBOL(d_alloc_name); 1346EXPORT_SYMBOL(d_alloc_name);
992 1347
993/* the caller must hold dcache_lock */ 1348void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1349{
1350 WARN_ON_ONCE(dentry->d_op);
1351 WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH |
1352 DCACHE_OP_COMPARE |
1353 DCACHE_OP_REVALIDATE |
1354 DCACHE_OP_DELETE ));
1355 dentry->d_op = op;
1356 if (!op)
1357 return;
1358 if (op->d_hash)
1359 dentry->d_flags |= DCACHE_OP_HASH;
1360 if (op->d_compare)
1361 dentry->d_flags |= DCACHE_OP_COMPARE;
1362 if (op->d_revalidate)
1363 dentry->d_flags |= DCACHE_OP_REVALIDATE;
1364 if (op->d_delete)
1365 dentry->d_flags |= DCACHE_OP_DELETE;
1366
1367}
1368EXPORT_SYMBOL(d_set_d_op);
1369
994static void __d_instantiate(struct dentry *dentry, struct inode *inode) 1370static void __d_instantiate(struct dentry *dentry, struct inode *inode)
995{ 1371{
996 if (inode) 1372 spin_lock(&dentry->d_lock);
1373 if (inode) {
1374 if (unlikely(IS_AUTOMOUNT(inode)))
1375 dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
997 list_add(&dentry->d_alias, &inode->i_dentry); 1376 list_add(&dentry->d_alias, &inode->i_dentry);
1377 }
998 dentry->d_inode = inode; 1378 dentry->d_inode = inode;
1379 dentry_rcuwalk_barrier(dentry);
1380 spin_unlock(&dentry->d_lock);
999 fsnotify_d_instantiate(dentry, inode); 1381 fsnotify_d_instantiate(dentry, inode);
1000} 1382}
1001 1383
@@ -1017,9 +1399,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1017void d_instantiate(struct dentry *entry, struct inode * inode) 1399void d_instantiate(struct dentry *entry, struct inode * inode)
1018{ 1400{
1019 BUG_ON(!list_empty(&entry->d_alias)); 1401 BUG_ON(!list_empty(&entry->d_alias));
1020 spin_lock(&dcache_lock); 1402 if (inode)
1403 spin_lock(&inode->i_lock);
1021 __d_instantiate(entry, inode); 1404 __d_instantiate(entry, inode);
1022 spin_unlock(&dcache_lock); 1405 if (inode)
1406 spin_unlock(&inode->i_lock);
1023 security_d_instantiate(entry, inode); 1407 security_d_instantiate(entry, inode);
1024} 1408}
1025EXPORT_SYMBOL(d_instantiate); 1409EXPORT_SYMBOL(d_instantiate);
@@ -1056,15 +1440,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1056 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 1440 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1057 struct qstr *qstr = &alias->d_name; 1441 struct qstr *qstr = &alias->d_name;
1058 1442
1443 /*
1444 * Don't need alias->d_lock here, because aliases with
1445 * d_parent == entry->d_parent are not subject to name or
1446 * parent changes, because the parent inode i_mutex is held.
1447 */
1059 if (qstr->hash != hash) 1448 if (qstr->hash != hash)
1060 continue; 1449 continue;
1061 if (alias->d_parent != entry->d_parent) 1450 if (alias->d_parent != entry->d_parent)
1062 continue; 1451 continue;
1063 if (qstr->len != len) 1452 if (dentry_cmp(qstr->name, qstr->len, name, len))
1064 continue;
1065 if (memcmp(qstr->name, name, len))
1066 continue; 1453 continue;
1067 dget_locked(alias); 1454 __dget(alias);
1068 return alias; 1455 return alias;
1069 } 1456 }
1070 1457
@@ -1078,9 +1465,11 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1078 1465
1079 BUG_ON(!list_empty(&entry->d_alias)); 1466 BUG_ON(!list_empty(&entry->d_alias));
1080 1467
1081 spin_lock(&dcache_lock); 1468 if (inode)
1469 spin_lock(&inode->i_lock);
1082 result = __d_instantiate_unique(entry, inode); 1470 result = __d_instantiate_unique(entry, inode);
1083 spin_unlock(&dcache_lock); 1471 if (inode)
1472 spin_unlock(&inode->i_lock);
1084 1473
1085 if (!result) { 1474 if (!result) {
1086 security_d_instantiate(entry, inode); 1475 security_d_instantiate(entry, inode);
@@ -1113,6 +1502,7 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1113 res = d_alloc(NULL, &name); 1502 res = d_alloc(NULL, &name);
1114 if (res) { 1503 if (res) {
1115 res->d_sb = root_inode->i_sb; 1504 res->d_sb = root_inode->i_sb;
1505 d_set_d_op(res, res->d_sb->s_d_op);
1116 res->d_parent = res; 1506 res->d_parent = res;
1117 d_instantiate(res, root_inode); 1507 d_instantiate(res, root_inode);
1118 } 1508 }
@@ -1121,14 +1511,28 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1121} 1511}
1122EXPORT_SYMBOL(d_alloc_root); 1512EXPORT_SYMBOL(d_alloc_root);
1123 1513
1124static inline struct hlist_head *d_hash(struct dentry *parent, 1514static struct dentry * __d_find_any_alias(struct inode *inode)
1125 unsigned long hash)
1126{ 1515{
1127 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; 1516 struct dentry *alias;
1128 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); 1517
1129 return dentry_hashtable + (hash & D_HASHMASK); 1518 if (list_empty(&inode->i_dentry))
1519 return NULL;
1520 alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1521 __dget(alias);
1522 return alias;
1130} 1523}
1131 1524
1525static struct dentry * d_find_any_alias(struct inode *inode)
1526{
1527 struct dentry *de;
1528
1529 spin_lock(&inode->i_lock);
1530 de = __d_find_any_alias(inode);
1531 spin_unlock(&inode->i_lock);
1532 return de;
1533}
1534
1535
1132/** 1536/**
1133 * d_obtain_alias - find or allocate a dentry for a given inode 1537 * d_obtain_alias - find or allocate a dentry for a given inode
1134 * @inode: inode to allocate the dentry for 1538 * @inode: inode to allocate the dentry for
@@ -1158,7 +1562,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1158 if (IS_ERR(inode)) 1562 if (IS_ERR(inode))
1159 return ERR_CAST(inode); 1563 return ERR_CAST(inode);
1160 1564
1161 res = d_find_alias(inode); 1565 res = d_find_any_alias(inode);
1162 if (res) 1566 if (res)
1163 goto out_iput; 1567 goto out_iput;
1164 1568
@@ -1169,10 +1573,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
1169 } 1573 }
1170 tmp->d_parent = tmp; /* make sure dput doesn't croak */ 1574 tmp->d_parent = tmp; /* make sure dput doesn't croak */
1171 1575
1172 spin_lock(&dcache_lock); 1576
1173 res = __d_find_alias(inode, 0); 1577 spin_lock(&inode->i_lock);
1578 res = __d_find_any_alias(inode);
1174 if (res) { 1579 if (res) {
1175 spin_unlock(&dcache_lock); 1580 spin_unlock(&inode->i_lock);
1176 dput(tmp); 1581 dput(tmp);
1177 goto out_iput; 1582 goto out_iput;
1178 } 1583 }
@@ -1180,17 +1585,22 @@ struct dentry *d_obtain_alias(struct inode *inode)
1180 /* attach a disconnected dentry */ 1585 /* attach a disconnected dentry */
1181 spin_lock(&tmp->d_lock); 1586 spin_lock(&tmp->d_lock);
1182 tmp->d_sb = inode->i_sb; 1587 tmp->d_sb = inode->i_sb;
1588 d_set_d_op(tmp, tmp->d_sb->s_d_op);
1183 tmp->d_inode = inode; 1589 tmp->d_inode = inode;
1184 tmp->d_flags |= DCACHE_DISCONNECTED; 1590 tmp->d_flags |= DCACHE_DISCONNECTED;
1185 tmp->d_flags &= ~DCACHE_UNHASHED;
1186 list_add(&tmp->d_alias, &inode->i_dentry); 1591 list_add(&tmp->d_alias, &inode->i_dentry);
1187 hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); 1592 hlist_bl_lock(&tmp->d_sb->s_anon);
1593 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1594 hlist_bl_unlock(&tmp->d_sb->s_anon);
1188 spin_unlock(&tmp->d_lock); 1595 spin_unlock(&tmp->d_lock);
1596 spin_unlock(&inode->i_lock);
1597 security_d_instantiate(tmp, inode);
1189 1598
1190 spin_unlock(&dcache_lock);
1191 return tmp; 1599 return tmp;
1192 1600
1193 out_iput: 1601 out_iput:
1602 if (res && !IS_ERR(res))
1603 security_d_instantiate(res, inode);
1194 iput(inode); 1604 iput(inode);
1195 return res; 1605 return res;
1196} 1606}
@@ -1217,18 +1627,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1217 struct dentry *new = NULL; 1627 struct dentry *new = NULL;
1218 1628
1219 if (inode && S_ISDIR(inode->i_mode)) { 1629 if (inode && S_ISDIR(inode->i_mode)) {
1220 spin_lock(&dcache_lock); 1630 spin_lock(&inode->i_lock);
1221 new = __d_find_alias(inode, 1); 1631 new = __d_find_alias(inode, 1);
1222 if (new) { 1632 if (new) {
1223 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); 1633 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1224 spin_unlock(&dcache_lock); 1634 spin_unlock(&inode->i_lock);
1225 security_d_instantiate(new, inode); 1635 security_d_instantiate(new, inode);
1226 d_move(new, dentry); 1636 d_move(new, dentry);
1227 iput(inode); 1637 iput(inode);
1228 } else { 1638 } else {
1229 /* already taking dcache_lock, so d_add() by hand */ 1639 /* already taking inode->i_lock, so d_add() by hand */
1230 __d_instantiate(dentry, inode); 1640 __d_instantiate(dentry, inode);
1231 spin_unlock(&dcache_lock); 1641 spin_unlock(&inode->i_lock);
1232 security_d_instantiate(dentry, inode); 1642 security_d_instantiate(dentry, inode);
1233 d_rehash(dentry); 1643 d_rehash(dentry);
1234 } 1644 }
@@ -1301,10 +1711,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1301 * Negative dentry: instantiate it unless the inode is a directory and 1711 * Negative dentry: instantiate it unless the inode is a directory and
1302 * already has a dentry. 1712 * already has a dentry.
1303 */ 1713 */
1304 spin_lock(&dcache_lock); 1714 spin_lock(&inode->i_lock);
1305 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { 1715 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
1306 __d_instantiate(found, inode); 1716 __d_instantiate(found, inode);
1307 spin_unlock(&dcache_lock); 1717 spin_unlock(&inode->i_lock);
1308 security_d_instantiate(found, inode); 1718 security_d_instantiate(found, inode);
1309 return found; 1719 return found;
1310 } 1720 }
@@ -1314,8 +1724,8 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1314 * reference to it, move it in place and use it. 1724 * reference to it, move it in place and use it.
1315 */ 1725 */
1316 new = list_entry(inode->i_dentry.next, struct dentry, d_alias); 1726 new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
1317 dget_locked(new); 1727 __dget(new);
1318 spin_unlock(&dcache_lock); 1728 spin_unlock(&inode->i_lock);
1319 security_d_instantiate(found, inode); 1729 security_d_instantiate(found, inode);
1320 d_move(new, found); 1730 d_move(new, found);
1321 iput(inode); 1731 iput(inode);
@@ -1329,6 +1739,110 @@ err_out:
1329EXPORT_SYMBOL(d_add_ci); 1739EXPORT_SYMBOL(d_add_ci);
1330 1740
1331/** 1741/**
1742 * __d_lookup_rcu - search for a dentry (racy, store-free)
1743 * @parent: parent dentry
1744 * @name: qstr of name we wish to find
1745 * @seq: returns d_seq value at the point where the dentry was found
1746 * @inode: returns dentry->d_inode when the inode was found valid.
1747 * Returns: dentry, or NULL
1748 *
1749 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1750 * resolution (store-free path walking) design described in
1751 * Documentation/filesystems/path-lookup.txt.
1752 *
1753 * This is not to be used outside core vfs.
1754 *
1755 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
1756 * held, and rcu_read_lock held. The returned dentry must not be stored into
1757 * without taking d_lock and checking d_seq sequence count against @seq
1758 * returned here.
1759 *
1760 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1761 * function.
1762 *
1763 * Alternatively, __d_lookup_rcu may be called again to look up the child of
1764 * the returned dentry, so long as its parent's seqlock is checked after the
1765 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1766 * is formed, giving integrity down the path walk.
1767 */
1768struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1769 unsigned *seq, struct inode **inode)
1770{
1771 unsigned int len = name->len;
1772 unsigned int hash = name->hash;
1773 const unsigned char *str = name->name;
1774 struct hlist_bl_head *b = d_hash(parent, hash);
1775 struct hlist_bl_node *node;
1776 struct dentry *dentry;
1777
1778 /*
1779 * Note: There is significant duplication with __d_lookup which is
1780 * required to prevent single threaded performance regressions
1781 * especially on architectures where smp_rmb (in seqcounts) are costly.
1782 * Keep the two functions in sync.
1783 */
1784
1785 /*
1786 * The hash list is protected using RCU.
1787 *
1788 * Carefully use d_seq when comparing a candidate dentry, to avoid
1789 * races with d_move().
1790 *
1791 * It is possible that concurrent renames can mess up our list
1792 * walk here and result in missing our dentry, resulting in the
1793 * false-negative result. d_lookup() protects against concurrent
1794 * renames using rename_lock seqlock.
1795 *
1796 * See Documentation/filesystems/path-lookup.txt for more details.
1797 */
1798 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1799 struct inode *i;
1800 const char *tname;
1801 int tlen;
1802
1803 if (dentry->d_name.hash != hash)
1804 continue;
1805
1806seqretry:
1807 *seq = read_seqcount_begin(&dentry->d_seq);
1808 if (dentry->d_parent != parent)
1809 continue;
1810 if (d_unhashed(dentry))
1811 continue;
1812 tlen = dentry->d_name.len;
1813 tname = dentry->d_name.name;
1814 i = dentry->d_inode;
1815 prefetch(tname);
1816 /*
1817 * This seqcount check is required to ensure name and
1818 * len are loaded atomically, so as not to walk off the
1819 * edge of memory when walking. If we could load this
1820 * atomically some other way, we could drop this check.
1821 */
1822 if (read_seqcount_retry(&dentry->d_seq, *seq))
1823 goto seqretry;
1824 if (parent->d_flags & DCACHE_OP_COMPARE) {
1825 if (parent->d_op->d_compare(parent, *inode,
1826 dentry, i,
1827 tlen, tname, name))
1828 continue;
1829 } else {
1830 if (dentry_cmp(tname, tlen, str, len))
1831 continue;
1832 }
1833 /*
1834 * No extra seqcount check is required after the name
1835 * compare. The caller must perform a seqcount check in
1836 * order to do anything useful with the returned dentry
1837 * anyway.
1838 */
1839 *inode = i;
1840 return dentry;
1841 }
1842 return NULL;
1843}
1844
1845/**
1332 * d_lookup - search for a dentry 1846 * d_lookup - search for a dentry
1333 * @parent: parent dentry 1847 * @parent: parent dentry
1334 * @name: qstr of name we wish to find 1848 * @name: qstr of name we wish to find
@@ -1339,10 +1853,10 @@ EXPORT_SYMBOL(d_add_ci);
1339 * dentry is returned. The caller must use dput to free the entry when it has 1853 * dentry is returned. The caller must use dput to free the entry when it has
1340 * finished using it. %NULL is returned if the dentry does not exist. 1854 * finished using it. %NULL is returned if the dentry does not exist.
1341 */ 1855 */
1342struct dentry * d_lookup(struct dentry * parent, struct qstr * name) 1856struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
1343{ 1857{
1344 struct dentry * dentry = NULL; 1858 struct dentry *dentry;
1345 unsigned long seq; 1859 unsigned seq;
1346 1860
1347 do { 1861 do {
1348 seq = read_seqbegin(&rename_lock); 1862 seq = read_seqbegin(&rename_lock);
@@ -1354,7 +1868,7 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1354} 1868}
1355EXPORT_SYMBOL(d_lookup); 1869EXPORT_SYMBOL(d_lookup);
1356 1870
1357/* 1871/**
1358 * __d_lookup - search for a dentry (racy) 1872 * __d_lookup - search for a dentry (racy)
1359 * @parent: parent dentry 1873 * @parent: parent dentry
1360 * @name: qstr of name we wish to find 1874 * @name: qstr of name we wish to find
@@ -1369,17 +1883,24 @@ EXPORT_SYMBOL(d_lookup);
1369 * 1883 *
1370 * __d_lookup callers must be commented. 1884 * __d_lookup callers must be commented.
1371 */ 1885 */
1372struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) 1886struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1373{ 1887{
1374 unsigned int len = name->len; 1888 unsigned int len = name->len;
1375 unsigned int hash = name->hash; 1889 unsigned int hash = name->hash;
1376 const unsigned char *str = name->name; 1890 const unsigned char *str = name->name;
1377 struct hlist_head *head = d_hash(parent,hash); 1891 struct hlist_bl_head *b = d_hash(parent, hash);
1892 struct hlist_bl_node *node;
1378 struct dentry *found = NULL; 1893 struct dentry *found = NULL;
1379 struct hlist_node *node;
1380 struct dentry *dentry; 1894 struct dentry *dentry;
1381 1895
1382 /* 1896 /*
1897 * Note: There is significant duplication with __d_lookup_rcu which is
1898 * required to prevent single threaded performance regressions
1899 * especially on architectures where smp_rmb (in seqcounts) are costly.
1900 * Keep the two functions in sync.
1901 */
1902
1903 /*
1383 * The hash list is protected using RCU. 1904 * The hash list is protected using RCU.
1384 * 1905 *
1385 * Take d_lock when comparing a candidate dentry, to avoid races 1906 * Take d_lock when comparing a candidate dentry, to avoid races
@@ -1390,29 +1911,20 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1390 * false-negative result. d_lookup() protects against concurrent 1911 * false-negative result. d_lookup() protects against concurrent
1391 * renames using rename_lock seqlock. 1912 * renames using rename_lock seqlock.
1392 * 1913 *
1393 * See Documentation/vfs/dcache-locking.txt for more details. 1914 * See Documentation/filesystems/path-lookup.txt for more details.
1394 */ 1915 */
1395 rcu_read_lock(); 1916 rcu_read_lock();
1396 1917
1397 hlist_for_each_entry_rcu(dentry, node, head, d_hash) { 1918 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1398 struct qstr *qstr; 1919 const char *tname;
1920 int tlen;
1399 1921
1400 if (dentry->d_name.hash != hash) 1922 if (dentry->d_name.hash != hash)
1401 continue; 1923 continue;
1402 if (dentry->d_parent != parent)
1403 continue;
1404 1924
1405 spin_lock(&dentry->d_lock); 1925 spin_lock(&dentry->d_lock);
1406
1407 /*
1408 * Recheck the dentry after taking the lock - d_move may have
1409 * changed things. Don't bother checking the hash because
1410 * we're about to compare the whole name anyway.
1411 */
1412 if (dentry->d_parent != parent) 1926 if (dentry->d_parent != parent)
1413 goto next; 1927 goto next;
1414
1415 /* non-existing due to RCU? */
1416 if (d_unhashed(dentry)) 1928 if (d_unhashed(dentry))
1417 goto next; 1929 goto next;
1418 1930
@@ -1420,18 +1932,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1420 * It is safe to compare names since d_move() cannot 1932 * It is safe to compare names since d_move() cannot
1421 * change the qstr (protected by d_lock). 1933 * change the qstr (protected by d_lock).
1422 */ 1934 */
1423 qstr = &dentry->d_name; 1935 tlen = dentry->d_name.len;
1424 if (parent->d_op && parent->d_op->d_compare) { 1936 tname = dentry->d_name.name;
1425 if (parent->d_op->d_compare(parent, qstr, name)) 1937 if (parent->d_flags & DCACHE_OP_COMPARE) {
1938 if (parent->d_op->d_compare(parent, parent->d_inode,
1939 dentry, dentry->d_inode,
1940 tlen, tname, name))
1426 goto next; 1941 goto next;
1427 } else { 1942 } else {
1428 if (qstr->len != len) 1943 if (dentry_cmp(tname, tlen, str, len))
1429 goto next;
1430 if (memcmp(qstr->name, str, len))
1431 goto next; 1944 goto next;
1432 } 1945 }
1433 1946
1434 atomic_inc(&dentry->d_count); 1947 dentry->d_count++;
1435 found = dentry; 1948 found = dentry;
1436 spin_unlock(&dentry->d_lock); 1949 spin_unlock(&dentry->d_lock);
1437 break; 1950 break;
@@ -1460,8 +1973,8 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1460 * routine may choose to leave the hash value unchanged. 1973 * routine may choose to leave the hash value unchanged.
1461 */ 1974 */
1462 name->hash = full_name_hash(name->name, name->len); 1975 name->hash = full_name_hash(name->name, name->len);
1463 if (dir->d_op && dir->d_op->d_hash) { 1976 if (dir->d_flags & DCACHE_OP_HASH) {
1464 if (dir->d_op->d_hash(dir, name) < 0) 1977 if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1465 goto out; 1978 goto out;
1466 } 1979 }
1467 dentry = d_lookup(dir, name); 1980 dentry = d_lookup(dir, name);
@@ -1470,41 +1983,32 @@ out:
1470} 1983}
1471 1984
1472/** 1985/**
1473 * d_validate - verify dentry provided from insecure source 1986 * d_validate - verify dentry provided from insecure source (deprecated)
1474 * @dentry: The dentry alleged to be valid child of @dparent 1987 * @dentry: The dentry alleged to be valid child of @dparent
1475 * @dparent: The parent dentry (known to be valid) 1988 * @dparent: The parent dentry (known to be valid)
1476 * 1989 *
1477 * An insecure source has sent us a dentry, here we verify it and dget() it. 1990 * An insecure source has sent us a dentry, here we verify it and dget() it.
1478 * This is used by ncpfs in its readdir implementation. 1991 * This is used by ncpfs in its readdir implementation.
1479 * Zero is returned if the dentry is invalid. 1992 * Zero is returned if the dentry is invalid.
1993 *
1994 * This function is slow for big directories and is deprecated; do not use it.
1480 */ 1995 */
1481
1482int d_validate(struct dentry *dentry, struct dentry *dparent) 1996int d_validate(struct dentry *dentry, struct dentry *dparent)
1483{ 1997{
1484 struct hlist_head *base; 1998 struct dentry *child;
1485 struct hlist_node *lhp;
1486
1487 /* Check whether the ptr might be valid at all.. */
1488 if (!kmem_ptr_validate(dentry_cache, dentry))
1489 goto out;
1490
1491 if (dentry->d_parent != dparent)
1492 goto out;
1493 1999
1494 spin_lock(&dcache_lock); 2000 spin_lock(&dparent->d_lock);
1495 base = d_hash(dparent, dentry->d_name.hash); 2001 list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
1496 hlist_for_each(lhp,base) { 2002 if (dentry == child) {
1497 /* hlist_for_each_entry_rcu() not required for d_hash list 2003 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1498 * as it is parsed under dcache_lock 2004 __dget_dlock(dentry);
1499 */ 2005 spin_unlock(&dentry->d_lock);
1500 if (dentry == hlist_entry(lhp, struct dentry, d_hash)) { 2006 spin_unlock(&dparent->d_lock);
1501 __dget_locked(dentry);
1502 spin_unlock(&dcache_lock);
1503 return 1; 2007 return 1;
1504 } 2008 }
1505 } 2009 }
1506 spin_unlock(&dcache_lock); 2010 spin_unlock(&dparent->d_lock);
1507out: 2011
1508 return 0; 2012 return 0;
1509} 2013}
1510EXPORT_SYMBOL(d_validate); 2014EXPORT_SYMBOL(d_validate);
@@ -1532,16 +2036,23 @@ EXPORT_SYMBOL(d_validate);
1532 2036
1533void d_delete(struct dentry * dentry) 2037void d_delete(struct dentry * dentry)
1534{ 2038{
2039 struct inode *inode;
1535 int isdir = 0; 2040 int isdir = 0;
1536 /* 2041 /*
1537 * Are we the only user? 2042 * Are we the only user?
1538 */ 2043 */
1539 spin_lock(&dcache_lock); 2044again:
1540 spin_lock(&dentry->d_lock); 2045 spin_lock(&dentry->d_lock);
1541 isdir = S_ISDIR(dentry->d_inode->i_mode); 2046 inode = dentry->d_inode;
1542 if (atomic_read(&dentry->d_count) == 1) { 2047 isdir = S_ISDIR(inode->i_mode);
2048 if (dentry->d_count == 1) {
2049 if (inode && !spin_trylock(&inode->i_lock)) {
2050 spin_unlock(&dentry->d_lock);
2051 cpu_relax();
2052 goto again;
2053 }
1543 dentry->d_flags &= ~DCACHE_CANT_MOUNT; 2054 dentry->d_flags &= ~DCACHE_CANT_MOUNT;
1544 dentry_iput(dentry); 2055 dentry_unlink_inode(dentry);
1545 fsnotify_nameremove(dentry, isdir); 2056 fsnotify_nameremove(dentry, isdir);
1546 return; 2057 return;
1547 } 2058 }
@@ -1550,17 +2061,18 @@ void d_delete(struct dentry * dentry)
1550 __d_drop(dentry); 2061 __d_drop(dentry);
1551 2062
1552 spin_unlock(&dentry->d_lock); 2063 spin_unlock(&dentry->d_lock);
1553 spin_unlock(&dcache_lock);
1554 2064
1555 fsnotify_nameremove(dentry, isdir); 2065 fsnotify_nameremove(dentry, isdir);
1556} 2066}
1557EXPORT_SYMBOL(d_delete); 2067EXPORT_SYMBOL(d_delete);
1558 2068
1559static void __d_rehash(struct dentry * entry, struct hlist_head *list) 2069static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
1560{ 2070{
1561 2071 BUG_ON(!d_unhashed(entry));
1562 entry->d_flags &= ~DCACHE_UNHASHED; 2072 hlist_bl_lock(b);
1563 hlist_add_head_rcu(&entry->d_hash, list); 2073 entry->d_flags |= DCACHE_RCUACCESS;
2074 hlist_bl_add_head_rcu(&entry->d_hash, b);
2075 hlist_bl_unlock(b);
1564} 2076}
1565 2077
1566static void _d_rehash(struct dentry * entry) 2078static void _d_rehash(struct dentry * entry)
@@ -1577,25 +2089,39 @@ static void _d_rehash(struct dentry * entry)
1577 2089
1578void d_rehash(struct dentry * entry) 2090void d_rehash(struct dentry * entry)
1579{ 2091{
1580 spin_lock(&dcache_lock);
1581 spin_lock(&entry->d_lock); 2092 spin_lock(&entry->d_lock);
1582 _d_rehash(entry); 2093 _d_rehash(entry);
1583 spin_unlock(&entry->d_lock); 2094 spin_unlock(&entry->d_lock);
1584 spin_unlock(&dcache_lock);
1585} 2095}
1586EXPORT_SYMBOL(d_rehash); 2096EXPORT_SYMBOL(d_rehash);
1587 2097
1588/* 2098/**
1589 * When switching names, the actual string doesn't strictly have to 2099 * dentry_update_name_case - update case insensitive dentry with a new name
1590 * be preserved in the target - because we're dropping the target 2100 * @dentry: dentry to be updated
1591 * anyway. As such, we can just do a simple memcpy() to copy over 2101 * @name: new name
1592 * the new name before we switch.
1593 * 2102 *
1594 * Note that we have to be a lot more careful about getting the hash 2103 * Update a case insensitive dentry with new case of name.
1595 * switched - we have to switch the hash value properly even if it 2104 *
1596 * then no longer matches the actual (corrupted) string of the target. 2105 * dentry must have been returned by d_lookup with name @name. Old and new
1597 * The hash value has to match the hash queue that the dentry is on.. 2106 * name lengths must match (ie. no d_compare which allows mismatched name
2107 * lengths).
2108 *
2109 * Parent inode i_mutex must be held over d_lookup and into this call (to
2110 * keep renames and concurrent inserts, and readdir(2) away).
1598 */ 2111 */
2112void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2113{
2114 BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2115 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2116
2117 spin_lock(&dentry->d_lock);
2118 write_seqcount_begin(&dentry->d_seq);
2119 memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2120 write_seqcount_end(&dentry->d_seq);
2121 spin_unlock(&dentry->d_lock);
2122}
2123EXPORT_SYMBOL(dentry_update_name_case);
2124
1599static void switch_names(struct dentry *dentry, struct dentry *target) 2125static void switch_names(struct dentry *dentry, struct dentry *target)
1600{ 2126{
1601 if (dname_external(target)) { 2127 if (dname_external(target)) {
@@ -1637,54 +2163,83 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
1637 swap(dentry->d_name.len, target->d_name.len); 2163 swap(dentry->d_name.len, target->d_name.len);
1638} 2164}
1639 2165
2166static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2167{
2168 /*
2169 * XXXX: do we really need to take target->d_lock?
2170 */
2171 if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2172 spin_lock(&target->d_parent->d_lock);
2173 else {
2174 if (d_ancestor(dentry->d_parent, target->d_parent)) {
2175 spin_lock(&dentry->d_parent->d_lock);
2176 spin_lock_nested(&target->d_parent->d_lock,
2177 DENTRY_D_LOCK_NESTED);
2178 } else {
2179 spin_lock(&target->d_parent->d_lock);
2180 spin_lock_nested(&dentry->d_parent->d_lock,
2181 DENTRY_D_LOCK_NESTED);
2182 }
2183 }
2184 if (target < dentry) {
2185 spin_lock_nested(&target->d_lock, 2);
2186 spin_lock_nested(&dentry->d_lock, 3);
2187 } else {
2188 spin_lock_nested(&dentry->d_lock, 2);
2189 spin_lock_nested(&target->d_lock, 3);
2190 }
2191}
2192
2193static void dentry_unlock_parents_for_move(struct dentry *dentry,
2194 struct dentry *target)
2195{
2196 if (target->d_parent != dentry->d_parent)
2197 spin_unlock(&dentry->d_parent->d_lock);
2198 if (target->d_parent != target)
2199 spin_unlock(&target->d_parent->d_lock);
2200}
2201
1640/* 2202/*
1641 * We cannibalize "target" when moving dentry on top of it, 2203 * When switching names, the actual string doesn't strictly have to
1642 * because it's going to be thrown away anyway. We could be more 2204 * be preserved in the target - because we're dropping the target
1643 * polite about it, though. 2205 * anyway. As such, we can just do a simple memcpy() to copy over
1644 * 2206 * the new name before we switch.
1645 * This forceful removal will result in ugly /proc output if 2207 *
1646 * somebody holds a file open that got deleted due to a rename. 2208 * Note that we have to be a lot more careful about getting the hash
1647 * We could be nicer about the deleted file, and let it show 2209 * switched - we have to switch the hash value properly even if it
1648 * up under the name it had before it was deleted rather than 2210 * then no longer matches the actual (corrupted) string of the target.
1649 * under the original name of the file that was moved on top of it. 2211 * The hash value has to match the hash queue that the dentry is on..
1650 */ 2212 */
1651
1652/* 2213/*
1653 * d_move_locked - move a dentry 2214 * __d_move - move a dentry
1654 * @dentry: entry to move 2215 * @dentry: entry to move
1655 * @target: new dentry 2216 * @target: new dentry
1656 * 2217 *
1657 * Update the dcache to reflect the move of a file name. Negative 2218 * Update the dcache to reflect the move of a file name. Negative
1658 * dcache entries should not be moved in this way. 2219 * dcache entries should not be moved in this way. Caller holds
2220 * rename_lock.
1659 */ 2221 */
1660static void d_move_locked(struct dentry * dentry, struct dentry * target) 2222static void __d_move(struct dentry * dentry, struct dentry * target)
1661{ 2223{
1662 struct hlist_head *list;
1663
1664 if (!dentry->d_inode) 2224 if (!dentry->d_inode)
1665 printk(KERN_WARNING "VFS: moving negative dcache entry\n"); 2225 printk(KERN_WARNING "VFS: moving negative dcache entry\n");
1666 2226
1667 write_seqlock(&rename_lock); 2227 BUG_ON(d_ancestor(dentry, target));
1668 /* 2228 BUG_ON(d_ancestor(target, dentry));
1669 * XXXX: do we really need to take target->d_lock?
1670 */
1671 if (target < dentry) {
1672 spin_lock(&target->d_lock);
1673 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1674 } else {
1675 spin_lock(&dentry->d_lock);
1676 spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
1677 }
1678 2229
1679 /* Move the dentry to the target hash queue, if on different bucket */ 2230 dentry_lock_for_move(dentry, target);
1680 if (d_unhashed(dentry))
1681 goto already_unhashed;
1682 2231
1683 hlist_del_rcu(&dentry->d_hash); 2232 write_seqcount_begin(&dentry->d_seq);
2233 write_seqcount_begin(&target->d_seq);
1684 2234
1685already_unhashed: 2235 /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
1686 list = d_hash(target->d_parent, target->d_name.hash); 2236
1687 __d_rehash(dentry, list); 2237 /*
2238 * Move the dentry to the target hash queue. Don't bother checking
2239 * for the same hash queue because of how unlikely it is.
2240 */
2241 __d_drop(dentry);
2242 __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
1688 2243
1689 /* Unhash the target: dput() will then get rid of it */ 2244 /* Unhash the target: dput() will then get rid of it */
1690 __d_drop(target); 2245 __d_drop(target);
@@ -1709,13 +2264,17 @@ already_unhashed:
1709 } 2264 }
1710 2265
1711 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); 2266 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2267
2268 write_seqcount_end(&target->d_seq);
2269 write_seqcount_end(&dentry->d_seq);
2270
2271 dentry_unlock_parents_for_move(dentry, target);
1712 spin_unlock(&target->d_lock); 2272 spin_unlock(&target->d_lock);
1713 fsnotify_d_move(dentry); 2273 fsnotify_d_move(dentry);
1714 spin_unlock(&dentry->d_lock); 2274 spin_unlock(&dentry->d_lock);
1715 write_sequnlock(&rename_lock);
1716} 2275}
1717 2276
1718/** 2277/*
1719 * d_move - move a dentry 2278 * d_move - move a dentry
1720 * @dentry: entry to move 2279 * @dentry: entry to move
1721 * @target: new dentry 2280 * @target: new dentry
@@ -1723,12 +2282,11 @@ already_unhashed:
1723 * Update the dcache to reflect the move of a file name. Negative 2282 * Update the dcache to reflect the move of a file name. Negative
1724 * dcache entries should not be moved in this way. 2283 * dcache entries should not be moved in this way.
1725 */ 2284 */
1726 2285void d_move(struct dentry *dentry, struct dentry *target)
1727void d_move(struct dentry * dentry, struct dentry * target)
1728{ 2286{
1729 spin_lock(&dcache_lock); 2287 write_seqlock(&rename_lock);
1730 d_move_locked(dentry, target); 2288 __d_move(dentry, target);
1731 spin_unlock(&dcache_lock); 2289 write_sequnlock(&rename_lock);
1732} 2290}
1733EXPORT_SYMBOL(d_move); 2291EXPORT_SYMBOL(d_move);
1734 2292
@@ -1755,13 +2313,13 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
1755 * This helper attempts to cope with remotely renamed directories 2313 * This helper attempts to cope with remotely renamed directories
1756 * 2314 *
1757 * It assumes that the caller is already holding 2315 * It assumes that the caller is already holding
1758 * dentry->d_parent->d_inode->i_mutex and the dcache_lock 2316 * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
1759 * 2317 *
1760 * Note: If ever the locking in lock_rename() changes, then please 2318 * Note: If ever the locking in lock_rename() changes, then please
1761 * remember to update this too... 2319 * remember to update this too...
1762 */ 2320 */
1763static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) 2321static struct dentry *__d_unalias(struct inode *inode,
1764 __releases(dcache_lock) 2322 struct dentry *dentry, struct dentry *alias)
1765{ 2323{
1766 struct mutex *m1 = NULL, *m2 = NULL; 2324 struct mutex *m1 = NULL, *m2 = NULL;
1767 struct dentry *ret; 2325 struct dentry *ret;
@@ -1770,11 +2328,6 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1770 if (alias->d_parent == dentry->d_parent) 2328 if (alias->d_parent == dentry->d_parent)
1771 goto out_unalias; 2329 goto out_unalias;
1772 2330
1773 /* Check for loops */
1774 ret = ERR_PTR(-ELOOP);
1775 if (d_ancestor(alias, dentry))
1776 goto out_err;
1777
1778 /* See lock_rename() */ 2331 /* See lock_rename() */
1779 ret = ERR_PTR(-EBUSY); 2332 ret = ERR_PTR(-EBUSY);
1780 if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) 2333 if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
@@ -1784,10 +2337,10 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1784 goto out_err; 2337 goto out_err;
1785 m2 = &alias->d_parent->d_inode->i_mutex; 2338 m2 = &alias->d_parent->d_inode->i_mutex;
1786out_unalias: 2339out_unalias:
1787 d_move_locked(alias, dentry); 2340 __d_move(alias, dentry);
1788 ret = alias; 2341 ret = alias;
1789out_err: 2342out_err:
1790 spin_unlock(&dcache_lock); 2343 spin_unlock(&inode->i_lock);
1791 if (m2) 2344 if (m2)
1792 mutex_unlock(m2); 2345 mutex_unlock(m2);
1793 if (m1) 2346 if (m1)
@@ -1798,17 +2351,23 @@ out_err:
1798/* 2351/*
1799 * Prepare an anonymous dentry for life in the superblock's dentry tree as a 2352 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
1800 * named dentry in place of the dentry to be replaced. 2353 * named dentry in place of the dentry to be replaced.
2354 * returns with anon->d_lock held!
1801 */ 2355 */
1802static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) 2356static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1803{ 2357{
1804 struct dentry *dparent, *aparent; 2358 struct dentry *dparent, *aparent;
1805 2359
1806 switch_names(dentry, anon); 2360 dentry_lock_for_move(anon, dentry);
1807 swap(dentry->d_name.hash, anon->d_name.hash); 2361
2362 write_seqcount_begin(&dentry->d_seq);
2363 write_seqcount_begin(&anon->d_seq);
1808 2364
1809 dparent = dentry->d_parent; 2365 dparent = dentry->d_parent;
1810 aparent = anon->d_parent; 2366 aparent = anon->d_parent;
1811 2367
2368 switch_names(dentry, anon);
2369 swap(dentry->d_name.hash, anon->d_name.hash);
2370
1812 dentry->d_parent = (aparent == anon) ? dentry : aparent; 2371 dentry->d_parent = (aparent == anon) ? dentry : aparent;
1813 list_del(&dentry->d_u.d_child); 2372 list_del(&dentry->d_u.d_child);
1814 if (!IS_ROOT(dentry)) 2373 if (!IS_ROOT(dentry))
@@ -1823,6 +2382,13 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1823 else 2382 else
1824 INIT_LIST_HEAD(&anon->d_u.d_child); 2383 INIT_LIST_HEAD(&anon->d_u.d_child);
1825 2384
2385 write_seqcount_end(&dentry->d_seq);
2386 write_seqcount_end(&anon->d_seq);
2387
2388 dentry_unlock_parents_for_move(anon, dentry);
2389 spin_unlock(&dentry->d_lock);
2390
2391 /* anon->d_lock still locked, returns locked */
1826 anon->d_flags &= ~DCACHE_DISCONNECTED; 2392 anon->d_flags &= ~DCACHE_DISCONNECTED;
1827} 2393}
1828 2394
@@ -1840,14 +2406,15 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1840 2406
1841 BUG_ON(!d_unhashed(dentry)); 2407 BUG_ON(!d_unhashed(dentry));
1842 2408
1843 spin_lock(&dcache_lock);
1844
1845 if (!inode) { 2409 if (!inode) {
1846 actual = dentry; 2410 actual = dentry;
1847 __d_instantiate(dentry, NULL); 2411 __d_instantiate(dentry, NULL);
1848 goto found_lock; 2412 d_rehash(actual);
2413 goto out_nolock;
1849 } 2414 }
1850 2415
2416 spin_lock(&inode->i_lock);
2417
1851 if (S_ISDIR(inode->i_mode)) { 2418 if (S_ISDIR(inode->i_mode)) {
1852 struct dentry *alias; 2419 struct dentry *alias;
1853 2420
@@ -1855,16 +2422,24 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1855 alias = __d_find_alias(inode, 0); 2422 alias = __d_find_alias(inode, 0);
1856 if (alias) { 2423 if (alias) {
1857 actual = alias; 2424 actual = alias;
1858 /* Is this an anonymous mountpoint that we could splice 2425 write_seqlock(&rename_lock);
1859 * into our tree? */ 2426
1860 if (IS_ROOT(alias)) { 2427 if (d_ancestor(alias, dentry)) {
1861 spin_lock(&alias->d_lock); 2428 /* Check for loops */
2429 actual = ERR_PTR(-ELOOP);
2430 } else if (IS_ROOT(alias)) {
2431 /* Is this an anonymous mountpoint that we
2432 * could splice into our tree? */
1862 __d_materialise_dentry(dentry, alias); 2433 __d_materialise_dentry(dentry, alias);
2434 write_sequnlock(&rename_lock);
1863 __d_drop(alias); 2435 __d_drop(alias);
1864 goto found; 2436 goto found;
2437 } else {
2438 /* Nope, but we must(!) avoid directory
2439 * aliasing */
2440 actual = __d_unalias(inode, dentry, alias);
1865 } 2441 }
1866 /* Nope, but we must(!) avoid directory aliasing */ 2442 write_sequnlock(&rename_lock);
1867 actual = __d_unalias(dentry, alias);
1868 if (IS_ERR(actual)) 2443 if (IS_ERR(actual))
1869 dput(alias); 2444 dput(alias);
1870 goto out_nolock; 2445 goto out_nolock;
@@ -1875,15 +2450,14 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1875 actual = __d_instantiate_unique(dentry, inode); 2450 actual = __d_instantiate_unique(dentry, inode);
1876 if (!actual) 2451 if (!actual)
1877 actual = dentry; 2452 actual = dentry;
1878 else if (unlikely(!d_unhashed(actual))) 2453 else
1879 goto shouldnt_be_hashed; 2454 BUG_ON(!d_unhashed(actual));
1880 2455
1881found_lock:
1882 spin_lock(&actual->d_lock); 2456 spin_lock(&actual->d_lock);
1883found: 2457found:
1884 _d_rehash(actual); 2458 _d_rehash(actual);
1885 spin_unlock(&actual->d_lock); 2459 spin_unlock(&actual->d_lock);
1886 spin_unlock(&dcache_lock); 2460 spin_unlock(&inode->i_lock);
1887out_nolock: 2461out_nolock:
1888 if (actual == dentry) { 2462 if (actual == dentry) {
1889 security_d_instantiate(dentry, inode); 2463 security_d_instantiate(dentry, inode);
@@ -1892,10 +2466,6 @@ out_nolock:
1892 2466
1893 iput(inode); 2467 iput(inode);
1894 return actual; 2468 return actual;
1895
1896shouldnt_be_hashed:
1897 spin_unlock(&dcache_lock);
1898 BUG();
1899} 2469}
1900EXPORT_SYMBOL_GPL(d_materialise_unique); 2470EXPORT_SYMBOL_GPL(d_materialise_unique);
1901 2471
@@ -1915,14 +2485,13 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
1915} 2485}
1916 2486
1917/** 2487/**
1918 * Prepend path string to a buffer 2488 * prepend_path - Prepend path string to a buffer
1919 *
1920 * @path: the dentry/vfsmount to report 2489 * @path: the dentry/vfsmount to report
1921 * @root: root vfsmnt/dentry (may be modified by this function) 2490 * @root: root vfsmnt/dentry (may be modified by this function)
1922 * @buffer: pointer to the end of the buffer 2491 * @buffer: pointer to the end of the buffer
1923 * @buflen: pointer to buffer length 2492 * @buflen: pointer to buffer length
1924 * 2493 *
1925 * Caller holds the dcache_lock. 2494 * Caller holds the rename_lock.
1926 * 2495 *
1927 * If path is not reachable from the supplied root, then the value of 2496 * If path is not reachable from the supplied root, then the value of
1928 * root is changed (without modifying refcounts). 2497 * root is changed (without modifying refcounts).
@@ -1950,7 +2519,9 @@ static int prepend_path(const struct path *path, struct path *root,
1950 } 2519 }
1951 parent = dentry->d_parent; 2520 parent = dentry->d_parent;
1952 prefetch(parent); 2521 prefetch(parent);
2522 spin_lock(&dentry->d_lock);
1953 error = prepend_name(buffer, buflen, &dentry->d_name); 2523 error = prepend_name(buffer, buflen, &dentry->d_name);
2524 spin_unlock(&dentry->d_lock);
1954 if (!error) 2525 if (!error)
1955 error = prepend(buffer, buflen, "/", 1); 2526 error = prepend(buffer, buflen, "/", 1);
1956 if (error) 2527 if (error)
@@ -1994,7 +2565,7 @@ global_root:
1994 * Returns a pointer into the buffer or an error code if the 2565 * Returns a pointer into the buffer or an error code if the
1995 * path was too long. 2566 * path was too long.
1996 * 2567 *
1997 * "buflen" should be positive. Caller holds the dcache_lock. 2568 * "buflen" should be positive.
1998 * 2569 *
1999 * If path is not reachable from the supplied root, then the value of 2570 * If path is not reachable from the supplied root, then the value of
2000 * root is changed (without modifying refcounts). 2571 * root is changed (without modifying refcounts).
@@ -2006,10 +2577,12 @@ char *__d_path(const struct path *path, struct path *root,
2006 int error; 2577 int error;
2007 2578
2008 prepend(&res, &buflen, "\0", 1); 2579 prepend(&res, &buflen, "\0", 1);
2580 write_seqlock(&rename_lock);
2009 error = prepend_path(path, root, &res, &buflen); 2581 error = prepend_path(path, root, &res, &buflen);
2582 write_sequnlock(&rename_lock);
2583
2010 if (error) 2584 if (error)
2011 return ERR_PTR(error); 2585 return ERR_PTR(error);
2012
2013 return res; 2586 return res;
2014} 2587}
2015 2588
@@ -2068,12 +2641,12 @@ char *d_path(const struct path *path, char *buf, int buflen)
2068 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2641 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2069 2642
2070 get_fs_root(current->fs, &root); 2643 get_fs_root(current->fs, &root);
2071 spin_lock(&dcache_lock); 2644 write_seqlock(&rename_lock);
2072 tmp = root; 2645 tmp = root;
2073 error = path_with_deleted(path, &tmp, &res, &buflen); 2646 error = path_with_deleted(path, &tmp, &res, &buflen);
2074 if (error) 2647 if (error)
2075 res = ERR_PTR(error); 2648 res = ERR_PTR(error);
2076 spin_unlock(&dcache_lock); 2649 write_sequnlock(&rename_lock);
2077 path_put(&root); 2650 path_put(&root);
2078 return res; 2651 return res;
2079} 2652}
@@ -2099,12 +2672,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2099 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2672 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2100 2673
2101 get_fs_root(current->fs, &root); 2674 get_fs_root(current->fs, &root);
2102 spin_lock(&dcache_lock); 2675 write_seqlock(&rename_lock);
2103 tmp = root; 2676 tmp = root;
2104 error = path_with_deleted(path, &tmp, &res, &buflen); 2677 error = path_with_deleted(path, &tmp, &res, &buflen);
2105 if (!error && !path_equal(&tmp, &root)) 2678 if (!error && !path_equal(&tmp, &root))
2106 error = prepend_unreachable(&res, &buflen); 2679 error = prepend_unreachable(&res, &buflen);
2107 spin_unlock(&dcache_lock); 2680 write_sequnlock(&rename_lock);
2108 path_put(&root); 2681 path_put(&root);
2109 if (error) 2682 if (error)
2110 res = ERR_PTR(error); 2683 res = ERR_PTR(error);
@@ -2136,7 +2709,7 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2136/* 2709/*
2137 * Write full pathname from the root of the filesystem into the buffer. 2710 * Write full pathname from the root of the filesystem into the buffer.
2138 */ 2711 */
2139char *__dentry_path(struct dentry *dentry, char *buf, int buflen) 2712static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2140{ 2713{
2141 char *end = buf + buflen; 2714 char *end = buf + buflen;
2142 char *retval; 2715 char *retval;
@@ -2150,10 +2723,13 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2150 2723
2151 while (!IS_ROOT(dentry)) { 2724 while (!IS_ROOT(dentry)) {
2152 struct dentry *parent = dentry->d_parent; 2725 struct dentry *parent = dentry->d_parent;
2726 int error;
2153 2727
2154 prefetch(parent); 2728 prefetch(parent);
2155 if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || 2729 spin_lock(&dentry->d_lock);
2156 (prepend(&end, &buflen, "/", 1) != 0)) 2730 error = prepend_name(&end, &buflen, &dentry->d_name);
2731 spin_unlock(&dentry->d_lock);
2732 if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2157 goto Elong; 2733 goto Elong;
2158 2734
2159 retval = end; 2735 retval = end;
@@ -2163,14 +2739,25 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2163Elong: 2739Elong:
2164 return ERR_PTR(-ENAMETOOLONG); 2740 return ERR_PTR(-ENAMETOOLONG);
2165} 2741}
2166EXPORT_SYMBOL(__dentry_path); 2742
2743char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2744{
2745 char *retval;
2746
2747 write_seqlock(&rename_lock);
2748 retval = __dentry_path(dentry, buf, buflen);
2749 write_sequnlock(&rename_lock);
2750
2751 return retval;
2752}
2753EXPORT_SYMBOL(dentry_path_raw);
2167 2754
2168char *dentry_path(struct dentry *dentry, char *buf, int buflen) 2755char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2169{ 2756{
2170 char *p = NULL; 2757 char *p = NULL;
2171 char *retval; 2758 char *retval;
2172 2759
2173 spin_lock(&dcache_lock); 2760 write_seqlock(&rename_lock);
2174 if (d_unlinked(dentry)) { 2761 if (d_unlinked(dentry)) {
2175 p = buf + buflen; 2762 p = buf + buflen;
2176 if (prepend(&p, &buflen, "//deleted", 10) != 0) 2763 if (prepend(&p, &buflen, "//deleted", 10) != 0)
@@ -2178,12 +2765,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2178 buflen++; 2765 buflen++;
2179 } 2766 }
2180 retval = __dentry_path(dentry, buf, buflen); 2767 retval = __dentry_path(dentry, buf, buflen);
2181 spin_unlock(&dcache_lock); 2768 write_sequnlock(&rename_lock);
2182 if (!IS_ERR(retval) && p) 2769 if (!IS_ERR(retval) && p)
2183 *p = '/'; /* restore '/' overriden with '\0' */ 2770 *p = '/'; /* restore '/' overriden with '\0' */
2184 return retval; 2771 return retval;
2185Elong: 2772Elong:
2186 spin_unlock(&dcache_lock);
2187 return ERR_PTR(-ENAMETOOLONG); 2773 return ERR_PTR(-ENAMETOOLONG);
2188} 2774}
2189 2775
@@ -2217,7 +2803,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2217 get_fs_root_and_pwd(current->fs, &root, &pwd); 2803 get_fs_root_and_pwd(current->fs, &root, &pwd);
2218 2804
2219 error = -ENOENT; 2805 error = -ENOENT;
2220 spin_lock(&dcache_lock); 2806 write_seqlock(&rename_lock);
2221 if (!d_unlinked(pwd.dentry)) { 2807 if (!d_unlinked(pwd.dentry)) {
2222 unsigned long len; 2808 unsigned long len;
2223 struct path tmp = root; 2809 struct path tmp = root;
@@ -2226,7 +2812,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2226 2812
2227 prepend(&cwd, &buflen, "\0", 1); 2813 prepend(&cwd, &buflen, "\0", 1);
2228 error = prepend_path(&pwd, &tmp, &cwd, &buflen); 2814 error = prepend_path(&pwd, &tmp, &cwd, &buflen);
2229 spin_unlock(&dcache_lock); 2815 write_sequnlock(&rename_lock);
2230 2816
2231 if (error) 2817 if (error)
2232 goto out; 2818 goto out;
@@ -2245,8 +2831,9 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2245 if (copy_to_user(buf, cwd, len)) 2831 if (copy_to_user(buf, cwd, len))
2246 error = -EFAULT; 2832 error = -EFAULT;
2247 } 2833 }
2248 } else 2834 } else {
2249 spin_unlock(&dcache_lock); 2835 write_sequnlock(&rename_lock);
2836 }
2250 2837
2251out: 2838out:
2252 path_put(&pwd); 2839 path_put(&pwd);
@@ -2274,25 +2861,25 @@ out:
2274int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) 2861int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2275{ 2862{
2276 int result; 2863 int result;
2277 unsigned long seq; 2864 unsigned seq;
2278 2865
2279 if (new_dentry == old_dentry) 2866 if (new_dentry == old_dentry)
2280 return 1; 2867 return 1;
2281 2868
2282 /*
2283 * Need rcu_readlock to protect against the d_parent trashing
2284 * due to d_move
2285 */
2286 rcu_read_lock();
2287 do { 2869 do {
2288 /* for restarting inner loop in case of seq retry */ 2870 /* for restarting inner loop in case of seq retry */
2289 seq = read_seqbegin(&rename_lock); 2871 seq = read_seqbegin(&rename_lock);
2872 /*
2873 * Need rcu_readlock to protect against the d_parent trashing
2874 * due to d_move
2875 */
2876 rcu_read_lock();
2290 if (d_ancestor(old_dentry, new_dentry)) 2877 if (d_ancestor(old_dentry, new_dentry))
2291 result = 1; 2878 result = 1;
2292 else 2879 else
2293 result = 0; 2880 result = 0;
2881 rcu_read_unlock();
2294 } while (read_seqretry(&rename_lock, seq)); 2882 } while (read_seqretry(&rename_lock, seq));
2295 rcu_read_unlock();
2296 2883
2297 return result; 2884 return result;
2298} 2885}
@@ -2324,10 +2911,15 @@ EXPORT_SYMBOL(path_is_under);
2324 2911
2325void d_genocide(struct dentry *root) 2912void d_genocide(struct dentry *root)
2326{ 2913{
2327 struct dentry *this_parent = root; 2914 struct dentry *this_parent;
2328 struct list_head *next; 2915 struct list_head *next;
2916 unsigned seq;
2917 int locked = 0;
2329 2918
2330 spin_lock(&dcache_lock); 2919 seq = read_seqbegin(&rename_lock);
2920again:
2921 this_parent = root;
2922 spin_lock(&this_parent->d_lock);
2331repeat: 2923repeat:
2332 next = this_parent->d_subdirs.next; 2924 next = this_parent->d_subdirs.next;
2333resume: 2925resume:
@@ -2335,21 +2927,48 @@ resume:
2335 struct list_head *tmp = next; 2927 struct list_head *tmp = next;
2336 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 2928 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2337 next = tmp->next; 2929 next = tmp->next;
2338 if (d_unhashed(dentry)||!dentry->d_inode) 2930
2931 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2932 if (d_unhashed(dentry) || !dentry->d_inode) {
2933 spin_unlock(&dentry->d_lock);
2339 continue; 2934 continue;
2935 }
2340 if (!list_empty(&dentry->d_subdirs)) { 2936 if (!list_empty(&dentry->d_subdirs)) {
2937 spin_unlock(&this_parent->d_lock);
2938 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2341 this_parent = dentry; 2939 this_parent = dentry;
2940 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2342 goto repeat; 2941 goto repeat;
2343 } 2942 }
2344 atomic_dec(&dentry->d_count); 2943 if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2944 dentry->d_flags |= DCACHE_GENOCIDE;
2945 dentry->d_count--;
2946 }
2947 spin_unlock(&dentry->d_lock);
2345 } 2948 }
2346 if (this_parent != root) { 2949 if (this_parent != root) {
2347 next = this_parent->d_u.d_child.next; 2950 struct dentry *child = this_parent;
2348 atomic_dec(&this_parent->d_count); 2951 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2349 this_parent = this_parent->d_parent; 2952 this_parent->d_flags |= DCACHE_GENOCIDE;
2953 this_parent->d_count--;
2954 }
2955 this_parent = try_to_ascend(this_parent, locked, seq);
2956 if (!this_parent)
2957 goto rename_retry;
2958 next = child->d_u.d_child.next;
2350 goto resume; 2959 goto resume;
2351 } 2960 }
2352 spin_unlock(&dcache_lock); 2961 spin_unlock(&this_parent->d_lock);
2962 if (!locked && read_seqretry(&rename_lock, seq))
2963 goto rename_retry;
2964 if (locked)
2965 write_sequnlock(&rename_lock);
2966 return;
2967
2968rename_retry:
2969 locked = 1;
2970 write_seqlock(&rename_lock);
2971 goto again;
2353} 2972}
2354 2973
2355/** 2974/**
@@ -2403,7 +3022,7 @@ static void __init dcache_init_early(void)
2403 3022
2404 dentry_hashtable = 3023 dentry_hashtable =
2405 alloc_large_system_hash("Dentry cache", 3024 alloc_large_system_hash("Dentry cache",
2406 sizeof(struct hlist_head), 3025 sizeof(struct hlist_bl_head),
2407 dhash_entries, 3026 dhash_entries,
2408 13, 3027 13,
2409 HASH_EARLY, 3028 HASH_EARLY,
@@ -2412,7 +3031,7 @@ static void __init dcache_init_early(void)
2412 0); 3031 0);
2413 3032
2414 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3033 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2415 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 3034 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
2416} 3035}
2417 3036
2418static void __init dcache_init(void) 3037static void __init dcache_init(void)
@@ -2435,7 +3054,7 @@ static void __init dcache_init(void)
2435 3054
2436 dentry_hashtable = 3055 dentry_hashtable =
2437 alloc_large_system_hash("Dentry cache", 3056 alloc_large_system_hash("Dentry cache",
2438 sizeof(struct hlist_head), 3057 sizeof(struct hlist_bl_head),
2439 dhash_entries, 3058 dhash_entries,
2440 13, 3059 13,
2441 0, 3060 0,
@@ -2444,7 +3063,7 @@ static void __init dcache_init(void)
2444 0); 3063 0);
2445 3064
2446 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3065 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2447 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 3066 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
2448} 3067}
2449 3068
2450/* SLAB cache for __getname() consumers */ 3069/* SLAB cache for __getname() consumers */