From be148247cfbe2422f5709e77d9c3e10b8a6394da Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Sun, 10 Oct 2010 05:36:21 -0400
Subject: fs: take dcache_lock inside __d_path

All callers take dcache_lock just around the call to __d_path, so move the lock into it in preparation of getting rid of dcache_lock.

Signed-off-by: Christoph Hellwig
Signed-off-by: Al Viro
---
fs/dcache.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index 83293be48149..54f93f5e6b0f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1994,7 +1994,7 @@ global_root:
 * Returns a pointer into the buffer or an error code if the
 * path was too long.
 *
- * "buflen" should be positive. Caller holds the dcache_lock.
+ * "buflen" should be positive.
 *
 * If path is not reachable from the supplied root, then the value of
 * root is changed (without modifying refcounts).
@@ -2006,10 +2006,12 @@ char *__d_path(const struct path *path, struct path *root,
 int error;
 prepend(&res, &buflen, "\0", 1);
+ spin_lock(&dcache_lock);
 error = prepend_path(path, root, &res, &buflen);
+ spin_unlock(&dcache_lock);
+
 if (error)
 return ERR_PTR(error);
-
 return res;
}
-- cgit v1.2.2

From 9c82ab9c9e16cb9edf17bd0d31f3d6904afce04f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Sun, 10 Oct 2010 05:36:22 -0400
Subject: fs: simplify __d_free

Remove d_callback and always call __d_free with an RCU head.

Signed-off-by: Christoph Hellwig
Signed-off-by: Al Viro
---
fs/dcache.c | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index 54f93f5e6b0f..028753951e95 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -67,20 +67,16 @@ struct dentry_stat_t dentry_stat = {
 .age_limit = 45,
};
-static void __d_free(struct dentry *dentry)
+static void __d_free(struct rcu_head *head)
{
+ struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
+
 WARN_ON(!list_empty(&dentry->d_alias));
 if (dname_external(dentry))
 kfree(dentry->d_name.name);
 kmem_cache_free(dentry_cache, dentry);
}
-static void d_callback(struct rcu_head *head)
-{
- struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
- __d_free(dentry);
-}
-
/*
 * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry
 * inside dcache_lock.
 */
@@ -91,9 +87,9 @@ static void d_free(struct dentry *dentry)
 dentry->d_op->d_release(dentry);
 /* if dentry was never inserted into hash, immediate free is OK */
 if (hlist_unhashed(&dentry->d_hash))
- __d_free(dentry);
+ __d_free(&dentry->d_u.d_rcu);
 else
- call_rcu(&dentry->d_u.d_rcu, d_callback);
+ call_rcu(&dentry->d_u.d_rcu, __d_free);
}
/*
-- cgit v1.2.2

From 312d3ca856d369bb04d0443846b85b4cdde6fa8a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Sun, 10 Oct 2010 05:36:23 -0400
Subject: fs: use percpu counter for nr_dentry and nr_dentry_unused

The nr_dentry stat is a globally touched cacheline and atomic operation twice over the lifetime of a dentry. It is used for the benefit of userspace only. Turn it into a per-cpu counter and always decrement it in d_free instead of doing various batching operations to reduce lock hold times in the callers.

Based on an earlier patch from Nick Piggin.
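To illustrate the pattern this patch adopts, here is a minimal userspace sketch of a per-CPU counter: writers touch only their own cache-line-padded slot, readers pay the cost of summing every slot. All names and types here are invented for the example (the kernel API actually used in the diff below is percpu_counter_init/inc/dec/sum_positive); note the clamp to zero, mirroring percpu_counter_sum_positive, because any individual slot may legitimately go negative.

#include <stdio.h>

#define NR_CPUS 4

/* One slot per CPU, padded to a cache line so CPUs don't false-share. */
struct pcpu_counter {
	struct { long v; char pad[64 - sizeof(long)]; } slot[NR_CPUS];
};

/* Fast path: touch only the local CPU's cacheline, no atomics, no lock. */
static void pcpu_inc(struct pcpu_counter *c, int cpu) { c->slot[cpu].v++; }
static void pcpu_dec(struct pcpu_counter *c, int cpu) { c->slot[cpu].v--; }

/* Slow path: sum all slots; clamp, since one slot alone may be negative. */
static long pcpu_sum_positive(const struct pcpu_counter *c)
{
	long sum = 0;
	int i;

	for (i = 0; i < NR_CPUS; i++)
		sum += c->slot[i].v;
	return sum < 0 ? 0 : sum;
}

int main(void)
{
	struct pcpu_counter nr_dentry = {{{0}}};

	pcpu_inc(&nr_dentry, 0);	/* d_alloc() running on CPU 0 */
	pcpu_inc(&nr_dentry, 1);	/* d_alloc() running on CPU 1 */
	pcpu_dec(&nr_dentry, 2);	/* d_free() running on CPU 2 */
	printf("nr_dentry ~= %ld\n", pcpu_sum_positive(&nr_dentry));
	return 0;
}

The trade-off is exactly the one the commit message describes: increments and decrements become cheap and contention-free, while the exact total becomes expensive to read, which is fine for a statistic exported to userspace.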
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/dcache.c | 51 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 19 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 028753951e95..c37a656802b0 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -67,6 +67,19 @@ struct dentry_stat_t dentry_stat = { .age_limit = 45, }; +static struct percpu_counter nr_dentry __cacheline_aligned_in_smp; +static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp; + +#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) +int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry); + dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); + return proc_dointvec(table, write, buffer, lenp, ppos); +} +#endif + static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); @@ -78,13 +91,14 @@ static void __d_free(struct rcu_head *head) } /* - * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry - * inside dcache_lock. + * no dcache_lock, please. */ static void d_free(struct dentry *dentry) { + percpu_counter_dec(&nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); + /* if dentry was never inserted into hash, immediate free is OK */ if (hlist_unhashed(&dentry->d_hash)) __d_free(&dentry->d_u.d_rcu); @@ -125,14 +139,14 @@ static void dentry_lru_add(struct dentry *dentry) { list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; - dentry_stat.nr_unused++; + percpu_counter_inc(&nr_dentry_unused); } static void dentry_lru_add_tail(struct dentry *dentry) { list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; - dentry_stat.nr_unused++; + percpu_counter_inc(&nr_dentry_unused); } static void dentry_lru_del(struct dentry *dentry) @@ -140,7 +154,7 @@ static void dentry_lru_del(struct dentry *dentry) if (!list_empty(&dentry->d_lru)) { list_del(&dentry->d_lru); dentry->d_sb->s_nr_dentry_unused--; - dentry_stat.nr_unused--; + percpu_counter_dec(&nr_dentry_unused); } } @@ -149,7 +163,7 @@ static void dentry_lru_del_init(struct dentry *dentry) if (likely(!list_empty(&dentry->d_lru))) { list_del_init(&dentry->d_lru); dentry->d_sb->s_nr_dentry_unused--; - dentry_stat.nr_unused--; + percpu_counter_dec(&nr_dentry_unused); } } @@ -168,7 +182,6 @@ static struct dentry *d_kill(struct dentry *dentry) struct dentry *parent; list_del(&dentry->d_u.d_child); - dentry_stat.nr_dentry--; /* For d_free, below */ /*drops the locks, at that point nobody can reach this dentry */ dentry_iput(dentry); if (IS_ROOT(dentry)) @@ -314,7 +327,6 @@ int d_invalidate(struct dentry * dentry) EXPORT_SYMBOL(d_invalidate); /* This should be called _only_ with dcache_lock held */ - static inline struct dentry * __dget_locked(struct dentry *dentry) { atomic_inc(&dentry->d_count); @@ -534,7 +546,7 @@ static void prune_dcache(int count) { struct super_block *sb, *p = NULL; int w_count; - int unused = dentry_stat.nr_unused; + int unused = percpu_counter_sum_positive(&nr_dentry_unused); int prune_ratio; int pruned; @@ -699,20 +711,13 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) * otherwise we ascend to the parent and move to the * next sibling if there is one */ if (!parent) - goto out; - + return; dentry = parent; - } while 
(list_empty(&dentry->d_subdirs));
 dentry = list_entry(dentry->d_subdirs.next, struct dentry, d_u.d_child);
 }
-out:
- /* several dentries were freed, need to correct nr_dentry */
- spin_lock(&dcache_lock);
- dentry_stat.nr_dentry -= detached;
- spin_unlock(&dcache_lock);
}
/*
@@ -896,12 +901,16 @@ EXPORT_SYMBOL(shrink_dcache_parent);
 */
static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
+ int nr_unused;
+
 if (nr) {
 if (!(gfp_mask & __GFP_FS))
 return -1;
 prune_dcache(nr);
 }
- return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+
+ nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
+ return (nr_unused / 100) * sysctl_vfs_cache_pressure;
}
static struct shrinker dcache_shrinker = {
@@ -968,9 +977,10 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 spin_lock(&dcache_lock);
 if (parent)
 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
- dentry_stat.nr_dentry++;
 spin_unlock(&dcache_lock);
+ percpu_counter_inc(&nr_dentry);
+
 return dentry;
}
EXPORT_SYMBOL(d_alloc);
@@ -2417,6 +2427,9 @@ static void __init dcache_init(void)
{
 int loop;
+ percpu_counter_init(&nr_dentry, 0);
+ percpu_counter_init(&nr_dentry_unused, 0);
+
 /*
 * A constructor could be added for stable state like the lists,
 * but it is probably not worth it because of the cache nature
-- cgit v1.2.2

From 265ac90230257e9c035e4b0c63a0c11c5336e93c Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Sun, 10 Oct 2010 05:36:24 -0400
Subject: fs: improve DCACHE_REFERENCED usage

The dentry referenced bit is only set when installing the dentry back onto the LRU. However with lazy LRU, the dentry can already be on the LRU list at dput time, thus missing out on setting the referenced bit. Fix this.

Signed-off-by: Nick Piggin
Signed-off-by: Christoph Hellwig
Signed-off-by: Al Viro
---
fs/dcache.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index c37a656802b0..1a976d4efbe1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -246,13 +246,16 @@ repeat:
 if (dentry->d_op->d_delete(dentry))
 goto unhash_it;
 }
+
 /* Unreachable? Get rid of it */
 if (d_unhashed(dentry))
 goto kill_it;
- if (list_empty(&dentry->d_lru)) {
- dentry->d_flags |= DCACHE_REFERENCED;
+
+ /* Otherwise leave it cached and ensure it's on the LRU */
+ dentry->d_flags |= DCACHE_REFERENCED;
+ if (list_empty(&dentry->d_lru))
 dentry_lru_add(dentry);
- }
+
 spin_unlock(&dentry->d_lock);
 spin_unlock(&dcache_lock);
 return;
-- cgit v1.2.2

From 3049cfe24ef3872ba74f90630356722cf988b80d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Sun, 10 Oct 2010 05:36:25 -0400
Subject: fs: split __shrink_dcache_sb

Currently __shrink_dcache_sb has an extremely awkward calling convention because it tries to please very different callers. Split out the main loop into a shrink_dentry_list helper, which gets called directly from shrink_dcache_sb for the cases where all dentries need to be pruned, or from __shrink_dcache_sb for pruning only a certain number of dentries.

Signed-off-by: Christoph Hellwig
Signed-off-by: Al Viro
---
fs/dcache.c | 127 ++++++++++++++++++++++++++++++++----------------------------
1 file changed, 67 insertions(+), 60 deletions(-)
(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index 1a976d4efbe1..e987ad576a39 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -459,66 +459,20 @@ static void prune_one_dentry(struct dentry * dentry)
 }
}
-/*
- * Shrink the dentry LRU on a given superblock.
- * @sb : superblock to shrink dentry LRU. - * @count: If count is NULL, we prune all dentries on superblock. - * @flags: If flags is non-zero, we need to do special processing based on - * which flags are set. This means we don't need to maintain multiple - * similar copies of this loop. - */ -static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) +static void shrink_dentry_list(struct list_head *list) { - LIST_HEAD(referenced); - LIST_HEAD(tmp); struct dentry *dentry; - int cnt = 0; - BUG_ON(!sb); - BUG_ON((flags & DCACHE_REFERENCED) && count == NULL); - spin_lock(&dcache_lock); - if (count != NULL) - /* called from prune_dcache() and shrink_dcache_parent() */ - cnt = *count; -restart: - if (count == NULL) - list_splice_init(&sb->s_dentry_lru, &tmp); - else { - while (!list_empty(&sb->s_dentry_lru)) { - dentry = list_entry(sb->s_dentry_lru.prev, - struct dentry, d_lru); - BUG_ON(dentry->d_sb != sb); - - spin_lock(&dentry->d_lock); - /* - * If we are honouring the DCACHE_REFERENCED flag and - * the dentry has this flag set, don't free it. Clear - * the flag and put it back on the LRU. - */ - if ((flags & DCACHE_REFERENCED) - && (dentry->d_flags & DCACHE_REFERENCED)) { - dentry->d_flags &= ~DCACHE_REFERENCED; - list_move(&dentry->d_lru, &referenced); - spin_unlock(&dentry->d_lock); - } else { - list_move_tail(&dentry->d_lru, &tmp); - spin_unlock(&dentry->d_lock); - cnt--; - if (!cnt) - break; - } - cond_resched_lock(&dcache_lock); - } - } - while (!list_empty(&tmp)) { - dentry = list_entry(tmp.prev, struct dentry, d_lru); + while (!list_empty(list)) { + dentry = list_entry(list->prev, struct dentry, d_lru); dentry_lru_del_init(dentry); - spin_lock(&dentry->d_lock); + /* * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ + spin_lock(&dentry->d_lock); if (atomic_read(&dentry->d_count)) { spin_unlock(&dentry->d_lock); continue; @@ -527,13 +481,60 @@ restart: /* dentry->d_lock was dropped in prune_one_dentry() */ cond_resched_lock(&dcache_lock); } - if (count == NULL && !list_empty(&sb->s_dentry_lru)) - goto restart; - if (count != NULL) - *count = cnt; +} + +/** + * __shrink_dcache_sb - shrink the dentry LRU on a given superblock + * @sb: superblock to shrink dentry LRU. + * @count: number of entries to prune + * @flags: flags to control the dentry processing + * + * If flags contains DCACHE_REFERENCED reference dentries will not be pruned. + */ +static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) +{ + /* called from prune_dcache() and shrink_dcache_parent() */ + struct dentry *dentry; + LIST_HEAD(referenced); + LIST_HEAD(tmp); + int cnt = *count; + + spin_lock(&dcache_lock); + while (!list_empty(&sb->s_dentry_lru)) { + dentry = list_entry(sb->s_dentry_lru.prev, + struct dentry, d_lru); + BUG_ON(dentry->d_sb != sb); + + /* + * If we are honouring the DCACHE_REFERENCED flag and the + * dentry has this flag set, don't free it. Clear the flag + * and put it back on the LRU. 
+ */
+ if (flags & DCACHE_REFERENCED) {
+ spin_lock(&dentry->d_lock);
+ if (dentry->d_flags & DCACHE_REFERENCED) {
+ dentry->d_flags &= ~DCACHE_REFERENCED;
+ list_move(&dentry->d_lru, &referenced);
+ spin_unlock(&dentry->d_lock);
+ cond_resched_lock(&dcache_lock);
+ continue;
+ }
+ spin_unlock(&dentry->d_lock);
+ }
+
+ list_move_tail(&dentry->d_lru, &tmp);
+ if (!--cnt)
+ break;
+ cond_resched_lock(&dcache_lock);
+ }
+
+ *count = cnt;
+ shrink_dentry_list(&tmp);
+
 if (!list_empty(&referenced))
 list_splice(&referenced, &sb->s_dentry_lru);
 spin_unlock(&dcache_lock);
+
}
/**
@@ -619,13 +620,19 @@ static void prune_dcache(int count)
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
- * Shrink the dcache for the specified super block. This
- * is used to free the dcache before unmounting a file
- * system
+ * Shrink the dcache for the specified super block. This is used to free
+ * the dcache before unmounting a file system.
 */
-void shrink_dcache_sb(struct super_block * sb)
+void shrink_dcache_sb(struct super_block *sb)
{
- __shrink_dcache_sb(sb, NULL, 0);
+ LIST_HEAD(tmp);
+
+ spin_lock(&dcache_lock);
+ while (!list_empty(&sb->s_dentry_lru)) {
+ list_splice_init(&sb->s_dentry_lru, &tmp);
+ shrink_dentry_list(&tmp);
+ }
+ spin_unlock(&dcache_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);
-- cgit v1.2.2

From a4633357ac610cd2f8740e28a31fc148a7960421 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Sun, 10 Oct 2010 05:36:26 -0400
Subject: fs: clean up dentry lru modification

Always do a list_del_init on the LRU to make sure the list_empty invariant for not being on the LRU always holds true, and fold dentry_lru_del_init into dentry_lru_del. Replace the dentry_lru_add_tail primitive with a dentry_lru_move_tail operation that is simpler when the dentry is already on the list, which it always is. Move the list_empty into dentry_lru_add to fit the scheme of the other lru helpers, and simplify locking once we move to a separate LRU lock.

Signed-off-by: Christoph Hellwig
Signed-off-by: Al Viro
---
fs/dcache.c | 49 +++++++++++++++++++++++--------------------------
1 file changed, 23 insertions(+), 26 deletions(-)
(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index e987ad576a39..cc2b93802179 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -133,37 +133,34 @@ static void dentry_iput(struct dentry * dentry)
}
/*
- * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
+ * dentry_lru_(add|del|move_tail) must be called with dcache_lock held.
*/ static void dentry_lru_add(struct dentry *dentry) { - list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); - dentry->d_sb->s_nr_dentry_unused++; - percpu_counter_inc(&nr_dentry_unused); -} - -static void dentry_lru_add_tail(struct dentry *dentry) -{ - list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); - dentry->d_sb->s_nr_dentry_unused++; - percpu_counter_inc(&nr_dentry_unused); + if (list_empty(&dentry->d_lru)) { + list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); + dentry->d_sb->s_nr_dentry_unused++; + percpu_counter_inc(&nr_dentry_unused); + } } static void dentry_lru_del(struct dentry *dentry) { if (!list_empty(&dentry->d_lru)) { - list_del(&dentry->d_lru); + list_del_init(&dentry->d_lru); dentry->d_sb->s_nr_dentry_unused--; percpu_counter_dec(&nr_dentry_unused); } } -static void dentry_lru_del_init(struct dentry *dentry) +static void dentry_lru_move_tail(struct dentry *dentry) { - if (likely(!list_empty(&dentry->d_lru))) { - list_del_init(&dentry->d_lru); - dentry->d_sb->s_nr_dentry_unused--; - percpu_counter_dec(&nr_dentry_unused); + if (list_empty(&dentry->d_lru)) { + list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); + dentry->d_sb->s_nr_dentry_unused++; + percpu_counter_inc(&nr_dentry_unused); + } else { + list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); } } @@ -253,8 +250,7 @@ repeat: /* Otherwise leave it cached and ensure it's on the LRU */ dentry->d_flags |= DCACHE_REFERENCED; - if (list_empty(&dentry->d_lru)) - dentry_lru_add(dentry); + dentry_lru_add(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); @@ -333,7 +329,7 @@ EXPORT_SYMBOL(d_invalidate); static inline struct dentry * __dget_locked(struct dentry *dentry) { atomic_inc(&dentry->d_count); - dentry_lru_del_init(dentry); + dentry_lru_del(dentry); return dentry; } @@ -452,7 +448,7 @@ static void prune_one_dentry(struct dentry * dentry) if (dentry->d_op && dentry->d_op->d_delete) dentry->d_op->d_delete(dentry); - dentry_lru_del_init(dentry); + dentry_lru_del(dentry); __d_drop(dentry); dentry = d_kill(dentry); spin_lock(&dcache_lock); @@ -465,7 +461,7 @@ static void shrink_dentry_list(struct list_head *list) while (!list_empty(list)) { dentry = list_entry(list->prev, struct dentry, d_lru); - dentry_lru_del_init(dentry); + dentry_lru_del(dentry); /* * We found an inuse dentry which was not removed from @@ -650,7 +646,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* detach this root from the system */ spin_lock(&dcache_lock); - dentry_lru_del_init(dentry); + dentry_lru_del(dentry); __d_drop(dentry); spin_unlock(&dcache_lock); @@ -664,7 +660,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) spin_lock(&dcache_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { - dentry_lru_del_init(loop); + dentry_lru_del(loop); __d_drop(loop); cond_resched_lock(&dcache_lock); } @@ -841,14 +837,15 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - dentry_lru_del_init(dentry); /* * move only zero ref count dentries to the end * of the unused list for prune_dcache */ if (!atomic_read(&dentry->d_count)) { - dentry_lru_add_tail(dentry); + dentry_lru_move_tail(dentry); found++; + } else { + dentry_lru_del(dentry); } /* -- cgit v1.2.2 From 3825bdb7ed920845961f32f364454bee5f469abb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 10 Oct 2010 05:36:27 -0400 Subject: fs: use RCU read side protection in d_validate d_validate does a purely read lookup in the dentry hash, so use 
RCU read side locking instead of dcache_lock. Split out from a larger patch by Nick Piggin.

Signed-off-by: Christoph Hellwig
Signed-off-by: Al Viro
---
fs/dcache.c | 31 ++++++++++++-------------------
1 file changed, 12 insertions(+), 19 deletions(-)
(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index cc2b93802179..23702a9d4e6d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1491,33 +1491,26 @@ out:
 * This is used by ncpfs in its readdir implementation.
 * Zero is returned in the dentry is invalid.
 */
-
-int d_validate(struct dentry *dentry, struct dentry *dparent)
+int d_validate(struct dentry *dentry, struct dentry *parent)
{
- struct hlist_head *base;
- struct hlist_node *lhp;
+ struct hlist_head *head = d_hash(parent, dentry->d_name.hash);
+ struct hlist_node *node;
+ struct dentry *d;
 /* Check whether the ptr might be valid at all.. */
 if (!kmem_ptr_validate(dentry_cache, dentry))
- goto out;
-
- if (dentry->d_parent != dparent)
- goto out;
+ return 0;
+ if (dentry->d_parent != parent)
+ return 0;
- spin_lock(&dcache_lock);
- base = d_hash(dparent, dentry->d_name.hash);
- hlist_for_each(lhp,base) {
- /* hlist_for_each_entry_rcu() not required for d_hash list
- * as it is parsed under dcache_lock
- */
- if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
- __dget_locked(dentry);
- spin_unlock(&dcache_lock);
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(d, node, head, d_hash) {
+ if (d == dentry) {
+ dget(dentry);
 return 1;
 }
 }
- spin_unlock(&dcache_lock);
-out:
+ rcu_read_unlock();
 return 0;
}
EXPORT_SYMBOL(d_validate);
-- cgit v1.2.2

From d3a23e1678a5827c38ed8a465ad91d65e59fa911 Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Wed, 5 Jan 2011 20:01:21 +1100
Subject: Revert "fs: use RCU read side protection in d_validate"

This reverts commit 3825bdb7ed920845961f32f364454bee5f469abb. You cannot dget() a dentry without having a reference, or holding a lock that guarantees it remains valid.

Signed-off-by: Nick Piggin
---
fs/dcache.c | 31 +++++++++++++++++++------------
1 file changed, 19 insertions(+), 12 deletions(-)
(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index 23702a9d4e6d..cc2b93802179 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1491,26 +1491,33 @@ out:
 * This is used by ncpfs in its readdir implementation.
 * Zero is returned in the dentry is invalid.
 */
-int d_validate(struct dentry *dentry, struct dentry *parent)
+
+int d_validate(struct dentry *dentry, struct dentry *dparent)
{
- struct hlist_head *head = d_hash(parent, dentry->d_name.hash);
- struct hlist_node *node;
- struct dentry *d;
+ struct hlist_head *base;
+ struct hlist_node *lhp;
 /* Check whether the ptr might be valid at all..
*/ if (!kmem_ptr_validate(dentry_cache, dentry)) - return 0; - if (dentry->d_parent != parent) - return 0; + goto out; - rcu_read_lock(); - hlist_for_each_entry_rcu(d, node, head, d_hash) { - if (d == dentry) { - dget(dentry); + if (dentry->d_parent != dparent) + goto out; + + spin_lock(&dcache_lock); + base = d_hash(dparent, dentry->d_name.hash); + hlist_for_each(lhp,base) { + /* hlist_for_each_entry_rcu() not required for d_hash list + * as it is parsed under dcache_lock + */ + if (dentry == hlist_entry(lhp, struct dentry, d_hash)) { + __dget_locked(dentry); + spin_unlock(&dcache_lock); return 1; } } - rcu_read_unlock(); + spin_unlock(&dcache_lock); +out: return 0; } EXPORT_SYMBOL(d_validate); -- cgit v1.2.2 From 786a5e15b613a9cee4fc9139fc3113a5ab0fde79 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:16 +1100 Subject: fs: d_validate fixes d_validate has been broken for a long time. kmem_ptr_validate does not guarantee that a pointer can be dereferenced if it can go away at any time. Even rcu_read_lock doesn't help, because the pointer might be queued in RCU callbacks but not executed yet. So the parent cannot be checked, nor the name hashed. The dentry pointer can not be touched until it can be verified under lock. Hashing simply cannot be used. Instead, verify the parent/child relationship by traversing parent's d_child list. It's slow, but only ncpfs and the destaged smbfs care about it, at this point. Signed-off-by: Nick Piggin --- fs/dcache.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index cc2b93802179..9d1a59dfda0b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1483,41 +1483,30 @@ out: } /** - * d_validate - verify dentry provided from insecure source + * d_validate - verify dentry provided from insecure source (deprecated) * @dentry: The dentry alleged to be valid child of @dparent * @dparent: The parent dentry (known to be valid) * * An insecure source has sent us a dentry, here we verify it and dget() it. * This is used by ncpfs in its readdir implementation. * Zero is returned in the dentry is invalid. + * + * This function is slow for big directories, and deprecated, do not use it. */ - int d_validate(struct dentry *dentry, struct dentry *dparent) { - struct hlist_head *base; - struct hlist_node *lhp; - - /* Check whether the ptr might be valid at all.. */ - if (!kmem_ptr_validate(dentry_cache, dentry)) - goto out; - - if (dentry->d_parent != dparent) - goto out; + struct dentry *child; spin_lock(&dcache_lock); - base = d_hash(dparent, dentry->d_name.hash); - hlist_for_each(lhp,base) { - /* hlist_for_each_entry_rcu() not required for d_hash list - * as it is parsed under dcache_lock - */ - if (dentry == hlist_entry(lhp, struct dentry, d_hash)) { + list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { + if (dentry == child) { __dget_locked(dentry); spin_unlock(&dcache_lock); return 1; } } spin_unlock(&dcache_lock); -out: + return 0; } EXPORT_SYMBOL(d_validate); -- cgit v1.2.2 From 86c8749ede0c59e590de9267066932a26f1ce796 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:18 +1100 Subject: vfs: revert per-cpu nr_unused counters for dentry and inodes The nr_unused counters count the number of objects on an LRU, and as such they are synchronized with LRU object insertion and removal and scanning, and protected under the LRU lock. 
Making it per-cpu does not actually get any concurrency improvements because of this lock, and summing the counter is much slower, and incrementing/decrementing it costs more code size and is slower too. These counters should stay per-LRU, which currently means global. Signed-off-by: Nick Piggin --- fs/dcache.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 9d1a59dfda0b..f62ba90bce91 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -68,14 +68,12 @@ struct dentry_stat_t dentry_stat = { }; static struct percpu_counter nr_dentry __cacheline_aligned_in_smp; -static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp; #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry); - dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); return proc_dointvec(table, write, buffer, lenp, ppos); } #endif @@ -140,7 +138,7 @@ static void dentry_lru_add(struct dentry *dentry) if (list_empty(&dentry->d_lru)) { list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; - percpu_counter_inc(&nr_dentry_unused); + dentry_stat.nr_unused++; } } @@ -149,7 +147,7 @@ static void dentry_lru_del(struct dentry *dentry) if (!list_empty(&dentry->d_lru)) { list_del_init(&dentry->d_lru); dentry->d_sb->s_nr_dentry_unused--; - percpu_counter_dec(&nr_dentry_unused); + dentry_stat.nr_unused--; } } @@ -158,7 +156,7 @@ static void dentry_lru_move_tail(struct dentry *dentry) if (list_empty(&dentry->d_lru)) { list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; - percpu_counter_inc(&nr_dentry_unused); + dentry_stat.nr_unused++; } else { list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); } @@ -546,7 +544,7 @@ static void prune_dcache(int count) { struct super_block *sb, *p = NULL; int w_count; - int unused = percpu_counter_sum_positive(&nr_dentry_unused); + int unused = dentry_stat.nr_unused; int prune_ratio; int pruned; @@ -908,16 +906,13 @@ EXPORT_SYMBOL(shrink_dcache_parent); */ static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { - int nr_unused; - if (nr) { if (!(gfp_mask & __GFP_FS)) return -1; prune_dcache(nr); } - nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); - return (nr_unused / 100) * sysctl_vfs_cache_pressure; + return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; } static struct shrinker dcache_shrinker = { @@ -2424,7 +2419,6 @@ static void __init dcache_init(void) int loop; percpu_counter_init(&nr_dentry, 0); - percpu_counter_init(&nr_dentry_unused, 0); /* * A constructor could be added for stable state like the lists, -- cgit v1.2.2 From 3e880fb5e4bb6a012035e3edd0586ee2817c2e24 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:19 +1100 Subject: fs: use fast counters for vfs caches percpu_counter library generates quite nasty code, so unless you need to dynamically allocate counters or take fast approximate value, a simple per cpu set of counters is much better. The percpu_counter can never be made to work as well, because it has an indirection from pointer to percpu memory, and it can't use direct this_cpu_inc interfaces because it doesn't use static PER_CPU data, so code will always be worse. 
In the fastpath, it is the difference between this: incl %gs:nr_dentry # nr_dentry and this: movl percpu_counter_batch(%rip), %edx # percpu_counter_batch, movl $1, %esi #, movq $nr_dentry, %rdi #, call __percpu_counter_add # (plus I clobber registers) __percpu_counter_add: pushq %rbp # movq %rsp, %rbp #, subq $32, %rsp #, movq %rbx, -24(%rbp) #, movq %r12, -16(%rbp) #, movq %r13, -8(%rbp) #, movq %rdi, %rbx # fbc, fbc #APP # 216 "/home/npiggin/usr/src/linux-2.6/arch/x86/include/asm/thread_info.h" 1 movq %gs:kernel_stack,%rax #, pfo_ret__ # 0 "" 2 #NO_APP incl -8124(%rax) # .preempt_count movq 32(%rdi), %r12 # .counters, tcp_ptr__ #APP # 78 "lib/percpu_counter.c" 1 add %gs:this_cpu_off, %r12 # this_cpu_off, tcp_ptr__ # 0 "" 2 #NO_APP movslq (%r12),%r13 #* tcp_ptr__, tmp73 movslq %edx,%rax # batch, batch addq %rsi, %r13 # amount, count cmpq %rax, %r13 # batch, count jge .L27 #, negl %edx # tmp76 movslq %edx,%rdx # tmp76, tmp77 cmpq %rdx, %r13 # tmp77, count jg .L28 #, .L27: movq %rbx, %rdi # fbc, call _raw_spin_lock # addq %r13, 8(%rbx) # count, .count movq %rbx, %rdi # fbc, movl $0, (%r12) #,* tcp_ptr__ call _raw_spin_unlock # .L29: #APP # 216 "/home/npiggin/usr/src/linux-2.6/arch/x86/include/asm/thread_info.h" 1 movq %gs:kernel_stack,%rax #, pfo_ret__ # 0 "" 2 #NO_APP decl -8124(%rax) # .preempt_count movq -8136(%rax), %rax #, D.14625 testb $8, %al #, D.14625 jne .L32 #, .L31: movq -24(%rbp), %rbx #, movq -16(%rbp), %r12 #, movq -8(%rbp), %r13 #, leave ret .p2align 4,,10 .p2align 3 .L28: movl %r13d, (%r12) # count,* jmp .L29 # .L32: call preempt_schedule # .p2align 4,,6 jmp .L31 # .size __percpu_counter_add, .-__percpu_counter_add .p2align 4,,15 Signed-off-by: Nick Piggin --- fs/dcache.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index f62ba90bce91..b2cb2662ca00 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -67,13 +67,22 @@ struct dentry_stat_t dentry_stat = { .age_limit = 45, }; -static struct percpu_counter nr_dentry __cacheline_aligned_in_smp; +static DEFINE_PER_CPU(unsigned int, nr_dentry); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) +static int get_nr_dentry(void) +{ + int i; + int sum = 0; + for_each_possible_cpu(i) + sum += per_cpu(nr_dentry, i); + return sum < 0 ? 
0 : sum;
+}
+
 int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
 size_t *lenp, loff_t *ppos)
{
- dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry);
+ dentry_stat.nr_dentry = get_nr_dentry();
 return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif
@@ -93,7 +102,7 @@ static void __d_free(struct rcu_head *head)
 */
static void d_free(struct dentry *dentry)
{
- percpu_counter_dec(&nr_dentry);
+ this_cpu_dec(nr_dentry);
 if (dentry->d_op && dentry->d_op->d_release)
 dentry->d_op->d_release(dentry);
@@ -981,7 +990,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
 spin_unlock(&dcache_lock);
- percpu_counter_inc(&nr_dentry);
+ this_cpu_inc(nr_dentry);
 return dentry;
}
@@ -2418,8 +2427,6 @@ static void __init dcache_init(void)
{
 int loop;
- percpu_counter_init(&nr_dentry, 0);
-
 /*
 * A constructor could be added for stable state like the lists,
 * but it is probably not worth it because of the cache nature
-- cgit v1.2.2

From fe15ce446beb3a33583af81ffe6c9d01a75314ed Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Fri, 7 Jan 2011 17:49:23 +1100
Subject: fs: change d_delete semantics

Change d_delete from a dentry deletion notification to a dentry caching advice, more like ->drop_inode. Require it to be constant and idempotent, and not take d_lock. This is how all existing filesystems use the callback anyway. This makes fine-grained dentry locking of dput and dentry lru scanning much simpler.

Signed-off-by: Nick Piggin
---
fs/dcache.c | 2 --
1 file changed, 2 deletions(-)
(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index b2cb2662ca00..6ee6bc40cb63 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -453,8 +453,6 @@ static void prune_one_dentry(struct dentry * dentry)
 if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
 return;
- if (dentry->d_op && dentry->d_op->d_delete)
- dentry->d_op->d_delete(dentry);
 dentry_lru_del(dentry);
 __d_drop(dentry);
 dentry = d_kill(dentry);
-- cgit v1.2.2

From fb2d5b86aff355a27ebfc132d3c99f4a940cc3fe Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Fri, 7 Jan 2011 17:49:26 +1100
Subject: fs: name case update method

smbfs and ncpfs want to update a live dentry name in-place. Rather than have them open code the locking, provide a documented dcache API.

Signed-off-by: Nick Piggin
---
fs/dcache.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index 6ee6bc40cb63..814e5f491e9c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1589,6 +1589,33 @@ void d_rehash(struct dentry * entry)
}
EXPORT_SYMBOL(d_rehash);
+/**
+ * dentry_update_name_case - update case insensitive dentry with a new name
+ * @dentry: dentry to be updated
+ * @name: new name
+ *
+ * Update a case insensitive dentry with new case of name.
+ *
+ * dentry must have been returned by d_lookup with name @name. Old and new
+ * name lengths must match (ie. no d_compare which allows mismatched name
+ * lengths).
+ *
+ * Parent inode i_mutex must be held over d_lookup and into this call (to
+ * keep renames and concurrent inserts, and readdir(2) away).
+ */ +void dentry_update_name_case(struct dentry *dentry, struct qstr *name) +{ + BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); + BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ + + spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); +} +EXPORT_SYMBOL(dentry_update_name_case); + /* * When switching names, the actual string doesn't strictly have to * be preserved in the target - because we're dropping the target -- cgit v1.2.2 From 621e155a3591962420eacdd39f6f0aa29ceb221e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:27 +1100 Subject: fs: change d_compare for rcu-walk Change d_compare so it may be called from lock-free RCU lookups. This does put significant restrictions on what may be done from the callback, however there don't seem to have been any problems with in-tree fses. If some strange use case pops up that _really_ cannot cope with the rcu-walk rules, we can just add new rcu-unaware callbacks, which would cause name lookup to drop out of rcu-walk mode. For in-tree filesystems, this is just a mechanical change. Signed-off-by: Nick Piggin --- fs/dcache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 814e5f491e9c..7075555fbb04 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1437,7 +1437,9 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) */ qstr = &dentry->d_name; if (parent->d_op && parent->d_op->d_compare) { - if (parent->d_op->d_compare(parent, qstr, name)) + if (parent->d_op->d_compare(parent, parent->d_inode, + dentry, dentry->d_inode, + qstr->len, qstr->name, name)) goto next; } else { if (qstr->len != len) -- cgit v1.2.2 From b1e6a015a580ad145689ad1d6b4aa0e03e6c868b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:28 +1100 Subject: fs: change d_hash for rcu-walk Change d_hash so it may be called from lock-free RCU lookups. See similar patch for d_compare for details. For in-tree filesystems, this is just a mechanical change. Signed-off-by: Nick Piggin --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 7075555fbb04..6f59f375ed9d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1478,7 +1478,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) */ name->hash = full_name_hash(name->name, name->len); if (dir->d_op && dir->d_op->d_hash) { - if (dir->d_op->d_hash(dir, name) < 0) + if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) goto out; } dentry = d_lookup(dir, name); -- cgit v1.2.2 From ec2447c278ee973d35f38e53ca16ba7f965ae33d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:29 +1100 Subject: hostfs: simplify locking Remove dcache_lock locking from hostfs filesystem, and move it into dcache helpers. All that is required is a coherent path name. Protection from concurrent modification of the namespace after path name generation is not provided in current code, because dcache_lock is dropped before the path is used. 
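The dcache path helpers this patch consolidates (such as __dentry_path in the diff below) build the result right to left: a cursor starts at the end of the caller's buffer and one component at a time is prepended while walking toward the root. A self-contained userspace sketch of that technique, with toy types invented for the example (not the kernel's code):

#include <errno.h>
#include <stdio.h>
#include <string.h>

struct toy_dentry {
	const char *name;
	struct toy_dentry *parent;	/* NULL for the root */
};

/* Copy str in front of *buffer, moving the cursor left. */
static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
	*buflen -= namelen;
	if (*buflen < 0)
		return -ENAMETOOLONG;
	*buffer -= namelen;
	memcpy(*buffer, str, namelen);
	return 0;
}

/* Walk leaf -> root, prepending "/name" for every non-root component. */
static char *toy_dentry_path(struct toy_dentry *d, char *buf, int buflen)
{
	char *end = buf + buflen;

	if (prepend(&end, &buflen, "", 1))	/* trailing NUL */
		return NULL;
	while (d->parent) {
		if (prepend(&end, &buflen, d->name, (int)strlen(d->name)) ||
		    prepend(&end, &buflen, "/", 1))
			return NULL;
		d = d->parent;
	}
	return end;
}

int main(void)
{
	struct toy_dentry root = { "", NULL };
	struct toy_dentry usr = { "usr", &root };
	struct toy_dentry lib = { "lib", &usr };
	char buf[64];

	puts(toy_dentry_path(&lib, buf, sizeof(buf)));	/* "/usr/lib" */
	return 0;
}

In the kernel it is the walk itself that needs protection, since d_parent and d_name can change under a concurrent rename, which is why this series moves the locking into the helpers rather than leaving it to callers like hostfs.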
Signed-off-by: Nick Piggin --- fs/dcache.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 6f59f375ed9d..61beb40dd6bf 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2171,7 +2171,7 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, /* * Write full pathname from the root of the filesystem into the buffer. */ -char *__dentry_path(struct dentry *dentry, char *buf, int buflen) +static char *__dentry_path(struct dentry *dentry, char *buf, int buflen) { char *end = buf + buflen; char *retval; @@ -2198,7 +2198,18 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen) Elong: return ERR_PTR(-ENAMETOOLONG); } -EXPORT_SYMBOL(__dentry_path); + +char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) +{ + char *retval; + + spin_lock(&dcache_lock); + retval = __dentry_path(dentry, buf, buflen); + spin_unlock(&dcache_lock); + + return retval; +} +EXPORT_SYMBOL(dentry_path_raw); char *dentry_path(struct dentry *dentry, char *buf, int buflen) { -- cgit v1.2.2 From 789680d1ee9311cdf095241dc02bd9784d799cd1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:30 +1100 Subject: fs: dcache scale hash Add a new lock, dcache_hash_lock, to protect the dcache hash table from concurrent modification. d_hash is also protected by d_lock. Signed-off-by: Nick Piggin --- fs/dcache.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 11 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 61beb40dd6bf..1e124d4673c6 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -35,10 +35,24 @@ #include #include "internal.h" +/* + * Usage: + * dcache_hash_lock protects dcache hash table, s_anon lists + * + * Ordering: + * dcache_lock + * dentry->d_lock + * dcache_hash_lock + * + * if (dentry1 < dentry2) + * dentry1->d_lock + * dentry2->d_lock + */ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); - __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock); +__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(dcache_lock); @@ -196,6 +210,42 @@ static struct dentry *d_kill(struct dentry *dentry) return parent; } +/** + * d_drop - drop a dentry + * @dentry: dentry to drop + * + * d_drop() unhashes the entry from the parent dentry hashes, so that it won't + * be found through a VFS lookup any more. Note that this is different from + * deleting the dentry - d_delete will try to mark the dentry negative if + * possible, giving a successful _negative_ lookup, while d_drop will + * just make the cache lookup fail. + * + * d_drop() is used mainly for stuff that wants to invalidate a dentry for some + * reason (NFS timeouts or autofs deletes). + * + * __d_drop requires dentry->d_lock. 
+ */ +void __d_drop(struct dentry *dentry) +{ + if (!(dentry->d_flags & DCACHE_UNHASHED)) { + dentry->d_flags |= DCACHE_UNHASHED; + spin_lock(&dcache_hash_lock); + hlist_del_rcu(&dentry->d_hash); + spin_unlock(&dcache_hash_lock); + } +} +EXPORT_SYMBOL(__d_drop); + +void d_drop(struct dentry *dentry) +{ + spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); +} +EXPORT_SYMBOL(d_drop); + /* * This is dput * @@ -1199,7 +1249,9 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_flags |= DCACHE_DISCONNECTED; tmp->d_flags &= ~DCACHE_UNHASHED; list_add(&tmp->d_alias, &inode->i_dentry); + spin_lock(&dcache_hash_lock); hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); + spin_unlock(&dcache_hash_lock); spin_unlock(&tmp->d_lock); spin_unlock(&dcache_lock); @@ -1585,7 +1637,9 @@ void d_rehash(struct dentry * entry) { spin_lock(&dcache_lock); spin_lock(&entry->d_lock); + spin_lock(&dcache_hash_lock); _d_rehash(entry); + spin_unlock(&dcache_hash_lock); spin_unlock(&entry->d_lock); spin_unlock(&dcache_lock); } @@ -1692,8 +1746,6 @@ static void switch_names(struct dentry *dentry, struct dentry *target) */ static void d_move_locked(struct dentry * dentry, struct dentry * target) { - struct hlist_head *list; - if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -1710,14 +1762,11 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) } /* Move the dentry to the target hash queue, if on different bucket */ - if (d_unhashed(dentry)) - goto already_unhashed; - - hlist_del_rcu(&dentry->d_hash); - -already_unhashed: - list = d_hash(target->d_parent, target->d_name.hash); - __d_rehash(dentry, list); + spin_lock(&dcache_hash_lock); + if (!d_unhashed(dentry)) + hlist_del_rcu(&dentry->d_hash); + __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); + spin_unlock(&dcache_hash_lock); /* Unhash the target: dput() will then get rid of it */ __d_drop(target); @@ -1914,7 +1963,9 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) found_lock: spin_lock(&actual->d_lock); found: + spin_lock(&dcache_hash_lock); _d_rehash(actual); + spin_unlock(&dcache_hash_lock); spin_unlock(&actual->d_lock); spin_unlock(&dcache_lock); out_nolock: -- cgit v1.2.2 From 2304450783dfde7b0b94ae234edd0dbffa865073 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:31 +1100 Subject: fs: dcache scale lru Add a new lock, dcache_lru_lock, to protect the dcache LRU list from concurrent modification. d_lru is also protected by d_lock, which allows LRU lists to be accessed without the lru lock, using RCU in future patches. 
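A minimal userspace sketch of the structure this patch moves to: an intrusive LRU list guarded by its own dedicated lock, with the same add/del/move_tail shape as the dentry_lru_* helpers in the diff below. All names here are invented for the example, and the per-superblock list and the extra d_lock held by callers in the real code are omitted:

#include <pthread.h>

struct list_head { struct list_head *prev, *next; };

#define LIST_INIT(h) { &(h), &(h) }

static int list_empty(const struct list_head *h) { return h->next == h; }

static void list_add(struct list_head *n, struct list_head *h)
{
	n->next = h->next; n->prev = h;
	h->next->prev = n; h->next = n;
}

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev; n->next = h;
	h->prev->next = n; h->prev = n;
}

static void list_del_init(struct list_head *n)
{
	n->prev->next = n->next; n->next->prev = n->prev;
	n->prev = n->next = n;
}

struct toy_dentry { struct list_head d_lru; };

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
static struct list_head lru = LIST_INIT(lru);
static long nr_unused;	/* protected by lru_lock, like the LRU counters */

static void toy_lru_add(struct toy_dentry *d)
{
	pthread_mutex_lock(&lru_lock);
	if (list_empty(&d->d_lru)) {	/* list_empty() means "not on the LRU" */
		list_add(&d->d_lru, &lru);
		nr_unused++;
	}
	pthread_mutex_unlock(&lru_lock);
}

static void toy_lru_del(struct toy_dentry *d)
{
	pthread_mutex_lock(&lru_lock);
	if (!list_empty(&d->d_lru)) {
		list_del_init(&d->d_lru);
		nr_unused--;
	}
	pthread_mutex_unlock(&lru_lock);
}

static void toy_lru_move_tail(struct toy_dentry *d)
{
	pthread_mutex_lock(&lru_lock);
	if (!list_empty(&d->d_lru))
		list_del_init(&d->d_lru);	/* already on the list: just move */
	else
		nr_unused++;			/* joining the list: count it */
	list_add_tail(&d->d_lru, &lru);
	pthread_mutex_unlock(&lru_lock);
}

int main(void)
{
	struct toy_dentry d = { LIST_INIT(d.d_lru) };

	toy_lru_add(&d);	/* like dput() of a no-longer-used dentry */
	toy_lru_move_tail(&d);	/* like marking a pruning candidate */
	toy_lru_del(&d);	/* like the dentry being reused or freed */
	return 0;
}

Because list membership is always visible through list_empty() and every list operation happens under one narrow lock, the list manipulation no longer depends on the global dcache_lock, which is the whole point of the patch.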
Signed-off-by: Nick Piggin --- fs/dcache.c | 112 +++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 84 insertions(+), 28 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 1e124d4673c6..3d3c843c36ed 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -37,11 +37,19 @@ /* * Usage: - * dcache_hash_lock protects dcache hash table, s_anon lists + * dcache_hash_lock protects: + * - the dcache hash table, s_anon lists + * dcache_lru_lock protects: + * - the dcache lru lists and counters + * d_lock protects: + * - d_flags + * - d_name + * - d_lru * * Ordering: * dcache_lock * dentry->d_lock + * dcache_lru_lock * dcache_hash_lock * * if (dentry1 < dentry2) @@ -52,6 +60,7 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); @@ -154,28 +163,38 @@ static void dentry_iput(struct dentry * dentry) } /* - * dentry_lru_(add|del|move_tail) must be called with dcache_lock held. + * dentry_lru_(add|del|move_tail) must be called with d_lock held. */ static void dentry_lru_add(struct dentry *dentry) { if (list_empty(&dentry->d_lru)) { + spin_lock(&dcache_lru_lock); list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; dentry_stat.nr_unused++; + spin_unlock(&dcache_lru_lock); } } +static void __dentry_lru_del(struct dentry *dentry) +{ + list_del_init(&dentry->d_lru); + dentry->d_sb->s_nr_dentry_unused--; + dentry_stat.nr_unused--; +} + static void dentry_lru_del(struct dentry *dentry) { if (!list_empty(&dentry->d_lru)) { - list_del_init(&dentry->d_lru); - dentry->d_sb->s_nr_dentry_unused--; - dentry_stat.nr_unused--; + spin_lock(&dcache_lru_lock); + __dentry_lru_del(dentry); + spin_unlock(&dcache_lru_lock); } } static void dentry_lru_move_tail(struct dentry *dentry) { + spin_lock(&dcache_lru_lock); if (list_empty(&dentry->d_lru)) { list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; @@ -183,6 +202,7 @@ static void dentry_lru_move_tail(struct dentry *dentry) } else { list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); } + spin_unlock(&dcache_lru_lock); } /** @@ -192,6 +212,8 @@ static void dentry_lru_move_tail(struct dentry *dentry) * The dentry must already be unhashed and removed from the LRU. * * If this is the root of the dentry tree, return NULL. + * + * dcache_lock and d_lock must be held by caller, are dropped by d_kill. 
*/ static struct dentry *d_kill(struct dentry *dentry) __releases(dentry->d_lock) @@ -383,10 +405,19 @@ int d_invalidate(struct dentry * dentry) EXPORT_SYMBOL(d_invalidate); /* This should be called _only_ with dcache_lock held */ +static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) +{ + atomic_inc(&dentry->d_count); + dentry_lru_del(dentry); + return dentry; +} + static inline struct dentry * __dget_locked(struct dentry *dentry) { atomic_inc(&dentry->d_count); + spin_lock(&dentry->d_lock); dentry_lru_del(dentry); + spin_unlock(&dentry->d_lock); return dentry; } @@ -465,7 +496,7 @@ restart: list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!atomic_read(&dentry->d_count)) { - __dget_locked(dentry); + __dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); @@ -489,7 +520,6 @@ EXPORT_SYMBOL(d_prune_aliases); static void prune_one_dentry(struct dentry * dentry) __releases(dentry->d_lock) __releases(dcache_lock) - __acquires(dcache_lock) { __d_drop(dentry); dentry = d_kill(dentry); @@ -498,15 +528,16 @@ static void prune_one_dentry(struct dentry * dentry) * Prune ancestors. Locking is simpler than in dput(), * because dcache_lock needs to be taken anyway. */ - spin_lock(&dcache_lock); while (dentry) { - if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) + spin_lock(&dcache_lock); + if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) { + spin_unlock(&dcache_lock); return; + } dentry_lru_del(dentry); __d_drop(dentry); dentry = d_kill(dentry); - spin_lock(&dcache_lock); } } @@ -516,21 +547,31 @@ static void shrink_dentry_list(struct list_head *list) while (!list_empty(list)) { dentry = list_entry(list->prev, struct dentry, d_lru); - dentry_lru_del(dentry); + + if (!spin_trylock(&dentry->d_lock)) { + spin_unlock(&dcache_lru_lock); + cpu_relax(); + spin_lock(&dcache_lru_lock); + continue; + } + + __dentry_lru_del(dentry); /* * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ - spin_lock(&dentry->d_lock); if (atomic_read(&dentry->d_count)) { spin_unlock(&dentry->d_lock); continue; } + spin_unlock(&dcache_lru_lock); + prune_one_dentry(dentry); - /* dentry->d_lock was dropped in prune_one_dentry() */ - cond_resched_lock(&dcache_lock); + /* dcache_lock and dentry->d_lock dropped */ + spin_lock(&dcache_lock); + spin_lock(&dcache_lru_lock); } } @@ -551,32 +592,36 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) int cnt = *count; spin_lock(&dcache_lock); +relock: + spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { dentry = list_entry(sb->s_dentry_lru.prev, struct dentry, d_lru); BUG_ON(dentry->d_sb != sb); + if (!spin_trylock(&dentry->d_lock)) { + spin_unlock(&dcache_lru_lock); + cpu_relax(); + goto relock; + } + /* * If we are honouring the DCACHE_REFERENCED flag and the * dentry has this flag set, don't free it. Clear the flag * and put it back on the LRU. 
*/ - if (flags & DCACHE_REFERENCED) { - spin_lock(&dentry->d_lock); - if (dentry->d_flags & DCACHE_REFERENCED) { - dentry->d_flags &= ~DCACHE_REFERENCED; - list_move(&dentry->d_lru, &referenced); - spin_unlock(&dentry->d_lock); - cond_resched_lock(&dcache_lock); - continue; - } + if (flags & DCACHE_REFERENCED && + dentry->d_flags & DCACHE_REFERENCED) { + dentry->d_flags &= ~DCACHE_REFERENCED; + list_move(&dentry->d_lru, &referenced); + spin_unlock(&dentry->d_lock); + } else { + list_move_tail(&dentry->d_lru, &tmp); spin_unlock(&dentry->d_lock); + if (!--cnt) + break; } - - list_move_tail(&dentry->d_lru, &tmp); - if (!--cnt) - break; - cond_resched_lock(&dcache_lock); + /* XXX: re-add cond_resched_lock when dcache_lock goes away */ } *count = cnt; @@ -584,6 +629,7 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) if (!list_empty(&referenced)) list_splice(&referenced, &sb->s_dentry_lru); + spin_unlock(&dcache_lru_lock); spin_unlock(&dcache_lock); } @@ -679,10 +725,12 @@ void shrink_dcache_sb(struct super_block *sb) LIST_HEAD(tmp); spin_lock(&dcache_lock); + spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { list_splice_init(&sb->s_dentry_lru, &tmp); shrink_dentry_list(&tmp); } + spin_unlock(&dcache_lru_lock); spin_unlock(&dcache_lock); } EXPORT_SYMBOL(shrink_dcache_sb); @@ -701,7 +749,9 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* detach this root from the system */ spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); dentry_lru_del(dentry); + spin_unlock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dcache_lock); @@ -715,7 +765,9 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) spin_lock(&dcache_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { + spin_lock(&loop->d_lock); dentry_lru_del(loop); + spin_unlock(&loop->d_lock); __d_drop(loop); cond_resched_lock(&dcache_lock); } @@ -892,6 +944,8 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + spin_lock(&dentry->d_lock); + /* * move only zero ref count dentries to the end * of the unused list for prune_dcache @@ -903,6 +957,8 @@ resume: dentry_lru_del(dentry); } + spin_unlock(&dentry->d_lock); + /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find -- cgit v1.2.2 From b7ab39f631f505edc2bbdb86620d5493f995c9da Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:32 +1100 Subject: fs: dcache scale dentry refcount Make d_count non-atomic and protect it with d_lock. This allows us to ensure a 0 refcount dentry remains 0 without dcache_lock. It is also fairly natural when we start protecting many other dentry members with d_lock. 
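A userspace sketch of the two idioms this patch relies on: a plain integer refcount that is only ever touched under the object's own lock, and the trylock-and-retry dance (as in dget_parent in the diff below) used when the lock order says parent before child but only the child's lock is held. Types and names are toys invented for the example; sched_yield() stands in for cpu_relax():

#include <pthread.h>
#include <sched.h>
#include <stddef.h>

struct toy_dentry {
	pthread_mutex_t lock;	/* like d_lock: protects count and parent */
	int count;		/* like the now non-atomic d_count */
	struct toy_dentry *parent;
};

static void toy_dget(struct toy_dentry *d)
{
	pthread_mutex_lock(&d->lock);
	d->count++;
	pthread_mutex_unlock(&d->lock);
}

/* Returns the resulting count; the caller that sees 0 owns the final free. */
static int toy_dput(struct toy_dentry *d)
{
	int c;

	pthread_mutex_lock(&d->lock);
	c = --d->count;
	pthread_mutex_unlock(&d->lock);
	return c;
}

/*
 * Take a reference on d->parent while holding only d's lock. The lock
 * order is parent first, so trylock the parent and back off on failure
 * instead of deadlocking against a concurrent parent-then-child locker.
 */
static struct toy_dentry *toy_dget_parent(struct toy_dentry *d)
{
	struct toy_dentry *p;

repeat:
	pthread_mutex_lock(&d->lock);
	p = d->parent;
	if (p && pthread_mutex_trylock(&p->lock)) {
		pthread_mutex_unlock(&d->lock);
		sched_yield();		/* cpu_relax() in the kernel */
		goto repeat;
	}
	if (p) {
		p->count++;
		pthread_mutex_unlock(&p->lock);
	}
	pthread_mutex_unlock(&d->lock);
	return p;
}

int main(void)
{
	struct toy_dentry root = { PTHREAD_MUTEX_INITIALIZER, 1, NULL };
	struct toy_dentry child = { PTHREAD_MUTEX_INITIALIZER, 1, &root };

	toy_dget_parent(&child);	/* root.count is now 2 */
	toy_dput(&root);
	return 0;
}

Once the count can only change under the lock, "the count is 0 and I hold the lock" is a stable fact, which is what lets a zero-refcount dentry be torn down without any global lock.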
Signed-off-by: Nick Piggin --- fs/dcache.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 24 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 3d3c843c36ed..81e91502b294 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -45,6 +45,7 @@ * - d_flags * - d_name * - d_lru + * - d_count * * Ordering: * dcache_lock @@ -125,6 +126,7 @@ static void __d_free(struct rcu_head *head) */ static void d_free(struct dentry *dentry) { + BUG_ON(dentry->d_count); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); @@ -222,8 +224,11 @@ static struct dentry *d_kill(struct dentry *dentry) struct dentry *parent; list_del(&dentry->d_u.d_child); - /*drops the locks, at that point nobody can reach this dentry */ dentry_iput(dentry); + /* + * dentry_iput drops the locks, at which point nobody (except + * transient RCU lookups) can reach this dentry. + */ if (IS_ROOT(dentry)) parent = NULL; else @@ -303,13 +308,23 @@ void dput(struct dentry *dentry) return; repeat: - if (atomic_read(&dentry->d_count) == 1) + if (dentry->d_count == 1) might_sleep(); - if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock)) - return; - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count)) { + if (dentry->d_count == 1) { + if (!spin_trylock(&dcache_lock)) { + /* + * Something of a livelock possibility we could avoid + * by taking dcache_lock and trying again, but we + * want to reduce dcache_lock anyway so this will + * get improved. + */ + spin_unlock(&dentry->d_lock); + goto repeat; + } + } + dentry->d_count--; + if (dentry->d_count) { spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return; @@ -389,7 +404,7 @@ int d_invalidate(struct dentry * dentry) * working directory or similar). 
*/ spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) > 1) { + if (dentry->d_count > 1) { if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); @@ -404,29 +419,61 @@ int d_invalidate(struct dentry * dentry) } EXPORT_SYMBOL(d_invalidate); -/* This should be called _only_ with dcache_lock held */ +/* This must be called with dcache_lock and d_lock held */ static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) { - atomic_inc(&dentry->d_count); + dentry->d_count++; dentry_lru_del(dentry); return dentry; } +/* This should be called _only_ with dcache_lock held */ static inline struct dentry * __dget_locked(struct dentry *dentry) { - atomic_inc(&dentry->d_count); spin_lock(&dentry->d_lock); - dentry_lru_del(dentry); + __dget_locked_dlock(dentry); spin_unlock(&dentry->d_lock); return dentry; } +struct dentry * dget_locked_dlock(struct dentry *dentry) +{ + return __dget_locked_dlock(dentry); +} + struct dentry * dget_locked(struct dentry *dentry) { return __dget_locked(dentry); } EXPORT_SYMBOL(dget_locked); +struct dentry *dget_parent(struct dentry *dentry) +{ + struct dentry *ret; + +repeat: + spin_lock(&dentry->d_lock); + ret = dentry->d_parent; + if (!ret) + goto out; + if (dentry == ret) { + ret->d_count++; + goto out; + } + if (!spin_trylock(&ret->d_lock)) { + spin_unlock(&dentry->d_lock); + cpu_relax(); + goto repeat; + } + BUG_ON(!ret->d_count); + ret->d_count++; + spin_unlock(&ret->d_lock); +out: + spin_unlock(&dentry->d_lock); + return ret; +} +EXPORT_SYMBOL(dget_parent); + /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question @@ -495,7 +542,7 @@ restart: spin_lock(&dcache_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); - if (!atomic_read(&dentry->d_count)) { + if (!dentry->d_count) { __dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); @@ -530,7 +577,10 @@ static void prune_one_dentry(struct dentry * dentry) */ while (dentry) { spin_lock(&dcache_lock); - if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) { + spin_lock(&dentry->d_lock); + dentry->d_count--; + if (dentry->d_count) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return; } @@ -562,7 +612,7 @@ static void shrink_dentry_list(struct list_head *list) * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ - if (atomic_read(&dentry->d_count)) { + if (dentry->d_count) { spin_unlock(&dentry->d_lock); continue; } @@ -783,7 +833,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) do { struct inode *inode; - if (atomic_read(&dentry->d_count) != 0) { + if (dentry->d_count != 0) { printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%s}" " still in use (%d)" @@ -792,7 +842,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) dentry->d_inode ? 
dentry->d_inode->i_ino : 0UL, dentry->d_name.name, - atomic_read(&dentry->d_count), + dentry->d_count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); BUG(); @@ -802,7 +852,9 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) parent = NULL; else { parent = dentry->d_parent; - atomic_dec(&parent->d_count); + spin_lock(&parent->d_lock); + parent->d_count--; + spin_unlock(&parent->d_lock); } list_del(&dentry->d_u.d_child); @@ -853,7 +905,9 @@ void shrink_dcache_for_umount(struct super_block *sb) dentry = sb->s_root; sb->s_root = NULL; - atomic_dec(&dentry->d_count); + spin_lock(&dentry->d_lock); + dentry->d_count--; + spin_unlock(&dentry->d_lock); shrink_dcache_for_umount_subtree(dentry); while (!hlist_empty(&sb->s_anon)) { @@ -950,7 +1004,7 @@ resume: * move only zero ref count dentries to the end * of the unused list for prune_dcache */ - if (!atomic_read(&dentry->d_count)) { + if (!dentry->d_count) { dentry_lru_move_tail(dentry); found++; } else { @@ -1068,7 +1122,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) memcpy(dname, name->name, name->len); dname[name->len] = 0; - atomic_set(&dentry->d_count, 1); + dentry->d_count = 1; dentry->d_flags = DCACHE_UNHASHED; spin_lock_init(&dentry->d_lock); dentry->d_inode = NULL; @@ -1556,7 +1610,7 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) goto next; } - atomic_inc(&dentry->d_count); + dentry->d_count++; found = dentry; spin_unlock(&dentry->d_lock); break; @@ -1653,7 +1707,7 @@ void d_delete(struct dentry * dentry) spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); isdir = S_ISDIR(dentry->d_inode->i_mode); - if (atomic_read(&dentry->d_count) == 1) { + if (dentry->d_count == 1) { dentry->d_flags &= ~DCACHE_CANT_MOUNT; dentry_iput(dentry); fsnotify_nameremove(dentry, isdir); @@ -2494,11 +2548,15 @@ resume: this_parent = dentry; goto repeat; } - atomic_dec(&dentry->d_count); + spin_lock(&dentry->d_lock); + dentry->d_count--; + spin_unlock(&dentry->d_lock); } if (this_parent != root) { next = this_parent->d_u.d_child.next; - atomic_dec(&this_parent->d_count); + spin_lock(&this_parent->d_lock); + this_parent->d_count--; + spin_unlock(&this_parent->d_lock); this_parent = this_parent->d_parent; goto resume; } -- cgit v1.2.2 From da5029563a0a026c64821b09e8e7b4fd81d3fe1b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:33 +1100 Subject: fs: dcache scale d_unhashed Protect d_unhashed(dentry) condition with d_lock. This means keeping DCACHE_UNHASHED bit in synch with hash manipulations. Signed-off-by: Nick Piggin --- fs/dcache.c | 74 +++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 24 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 81e91502b294..ee127f9ab274 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -46,6 +46,7 @@ * - d_name * - d_lru * - d_count + * - d_unhashed() * * Ordering: * dcache_lock @@ -53,6 +54,13 @@ * dcache_lru_lock * dcache_hash_lock * + * If there is an ancestor relationship: + * dentry->d_parent->...->d_parent->d_lock + * ... + * dentry->d_parent->d_lock + * dentry->d_lock + * + * If no ancestor relationship: * if (dentry1 < dentry2) * dentry1->d_lock * dentry2->d_lock @@ -379,7 +387,9 @@ int d_invalidate(struct dentry * dentry) * If it's already been dropped, return OK. 
*/ spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return 0; } @@ -388,9 +398,11 @@ int d_invalidate(struct dentry * dentry) * to get rid of unused child entries. */ if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); shrink_dcache_parent(dentry); spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); } /* @@ -403,7 +415,6 @@ int d_invalidate(struct dentry * dentry) * we might still populate it if it was a * working directory or similar). */ - spin_lock(&dentry->d_lock); if (dentry->d_count > 1) { if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { spin_unlock(&dentry->d_lock); @@ -490,35 +501,44 @@ EXPORT_SYMBOL(dget_parent); * any other hashed alias over that one unless @want_discon is set, * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. */ - -static struct dentry * __d_find_alias(struct inode *inode, int want_discon) +static struct dentry *__d_find_alias(struct inode *inode, int want_discon) { - struct list_head *head, *next, *tmp; - struct dentry *alias, *discon_alias=NULL; + struct dentry *alias, *discon_alias; - head = &inode->i_dentry; - next = inode->i_dentry.next; - while (next != head) { - tmp = next; - next = tmp->next; - prefetch(next); - alias = list_entry(tmp, struct dentry, d_alias); +again: + discon_alias = NULL; + list_for_each_entry(alias, &inode->i_dentry, d_alias) { + spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && - (alias->d_flags & DCACHE_DISCONNECTED)) + (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; - else if (!want_discon) { - __dget_locked(alias); + } else if (!want_discon) { + __dget_locked_dlock(alias); + spin_unlock(&alias->d_lock); + return alias; + } + } + spin_unlock(&alias->d_lock); + } + if (discon_alias) { + alias = discon_alias; + spin_lock(&alias->d_lock); + if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { + if (IS_ROOT(alias) && + (alias->d_flags & DCACHE_DISCONNECTED)) { + __dget_locked_dlock(alias); + spin_unlock(&alias->d_lock); return alias; } } + spin_unlock(&alias->d_lock); + goto again; } - if (discon_alias) - __dget_locked(discon_alias); - return discon_alias; + return NULL; } -struct dentry * d_find_alias(struct inode *inode) +struct dentry *d_find_alias(struct inode *inode) { struct dentry *de = NULL; @@ -801,8 +821,8 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); dentry_lru_del(dentry); - spin_unlock(&dentry->d_lock); __d_drop(dentry); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); for (;;) { @@ -817,8 +837,8 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) d_u.d_child) { spin_lock(&loop->d_lock); dentry_lru_del(loop); - spin_unlock(&loop->d_lock); __d_drop(loop); + spin_unlock(&loop->d_lock); cond_resched_lock(&dcache_lock); } spin_unlock(&dcache_lock); @@ -1863,7 +1883,10 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) /* * XXXX: do we really need to take target->d_lock? 
*/ - if (target < dentry) { + if (d_ancestor(dentry, target)) { + spin_lock(&dentry->d_lock); + spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); + } else if (d_ancestor(target, dentry) || target < dentry) { spin_lock(&target->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); } else { @@ -2542,13 +2565,16 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - if (d_unhashed(dentry)||!dentry->d_inode) + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + if (d_unhashed(dentry) || !dentry->d_inode) { + spin_unlock(&dentry->d_lock); continue; + } if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&dentry->d_lock); this_parent = dentry; goto repeat; } - spin_lock(&dentry->d_lock); dentry->d_count--; spin_unlock(&dentry->d_lock); } -- cgit v1.2.2 From 2fd6b7f50797f2e993eea59e0a0b8c6399c811dc Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:34 +1100 Subject: fs: dcache scale subdirs Protect d_subdirs and d_child with d_lock, except in filesystems that aren't using dcache_lock for these anyway (eg. using i_mutex). Note: if we change the locking rule in future so that ->d_child protection is provided only with ->d_parent->d_lock, it may allow us to reduce some locking. But it would be an exception to an otherwise regular locking scheme, so we'd have to see some good results. Probably not worthwhile. Signed-off-by: Nick Piggin --- fs/dcache.c | 248 ++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 174 insertions(+), 74 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index ee127f9ab274..a661247a20d5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -47,6 +47,8 @@ * - d_lru * - d_count * - d_unhashed() + * - d_parent and d_subdirs + * - childrens' d_child and d_parent * * Ordering: * dcache_lock @@ -223,24 +225,22 @@ static void dentry_lru_move_tail(struct dentry *dentry) * * If this is the root of the dentry tree, return NULL. * - * dcache_lock and d_lock must be held by caller, are dropped by d_kill. + * dcache_lock and d_lock and d_parent->d_lock must be held by caller, and + * are dropped by d_kill. */ -static struct dentry *d_kill(struct dentry *dentry) +static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) + __releases(parent->d_lock) __releases(dcache_lock) { - struct dentry *parent; - list_del(&dentry->d_u.d_child); + if (parent) + spin_unlock(&parent->d_lock); dentry_iput(dentry); /* * dentry_iput drops the locks, at which point nobody (except * transient RCU lookups) can reach this dentry. 
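dput() above must take the parent's d_lock while already holding the child's, which runs against the parent-before-child order; it therefore uses spin_trylock() and, on failure, drops everything and starts over. The same backoff loop as a standalone sketch (pthread mutexes, hypothetical names):

#include <pthread.h>
#include <sched.h>
#include <stddef.h>

struct node {
	pthread_mutex_t lock;		/* stands in for d_lock */
	struct node *parent;		/* stable only while lock is held */
	int count;
};

/* Lock a node and its parent, restarting on an ordering conflict. */
static struct node *lock_node_and_parent(struct node *n)
{
	struct node *parent;

	for (;;) {
		pthread_mutex_lock(&n->lock);
		parent = n->parent;
		if (!parent || pthread_mutex_trylock(&parent->lock) == 0)
			return parent;	/* n locked; parent locked if non-NULL */
		/* Out-of-order attempt failed: drop and retry from scratch. */
		pthread_mutex_unlock(&n->lock);
		sched_yield();		/* userspace stand-in for cpu_relax() */
	}
}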
*/ - if (IS_ROOT(dentry)) - parent = NULL; - else - parent = dentry->d_parent; d_free(dentry); return parent; } @@ -312,6 +312,7 @@ EXPORT_SYMBOL(d_drop); void dput(struct dentry *dentry) { + struct dentry *parent; if (!dentry) return; @@ -319,6 +320,10 @@ repeat: if (dentry->d_count == 1) might_sleep(); spin_lock(&dentry->d_lock); + if (IS_ROOT(dentry)) + parent = NULL; + else + parent = dentry->d_parent; if (dentry->d_count == 1) { if (!spin_trylock(&dcache_lock)) { /* @@ -330,10 +335,17 @@ repeat: spin_unlock(&dentry->d_lock); goto repeat; } + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + goto repeat; + } } dentry->d_count--; if (dentry->d_count) { spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); return; } @@ -355,6 +367,8 @@ repeat: dentry_lru_add(dentry); spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); return; @@ -363,7 +377,7 @@ unhash_it: kill_it: /* if dentry was on the d_lru list delete it from there */ dentry_lru_del(dentry); - dentry = d_kill(dentry); + dentry = d_kill(dentry, parent); if (dentry) goto repeat; } @@ -584,12 +598,13 @@ EXPORT_SYMBOL(d_prune_aliases); * quadratic behavior of shrink_dcache_parent(), but is also expected * to be beneficial in reducing dentry cache fragmentation. */ -static void prune_one_dentry(struct dentry * dentry) +static void prune_one_dentry(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) + __releases(parent->d_lock) __releases(dcache_lock) { __d_drop(dentry); - dentry = d_kill(dentry); + dentry = d_kill(dentry, parent); /* * Prune ancestors. Locking is simpler than in dput(), @@ -597,9 +612,20 @@ static void prune_one_dentry(struct dentry * dentry) */ while (dentry) { spin_lock(&dcache_lock); +again: spin_lock(&dentry->d_lock); + if (IS_ROOT(dentry)) + parent = NULL; + else + parent = dentry->d_parent; + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dentry->d_lock); + goto again; + } dentry->d_count--; if (dentry->d_count) { + if (parent) + spin_unlock(&parent->d_lock); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return; @@ -607,7 +633,7 @@ static void prune_one_dentry(struct dentry * dentry) dentry_lru_del(dentry); __d_drop(dentry); - dentry = d_kill(dentry); + dentry = d_kill(dentry, parent); } } @@ -616,29 +642,40 @@ static void shrink_dentry_list(struct list_head *list) struct dentry *dentry; while (!list_empty(list)) { + struct dentry *parent; + dentry = list_entry(list->prev, struct dentry, d_lru); if (!spin_trylock(&dentry->d_lock)) { +relock: spin_unlock(&dcache_lru_lock); cpu_relax(); spin_lock(&dcache_lru_lock); continue; } - __dentry_lru_del(dentry); - /* * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. 
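The relock: path in shrink_dentry_list() above shows the companion pattern for scanning a locked list: if an entry's d_lock cannot be taken, drop the list lock so the contending thread can finish, then reacquire and rescan rather than spinning with both held. A compact model under those assumptions (illustrative names only):

#include <pthread.h>
#include <sched.h>
#include <stddef.h>

struct entry {
	pthread_mutex_t lock;		/* per-entry, like d_lock */
	struct entry *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *lru_head;

static void scan_lru(void)
{
	pthread_mutex_lock(&list_lock);
	while (lru_head) {
		struct entry *e = lru_head;

		if (pthread_mutex_trylock(&e->lock) != 0) {
			/* Contended: give the other side room, rescan. */
			pthread_mutex_unlock(&list_lock);
			sched_yield();
			pthread_mutex_lock(&list_lock);
			continue;
		}
		lru_head = e->next;	/* unlink under both locks */
		pthread_mutex_unlock(&e->lock);
		/* ... dispose of e here ... */
	}
	pthread_mutex_unlock(&list_lock);
}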
*/ if (dentry->d_count) { + __dentry_lru_del(dentry); spin_unlock(&dentry->d_lock); continue; } + if (IS_ROOT(dentry)) + parent = NULL; + else + parent = dentry->d_parent; + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dentry->d_lock); + goto relock; + } + __dentry_lru_del(dentry); spin_unlock(&dcache_lru_lock); - prune_one_dentry(dentry); + prune_one_dentry(dentry, parent); /* dcache_lock and dentry->d_lock dropped */ spin_lock(&dcache_lock); spin_lock(&dcache_lru_lock); @@ -833,14 +870,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* this is a branch with children - detach all of them * from the system in one go */ spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { - spin_lock(&loop->d_lock); + spin_lock_nested(&loop->d_lock, + DENTRY_D_LOCK_NESTED); dentry_lru_del(loop); __d_drop(loop); spin_unlock(&loop->d_lock); - cond_resched_lock(&dcache_lock); } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); /* move to the first child */ @@ -868,16 +907,17 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG(); } - if (IS_ROOT(dentry)) + if (IS_ROOT(dentry)) { parent = NULL; - else { + list_del(&dentry->d_u.d_child); + } else { parent = dentry->d_parent; spin_lock(&parent->d_lock); parent->d_count--; + list_del(&dentry->d_u.d_child); spin_unlock(&parent->d_lock); } - list_del(&dentry->d_u.d_child); detached++; inode = dentry->d_inode; @@ -958,6 +998,7 @@ int have_submounts(struct dentry *parent) spin_lock(&dcache_lock); if (d_mountpoint(parent)) goto positive; + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -965,22 +1006,34 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); /* Have we found a mount point ? */ - if (d_mountpoint(dentry)) + if (d_mountpoint(dentry)) { + spin_unlock(&dentry->d_lock); + spin_unlock(&this_parent->d_lock); goto positive; + } if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } + spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { next = this_parent->d_u.d_child.next; + spin_unlock(&this_parent->d_lock); this_parent = this_parent->d_parent; + spin_lock(&this_parent->d_lock); goto resume; } + spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); return 0; /* No mount points found in tree */ positive: @@ -1010,6 +1063,7 @@ static int select_parent(struct dentry * parent) int found = 0; spin_lock(&dcache_lock); + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -1017,8 +1071,9 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + BUG_ON(this_parent == dentry); - spin_lock(&dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); /* * move only zero ref count dentries to the end @@ -1031,33 +1086,44 @@ resume: dentry_lru_del(dentry); } - spin_unlock(&dentry->d_lock); - /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find * the rest. 
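The descend step above keeps the child's d_lock held while the parent's is released; the spin_release()/spin_acquire() pair only re-registers the still-held lock with lockdep at its new nesting level. Stripped of the annotations, this is classic lock coupling, sketched here on a toy tree (names are not kernel API):

#include <pthread.h>
#include <stddef.h>

struct tnode {
	pthread_mutex_t lock;		/* plays the role of d_lock */
	struct tnode *first_child;	/* toy stand-in for d_subdirs */
};

/* Descend to the leftmost leaf, never holding fewer than one lock. */
static struct tnode *descend_leftmost(struct tnode *root)
{
	struct tnode *cur = root;

	pthread_mutex_lock(&cur->lock);
	while (cur->first_child) {
		struct tnode *child = cur->first_child;

		pthread_mutex_lock(&child->lock);	/* child under parent */
		pthread_mutex_unlock(&cur->lock);	/* hand over */
		cur = child;
	}
	return cur;	/* returned locked */
}

Because at least one lock is held at every instant, no concurrent rename can slide the walk onto a detached subtree mid-descent.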
*/ - if (found && need_resched()) + if (found && need_resched()) { + spin_unlock(&dentry->d_lock); goto out; + } /* * Descend a level if the d_subdirs list is non-empty. */ if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } + + spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { + struct dentry *tmp; next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; + tmp = this_parent->d_parent; + spin_unlock(&this_parent->d_lock); + BUG_ON(tmp == this_parent); + this_parent = tmp; + spin_lock(&this_parent->d_lock); goto resume; } out: + spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); return found; } @@ -1155,18 +1221,19 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); INIT_LIST_HEAD(&dentry->d_alias); + INIT_LIST_HEAD(&dentry->d_u.d_child); if (parent) { - dentry->d_parent = dget(parent); + spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + dentry->d_parent = dget_dlock(parent); dentry->d_sb = parent->d_sb; - } else { - INIT_LIST_HEAD(&dentry->d_u.d_child); - } - - spin_lock(&dcache_lock); - if (parent) list_add(&dentry->d_u.d_child, &parent->d_subdirs); - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); + spin_unlock(&dcache_lock); + } this_cpu_inc(nr_dentry); @@ -1684,13 +1751,18 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) struct dentry *child; spin_lock(&dcache_lock); + spin_lock(&dparent->d_lock); list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { if (dentry == child) { - __dget_locked(dentry); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + __dget_locked_dlock(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&dparent->d_lock); spin_unlock(&dcache_lock); return 1; } } + spin_unlock(&dparent->d_lock); spin_unlock(&dcache_lock); return 0; @@ -1802,17 +1874,6 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name) } EXPORT_SYMBOL(dentry_update_name_case); -/* - * When switching names, the actual string doesn't strictly have to - * be preserved in the target - because we're dropping the target - * anyway. As such, we can just do a simple memcpy() to copy over - * the new name before we switch. - * - * Note that we have to be a lot more careful about getting the hash - * switched - we have to switch the hash value properly even if it - * then no longer matches the actual (corrupted) string of the target. - * The hash value has to match the hash queue that the dentry is on.. - */ static void switch_names(struct dentry *dentry, struct dentry *target) { if (dname_external(target)) { @@ -1854,18 +1915,53 @@ static void switch_names(struct dentry *dentry, struct dentry *target) swap(dentry->d_name.len, target->d_name.len); } +static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) +{ + /* + * XXXX: do we really need to take target->d_lock? 
+ */ + if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent) + spin_lock(&target->d_parent->d_lock); + else { + if (d_ancestor(dentry->d_parent, target->d_parent)) { + spin_lock(&dentry->d_parent->d_lock); + spin_lock_nested(&target->d_parent->d_lock, + DENTRY_D_LOCK_NESTED); + } else { + spin_lock(&target->d_parent->d_lock); + spin_lock_nested(&dentry->d_parent->d_lock, + DENTRY_D_LOCK_NESTED); + } + } + if (target < dentry) { + spin_lock_nested(&target->d_lock, 2); + spin_lock_nested(&dentry->d_lock, 3); + } else { + spin_lock_nested(&dentry->d_lock, 2); + spin_lock_nested(&target->d_lock, 3); + } +} + +static void dentry_unlock_parents_for_move(struct dentry *dentry, + struct dentry *target) +{ + if (target->d_parent != dentry->d_parent) + spin_unlock(&dentry->d_parent->d_lock); + if (target->d_parent != target) + spin_unlock(&target->d_parent->d_lock); +} + /* - * We cannibalize "target" when moving dentry on top of it, - * because it's going to be thrown away anyway. We could be more - * polite about it, though. - * - * This forceful removal will result in ugly /proc output if - * somebody holds a file open that got deleted due to a rename. - * We could be nicer about the deleted file, and let it show - * up under the name it had before it was deleted rather than - * under the original name of the file that was moved on top of it. + * When switching names, the actual string doesn't strictly have to + * be preserved in the target - because we're dropping the target + * anyway. As such, we can just do a simple memcpy() to copy over + * the new name before we switch. + * + * Note that we have to be a lot more careful about getting the hash + * switched - we have to switch the hash value properly even if it + * then no longer matches the actual (corrupted) string of the target. + * The hash value has to match the hash queue that the dentry is on.. */ - /* * d_move_locked - move a dentry * @dentry: entry to move @@ -1879,20 +1975,12 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); + BUG_ON(d_ancestor(dentry, target)); + BUG_ON(d_ancestor(target, dentry)); + write_seqlock(&rename_lock); - /* - * XXXX: do we really need to take target->d_lock? - */ - if (d_ancestor(dentry, target)) { - spin_lock(&dentry->d_lock); - spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); - } else if (d_ancestor(target, dentry) || target < dentry) { - spin_lock(&target->d_lock); - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - } else { - spin_lock(&dentry->d_lock); - spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); - } + + dentry_lock_for_move(dentry, target); /* Move the dentry to the target hash queue, if on different bucket */ spin_lock(&dcache_hash_lock); @@ -1924,6 +2012,8 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) } list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + + dentry_unlock_parents_for_move(dentry, target); spin_unlock(&target->d_lock); fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); @@ -2013,17 +2103,20 @@ out_err: /* * Prepare an anonymous dentry for life in the superblock's dentry tree as a * named dentry in place of the dentry to be replaced. + * returns with anon->d_lock held! 
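dentry_lock_for_move() above needs up to four d_locks: the two parents (ancestor first, or a single lock when they coincide) and the two dentries in address order, each at a distinct lockdep subclass. Outside the kernel, where subclass annotations don't apply, the same deadlock-freedom can be had by deduplicating the set and locking in address order; a sketch under that assumption:

#include <pthread.h>
#include <stdint.h>

struct obj { pthread_mutex_t lock; };

/* Lock up to four objects: sort by address, skip duplicates. */
static void lock_many(struct obj **v, int n)
{
	int i, j;

	/* insertion sort; n is at most 4 here */
	for (i = 1; i < n; i++)
		for (j = i; j > 0 && (uintptr_t)v[j] < (uintptr_t)v[j - 1]; j--) {
			struct obj *t = v[j];
			v[j] = v[j - 1];
			v[j - 1] = t;
		}
	for (i = 0; i < n; i++)
		if (i == 0 || v[i] != v[i - 1])	/* shared parent appears twice */
			pthread_mutex_lock(&v[i]->lock);
}

The kernel cannot simply sort, because the ancestor relation must win over address order when the parents are related; the helper above is the generic equivalent for unrelated locks only.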
*/ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) { struct dentry *dparent, *aparent; - switch_names(dentry, anon); - swap(dentry->d_name.hash, anon->d_name.hash); + dentry_lock_for_move(anon, dentry); dparent = dentry->d_parent; aparent = anon->d_parent; + switch_names(dentry, anon); + swap(dentry->d_name.hash, anon->d_name.hash); + dentry->d_parent = (aparent == anon) ? dentry : aparent; list_del(&dentry->d_u.d_child); if (!IS_ROOT(dentry)) @@ -2038,6 +2131,10 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) else INIT_LIST_HEAD(&anon->d_u.d_child); + dentry_unlock_parents_for_move(anon, dentry); + spin_unlock(&dentry->d_lock); + + /* anon->d_lock still locked, returns locked */ anon->d_flags &= ~DCACHE_DISCONNECTED; } @@ -2073,7 +2170,6 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) /* Is this an anonymous mountpoint that we could splice * into our tree? */ if (IS_ROOT(alias)) { - spin_lock(&alias->d_lock); __d_materialise_dentry(dentry, alias); __d_drop(alias); goto found; @@ -2558,6 +2654,7 @@ void d_genocide(struct dentry *root) struct list_head *next; spin_lock(&dcache_lock); + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -2571,8 +2668,10 @@ resume: continue; } if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&dentry->d_lock); + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } dentry->d_count--; @@ -2580,12 +2679,13 @@ resume: } if (this_parent != root) { next = this_parent->d_u.d_child.next; - spin_lock(&this_parent->d_lock); this_parent->d_count--; spin_unlock(&this_parent->d_lock); this_parent = this_parent->d_parent; + spin_lock(&this_parent->d_lock); goto resume; } + spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); } -- cgit v1.2.2 From b23fb0a60379a95e10c671f646b259ea2558421e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:35 +1100 Subject: fs: scale inode alias list Add a new lock, dcache_inode_lock, to protect the inode's i_dentry list from concurrent modification. d_alias is also protected by d_lock. 
Signed-off-by: Nick Piggin --- fs/dcache.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 8 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index a661247a20d5..de38680ee0ed 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -37,6 +37,8 @@ /* * Usage: + * dcache_inode_lock protects: + * - i_dentry, d_alias, d_inode * dcache_hash_lock protects: * - the dcache hash table, s_anon lists * dcache_lru_lock protects: @@ -49,12 +51,14 @@ * - d_unhashed() * - d_parent and d_subdirs * - childrens' d_child and d_parent + * - d_alias, d_inode * * Ordering: * dcache_lock - * dentry->d_lock - * dcache_lru_lock - * dcache_hash_lock + * dcache_inode_lock + * dentry->d_lock + * dcache_lru_lock + * dcache_hash_lock * * If there is an ancestor relationship: * dentry->d_parent->...->d_parent->d_lock @@ -70,11 +74,13 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); +__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_inode_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); +EXPORT_SYMBOL(dcache_inode_lock); EXPORT_SYMBOL(dcache_lock); static struct kmem_cache *dentry_cache __read_mostly; @@ -154,6 +160,7 @@ static void d_free(struct dentry *dentry) */ static void dentry_iput(struct dentry * dentry) __releases(dentry->d_lock) + __releases(dcache_inode_lock) __releases(dcache_lock) { struct inode *inode = dentry->d_inode; @@ -161,6 +168,7 @@ static void dentry_iput(struct dentry * dentry) dentry->d_inode = NULL; list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); @@ -170,6 +178,7 @@ static void dentry_iput(struct dentry * dentry) iput(inode); } else { spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); } } @@ -231,6 +240,7 @@ static void dentry_lru_move_tail(struct dentry *dentry) static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) __releases(parent->d_lock) + __releases(dcache_inode_lock) __releases(dcache_lock) { list_del(&dentry->d_u.d_child); @@ -332,13 +342,18 @@ repeat: * want to reduce dcache_lock anyway so this will * get improved. 
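With a third lock joining the dance, the dput() hunk that follows keeps the backoff readable by unwinding through cascading goto labels (drop1, drop2) instead of duplicating unlock sequences at every failure point. The shape of that cascade in standalone C, with purely illustrative lock names:

#include <pthread.h>
#include <sched.h>

static pthread_mutex_t inode_lock = PTHREAD_MUTEX_INITIALIZER;	/* coarse lock 1 */
static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;	/* coarse lock 2 */

static void take_all(pthread_mutex_t *obj)
{
repeat:
	pthread_mutex_lock(obj);
	if (pthread_mutex_trylock(&inode_lock) != 0)
		goto drop_obj;
	if (pthread_mutex_trylock(&tree_lock) != 0)
		goto drop_inode;

	/* all three held: do the work, release in reverse order */
	pthread_mutex_unlock(&tree_lock);
	pthread_mutex_unlock(&inode_lock);
	pthread_mutex_unlock(obj);
	return;

drop_inode:
	pthread_mutex_unlock(&inode_lock);
drop_obj:
	pthread_mutex_unlock(obj);
	sched_yield();
	goto repeat;
}

Each later failure falls through the earlier unlock labels, so every lock taken is released exactly once on every retry path.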
*/ +drop1: spin_unlock(&dentry->d_lock); goto repeat; } - if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dentry->d_lock); + if (!spin_trylock(&dcache_inode_lock)) { +drop2: spin_unlock(&dcache_lock); - goto repeat; + goto drop1; + } + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dcache_inode_lock); + goto drop2; } } dentry->d_count--; @@ -369,6 +384,7 @@ repeat: spin_unlock(&dentry->d_lock); if (parent) spin_unlock(&parent->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); return; @@ -558,7 +574,9 @@ struct dentry *d_find_alias(struct inode *inode) if (!list_empty(&inode->i_dentry)) { spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); de = __d_find_alias(inode, 0); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); } return de; @@ -574,18 +592,21 @@ void d_prune_aliases(struct inode *inode) struct dentry *dentry; restart: spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_count) { __dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); dput(dentry); goto restart; } spin_unlock(&dentry->d_lock); } + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); } EXPORT_SYMBOL(d_prune_aliases); @@ -601,6 +622,7 @@ EXPORT_SYMBOL(d_prune_aliases); static void prune_one_dentry(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) __releases(parent->d_lock) + __releases(dcache_inode_lock) __releases(dcache_lock) { __d_drop(dentry); @@ -612,6 +634,7 @@ static void prune_one_dentry(struct dentry *dentry, struct dentry *parent) */ while (dentry) { spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); again: spin_lock(&dentry->d_lock); if (IS_ROOT(dentry)) @@ -627,6 +650,7 @@ again: if (parent) spin_unlock(&parent->d_lock); spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); return; } @@ -676,8 +700,9 @@ relock: spin_unlock(&dcache_lru_lock); prune_one_dentry(dentry, parent); - /* dcache_lock and dentry->d_lock dropped */ + /* dcache_lock, dcache_inode_lock and dentry->d_lock dropped */ spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); spin_lock(&dcache_lru_lock); } } @@ -699,6 +724,7 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) int cnt = *count; spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); relock: spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { @@ -737,8 +763,8 @@ relock: if (!list_empty(&referenced)) list_splice(&referenced, &sb->s_dentry_lru); spin_unlock(&dcache_lru_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); - } /** @@ -832,12 +858,14 @@ void shrink_dcache_sb(struct super_block *sb) LIST_HEAD(tmp); spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { list_splice_init(&sb->s_dentry_lru, &tmp); shrink_dentry_list(&tmp); } spin_unlock(&dcache_lru_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); } EXPORT_SYMBOL(shrink_dcache_sb); @@ -1255,9 +1283,11 @@ EXPORT_SYMBOL(d_alloc_name); /* the caller must hold dcache_lock */ static void __d_instantiate(struct dentry *dentry, struct inode *inode) { + spin_lock(&dentry->d_lock); if (inode) list_add(&dentry->d_alias, &inode->i_dentry); dentry->d_inode = inode; + spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); } @@ -1280,7 
+1310,9 @@ void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!list_empty(&entry->d_alias)); spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); __d_instantiate(entry, inode); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); security_d_instantiate(entry, inode); } @@ -1341,7 +1373,9 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) BUG_ON(!list_empty(&entry->d_alias)); spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); result = __d_instantiate_unique(entry, inode); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); if (!result) { @@ -1432,8 +1466,10 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_parent = tmp; /* make sure dput doesn't croak */ spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); res = __d_find_alias(inode, 0); if (res) { + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); dput(tmp); goto out_iput; @@ -1450,6 +1486,7 @@ struct dentry *d_obtain_alias(struct inode *inode) hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); spin_unlock(&dcache_hash_lock); spin_unlock(&tmp->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); return tmp; @@ -1482,9 +1519,11 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) if (inode && S_ISDIR(inode->i_mode)) { spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); new = __d_find_alias(inode, 1); if (new) { BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); security_d_instantiate(new, inode); d_move(new, dentry); @@ -1492,6 +1531,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) } else { /* already taking dcache_lock, so d_add() by hand */ __d_instantiate(dentry, inode); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); security_d_instantiate(dentry, inode); d_rehash(dentry); @@ -1566,8 +1606,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * already has a dentry. */ spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { __d_instantiate(found, inode); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); security_d_instantiate(found, inode); return found; @@ -1579,6 +1621,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, */ new = list_entry(inode->i_dentry.next, struct dentry, d_alias); dget_locked(new); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); security_d_instantiate(found, inode); d_move(new, found); @@ -1797,6 +1840,7 @@ void d_delete(struct dentry * dentry) * Are we the only user? 
*/ spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); spin_lock(&dentry->d_lock); isdir = S_ISDIR(dentry->d_inode->i_mode); if (dentry->d_count == 1) { @@ -1810,6 +1854,7 @@ void d_delete(struct dentry * dentry) __d_drop(dentry); spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); fsnotify_nameremove(dentry, isdir); @@ -2067,6 +2112,7 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) */ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) __releases(dcache_lock) + __releases(dcache_inode_lock) { struct mutex *m1 = NULL, *m2 = NULL; struct dentry *ret; @@ -2092,6 +2138,7 @@ out_unalias: d_move_locked(alias, dentry); ret = alias; out_err: + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); if (m2) mutex_unlock(m2); @@ -2153,6 +2200,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) BUG_ON(!d_unhashed(dentry)); spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); if (!inode) { actual = dentry; @@ -2196,6 +2244,7 @@ found: _d_rehash(actual); spin_unlock(&dcache_hash_lock); spin_unlock(&actual->d_lock); + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); out_nolock: if (actual == dentry) { @@ -2207,6 +2256,7 @@ out_nolock: return actual; shouldnt_be_hashed: + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); BUG(); } -- cgit v1.2.2 From 9abca36087288fe28de4749c71ca003d4b9e3ed0 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:36 +1100 Subject: fs: increase d_name lock coverage Cover d_name with d_lock in more cases, where there may be concurrent modification to it. Signed-off-by: Nick Piggin --- fs/dcache.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index de38680ee0ed..a09f0771fd27 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1350,6 +1350,11 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, list_for_each_entry(alias, &inode->i_dentry, d_alias) { struct qstr *qstr = &alias->d_name; + /* + * Don't need alias->d_lock here, because aliases with + * d_parent == entry->d_parent are not subject to name or + * parent changes, because the parent inode i_mutex is held. + */ if (qstr->hash != hash) continue; if (alias->d_parent != entry->d_parent) @@ -2313,7 +2318,9 @@ static int prepend_path(const struct path *path, struct path *root, } parent = dentry->d_parent; prefetch(parent); + spin_lock(&dentry->d_lock); error = prepend_name(buffer, buflen, &dentry->d_name); + spin_unlock(&dentry->d_lock); if (!error) error = prepend(buffer, buflen, "/", 1); if (error) @@ -2515,10 +2522,13 @@ static char *__dentry_path(struct dentry *dentry, char *buf, int buflen) while (!IS_ROOT(dentry)) { struct dentry *parent = dentry->d_parent; + int error; prefetch(parent); - if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || - (prepend(&end, &buflen, "/", 1) != 0)) + spin_lock(&dentry->d_lock); + error = prepend_name(&end, &buflen, &dentry->d_name); + spin_unlock(&dentry->d_lock); + if (error != 0 || prepend(&end, &buflen, "/", 1) != 0) goto Elong; retval = end; -- cgit v1.2.2 From 949854d02455080d20cd3e1db28a3a18daf7599d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:37 +1100 Subject: fs: Use rename lock and RCU for multi-step operations The remaining usages for dcache_lock is to allow atomic, multi-step read-side operations over the directory tree by excluding modifications to the tree. 
Also, to walk in the leaf->root direction in the tree where we don't have a natural d_lock ordering. This could be accomplished by taking every d_lock, but this would mean a huge number of locks and actually gets very tricky. Solve this instead by using the rename seqlock for multi-step read-side operations, retry in case of a rename so we don't walk up the wrong parent. Concurrent dentry insertions are not serialised against. Concurrent deletes are tricky when walking up the directory: our parent might have been deleted when dropping locks so also need to check and retry for that. We can also use the rename lock in cases where livelock is a worry (and it is introduced in subsequent patch). Signed-off-by: Nick Piggin --- fs/dcache.c | 134 +++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 111 insertions(+), 23 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index a09f0771fd27..a9bc4ecc21e1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -80,6 +80,7 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); +EXPORT_SYMBOL(rename_lock); EXPORT_SYMBOL(dcache_inode_lock); EXPORT_SYMBOL(dcache_lock); @@ -243,6 +244,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dcache_inode_lock) __releases(dcache_lock) { + dentry->d_parent = NULL; list_del(&dentry->d_u.d_child); if (parent) spin_unlock(&parent->d_lock); @@ -1017,11 +1019,15 @@ void shrink_dcache_for_umount(struct super_block *sb) * Return true if the parent or its subdirectories contain * a mount point */ - int have_submounts(struct dentry *parent) { - struct dentry *this_parent = parent; + struct dentry *this_parent; struct list_head *next; + unsigned seq; + +rename_retry: + this_parent = parent; + seq = read_seqbegin(&rename_lock); spin_lock(&dcache_lock); if (d_mountpoint(parent)) @@ -1055,17 +1061,37 @@ resume: * All done at this level ... ascend and resume the search. 
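The ascend step that follows has no safe lock order, so it drops the child's lock, takes the presumed parent's, and then revalidates both the parent pointer and the rename seqcount, restarting the whole walk if either changed; rcu_read_lock() keeps the parent's memory valid across the unlocked window. A reduced model of the revalidation (no RCU, so it assumes nodes are never freed and the child is not the root):

#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

struct tnode {
	pthread_mutex_t lock;
	struct tnode *_Atomic parent;
};

/*
 * Step from a locked child to its parent.  Returns the locked parent,
 * or NULL when the tree changed underneath us and the walk must restart.
 */
static struct tnode *ascend(struct tnode *child)
{
	struct tnode *parent = atomic_load(&child->parent);

	pthread_mutex_unlock(&child->lock);
	pthread_mutex_lock(&parent->lock);
	if (atomic_load(&child->parent) != parent) {
		pthread_mutex_unlock(&parent->lock);
		return NULL;		/* concurrent rename or deletion */
	}
	return parent;
}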
*/ if (this_parent != parent) { - next = this_parent->d_u.d_child.next; + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + rcu_read_lock(); spin_unlock(&this_parent->d_lock); - this_parent = this_parent->d_parent; + child = this_parent; + this_parent = tmp; spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + read_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + spin_unlock(&dcache_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return 0; /* No mount points found in tree */ positive: spin_unlock(&dcache_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return 1; } EXPORT_SYMBOL(have_submounts); @@ -1086,10 +1112,15 @@ EXPORT_SYMBOL(have_submounts); */ static int select_parent(struct dentry * parent) { - struct dentry *this_parent = parent; + struct dentry *this_parent; struct list_head *next; + unsigned seq; int found = 0; +rename_retry: + this_parent = parent; + seq = read_seqbegin(&rename_lock); + spin_lock(&dcache_lock); spin_lock(&this_parent->d_lock); repeat: @@ -1099,7 +1130,6 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - BUG_ON(this_parent == dentry); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); @@ -1142,17 +1172,32 @@ resume: */ if (this_parent != parent) { struct dentry *tmp; - next = this_parent->d_u.d_child.next; + struct dentry *child; + tmp = this_parent->d_parent; + rcu_read_lock(); spin_unlock(&this_parent->d_lock); - BUG_ON(tmp == this_parent); + child = this_parent; this_parent = tmp; spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + read_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + spin_unlock(&dcache_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } out: spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return found; } @@ -1654,7 +1699,7 @@ EXPORT_SYMBOL(d_add_ci); struct dentry * d_lookup(struct dentry * parent, struct qstr * name) { struct dentry * dentry = NULL; - unsigned long seq; + unsigned seq; do { seq = read_seqbegin(&rename_lock); @@ -2290,7 +2335,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) * @buffer: pointer to the end of the buffer * @buflen: pointer to buffer length * - * Caller holds the dcache_lock. + * Caller holds the rename_lock. * * If path is not reachable from the supplied root, then the value of * root is changed (without modifying refcounts). 
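The path walkers changed below take the write side of rename_lock, so read-side walkers elsewhere can sample the sequence count and retry instead of locking. A self-contained C11 model of that seqcount protocol — the writer bumps the count to odd before modifying and back to even after, and readers retry on any change; the fence placement here is the simple conservative variant, not a tuned one:

#include <stdatomic.h>

static atomic_uint seq;		/* even: no writer; odd: write in progress */

static unsigned read_begin(void)
{
	unsigned s;

	while ((s = atomic_load(&seq)) & 1)
		;			/* writer active, wait for it */
	atomic_thread_fence(memory_order_acquire);
	return s;
}

static int read_retry(unsigned s)
{
	atomic_thread_fence(memory_order_acquire);
	return atomic_load(&seq) != s;
}

/* Writers are assumed to be serialized among themselves already. */
static void write_begin(void)
{
	atomic_fetch_add(&seq, 1);	/* now odd */
	atomic_thread_fence(memory_order_release);
}

static void write_end(void)
{
	atomic_thread_fence(memory_order_release);
	atomic_fetch_add(&seq, 1);	/* even again, value changed */
}

/*
 * Reader pattern:
 *	do { s = read_begin(); ... copy data out ... } while (read_retry(s));
 */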
@@ -2377,7 +2422,9 @@ char *__d_path(const struct path *path, struct path *root, prepend(&res, &buflen, "\0", 1); spin_lock(&dcache_lock); + write_seqlock(&rename_lock); error = prepend_path(path, root, &res, &buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); if (error) @@ -2441,10 +2488,12 @@ char *d_path(const struct path *path, char *buf, int buflen) get_fs_root(current->fs, &root); spin_lock(&dcache_lock); + write_seqlock(&rename_lock); tmp = root; error = path_with_deleted(path, &tmp, &res, &buflen); if (error) res = ERR_PTR(error); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); path_put(&root); return res; @@ -2472,10 +2521,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen) get_fs_root(current->fs, &root); spin_lock(&dcache_lock); + write_seqlock(&rename_lock); tmp = root; error = path_with_deleted(path, &tmp, &res, &buflen); if (!error && !path_equal(&tmp, &root)) error = prepend_unreachable(&res, &buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); path_put(&root); if (error) @@ -2544,7 +2595,9 @@ char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) char *retval; spin_lock(&dcache_lock); + write_seqlock(&rename_lock); retval = __dentry_path(dentry, buf, buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); return retval; @@ -2557,6 +2610,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) char *retval; spin_lock(&dcache_lock); + write_seqlock(&rename_lock); if (d_unlinked(dentry)) { p = buf + buflen; if (prepend(&p, &buflen, "//deleted", 10) != 0) @@ -2564,6 +2618,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) buflen++; } retval = __dentry_path(dentry, buf, buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); if (!IS_ERR(retval) && p) *p = '/'; /* restore '/' overriden with '\0' */ @@ -2604,6 +2659,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) error = -ENOENT; spin_lock(&dcache_lock); + write_seqlock(&rename_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; struct path tmp = root; @@ -2612,6 +2668,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &tmp, &cwd, &buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); if (error) @@ -2631,8 +2688,10 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) if (copy_to_user(buf, cwd, len)) error = -EFAULT; } - } else + } else { + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); + } out: path_put(&pwd); @@ -2660,25 +2719,25 @@ out: int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { int result; - unsigned long seq; + unsigned seq; if (new_dentry == old_dentry) return 1; - /* - * Need rcu_readlock to protect against the d_parent trashing - * due to d_move - */ - rcu_read_lock(); do { /* for restarting inner loop in case of seq retry */ seq = read_seqbegin(&rename_lock); + /* + * Need rcu_readlock to protect against the d_parent trashing + * due to d_move + */ + rcu_read_lock(); if (d_ancestor(old_dentry, new_dentry)) result = 1; else result = 0; + rcu_read_unlock(); } while (read_seqretry(&rename_lock, seq)); - rcu_read_unlock(); return result; } @@ -2710,9 +2769,13 @@ EXPORT_SYMBOL(path_is_under); void d_genocide(struct dentry *root) { - struct dentry *this_parent = root; + struct dentry *this_parent; struct list_head *next; + unsigned seq; +rename_retry: + this_parent = root; + seq = read_seqbegin(&rename_lock); 
spin_lock(&dcache_lock); spin_lock(&this_parent->d_lock); repeat: @@ -2722,6 +2785,7 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); if (d_unhashed(dentry) || !dentry->d_inode) { spin_unlock(&dentry->d_lock); @@ -2734,19 +2798,43 @@ resume: spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } - dentry->d_count--; + if (!(dentry->d_flags & DCACHE_GENOCIDE)) { + dentry->d_flags |= DCACHE_GENOCIDE; + dentry->d_count--; + } spin_unlock(&dentry->d_lock); } if (this_parent != root) { - next = this_parent->d_u.d_child.next; - this_parent->d_count--; + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { + this_parent->d_flags |= DCACHE_GENOCIDE; + this_parent->d_count--; + } + rcu_read_lock(); spin_unlock(&this_parent->d_lock); - this_parent = this_parent->d_parent; + child = this_parent; + this_parent = tmp; spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + read_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + spin_unlock(&dcache_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; } /** -- cgit v1.2.2 From b5c84bf6f6fa3a7dfdcb556023a62953574b60ee Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:38 +1100 Subject: fs: dcache remove dcache_lock dcache_lock no longer protects anything. remove it. Signed-off-by: Nick Piggin --- fs/dcache.c | 160 ++++++++---------------------------------------------------- 1 file changed, 20 insertions(+), 140 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index a9bc4ecc21e1..0dbae053b664 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -54,11 +54,10 @@ * - d_alias, d_inode * * Ordering: - * dcache_lock - * dcache_inode_lock - * dentry->d_lock - * dcache_lru_lock - * dcache_hash_lock + * dcache_inode_lock + * dentry->d_lock + * dcache_lru_lock + * dcache_hash_lock * * If there is an ancestor relationship: * dentry->d_parent->...->d_parent->d_lock @@ -77,12 +76,10 @@ EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_inode_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); -__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(rename_lock); EXPORT_SYMBOL(dcache_inode_lock); -EXPORT_SYMBOL(dcache_lock); static struct kmem_cache *dentry_cache __read_mostly; @@ -139,7 +136,7 @@ static void __d_free(struct rcu_head *head) } /* - * no dcache_lock, please. + * no locks, please. 
*/ static void d_free(struct dentry *dentry) { @@ -162,7 +159,6 @@ static void d_free(struct dentry *dentry) static void dentry_iput(struct dentry * dentry) __releases(dentry->d_lock) __releases(dcache_inode_lock) - __releases(dcache_lock) { struct inode *inode = dentry->d_inode; if (inode) { @@ -170,7 +166,6 @@ static void dentry_iput(struct dentry * dentry) list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) @@ -180,7 +175,6 @@ static void dentry_iput(struct dentry * dentry) } else { spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } } @@ -235,14 +229,13 @@ static void dentry_lru_move_tail(struct dentry *dentry) * * If this is the root of the dentry tree, return NULL. * - * dcache_lock and d_lock and d_parent->d_lock must be held by caller, and - * are dropped by d_kill. + * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by + * d_kill. */ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) __releases(parent->d_lock) __releases(dcache_inode_lock) - __releases(dcache_lock) { dentry->d_parent = NULL; list_del(&dentry->d_u.d_child); @@ -285,11 +278,9 @@ EXPORT_SYMBOL(__d_drop); void d_drop(struct dentry *dentry) { - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(d_drop); @@ -337,21 +328,10 @@ repeat: else parent = dentry->d_parent; if (dentry->d_count == 1) { - if (!spin_trylock(&dcache_lock)) { - /* - * Something of a livelock possibility we could avoid - * by taking dcache_lock and trying again, but we - * want to reduce dcache_lock anyway so this will - * get improved. - */ -drop1: - spin_unlock(&dentry->d_lock); - goto repeat; - } if (!spin_trylock(&dcache_inode_lock)) { drop2: - spin_unlock(&dcache_lock); - goto drop1; + spin_unlock(&dentry->d_lock); + goto repeat; } if (parent && !spin_trylock(&parent->d_lock)) { spin_unlock(&dcache_inode_lock); @@ -363,7 +343,6 @@ drop2: spin_unlock(&dentry->d_lock); if (parent) spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); return; } @@ -387,7 +366,6 @@ drop2: if (parent) spin_unlock(&parent->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return; unhash_it: @@ -418,11 +396,9 @@ int d_invalidate(struct dentry * dentry) /* * If it's already been dropped, return OK. 
*/ - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 0; } /* @@ -431,9 +407,7 @@ int d_invalidate(struct dentry * dentry) */ if (!list_empty(&dentry->d_subdirs)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); shrink_dcache_parent(dentry); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); } @@ -450,19 +424,17 @@ int d_invalidate(struct dentry * dentry) if (dentry->d_count > 1) { if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return -EBUSY; } } __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 0; } EXPORT_SYMBOL(d_invalidate); -/* This must be called with dcache_lock and d_lock held */ +/* This must be called with d_lock held */ static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) { dentry->d_count++; @@ -470,7 +442,7 @@ static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) return dentry; } -/* This should be called _only_ with dcache_lock held */ +/* This must be called with d_lock held */ static inline struct dentry * __dget_locked(struct dentry *dentry) { spin_lock(&dentry->d_lock); @@ -575,11 +547,9 @@ struct dentry *d_find_alias(struct inode *inode) struct dentry *de = NULL; if (!list_empty(&inode->i_dentry)) { - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); de = __d_find_alias(inode, 0); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } return de; } @@ -593,7 +563,6 @@ void d_prune_aliases(struct inode *inode) { struct dentry *dentry; restart: - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); @@ -602,14 +571,12 @@ restart: __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); dput(dentry); goto restart; } spin_unlock(&dentry->d_lock); } spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(d_prune_aliases); @@ -625,17 +592,14 @@ static void prune_one_dentry(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) __releases(parent->d_lock) __releases(dcache_inode_lock) - __releases(dcache_lock) { __d_drop(dentry); dentry = d_kill(dentry, parent); /* - * Prune ancestors. Locking is simpler than in dput(), - * because dcache_lock needs to be taken anyway. + * Prune ancestors. 
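Once dcache_lock is gone, the only thing a release path needs is the object's own lock: take d_lock, drop the count, and either unlock and return or, at zero, tear the object down. The skeleton of that drop-to-zero protocol in plain C (in the kernel, __d_drop() has already made the dentry unfindable before the free):

#include <pthread.h>
#include <stdlib.h>

struct ref {
	pthread_mutex_t lock;
	int count;		/* protected by lock, like d_count */
};

static void ref_put(struct ref *r)
{
	pthread_mutex_lock(&r->lock);
	if (--r->count) {
		pthread_mutex_unlock(&r->lock);	/* still in use */
		return;
	}
	pthread_mutex_unlock(&r->lock);
	/*
	 * Last reference, and (by assumption) r was already unlinked from
	 * every lookup structure, so nobody can race with this free.
	 */
	free(r);
}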
*/ while (dentry) { - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); again: spin_lock(&dentry->d_lock); @@ -653,7 +617,6 @@ again: spin_unlock(&parent->d_lock); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return; } @@ -702,8 +665,7 @@ relock: spin_unlock(&dcache_lru_lock); prune_one_dentry(dentry, parent); - /* dcache_lock, dcache_inode_lock and dentry->d_lock dropped */ - spin_lock(&dcache_lock); + /* dcache_inode_lock and dentry->d_lock dropped */ spin_lock(&dcache_inode_lock); spin_lock(&dcache_lru_lock); } @@ -725,7 +687,6 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) LIST_HEAD(tmp); int cnt = *count; - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); relock: spin_lock(&dcache_lru_lock); @@ -766,7 +727,6 @@ relock: list_splice(&referenced, &sb->s_dentry_lru); spin_unlock(&dcache_lru_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } /** @@ -788,7 +748,6 @@ static void prune_dcache(int count) if (unused == 0 || count == 0) return; - spin_lock(&dcache_lock); if (count >= unused) prune_ratio = 1; else @@ -825,11 +784,9 @@ static void prune_dcache(int count) if (down_read_trylock(&sb->s_umount)) { if ((sb->s_root != NULL) && (!list_empty(&sb->s_dentry_lru))) { - spin_unlock(&dcache_lock); __shrink_dcache_sb(sb, &w_count, DCACHE_REFERENCED); pruned -= w_count; - spin_lock(&dcache_lock); } up_read(&sb->s_umount); } @@ -845,7 +802,6 @@ static void prune_dcache(int count) if (p) __put_super(p); spin_unlock(&sb_lock); - spin_unlock(&dcache_lock); } /** @@ -859,7 +815,6 @@ void shrink_dcache_sb(struct super_block *sb) { LIST_HEAD(tmp); - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { @@ -868,7 +823,6 @@ void shrink_dcache_sb(struct super_block *sb) } spin_unlock(&dcache_lru_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(shrink_dcache_sb); @@ -885,12 +839,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG_ON(!IS_ROOT(dentry)); /* detach this root from the system */ - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); dentry_lru_del(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); for (;;) { /* descend to the first leaf in the current subtree */ @@ -899,7 +851,6 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* this is a branch with children - detach all of them * from the system in one go */ - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { @@ -910,7 +861,6 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) spin_unlock(&loop->d_lock); } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); /* move to the first child */ dentry = list_entry(dentry->d_subdirs.next, @@ -977,8 +927,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* * destroy the dentries attached to a superblock on unmounting - * - we don't need to use dentry->d_lock, and only need dcache_lock when - * removing the dentry from the system lists and hashes because: + * - we don't need to use dentry->d_lock because: * - the superblock is detached from all mountings and open files, so the * dentry trees will not be rearranged by the VFS * - s_umount is write-locked, so the memory pressure shrinker will ignore @@ -1029,7 +978,6 @@ rename_retry: this_parent = parent; seq = read_seqbegin(&rename_lock); - 
spin_lock(&dcache_lock); if (d_mountpoint(parent)) goto positive; spin_lock(&this_parent->d_lock); @@ -1075,7 +1023,6 @@ resume: if (this_parent != child->d_parent || read_seqretry(&rename_lock, seq)) { spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); rcu_read_unlock(); goto rename_retry; } @@ -1084,12 +1031,10 @@ resume: goto resume; } spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); if (read_seqretry(&rename_lock, seq)) goto rename_retry; return 0; /* No mount points found in tree */ positive: - spin_unlock(&dcache_lock); if (read_seqretry(&rename_lock, seq)) goto rename_retry; return 1; @@ -1121,7 +1066,6 @@ rename_retry: this_parent = parent; seq = read_seqbegin(&rename_lock); - spin_lock(&dcache_lock); spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; @@ -1185,7 +1129,6 @@ resume: if (this_parent != child->d_parent || read_seqretry(&rename_lock, seq)) { spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); rcu_read_unlock(); goto rename_retry; } @@ -1195,7 +1138,6 @@ resume: } out: spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); if (read_seqretry(&rename_lock, seq)) goto rename_retry; return found; @@ -1297,7 +1239,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) INIT_LIST_HEAD(&dentry->d_u.d_child); if (parent) { - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); dentry->d_parent = dget_dlock(parent); @@ -1305,7 +1246,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) list_add(&dentry->d_u.d_child, &parent->d_subdirs); spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); } this_cpu_inc(nr_dentry); @@ -1325,7 +1265,6 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) } EXPORT_SYMBOL(d_alloc_name); -/* the caller must hold dcache_lock */ static void __d_instantiate(struct dentry *dentry, struct inode *inode) { spin_lock(&dentry->d_lock); @@ -1354,11 +1293,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); __d_instantiate(entry, inode); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(entry, inode); } EXPORT_SYMBOL(d_instantiate); @@ -1422,11 +1359,9 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); result = __d_instantiate_unique(entry, inode); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); if (!result) { security_d_instantiate(entry, inode); @@ -1515,12 +1450,11 @@ struct dentry *d_obtain_alias(struct inode *inode) } tmp->d_parent = tmp; /* make sure dput doesn't croak */ - spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); res = __d_find_alias(inode, 0); if (res) { spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); dput(tmp); goto out_iput; } @@ -1538,7 +1472,6 @@ struct dentry *d_obtain_alias(struct inode *inode) spin_unlock(&tmp->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return tmp; out_iput: @@ -1568,21 +1501,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) struct dentry *new = NULL; if (inode && S_ISDIR(inode->i_mode)) { - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); new = 
__d_find_alias(inode, 1); if (new) { BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(new, inode); d_move(new, dentry); iput(inode); } else { - /* already taking dcache_lock, so d_add() by hand */ + /* already taking dcache_inode_lock, so d_add() by hand */ __d_instantiate(dentry, inode); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(dentry, inode); d_rehash(dentry); } @@ -1655,12 +1585,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * Negative dentry: instantiate it unless the inode is a directory and * already has a dentry. */ - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { __d_instantiate(found, inode); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(found, inode); return found; } @@ -1672,7 +1600,6 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, new = list_entry(inode->i_dentry.next, struct dentry, d_alias); dget_locked(new); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(found, inode); d_move(new, found); iput(inode); @@ -1843,7 +1770,6 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) { struct dentry *child; - spin_lock(&dcache_lock); spin_lock(&dparent->d_lock); list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { if (dentry == child) { @@ -1851,12 +1777,10 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) __dget_locked_dlock(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dparent->d_lock); - spin_unlock(&dcache_lock); return 1; } } spin_unlock(&dparent->d_lock); - spin_unlock(&dcache_lock); return 0; } @@ -1889,7 +1813,6 @@ void d_delete(struct dentry * dentry) /* * Are we the only user? */ - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); spin_lock(&dentry->d_lock); isdir = S_ISDIR(dentry->d_inode->i_mode); @@ -1905,7 +1828,6 @@ void d_delete(struct dentry * dentry) spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); fsnotify_nameremove(dentry, isdir); } @@ -1932,13 +1854,11 @@ static void _d_rehash(struct dentry * entry) void d_rehash(struct dentry * entry) { - spin_lock(&dcache_lock); spin_lock(&entry->d_lock); spin_lock(&dcache_hash_lock); _d_rehash(entry); spin_unlock(&dcache_hash_lock); spin_unlock(&entry->d_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(d_rehash); @@ -1961,11 +1881,9 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name) BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(dentry_update_name_case); @@ -2058,14 +1976,14 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, * The hash value has to match the hash queue that the dentry is on.. */ /* - * d_move_locked - move a dentry + * d_move - move a dentry * @dentry: entry to move * @target: new dentry * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. 
*/ -static void d_move_locked(struct dentry * dentry, struct dentry * target) +void d_move(struct dentry * dentry, struct dentry * target) { if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -2114,22 +2032,6 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) spin_unlock(&dentry->d_lock); write_sequnlock(&rename_lock); } - -/** - * d_move - move a dentry - * @dentry: entry to move - * @target: new dentry - * - * Update the dcache to reflect the move of a file name. Negative - * dcache entries should not be moved in this way. - */ - -void d_move(struct dentry * dentry, struct dentry * target) -{ - spin_lock(&dcache_lock); - d_move_locked(dentry, target); - spin_unlock(&dcache_lock); -} EXPORT_SYMBOL(d_move); /** @@ -2155,13 +2057,12 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding - * dentry->d_parent->d_inode->i_mutex and the dcache_lock + * dentry->d_parent->d_inode->i_mutex and the dcache_inode_lock * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... */ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) - __releases(dcache_lock) __releases(dcache_inode_lock) { struct mutex *m1 = NULL, *m2 = NULL; @@ -2185,11 +2086,10 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) goto out_err; m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: - d_move_locked(alias, dentry); + d_move(alias, dentry); ret = alias; out_err: spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); if (m2) mutex_unlock(m2); if (m1) @@ -2249,7 +2149,6 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) BUG_ON(!d_unhashed(dentry)); - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); if (!inode) { @@ -2295,7 +2194,6 @@ found: spin_unlock(&dcache_hash_lock); spin_unlock(&actual->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); out_nolock: if (actual == dentry) { security_d_instantiate(dentry, inode); @@ -2307,7 +2205,6 @@ out_nolock: shouldnt_be_hashed: spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); BUG(); } EXPORT_SYMBOL_GPL(d_materialise_unique); @@ -2421,11 +2318,9 @@ char *__d_path(const struct path *path, struct path *root, int error; prepend(&res, &buflen, "\0", 1); - spin_lock(&dcache_lock); write_seqlock(&rename_lock); error = prepend_path(path, root, &res, &buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); if (error) return ERR_PTR(error); @@ -2487,14 +2382,12 @@ char *d_path(const struct path *path, char *buf, int buflen) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); get_fs_root(current->fs, &root); - spin_lock(&dcache_lock); write_seqlock(&rename_lock); tmp = root; error = path_with_deleted(path, &tmp, &res, &buflen); if (error) res = ERR_PTR(error); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); path_put(&root); return res; } @@ -2520,14 +2413,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); get_fs_root(current->fs, &root); - spin_lock(&dcache_lock); write_seqlock(&rename_lock); tmp = root; error = path_with_deleted(path, &tmp, &res, &buflen); if (!error && !path_equal(&tmp, &root)) error = prepend_unreachable(&res, &buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); 
path_put(&root); if (error) res = ERR_PTR(error); @@ -2594,11 +2485,9 @@ char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) { char *retval; - spin_lock(&dcache_lock); write_seqlock(&rename_lock); retval = __dentry_path(dentry, buf, buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); return retval; } @@ -2609,7 +2498,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) char *p = NULL; char *retval; - spin_lock(&dcache_lock); write_seqlock(&rename_lock); if (d_unlinked(dentry)) { p = buf + buflen; @@ -2619,12 +2507,10 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) } retval = __dentry_path(dentry, buf, buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); if (!IS_ERR(retval) && p) *p = '/'; /* restore '/' overriden with '\0' */ return retval; Elong: - spin_unlock(&dcache_lock); return ERR_PTR(-ENAMETOOLONG); } @@ -2658,7 +2544,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) get_fs_root_and_pwd(current->fs, &root, &pwd); error = -ENOENT; - spin_lock(&dcache_lock); write_seqlock(&rename_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; @@ -2669,7 +2554,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &tmp, &cwd, &buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); if (error) goto out; @@ -2690,7 +2574,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) } } else { write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); } out: @@ -2776,7 +2659,6 @@ void d_genocide(struct dentry *root) rename_retry: this_parent = root; seq = read_seqbegin(&rename_lock); - spin_lock(&dcache_lock); spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; @@ -2823,7 +2705,6 @@ resume: if (this_parent != child->d_parent || read_seqretry(&rename_lock, seq)) { spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); rcu_read_unlock(); goto rename_retry; } @@ -2832,7 +2713,6 @@ resume: goto resume; } spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); if (read_seqretry(&rename_lock, seq)) goto rename_retry; } -- cgit v1.2.2 From 58db63d086790eec2ed433f9d8c4962239809cf8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:39 +1100 Subject: fs: dcache avoid starvation in dcache multi-step operations Long lived dcache "multi-step" operations which retry on rename seq can be starved with a lot of rename activity. If they fail after the 1st pass, take the rename_lock for writing to avoid further starvation. 
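In rough userspace C, the retry scheme looks like the sketch below (hypothetical names throughout; pthreads and C11 atomics stand in for the kernel's seqlock primitives, and do_walk() for the real tree traversal): the first pass runs against the sequence count only, and a failed pass escalates to the writer side so a rename storm cannot starve the walker.

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_uint rename_seq;			/* even: no writer active */
	static pthread_mutex_t rename_mutex = PTHREAD_MUTEX_INITIALIZER;

	static unsigned rl_read_begin(void) { return atomic_load(&rename_seq); }
	static int rl_read_retry(unsigned seq) { return atomic_load(&rename_seq) != seq; }

	static int do_walk(void) { return 1; }		/* placeholder traversal */

	static int walk_tree(void)
	{
		unsigned seq = rl_read_begin();
		int locked = 0;				/* escalated to writer side yet? */
		int ret;
	again:
		ret = do_walk();
		if (!locked && rl_read_retry(seq)) {
			/* The lockless pass raced with a rename: block
			 * renames, then walk exactly once more. */
			locked = 1;
			pthread_mutex_lock(&rename_mutex);
			atomic_fetch_add(&rename_seq, 1);	/* seq odd: writer in */
			goto again;
		}
		if (locked) {
			atomic_fetch_add(&rename_seq, 1);	/* seq even: writer out */
			pthread_mutex_unlock(&rename_mutex);
		}
		return ret;
	}

	int main(void)
	{
		printf("%d\n", walk_tree());
		return 0;
	}

The write lock is taken at most once, so the number of passes is bounded at two regardless of concurrent rename activity.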
Signed-off-by: Nick Piggin --- fs/dcache.c | 56 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 14 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 0dbae053b664..bf6294a20f0e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -973,10 +973,11 @@ int have_submounts(struct dentry *parent) struct dentry *this_parent; struct list_head *next; unsigned seq; + int locked = 0; -rename_retry: - this_parent = parent; seq = read_seqbegin(&rename_lock); +again: + this_parent = parent; if (d_mountpoint(parent)) goto positive; @@ -1021,7 +1022,7 @@ resume: /* might go back up the wrong parent if we have had a rename * or deletion */ if (this_parent != child->d_parent || - read_seqretry(&rename_lock, seq)) { + (!locked && read_seqretry(&rename_lock, seq))) { spin_unlock(&this_parent->d_lock); rcu_read_unlock(); goto rename_retry; @@ -1031,13 +1032,22 @@ resume: goto resume; } spin_unlock(&this_parent->d_lock); - if (read_seqretry(&rename_lock, seq)) + if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); return 0; /* No mount points found in tree */ positive: - if (read_seqretry(&rename_lock, seq)) + if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); return 1; + +rename_retry: + locked = 1; + write_seqlock(&rename_lock); + goto again; } EXPORT_SYMBOL(have_submounts); @@ -1061,11 +1071,11 @@ static int select_parent(struct dentry * parent) struct list_head *next; unsigned seq; int found = 0; + int locked = 0; -rename_retry: - this_parent = parent; seq = read_seqbegin(&rename_lock); - +again: + this_parent = parent; spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; @@ -1127,7 +1137,7 @@ resume: /* might go back up the wrong parent if we have had a rename * or deletion */ if (this_parent != child->d_parent || - read_seqretry(&rename_lock, seq)) { + (!locked && read_seqretry(&rename_lock, seq))) { spin_unlock(&this_parent->d_lock); rcu_read_unlock(); goto rename_retry; @@ -1138,9 +1148,18 @@ resume: } out: spin_unlock(&this_parent->d_lock); - if (read_seqretry(&rename_lock, seq)) + if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); return found; + +rename_retry: + if (found) + return found; + locked = 1; + write_seqlock(&rename_lock); + goto again; } /** @@ -2655,10 +2674,11 @@ void d_genocide(struct dentry *root) struct dentry *this_parent; struct list_head *next; unsigned seq; + int locked = 0; -rename_retry: - this_parent = root; seq = read_seqbegin(&rename_lock); +again: + this_parent = root; spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; @@ -2703,7 +2723,7 @@ resume: /* might go back up the wrong parent if we have had a rename * or deletion */ if (this_parent != child->d_parent || - read_seqretry(&rename_lock, seq)) { + (!locked && read_seqretry(&rename_lock, seq))) { spin_unlock(&this_parent->d_lock); rcu_read_unlock(); goto rename_retry; @@ -2713,8 +2733,16 @@ resume: goto resume; } spin_unlock(&this_parent->d_lock); - if (read_seqretry(&rename_lock, seq)) + if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); + return; + +rename_retry: + locked = 1; + write_seqlock(&rename_lock); + goto again; } /** -- cgit v1.2.2 From 61f3dee4af09528997a970280da240577bf60721 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 
17:49:40 +1100 Subject: fs: dcache reduce dput locking It is possible to run dput without taking data structure locks up-front. In many cases where we don't kill the dentry anyway, these locks are not required. Signed-off-by: Nick Piggin --- fs/dcache.c | 52 +++++++++++++++++++++++----------------------------- 1 file changed, 23 insertions(+), 29 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index bf6294a20f0e..98a05696593e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -323,35 +323,16 @@ repeat: if (dentry->d_count == 1) might_sleep(); spin_lock(&dentry->d_lock); - if (IS_ROOT(dentry)) - parent = NULL; - else - parent = dentry->d_parent; - if (dentry->d_count == 1) { - if (!spin_trylock(&dcache_inode_lock)) { -drop2: - spin_unlock(&dentry->d_lock); - goto repeat; - } - if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dcache_inode_lock); - goto drop2; - } - } - dentry->d_count--; - if (dentry->d_count) { + BUG_ON(!dentry->d_count); + if (dentry->d_count > 1) { + dentry->d_count--; spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); return; } - /* - * AV: ->d_delete() is _NOT_ allowed to block now. - */ if (dentry->d_op && dentry->d_op->d_delete) { if (dentry->d_op->d_delete(dentry)) - goto unhash_it; + goto kill_it; } /* Unreachable? Get rid of it */ @@ -362,17 +343,30 @@ drop2: dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); - spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); - spin_unlock(&dcache_inode_lock); + dentry->d_count--; + spin_unlock(&dentry->d_lock); return; -unhash_it: - __d_drop(dentry); kill_it: + if (!spin_trylock(&dcache_inode_lock)) { +relock: + spin_unlock(&dentry->d_lock); + cpu_relax(); + goto repeat; + } + if (IS_ROOT(dentry)) + parent = NULL; + else + parent = dentry->d_parent; + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dcache_inode_lock); + goto relock; + } + dentry->d_count--; /* if dentry was on the d_lru list delete it from there */ dentry_lru_del(dentry); + /* if it was on the hash (d_delete case), then remove it */ + __d_drop(dentry); dentry = d_kill(dentry, parent); if (dentry) goto repeat; -- cgit v1.2.2 From 89ad485f01fd83c47f17a128db3bd7b89c0f244f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:41 +1100 Subject: fs: dcache reduce locking in d_alloc Signed-off-by: Nick Piggin --- fs/dcache.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 98a05696593e..ccdc5c2512df 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1253,11 +1253,13 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) if (parent) { spin_lock(&parent->d_lock); - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + /* + * don't need child lock because it is not subject + * to concurrency here + */ dentry->d_parent = dget_dlock(parent); dentry->d_sb = parent->d_sb; list_add(&dentry->d_u.d_child, &parent->d_subdirs); - spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); } -- cgit v1.2.2 From 357f8e658bba8a085c4a5d4331e30894be8096b8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:42 +1100 Subject: fs: dcache reduce dcache_inode_lock dcache_inode_lock can be avoided in d_delete() and d_materialise_unique() in cases where it is not required. 
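The shape of that optimisation, as a minimal userspace sketch (hypothetical refcounted object; pthread mutexes in place of spinlocks): the global lock is only touched on the final-reference teardown path, and because it ranks above the per-object lock it may only be trylocked there, backing off and retrying on failure.

	#include <pthread.h>
	#include <sched.h>
	#include <stdlib.h>

	struct obj {
		pthread_mutex_t lock;
		int count;
	};

	static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;

	static void obj_put(struct obj *o)
	{
	again:
		pthread_mutex_lock(&o->lock);
		if (o->count > 1) {
			o->count--;		/* fast path: global lock untouched */
			pthread_mutex_unlock(&o->lock);
			return;
		}
		/* Last reference: teardown needs global_lock, which ranks
		 * above o->lock, so only a trylock is safe here. */
		if (pthread_mutex_trylock(&global_lock) != 0) {
			pthread_mutex_unlock(&o->lock);
			sched_yield();		/* cpu_relax() analogue */
			goto again;
		}
		o->count--;
		pthread_mutex_unlock(&o->lock);
		pthread_mutex_unlock(&global_lock);
		free(o);			/* no users remain */
	}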
Signed-off-by: Nick Piggin --- fs/dcache.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index ccdc5c2512df..01f016799fd4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1828,10 +1828,15 @@ void d_delete(struct dentry * dentry) /* * Are we the only user? */ - spin_lock(&dcache_inode_lock); +again: spin_lock(&dentry->d_lock); isdir = S_ISDIR(dentry->d_inode->i_mode); if (dentry->d_count == 1) { + if (!spin_trylock(&dcache_inode_lock)) { + spin_unlock(&dentry->d_lock); + cpu_relax(); + goto again; + } dentry->d_flags &= ~DCACHE_CANT_MOUNT; dentry_iput(dentry); fsnotify_nameremove(dentry, isdir); @@ -1842,7 +1847,6 @@ void d_delete(struct dentry * dentry) __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_inode_lock); fsnotify_nameremove(dentry, isdir); } @@ -2164,14 +2168,15 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) BUG_ON(!d_unhashed(dentry)); - spin_lock(&dcache_inode_lock); - if (!inode) { actual = dentry; __d_instantiate(dentry, NULL); - goto found_lock; + d_rehash(actual); + goto out_nolock; } + spin_lock(&dcache_inode_lock); + if (S_ISDIR(inode->i_mode)) { struct dentry *alias; @@ -2198,10 +2203,9 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) actual = __d_instantiate_unique(dentry, inode); if (!actual) actual = dentry; - else if (unlikely(!d_unhashed(actual))) - goto shouldnt_be_hashed; + else + BUG_ON(!d_unhashed(actual)); -found_lock: spin_lock(&actual->d_lock); found: spin_lock(&dcache_hash_lock); @@ -2217,10 +2221,6 @@ out_nolock: iput(inode); return actual; - -shouldnt_be_hashed: - spin_unlock(&dcache_inode_lock); - BUG(); } EXPORT_SYMBOL_GPL(d_materialise_unique); -- cgit v1.2.2 From dc0474be3e27463d4d4a2793f82366eed906f223 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:43 +1100 Subject: fs: dcache rationalise dget variants dget_locked was a shortcut to avoid the lazy lru manipulation when we already held dcache_lock (lru manipulation was relatively cheap at that point). However, now that the lru lock is an innermost one, we never hold it at any caller, so the lock cost can now be avoided. We already have a well-working lazy dcache LRU, so it should be fine to defer LRU manipulations to scan time.
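A rough sketch of the lazy-LRU consequence in userspace C (hypothetical node type; pthreads instead of spinlocks): taking a reference no longer touches the LRU at all, and the scanner both skips and delists entries that turn out to be busy.

	#include <pthread.h>
	#include <stdlib.h>

	struct node {
		pthread_mutex_t lock;
		int count;
		struct node *lru_prev, *lru_next;	/* circular LRU links */
		int on_lru;
	};

	/* Caller holds n->lock.  The old dget_locked-style helper would
	 * also unlink n from the LRU here; the lazy scheme only bumps
	 * the count. */
	static void node_get_locked(struct node *n)
	{
		n->count++;
	}

	/* LRU scan: busy nodes are delisted here, lazily, instead of in
	 * the reference-taking fast path. */
	static void lru_scan_one(struct node *n)
	{
		pthread_mutex_lock(&n->lock);
		if (n->count) {
			if (n->on_lru) {
				n->lru_prev->lru_next = n->lru_next;
				n->lru_next->lru_prev = n->lru_prev;
				n->on_lru = 0;
			}
			pthread_mutex_unlock(&n->lock);
			return;
		}
		pthread_mutex_unlock(&n->lock);
		free(n);			/* unused: reclaim it */
	}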
Signed-off-by: Nick Piggin --- fs/dcache.c | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 01f016799fd4..b4d2e28eef5b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -429,32 +429,17 @@ int d_invalidate(struct dentry * dentry) EXPORT_SYMBOL(d_invalidate); /* This must be called with d_lock held */ -static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) +static inline void __dget_dlock(struct dentry *dentry) { dentry->d_count++; - dentry_lru_del(dentry); - return dentry; } -/* This must be called with d_lock held */ -static inline struct dentry * __dget_locked(struct dentry *dentry) +static inline void __dget(struct dentry *dentry) { spin_lock(&dentry->d_lock); - __dget_locked_dlock(dentry); + __dget_dlock(dentry); spin_unlock(&dentry->d_lock); - return dentry; -} - -struct dentry * dget_locked_dlock(struct dentry *dentry) -{ - return __dget_locked_dlock(dentry); -} - -struct dentry * dget_locked(struct dentry *dentry) -{ - return __dget_locked(dentry); } -EXPORT_SYMBOL(dget_locked); struct dentry *dget_parent(struct dentry *dentry) { @@ -512,7 +497,7 @@ again: (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; } else if (!want_discon) { - __dget_locked_dlock(alias); + __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; } @@ -525,7 +510,7 @@ again: if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && (alias->d_flags & DCACHE_DISCONNECTED)) { - __dget_locked_dlock(alias); + __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; } @@ -561,7 +546,7 @@ restart: list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_count) { - __dget_locked_dlock(dentry); + __dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); @@ -1257,7 +1242,8 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) * don't need child lock because it is not subject * to concurrency here */ - dentry->d_parent = dget_dlock(parent); + __dget_dlock(parent); + dentry->d_parent = parent; dentry->d_sb = parent->d_sb; list_add(&dentry->d_u.d_child, &parent->d_subdirs); spin_unlock(&parent->d_lock); @@ -1360,7 +1346,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, continue; if (memcmp(qstr->name, name, len)) continue; - dget_locked(alias); + __dget(alias); return alias; } @@ -1613,7 +1599,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * reference to it, move it in place and use it. */ new = list_entry(inode->i_dentry.next, struct dentry, d_alias); - dget_locked(new); + __dget(new); spin_unlock(&dcache_inode_lock); security_d_instantiate(found, inode); d_move(new, found); @@ -1789,7 +1775,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { if (dentry == child) { spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - __dget_locked_dlock(dentry); + __dget_dlock(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dparent->d_lock); return 1; -- cgit v1.2.2 From a734eb458ab2bd11479a27dd54f48e1b26a55845 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:44 +1100 Subject: fs: dcache reduce d_parent locking Use RCU to simplify locking in dget_parent. 
Signed-off-by: Nick Piggin --- fs/dcache.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index b4d2e28eef5b..a8f89765d602 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -446,24 +446,27 @@ struct dentry *dget_parent(struct dentry *dentry) struct dentry *ret; repeat: - spin_lock(&dentry->d_lock); + /* + * Don't need rcu_dereference because we re-check it was correct under + * the lock. + */ + rcu_read_lock(); ret = dentry->d_parent; - if (!ret) - goto out; - if (dentry == ret) { - ret->d_count++; + if (!ret) { + rcu_read_unlock(); goto out; } - if (!spin_trylock(&ret->d_lock)) { - spin_unlock(&dentry->d_lock); - cpu_relax(); + spin_lock(&ret->d_lock); + if (unlikely(ret != dentry->d_parent)) { + spin_unlock(&ret->d_lock); + rcu_read_unlock(); goto repeat; } + rcu_read_unlock(); BUG_ON(!ret->d_count); ret->d_count++; spin_unlock(&ret->d_lock); out: - spin_unlock(&dentry->d_lock); return ret; } EXPORT_SYMBOL(dget_parent); -- cgit v1.2.2 From 89e6054836a7b1e7500cd70a14b5579e752c9250 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:45 +1100 Subject: fs: dcache reduce prune_one_dentry locking prune_one_dentry can avoid quite a bit of locking in the common case where ancestors have an elevated refcount. Alternatively, we could have gone the other way and made fewer trylocks in the case where d_count goes to zero, but that case is probably less common. Signed-off-by: Nick Piggin --- fs/dcache.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index a8f89765d602..195706374697 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -582,26 +582,29 @@ static void prune_one_dentry(struct dentry *dentry, struct dentry *parent) * Prune ancestors.
*/ while (dentry) { - spin_lock(&dcache_inode_lock); -again: +relock: spin_lock(&dentry->d_lock); + if (dentry->d_count > 1) { + dentry->d_count--; + spin_unlock(&dentry->d_lock); + return; + } + if (!spin_trylock(&dcache_inode_lock)) { +relock2: + spin_unlock(&dentry->d_lock); + cpu_relax(); + goto relock; + } + if (IS_ROOT(dentry)) parent = NULL; else parent = dentry->d_parent; if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dentry->d_lock); - goto again; - } - dentry->d_count--; - if (dentry->d_count) { - if (parent) - spin_unlock(&parent->d_lock); - spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - return; + goto relock2; } - + dentry->d_count--; dentry_lru_del(dentry); __d_drop(dentry); dentry = d_kill(dentry, parent); -- cgit v1.2.2 From be182bff72fae6a3eb25624b39170c40b72f0909 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:46 +1100 Subject: fs: reduce dcache_inode_lock width in lru scanning Signed-off-by: Nick Piggin --- fs/dcache.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 195706374697..fe8f28a2878e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -621,7 +621,7 @@ static void shrink_dentry_list(struct list_head *list) dentry = list_entry(list->prev, struct dentry, d_lru); if (!spin_trylock(&dentry->d_lock)) { -relock: +relock1: spin_unlock(&dcache_lru_lock); cpu_relax(); spin_lock(&dcache_lru_lock); @@ -638,20 +638,24 @@ relock: spin_unlock(&dentry->d_lock); continue; } + if (!spin_trylock(&dcache_inode_lock)) { +relock2: + spin_unlock(&dentry->d_lock); + goto relock1; + } if (IS_ROOT(dentry)) parent = NULL; else parent = dentry->d_parent; if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dentry->d_lock); - goto relock; + spin_unlock(&dcache_inode_lock); + goto relock2; } __dentry_lru_del(dentry); spin_unlock(&dcache_lru_lock); prune_one_dentry(dentry, parent); /* dcache_inode_lock and dentry->d_lock dropped */ - spin_lock(&dcache_inode_lock); spin_lock(&dcache_lru_lock); } } @@ -672,7 +676,6 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) LIST_HEAD(tmp); int cnt = *count; - spin_lock(&dcache_inode_lock); relock: spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { @@ -711,7 +714,6 @@ relock: if (!list_empty(&referenced)) list_splice(&referenced, &sb->s_dentry_lru); spin_unlock(&dcache_lru_lock); - spin_unlock(&dcache_inode_lock); } /** @@ -800,14 +802,12 @@ void shrink_dcache_sb(struct super_block *sb) { LIST_HEAD(tmp); - spin_lock(&dcache_inode_lock); spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { list_splice_init(&sb->s_dentry_lru, &tmp); shrink_dentry_list(&tmp); } spin_unlock(&dcache_lru_lock); - spin_unlock(&dcache_inode_lock); } EXPORT_SYMBOL(shrink_dcache_sb); -- cgit v1.2.2 From ec33679d78f9d653a44ddba10b5fb824c06330a1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:47 +1100 Subject: fs: use RCU in shrink_dentry_list to reduce lock nesting Signed-off-by: Nick Piggin --- fs/dcache.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index fe8f28a2878e..d1840b30c673 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -615,16 +615,16 @@ static void shrink_dentry_list(struct list_head *list) { struct dentry *dentry; - while (!list_empty(list)) { + rcu_read_lock(); + for (;;) { struct dentry *parent; - dentry = 
list_entry(list->prev, struct dentry, d_lru); - - if (!spin_trylock(&dentry->d_lock)) { -relock1: - spin_unlock(&dcache_lru_lock); - cpu_relax(); - spin_lock(&dcache_lru_lock); + dentry = list_entry_rcu(list->prev, struct dentry, d_lru); + if (&dentry->d_lru == list) + break; /* empty */ + spin_lock(&dentry->d_lock); + if (dentry != list_entry(list->prev, struct dentry, d_lru)) { + spin_unlock(&dentry->d_lock); continue; } @@ -634,14 +634,16 @@ relock1: * it - just keep it off the LRU list. */ if (dentry->d_count) { - __dentry_lru_del(dentry); + dentry_lru_del(dentry); spin_unlock(&dentry->d_lock); continue; } + if (!spin_trylock(&dcache_inode_lock)) { -relock2: +relock: spin_unlock(&dentry->d_lock); - goto relock1; + cpu_relax(); + continue; } if (IS_ROOT(dentry)) parent = NULL; @@ -649,15 +651,15 @@ relock2: parent = dentry->d_parent; if (parent && !spin_trylock(&parent->d_lock)) { spin_unlock(&dcache_inode_lock); - goto relock2; + goto relock; } - __dentry_lru_del(dentry); - spin_unlock(&dcache_lru_lock); + dentry_lru_del(dentry); + rcu_read_unlock(); prune_one_dentry(dentry, parent); - /* dcache_inode_lock and dentry->d_lock dropped */ - spin_lock(&dcache_lru_lock); + rcu_read_lock(); } + rcu_read_unlock(); } /** @@ -705,15 +707,15 @@ relock: if (!--cnt) break; } - /* XXX: re-add cond_resched_lock when dcache_lock goes away */ + cond_resched_lock(&dcache_lru_lock); } - - *count = cnt; - shrink_dentry_list(&tmp); - if (!list_empty(&referenced)) list_splice(&referenced, &sb->s_dentry_lru); spin_unlock(&dcache_lru_lock); + + shrink_dentry_list(&tmp); + + *count = cnt; } /** @@ -805,7 +807,9 @@ void shrink_dcache_sb(struct super_block *sb) spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { list_splice_init(&sb->s_dentry_lru, &tmp); + spin_unlock(&dcache_lru_lock); shrink_dentry_list(&tmp); + spin_lock(&dcache_lru_lock); } spin_unlock(&dcache_lru_lock); } -- cgit v1.2.2 From 77812a1ef139d84270d27faacc0630c887411013 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:48 +1100 Subject: fs: consolidate dentry kill sequence The tricky locking for disposing of a dentry is duplicated 3 times in the dcache (dput, pruning a dentry from the LRU, and pruning its ancestors). Consolidate them all into a single function dentry_kill. Signed-off-by: Nick Piggin --- fs/dcache.c | 135 +++++++++++++++++++++++++++--------------------------------- 1 file changed, 61 insertions(+), 74 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index d1840b30c673..dc0551c9755d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -284,6 +284,40 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); +/* + * Finish off a dentry we've decided to kill. + * dentry->d_lock must be held, returns with it unlocked. + * If ref is non-zero, then decrement the refcount too. + * Returns dentry requiring refcount drop, or NULL if we're done. 
+ */ +static inline struct dentry *dentry_kill(struct dentry *dentry, int ref) + __releases(dentry->d_lock) +{ + struct dentry *parent; + + if (!spin_trylock(&dcache_inode_lock)) { +relock: + spin_unlock(&dentry->d_lock); + cpu_relax(); + return dentry; /* try again with same dentry */ + } + if (IS_ROOT(dentry)) + parent = NULL; + else + parent = dentry->d_parent; + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dcache_inode_lock); + goto relock; + } + if (ref) + dentry->d_count--; + /* if dentry was on the d_lru list delete it from there */ + dentry_lru_del(dentry); + /* if it was on the hash then remove it */ + __d_drop(dentry); + return d_kill(dentry, parent); +} + /* * This is dput * @@ -309,13 +343,9 @@ EXPORT_SYMBOL(d_drop); * call the dentry unlink method as well as removing it from the queues and * releasing its resources. If the parent dentries were scheduled for release * they too may now get deleted. - * - * no dcache lock, please. */ - void dput(struct dentry *dentry) { - struct dentry *parent; if (!dentry) return; @@ -348,26 +378,7 @@ repeat: return; kill_it: - if (!spin_trylock(&dcache_inode_lock)) { -relock: - spin_unlock(&dentry->d_lock); - cpu_relax(); - goto repeat; - } - if (IS_ROOT(dentry)) - parent = NULL; - else - parent = dentry->d_parent; - if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dcache_inode_lock); - goto relock; - } - dentry->d_count--; - /* if dentry was on the d_lru list delete it from there */ - dentry_lru_del(dentry); - /* if it was on the hash (d_delete case), then remove it */ - __d_drop(dentry); - dentry = d_kill(dentry, parent); + dentry = dentry_kill(dentry, 1); if (dentry) goto repeat; } @@ -563,51 +574,43 @@ restart: EXPORT_SYMBOL(d_prune_aliases); /* - * Throw away a dentry - free the inode, dput the parent. This requires that - * the LRU list has already been removed. + * Try to throw away a dentry - free the inode, dput the parent. + * Requires dentry->d_lock is held, and dentry->d_count == 0. + * Releases dentry->d_lock. * - * Try to prune ancestors as well. This is necessary to prevent - * quadratic behavior of shrink_dcache_parent(), but is also expected - * to be beneficial in reducing dentry cache fragmentation. + * This may fail if locks cannot be acquired no problem, just try again. */ -static void prune_one_dentry(struct dentry *dentry, struct dentry *parent) +static void try_prune_one_dentry(struct dentry *dentry) __releases(dentry->d_lock) - __releases(parent->d_lock) - __releases(dcache_inode_lock) { - __d_drop(dentry); - dentry = d_kill(dentry, parent); + struct dentry *parent; + parent = dentry_kill(dentry, 0); /* - * Prune ancestors. + * If dentry_kill returns NULL, we have nothing more to do. + * if it returns the same dentry, trylocks failed. In either + * case, just loop again. + * + * Otherwise, we need to prune ancestors too. This is necessary + * to prevent quadratic behavior of shrink_dcache_parent(), but + * is also expected to be beneficial in reducing dentry cache + * fragmentation. */ + if (!parent) + return; + if (parent == dentry) + return; + + /* Prune ancestors. 
*/ + dentry = parent; while (dentry) { -relock: spin_lock(&dentry->d_lock); if (dentry->d_count > 1) { dentry->d_count--; spin_unlock(&dentry->d_lock); return; } - if (!spin_trylock(&dcache_inode_lock)) { -relock2: - spin_unlock(&dentry->d_lock); - cpu_relax(); - goto relock; - } - - if (IS_ROOT(dentry)) - parent = NULL; - else - parent = dentry->d_parent; - if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dcache_inode_lock); - goto relock2; - } - dentry->d_count--; - dentry_lru_del(dentry); - __d_drop(dentry); - dentry = d_kill(dentry, parent); + dentry = dentry_kill(dentry, 1); } } @@ -617,8 +620,6 @@ static void shrink_dentry_list(struct list_head *list) rcu_read_lock(); for (;;) { - struct dentry *parent; - dentry = list_entry_rcu(list->prev, struct dentry, d_lru); if (&dentry->d_lru == list) break; /* empty */ @@ -639,24 +640,10 @@ static void shrink_dentry_list(struct list_head *list) continue; } - if (!spin_trylock(&dcache_inode_lock)) { -relock: - spin_unlock(&dentry->d_lock); - cpu_relax(); - continue; - } - if (IS_ROOT(dentry)) - parent = NULL; - else - parent = dentry->d_parent; - if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dcache_inode_lock); - goto relock; - } - dentry_lru_del(dentry); - rcu_read_unlock(); - prune_one_dentry(dentry, parent); + + try_prune_one_dentry(dentry); + rcu_read_lock(); } rcu_read_unlock(); -- cgit v1.2.2 From 31e6b01f4183ff419a6d1f86177cbf4662347cec Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:52 +1100 Subject: fs: rcu-walk for path lookup Perform common cases of path lookups without any stores or locking in the ancestor dentry elements. This is called rcu-walk, as opposed to the current algorithm which is a refcount based walk, or ref-walk. This results in far fewer atomic operations on every path element, significantly improving path lookup performance. It also avoids cacheline bouncing on common dentries, significantly improving scalability. The overall design is like this: * LOOKUP_RCU is set in nd->flags, which distinguishes rcu-walk from ref-walk. * Take the RCU lock for the entire path walk, starting with the acquiring of the starting path (eg. root/cwd/fd-path). So now dentry refcounts are not required for dentry persistence. * synchronize_rcu is called when unregistering a filesystem, so we can access d_ops and i_ops during rcu-walk. * Similarly take the vfsmount lock for the entire path walk. So now mnt refcounts are not required for persistence. Also we are free to perform mount lookups, and to assume dentry mount points and mount roots are stable up and down the path. * Have a per-dentry seqlock to protect the dentry name, parent, and inode, so we can load this tuple atomically, and also check whether any of its members have changed. * Dentry lookups (based on parent, candidate string tuple) recheck the parent sequence after the child is found in case anything changed in the parent during the path walk. * inode is also RCU protected so we can load d_inode and use the inode for limited things. * i_mode, i_uid, i_gid can be tested for exec permissions during path walk. * i_op can be loaded. When we reach the destination dentry, we lock it, recheck lookup sequence, and increment its refcount and mountpoint refcount. RCU and vfsmount locks are dropped. This is termed "dropping rcu-walk". If the dentry refcount does not match, we can not drop rcu-walk gracefully at the current point in the lokup, so instead return -ECHILD (for want of a better errno). 
This signals the path walking code to re-do the entire lookup with a ref-walk. Aside from the final dentry, there are other situations that may be encounted where we cannot continue rcu-walk. In that case, we drop rcu-walk (ie. take a reference on the last good dentry) and continue with a ref-walk. Again, if we can drop rcu-walk gracefully, we return -ECHILD and do the whole lookup using ref-walk. But it is very important that we can continue with ref-walk for most cases, particularly to avoid the overhead of double lookups, and to gain the scalability advantages on common path elements (like cwd and root). The cases where rcu-walk cannot continue are: * NULL dentry (ie. any uncached path element) * parent with d_inode->i_op->permission or ACLs * dentries with d_revalidate * Following links In future patches, permission checks and d_revalidate become rcu-walk aware. It may be possible eventually to make following links rcu-walk aware. Uncached path elements will always require dropping to ref-walk mode, at the very least because i_mutex needs to be grabbed, and objects allocated. Signed-off-by: Nick Piggin --- fs/dcache.c | 203 +++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 181 insertions(+), 22 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index dc0551c9755d..187fea040108 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -152,9 +152,23 @@ static void d_free(struct dentry *dentry) call_rcu(&dentry->d_u.d_rcu, __d_free); } +/** + * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups + * After this call, in-progress rcu-walk path lookup will fail. This + * should be called after unhashing, and after changing d_inode (if + * the dentry has not already been unhashed). + */ +static inline void dentry_rcuwalk_barrier(struct dentry *dentry) +{ + assert_spin_locked(&dentry->d_lock); + /* Go through a barrier */ + write_seqcount_barrier(&dentry->d_seq); +} + /* * Release the dentry's inode, using the filesystem - * d_iput() operation if defined. + * d_iput() operation if defined. Dentry has no refcount + * and is unhashed. */ static void dentry_iput(struct dentry * dentry) __releases(dentry->d_lock) @@ -178,6 +192,28 @@ static void dentry_iput(struct dentry * dentry) } } +/* + * Release the dentry's inode, using the filesystem + * d_iput() operation if defined. dentry remains in-use. + */ +static void dentry_unlink_inode(struct dentry * dentry) + __releases(dentry->d_lock) + __releases(dcache_inode_lock) +{ + struct inode *inode = dentry->d_inode; + dentry->d_inode = NULL; + list_del_init(&dentry->d_alias); + dentry_rcuwalk_barrier(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_inode_lock); + if (!inode->i_nlink) + fsnotify_inoderemove(inode); + if (dentry->d_op && dentry->d_op->d_iput) + dentry->d_op->d_iput(dentry, inode); + else + iput(inode); +} + /* * dentry_lru_(add|del|move_tail) must be called with d_lock held. 
*/ @@ -272,6 +308,7 @@ void __d_drop(struct dentry *dentry) spin_lock(&dcache_hash_lock); hlist_del_rcu(&dentry->d_hash); spin_unlock(&dcache_hash_lock); + dentry_rcuwalk_barrier(dentry); } } EXPORT_SYMBOL(__d_drop); @@ -309,6 +346,7 @@ relock: spin_unlock(&dcache_inode_lock); goto relock; } + if (ref) dentry->d_count--; /* if dentry was on the d_lru list delete it from there */ @@ -1221,6 +1259,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_count = 1; dentry->d_flags = DCACHE_UNHASHED; spin_lock_init(&dentry->d_lock); + seqcount_init(&dentry->d_seq); dentry->d_inode = NULL; dentry->d_parent = NULL; dentry->d_sb = NULL; @@ -1269,6 +1308,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) if (inode) list_add(&dentry->d_alias, &inode->i_dentry); dentry->d_inode = inode; + dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); } @@ -1610,6 +1650,111 @@ err_out: } EXPORT_SYMBOL(d_add_ci); +/** + * __d_lookup_rcu - search for a dentry (racy, store-free) + * @parent: parent dentry + * @name: qstr of name we wish to find + * @seq: returns d_seq value at the point where the dentry was found + * @inode: returns dentry->d_inode when the inode was found valid. + * Returns: dentry, or NULL + * + * __d_lookup_rcu is the dcache lookup function for rcu-walk name + * resolution (store-free path walking) design described in + * Documentation/filesystems/path-lookup.txt. + * + * This is not to be used outside core vfs. + * + * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock + * held, and rcu_read_lock held. The returned dentry must not be stored into + * without taking d_lock and checking d_seq sequence count against @seq + * returned here. + * + * A refcount may be taken on the found dentry with the __d_rcu_to_refcount + * function. + * + * Alternatively, __d_lookup_rcu may be called again to look up the child of + * the returned dentry, so long as its parent's seqlock is checked after the + * child is looked up. Thus, an interlocking stepping of sequence lock checks + * is formed, giving integrity down the path walk. + */ +struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, + unsigned *seq, struct inode **inode) +{ + unsigned int len = name->len; + unsigned int hash = name->hash; + const unsigned char *str = name->name; + struct hlist_head *head = d_hash(parent, hash); + struct hlist_node *node; + struct dentry *dentry; + + /* + * Note: There is significant duplication with __d_lookup_rcu which is + * required to prevent single threaded performance regressions + * especially on architectures where smp_rmb (in seqcounts) are costly. + * Keep the two functions in sync. + */ + + /* + * The hash list is protected using RCU. + * + * Carefully use d_seq when comparing a candidate dentry, to avoid + * races with d_move(). + * + * It is possible that concurrent renames can mess up our list + * walk here and result in missing our dentry, resulting in the + * false-negative result. d_lookup() protects against concurrent + * renames using rename_lock seqlock. + * + * See Documentation/vfs/dcache-locking.txt for more details. 
+ */ + hlist_for_each_entry_rcu(dentry, node, head, d_hash) { + struct inode *i; + const char *tname; + int tlen; + + if (dentry->d_name.hash != hash) + continue; + +seqretry: + *seq = read_seqcount_begin(&dentry->d_seq); + if (dentry->d_parent != parent) + continue; + if (d_unhashed(dentry)) + continue; + tlen = dentry->d_name.len; + tname = dentry->d_name.name; + i = dentry->d_inode; + /* + * This seqcount check is required to ensure name and + * len are loaded atomically, so as not to walk off the + * edge of memory when walking. If we could load this + * atomically some other way, we could drop this check. + */ + if (read_seqcount_retry(&dentry->d_seq, *seq)) + goto seqretry; + if (parent->d_op && parent->d_op->d_compare) { + if (parent->d_op->d_compare(parent, *inode, + dentry, i, + tlen, tname, name)) + continue; + } else { + if (tlen != len) + continue; + if (memcmp(tname, str, tlen)) + continue; + } + /* + * No extra seqcount check is required after the name + * compare. The caller must perform a seqcount check in + * order to do anything useful with the returned dentry + * anyway. + */ + *inode = i; + return dentry; + } + return NULL; +} + /** * d_lookup - search for a dentry * @parent: parent dentry @@ -1621,9 +1766,9 @@ EXPORT_SYMBOL(d_add_ci); * dentry is returned. The caller must use dput to free the entry when it has * finished using it. %NULL is returned if the dentry does not exist. */ -struct dentry * d_lookup(struct dentry * parent, struct qstr * name) +struct dentry *d_lookup(struct dentry *parent, struct qstr *name) { - struct dentry * dentry = NULL; + struct dentry *dentry; unsigned seq; do { @@ -1636,7 +1781,7 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) } EXPORT_SYMBOL(d_lookup); -/* +/** * __d_lookup - search for a dentry (racy) * @parent: parent dentry * @name: qstr of name we wish to find @@ -1651,16 +1796,23 @@ EXPORT_SYMBOL(d_lookup); * * __d_lookup callers must be commented. */ -struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) +struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) { unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; struct hlist_head *head = d_hash(parent,hash); - struct dentry *found = NULL; struct hlist_node *node; + struct dentry *found = NULL; struct dentry *dentry; + /* + * Note: There is significant duplication with __d_lookup_rcu which is + * required to prevent single threaded performance regressions + * especially on architectures where smp_rmb (in seqcounts) are costly. + * Keep the two functions in sync. + */ + /* * The hash list is protected using RCU. * @@ -1677,24 +1829,15 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) rcu_read_lock(); hlist_for_each_entry_rcu(dentry, node, head, d_hash) { - struct qstr *qstr; + const char *tname; + int tlen; if (dentry->d_name.hash != hash) continue; - if (dentry->d_parent != parent) - continue; spin_lock(&dentry->d_lock); - - /* - * Recheck the dentry after taking the lock - d_move may have - * changed things. Don't bother checking the hash because - * we're about to compare the whole name anyway. - */ if (dentry->d_parent != parent) goto next; - - /* non-existing due to RCU? */ if (d_unhashed(dentry)) goto next; @@ -1702,16 +1845,17 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) * It is safe to compare names since d_move() cannot * change the qstr (protected by d_lock). 
*/ - qstr = &dentry->d_name; + tlen = dentry->d_name.len; + tname = dentry->d_name.name; if (parent->d_op && parent->d_op->d_compare) { if (parent->d_op->d_compare(parent, parent->d_inode, dentry, dentry->d_inode, - qstr->len, qstr->name, name)) + tlen, tname, name)) goto next; } else { - if (qstr->len != len) + if (tlen != len) goto next; - if (memcmp(qstr->name, str, len)) + if (memcmp(tname, str, tlen)) goto next; } @@ -1821,7 +1965,7 @@ again: goto again; } dentry->d_flags &= ~DCACHE_CANT_MOUNT; - dentry_iput(dentry); + dentry_unlink_inode(dentry); fsnotify_nameremove(dentry, isdir); return; } @@ -1884,7 +2028,9 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name) BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ spin_lock(&dentry->d_lock); + write_seqcount_begin(&dentry->d_seq); memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); + write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(dentry_update_name_case); @@ -1997,6 +2143,9 @@ void d_move(struct dentry * dentry, struct dentry * target) dentry_lock_for_move(dentry, target); + write_seqcount_begin(&dentry->d_seq); + write_seqcount_begin(&target->d_seq); + /* Move the dentry to the target hash queue, if on different bucket */ spin_lock(&dcache_hash_lock); if (!d_unhashed(dentry)) @@ -2005,6 +2154,7 @@ spin_unlock(&dcache_hash_lock); /* Unhash the target: dput() will then get rid of it */ + /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ __d_drop(target); list_del(&dentry->d_u.d_child); @@ -2028,6 +2178,9 @@ list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + write_seqcount_end(&target->d_seq); + write_seqcount_end(&dentry->d_seq); + dentry_unlock_parents_for_move(dentry, target); spin_unlock(&target->d_lock); fsnotify_d_move(dentry); @@ -2110,6 +2263,9 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) dentry_lock_for_move(anon, dentry); + write_seqcount_begin(&dentry->d_seq); + write_seqcount_begin(&anon->d_seq); + dparent = dentry->d_parent; aparent = anon->d_parent; @@ -2130,6 +2286,9 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) else INIT_LIST_HEAD(&anon->d_u.d_child); + write_seqcount_end(&dentry->d_seq); + write_seqcount_end(&anon->d_seq); + dentry_unlock_parents_for_move(anon, dentry); spin_unlock(&dentry->d_lock); -- cgit v1.2.2 From 5f57cbcc02cf18f6b22ef4066bb10afeb8f930ff Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:54 +1100 Subject: fs: dcache remove d_mounted Rather than keep a d_mounted count in the dentry, set a dentry flag instead. The flag can be cleared by checking the hash table to see if there are any mounts left, which is not time critical because it is performed at detach time. The mounted state of a dentry is only used to speculatively take a look in the mount hash table if it is set -- before following the mount, vfsmount lock is taken and mount re-checked without races. This saves 4 bytes on 32-bit, nothing on 64-bit but it does provide a hole I might use later (and some configs have larger than 32-bit spinlocks which might make use of the hole). Autofs4 conversion and changelog by Ian Kent : In autofs4, when expiring direct (or offset) mounts we need to ensure that we block user path walks into the autofs mount, which is covered by another mount.
To do this we clear the mounted status so that follows stop before walking into the mount and are essentially blocked until the expire is completed. The automount daemon still finds the correct dentry for the umount due to the follow mount logic in fs/autofs4/root.c:autofs4_follow_link(), which is set as an inode operation for direct and offset mounts only and is called following the lookup that stopped at the covered mount. At the end of the expire the covering mount probably has gone away so the mounted status need not be restored. But we need to check this and only restore the mounted status if the expire failed. XXX: autofs may not work right if we have other mounts go over the top of it? Signed-off-by: Nick Piggin --- fs/dcache.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 187fea040108..1d5cf511e1c7 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1265,7 +1265,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_sb = NULL; dentry->d_op = NULL; dentry->d_fsdata = NULL; - dentry->d_mounted = 0; INIT_HLIST_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); -- cgit v1.2.2 From fb045adb99d9b7c562dc7fef834857f78249daa1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:55 +1100 Subject: fs: dcache reduce branches in lookup path Reduce some branches and memory accesses in dcache lookup by adding dentry flags to indicate common d_ops are set, rather than having to check them. This saves a pointer memory access (dentry->d_op) in common path lookup situations, and saves another pointer load and branch in cases where we have d_op but not the particular operation. Patched with: git grep -E '[.>]([[:space:]])*d_op([[:space:]])*=' | xargs sed -e 's/\([^\t ]*\)->d_op = \(.*\);/d_set_d_op(\1, \2);/' -e 's/\([^\t ]*\)\.d_op = \(.*\);/d_set_d_op(\&\1, \2);/' -i Signed-off-by: Nick Piggin --- fs/dcache.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 1d5cf511e1c7..f9693da3efbd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -398,7 +398,7 @@ repeat: return; } - if (dentry->d_op && dentry->d_op->d_delete) { + if (dentry->d_flags & DCACHE_OP_DELETE) { if (dentry->d_op->d_delete(dentry)) goto kill_it; } @@ -1301,6 +1301,28 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) } EXPORT_SYMBOL(d_alloc_name); +void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) +{ + BUG_ON(dentry->d_op); + BUG_ON(dentry->d_flags & (DCACHE_OP_HASH | + DCACHE_OP_COMPARE | + DCACHE_OP_REVALIDATE | + DCACHE_OP_DELETE )); + dentry->d_op = op; + if (!op) + return; + if (op->d_hash) + dentry->d_flags |= DCACHE_OP_HASH; + if (op->d_compare) + dentry->d_flags |= DCACHE_OP_COMPARE; + if (op->d_revalidate) + dentry->d_flags |= DCACHE_OP_REVALIDATE; + if (op->d_delete) + dentry->d_flags |= DCACHE_OP_DELETE; + +} +EXPORT_SYMBOL(d_set_d_op); + static void __d_instantiate(struct dentry *dentry, struct inode *inode) { spin_lock(&dentry->d_lock); @@ -1731,7 +1753,7 @@ seqretry: */ if (read_seqcount_retry(&dentry->d_seq, *seq)) goto seqretry; - if (parent->d_op && parent->d_op->d_compare) { + if (parent->d_flags & DCACHE_OP_COMPARE) { if (parent->d_op->d_compare(parent, *inode, dentry, i, tlen, tname, name)) @@ -1846,7 +1868,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) */ tlen = dentry->d_name.len; tname = 
dentry->d_name.name; - if (parent->d_op && parent->d_op->d_compare) { + if (parent->d_flags & DCACHE_OP_COMPARE) { if (parent->d_op->d_compare(parent, parent->d_inode, dentry, dentry->d_inode, tlen, tname, name)) @@ -1887,7 +1909,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) * routine may choose to leave the hash value unchanged. */ name->hash = full_name_hash(name->name, name->len); - if (dir->d_op && dir->d_op->d_hash) { + if (dir->d_flags & DCACHE_OP_HASH) { if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) goto out; } -- cgit v1.2.2 From 44a7d7a878c9cbb74f236ea755b25b6b2e26a9a9 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:56 +1100 Subject: fs: cache optimise dentry and inode for rcu-walk Put dentry and inode fields into top of data structure. This allows RCU path traversal to perform an RCU dentry lookup in a path walk by touching only the first 56 bytes of the dentry. We also fit in 8 bytes of inline name in the first 64 bytes, so for short names, only 64 bytes needs to be touched to perform the lookup. We should get rid of the hash->prev pointer from the first 64 bytes, and fit 16 bytes of name in there, which will take care of 81% rather than 32% of the kernel tree. inode is also rearranged so that RCU lookup will only touch a single cacheline in the inode, plus one in the i_ops structure. This is important for directory component lookups in RCU path walking. In the kernel source, directory names average is around 6 chars, so this works. When we reach the last element of the lookup, we need to lock it and take its refcount which requires another cacheline access. Align dentry and inode operations structs, so members will be at predictable offsets and we can group common operations into head of structure. Signed-off-by: Nick Piggin --- fs/dcache.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index f9693da3efbd..07d1f6862dc7 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -83,8 +83,6 @@ EXPORT_SYMBOL(dcache_inode_lock); static struct kmem_cache *dentry_cache __read_mostly; -#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) - /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try -- cgit v1.2.2 From ceb5bdc2d246f6d81cf61ed70f325308a11821d2 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:05 +1100 Subject: fs: dcache per-bucket dcache hash locking We can turn the dcache hash locking from a global dcache_hash_lock into per-bucket locking. 
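A userspace C sketch of the idea (hypothetical table; a C11 atomic flag approximates bit_spin_lock, kept beside the head pointer here rather than packed into its low bit as the kernel does): writers contend only on the one chain they touch, never on a global hash lock.

	#include <stdatomic.h>
	#include <stdbool.h>

	#define NBUCKETS 256

	struct entry {
		struct entry *next;
		unsigned long key;
	};

	struct bucket {
		struct entry *head;
		atomic_bool lock;	/* kernel: bit 0 of the head pointer */
	};

	static struct bucket table[NBUCKETS];

	static struct bucket *hash_bucket(unsigned long key)
	{
		return &table[key % NBUCKETS];
	}

	static void bucket_lock(struct bucket *b)
	{
		while (atomic_exchange_explicit(&b->lock, true,
						memory_order_acquire))
			;		/* spin: contention is per-chain */
	}

	static void bucket_unlock(struct bucket *b)
	{
		atomic_store_explicit(&b->lock, false, memory_order_release);
	}

	static void table_insert(struct entry *e)
	{
		struct bucket *b = hash_bucket(e->key);

		bucket_lock(b);		/* serialises only this chain */
		e->next = b->head;
		b->head = e;
		bucket_unlock(b);
	}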
Signed-off-by: Nick Piggin --- fs/dcache.c | 133 +++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 83 insertions(+), 50 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 07d1f6862dc7..9f04e1ba75b7 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -33,14 +33,18 @@ #include #include #include +#include +#include #include "internal.h" /* * Usage: * dcache_inode_lock protects: * - i_dentry, d_alias, d_inode - * dcache_hash_lock protects: - * - the dcache hash table, s_anon lists + * dcache_hash_bucket lock protects: + * - the dcache hash table + * s_anon bl list spinlock protects: + * - the s_anon list (see __d_drop) * dcache_lru_lock protects: * - the dcache lru lists and counters * d_lock protects: @@ -57,7 +61,8 @@ * dcache_inode_lock * dentry->d_lock * dcache_lru_lock - * dcache_hash_lock + * dcache_hash_bucket lock + * s_anon lock * * If there is an ancestor relationship: * dentry->d_parent->...->d_parent->d_lock @@ -74,7 +79,6 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_inode_lock); -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); @@ -96,7 +100,29 @@ static struct kmem_cache *dentry_cache __read_mostly; static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; -static struct hlist_head *dentry_hashtable __read_mostly; + +struct dcache_hash_bucket { + struct hlist_bl_head head; +}; +static struct dcache_hash_bucket *dentry_hashtable __read_mostly; + +static inline struct dcache_hash_bucket *d_hash(struct dentry *parent, + unsigned long hash) +{ + hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; + hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); + return dentry_hashtable + (hash & D_HASHMASK); +} + +static inline void spin_lock_bucket(struct dcache_hash_bucket *b) +{ + bit_spin_lock(0, (unsigned long *)&b->head.first); +} + +static inline void spin_unlock_bucket(struct dcache_hash_bucket *b) +{ + __bit_spin_unlock(0, (unsigned long *)&b->head.first); +} /* Statistics gathering. */ struct dentry_stat_t dentry_stat = { @@ -144,7 +170,7 @@ static void d_free(struct dentry *dentry) dentry->d_op->d_release(dentry); /* if dentry was never inserted into hash, immediate free is OK */ - if (hlist_unhashed(&dentry->d_hash)) + if (hlist_bl_unhashed(&dentry->d_hash)) __d_free(&dentry->d_u.d_rcu); else call_rcu(&dentry->d_u.d_rcu, __d_free); @@ -302,11 +328,27 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) void __d_drop(struct dentry *dentry) { if (!(dentry->d_flags & DCACHE_UNHASHED)) { - dentry->d_flags |= DCACHE_UNHASHED; - spin_lock(&dcache_hash_lock); - hlist_del_rcu(&dentry->d_hash); - spin_unlock(&dcache_hash_lock); - dentry_rcuwalk_barrier(dentry); + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) { + bit_spin_lock(0, + (unsigned long *)&dentry->d_sb->s_anon.first); + dentry->d_flags |= DCACHE_UNHASHED; + hlist_bl_del_init(&dentry->d_hash); + __bit_spin_unlock(0, + (unsigned long *)&dentry->d_sb->s_anon.first); + } else { + struct dcache_hash_bucket *b; + b = d_hash(dentry->d_parent, dentry->d_name.hash); + spin_lock_bucket(b); + /* + * We may not actually need to put DCACHE_UNHASHED + * manipulations under the hash lock, but follow + * the principle of least surprise. 
+ */ + dentry->d_flags |= DCACHE_UNHASHED; + hlist_bl_del_rcu(&dentry->d_hash); + spin_unlock_bucket(b); + dentry_rcuwalk_barrier(dentry); + } } } EXPORT_SYMBOL(__d_drop); @@ -961,8 +1003,8 @@ void shrink_dcache_for_umount(struct super_block *sb) spin_unlock(&dentry->d_lock); shrink_dcache_for_umount_subtree(dentry); - while (!hlist_empty(&sb->s_anon)) { - dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); + while (!hlist_bl_empty(&sb->s_anon)) { + dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); shrink_dcache_for_umount_subtree(dentry); } } @@ -1263,7 +1305,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_sb = NULL; dentry->d_op = NULL; dentry->d_fsdata = NULL; - INIT_HLIST_NODE(&dentry->d_hash); + INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); INIT_LIST_HEAD(&dentry->d_alias); @@ -1459,14 +1501,6 @@ struct dentry * d_alloc_root(struct inode * root_inode) } EXPORT_SYMBOL(d_alloc_root); -static inline struct hlist_head *d_hash(struct dentry *parent, - unsigned long hash) -{ - hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; - hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); - return dentry_hashtable + (hash & D_HASHMASK); -} - /** * d_obtain_alias - find or allocate a dentry for a given inode * @inode: inode to allocate the dentry for @@ -1521,11 +1555,11 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_sb = inode->i_sb; tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; - tmp->d_flags &= ~DCACHE_UNHASHED; list_add(&tmp->d_alias, &inode->i_dentry); - spin_lock(&dcache_hash_lock); - hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); - spin_unlock(&dcache_hash_lock); + bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first); + tmp->d_flags &= ~DCACHE_UNHASHED; + hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); + __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); spin_unlock(&tmp->d_lock); spin_unlock(&dcache_inode_lock); @@ -1567,7 +1601,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) d_move(new, dentry); iput(inode); } else { - /* already taking dcache_inode_lock, so d_add() by hand */ + /* already got dcache_inode_lock, so d_add() by hand */ __d_instantiate(dentry, inode); spin_unlock(&dcache_inode_lock); security_d_instantiate(dentry, inode); @@ -1702,8 +1736,8 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; - struct hlist_head *head = d_hash(parent, hash); - struct hlist_node *node; + struct dcache_hash_bucket *b = d_hash(parent, hash); + struct hlist_bl_node *node; struct dentry *dentry; /* @@ -1726,7 +1760,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, * * See Documentation/vfs/dcache-locking.txt for more details. 
*/ - hlist_for_each_entry_rcu(dentry, node, head, d_hash) { + hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { struct inode *i; const char *tname; int tlen; @@ -1820,8 +1854,8 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; - struct hlist_head *head = d_hash(parent,hash); - struct hlist_node *node; + struct dcache_hash_bucket *b = d_hash(parent, hash); + struct hlist_bl_node *node; struct dentry *found = NULL; struct dentry *dentry; @@ -1847,7 +1881,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) */ rcu_read_lock(); - hlist_for_each_entry_rcu(dentry, node, head, d_hash) { + hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { const char *tname; int tlen; @@ -1998,11 +2032,13 @@ again: } EXPORT_SYMBOL(d_delete); -static void __d_rehash(struct dentry * entry, struct hlist_head *list) +static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b) { - + BUG_ON(!d_unhashed(entry)); + spin_lock_bucket(b); entry->d_flags &= ~DCACHE_UNHASHED; - hlist_add_head_rcu(&entry->d_hash, list); + hlist_bl_add_head_rcu(&entry->d_hash, &b->head); + spin_unlock_bucket(b); } static void _d_rehash(struct dentry * entry) @@ -2020,9 +2056,7 @@ static void _d_rehash(struct dentry * entry) void d_rehash(struct dentry * entry) { spin_lock(&entry->d_lock); - spin_lock(&dcache_hash_lock); _d_rehash(entry); - spin_unlock(&dcache_hash_lock); spin_unlock(&entry->d_lock); } EXPORT_SYMBOL(d_rehash); @@ -2165,15 +2199,16 @@ void d_move(struct dentry * dentry, struct dentry * target) write_seqcount_begin(&dentry->d_seq); write_seqcount_begin(&target->d_seq); - /* Move the dentry to the target hash queue, if on different bucket */ - spin_lock(&dcache_hash_lock); - if (!d_unhashed(dentry)) - hlist_del_rcu(&dentry->d_hash); + /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ + + /* + * Move the dentry to the target hash queue. Don't bother checking + * for the same hash queue because of how unlikely it is. + */ + __d_drop(dentry); __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); - spin_unlock(&dcache_hash_lock); /* Unhash the target: dput() will then get rid of it */ - /* __d_drop does write_seqcount_barrier, but they're OK to nest. 
*/ __d_drop(target); list_del(&dentry->d_u.d_child); @@ -2369,9 +2404,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) spin_lock(&actual->d_lock); found: - spin_lock(&dcache_hash_lock); _d_rehash(actual); - spin_unlock(&dcache_hash_lock); spin_unlock(&actual->d_lock); spin_unlock(&dcache_inode_lock); out_nolock: @@ -2953,7 +2986,7 @@ static void __init dcache_init_early(void) dentry_hashtable = alloc_large_system_hash("Dentry cache", - sizeof(struct hlist_head), + sizeof(struct dcache_hash_bucket), dhash_entries, 13, HASH_EARLY, @@ -2962,7 +2995,7 @@ static void __init dcache_init_early(void) 0); for (loop = 0; loop < (1 << d_hash_shift); loop++) - INIT_HLIST_HEAD(&dentry_hashtable[loop]); + INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); } static void __init dcache_init(void) @@ -2985,7 +3018,7 @@ static void __init dcache_init(void) dentry_hashtable = alloc_large_system_hash("Dentry cache", - sizeof(struct hlist_head), + sizeof(struct dcache_hash_bucket), dhash_entries, 13, 0, @@ -2994,7 +3027,7 @@ static void __init dcache_init(void) 0); for (loop = 0; loop < (1 << d_hash_shift); loop++) - INIT_HLIST_HEAD(&dentry_hashtable[loop]); + INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); } /* SLAB cache for __getname() consumers */ -- cgit v1.2.2 From 873feea09ebc980cbd3631b767356ce1eee65ec1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:06 +1100 Subject: fs: dcache per-inode inode alias locking dcache_inode_lock can be replaced with per-inode locking. Use existing inode->i_lock for this. This is slightly non-trivial because we sometimes need to find the inode from the dentry, which requires d_inode to be stabilised (either with refcount or d_lock). Signed-off-by: Nick Piggin --- fs/dcache.c | 88 +++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 47 insertions(+), 41 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 9f04e1ba75b7..09ec945f3c98 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -39,8 +39,8 @@ /* * Usage: - * dcache_inode_lock protects: - * - i_dentry, d_alias, d_inode + * dcache->d_inode->i_lock protects: + * - i_dentry, d_alias, d_inode of aliases * dcache_hash_bucket lock protects: * - the dcache hash table * s_anon bl list spinlock protects: @@ -58,7 +58,7 @@ * - d_alias, d_inode * * Ordering: - * dcache_inode_lock + * dentry->d_inode->i_lock * dentry->d_lock * dcache_lru_lock * dcache_hash_bucket lock @@ -78,12 +78,10 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); -__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_inode_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(rename_lock); -EXPORT_SYMBOL(dcache_inode_lock); static struct kmem_cache *dentry_cache __read_mostly; @@ -196,14 +194,14 @@ static inline void dentry_rcuwalk_barrier(struct dentry *dentry) */ static void dentry_iput(struct dentry * dentry) __releases(dentry->d_lock) - __releases(dcache_inode_lock) + __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) @@ -212,7 +210,6 @@ static void dentry_iput(struct dentry * dentry) iput(inode); } else { spin_unlock(&dentry->d_lock); 
- spin_unlock(&dcache_inode_lock); } } @@ -222,14 +219,14 @@ static void dentry_iput(struct dentry * dentry) */ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_lock) - __releases(dcache_inode_lock) + __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; dentry->d_inode = NULL; list_del_init(&dentry->d_alias); dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) @@ -295,7 +292,7 @@ static void dentry_lru_move_tail(struct dentry *dentry) static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) __releases(parent->d_lock) - __releases(dcache_inode_lock) + __releases(dentry->d_inode->i_lock) { dentry->d_parent = NULL; list_del(&dentry->d_u.d_child); @@ -370,9 +367,11 @@ EXPORT_SYMBOL(d_drop); static inline struct dentry *dentry_kill(struct dentry *dentry, int ref) __releases(dentry->d_lock) { + struct inode *inode; struct dentry *parent; - if (!spin_trylock(&dcache_inode_lock)) { + inode = dentry->d_inode; + if (inode && !spin_trylock(&inode->i_lock)) { relock: spin_unlock(&dentry->d_lock); cpu_relax(); @@ -383,7 +382,8 @@ relock: else parent = dentry->d_parent; if (parent && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dcache_inode_lock); + if (inode) + spin_unlock(&inode->i_lock); goto relock; } @@ -618,9 +618,9 @@ struct dentry *d_find_alias(struct inode *inode) struct dentry *de = NULL; if (!list_empty(&inode->i_dentry)) { - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); de = __d_find_alias(inode, 0); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); } return de; } @@ -634,20 +634,20 @@ void d_prune_aliases(struct inode *inode) { struct dentry *dentry; restart: - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_count) { __dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); dput(dentry); goto restart; } spin_unlock(&dentry->d_lock); } - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(d_prune_aliases); @@ -1392,9 +1392,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_inode_lock); + if (inode) + spin_lock(&inode->i_lock); __d_instantiate(entry, inode); - spin_unlock(&dcache_inode_lock); + if (inode) + spin_unlock(&inode->i_lock); security_d_instantiate(entry, inode); } EXPORT_SYMBOL(d_instantiate); @@ -1458,9 +1460,11 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_inode_lock); + if (inode) + spin_lock(&inode->i_lock); result = __d_instantiate_unique(entry, inode); - spin_unlock(&dcache_inode_lock); + if (inode) + spin_unlock(&inode->i_lock); if (!result) { security_d_instantiate(entry, inode); @@ -1542,10 +1546,10 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_parent = tmp; /* make sure dput doesn't croak */ - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); res = __d_find_alias(inode, 0); if (res) { - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); dput(tmp); goto out_iput; } @@ -1561,7 +1565,7 @@ 
struct dentry *d_obtain_alias(struct inode *inode) hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); return tmp; @@ -1592,18 +1596,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) struct dentry *new = NULL; if (inode && S_ISDIR(inode->i_mode)) { - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); new = __d_find_alias(inode, 1); if (new) { BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(new, inode); d_move(new, dentry); iput(inode); } else { - /* already got dcache_inode_lock, so d_add() by hand */ + /* already taking inode->i_lock, so d_add() by hand */ __d_instantiate(dentry, inode); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(dentry, inode); d_rehash(dentry); } @@ -1676,10 +1680,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * Negative dentry: instantiate it unless the inode is a directory and * already has a dentry. */ - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { __d_instantiate(found, inode); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(found, inode); return found; } @@ -1690,7 +1694,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, */ new = list_entry(inode->i_dentry.next, struct dentry, d_alias); __dget(new); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(found, inode); d_move(new, found); iput(inode); @@ -2004,15 +2008,17 @@ EXPORT_SYMBOL(d_validate); void d_delete(struct dentry * dentry) { + struct inode *inode; int isdir = 0; /* * Are we the only user? */ again: spin_lock(&dentry->d_lock); - isdir = S_ISDIR(dentry->d_inode->i_mode); + inode = dentry->d_inode; + isdir = S_ISDIR(inode->i_mode); if (dentry->d_count == 1) { - if (!spin_trylock(&dcache_inode_lock)) { + if (inode && !spin_trylock(&inode->i_lock)) { spin_unlock(&dentry->d_lock); cpu_relax(); goto again; @@ -2266,13 +2272,13 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding - * dentry->d_parent->d_inode->i_mutex and the dcache_inode_lock + * dentry->d_parent->d_inode->i_mutex and the inode->i_lock * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... */ -static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) - __releases(dcache_inode_lock) +static struct dentry *__d_unalias(struct inode *inode, + struct dentry *dentry, struct dentry *alias) { struct mutex *m1 = NULL, *m2 = NULL; struct dentry *ret; @@ -2298,7 +2304,7 @@ out_unalias: d_move(alias, dentry); ret = alias; out_err: - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); if (m2) mutex_unlock(m2); if (m1) @@ -2371,7 +2377,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) goto out_nolock; } - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); if (S_ISDIR(inode->i_mode)) { struct dentry *alias; @@ -2388,7 +2394,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) goto found; } /* Nope, but we must(!) 
avoid directory aliasing */ - actual = __d_unalias(dentry, alias); + actual = __d_unalias(inode, dentry, alias); if (IS_ERR(actual)) dput(alias); goto out_nolock; @@ -2406,7 +2412,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) found: _d_rehash(actual); spin_unlock(&actual->d_lock); - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); out_nolock: if (actual == dentry) { security_d_instantiate(dentry, inode); -- cgit v1.2.2 From 4b936885ab04dc6e0bb0ef35e0e23c1a7364d9e5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:07 +1100 Subject: fs: improve scalability of pseudo filesystems Regardless of how much we possibly try to scale dcache, there is likely always going to be some fundamental contention when adding or removing children under the same parent. Pseudo filesystems do not seem to need connected dentries, because by definition they are disconnected. Signed-off-by: Nick Piggin --- fs/dcache.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 09ec945f3c98..9e6e6db76869 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1330,6 +1330,18 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) +{ + struct dentry *dentry = d_alloc(NULL, name); + if (dentry) { + dentry->d_sb = sb; + dentry->d_parent = dentry; + dentry->d_flags |= DCACHE_DISCONNECTED; + } + return dentry; +} +EXPORT_SYMBOL(d_alloc_pseudo); + struct dentry *d_alloc_name(struct dentry *parent, const char *name) { struct qstr q; -- cgit v1.2.2 From e1bb57826381199cc79fbf44e9dfeee58fc7b339 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:08 +1100 Subject: fs: prefetch inode data in dcache lookup This makes single-threaded git diff -1.25% +/- 0.05% elapsed time on my 2s12c24t Westmere system, and -0.86% +/- 0.05% on my 2s8c Barcelona, by prefetching the important first cacheline of the inode while we do the actual name compare and other operations on the dentry. There was no measurable slowdown in the single file stat case, or the creat case (where negative dentries would be common). Signed-off-by: Nick Piggin --- fs/dcache.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 9e6e6db76869..2a4ce7dc2306 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1793,6 +1793,9 @@ seqretry: tlen = dentry->d_name.len; tname = dentry->d_name.name; i = dentry->d_inode; + prefetch(tname); + if (i) + prefetch(i); /* * This seqcount check is required to ensure name and * len are loaded atomically, so as not to walk off the -- cgit v1.2.2 From 9d55c369bb5e695e629bc35cba2ef607755b3bee Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:09 +1100 Subject: fs: implement faster dentry memcmp The standard memcmp function on a Westmere system shows up hot in profiles in the `git diff` workload (both parallel and single threaded), and it is likely due to the costs associated with trapping into microcode, and little opportunity to improve memory access (a dentry name is not likely to take up more than a cacheline). So replace it with an open-coded byte comparison.
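The replacement is an inline, length-first byte loop, roughly the following (a sketch of the dentry_cmp() the hunks below switch to; the upstream inline may differ in detail):

/*
 * Compare two names: 0 on match, non-zero otherwise.  Lengths are
 * checked first, so most mismatches never touch the bytes; assumes
 * a non-empty name.
 */
static inline int dentry_cmp(const unsigned char *cs, size_t scount,
			     const unsigned char *ct, size_t tcount)
{
	if (scount != tcount)
		return 1;
	do {
		if (*cs != *ct)
			return 1;
		cs++;
		ct++;
		tcount--;
	} while (tcount);
	return 0;
}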
This increases code size by 8 bytes in the critical __d_lookup_rcu function, but the speedup is huge, averaging 10 runs of each: git diff st user sys elapsed CPU before 1.15 2.57 3.82 97.1 after 1.14 2.35 3.61 96.8 git diff mt user sys elapsed CPU before 1.27 3.85 1.46 349 after 1.26 3.54 1.43 333 Elapsed time for single threaded git diff at 95.0% confidence: -0.21 +/- 0.01 -5.45% +/- 0.24% It's -0.66% +/- 0.06% elapsed time on my Opteron, so rep cmp costs on the fam10h seem to be relatively smaller, but there is still a win. Signed-off-by: Nick Piggin --- fs/dcache.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 2a4ce7dc2306..5699d4c027cb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1454,9 +1454,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, continue; if (alias->d_parent != entry->d_parent) continue; - if (qstr->len != len) - continue; - if (memcmp(qstr->name, name, len)) + if (dentry_cmp(qstr->name, qstr->len, name, len)) continue; __dget(alias); return alias; @@ -1810,9 +1808,7 @@ seqretry: tlen, tname, name)) continue; } else { - if (tlen != len) - continue; - if (memcmp(tname, str, tlen)) + if (dentry_cmp(tname, tlen, str, len)) continue; } /* @@ -1925,9 +1921,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) tlen, tname, name)) goto next; } else { - if (tlen != len) - goto next; - if (memcmp(tname, str, tlen)) + if (dentry_cmp(tname, tlen, str, len)) goto next; } -- cgit v1.2.2 From c8aebb0c9f8c7471643d5f8ba68328de8013005f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Dec 2010 10:22:30 -0500 Subject: per-superblock default ->d_op Signed-off-by: Al Viro --- fs/dcache.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 5699d4c027cb..5ec58267b5bb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1320,6 +1320,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) __dget_dlock(parent); dentry->d_parent = parent; dentry->d_sb = parent->d_sb; + d_set_d_op(dentry, dentry->d_sb->s_d_op); list_add(&dentry->d_u.d_child, &parent->d_subdirs); spin_unlock(&parent->d_lock); } @@ -1335,6 +1336,7 @@ struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) struct dentry *dentry = d_alloc(NULL, name); if (dentry) { dentry->d_sb = sb; + d_set_d_op(dentry, dentry->d_sb->s_d_op); dentry->d_parent = dentry; dentry->d_flags |= DCACHE_DISCONNECTED; } @@ -1507,6 +1509,7 @@ struct dentry * d_alloc_root(struct inode * root_inode) res = d_alloc(NULL, &name); if (res) { res->d_sb = root_inode->i_sb; + d_set_d_op(res, res->d_sb->s_d_op); res->d_parent = res; d_instantiate(res, root_inode); } @@ -1567,6 +1570,7 @@ struct dentry *d_obtain_alias(struct inode *inode) /* attach a disconnected dentry */ spin_lock(&tmp->d_lock); tmp->d_sb = inode->i_sb; + d_set_d_op(tmp, tmp->d_sb->s_d_op); tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; list_add(&tmp->d_alias, &inode->i_dentry); -- cgit v1.2.2 From 1c977540fda4bf65ab467d110f5d840fc27e7608 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 18 Nov 2010 15:02:45 -0800 Subject: fs: fix kernel-doc for dcache::d_validate Fix function parameter kernel-doc for d_validate(): Warning(fs/dcache.c:1495): No description found for parameter 'parent' Warning(fs/dcache.c:1495): Excess function parameter 'dparent' description in 'd_validate' Signed-off-by: Randy Dunlap Cc: Alexander Viro Signed-off-by: Al Viro --- 
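kernel-doc matches each @tag against the parameter names in the prototype (references inside the descriptions are not checked), so the fix is just renaming the tag to the current parameter name. A sketch of the corrected block:

/**
 * d_validate - verify dentry provided from insecure source (deprecated)
 * @dentry: The dentry alleged to be valid child of @dparent
 * @parent: The parent dentry (known to be valid)
 */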
fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 5ec58267b5bb..b2e90998ad36 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1970,7 +1970,7 @@ out: /** * d_validate - verify dentry provided from insecure source (deprecated) * @dentry: The dentry alleged to be valid child of @dparent - * @dparent: The parent dentry (known to be valid) + * @parent: The parent dentry (known to be valid) * * An insecure source has sent us a dentry, here we verify it and dget() it. * This is used by ncpfs in its readdir implementation. -- cgit v1.2.2 From 208898c17a97610ce1c01b1cc58e51802a1d52c3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 18 Nov 2010 15:02:49 -0800 Subject: fs: fix kernel-doc for dcache::prepend_path Fix function kernel-doc warning for prepend_path(): Warning(fs/dcache.c:1924): missing initial short description on line: Signed-off-by: Randy Dunlap Cc: Alexander Viro Signed-off-by: Al Viro --- fs/dcache.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index b2e90998ad36..0c6d5c549d84 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2453,8 +2453,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) } /** - * Prepend path string to a buffer - * + * prepend_path - Prepend path string to a buffer * @path: the dentry/vfsmount to report * @root: root vfsmnt/dentry (may be modified by this function) * @buffer: pointer to the end of the buffer -- cgit v1.2.2 From 6f7f7caab259026234277b659485d22c1dcb1ab4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 14 Jan 2011 13:26:18 -0800 Subject: Turn d_set_d_op() BUG_ON() into WARN_ON_ONCE() It's indicative of a real problem, and it actually triggers with autofs4, but the BUG_ON() is excessive. The autofs4 case is being fixed (to only set d_op in the ->lookup method) but not merged yet. In the meantime this gets the code limping along. Reported-by: Alex Elder Cc: Ian Kent Cc: Nick Piggin Cc: Al Viro Signed-off-by: Linus Torvalds --- fs/dcache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 0c6d5c549d84..274a22250380 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1357,8 +1357,8 @@ EXPORT_SYMBOL(d_alloc_name); void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) { - BUG_ON(dentry->d_op); - BUG_ON(dentry->d_flags & (DCACHE_OP_HASH | + WARN_ON_ONCE(dentry->d_op); + WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH | DCACHE_OP_COMPARE | DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE )); -- cgit v1.2.2 From 9875cf806403fae66b2410a3c2cc820d97731e04 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Jan 2011 18:45:21 +0000 Subject: Add a dentry op to handle automounting rather than abusing follow_link() Add a dentry op (d_automount) to handle automounting directories rather than abusing the follow_link() inode operation. The operation is keyed off a new dentry flag (DCACHE_NEED_AUTOMOUNT). This also makes it easier to add an AT_ flag to suppress terminal segment automount during pathwalk and removes the need for the kludge code in the pathwalk algorithm to handle directories with follow_link() semantics. The ->d_automount() dentry operation: struct vfsmount *(*d_automount)(struct path *mountpoint); takes a pointer to the directory to be mounted upon, which is expected to provide sufficient data to determine what should be mounted. 
If successful, it should return the vfsmount struct it creates (which it should also have added to the namespace using do_add_mount() or similar). If there's a collision with another automount attempt, NULL should be returned. If the directory specified by the parameter should be used directly rather than being mounted upon, -EISDIR should be returned. In any other case, an error code should be returned. The ->d_automount() operation is called with no locks held and may sleep. At this point the pathwalk algorithm will be in ref-walk mode. Within fs/namei.c itself, a new pathwalk subroutine (follow_automount()) is added to handle mountpoints. It will return -EREMOTE if the automount flag was set, but no d_automount() op was supplied, -ELOOP if we've encountered too many symlinks or mountpoints, -EISDIR if the walk point should be used without mounting and 0 if successful. The path will be updated to point to the mounted filesystem if a successful automount took place. __follow_mount() is replaced by follow_managed() which is more generic (especially with the patch that adds ->d_manage()). This handles transits from directories during pathwalk, including automounting and skipping over mountpoints (and holding processes with the next patch). __follow_mount_rcu() will jump out of RCU-walk mode if it encounters an automount point with nothing mounted on it. follow_dotdot*() does not handle automounts as you don't want to trigger them whilst following "..". I've also extracted the mount/don't-mount logic from autofs4 and included it here. It makes the mount go ahead anyway if someone calls open() or creat(), tries to traverse the directory, tries to chdir/chroot/etc. into the directory, or sticks a '/' on the end of the pathname. If they do a stat(), however, they'll only trigger the automount if they didn't also say O_NOFOLLOW. I've also added an inode flag (S_AUTOMOUNT) so that filesystems can mark their inodes as automount points. This flag is automatically propagated to the dentry as DCACHE_NEED_AUTOMOUNT by __d_instantiate(). This saves NFS and could save AFS a private flag bit apiece, but is not strictly necessary. It would be preferable to do the propagation in d_set_d_op(), but that doesn't normally have access to the inode. 
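A hypothetical filesystem would wire the pieces up along these lines (all myfs_* names are invented for illustration; this is a sketch of the contract described above, not code from the patch):

/* Hypothetical: build or find the submount for this referral point.
 * Per the contract above: return the new vfsmount, NULL if we lost a
 * race with another automount attempt, or an ERR_PTR such as
 * ERR_PTR(-EISDIR) to make pathwalk use the directory itself. */
static struct vfsmount *myfs_d_automount(struct path *path)
{
	return myfs_make_submount(path);	/* invented fs-private helper */
}

static const struct dentry_operations myfs_dentry_ops = {
	.d_automount	= myfs_d_automount,
};

/* At inode setup time, mark referral inodes so that __d_instantiate()
 * propagates DCACHE_NEED_AUTOMOUNT to their dentries: */
static void myfs_mark_referral(struct inode *inode)
{
	inode->i_flags |= S_AUTOMOUNT;
}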
[AV: fixed breakage in case if __follow_mount_rcu() fails and nameidata_drop_rcu() succeeds in RCU case of do_lookup(); we need to fall through to non-RCU case after that, rather than just returning with ungrabbed *path] Signed-off-by: David Howells Was-Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/dcache.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 0c6d5c549d84..51f7bb6463af 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1380,8 +1380,11 @@ EXPORT_SYMBOL(d_set_d_op); static void __d_instantiate(struct dentry *dentry, struct inode *inode) { spin_lock(&dentry->d_lock); - if (inode) + if (inode) { + if (unlikely(IS_AUTOMOUNT(inode))) + dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; list_add(&dentry->d_alias, &inode->i_dentry); + } dentry->d_inode = inode; dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); -- cgit v1.2.2 From ff5fdb61493d95332945630fcae249f896098652 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 22 Jan 2011 20:16:06 -0800 Subject: fs: fix new dcache.c kernel-doc warnings Fix new fs/dcache.c kernel-doc warnings: Warning(fs/dcache.c:184): No description found for parameter 'dentry' Warning(fs/dcache.c:296): No description found for parameter 'parent' Warning(fs/dcache.c:1985): No description found for parameter 'dparent' Warning(fs/dcache.c:1985): Excess function parameter 'parent' description in 'd_validate' Signed-off-by: Randy Dunlap Cc: Alexander Viro Cc: Nick Piggin Signed-off-by: Linus Torvalds --- fs/dcache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 9f493ee4dcba..2a6bd9a4ae97 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -176,6 +176,7 @@ static void d_free(struct dentry *dentry) /** * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups + * @dentry: the target dentry * After this call, in-progress rcu-walk path lookup will fail. This * should be called after unhashing, and after changing d_inode (if * the dentry has not already been unhashed). @@ -281,6 +282,7 @@ static void dentry_lru_move_tail(struct dentry *dentry) /** * d_kill - kill dentry and return parent * @dentry: dentry to kill + * @parent: parent dentry * * The dentry must already be unhashed and removed from the LRU. * @@ -1973,7 +1975,7 @@ out: /** * d_validate - verify dentry provided from insecure source (deprecated) * @dentry: The dentry alleged to be valid child of @dparent - * @parent: The parent dentry (known to be valid) + * @dparent: The parent dentry (known to be valid) * * An insecure source has sent us a dentry, here we verify it and dget() it. * This is used by ncpfs in its readdir implementation. -- cgit v1.2.2 From b0a4bb830e099a31bec79452268639a7d8c2c1e4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 22 Jan 2011 15:31:32 +0900 Subject: fs: update comments to point correct document dcache-locking.txt is not exist any more, and the path was not correct anyway. Fix it. Signed-off-by: Namhyung Kim Signed-off-by: Jiri Kosina --- fs/dcache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 2a6bd9a4ae97..79802bd790e4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1781,7 +1781,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, * false-negative result. d_lookup() protects against concurrent * renames using rename_lock seqlock. * - * See Documentation/vfs/dcache-locking.txt for more details. 
+ * See Documentation/filesystems/path-lookup.txt for more details. */ hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { struct inode *i; @@ -1901,7 +1901,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) * false-negative result. d_lookup() protects against concurrent * renames using rename_lock seqlock. * - * See Documentation/vfs/dcache-locking.txt for more details. + * See Documentation/filesystems/path-lookup.txt for more details. */ rcu_read_lock(); -- cgit v1.2.2 From d891eedbc3b1b0fade8a9ce60cc0eba1cccb59e5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 18 Jan 2011 15:45:09 -0500 Subject: fs/dcache: allow d_obtain_alias() to return unhashed dentries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this patch, inodes are not promptly freed on last close of an unlinked file by an nfs client: client$ mount -tnfs4 server:/export/ /mnt/ client$ tail -f /mnt/FOO ... server$ df -i /export server$ rm /export/FOO (^C the tail -f) server$ df -i /export server$ echo 2 >/proc/sys/vm/drop_caches server$ df -i /export the df's will show that the inode is not freed on the filesystem until the last step, when it could have been freed after killing the client's tail -f. On-disk data won't be deallocated either, leading to possible spurious ENOSPC. This occurs because when the client does the close, it arrives in a compound with a putfh and a close, processed like: - putfh: look up the filehandle.  The only alias found for the inode will be DCACHE_UNHASHED alias referenced by the filp this, so it creates a new DCACHE_DISCONECTED dentry and returns that instead. - close: closes the existing filp, which is destroyed immediately by dput() since it's DCACHE_UNHASHED. - end of the compound: release the reference to the current filehandle, and dput() the new DCACHE_DISCONECTED dentry, which gets put on the unused list instead of being destroyed immediately. Nick Piggin suggested fixing this by allowing d_obtain_alias to return the unhashed dentry that is referenced by the filp, instead of making it create a new dentry. Leave __d_find_alias() alone to avoid changing behavior of other callers. Also nfsd doesn't need all the checks of __d_find_alias(); any dentry, hashed or unhashed, disconnected or not, should work. Signed-off-by: J. 
Bruce Fields Signed-off-by: Al Viro --- fs/dcache.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 2a6bd9a4ae97..611ffe928c03 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1523,6 +1523,28 @@ struct dentry * d_alloc_root(struct inode * root_inode) } EXPORT_SYMBOL(d_alloc_root); +static struct dentry * __d_find_any_alias(struct inode *inode) +{ + struct dentry *alias; + + if (list_empty(&inode->i_dentry)) + return NULL; + alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias); + __dget(alias); + return alias; +} + +static struct dentry * d_find_any_alias(struct inode *inode) +{ + struct dentry *de; + + spin_lock(&inode->i_lock); + de = __d_find_any_alias(inode); + spin_unlock(&inode->i_lock); + return de; +} + + /** * d_obtain_alias - find or allocate a dentry for a given inode * @inode: inode to allocate the dentry for @@ -1552,7 +1574,7 @@ struct dentry *d_obtain_alias(struct inode *inode) if (IS_ERR(inode)) return ERR_CAST(inode); - res = d_find_alias(inode); + res = d_find_any_alias(inode); if (res) goto out_iput; @@ -1565,7 +1587,7 @@ struct dentry *d_obtain_alias(struct inode *inode) spin_lock(&inode->i_lock); - res = __d_find_alias(inode, 0); + res = __d_find_any_alias(inode); if (res) { spin_unlock(&inode->i_lock); dput(tmp); -- cgit v1.2.2 From c826cb7dfce80512c26c984350077a25046bd215 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 15 Mar 2011 15:29:21 -0700 Subject: dcache.c: create helper function for duplicated functionality This creates a helper function for he "try to ascend into the parent directory" case, which was written out in triplicate before. With all the locking and subtle sequence number stuff, we really don't want to duplicate that kind of code. Signed-off-by: Linus Torvalds --- fs/dcache.c | 88 ++++++++++++++++++++++++++----------------------------------- 1 file changed, 37 insertions(+), 51 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 611ffe928c03..361882a14ccb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1011,6 +1011,34 @@ void shrink_dcache_for_umount(struct super_block *sb) } } +/* + * This tries to ascend one level of parenthood, but + * we can race with renaming, so we need to re-check + * the parenthood after dropping the lock and check + * that the sequence number still matches. + */ +static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq) +{ + struct dentry *new = old->d_parent; + + rcu_read_lock(); + spin_unlock(&old->d_lock); + spin_lock(&new->d_lock); + + /* + * might go back up the wrong parent if we have had a rename + * or deletion + */ + if (new != old->d_parent || + (!locked && read_seqretry(&rename_lock, seq))) { + spin_unlock(&new->d_lock); + new = NULL; + } + rcu_read_unlock(); + return new; +} + + /* * Search for at least 1 mount point in the dentry's subdirs. * We descend to the next level whenever the d_subdirs @@ -1066,24 +1094,10 @@ resume: * All done at this level ... ascend and resume the search. 
*/ if (this_parent != parent) { - struct dentry *tmp; - struct dentry *child; - - tmp = this_parent->d_parent; - rcu_read_lock(); - spin_unlock(&this_parent->d_lock); - child = this_parent; - this_parent = tmp; - spin_lock(&this_parent->d_lock); - /* might go back up the wrong parent if we have had a rename - * or deletion */ - if (this_parent != child->d_parent || - (!locked && read_seqretry(&rename_lock, seq))) { - spin_unlock(&this_parent->d_lock); - rcu_read_unlock(); + struct dentry *child = this_parent; + this_parent = try_to_ascend(this_parent, locked, seq); + if (!this_parent) goto rename_retry; - } - rcu_read_unlock(); next = child->d_u.d_child.next; goto resume; } @@ -1181,24 +1195,10 @@ resume: * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - struct dentry *tmp; - struct dentry *child; - - tmp = this_parent->d_parent; - rcu_read_lock(); - spin_unlock(&this_parent->d_lock); - child = this_parent; - this_parent = tmp; - spin_lock(&this_parent->d_lock); - /* might go back up the wrong parent if we have had a rename - * or deletion */ - if (this_parent != child->d_parent || - (!locked && read_seqretry(&rename_lock, seq))) { - spin_unlock(&this_parent->d_lock); - rcu_read_unlock(); + struct dentry *child = this_parent; + this_parent = try_to_ascend(this_parent, locked, seq); + if (!this_parent) goto rename_retry; - } - rcu_read_unlock(); next = child->d_u.d_child.next; goto resume; } @@ -2942,28 +2942,14 @@ resume: spin_unlock(&dentry->d_lock); } if (this_parent != root) { - struct dentry *tmp; - struct dentry *child; - - tmp = this_parent->d_parent; + struct dentry *child = this_parent; if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { this_parent->d_flags |= DCACHE_GENOCIDE; this_parent->d_count--; } - rcu_read_lock(); - spin_unlock(&this_parent->d_lock); - child = this_parent; - this_parent = tmp; - spin_lock(&this_parent->d_lock); - /* might go back up the wrong parent if we have had a rename - * or deletion */ - if (this_parent != child->d_parent || - (!locked && read_seqretry(&rename_lock, seq))) { - spin_unlock(&this_parent->d_lock); - rcu_read_unlock(); + this_parent = try_to_ascend(this_parent, locked, seq); + if (!this_parent) goto rename_retry; - } - rcu_read_unlock(); next = child->d_u.d_child.next; goto resume; } -- cgit v1.2.2 From c83ce989cb5ff86575821992ea82c4df5c388ebc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Mar 2011 13:36:43 -0400 Subject: VFS: Fix the nfs sillyrename regression in kernel 2.6.38 The new vfs locking scheme introduced in 2.6.38 breaks NFS sillyrename because the latter relies on being able to determine the parent directory of the dentry in the ->iput() callback in order to send the appropriate unlink rpc call. Looking at the code that cares about races with dput(), there doesn't seem to be anything that specifically uses d_parent as a test for whether or not there is a race: - __d_lookup_rcu(), __d_lookup() all test for d_hashed() after d_parent - shrink_dcache_for_umount() is safe since nothing else can rearrange the dentries in that super block. - have_submount(), select_parent() and d_genocide() can test for a deletion if we set the DCACHE_DISCONNECTED flag when the dentry is removed from the parent's d_subdirs list. 
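Concretely, that gives the ascent path one extra test; this is the shape of the hunk below, not a new interface:

/* In try_to_ascend(): bail out if the child was unlinked from its
 * parent (now flagged DCACHE_DISCONNECTED in d_kill()) or if a
 * rename raced with us. */
if (new != old->d_parent ||
    (old->d_flags & DCACHE_DISCONNECTED) ||
    (!locked && read_seqretry(&rename_lock, seq))) {
	spin_unlock(&new->d_lock);
	new = NULL;
}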
Signed-off-by: Trond Myklebust Cc: stable@kernel.org (2.6.38, needs commit c826cb7dfce8 "dcache.c: create helper function for duplicated functionality" ) Signed-off-by: Linus Torvalds --- fs/dcache.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 361882a14ccb..a39fe47c466f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -296,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(parent->d_lock) __releases(dentry->d_inode->i_lock) { - dentry->d_parent = NULL; list_del(&dentry->d_u.d_child); + /* + * Inform try_to_ascend() that we are no longer attached to the + * dentry tree + */ + dentry->d_flags |= DCACHE_DISCONNECTED; if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); @@ -1030,6 +1034,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq * or deletion */ if (new != old->d_parent || + (old->d_flags & DCACHE_DISCONNECTED) || (!locked && read_seqretry(&rename_lock, seq))) { spin_unlock(&new->d_lock); new = NULL; -- cgit v1.2.2 From 24ff6663ccfdaf088dfa7acae489cb11ed4f43c4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 18 Nov 2010 20:52:55 -0500 Subject: fs: call security_d_instantiate in d_obtain_alias V2 While trying to track down some NFS problems with BTRFS, I kept noticing I was getting -EACCESS for no apparent reason. Eric Paris and printk() helped me figure out that it was SELinux that was giving me grief, with the following denial type=AVC msg=audit(1290013638.413:95): avc: denied { 0x800000 } for pid=1772 comm="nfsd" name="" dev=sda1 ino=256 scontext=system_u:system_r:kernel_t:s0 tcontext=system_u:object_r:unlabeled_t:s0 tclass=file Turns out this is because in d_obtain_alias if we can't find an alias we create one and do all the normal instantiation stuff, but we don't do the security_d_instantiate. Usually we are protected from getting a hashed dentry that hasn't yet run security_d_instantiate() by the parent's i_mutex, but obviously this isn't an option there, so in order to deal with the case that a second thread comes in and finds our new dentry before we get to run security_d_instantiate(), we go ahead and call it if we find a dentry already. Eric assures me that this is ok as the code checks to see if the dentry has been initialized already so calling security_d_instantiate() against the same dentry multiple times is ok. With this patch I'm no longer getting errant -EACCESS values. Signed-off-by: Josef Bacik Signed-off-by: Al Viro --- fs/dcache.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index a39fe47c466f..1baddc1cec48 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1612,10 +1612,13 @@ struct dentry *d_obtain_alias(struct inode *inode) __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); spin_unlock(&tmp->d_lock); spin_unlock(&inode->i_lock); + security_d_instantiate(tmp, inode); return tmp; out_iput: + if (res && !IS_ERR(res)) + security_d_instantiate(res, inode); iput(inode); return res; } -- cgit v1.2.2 From 7ebfa57f6d307b66bb88600145afccde31016ab5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 15 Apr 2011 07:34:26 -0700 Subject: vfs: fix incorrect dentry_update_name_case() BUG_ON() test The case we should be verifying when updating the dentry name is that the _parent_ inode (the directory) semaphore is held, not the semaphore for the dentry itself. 
It's the directory locking that rename and readdir() etc all care about. The comment just above even says so - but then the BUG_ON() still checked the dentry inode itself. Very few people noticed, because this helper function really isn't used for very much, so you had to be using ncpfs to ever hit it. I think I should just remove the BUG_ON (the function really has just one user), but let's run with it fixed for a while before getting rid of it entirely. Reported-and-tested-by: Bongani Hlope Reported-and-tested-by: Bernd Feige Cc: Petr Vandrovec , Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: Nick Piggin Signed-off-by: Linus Torvalds --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index ad25c4cec7d5..129a35730994 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2131,7 +2131,7 @@ EXPORT_SYMBOL(d_rehash); */ void dentry_update_name_case(struct dentry *dentry, struct qstr *name) { - BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); + BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex)); BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ spin_lock(&dentry->d_lock); -- cgit v1.2.2 From b07ad9967f40b164af77205027352ba53729cf5a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 23 Apr 2011 22:32:03 -0700 Subject: vfs: get rid of 'struct dcache_hash_bucket' abstraction It's a useless abstraction for 'hlist_bl_head', and it doesn't actually help anything - quite the reverse. All the users end up having to know about the hlist_bl_head details anyway, using 'struct hlist_bl_node *' etc. So it just makes the code look confusing. And the cost of it is extra '&b->head' syntactic noise, but more importantly it spuriously makes the hash table dentry list look different from the per-superblock DCACHE_DISCONNECTED dentry list. As a result, the code ended up using ad-hoc locking for one case and special helper functions for what is really another totally identical case in the very same function. Make it all look and work the same. 
Signed-off-by: Linus Torvalds --- fs/dcache.c | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 129a35730994..7108c15685dd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -99,12 +99,9 @@ static struct kmem_cache *dentry_cache __read_mostly; static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; -struct dcache_hash_bucket { - struct hlist_bl_head head; -}; -static struct dcache_hash_bucket *dentry_hashtable __read_mostly; +static struct hlist_bl_head *dentry_hashtable __read_mostly; -static inline struct dcache_hash_bucket *d_hash(struct dentry *parent, +static inline struct hlist_bl_head *d_hash(struct dentry *parent, unsigned long hash) { hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; @@ -112,14 +109,14 @@ static inline struct dcache_hash_bucket *d_hash(struct dentry *parent, return dentry_hashtable + (hash & D_HASHMASK); } -static inline void spin_lock_bucket(struct dcache_hash_bucket *b) +static inline void spin_lock_bucket(struct hlist_bl_head *b) { - bit_spin_lock(0, (unsigned long *)&b->head.first); + bit_spin_lock(0, (unsigned long *)&b->first); } -static inline void spin_unlock_bucket(struct dcache_hash_bucket *b) +static inline void spin_unlock_bucket(struct hlist_bl_head *b) { - __bit_spin_unlock(0, (unsigned long *)&b->head.first); + __bit_spin_unlock(0, (unsigned long *)&b->first); } /* Statistics gathering. */ @@ -331,15 +328,15 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) void __d_drop(struct dentry *dentry) { if (!(dentry->d_flags & DCACHE_UNHASHED)) { + struct hlist_bl_head *b; if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) { - bit_spin_lock(0, - (unsigned long *)&dentry->d_sb->s_anon.first); + b = &dentry->d_sb->s_anon; + spin_lock_bucket(b); dentry->d_flags |= DCACHE_UNHASHED; hlist_bl_del_init(&dentry->d_hash); - __bit_spin_unlock(0, - (unsigned long *)&dentry->d_sb->s_anon.first); + spin_unlock_bucket(b); } else { - struct dcache_hash_bucket *b; + struct hlist_bl_head *b; b = d_hash(dentry->d_parent, dentry->d_name.hash); spin_lock_bucket(b); /* @@ -1789,7 +1786,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; - struct dcache_hash_bucket *b = d_hash(parent, hash); + struct hlist_bl_head *b = d_hash(parent, hash); struct hlist_bl_node *node; struct dentry *dentry; @@ -1813,7 +1810,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, * * See Documentation/filesystems/path-lookup.txt for more details. 
*/ - hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { + hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { struct inode *i; const char *tname; int tlen; @@ -1908,7 +1905,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; - struct dcache_hash_bucket *b = d_hash(parent, hash); + struct hlist_bl_head *b = d_hash(parent, hash); struct hlist_bl_node *node; struct dentry *found = NULL; struct dentry *dentry; @@ -1935,7 +1932,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) */ rcu_read_lock(); - hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { + hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { const char *tname; int tlen; @@ -2086,12 +2083,12 @@ again: } EXPORT_SYMBOL(d_delete); -static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b) +static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); spin_lock_bucket(b); entry->d_flags &= ~DCACHE_UNHASHED; - hlist_bl_add_head_rcu(&entry->d_hash, &b->head); + hlist_bl_add_head_rcu(&entry->d_hash, b); spin_unlock_bucket(b); } @@ -3025,7 +3022,7 @@ static void __init dcache_init_early(void) dentry_hashtable = alloc_large_system_hash("Dentry cache", - sizeof(struct dcache_hash_bucket), + sizeof(struct hlist_bl_head), dhash_entries, 13, HASH_EARLY, @@ -3034,7 +3031,7 @@ static void __init dcache_init_early(void) 0); for (loop = 0; loop < (1 << d_hash_shift); loop++) - INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); + INIT_HLIST_BL_HEAD(dentry_hashtable + loop); } static void __init dcache_init(void) @@ -3057,7 +3054,7 @@ static void __init dcache_init(void) dentry_hashtable = alloc_large_system_hash("Dentry cache", - sizeof(struct dcache_hash_bucket), + sizeof(struct hlist_bl_head), dhash_entries, 13, 0, @@ -3066,7 +3063,7 @@ static void __init dcache_init(void) 0); for (loop = 0; loop < (1 << d_hash_shift); loop++) - INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); + INIT_HLIST_BL_HEAD(dentry_hashtable + loop); } /* SLAB cache for __getname() consumers */ -- cgit v1.2.2 From dea3667bc3c2a0521e8d8855e407a49d9d70028c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Apr 2011 07:58:46 -0700 Subject: vfs: get rid of insane dentry hashing rules The dentry hashing rules have been really quite complicated for a long while, in odd ways. That made functions like __d_drop() very fragile and non-obvious. In particular, whether a dentry was hashed or not was indicated with an explicit DCACHE_UNHASHED bit. That's despite the fact that the hash abstraction that the dentries use actually have a 'is this entry hashed or not' model (which is a simple test of the 'pprev' pointer). The reason that was done is because we used the normal 'is this entry unhashed' model to mark whether the dentry had _ever_ been hashed in the dentry hash tables, and that logic goes back many years (commit b3423415fbc2: "dcache: avoid RCU for never-hashed dentries"). That, in turn, meant that __d_drop had totally different unhashing logic for the dentry hash table case and for the anonymous dcache case, because in order to use the "is this dentry hashed" logic as a flag for whether it had ever been on the RCU hash table, we had to unhash such a dentry differently so that we'd never think that it wasn't 'unhashed' and wouldn't be free'd correctly. That's just insane. 
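For reference, the hashed-or-not model referred to above is essentially list_bl's existing pprev test, so no flag bit is needed to answer "is this dentry on a hash chain":

static inline int hlist_bl_unhashed(const struct hlist_bl_node *h)
{
	return !h->pprev;	/* unhashed iff no back-pointer */
}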
It made the logic really hard to follow, when there were two different kinds of "unhashed" states, and one of them (the one that used "list_bl_unhashed()") really had nothing at all to do with being unhashed per se, but with a very subtle lifetime rule instead. So turn all of it around, and make it logical. Instead of having a DCACHE_UNHASHED bit in d_flags to indicate whether the dentry is on the hash chains or not, use the hash chain unhashed logic for that. Suddenly "d_unhashed()" just uses "list_bl_unhashed()", and everything makes sense. And for the lifetime rule, just use an explicit DCACHE_RCUACCESS bit. If we ever insert the dentry into the dentry hash table so that it is visible to RCU lookup, we mark it DCACHE_RCUACCESS to show that it now needs the RCU lifetime rules. Now suddenly that test at dentry free time makes sense too. And because unhashing now is sane and doesn't depend on where the dentry got unhashed from (because the dentry hash chain details don't have some subtle side effects), we can re-unify the __d_drop() logic and use common code for the unhashing. Also fix one more open-coded hash chain bit_spin_lock() that I missed in the previous chain locking cleanup commit. Signed-off-by: Linus Torvalds --- fs/dcache.c | 42 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 26 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 7108c15685dd..d600a0af3b2e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -164,8 +164,8 @@ static void d_free(struct dentry *dentry) if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); - /* if dentry was never inserted into hash, immediate free is OK */ - if (hlist_bl_unhashed(&dentry->d_hash)) + /* if dentry was never visible to RCU, immediate free is OK */ + if (!(dentry->d_flags & DCACHE_RCUACCESS)) __d_free(&dentry->d_u.d_rcu); else call_rcu(&dentry->d_u.d_rcu, __d_free); @@ -327,28 +327,19 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) */ void __d_drop(struct dentry *dentry) { - if (!(dentry->d_flags & DCACHE_UNHASHED)) { + if (!d_unhashed(dentry)) { struct hlist_bl_head *b; - if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) { + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) b = &dentry->d_sb->s_anon; - spin_lock_bucket(b); - dentry->d_flags |= DCACHE_UNHASHED; - hlist_bl_del_init(&dentry->d_hash); - spin_unlock_bucket(b); - } else { - struct hlist_bl_head *b; + else b = d_hash(dentry->d_parent, dentry->d_name.hash); - spin_lock_bucket(b); - /* - * We may not actually need to put DCACHE_UNHASHED - * manipulations under the hash lock, but follow - * the principle of least surprise.
- */ - dentry->d_flags |= DCACHE_UNHASHED; - hlist_bl_del_rcu(&dentry->d_hash); - spin_unlock_bucket(b); - dentry_rcuwalk_barrier(dentry); - } + + spin_lock_bucket(b); + __hlist_bl_del(&dentry->d_hash); + dentry->d_hash.pprev = NULL; + spin_unlock_bucket(b); + + dentry_rcuwalk_barrier(dentry); } } EXPORT_SYMBOL(__d_drop); @@ -1301,7 +1292,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dname[name->len] = 0; dentry->d_count = 1; - dentry->d_flags = DCACHE_UNHASHED; + dentry->d_flags = 0; spin_lock_init(&dentry->d_lock); seqcount_init(&dentry->d_seq); dentry->d_inode = NULL; @@ -1603,10 +1594,9 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; list_add(&tmp->d_alias, &inode->i_dentry); - bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first); - tmp->d_flags &= ~DCACHE_UNHASHED; + spin_lock_bucket(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); - __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); + spin_unlock_bucket(&tmp->d_sb->s_anon); spin_unlock(&tmp->d_lock); spin_unlock(&inode->i_lock); security_d_instantiate(tmp, inode); @@ -2087,7 +2077,7 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); spin_lock_bucket(b); - entry->d_flags &= ~DCACHE_UNHASHED; + entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); spin_unlock_bucket(b); } -- cgit v1.2.2 From 1879fd6a26571fd4e8e1f4bb3e7537bc936b1fe7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 25 Apr 2011 14:01:36 -0400 Subject: add hlist_bl_lock/unlock helpers Now that the whole dcache_hash_bucket crap is gone, go all the way and also remove the weird locking layering violations for locking the hash buckets. Add hlist_bl_lock/unlock helpers to move the locking into the list abstraction instead of requiring each caller to open code it. After all allowing for the bit locks is the whole point of these helpers over the plain hlist variant. Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- fs/dcache.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index d600a0af3b2e..22a0ef41bad1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -109,16 +109,6 @@ static inline struct hlist_bl_head *d_hash(struct dentry *parent, return dentry_hashtable + (hash & D_HASHMASK); } -static inline void spin_lock_bucket(struct hlist_bl_head *b) -{ - bit_spin_lock(0, (unsigned long *)&b->first); -} - -static inline void spin_unlock_bucket(struct hlist_bl_head *b) -{ - __bit_spin_unlock(0, (unsigned long *)&b->first); -} - /* Statistics gathering. 
*/ struct dentry_stat_t dentry_stat = { .age_limit = 45, }; @@ -334,10 +324,10 @@ void __d_drop(struct dentry *dentry) else b = d_hash(dentry->d_parent, dentry->d_name.hash); - spin_lock_bucket(b); + hlist_bl_lock(b); __hlist_bl_del(&dentry->d_hash); dentry->d_hash.pprev = NULL; - spin_unlock_bucket(b); + hlist_bl_unlock(b); dentry_rcuwalk_barrier(dentry); } @@ -1594,9 +1584,9 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; list_add(&tmp->d_alias, &inode->i_dentry); - spin_lock_bucket(&tmp->d_sb->s_anon); + hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); - spin_unlock_bucket(&tmp->d_sb->s_anon); + hlist_bl_unlock(&tmp->d_sb->s_anon); spin_unlock(&tmp->d_lock); spin_unlock(&inode->i_lock); security_d_instantiate(tmp, inode); @@ -2076,10 +2066,10 @@ EXPORT_SYMBOL(d_delete); static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); - spin_lock_bucket(b); + hlist_bl_lock(b); entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); - spin_unlock_bucket(b); + hlist_bl_unlock(b); } static void _d_rehash(struct dentry * entry) -- cgit v1.2.2
From 268bb0ce3e87872cb9290c322b0d35bce230d88f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 20 May 2011 12:50:29 -0700 Subject: sanitize <linux/prefetch.h> usage
Commit e66eed651fd1 ("list: remove prefetching from regular list iterators") removed the include of prefetch.h from list.h, which uncovered several cases that had apparently relied on that rather obscure header file dependency. So this fixes things up a bit, using grep -L linux/prefetch.h $(git grep -l '[^a-z_]prefetchw*(' -- '*.[ch]') grep -L 'prefetchw*(' $(git grep -l 'linux/prefetch.h' -- '*.[ch]') to guide us in finding files that either need inclusion, or have it despite not needing it. There are more of them around (mostly network drivers), but this gets many core ones.
Reported-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- fs/dcache.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 22a0ef41bad1..18b2a1f10ed8 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -35,6 +35,7 @@ #include <linux/hardirq.h> #include <linux/bit_spinlock.h> #include <linux/rculist_bl.h> +#include <linux/prefetch.h> #include "internal.h" /* -- cgit v1.2.2
From 1495f230fa7750479c79e3656286b9183d662077 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Tue, 24 May 2011 17:12:27 -0700 Subject: vmscan: change shrinker API by passing shrink_control struct
Change each shrinker's API by consolidating the existing parameters into a shrink_control struct. This will simplify adding further features without touching each shrinker's file.
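For a concrete sense of the consolidation, here is a minimal sketch of a shrinker written against the new API. It is illustrative only, not part of the patch: the my_cache_* names and free_some_entries() helper are hypothetical, while struct shrink_control, its nr_to_scan and gfp_mask fields, the -1 "bail out" return, and register_shrinker()/DEFAULT_SEEKS are taken from the kernel API of this era (compare the dcache hunk below).

#include <linux/kernel.h>
#include <linux/mm.h>	/* struct shrinker, struct shrink_control */

/* Hypothetical cache; only the shrinker plumbing reflects the new API. */
static int my_cache_count;

static int free_some_entries(int nr)	/* hypothetical reclaim helper */
{
	int freed = min(nr, my_cache_count);

	my_cache_count -= freed;
	return freed;
}

static int my_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
	if (sc->nr_to_scan) {
		/* Refuse to reclaim if the caller cannot re-enter the
		 * filesystem, mirroring shrink_dcache_memory() below. */
		if (!(sc->gfp_mask & __GFP_FS))
			return -1;
		free_some_entries(sc->nr_to_scan);
	}
	/* With nr_to_scan == 0 the VM is only asking for the count. */
	return my_cache_count;
}

static struct shrinker my_shrinker = {
	.shrink	= my_shrink,
	.seeks	= DEFAULT_SEEKS,
};
/* registered once at init time with: register_shrinker(&my_shrinker); */

The point of the struct is visible here: a future knob becomes one new field set in vmscan, not a signature change rippled through every shrinker.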
[akpm@linux-foundation.org: fix build] [akpm@linux-foundation.org: fix warning] [kosaki.motohiro@jp.fujitsu.com: fix up new shrinker API] [akpm@linux-foundation.org: fix xfs warning] [akpm@linux-foundation.org: update gfs2] Signed-off-by: Ying Han Cc: KOSAKI Motohiro Cc: Minchan Kim Acked-by: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Acked-by: Rik van Riel Cc: Johannes Weiner Cc: Hugh Dickins Cc: Dave Hansen Cc: Steven Whitehouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dcache.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 18b2a1f10ed8..37f72ee5bf7c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1220,7 +1220,7 @@ void shrink_dcache_parent(struct dentry * parent) EXPORT_SYMBOL(shrink_dcache_parent); /* - * Scan `nr' dentries and return the number which remain. + * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain. * * We need to avoid reentering the filesystem if the caller is performing a * GFP_NOFS allocation attempt. One example deadlock is: @@ -1231,8 +1231,12 @@ EXPORT_SYMBOL(shrink_dcache_parent); * * In this case we return -1 to tell the caller that we baled. */ -static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int shrink_dcache_memory(struct shrinker *shrink, + struct shrink_control *sc) { + int nr = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; + if (nr) { if (!(gfp_mask & __GFP_FS)) return -1; -- cgit v1.2.2 From 1836750115f20b774e55c032a3893e8c5bdf41ed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 12 Jul 2011 21:42:24 -0400 Subject: fix loop checks in d_materialise_unique() Both __d_unalias() and __d_materialise_dentry() need loop prevention. Grab rename_lock in caller, check for loops there... As a side benefit, we have dentry_lock_for_move() called only under rename_lock, which seriously reduces deadlock potential of the execrable "locking order" used for ->d_lock. Signed-off-by: Al Viro --- fs/dcache.c | 51 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 17 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 37f72ee5bf7c..6e4ea6d87774 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2213,14 +2213,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, * The hash value has to match the hash queue that the dentry is on.. */ /* - * d_move - move a dentry + * __d_move - move a dentry * @dentry: entry to move * @target: new dentry * * Update the dcache to reflect the move of a file name. Negative - * dcache entries should not be moved in this way. + * dcache entries should not be moved in this way. Caller hold + * rename_lock. 
*/ -void d_move(struct dentry * dentry, struct dentry * target) +static void __d_move(struct dentry * dentry, struct dentry * target) { if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -2228,8 +2229,6 @@ void d_move(struct dentry * dentry, struct dentry * target) BUG_ON(d_ancestor(dentry, target)); BUG_ON(d_ancestor(target, dentry)); - write_seqlock(&rename_lock); - dentry_lock_for_move(dentry, target); write_seqcount_begin(&dentry->d_seq); @@ -2275,6 +2274,20 @@ void d_move(struct dentry * dentry, struct dentry * target) spin_unlock(&target->d_lock); fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); +} + +/* + * d_move - move a dentry + * @dentry: entry to move + * @target: new dentry + * + * Update the dcache to reflect the move of a file name. Negative + * dcache entries should not be moved in this way. + */ +void d_move(struct dentry *dentry, struct dentry *target) +{ + write_seqlock(&rename_lock); + __d_move(dentry, target); write_sequnlock(&rename_lock); } EXPORT_SYMBOL(d_move); @@ -2302,7 +2315,7 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding - * dentry->d_parent->d_inode->i_mutex and the inode->i_lock + * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... @@ -2317,11 +2330,6 @@ static struct dentry *__d_unalias(struct inode *inode, if (alias->d_parent == dentry->d_parent) goto out_unalias; - /* Check for loops */ - ret = ERR_PTR(-ELOOP); - if (d_ancestor(alias, dentry)) - goto out_err; - /* See lock_rename() */ ret = ERR_PTR(-EBUSY); if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) @@ -2331,7 +2339,7 @@ static struct dentry *__d_unalias(struct inode *inode, goto out_err; m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: - d_move(alias, dentry); + __d_move(alias, dentry); ret = alias; out_err: spin_unlock(&inode->i_lock); @@ -2416,15 +2424,24 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) alias = __d_find_alias(inode, 0); if (alias) { actual = alias; - /* Is this an anonymous mountpoint that we could splice - * into our tree? */ - if (IS_ROOT(alias)) { + write_seqlock(&rename_lock); + + if (d_ancestor(alias, dentry)) { + /* Check for loops */ + actual = ERR_PTR(-ELOOP); + } else if (IS_ROOT(alias)) { + /* Is this an anonymous mountpoint that we + * could splice into our tree? */ __d_materialise_dentry(dentry, alias); + write_sequnlock(&rename_lock); __d_drop(alias); goto found; + } else { + /* Nope, but we must(!) avoid directory + * aliasing */ + actual = __d_unalias(inode, dentry, alias); } - /* Nope, but we must(!) avoid directory aliasing */ - actual = __d_unalias(inode, dentry, alias); + write_sequnlock(&rename_lock); if (IS_ERR(actual)) dput(alias); goto out_nolock; -- cgit v1.2.2 From b91da88fed84843313a1b6fd1b1c834a24bbcf9e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 21 Jul 2011 11:01:42 -0700 Subject: vfs: drop conditional inode prefetch in __do_lookup_rcu It seems to hurt performance in real life. Yes, the inode will be used later, but the conditional doesn't seem to predict all that well (negative dentries are not uncommon) and it looks like the cost of prefetching is simply higher than depending on the cache doing the right thing. As usual. 
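To make the trade-off concrete, here is a schematic sketch of the pattern this patch removes; it is hypothetical (not the real lookup code), showing only the shape of the conditional prefetch:

#include <linux/dcache.h>
#include <linux/prefetch.h>

/* Illustrative sketch of the lookup fast path, names hypothetical. */
static void touch_name_and_inode(struct dentry *dentry)
{
	const unsigned char *tname = dentry->d_name.name;
	struct inode *inode = dentry->d_inode;

	prefetch(tname);	/* kept: the name is compared on every candidate */

	/* removed by this patch: on negative dentries (inode == NULL,
	 * not uncommon) the branch mispredicts, and the prefetch has a
	 * cost even when the guess is right */
	if (inode)
		prefetch(inode);
}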
Signed-off-by: Linus Torvalds --- fs/dcache.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 6e4ea6d87774..fbdcbca40725 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1813,8 +1813,6 @@ seqretry: tname = dentry->d_name.name; i = dentry->d_inode; prefetch(tname); - if (i) - prefetch(i); /* * This seqcount check is required to ensure name and * len are loaded atomically, so as not to walk off the -- cgit v1.2.2
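The trailing context of that hunk ends inside __d_lookup_rcu()'s seqcount retry loop (note the seqretry: label and the dentry->d_seq comment). As a standalone sketch of that lockless read pattern, with hypothetical names and assuming writers are serialized against each other by some outer lock:

#include <linux/seqlock.h>

struct renameable_name {	/* hypothetical */
	seqcount_t	seq;	/* initialized once with seqcount_init() */
	const char	*name;
	unsigned int	len;
};

/* Writer side: update name and len under the seqcount. */
static void set_name(struct renameable_name *n, const char *name,
		     unsigned int len)
{
	write_seqcount_begin(&n->seq);
	n->name = name;
	n->len = len;
	write_seqcount_end(&n->seq);
}

/* Reader side: load name and len coherently without taking a lock,
 * retrying if a writer ran concurrently -- the same pattern the hunk
 * above relies on so name and len cannot be torn by a rename. */
static void get_name(struct renameable_name *n, const char **name,
		     unsigned int *len)
{
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&n->seq);
		*name = n->name;
		*len = n->len;
	} while (read_seqcount_retry(&n->seq, seq));
}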