aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dcache.c
diff options
context:
space:
mode:
authorNick Piggin <npiggin@kernel.dk>2011-01-07 01:49:37 -0500
committerNick Piggin <npiggin@kernel.dk>2011-01-07 01:50:22 -0500
commit949854d02455080d20cd3e1db28a3a18daf7599d (patch)
tree9b13a6f86c1d0b91e462a471e53b0e717036b18e /fs/dcache.c
parent9abca36087288fe28de4749c71ca003d4b9e3ed0 (diff)
fs: Use rename lock and RCU for multi-step operations
The remaining usages for dcache_lock is to allow atomic, multi-step read-side operations over the directory tree by excluding modifications to the tree. Also, to walk in the leaf->root direction in the tree where we don't have a natural d_lock ordering. This could be accomplished by taking every d_lock, but this would mean a huge number of locks and actually gets very tricky. Solve this instead by using the rename seqlock for multi-step read-side operations, retry in case of a rename so we don't walk up the wrong parent. Concurrent dentry insertions are not serialised against. Concurrent deletes are tricky when walking up the directory: our parent might have been deleted when dropping locks so also need to check and retry for that. We can also use the rename lock in cases where livelock is a worry (and it is introduced in subsequent patch). Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Diffstat (limited to 'fs/dcache.c')
-rw-r--r--fs/dcache.c134
1 files changed, 111 insertions, 23 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index a09f0771fd27..a9bc4ecc21e1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -80,6 +80,7 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
80__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); 80__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
81__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); 81__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
82 82
83EXPORT_SYMBOL(rename_lock);
83EXPORT_SYMBOL(dcache_inode_lock); 84EXPORT_SYMBOL(dcache_inode_lock);
84EXPORT_SYMBOL(dcache_lock); 85EXPORT_SYMBOL(dcache_lock);
85 86
@@ -243,6 +244,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
243 __releases(dcache_inode_lock) 244 __releases(dcache_inode_lock)
244 __releases(dcache_lock) 245 __releases(dcache_lock)
245{ 246{
247 dentry->d_parent = NULL;
246 list_del(&dentry->d_u.d_child); 248 list_del(&dentry->d_u.d_child);
247 if (parent) 249 if (parent)
248 spin_unlock(&parent->d_lock); 250 spin_unlock(&parent->d_lock);
@@ -1017,11 +1019,15 @@ void shrink_dcache_for_umount(struct super_block *sb)
1017 * Return true if the parent or its subdirectories contain 1019 * Return true if the parent or its subdirectories contain
1018 * a mount point 1020 * a mount point
1019 */ 1021 */
1020
1021int have_submounts(struct dentry *parent) 1022int have_submounts(struct dentry *parent)
1022{ 1023{
1023 struct dentry *this_parent = parent; 1024 struct dentry *this_parent;
1024 struct list_head *next; 1025 struct list_head *next;
1026 unsigned seq;
1027
1028rename_retry:
1029 this_parent = parent;
1030 seq = read_seqbegin(&rename_lock);
1025 1031
1026 spin_lock(&dcache_lock); 1032 spin_lock(&dcache_lock);
1027 if (d_mountpoint(parent)) 1033 if (d_mountpoint(parent))
@@ -1055,17 +1061,37 @@ resume:
1055 * All done at this level ... ascend and resume the search. 1061 * All done at this level ... ascend and resume the search.
1056 */ 1062 */
1057 if (this_parent != parent) { 1063 if (this_parent != parent) {
1058 next = this_parent->d_u.d_child.next; 1064 struct dentry *tmp;
1065 struct dentry *child;
1066
1067 tmp = this_parent->d_parent;
1068 rcu_read_lock();
1059 spin_unlock(&this_parent->d_lock); 1069 spin_unlock(&this_parent->d_lock);
1060 this_parent = this_parent->d_parent; 1070 child = this_parent;
1071 this_parent = tmp;
1061 spin_lock(&this_parent->d_lock); 1072 spin_lock(&this_parent->d_lock);
1073 /* might go back up the wrong parent if we have had a rename
1074 * or deletion */
1075 if (this_parent != child->d_parent ||
1076 read_seqretry(&rename_lock, seq)) {
1077 spin_unlock(&this_parent->d_lock);
1078 spin_unlock(&dcache_lock);
1079 rcu_read_unlock();
1080 goto rename_retry;
1081 }
1082 rcu_read_unlock();
1083 next = child->d_u.d_child.next;
1062 goto resume; 1084 goto resume;
1063 } 1085 }
1064 spin_unlock(&this_parent->d_lock); 1086 spin_unlock(&this_parent->d_lock);
1065 spin_unlock(&dcache_lock); 1087 spin_unlock(&dcache_lock);
1088 if (read_seqretry(&rename_lock, seq))
1089 goto rename_retry;
1066 return 0; /* No mount points found in tree */ 1090 return 0; /* No mount points found in tree */
1067positive: 1091positive:
1068 spin_unlock(&dcache_lock); 1092 spin_unlock(&dcache_lock);
1093 if (read_seqretry(&rename_lock, seq))
1094 goto rename_retry;
1069 return 1; 1095 return 1;
1070} 1096}
1071EXPORT_SYMBOL(have_submounts); 1097EXPORT_SYMBOL(have_submounts);
@@ -1086,10 +1112,15 @@ EXPORT_SYMBOL(have_submounts);
1086 */ 1112 */
1087static int select_parent(struct dentry * parent) 1113static int select_parent(struct dentry * parent)
1088{ 1114{
1089 struct dentry *this_parent = parent; 1115 struct dentry *this_parent;
1090 struct list_head *next; 1116 struct list_head *next;
1117 unsigned seq;
1091 int found = 0; 1118 int found = 0;
1092 1119
1120rename_retry:
1121 this_parent = parent;
1122 seq = read_seqbegin(&rename_lock);
1123
1093 spin_lock(&dcache_lock); 1124 spin_lock(&dcache_lock);
1094 spin_lock(&this_parent->d_lock); 1125 spin_lock(&this_parent->d_lock);
1095repeat: 1126repeat:
@@ -1099,7 +1130,6 @@ resume:
1099 struct list_head *tmp = next; 1130 struct list_head *tmp = next;
1100 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 1131 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1101 next = tmp->next; 1132 next = tmp->next;
1102 BUG_ON(this_parent == dentry);
1103 1133
1104 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 1134 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1105 1135
@@ -1142,17 +1172,32 @@ resume:
1142 */ 1172 */
1143 if (this_parent != parent) { 1173 if (this_parent != parent) {
1144 struct dentry *tmp; 1174 struct dentry *tmp;
1145 next = this_parent->d_u.d_child.next; 1175 struct dentry *child;
1176
1146 tmp = this_parent->d_parent; 1177 tmp = this_parent->d_parent;
1178 rcu_read_lock();
1147 spin_unlock(&this_parent->d_lock); 1179 spin_unlock(&this_parent->d_lock);
1148 BUG_ON(tmp == this_parent); 1180 child = this_parent;
1149 this_parent = tmp; 1181 this_parent = tmp;
1150 spin_lock(&this_parent->d_lock); 1182 spin_lock(&this_parent->d_lock);
1183 /* might go back up the wrong parent if we have had a rename
1184 * or deletion */
1185 if (this_parent != child->d_parent ||
1186 read_seqretry(&rename_lock, seq)) {
1187 spin_unlock(&this_parent->d_lock);
1188 spin_unlock(&dcache_lock);
1189 rcu_read_unlock();
1190 goto rename_retry;
1191 }
1192 rcu_read_unlock();
1193 next = child->d_u.d_child.next;
1151 goto resume; 1194 goto resume;
1152 } 1195 }
1153out: 1196out:
1154 spin_unlock(&this_parent->d_lock); 1197 spin_unlock(&this_parent->d_lock);
1155 spin_unlock(&dcache_lock); 1198 spin_unlock(&dcache_lock);
1199 if (read_seqretry(&rename_lock, seq))
1200 goto rename_retry;
1156 return found; 1201 return found;
1157} 1202}
1158 1203
@@ -1654,7 +1699,7 @@ EXPORT_SYMBOL(d_add_ci);
1654struct dentry * d_lookup(struct dentry * parent, struct qstr * name) 1699struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1655{ 1700{
1656 struct dentry * dentry = NULL; 1701 struct dentry * dentry = NULL;
1657 unsigned long seq; 1702 unsigned seq;
1658 1703
1659 do { 1704 do {
1660 seq = read_seqbegin(&rename_lock); 1705 seq = read_seqbegin(&rename_lock);
@@ -2290,7 +2335,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2290 * @buffer: pointer to the end of the buffer 2335 * @buffer: pointer to the end of the buffer
2291 * @buflen: pointer to buffer length 2336 * @buflen: pointer to buffer length
2292 * 2337 *
2293 * Caller holds the dcache_lock. 2338 * Caller holds the rename_lock.
2294 * 2339 *
2295 * If path is not reachable from the supplied root, then the value of 2340 * If path is not reachable from the supplied root, then the value of
2296 * root is changed (without modifying refcounts). 2341 * root is changed (without modifying refcounts).
@@ -2377,7 +2422,9 @@ char *__d_path(const struct path *path, struct path *root,
2377 2422
2378 prepend(&res, &buflen, "\0", 1); 2423 prepend(&res, &buflen, "\0", 1);
2379 spin_lock(&dcache_lock); 2424 spin_lock(&dcache_lock);
2425 write_seqlock(&rename_lock);
2380 error = prepend_path(path, root, &res, &buflen); 2426 error = prepend_path(path, root, &res, &buflen);
2427 write_sequnlock(&rename_lock);
2381 spin_unlock(&dcache_lock); 2428 spin_unlock(&dcache_lock);
2382 2429
2383 if (error) 2430 if (error)
@@ -2441,10 +2488,12 @@ char *d_path(const struct path *path, char *buf, int buflen)
2441 2488
2442 get_fs_root(current->fs, &root); 2489 get_fs_root(current->fs, &root);
2443 spin_lock(&dcache_lock); 2490 spin_lock(&dcache_lock);
2491 write_seqlock(&rename_lock);
2444 tmp = root; 2492 tmp = root;
2445 error = path_with_deleted(path, &tmp, &res, &buflen); 2493 error = path_with_deleted(path, &tmp, &res, &buflen);
2446 if (error) 2494 if (error)
2447 res = ERR_PTR(error); 2495 res = ERR_PTR(error);
2496 write_sequnlock(&rename_lock);
2448 spin_unlock(&dcache_lock); 2497 spin_unlock(&dcache_lock);
2449 path_put(&root); 2498 path_put(&root);
2450 return res; 2499 return res;
@@ -2472,10 +2521,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2472 2521
2473 get_fs_root(current->fs, &root); 2522 get_fs_root(current->fs, &root);
2474 spin_lock(&dcache_lock); 2523 spin_lock(&dcache_lock);
2524 write_seqlock(&rename_lock);
2475 tmp = root; 2525 tmp = root;
2476 error = path_with_deleted(path, &tmp, &res, &buflen); 2526 error = path_with_deleted(path, &tmp, &res, &buflen);
2477 if (!error && !path_equal(&tmp, &root)) 2527 if (!error && !path_equal(&tmp, &root))
2478 error = prepend_unreachable(&res, &buflen); 2528 error = prepend_unreachable(&res, &buflen);
2529 write_sequnlock(&rename_lock);
2479 spin_unlock(&dcache_lock); 2530 spin_unlock(&dcache_lock);
2480 path_put(&root); 2531 path_put(&root);
2481 if (error) 2532 if (error)
@@ -2544,7 +2595,9 @@ char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2544 char *retval; 2595 char *retval;
2545 2596
2546 spin_lock(&dcache_lock); 2597 spin_lock(&dcache_lock);
2598 write_seqlock(&rename_lock);
2547 retval = __dentry_path(dentry, buf, buflen); 2599 retval = __dentry_path(dentry, buf, buflen);
2600 write_sequnlock(&rename_lock);
2548 spin_unlock(&dcache_lock); 2601 spin_unlock(&dcache_lock);
2549 2602
2550 return retval; 2603 return retval;
@@ -2557,6 +2610,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2557 char *retval; 2610 char *retval;
2558 2611
2559 spin_lock(&dcache_lock); 2612 spin_lock(&dcache_lock);
2613 write_seqlock(&rename_lock);
2560 if (d_unlinked(dentry)) { 2614 if (d_unlinked(dentry)) {
2561 p = buf + buflen; 2615 p = buf + buflen;
2562 if (prepend(&p, &buflen, "//deleted", 10) != 0) 2616 if (prepend(&p, &buflen, "//deleted", 10) != 0)
@@ -2564,6 +2618,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2564 buflen++; 2618 buflen++;
2565 } 2619 }
2566 retval = __dentry_path(dentry, buf, buflen); 2620 retval = __dentry_path(dentry, buf, buflen);
2621 write_sequnlock(&rename_lock);
2567 spin_unlock(&dcache_lock); 2622 spin_unlock(&dcache_lock);
2568 if (!IS_ERR(retval) && p) 2623 if (!IS_ERR(retval) && p)
2569 *p = '/'; /* restore '/' overriden with '\0' */ 2624 *p = '/'; /* restore '/' overriden with '\0' */
@@ -2604,6 +2659,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2604 2659
2605 error = -ENOENT; 2660 error = -ENOENT;
2606 spin_lock(&dcache_lock); 2661 spin_lock(&dcache_lock);
2662 write_seqlock(&rename_lock);
2607 if (!d_unlinked(pwd.dentry)) { 2663 if (!d_unlinked(pwd.dentry)) {
2608 unsigned long len; 2664 unsigned long len;
2609 struct path tmp = root; 2665 struct path tmp = root;
@@ -2612,6 +2668,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2612 2668
2613 prepend(&cwd, &buflen, "\0", 1); 2669 prepend(&cwd, &buflen, "\0", 1);
2614 error = prepend_path(&pwd, &tmp, &cwd, &buflen); 2670 error = prepend_path(&pwd, &tmp, &cwd, &buflen);
2671 write_sequnlock(&rename_lock);
2615 spin_unlock(&dcache_lock); 2672 spin_unlock(&dcache_lock);
2616 2673
2617 if (error) 2674 if (error)
@@ -2631,8 +2688,10 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2631 if (copy_to_user(buf, cwd, len)) 2688 if (copy_to_user(buf, cwd, len))
2632 error = -EFAULT; 2689 error = -EFAULT;
2633 } 2690 }
2634 } else 2691 } else {
2692 write_sequnlock(&rename_lock);
2635 spin_unlock(&dcache_lock); 2693 spin_unlock(&dcache_lock);
2694 }
2636 2695
2637out: 2696out:
2638 path_put(&pwd); 2697 path_put(&pwd);
@@ -2660,25 +2719,25 @@ out:
2660int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) 2719int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2661{ 2720{
2662 int result; 2721 int result;
2663 unsigned long seq; 2722 unsigned seq;
2664 2723
2665 if (new_dentry == old_dentry) 2724 if (new_dentry == old_dentry)
2666 return 1; 2725 return 1;
2667 2726
2668 /*
2669 * Need rcu_readlock to protect against the d_parent trashing
2670 * due to d_move
2671 */
2672 rcu_read_lock();
2673 do { 2727 do {
2674 /* for restarting inner loop in case of seq retry */ 2728 /* for restarting inner loop in case of seq retry */
2675 seq = read_seqbegin(&rename_lock); 2729 seq = read_seqbegin(&rename_lock);
2730 /*
2731 * Need rcu_readlock to protect against the d_parent trashing
2732 * due to d_move
2733 */
2734 rcu_read_lock();
2676 if (d_ancestor(old_dentry, new_dentry)) 2735 if (d_ancestor(old_dentry, new_dentry))
2677 result = 1; 2736 result = 1;
2678 else 2737 else
2679 result = 0; 2738 result = 0;
2739 rcu_read_unlock();
2680 } while (read_seqretry(&rename_lock, seq)); 2740 } while (read_seqretry(&rename_lock, seq));
2681 rcu_read_unlock();
2682 2741
2683 return result; 2742 return result;
2684} 2743}
@@ -2710,9 +2769,13 @@ EXPORT_SYMBOL(path_is_under);
2710 2769
2711void d_genocide(struct dentry *root) 2770void d_genocide(struct dentry *root)
2712{ 2771{
2713 struct dentry *this_parent = root; 2772 struct dentry *this_parent;
2714 struct list_head *next; 2773 struct list_head *next;
2774 unsigned seq;
2715 2775
2776rename_retry:
2777 this_parent = root;
2778 seq = read_seqbegin(&rename_lock);
2716 spin_lock(&dcache_lock); 2779 spin_lock(&dcache_lock);
2717 spin_lock(&this_parent->d_lock); 2780 spin_lock(&this_parent->d_lock);
2718repeat: 2781repeat:
@@ -2722,6 +2785,7 @@ resume:
2722 struct list_head *tmp = next; 2785 struct list_head *tmp = next;
2723 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 2786 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2724 next = tmp->next; 2787 next = tmp->next;
2788
2725 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 2789 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2726 if (d_unhashed(dentry) || !dentry->d_inode) { 2790 if (d_unhashed(dentry) || !dentry->d_inode) {
2727 spin_unlock(&dentry->d_lock); 2791 spin_unlock(&dentry->d_lock);
@@ -2734,19 +2798,43 @@ resume:
2734 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); 2798 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2735 goto repeat; 2799 goto repeat;
2736 } 2800 }
2737 dentry->d_count--; 2801 if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2802 dentry->d_flags |= DCACHE_GENOCIDE;
2803 dentry->d_count--;
2804 }
2738 spin_unlock(&dentry->d_lock); 2805 spin_unlock(&dentry->d_lock);
2739 } 2806 }
2740 if (this_parent != root) { 2807 if (this_parent != root) {
2741 next = this_parent->d_u.d_child.next; 2808 struct dentry *tmp;
2742 this_parent->d_count--; 2809 struct dentry *child;
2810
2811 tmp = this_parent->d_parent;
2812 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2813 this_parent->d_flags |= DCACHE_GENOCIDE;
2814 this_parent->d_count--;
2815 }
2816 rcu_read_lock();
2743 spin_unlock(&this_parent->d_lock); 2817 spin_unlock(&this_parent->d_lock);
2744 this_parent = this_parent->d_parent; 2818 child = this_parent;
2819 this_parent = tmp;
2745 spin_lock(&this_parent->d_lock); 2820 spin_lock(&this_parent->d_lock);
2821 /* might go back up the wrong parent if we have had a rename
2822 * or deletion */
2823 if (this_parent != child->d_parent ||
2824 read_seqretry(&rename_lock, seq)) {
2825 spin_unlock(&this_parent->d_lock);
2826 spin_unlock(&dcache_lock);
2827 rcu_read_unlock();
2828 goto rename_retry;
2829 }
2830 rcu_read_unlock();
2831 next = child->d_u.d_child.next;
2746 goto resume; 2832 goto resume;
2747 } 2833 }
2748 spin_unlock(&this_parent->d_lock); 2834 spin_unlock(&this_parent->d_lock);
2749 spin_unlock(&dcache_lock); 2835 spin_unlock(&dcache_lock);
2836 if (read_seqretry(&rename_lock, seq))
2837 goto rename_retry;
2750} 2838}
2751 2839
2752/** 2840/**