aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
authorYan, Zheng <zheng.z.yan@intel.com>2013-02-18 03:38:14 -0500
committerSage Weil <sage@inktank.com>2013-05-02 00:14:33 -0400
commita8673d61ad77ddf2118599507bd40cc345e95368 (patch)
tree0b8688fee3c2f6c7c35db75d42edd806df689d1d /fs/ceph
parent964266cce94cee7e4aca42994fcda206c111e917 (diff)
ceph: use I_COMPLETE inode flag instead of D_COMPLETE flag
Commit c6ffe10015 moved the flag that tracks whether the dcache contents for a directory are complete to the dentry. The problem is that there are lots of places that use ceph_dir_{set,clear,test}_complete() while holding i_ceph_lock, but ceph_dir_{set,clear,test}_complete() may sleep because they call dput(). This patch basically reverts that commit. ceph_d_prune() is called with both the dentry to prune and the parent dentry locked, so it's safe to access the parent dentry's d_inode and clear the I_COMPLETE flag.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Greg Farnum <greg@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/caps.c8
-rw-r--r--fs/ceph/dir.c62
-rw-r--r--fs/ceph/inode.c30
-rw-r--r--fs/ceph/mds_client.c6
-rw-r--r--fs/ceph/super.h23
5 files changed, 34 insertions, 95 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ea1f177739b2..bc575a4a813e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -490,15 +490,17 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
490 ci->i_rdcache_gen++; 490 ci->i_rdcache_gen++;
491 491
492 /* 492 /*
493 * if we are newly issued FILE_SHARED, clear D_COMPLETE; we 493 * if we are newly issued FILE_SHARED, clear I_COMPLETE; we
494 * don't know what happened to this directory while we didn't 494 * don't know what happened to this directory while we didn't
495 * have the cap. 495 * have the cap.
496 */ 496 */
497 if ((issued & CEPH_CAP_FILE_SHARED) && 497 if ((issued & CEPH_CAP_FILE_SHARED) &&
498 (had & CEPH_CAP_FILE_SHARED) == 0) { 498 (had & CEPH_CAP_FILE_SHARED) == 0) {
499 ci->i_shared_gen++; 499 ci->i_shared_gen++;
500 if (S_ISDIR(ci->vfs_inode.i_mode)) 500 if (S_ISDIR(ci->vfs_inode.i_mode)) {
501 ceph_dir_clear_complete(&ci->vfs_inode); 501 dout(" marking %p NOT complete\n", &ci->vfs_inode);
502 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
503 }
502 } 504 }
503} 505}
504 506
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 6d797f46d772..0c369ac62c07 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -107,7 +107,7 @@ static unsigned fpos_off(loff_t p)
107 * falling back to a "normal" sync readdir if any dentries in the dir 107 * falling back to a "normal" sync readdir if any dentries in the dir
108 * are dropped. 108 * are dropped.
109 * 109 *
110 * D_COMPLETE tells indicates we have all dentries in the dir. It is 110 * I_COMPLETE tells indicates we have all dentries in the dir. It is
111 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by 111 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
112 * the MDS if/when the directory is modified). 112 * the MDS if/when the directory is modified).
113 */ 113 */
@@ -198,8 +198,8 @@ more:
198 filp->f_pos++; 198 filp->f_pos++;
199 199
200 /* make sure a dentry wasn't dropped while we didn't have parent lock */ 200 /* make sure a dentry wasn't dropped while we didn't have parent lock */
201 if (!ceph_dir_test_complete(dir)) { 201 if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
202 dout(" lost D_COMPLETE on %p; falling back to mds\n", dir); 202 dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
203 err = -EAGAIN; 203 err = -EAGAIN;
204 goto out; 204 goto out;
205 } 205 }
@@ -284,7 +284,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
284 if ((filp->f_pos == 2 || fi->dentry) && 284 if ((filp->f_pos == 2 || fi->dentry) &&
285 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && 285 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
286 ceph_snap(inode) != CEPH_SNAPDIR && 286 ceph_snap(inode) != CEPH_SNAPDIR &&
287 ceph_dir_test_complete(inode) && 287 (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
288 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { 288 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
289 spin_unlock(&ci->i_ceph_lock); 289 spin_unlock(&ci->i_ceph_lock);
290 err = __dcache_readdir(filp, dirent, filldir); 290 err = __dcache_readdir(filp, dirent, filldir);
@@ -350,7 +350,7 @@ more:
350 350
351 if (!req->r_did_prepopulate) { 351 if (!req->r_did_prepopulate) {
352 dout("readdir !did_prepopulate"); 352 dout("readdir !did_prepopulate");
353 fi->dir_release_count--; /* preclude D_COMPLETE */ 353 fi->dir_release_count--; /* preclude I_COMPLETE */
354 } 354 }
355 355
356 /* note next offset and last dentry name */ 356 /* note next offset and last dentry name */
@@ -429,7 +429,8 @@ more:
429 */ 429 */
430 spin_lock(&ci->i_ceph_lock); 430 spin_lock(&ci->i_ceph_lock);
431 if (ci->i_release_count == fi->dir_release_count) { 431 if (ci->i_release_count == fi->dir_release_count) {
432 ceph_dir_set_complete(inode); 432 dout(" marking %p complete\n", inode);
433 ci->i_ceph_flags |= CEPH_I_COMPLETE;
433 ci->i_max_offset = filp->f_pos; 434 ci->i_max_offset = filp->f_pos;
434 } 435 }
435 spin_unlock(&ci->i_ceph_lock); 436 spin_unlock(&ci->i_ceph_lock);
@@ -604,7 +605,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
604 fsc->mount_options->snapdir_name, 605 fsc->mount_options->snapdir_name,
605 dentry->d_name.len) && 606 dentry->d_name.len) &&
606 !is_root_ceph_dentry(dir, dentry) && 607 !is_root_ceph_dentry(dir, dentry) &&
607 ceph_dir_test_complete(dir) && 608 (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
608 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { 609 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
609 spin_unlock(&ci->i_ceph_lock); 610 spin_unlock(&ci->i_ceph_lock);
610 dout(" dir %p complete, -ENOENT\n", dir); 611 dout(" dir %p complete, -ENOENT\n", dir);
@@ -908,7 +909,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
908 */ 909 */
909 910
910 /* d_move screws up d_subdirs order */ 911 /* d_move screws up d_subdirs order */
911 ceph_dir_clear_complete(new_dir); 912 ceph_i_clear(new_dir, CEPH_I_COMPLETE);
912 913
913 d_move(old_dentry, new_dentry); 914 d_move(old_dentry, new_dentry);
914 915
@@ -1065,44 +1066,6 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry,
1065} 1066}
1066 1067
1067/* 1068/*
1068 * Set/clear/test dir complete flag on the dir's dentry.
1069 */
1070void ceph_dir_set_complete(struct inode *inode)
1071{
1072 struct dentry *dentry = d_find_any_alias(inode);
1073
1074 if (dentry && ceph_dentry(dentry) &&
1075 ceph_test_mount_opt(ceph_sb_to_client(dentry->d_sb), DCACHE)) {
1076 dout(" marking %p (%p) complete\n", inode, dentry);
1077 set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1078 }
1079 dput(dentry);
1080}
1081
1082void ceph_dir_clear_complete(struct inode *inode)
1083{
1084 struct dentry *dentry = d_find_any_alias(inode);
1085
1086 if (dentry && ceph_dentry(dentry)) {
1087 dout(" marking %p (%p) complete\n", inode, dentry);
1088 set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1089 }
1090 dput(dentry);
1091}
1092
1093bool ceph_dir_test_complete(struct inode *inode)
1094{
1095 struct dentry *dentry = d_find_any_alias(inode);
1096
1097 if (dentry && ceph_dentry(dentry)) {
1098 dout(" marking %p (%p) NOT complete\n", inode, dentry);
1099 clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1100 }
1101 dput(dentry);
1102 return false;
1103}
1104
1105/*
1106 * When the VFS prunes a dentry from the cache, we need to clear the 1069 * When the VFS prunes a dentry from the cache, we need to clear the
1107 * complete flag on the parent directory. 1070 * complete flag on the parent directory.
1108 * 1071 *
@@ -1110,15 +1073,13 @@ bool ceph_dir_test_complete(struct inode *inode)
1110 */ 1073 */
1111static void ceph_d_prune(struct dentry *dentry) 1074static void ceph_d_prune(struct dentry *dentry)
1112{ 1075{
1113 struct ceph_dentry_info *di;
1114
1115 dout("ceph_d_prune %p\n", dentry); 1076 dout("ceph_d_prune %p\n", dentry);
1116 1077
1117 /* do we have a valid parent? */ 1078 /* do we have a valid parent? */
1118 if (IS_ROOT(dentry)) 1079 if (IS_ROOT(dentry))
1119 return; 1080 return;
1120 1081
1121 /* if we are not hashed, we don't affect D_COMPLETE */ 1082 /* if we are not hashed, we don't affect I_COMPLETE */
1122 if (d_unhashed(dentry)) 1083 if (d_unhashed(dentry))
1123 return; 1084 return;
1124 1085
@@ -1126,8 +1087,7 @@ static void ceph_d_prune(struct dentry *dentry)
1126 * we hold d_lock, so d_parent is stable, and d_fsdata is never 1087 * we hold d_lock, so d_parent is stable, and d_fsdata is never
1127 * cleared until d_release 1088 * cleared until d_release
1128 */ 1089 */
1129 di = ceph_dentry(dentry->d_parent); 1090 ceph_i_clear(dentry->d_parent->d_inode, CEPH_I_COMPLETE);
1130 clear_bit(CEPH_D_COMPLETE, &di->flags);
1131} 1091}
1132 1092
1133/* 1093/*
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 851814d951cd..be2f262b822d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -561,7 +561,6 @@ static int fill_inode(struct inode *inode,
561 struct ceph_inode_info *ci = ceph_inode(inode); 561 struct ceph_inode_info *ci = ceph_inode(inode);
562 int i; 562 int i;
563 int issued = 0, implemented; 563 int issued = 0, implemented;
564 int updating_inode = 0;
565 struct timespec mtime, atime, ctime; 564 struct timespec mtime, atime, ctime;
566 u32 nsplits; 565 u32 nsplits;
567 struct ceph_buffer *xattr_blob = NULL; 566 struct ceph_buffer *xattr_blob = NULL;
@@ -601,7 +600,6 @@ static int fill_inode(struct inode *inode,
601 (ci->i_version & ~1) >= le64_to_cpu(info->version)) 600 (ci->i_version & ~1) >= le64_to_cpu(info->version))
602 goto no_change; 601 goto no_change;
603 602
604 updating_inode = 1;
605 issued = __ceph_caps_issued(ci, &implemented); 603 issued = __ceph_caps_issued(ci, &implemented);
606 issued |= implemented | __ceph_caps_dirty(ci); 604 issued |= implemented | __ceph_caps_dirty(ci);
607 605
@@ -717,6 +715,17 @@ static int fill_inode(struct inode *inode,
717 ceph_vinop(inode), inode->i_mode); 715 ceph_vinop(inode), inode->i_mode);
718 } 716 }
719 717
718 /* set dir completion flag? */
719 if (S_ISDIR(inode->i_mode) &&
720 ci->i_files == 0 && ci->i_subdirs == 0 &&
721 ceph_snap(inode) == CEPH_NOSNAP &&
722 (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
723 (issued & CEPH_CAP_FILE_EXCL) == 0 &&
724 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
725 dout(" marking %p complete (empty)\n", inode);
726 ci->i_ceph_flags |= CEPH_I_COMPLETE;
727 ci->i_max_offset = 2;
728 }
720no_change: 729no_change:
721 spin_unlock(&ci->i_ceph_lock); 730 spin_unlock(&ci->i_ceph_lock);
722 731
@@ -767,19 +776,6 @@ no_change:
767 __ceph_get_fmode(ci, cap_fmode); 776 __ceph_get_fmode(ci, cap_fmode);
768 } 777 }
769 778
770 /* set dir completion flag? */
771 if (S_ISDIR(inode->i_mode) &&
772 updating_inode && /* didn't jump to no_change */
773 ci->i_files == 0 && ci->i_subdirs == 0 &&
774 ceph_snap(inode) == CEPH_NOSNAP &&
775 (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
776 (issued & CEPH_CAP_FILE_EXCL) == 0 &&
777 !ceph_dir_test_complete(inode)) {
778 dout(" marking %p complete (empty)\n", inode);
779 ceph_dir_set_complete(inode);
780 ci->i_max_offset = 2;
781 }
782
783 /* update delegation info? */ 779 /* update delegation info? */
784 if (dirinfo) 780 if (dirinfo)
785 ceph_fill_dirfrag(inode, dirinfo); 781 ceph_fill_dirfrag(inode, dirinfo);
@@ -861,7 +857,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
861 di = ceph_dentry(dn); 857 di = ceph_dentry(dn);
862 858
863 spin_lock(&ci->i_ceph_lock); 859 spin_lock(&ci->i_ceph_lock);
864 if (!ceph_dir_test_complete(inode)) { 860 if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
865 spin_unlock(&ci->i_ceph_lock); 861 spin_unlock(&ci->i_ceph_lock);
866 return; 862 return;
867 } 863 }
@@ -1066,7 +1062,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1066 * d_move() puts the renamed dentry at the end of 1062 * d_move() puts the renamed dentry at the end of
1067 * d_subdirs. We need to assign it an appropriate 1063 * d_subdirs. We need to assign it an appropriate
1068 * directory offset so we can behave when holding 1064 * directory offset so we can behave when holding
1069 * D_COMPLETE. 1065 * I_COMPLETE.
1070 */ 1066 */
1071 ceph_set_dentry_offset(req->r_old_dentry); 1067 ceph_set_dentry_offset(req->r_old_dentry);
1072 dout("dn %p gets new offset %lld\n", req->r_old_dentry, 1068 dout("dn %p gets new offset %lld\n", req->r_old_dentry,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index fb7cb05d8aba..56da380878c5 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2029,7 +2029,7 @@ out:
2029} 2029}
2030 2030
2031/* 2031/*
2032 * Invalidate dir D_COMPLETE, dentry lease state on an aborted MDS 2032 * Invalidate dir I_COMPLETE, dentry lease state on an aborted MDS
2033 * namespace request. 2033 * namespace request.
2034 */ 2034 */
2035void ceph_invalidate_dir_request(struct ceph_mds_request *req) 2035void ceph_invalidate_dir_request(struct ceph_mds_request *req)
@@ -2037,9 +2037,9 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req)
2037 struct inode *inode = req->r_locked_dir; 2037 struct inode *inode = req->r_locked_dir;
2038 struct ceph_inode_info *ci = ceph_inode(inode); 2038 struct ceph_inode_info *ci = ceph_inode(inode);
2039 2039
2040 dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode); 2040 dout("invalidate_dir_request %p (I_COMPLETE, lease(s))\n", inode);
2041 spin_lock(&ci->i_ceph_lock); 2041 spin_lock(&ci->i_ceph_lock);
2042 ceph_dir_clear_complete(inode); 2042 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
2043 ci->i_release_count++; 2043 ci->i_release_count++;
2044 spin_unlock(&ci->i_ceph_lock); 2044 spin_unlock(&ci->i_ceph_lock);
2045 2045
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 86810b6d973b..20dd1ee3c4f0 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -204,7 +204,6 @@ struct ceph_inode_xattr {
204 * Ceph dentry state 204 * Ceph dentry state
205 */ 205 */
206struct ceph_dentry_info { 206struct ceph_dentry_info {
207 unsigned long flags;
208 struct ceph_mds_session *lease_session; 207 struct ceph_mds_session *lease_session;
209 u32 lease_gen, lease_shared_gen; 208 u32 lease_gen, lease_shared_gen;
210 u32 lease_seq; 209 u32 lease_seq;
@@ -215,18 +214,6 @@ struct ceph_dentry_info {
215 u64 offset; 214 u64 offset;
216}; 215};
217 216
218/*
219 * dentry flags
220 *
221 * The locking for D_COMPLETE is a bit odd:
222 * - we can clear it at almost any time (see ceph_d_prune)
223 * - it is only meaningful if:
224 * - we hold dir inode i_ceph_lock
225 * - we hold dir FILE_SHARED caps
226 * - the dentry D_COMPLETE is set
227 */
228#define CEPH_D_COMPLETE 1 /* if set, d_u.d_subdirs is complete directory */
229
230struct ceph_inode_xattrs_info { 217struct ceph_inode_xattrs_info {
231 /* 218 /*
232 * (still encoded) xattr blob. we avoid the overhead of parsing 219 * (still encoded) xattr blob. we avoid the overhead of parsing
@@ -267,7 +254,7 @@ struct ceph_inode_info {
267 struct timespec i_rctime; 254 struct timespec i_rctime;
268 u64 i_rbytes, i_rfiles, i_rsubdirs; 255 u64 i_rbytes, i_rfiles, i_rsubdirs;
269 u64 i_files, i_subdirs; 256 u64 i_files, i_subdirs;
270 u64 i_max_offset; /* largest readdir offset, set with D_COMPLETE */ 257 u64 i_max_offset; /* largest readdir offset, set with I_COMPLETE */
271 258
272 struct rb_root i_fragtree; 259 struct rb_root i_fragtree;
273 struct mutex i_fragtree_mutex; 260 struct mutex i_fragtree_mutex;
@@ -432,6 +419,7 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
432/* 419/*
433 * Ceph inode. 420 * Ceph inode.
434 */ 421 */
422#define CEPH_I_COMPLETE 1 /* we have complete directory cached */
435#define CEPH_I_NODELAY 4 /* do not delay cap release */ 423#define CEPH_I_NODELAY 4 /* do not delay cap release */
436#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ 424#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
437#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ 425#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
@@ -489,13 +477,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
489} 477}
490 478
491/* 479/*
492 * set/clear directory D_COMPLETE flag
493 */
494void ceph_dir_set_complete(struct inode *inode);
495void ceph_dir_clear_complete(struct inode *inode);
496bool ceph_dir_test_complete(struct inode *inode);
497
498/*
499 * caps helpers 480 * caps helpers
500 */ 481 */
501static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci) 482static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci)