aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorYan, Zheng <zheng.z.yan@intel.com>2014-04-14 01:13:02 -0400
committerSage Weil <sage@inktank.com>2014-04-28 15:54:44 -0400
commit0a8a70f96fe1bd3e07c15bb86fd247e76102398a (patch)
tree0d6f728345f3f4efa33896aab71dd24f611af011 /fs
parent92b2e75158f6b8316b5a567c73dcf5b3d8f6bbce (diff)
ceph: clear directory's completeness when creating file
When creating a file, ceph_set_dentry_offset() puts the new dentry at the end of directory's d_subdirs, then set the dentry's offset based on directory's max offset. The offset does not reflect the real postion of the dentry in directory. Later readdir reply from MDS may change the dentry's position/offset. This inconsistency can cause missing/duplicate entries in readdir result if readdir is partly satisfied by dcache_readdir(). The fix is clear directory's completeness after creating/renaming file. It prevents later readdir from using dcache_readdir(). Fixes: http://tracker.ceph.com/issues/8025 Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com> Reviewed-by: Sage Weil <sage@inktank.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/ceph/dir.c9
-rw-r--r--fs/ceph/inode.c71
-rw-r--r--fs/ceph/super.h1
3 files changed, 21 insertions, 60 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index fb4f7a203eda..c29d6ae68874 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -448,7 +448,6 @@ more:
448 if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { 448 if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
449 dout(" marking %p complete\n", inode); 449 dout(" marking %p complete\n", inode);
450 __ceph_dir_set_complete(ci, fi->dir_release_count); 450 __ceph_dir_set_complete(ci, fi->dir_release_count);
451 ci->i_max_offset = ctx->pos;
452 } 451 }
453 spin_unlock(&ci->i_ceph_lock); 452 spin_unlock(&ci->i_ceph_lock);
454 453
@@ -937,14 +936,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
937 * to do it here. 936 * to do it here.
938 */ 937 */
939 938
940 /* d_move screws up d_subdirs order */
941 ceph_dir_clear_complete(new_dir);
942
943 d_move(old_dentry, new_dentry); 939 d_move(old_dentry, new_dentry);
944 940
945 /* ensure target dentry is invalidated, despite 941 /* ensure target dentry is invalidated, despite
946 rehashing bug in vfs_rename_dir */ 942 rehashing bug in vfs_rename_dir */
947 ceph_invalidate_dentry_lease(new_dentry); 943 ceph_invalidate_dentry_lease(new_dentry);
944
945 /* d_move screws up sibling dentries' offsets */
946 ceph_dir_clear_complete(old_dir);
947 ceph_dir_clear_complete(new_dir);
948
948 } 949 }
949 ceph_mdsc_put_request(req); 950 ceph_mdsc_put_request(req);
950 return err; 951 return err;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 0b0728e5be2d..233c6f96910a 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -744,7 +744,6 @@ static int fill_inode(struct inode *inode,
744 !__ceph_dir_is_complete(ci)) { 744 !__ceph_dir_is_complete(ci)) {
745 dout(" marking %p complete (empty)\n", inode); 745 dout(" marking %p complete (empty)\n", inode);
746 __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); 746 __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
747 ci->i_max_offset = 2;
748 } 747 }
749no_change: 748no_change:
750 /* only update max_size on auth cap */ 749 /* only update max_size on auth cap */
@@ -890,41 +889,6 @@ out_unlock:
890} 889}
891 890
892/* 891/*
893 * Set dentry's directory position based on the current dir's max, and
894 * order it in d_subdirs, so that dcache_readdir behaves.
895 *
896 * Always called under directory's i_mutex.
897 */
898static void ceph_set_dentry_offset(struct dentry *dn)
899{
900 struct dentry *dir = dn->d_parent;
901 struct inode *inode = dir->d_inode;
902 struct ceph_inode_info *ci;
903 struct ceph_dentry_info *di;
904
905 BUG_ON(!inode);
906
907 ci = ceph_inode(inode);
908 di = ceph_dentry(dn);
909
910 spin_lock(&ci->i_ceph_lock);
911 if (!__ceph_dir_is_complete(ci)) {
912 spin_unlock(&ci->i_ceph_lock);
913 return;
914 }
915 di->offset = ceph_inode(inode)->i_max_offset++;
916 spin_unlock(&ci->i_ceph_lock);
917
918 spin_lock(&dir->d_lock);
919 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
920 list_move(&dn->d_u.d_child, &dir->d_subdirs);
921 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
922 dn->d_u.d_child.prev, dn->d_u.d_child.next);
923 spin_unlock(&dn->d_lock);
924 spin_unlock(&dir->d_lock);
925}
926
927/*
928 * splice a dentry to an inode. 892 * splice a dentry to an inode.
929 * caller must hold directory i_mutex for this to be safe. 893 * caller must hold directory i_mutex for this to be safe.
930 * 894 *
@@ -933,7 +897,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
933 * the caller) if we fail. 897 * the caller) if we fail.
934 */ 898 */
935static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, 899static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
936 bool *prehash, bool set_offset) 900 bool *prehash)
937{ 901{
938 struct dentry *realdn; 902 struct dentry *realdn;
939 903
@@ -965,8 +929,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
965 } 929 }
966 if ((!prehash || *prehash) && d_unhashed(dn)) 930 if ((!prehash || *prehash) && d_unhashed(dn))
967 d_rehash(dn); 931 d_rehash(dn);
968 if (set_offset)
969 ceph_set_dentry_offset(dn);
970out: 932out:
971 return dn; 933 return dn;
972} 934}
@@ -987,7 +949,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
987{ 949{
988 struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; 950 struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
989 struct inode *in = NULL; 951 struct inode *in = NULL;
990 struct ceph_mds_reply_inode *ininfo;
991 struct ceph_vino vino; 952 struct ceph_vino vino;
992 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 953 struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
993 int err = 0; 954 int err = 0;
@@ -1161,6 +1122,9 @@ retry_lookup:
1161 1122
1162 /* rename? */ 1123 /* rename? */
1163 if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) { 1124 if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) {
1125 struct inode *olddir = req->r_old_dentry_dir;
1126 BUG_ON(!olddir);
1127
1164 dout(" src %p '%.*s' dst %p '%.*s'\n", 1128 dout(" src %p '%.*s' dst %p '%.*s'\n",
1165 req->r_old_dentry, 1129 req->r_old_dentry,
1166 req->r_old_dentry->d_name.len, 1130 req->r_old_dentry->d_name.len,
@@ -1180,13 +1144,10 @@ retry_lookup:
1180 rehashing bug in vfs_rename_dir */ 1144 rehashing bug in vfs_rename_dir */
1181 ceph_invalidate_dentry_lease(dn); 1145 ceph_invalidate_dentry_lease(dn);
1182 1146
1183 /* 1147 /* d_move screws up sibling dentries' offsets */
1184 * d_move() puts the renamed dentry at the end of 1148 ceph_dir_clear_complete(dir);
1185 * d_subdirs. We need to assign it an appropriate 1149 ceph_dir_clear_complete(olddir);
1186 * directory offset so we can behave when dir is 1150
1187 * complete.
1188 */
1189 ceph_set_dentry_offset(req->r_old_dentry);
1190 dout("dn %p gets new offset %lld\n", req->r_old_dentry, 1151 dout("dn %p gets new offset %lld\n", req->r_old_dentry,
1191 ceph_dentry(req->r_old_dentry)->offset); 1152 ceph_dentry(req->r_old_dentry)->offset);
1192 1153
@@ -1213,8 +1174,9 @@ retry_lookup:
1213 1174
1214 /* attach proper inode */ 1175 /* attach proper inode */
1215 if (!dn->d_inode) { 1176 if (!dn->d_inode) {
1177 ceph_dir_clear_complete(dir);
1216 ihold(in); 1178 ihold(in);
1217 dn = splice_dentry(dn, in, &have_lease, true); 1179 dn = splice_dentry(dn, in, &have_lease);
1218 if (IS_ERR(dn)) { 1180 if (IS_ERR(dn)) {
1219 err = PTR_ERR(dn); 1181 err = PTR_ERR(dn);
1220 goto done; 1182 goto done;
@@ -1235,17 +1197,16 @@ retry_lookup:
1235 (req->r_op == CEPH_MDS_OP_LOOKUPSNAP || 1197 (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
1236 req->r_op == CEPH_MDS_OP_MKSNAP)) { 1198 req->r_op == CEPH_MDS_OP_MKSNAP)) {
1237 struct dentry *dn = req->r_dentry; 1199 struct dentry *dn = req->r_dentry;
1200 struct inode *dir = req->r_locked_dir;
1238 1201
1239 /* fill out a snapdir LOOKUPSNAP dentry */ 1202 /* fill out a snapdir LOOKUPSNAP dentry */
1240 BUG_ON(!dn); 1203 BUG_ON(!dn);
1241 BUG_ON(!req->r_locked_dir); 1204 BUG_ON(!dir);
1242 BUG_ON(ceph_snap(req->r_locked_dir) != CEPH_SNAPDIR); 1205 BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
1243 ininfo = rinfo->targeti.in;
1244 vino.ino = le64_to_cpu(ininfo->ino);
1245 vino.snap = le64_to_cpu(ininfo->snapid);
1246 dout(" linking snapped dir %p to dn %p\n", in, dn); 1206 dout(" linking snapped dir %p to dn %p\n", in, dn);
1207 ceph_dir_clear_complete(dir);
1247 ihold(in); 1208 ihold(in);
1248 dn = splice_dentry(dn, in, NULL, true); 1209 dn = splice_dentry(dn, in, NULL);
1249 if (IS_ERR(dn)) { 1210 if (IS_ERR(dn)) {
1250 err = PTR_ERR(dn); 1211 err = PTR_ERR(dn);
1251 goto done; 1212 goto done;
@@ -1407,7 +1368,7 @@ retry_lookup:
1407 } 1368 }
1408 1369
1409 if (!dn->d_inode) { 1370 if (!dn->d_inode) {
1410 dn = splice_dentry(dn, in, NULL, false); 1371 dn = splice_dentry(dn, in, NULL);
1411 if (IS_ERR(dn)) { 1372 if (IS_ERR(dn)) {
1412 err = PTR_ERR(dn); 1373 err = PTR_ERR(dn);
1413 dn = NULL; 1374 dn = NULL;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7866cd05a6bb..ead05cc1f447 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -266,7 +266,6 @@ struct ceph_inode_info {
266 struct timespec i_rctime; 266 struct timespec i_rctime;
267 u64 i_rbytes, i_rfiles, i_rsubdirs; 267 u64 i_rbytes, i_rfiles, i_rsubdirs;
268 u64 i_files, i_subdirs; 268 u64 i_files, i_subdirs;
269 u64 i_max_offset; /* largest readdir offset, set with complete dir */
270 269
271 struct rb_root i_fragtree; 270 struct rb_root i_fragtree;
272 struct mutex i_fragtree_mutex; 271 struct mutex i_fragtree_mutex;