diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-05-05 18:17:02 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-05-05 18:17:02 -0400 |
commit | 5575eeb7b9f687ca4899e2d8721a9b17265d0060 (patch) | |
tree | f105b068410e48f5b2d65e85802a9d4bfa3e0638 /fs | |
parent | 0624bcaaf06c1fe5aca4e72287a3f13026764d36 (diff) | |
parent | 3bd58143bafc56dbc07f4f085e4d7e018d332674 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph fixes from Sage Weil:
"First, there is a critical fix for the new primary-affinity function
that went into -rc1.
The second batch of patches from Zheng fixes a range of problems with
directory fragmentation, readdir, and a few odds and ends for cephfs"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: reserve caps for file layout/lock MDS requests
ceph: avoid releasing caps that are being used
ceph: clear directory's completeness when creating file
libceph: fix non-default values check in apply_primary_affinity()
ceph: use fpos_cmp() to compare dentry positions
ceph: check directory's completeness before emitting directory entry
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ceph/caps.c | 2 | ||||
-rw-r--r-- | fs/ceph/dir.c | 33 | ||||
-rw-r--r-- | fs/ceph/inode.c | 71 | ||||
-rw-r--r-- | fs/ceph/ioctl.c | 3 | ||||
-rw-r--r-- | fs/ceph/locks.c | 1 | ||||
-rw-r--r-- | fs/ceph/super.h | 1 |
6 files changed, 39 insertions, 72 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 2e5e648eb5c3..c561b628ebce 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -3261,7 +3261,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode, | |||
3261 | rel->seq = cpu_to_le32(cap->seq); | 3261 | rel->seq = cpu_to_le32(cap->seq); |
3262 | rel->issue_seq = cpu_to_le32(cap->issue_seq), | 3262 | rel->issue_seq = cpu_to_le32(cap->issue_seq), |
3263 | rel->mseq = cpu_to_le32(cap->mseq); | 3263 | rel->mseq = cpu_to_le32(cap->mseq); |
3264 | rel->caps = cpu_to_le32(cap->issued); | 3264 | rel->caps = cpu_to_le32(cap->implemented); |
3265 | rel->wanted = cpu_to_le32(cap->mds_wanted); | 3265 | rel->wanted = cpu_to_le32(cap->mds_wanted); |
3266 | rel->dname_len = 0; | 3266 | rel->dname_len = 0; |
3267 | rel->dname_seq = 0; | 3267 | rel->dname_seq = 0; |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 766410a12c2c..c29d6ae68874 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -141,7 +141,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, | |||
141 | 141 | ||
142 | /* start at beginning? */ | 142 | /* start at beginning? */ |
143 | if (ctx->pos == 2 || last == NULL || | 143 | if (ctx->pos == 2 || last == NULL || |
144 | ctx->pos < ceph_dentry(last)->offset) { | 144 | fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) { |
145 | if (list_empty(&parent->d_subdirs)) | 145 | if (list_empty(&parent->d_subdirs)) |
146 | goto out_unlock; | 146 | goto out_unlock; |
147 | p = parent->d_subdirs.prev; | 147 | p = parent->d_subdirs.prev; |
@@ -182,9 +182,16 @@ more: | |||
182 | spin_unlock(&dentry->d_lock); | 182 | spin_unlock(&dentry->d_lock); |
183 | spin_unlock(&parent->d_lock); | 183 | spin_unlock(&parent->d_lock); |
184 | 184 | ||
185 | /* make sure a dentry wasn't dropped while we didn't have parent lock */ | ||
186 | if (!ceph_dir_is_complete(dir)) { | ||
187 | dout(" lost dir complete on %p; falling back to mds\n", dir); | ||
188 | dput(dentry); | ||
189 | err = -EAGAIN; | ||
190 | goto out; | ||
191 | } | ||
192 | |||
185 | dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos, | 193 | dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos, |
186 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); | 194 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); |
187 | ctx->pos = di->offset; | ||
188 | if (!dir_emit(ctx, dentry->d_name.name, | 195 | if (!dir_emit(ctx, dentry->d_name.name, |
189 | dentry->d_name.len, | 196 | dentry->d_name.len, |
190 | ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino), | 197 | ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino), |
@@ -198,19 +205,12 @@ more: | |||
198 | return 0; | 205 | return 0; |
199 | } | 206 | } |
200 | 207 | ||
208 | ctx->pos = di->offset + 1; | ||
209 | |||
201 | if (last) | 210 | if (last) |
202 | dput(last); | 211 | dput(last); |
203 | last = dentry; | 212 | last = dentry; |
204 | 213 | ||
205 | ctx->pos++; | ||
206 | |||
207 | /* make sure a dentry wasn't dropped while we didn't have parent lock */ | ||
208 | if (!ceph_dir_is_complete(dir)) { | ||
209 | dout(" lost dir complete on %p; falling back to mds\n", dir); | ||
210 | err = -EAGAIN; | ||
211 | goto out; | ||
212 | } | ||
213 | |||
214 | spin_lock(&parent->d_lock); | 214 | spin_lock(&parent->d_lock); |
215 | p = p->prev; /* advance to next dentry */ | 215 | p = p->prev; /* advance to next dentry */ |
216 | goto more; | 216 | goto more; |
@@ -296,6 +296,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) | |||
296 | err = __dcache_readdir(file, ctx, shared_gen); | 296 | err = __dcache_readdir(file, ctx, shared_gen); |
297 | if (err != -EAGAIN) | 297 | if (err != -EAGAIN) |
298 | return err; | 298 | return err; |
299 | frag = fpos_frag(ctx->pos); | ||
300 | off = fpos_off(ctx->pos); | ||
299 | } else { | 301 | } else { |
300 | spin_unlock(&ci->i_ceph_lock); | 302 | spin_unlock(&ci->i_ceph_lock); |
301 | } | 303 | } |
@@ -446,7 +448,6 @@ more: | |||
446 | if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { | 448 | if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { |
447 | dout(" marking %p complete\n", inode); | 449 | dout(" marking %p complete\n", inode); |
448 | __ceph_dir_set_complete(ci, fi->dir_release_count); | 450 | __ceph_dir_set_complete(ci, fi->dir_release_count); |
449 | ci->i_max_offset = ctx->pos; | ||
450 | } | 451 | } |
451 | spin_unlock(&ci->i_ceph_lock); | 452 | spin_unlock(&ci->i_ceph_lock); |
452 | 453 | ||
@@ -935,14 +936,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
935 | * to do it here. | 936 | * to do it here. |
936 | */ | 937 | */ |
937 | 938 | ||
938 | /* d_move screws up d_subdirs order */ | ||
939 | ceph_dir_clear_complete(new_dir); | ||
940 | |||
941 | d_move(old_dentry, new_dentry); | 939 | d_move(old_dentry, new_dentry); |
942 | 940 | ||
943 | /* ensure target dentry is invalidated, despite | 941 | /* ensure target dentry is invalidated, despite |
944 | rehashing bug in vfs_rename_dir */ | 942 | rehashing bug in vfs_rename_dir */ |
945 | ceph_invalidate_dentry_lease(new_dentry); | 943 | ceph_invalidate_dentry_lease(new_dentry); |
944 | |||
945 | /* d_move screws up sibling dentries' offsets */ | ||
946 | ceph_dir_clear_complete(old_dir); | ||
947 | ceph_dir_clear_complete(new_dir); | ||
948 | |||
946 | } | 949 | } |
947 | ceph_mdsc_put_request(req); | 950 | ceph_mdsc_put_request(req); |
948 | return err; | 951 | return err; |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 0b0728e5be2d..233c6f96910a 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -744,7 +744,6 @@ static int fill_inode(struct inode *inode, | |||
744 | !__ceph_dir_is_complete(ci)) { | 744 | !__ceph_dir_is_complete(ci)) { |
745 | dout(" marking %p complete (empty)\n", inode); | 745 | dout(" marking %p complete (empty)\n", inode); |
746 | __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); | 746 | __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); |
747 | ci->i_max_offset = 2; | ||
748 | } | 747 | } |
749 | no_change: | 748 | no_change: |
750 | /* only update max_size on auth cap */ | 749 | /* only update max_size on auth cap */ |
@@ -890,41 +889,6 @@ out_unlock: | |||
890 | } | 889 | } |
891 | 890 | ||
892 | /* | 891 | /* |
893 | * Set dentry's directory position based on the current dir's max, and | ||
894 | * order it in d_subdirs, so that dcache_readdir behaves. | ||
895 | * | ||
896 | * Always called under directory's i_mutex. | ||
897 | */ | ||
898 | static void ceph_set_dentry_offset(struct dentry *dn) | ||
899 | { | ||
900 | struct dentry *dir = dn->d_parent; | ||
901 | struct inode *inode = dir->d_inode; | ||
902 | struct ceph_inode_info *ci; | ||
903 | struct ceph_dentry_info *di; | ||
904 | |||
905 | BUG_ON(!inode); | ||
906 | |||
907 | ci = ceph_inode(inode); | ||
908 | di = ceph_dentry(dn); | ||
909 | |||
910 | spin_lock(&ci->i_ceph_lock); | ||
911 | if (!__ceph_dir_is_complete(ci)) { | ||
912 | spin_unlock(&ci->i_ceph_lock); | ||
913 | return; | ||
914 | } | ||
915 | di->offset = ceph_inode(inode)->i_max_offset++; | ||
916 | spin_unlock(&ci->i_ceph_lock); | ||
917 | |||
918 | spin_lock(&dir->d_lock); | ||
919 | spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); | ||
920 | list_move(&dn->d_u.d_child, &dir->d_subdirs); | ||
921 | dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, | ||
922 | dn->d_u.d_child.prev, dn->d_u.d_child.next); | ||
923 | spin_unlock(&dn->d_lock); | ||
924 | spin_unlock(&dir->d_lock); | ||
925 | } | ||
926 | |||
927 | /* | ||
928 | * splice a dentry to an inode. | 892 | * splice a dentry to an inode. |
929 | * caller must hold directory i_mutex for this to be safe. | 893 | * caller must hold directory i_mutex for this to be safe. |
930 | * | 894 | * |
@@ -933,7 +897,7 @@ static void ceph_set_dentry_offset(struct dentry *dn) | |||
933 | * the caller) if we fail. | 897 | * the caller) if we fail. |
934 | */ | 898 | */ |
935 | static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | 899 | static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, |
936 | bool *prehash, bool set_offset) | 900 | bool *prehash) |
937 | { | 901 | { |
938 | struct dentry *realdn; | 902 | struct dentry *realdn; |
939 | 903 | ||
@@ -965,8 +929,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
965 | } | 929 | } |
966 | if ((!prehash || *prehash) && d_unhashed(dn)) | 930 | if ((!prehash || *prehash) && d_unhashed(dn)) |
967 | d_rehash(dn); | 931 | d_rehash(dn); |
968 | if (set_offset) | ||
969 | ceph_set_dentry_offset(dn); | ||
970 | out: | 932 | out: |
971 | return dn; | 933 | return dn; |
972 | } | 934 | } |
@@ -987,7 +949,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
987 | { | 949 | { |
988 | struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; | 950 | struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; |
989 | struct inode *in = NULL; | 951 | struct inode *in = NULL; |
990 | struct ceph_mds_reply_inode *ininfo; | ||
991 | struct ceph_vino vino; | 952 | struct ceph_vino vino; |
992 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); | 953 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
993 | int err = 0; | 954 | int err = 0; |
@@ -1161,6 +1122,9 @@ retry_lookup: | |||
1161 | 1122 | ||
1162 | /* rename? */ | 1123 | /* rename? */ |
1163 | if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) { | 1124 | if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) { |
1125 | struct inode *olddir = req->r_old_dentry_dir; | ||
1126 | BUG_ON(!olddir); | ||
1127 | |||
1164 | dout(" src %p '%.*s' dst %p '%.*s'\n", | 1128 | dout(" src %p '%.*s' dst %p '%.*s'\n", |
1165 | req->r_old_dentry, | 1129 | req->r_old_dentry, |
1166 | req->r_old_dentry->d_name.len, | 1130 | req->r_old_dentry->d_name.len, |
@@ -1180,13 +1144,10 @@ retry_lookup: | |||
1180 | rehashing bug in vfs_rename_dir */ | 1144 | rehashing bug in vfs_rename_dir */ |
1181 | ceph_invalidate_dentry_lease(dn); | 1145 | ceph_invalidate_dentry_lease(dn); |
1182 | 1146 | ||
1183 | /* | 1147 | /* d_move screws up sibling dentries' offsets */ |
1184 | * d_move() puts the renamed dentry at the end of | 1148 | ceph_dir_clear_complete(dir); |
1185 | * d_subdirs. We need to assign it an appropriate | 1149 | ceph_dir_clear_complete(olddir); |
1186 | * directory offset so we can behave when dir is | 1150 | |
1187 | * complete. | ||
1188 | */ | ||
1189 | ceph_set_dentry_offset(req->r_old_dentry); | ||
1190 | dout("dn %p gets new offset %lld\n", req->r_old_dentry, | 1151 | dout("dn %p gets new offset %lld\n", req->r_old_dentry, |
1191 | ceph_dentry(req->r_old_dentry)->offset); | 1152 | ceph_dentry(req->r_old_dentry)->offset); |
1192 | 1153 | ||
@@ -1213,8 +1174,9 @@ retry_lookup: | |||
1213 | 1174 | ||
1214 | /* attach proper inode */ | 1175 | /* attach proper inode */ |
1215 | if (!dn->d_inode) { | 1176 | if (!dn->d_inode) { |
1177 | ceph_dir_clear_complete(dir); | ||
1216 | ihold(in); | 1178 | ihold(in); |
1217 | dn = splice_dentry(dn, in, &have_lease, true); | 1179 | dn = splice_dentry(dn, in, &have_lease); |
1218 | if (IS_ERR(dn)) { | 1180 | if (IS_ERR(dn)) { |
1219 | err = PTR_ERR(dn); | 1181 | err = PTR_ERR(dn); |
1220 | goto done; | 1182 | goto done; |
@@ -1235,17 +1197,16 @@ retry_lookup: | |||
1235 | (req->r_op == CEPH_MDS_OP_LOOKUPSNAP || | 1197 | (req->r_op == CEPH_MDS_OP_LOOKUPSNAP || |
1236 | req->r_op == CEPH_MDS_OP_MKSNAP)) { | 1198 | req->r_op == CEPH_MDS_OP_MKSNAP)) { |
1237 | struct dentry *dn = req->r_dentry; | 1199 | struct dentry *dn = req->r_dentry; |
1200 | struct inode *dir = req->r_locked_dir; | ||
1238 | 1201 | ||
1239 | /* fill out a snapdir LOOKUPSNAP dentry */ | 1202 | /* fill out a snapdir LOOKUPSNAP dentry */ |
1240 | BUG_ON(!dn); | 1203 | BUG_ON(!dn); |
1241 | BUG_ON(!req->r_locked_dir); | 1204 | BUG_ON(!dir); |
1242 | BUG_ON(ceph_snap(req->r_locked_dir) != CEPH_SNAPDIR); | 1205 | BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR); |
1243 | ininfo = rinfo->targeti.in; | ||
1244 | vino.ino = le64_to_cpu(ininfo->ino); | ||
1245 | vino.snap = le64_to_cpu(ininfo->snapid); | ||
1246 | dout(" linking snapped dir %p to dn %p\n", in, dn); | 1206 | dout(" linking snapped dir %p to dn %p\n", in, dn); |
1207 | ceph_dir_clear_complete(dir); | ||
1247 | ihold(in); | 1208 | ihold(in); |
1248 | dn = splice_dentry(dn, in, NULL, true); | 1209 | dn = splice_dentry(dn, in, NULL); |
1249 | if (IS_ERR(dn)) { | 1210 | if (IS_ERR(dn)) { |
1250 | err = PTR_ERR(dn); | 1211 | err = PTR_ERR(dn); |
1251 | goto done; | 1212 | goto done; |
@@ -1407,7 +1368,7 @@ retry_lookup: | |||
1407 | } | 1368 | } |
1408 | 1369 | ||
1409 | if (!dn->d_inode) { | 1370 | if (!dn->d_inode) { |
1410 | dn = splice_dentry(dn, in, NULL, false); | 1371 | dn = splice_dentry(dn, in, NULL); |
1411 | if (IS_ERR(dn)) { | 1372 | if (IS_ERR(dn)) { |
1412 | err = PTR_ERR(dn); | 1373 | err = PTR_ERR(dn); |
1413 | dn = NULL; | 1374 | dn = NULL; |
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index fdf941b44ff1..a822a6e58290 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -109,6 +109,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||
109 | return PTR_ERR(req); | 109 | return PTR_ERR(req); |
110 | req->r_inode = inode; | 110 | req->r_inode = inode; |
111 | ihold(inode); | 111 | ihold(inode); |
112 | req->r_num_caps = 1; | ||
113 | |||
112 | req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL; | 114 | req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL; |
113 | 115 | ||
114 | req->r_args.setlayout.layout.fl_stripe_unit = | 116 | req->r_args.setlayout.layout.fl_stripe_unit = |
@@ -153,6 +155,7 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) | |||
153 | return PTR_ERR(req); | 155 | return PTR_ERR(req); |
154 | req->r_inode = inode; | 156 | req->r_inode = inode; |
155 | ihold(inode); | 157 | ihold(inode); |
158 | req->r_num_caps = 1; | ||
156 | 159 | ||
157 | req->r_args.setlayout.layout.fl_stripe_unit = | 160 | req->r_args.setlayout.layout.fl_stripe_unit = |
158 | cpu_to_le32(l.stripe_unit); | 161 | cpu_to_le32(l.stripe_unit); |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index d94ba0df9f4d..191398852a2e 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -45,6 +45,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
45 | return PTR_ERR(req); | 45 | return PTR_ERR(req); |
46 | req->r_inode = inode; | 46 | req->r_inode = inode; |
47 | ihold(inode); | 47 | ihold(inode); |
48 | req->r_num_caps = 1; | ||
48 | 49 | ||
49 | /* mds requires start and length rather than start and end */ | 50 | /* mds requires start and length rather than start and end */ |
50 | if (LLONG_MAX == fl->fl_end) | 51 | if (LLONG_MAX == fl->fl_end) |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 7866cd05a6bb..ead05cc1f447 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -266,7 +266,6 @@ struct ceph_inode_info { | |||
266 | struct timespec i_rctime; | 266 | struct timespec i_rctime; |
267 | u64 i_rbytes, i_rfiles, i_rsubdirs; | 267 | u64 i_rbytes, i_rfiles, i_rsubdirs; |
268 | u64 i_files, i_subdirs; | 268 | u64 i_files, i_subdirs; |
269 | u64 i_max_offset; /* largest readdir offset, set with complete dir */ | ||
270 | 269 | ||
271 | struct rb_root i_fragtree; | 270 | struct rb_root i_fragtree; |
272 | struct mutex i_fragtree_mutex; | 271 | struct mutex i_fragtree_mutex; |