aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-05-05 18:17:02 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-05-05 18:17:02 -0400
commit5575eeb7b9f687ca4899e2d8721a9b17265d0060 (patch)
treef105b068410e48f5b2d65e85802a9d4bfa3e0638 /fs
parent0624bcaaf06c1fe5aca4e72287a3f13026764d36 (diff)
parent3bd58143bafc56dbc07f4f085e4d7e018d332674 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph fixes from Sage Weil: "First, there is a critical fix for the new primary-affinity function that went into -rc1. The second batch of patches from Zheng fix a range of problems with directory fragmentation, readdir, and a few odds and ends for cephfs" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: ceph: reserve caps for file layout/lock MDS requests ceph: avoid releasing caps that are being used ceph: clear directory's completeness when creating file libceph: fix non-default values check in apply_primary_affinity() ceph: use fpos_cmp() to compare dentry positions ceph: check directory's completeness before emitting directory entry
Diffstat (limited to 'fs')
-rw-r--r--fs/ceph/caps.c2
-rw-r--r--fs/ceph/dir.c33
-rw-r--r--fs/ceph/inode.c71
-rw-r--r--fs/ceph/ioctl.c3
-rw-r--r--fs/ceph/locks.c1
-rw-r--r--fs/ceph/super.h1
6 files changed, 39 insertions, 72 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 2e5e648eb5c3..c561b628ebce 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3261,7 +3261,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
3261 rel->seq = cpu_to_le32(cap->seq); 3261 rel->seq = cpu_to_le32(cap->seq);
3262 rel->issue_seq = cpu_to_le32(cap->issue_seq), 3262 rel->issue_seq = cpu_to_le32(cap->issue_seq),
3263 rel->mseq = cpu_to_le32(cap->mseq); 3263 rel->mseq = cpu_to_le32(cap->mseq);
3264 rel->caps = cpu_to_le32(cap->issued); 3264 rel->caps = cpu_to_le32(cap->implemented);
3265 rel->wanted = cpu_to_le32(cap->mds_wanted); 3265 rel->wanted = cpu_to_le32(cap->mds_wanted);
3266 rel->dname_len = 0; 3266 rel->dname_len = 0;
3267 rel->dname_seq = 0; 3267 rel->dname_seq = 0;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 766410a12c2c..c29d6ae68874 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -141,7 +141,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
141 141
142 /* start at beginning? */ 142 /* start at beginning? */
143 if (ctx->pos == 2 || last == NULL || 143 if (ctx->pos == 2 || last == NULL ||
144 ctx->pos < ceph_dentry(last)->offset) { 144 fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) {
145 if (list_empty(&parent->d_subdirs)) 145 if (list_empty(&parent->d_subdirs))
146 goto out_unlock; 146 goto out_unlock;
147 p = parent->d_subdirs.prev; 147 p = parent->d_subdirs.prev;
@@ -182,9 +182,16 @@ more:
182 spin_unlock(&dentry->d_lock); 182 spin_unlock(&dentry->d_lock);
183 spin_unlock(&parent->d_lock); 183 spin_unlock(&parent->d_lock);
184 184
185 /* make sure a dentry wasn't dropped while we didn't have parent lock */
186 if (!ceph_dir_is_complete(dir)) {
187 dout(" lost dir complete on %p; falling back to mds\n", dir);
188 dput(dentry);
189 err = -EAGAIN;
190 goto out;
191 }
192
185 dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos, 193 dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos,
186 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); 194 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
187 ctx->pos = di->offset;
188 if (!dir_emit(ctx, dentry->d_name.name, 195 if (!dir_emit(ctx, dentry->d_name.name,
189 dentry->d_name.len, 196 dentry->d_name.len,
190 ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino), 197 ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
@@ -198,19 +205,12 @@ more:
198 return 0; 205 return 0;
199 } 206 }
200 207
208 ctx->pos = di->offset + 1;
209
201 if (last) 210 if (last)
202 dput(last); 211 dput(last);
203 last = dentry; 212 last = dentry;
204 213
205 ctx->pos++;
206
207 /* make sure a dentry wasn't dropped while we didn't have parent lock */
208 if (!ceph_dir_is_complete(dir)) {
209 dout(" lost dir complete on %p; falling back to mds\n", dir);
210 err = -EAGAIN;
211 goto out;
212 }
213
214 spin_lock(&parent->d_lock); 214 spin_lock(&parent->d_lock);
215 p = p->prev; /* advance to next dentry */ 215 p = p->prev; /* advance to next dentry */
216 goto more; 216 goto more;
@@ -296,6 +296,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
296 err = __dcache_readdir(file, ctx, shared_gen); 296 err = __dcache_readdir(file, ctx, shared_gen);
297 if (err != -EAGAIN) 297 if (err != -EAGAIN)
298 return err; 298 return err;
299 frag = fpos_frag(ctx->pos);
300 off = fpos_off(ctx->pos);
299 } else { 301 } else {
300 spin_unlock(&ci->i_ceph_lock); 302 spin_unlock(&ci->i_ceph_lock);
301 } 303 }
@@ -446,7 +448,6 @@ more:
446 if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { 448 if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
447 dout(" marking %p complete\n", inode); 449 dout(" marking %p complete\n", inode);
448 __ceph_dir_set_complete(ci, fi->dir_release_count); 450 __ceph_dir_set_complete(ci, fi->dir_release_count);
449 ci->i_max_offset = ctx->pos;
450 } 451 }
451 spin_unlock(&ci->i_ceph_lock); 452 spin_unlock(&ci->i_ceph_lock);
452 453
@@ -935,14 +936,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
935 * to do it here. 936 * to do it here.
936 */ 937 */
937 938
938 /* d_move screws up d_subdirs order */
939 ceph_dir_clear_complete(new_dir);
940
941 d_move(old_dentry, new_dentry); 939 d_move(old_dentry, new_dentry);
942 940
943 /* ensure target dentry is invalidated, despite 941 /* ensure target dentry is invalidated, despite
944 rehashing bug in vfs_rename_dir */ 942 rehashing bug in vfs_rename_dir */
945 ceph_invalidate_dentry_lease(new_dentry); 943 ceph_invalidate_dentry_lease(new_dentry);
944
945 /* d_move screws up sibling dentries' offsets */
946 ceph_dir_clear_complete(old_dir);
947 ceph_dir_clear_complete(new_dir);
948
946 } 949 }
947 ceph_mdsc_put_request(req); 950 ceph_mdsc_put_request(req);
948 return err; 951 return err;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 0b0728e5be2d..233c6f96910a 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -744,7 +744,6 @@ static int fill_inode(struct inode *inode,
744 !__ceph_dir_is_complete(ci)) { 744 !__ceph_dir_is_complete(ci)) {
745 dout(" marking %p complete (empty)\n", inode); 745 dout(" marking %p complete (empty)\n", inode);
746 __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); 746 __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
747 ci->i_max_offset = 2;
748 } 747 }
749no_change: 748no_change:
750 /* only update max_size on auth cap */ 749 /* only update max_size on auth cap */
@@ -890,41 +889,6 @@ out_unlock:
890} 889}
891 890
892/* 891/*
893 * Set dentry's directory position based on the current dir's max, and
894 * order it in d_subdirs, so that dcache_readdir behaves.
895 *
896 * Always called under directory's i_mutex.
897 */
898static void ceph_set_dentry_offset(struct dentry *dn)
899{
900 struct dentry *dir = dn->d_parent;
901 struct inode *inode = dir->d_inode;
902 struct ceph_inode_info *ci;
903 struct ceph_dentry_info *di;
904
905 BUG_ON(!inode);
906
907 ci = ceph_inode(inode);
908 di = ceph_dentry(dn);
909
910 spin_lock(&ci->i_ceph_lock);
911 if (!__ceph_dir_is_complete(ci)) {
912 spin_unlock(&ci->i_ceph_lock);
913 return;
914 }
915 di->offset = ceph_inode(inode)->i_max_offset++;
916 spin_unlock(&ci->i_ceph_lock);
917
918 spin_lock(&dir->d_lock);
919 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
920 list_move(&dn->d_u.d_child, &dir->d_subdirs);
921 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
922 dn->d_u.d_child.prev, dn->d_u.d_child.next);
923 spin_unlock(&dn->d_lock);
924 spin_unlock(&dir->d_lock);
925}
926
927/*
928 * splice a dentry to an inode. 892 * splice a dentry to an inode.
929 * caller must hold directory i_mutex for this to be safe. 893 * caller must hold directory i_mutex for this to be safe.
930 * 894 *
@@ -933,7 +897,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
933 * the caller) if we fail. 897 * the caller) if we fail.
934 */ 898 */
935static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, 899static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
936 bool *prehash, bool set_offset) 900 bool *prehash)
937{ 901{
938 struct dentry *realdn; 902 struct dentry *realdn;
939 903
@@ -965,8 +929,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
965 } 929 }
966 if ((!prehash || *prehash) && d_unhashed(dn)) 930 if ((!prehash || *prehash) && d_unhashed(dn))
967 d_rehash(dn); 931 d_rehash(dn);
968 if (set_offset)
969 ceph_set_dentry_offset(dn);
970out: 932out:
971 return dn; 933 return dn;
972} 934}
@@ -987,7 +949,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
987{ 949{
988 struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; 950 struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
989 struct inode *in = NULL; 951 struct inode *in = NULL;
990 struct ceph_mds_reply_inode *ininfo;
991 struct ceph_vino vino; 952 struct ceph_vino vino;
992 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 953 struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
993 int err = 0; 954 int err = 0;
@@ -1161,6 +1122,9 @@ retry_lookup:
1161 1122
1162 /* rename? */ 1123 /* rename? */
1163 if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) { 1124 if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) {
1125 struct inode *olddir = req->r_old_dentry_dir;
1126 BUG_ON(!olddir);
1127
1164 dout(" src %p '%.*s' dst %p '%.*s'\n", 1128 dout(" src %p '%.*s' dst %p '%.*s'\n",
1165 req->r_old_dentry, 1129 req->r_old_dentry,
1166 req->r_old_dentry->d_name.len, 1130 req->r_old_dentry->d_name.len,
@@ -1180,13 +1144,10 @@ retry_lookup:
1180 rehashing bug in vfs_rename_dir */ 1144 rehashing bug in vfs_rename_dir */
1181 ceph_invalidate_dentry_lease(dn); 1145 ceph_invalidate_dentry_lease(dn);
1182 1146
1183 /* 1147 /* d_move screws up sibling dentries' offsets */
1184 * d_move() puts the renamed dentry at the end of 1148 ceph_dir_clear_complete(dir);
1185 * d_subdirs. We need to assign it an appropriate 1149 ceph_dir_clear_complete(olddir);
1186 * directory offset so we can behave when dir is 1150
1187 * complete.
1188 */
1189 ceph_set_dentry_offset(req->r_old_dentry);
1190 dout("dn %p gets new offset %lld\n", req->r_old_dentry, 1151 dout("dn %p gets new offset %lld\n", req->r_old_dentry,
1191 ceph_dentry(req->r_old_dentry)->offset); 1152 ceph_dentry(req->r_old_dentry)->offset);
1192 1153
@@ -1213,8 +1174,9 @@ retry_lookup:
1213 1174
1214 /* attach proper inode */ 1175 /* attach proper inode */
1215 if (!dn->d_inode) { 1176 if (!dn->d_inode) {
1177 ceph_dir_clear_complete(dir);
1216 ihold(in); 1178 ihold(in);
1217 dn = splice_dentry(dn, in, &have_lease, true); 1179 dn = splice_dentry(dn, in, &have_lease);
1218 if (IS_ERR(dn)) { 1180 if (IS_ERR(dn)) {
1219 err = PTR_ERR(dn); 1181 err = PTR_ERR(dn);
1220 goto done; 1182 goto done;
@@ -1235,17 +1197,16 @@ retry_lookup:
1235 (req->r_op == CEPH_MDS_OP_LOOKUPSNAP || 1197 (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
1236 req->r_op == CEPH_MDS_OP_MKSNAP)) { 1198 req->r_op == CEPH_MDS_OP_MKSNAP)) {
1237 struct dentry *dn = req->r_dentry; 1199 struct dentry *dn = req->r_dentry;
1200 struct inode *dir = req->r_locked_dir;
1238 1201
1239 /* fill out a snapdir LOOKUPSNAP dentry */ 1202 /* fill out a snapdir LOOKUPSNAP dentry */
1240 BUG_ON(!dn); 1203 BUG_ON(!dn);
1241 BUG_ON(!req->r_locked_dir); 1204 BUG_ON(!dir);
1242 BUG_ON(ceph_snap(req->r_locked_dir) != CEPH_SNAPDIR); 1205 BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
1243 ininfo = rinfo->targeti.in;
1244 vino.ino = le64_to_cpu(ininfo->ino);
1245 vino.snap = le64_to_cpu(ininfo->snapid);
1246 dout(" linking snapped dir %p to dn %p\n", in, dn); 1206 dout(" linking snapped dir %p to dn %p\n", in, dn);
1207 ceph_dir_clear_complete(dir);
1247 ihold(in); 1208 ihold(in);
1248 dn = splice_dentry(dn, in, NULL, true); 1209 dn = splice_dentry(dn, in, NULL);
1249 if (IS_ERR(dn)) { 1210 if (IS_ERR(dn)) {
1250 err = PTR_ERR(dn); 1211 err = PTR_ERR(dn);
1251 goto done; 1212 goto done;
@@ -1407,7 +1368,7 @@ retry_lookup:
1407 } 1368 }
1408 1369
1409 if (!dn->d_inode) { 1370 if (!dn->d_inode) {
1410 dn = splice_dentry(dn, in, NULL, false); 1371 dn = splice_dentry(dn, in, NULL);
1411 if (IS_ERR(dn)) { 1372 if (IS_ERR(dn)) {
1412 err = PTR_ERR(dn); 1373 err = PTR_ERR(dn);
1413 dn = NULL; 1374 dn = NULL;
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index fdf941b44ff1..a822a6e58290 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -109,6 +109,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
109 return PTR_ERR(req); 109 return PTR_ERR(req);
110 req->r_inode = inode; 110 req->r_inode = inode;
111 ihold(inode); 111 ihold(inode);
112 req->r_num_caps = 1;
113
112 req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL; 114 req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
113 115
114 req->r_args.setlayout.layout.fl_stripe_unit = 116 req->r_args.setlayout.layout.fl_stripe_unit =
@@ -153,6 +155,7 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
153 return PTR_ERR(req); 155 return PTR_ERR(req);
154 req->r_inode = inode; 156 req->r_inode = inode;
155 ihold(inode); 157 ihold(inode);
158 req->r_num_caps = 1;
156 159
157 req->r_args.setlayout.layout.fl_stripe_unit = 160 req->r_args.setlayout.layout.fl_stripe_unit =
158 cpu_to_le32(l.stripe_unit); 161 cpu_to_le32(l.stripe_unit);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index d94ba0df9f4d..191398852a2e 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -45,6 +45,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
45 return PTR_ERR(req); 45 return PTR_ERR(req);
46 req->r_inode = inode; 46 req->r_inode = inode;
47 ihold(inode); 47 ihold(inode);
48 req->r_num_caps = 1;
48 49
49 /* mds requires start and length rather than start and end */ 50 /* mds requires start and length rather than start and end */
50 if (LLONG_MAX == fl->fl_end) 51 if (LLONG_MAX == fl->fl_end)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7866cd05a6bb..ead05cc1f447 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -266,7 +266,6 @@ struct ceph_inode_info {
266 struct timespec i_rctime; 266 struct timespec i_rctime;
267 u64 i_rbytes, i_rfiles, i_rsubdirs; 267 u64 i_rbytes, i_rfiles, i_rsubdirs;
268 u64 i_files, i_subdirs; 268 u64 i_files, i_subdirs;
269 u64 i_max_offset; /* largest readdir offset, set with complete dir */
270 269
271 struct rb_root i_fragtree; 270 struct rb_root i_fragtree;
272 struct mutex i_fragtree_mutex; 271 struct mutex i_fragtree_mutex;