aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ceph/export.c350
-rw-r--r--include/linux/ceph/ceph_fs.h6
2 files changed, 329 insertions, 27 deletions
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index d64e7472fa41..d3ef7ee429ec 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -22,18 +22,77 @@ struct ceph_nfs_confh {
22 u64 ino, parent_ino; 22 u64 ino, parent_ino;
23} __attribute__ ((packed)); 23} __attribute__ ((packed));
24 24
25/*
26 * fh for snapped inode
27 */
28struct ceph_nfs_snapfh {
29 u64 ino;
30 u64 snapid;
31 u64 parent_ino;
32 u32 hash;
33} __attribute__ ((packed));
34
35static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
36 struct inode *parent_inode)
37{
38 const static int snap_handle_length =
39 sizeof(struct ceph_nfs_snapfh) >> 2;
40 struct ceph_nfs_snapfh *sfh = (void *)rawfh;
41 u64 snapid = ceph_snap(inode);
42 int ret;
43 bool no_parent = true;
44
45 if (*max_len < snap_handle_length) {
46 *max_len = snap_handle_length;
47 ret = FILEID_INVALID;
48 goto out;
49 }
50
51 ret = -EINVAL;
52 if (snapid != CEPH_SNAPDIR) {
53 struct inode *dir;
54 struct dentry *dentry = d_find_alias(inode);
55 if (!dentry)
56 goto out;
57
58 rcu_read_lock();
59 dir = d_inode_rcu(dentry->d_parent);
60 if (ceph_snap(dir) != CEPH_SNAPDIR) {
61 sfh->parent_ino = ceph_ino(dir);
62 sfh->hash = ceph_dentry_hash(dir, dentry);
63 no_parent = false;
64 }
65 rcu_read_unlock();
66 dput(dentry);
67 }
68
69 if (no_parent) {
70 if (!S_ISDIR(inode->i_mode))
71 goto out;
72 sfh->parent_ino = sfh->ino;
73 sfh->hash = 0;
74 }
75 sfh->ino = ceph_ino(inode);
76 sfh->snapid = snapid;
77
78 *max_len = snap_handle_length;
79 ret = FILEID_BTRFS_WITH_PARENT;
80out:
81 dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret);
82 return ret;
83}
84
25static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, 85static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
26 struct inode *parent_inode) 86 struct inode *parent_inode)
27{ 87{
88 const static int handle_length =
89 sizeof(struct ceph_nfs_fh) >> 2;
90 const static int connected_handle_length =
91 sizeof(struct ceph_nfs_confh) >> 2;
28 int type; 92 int type;
29 struct ceph_nfs_fh *fh = (void *)rawfh;
30 struct ceph_nfs_confh *cfh = (void *)rawfh;
31 int connected_handle_length = sizeof(*cfh)/4;
32 int handle_length = sizeof(*fh)/4;
33 93
34 /* don't re-export snaps */
35 if (ceph_snap(inode) != CEPH_NOSNAP) 94 if (ceph_snap(inode) != CEPH_NOSNAP)
36 return -EINVAL; 95 return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode);
37 96
38 if (parent_inode && (*max_len < connected_handle_length)) { 97 if (parent_inode && (*max_len < connected_handle_length)) {
39 *max_len = connected_handle_length; 98 *max_len = connected_handle_length;
@@ -44,6 +103,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
44 } 103 }
45 104
46 if (parent_inode) { 105 if (parent_inode) {
106 struct ceph_nfs_confh *cfh = (void *)rawfh;
47 dout("encode_fh %llx with parent %llx\n", 107 dout("encode_fh %llx with parent %llx\n",
48 ceph_ino(inode), ceph_ino(parent_inode)); 108 ceph_ino(inode), ceph_ino(parent_inode));
49 cfh->ino = ceph_ino(inode); 109 cfh->ino = ceph_ino(inode);
@@ -51,6 +111,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
51 *max_len = connected_handle_length; 111 *max_len = connected_handle_length;
52 type = FILEID_INO32_GEN_PARENT; 112 type = FILEID_INO32_GEN_PARENT;
53 } else { 113 } else {
114 struct ceph_nfs_fh *fh = (void *)rawfh;
54 dout("encode_fh %llx\n", ceph_ino(inode)); 115 dout("encode_fh %llx\n", ceph_ino(inode));
55 fh->ino = ceph_ino(inode); 116 fh->ino = ceph_ino(inode);
56 *max_len = handle_length; 117 *max_len = handle_length;
@@ -59,7 +120,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
59 return type; 120 return type;
60} 121}
61 122
62struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino) 123static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
63{ 124{
64 struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; 125 struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
65 struct inode *inode; 126 struct inode *inode;
@@ -81,7 +142,7 @@ struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
81 mask = CEPH_STAT_CAP_INODE; 142 mask = CEPH_STAT_CAP_INODE;
82 if (ceph_security_xattr_wanted(d_inode(sb->s_root))) 143 if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
83 mask |= CEPH_CAP_XATTR_SHARED; 144 mask |= CEPH_CAP_XATTR_SHARED;
84 req->r_args.getattr.mask = cpu_to_le32(mask); 145 req->r_args.lookupino.mask = cpu_to_le32(mask);
85 146
86 req->r_ino1 = vino; 147 req->r_ino1 = vino;
87 req->r_num_caps = 1; 148 req->r_num_caps = 1;
@@ -92,25 +153,113 @@ struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
92 ceph_mdsc_put_request(req); 153 ceph_mdsc_put_request(req);
93 if (!inode) 154 if (!inode)
94 return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE); 155 return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE);
95 if (inode->i_nlink == 0) {
96 iput(inode);
97 return ERR_PTR(-ESTALE);
98 }
99 } 156 }
157 return inode;
158}
100 159
160struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
161{
162 struct inode *inode = __lookup_inode(sb, ino);
163 if (IS_ERR(inode))
164 return inode;
165 if (inode->i_nlink == 0) {
166 iput(inode);
167 return ERR_PTR(-ESTALE);
168 }
101 return inode; 169 return inode;
102} 170}
103 171
104static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) 172static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
105{ 173{
106 struct inode *inode = ceph_lookup_inode(sb, ino); 174 struct inode *inode = __lookup_inode(sb, ino);
107
108 if (IS_ERR(inode)) 175 if (IS_ERR(inode))
109 return ERR_CAST(inode); 176 return ERR_CAST(inode);
110 177 if (inode->i_nlink == 0) {
178 iput(inode);
179 return ERR_PTR(-ESTALE);
180 }
111 return d_obtain_alias(inode); 181 return d_obtain_alias(inode);
112} 182}
113 183
184static struct dentry *__snapfh_to_dentry(struct super_block *sb,
185 struct ceph_nfs_snapfh *sfh,
186 bool want_parent)
187{
188 struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
189 struct ceph_mds_request *req;
190 struct inode *inode;
191 struct ceph_vino vino;
192 int mask;
193 int err;
194 bool unlinked = false;
195
196 if (want_parent) {
197 vino.ino = sfh->parent_ino;
198 if (sfh->snapid == CEPH_SNAPDIR)
199 vino.snap = CEPH_NOSNAP;
200 else if (sfh->ino == sfh->parent_ino)
201 vino.snap = CEPH_SNAPDIR;
202 else
203 vino.snap = sfh->snapid;
204 } else {
205 vino.ino = sfh->ino;
206 vino.snap = sfh->snapid;
207 }
208 inode = ceph_find_inode(sb, vino);
209 if (inode)
210 return d_obtain_alias(inode);
211
212 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
213 USE_ANY_MDS);
214 if (IS_ERR(req))
215 return ERR_CAST(req);
216
217 mask = CEPH_STAT_CAP_INODE;
218 if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
219 mask |= CEPH_CAP_XATTR_SHARED;
220 req->r_args.lookupino.mask = cpu_to_le32(mask);
221 if (vino.snap < CEPH_NOSNAP) {
222 req->r_args.lookupino.snapid = cpu_to_le64(vino.snap);
223 if (!want_parent && sfh->ino != sfh->parent_ino) {
224 req->r_args.lookupino.parent =
225 cpu_to_le64(sfh->parent_ino);
226 req->r_args.lookupino.hash =
227 cpu_to_le32(sfh->hash);
228 }
229 }
230
231 req->r_ino1 = vino;
232 req->r_num_caps = 1;
233 err = ceph_mdsc_do_request(mdsc, NULL, req);
234 inode = req->r_target_inode;
235 if (inode) {
236 if (vino.snap == CEPH_SNAPDIR) {
237 if (inode->i_nlink == 0)
238 unlinked = true;
239 inode = ceph_get_snapdir(inode);
240 } else if (ceph_snap(inode) == vino.snap) {
241 ihold(inode);
242 } else {
243 /* mds does not support lookup snapped inode */
244 err = -EOPNOTSUPP;
245 inode = NULL;
246 }
247 }
248 ceph_mdsc_put_request(req);
249
250 if (want_parent) {
251 dout("snapfh_to_parent %llx.%llx\n err=%d\n",
252 vino.ino, vino.snap, err);
253 } else {
254 dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d",
255 vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err);
256 }
257 if (!inode)
258 return ERR_PTR(-ESTALE);
259 /* see comments in ceph_get_parent() */
260 return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
261}
262
114/* 263/*
115 * convert regular fh to dentry 264 * convert regular fh to dentry
116 */ 265 */
@@ -120,6 +269,11 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
120{ 269{
121 struct ceph_nfs_fh *fh = (void *)fid->raw; 270 struct ceph_nfs_fh *fh = (void *)fid->raw;
122 271
272 if (fh_type == FILEID_BTRFS_WITH_PARENT) {
273 struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
274 return __snapfh_to_dentry(sb, sfh, false);
275 }
276
123 if (fh_type != FILEID_INO32_GEN && 277 if (fh_type != FILEID_INO32_GEN &&
124 fh_type != FILEID_INO32_GEN_PARENT) 278 fh_type != FILEID_INO32_GEN_PARENT)
125 return NULL; 279 return NULL;
@@ -173,13 +327,49 @@ static struct dentry *__get_parent(struct super_block *sb,
173 327
174static struct dentry *ceph_get_parent(struct dentry *child) 328static struct dentry *ceph_get_parent(struct dentry *child)
175{ 329{
176 /* don't re-export snaps */ 330 struct inode *inode = d_inode(child);
177 if (ceph_snap(d_inode(child)) != CEPH_NOSNAP) 331 struct dentry *dn;
178 return ERR_PTR(-EINVAL); 332
179 333 if (ceph_snap(inode) != CEPH_NOSNAP) {
180 dout("get_parent %p ino %llx.%llx\n", 334 struct inode* dir;
181 child, ceph_vinop(d_inode(child))); 335 bool unlinked = false;
182 return __get_parent(child->d_sb, child, 0); 336 /* do not support non-directory */
337 if (!d_is_dir(child)) {
338 dn = ERR_PTR(-EINVAL);
339 goto out;
340 }
341 dir = __lookup_inode(inode->i_sb, ceph_ino(inode));
342 if (IS_ERR(dir)) {
343 dn = ERR_CAST(dir);
344 goto out;
345 }
346 /* There can be multiple paths to access snapped inode.
347 * For simplicity, treat snapdir of head inode as parent */
348 if (ceph_snap(inode) != CEPH_SNAPDIR) {
349 struct inode *snapdir = ceph_get_snapdir(dir);
350 if (dir->i_nlink == 0)
351 unlinked = true;
352 iput(dir);
353 if (IS_ERR(snapdir)) {
354 dn = ERR_CAST(snapdir);
355 goto out;
356 }
357 dir = snapdir;
358 }
359 /* If directory has already been deleted, futher get_parent
360 * will fail. Do not mark snapdir dentry as disconnected,
361 * this prevent exportfs from doing futher get_parent. */
362 if (unlinked)
363 dn = d_obtain_root(dir);
364 else
365 dn = d_obtain_alias(dir);
366 } else {
367 dn = __get_parent(child->d_sb, child, 0);
368 }
369out:
370 dout("get_parent %p ino %llx.%llx err=%ld\n",
371 child, ceph_vinop(inode), (IS_ERR(dn) ? PTR_ERR(dn) : 0));
372 return dn;
183} 373}
184 374
185/* 375/*
@@ -192,6 +382,11 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
192 struct ceph_nfs_confh *cfh = (void *)fid->raw; 382 struct ceph_nfs_confh *cfh = (void *)fid->raw;
193 struct dentry *dentry; 383 struct dentry *dentry;
194 384
385 if (fh_type == FILEID_BTRFS_WITH_PARENT) {
386 struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
387 return __snapfh_to_dentry(sb, sfh, true);
388 }
389
195 if (fh_type != FILEID_INO32_GEN_PARENT) 390 if (fh_type != FILEID_INO32_GEN_PARENT)
196 return NULL; 391 return NULL;
197 if (fh_len < sizeof(*cfh) / 4) 392 if (fh_len < sizeof(*cfh) / 4)
@@ -204,14 +399,115 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
204 return dentry; 399 return dentry;
205} 400}
206 401
402static int __get_snap_name(struct dentry *parent, char *name,
403 struct dentry *child)
404{
405 struct inode *inode = d_inode(child);
406 struct inode *dir = d_inode(parent);
407 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
408 struct ceph_mds_request *req = NULL;
409 char *last_name = NULL;
410 unsigned next_offset = 2;
411 int err = -EINVAL;
412
413 if (ceph_ino(inode) != ceph_ino(dir))
414 goto out;
415 if (ceph_snap(inode) == CEPH_SNAPDIR) {
416 if (ceph_snap(dir) == CEPH_NOSNAP) {
417 strcpy(name, fsc->mount_options->snapdir_name);
418 err = 0;
419 }
420 goto out;
421 }
422 if (ceph_snap(dir) != CEPH_SNAPDIR)
423 goto out;
424
425 while (1) {
426 struct ceph_mds_reply_info_parsed *rinfo;
427 struct ceph_mds_reply_dir_entry *rde;
428 int i;
429
430 req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP,
431 USE_AUTH_MDS);
432 if (IS_ERR(req)) {
433 err = PTR_ERR(req);
434 req = NULL;
435 goto out;
436 }
437 err = ceph_alloc_readdir_reply_buffer(req, inode);
438 if (err)
439 goto out;
440
441 req->r_direct_mode = USE_AUTH_MDS;
442 req->r_readdir_offset = next_offset;
443 req->r_args.readdir.flags =
444 cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
445 if (last_name) {
446 req->r_path2 = last_name;
447 last_name = NULL;
448 }
449
450 req->r_inode = dir;
451 ihold(dir);
452 req->r_dentry = dget(parent);
453
454 inode_lock(dir);
455 err = ceph_mdsc_do_request(fsc->mdsc, NULL, req);
456 inode_unlock(dir);
457
458 if (err < 0)
459 goto out;
460
461 rinfo = &req->r_reply_info;
462 for (i = 0; i < rinfo->dir_nr; i++) {
463 rde = rinfo->dir_entries + i;
464 BUG_ON(!rde->inode.in);
465 if (ceph_snap(inode) ==
466 le64_to_cpu(rde->inode.in->snapid)) {
467 memcpy(name, rde->name, rde->name_len);
468 name[rde->name_len] = '\0';
469 err = 0;
470 goto out;
471 }
472 }
473
474 if (rinfo->dir_end)
475 break;
476
477 BUG_ON(rinfo->dir_nr <= 0);
478 rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
479 next_offset += rinfo->dir_nr;
480 last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
481 if (!last_name) {
482 err = -ENOMEM;
483 goto out;
484 }
485
486 ceph_mdsc_put_request(req);
487 req = NULL;
488 }
489 err = -ENOENT;
490out:
491 if (req)
492 ceph_mdsc_put_request(req);
493 kfree(last_name);
494 dout("get_snap_name %p ino %llx.%llx err=%d\n",
495 child, ceph_vinop(inode), err);
496 return err;
497}
498
207static int ceph_get_name(struct dentry *parent, char *name, 499static int ceph_get_name(struct dentry *parent, char *name,
208 struct dentry *child) 500 struct dentry *child)
209{ 501{
210 struct ceph_mds_client *mdsc; 502 struct ceph_mds_client *mdsc;
211 struct ceph_mds_request *req; 503 struct ceph_mds_request *req;
504 struct inode *inode = d_inode(child);
212 int err; 505 int err;
213 506
214 mdsc = ceph_inode_to_client(d_inode(child))->mdsc; 507 if (ceph_snap(inode) != CEPH_NOSNAP)
508 return __get_snap_name(parent, name, child);
509
510 mdsc = ceph_inode_to_client(inode)->mdsc;
215 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME, 511 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
216 USE_ANY_MDS); 512 USE_ANY_MDS);
217 if (IS_ERR(req)) 513 if (IS_ERR(req))
@@ -219,8 +515,8 @@ static int ceph_get_name(struct dentry *parent, char *name,
219 515
220 inode_lock(d_inode(parent)); 516 inode_lock(d_inode(parent));
221 517
222 req->r_inode = d_inode(child); 518 req->r_inode = inode;
223 ihold(d_inode(child)); 519 ihold(inode);
224 req->r_ino2 = ceph_vino(d_inode(parent)); 520 req->r_ino2 = ceph_vino(d_inode(parent));
225 req->r_parent = d_inode(parent); 521 req->r_parent = d_inode(parent);
226 set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); 522 set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
@@ -234,10 +530,10 @@ static int ceph_get_name(struct dentry *parent, char *name,
234 memcpy(name, rinfo->dname, rinfo->dname_len); 530 memcpy(name, rinfo->dname, rinfo->dname_len);
235 name[rinfo->dname_len] = 0; 531 name[rinfo->dname_len] = 0;
236 dout("get_name %p ino %llx.%llx name %s\n", 532 dout("get_name %p ino %llx.%llx name %s\n",
237 child, ceph_vinop(d_inode(child)), name); 533 child, ceph_vinop(inode), name);
238 } else { 534 } else {
239 dout("get_name %p ino %llx.%llx err %d\n", 535 dout("get_name %p ino %llx.%llx err %d\n",
240 child, ceph_vinop(d_inode(child)), err); 536 child, ceph_vinop(inode), err);
241 } 537 }
242 538
243 ceph_mdsc_put_request(req); 539 ceph_mdsc_put_request(req);
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 4903deb0777a..3ac0feaf2b5e 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -436,6 +436,12 @@ union ceph_mds_request_args {
436 __le64 length; /* num bytes to lock from start */ 436 __le64 length; /* num bytes to lock from start */
437 __u8 wait; /* will caller wait for lock to become available? */ 437 __u8 wait; /* will caller wait for lock to become available? */
438 } __attribute__ ((packed)) filelock_change; 438 } __attribute__ ((packed)) filelock_change;
439 struct {
440 __le32 mask; /* CEPH_CAP_* */
441 __le64 snapid;
442 __le64 parent;
443 __le32 hash;
444 } __attribute__ ((packed)) lookupino;
439} __attribute__ ((packed)); 445} __attribute__ ((packed));
440 446
441#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ 447#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */