diff options
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/cache.c | 1 | ||||
-rw-r--r-- | fs/ceph/cache.h | 10 | ||||
-rw-r--r-- | fs/ceph/caps.c | 9 | ||||
-rw-r--r-- | fs/ceph/debugfs.c | 5 | ||||
-rw-r--r-- | fs/ceph/dir.c | 53 | ||||
-rw-r--r-- | fs/ceph/export.c | 267 | ||||
-rw-r--r-- | fs/ceph/file.c | 20 | ||||
-rw-r--r-- | fs/ceph/inode.c | 76 | ||||
-rw-r--r-- | fs/ceph/ioctl.c | 8 | ||||
-rw-r--r-- | fs/ceph/locks.c | 98 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 97 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 4 | ||||
-rw-r--r-- | fs/ceph/strings.c | 1 | ||||
-rw-r--r-- | fs/ceph/super.c | 1 | ||||
-rw-r--r-- | fs/ceph/super.h | 3 | ||||
-rw-r--r-- | fs/ceph/xattr.c | 48 |
16 files changed, 415 insertions, 286 deletions
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 8c44fdd4e1c3..834f9f3723fb 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c | |||
@@ -205,6 +205,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc, | |||
205 | ci->fscache = fscache_acquire_cookie(fsc->fscache, | 205 | ci->fscache = fscache_acquire_cookie(fsc->fscache, |
206 | &ceph_fscache_inode_object_def, | 206 | &ceph_fscache_inode_object_def, |
207 | ci, true); | 207 | ci, true); |
208 | fscache_check_consistency(ci->fscache); | ||
208 | done: | 209 | done: |
209 | mutex_unlock(&inode->i_mutex); | 210 | mutex_unlock(&inode->i_mutex); |
210 | 211 | ||
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h index da95f61b7a09..5ac591bd012b 100644 --- a/fs/ceph/cache.h +++ b/fs/ceph/cache.h | |||
@@ -48,6 +48,12 @@ void ceph_readpage_to_fscache(struct inode *inode, struct page *page); | |||
48 | void ceph_invalidate_fscache_page(struct inode* inode, struct page *page); | 48 | void ceph_invalidate_fscache_page(struct inode* inode, struct page *page); |
49 | void ceph_queue_revalidate(struct inode *inode); | 49 | void ceph_queue_revalidate(struct inode *inode); |
50 | 50 | ||
51 | static inline void ceph_fscache_update_objectsize(struct inode *inode) | ||
52 | { | ||
53 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
54 | fscache_attr_changed(ci->fscache); | ||
55 | } | ||
56 | |||
51 | static inline void ceph_fscache_invalidate(struct inode *inode) | 57 | static inline void ceph_fscache_invalidate(struct inode *inode) |
52 | { | 58 | { |
53 | fscache_invalidate(ceph_inode(inode)->fscache); | 59 | fscache_invalidate(ceph_inode(inode)->fscache); |
@@ -135,6 +141,10 @@ static inline void ceph_readpage_to_fscache(struct inode *inode, | |||
135 | { | 141 | { |
136 | } | 142 | } |
137 | 143 | ||
144 | static inline void ceph_fscache_update_objectsize(struct inode *inode) | ||
145 | { | ||
146 | } | ||
147 | |||
138 | static inline void ceph_fscache_invalidate(struct inode *inode) | 148 | static inline void ceph_fscache_invalidate(struct inode *inode) |
139 | { | 149 | { |
140 | } | 150 | } |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 17543383545c..2e5e648eb5c3 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -622,8 +622,10 @@ retry: | |||
622 | 622 | ||
623 | if (flags & CEPH_CAP_FLAG_AUTH) { | 623 | if (flags & CEPH_CAP_FLAG_AUTH) { |
624 | if (ci->i_auth_cap == NULL || | 624 | if (ci->i_auth_cap == NULL || |
625 | ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) | 625 | ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) { |
626 | ci->i_auth_cap = cap; | 626 | ci->i_auth_cap = cap; |
627 | cap->mds_wanted = wanted; | ||
628 | } | ||
627 | ci->i_cap_exporting_issued = 0; | 629 | ci->i_cap_exporting_issued = 0; |
628 | } else { | 630 | } else { |
629 | WARN_ON(ci->i_auth_cap == cap); | 631 | WARN_ON(ci->i_auth_cap == cap); |
@@ -885,7 +887,10 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) | |||
885 | cap = rb_entry(p, struct ceph_cap, ci_node); | 887 | cap = rb_entry(p, struct ceph_cap, ci_node); |
886 | if (!__cap_is_valid(cap)) | 888 | if (!__cap_is_valid(cap)) |
887 | continue; | 889 | continue; |
888 | mds_wanted |= cap->mds_wanted; | 890 | if (cap == ci->i_auth_cap) |
891 | mds_wanted |= cap->mds_wanted; | ||
892 | else | ||
893 | mds_wanted |= (cap->mds_wanted & ~CEPH_CAP_ANY_FILE_WR); | ||
889 | } | 894 | } |
890 | return mds_wanted; | 895 | return mds_wanted; |
891 | } | 896 | } |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 6d59006bfa27..16b54aa31f08 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -93,6 +93,8 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
93 | } else if (req->r_path1) { | 93 | } else if (req->r_path1) { |
94 | seq_printf(s, " #%llx/%s", req->r_ino1.ino, | 94 | seq_printf(s, " #%llx/%s", req->r_ino1.ino, |
95 | req->r_path1); | 95 | req->r_path1); |
96 | } else { | ||
97 | seq_printf(s, " #%llx", req->r_ino1.ino); | ||
96 | } | 98 | } |
97 | 99 | ||
98 | if (req->r_old_dentry) { | 100 | if (req->r_old_dentry) { |
@@ -102,7 +104,8 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
102 | path = NULL; | 104 | path = NULL; |
103 | spin_lock(&req->r_old_dentry->d_lock); | 105 | spin_lock(&req->r_old_dentry->d_lock); |
104 | seq_printf(s, " #%llx/%.*s (%s)", | 106 | seq_printf(s, " #%llx/%.*s (%s)", |
105 | ceph_ino(req->r_old_dentry_dir), | 107 | req->r_old_dentry_dir ? |
108 | ceph_ino(req->r_old_dentry_dir) : 0, | ||
106 | req->r_old_dentry->d_name.len, | 109 | req->r_old_dentry->d_name.len, |
107 | req->r_old_dentry->d_name.name, | 110 | req->r_old_dentry->d_name.name, |
108 | path ? path : ""); | 111 | path ? path : ""); |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 45eda6d7a40c..766410a12c2c 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -119,7 +119,8 @@ static int fpos_cmp(loff_t l, loff_t r) | |||
119 | * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by | 119 | * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by |
120 | * the MDS if/when the directory is modified). | 120 | * the MDS if/when the directory is modified). |
121 | */ | 121 | */ |
122 | static int __dcache_readdir(struct file *file, struct dir_context *ctx) | 122 | static int __dcache_readdir(struct file *file, struct dir_context *ctx, |
123 | u32 shared_gen) | ||
123 | { | 124 | { |
124 | struct ceph_file_info *fi = file->private_data; | 125 | struct ceph_file_info *fi = file->private_data; |
125 | struct dentry *parent = file->f_dentry; | 126 | struct dentry *parent = file->f_dentry; |
@@ -133,8 +134,8 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx) | |||
133 | last = fi->dentry; | 134 | last = fi->dentry; |
134 | fi->dentry = NULL; | 135 | fi->dentry = NULL; |
135 | 136 | ||
136 | dout("__dcache_readdir %p at %llu (last %p)\n", dir, ctx->pos, | 137 | dout("__dcache_readdir %p v%u at %llu (last %p)\n", |
137 | last); | 138 | dir, shared_gen, ctx->pos, last); |
138 | 139 | ||
139 | spin_lock(&parent->d_lock); | 140 | spin_lock(&parent->d_lock); |
140 | 141 | ||
@@ -161,7 +162,8 @@ more: | |||
161 | goto out_unlock; | 162 | goto out_unlock; |
162 | } | 163 | } |
163 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); | 164 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); |
164 | if (!d_unhashed(dentry) && dentry->d_inode && | 165 | if (di->lease_shared_gen == shared_gen && |
166 | !d_unhashed(dentry) && dentry->d_inode && | ||
165 | ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && | 167 | ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && |
166 | ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && | 168 | ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && |
167 | fpos_cmp(ctx->pos, di->offset) <= 0) | 169 | fpos_cmp(ctx->pos, di->offset) <= 0) |
@@ -190,7 +192,7 @@ more: | |||
190 | if (last) { | 192 | if (last) { |
191 | /* remember our position */ | 193 | /* remember our position */ |
192 | fi->dentry = last; | 194 | fi->dentry = last; |
193 | fi->next_offset = di->offset; | 195 | fi->next_offset = fpos_off(di->offset); |
194 | } | 196 | } |
195 | dput(dentry); | 197 | dput(dentry); |
196 | return 0; | 198 | return 0; |
@@ -252,8 +254,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) | |||
252 | int err; | 254 | int err; |
253 | u32 ftype; | 255 | u32 ftype; |
254 | struct ceph_mds_reply_info_parsed *rinfo; | 256 | struct ceph_mds_reply_info_parsed *rinfo; |
255 | const int max_entries = fsc->mount_options->max_readdir; | ||
256 | const int max_bytes = fsc->mount_options->max_readdir_bytes; | ||
257 | 257 | ||
258 | dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off); | 258 | dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off); |
259 | if (fi->flags & CEPH_F_ATEND) | 259 | if (fi->flags & CEPH_F_ATEND) |
@@ -291,8 +291,9 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) | |||
291 | ceph_snap(inode) != CEPH_SNAPDIR && | 291 | ceph_snap(inode) != CEPH_SNAPDIR && |
292 | __ceph_dir_is_complete(ci) && | 292 | __ceph_dir_is_complete(ci) && |
293 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { | 293 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { |
294 | u32 shared_gen = ci->i_shared_gen; | ||
294 | spin_unlock(&ci->i_ceph_lock); | 295 | spin_unlock(&ci->i_ceph_lock); |
295 | err = __dcache_readdir(file, ctx); | 296 | err = __dcache_readdir(file, ctx, shared_gen); |
296 | if (err != -EAGAIN) | 297 | if (err != -EAGAIN) |
297 | return err; | 298 | return err; |
298 | } else { | 299 | } else { |
@@ -322,14 +323,16 @@ more: | |||
322 | fi->last_readdir = NULL; | 323 | fi->last_readdir = NULL; |
323 | } | 324 | } |
324 | 325 | ||
325 | /* requery frag tree, as the frag topology may have changed */ | ||
326 | frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL); | ||
327 | |||
328 | dout("readdir fetching %llx.%llx frag %x offset '%s'\n", | 326 | dout("readdir fetching %llx.%llx frag %x offset '%s'\n", |
329 | ceph_vinop(inode), frag, fi->last_name); | 327 | ceph_vinop(inode), frag, fi->last_name); |
330 | req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); | 328 | req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); |
331 | if (IS_ERR(req)) | 329 | if (IS_ERR(req)) |
332 | return PTR_ERR(req); | 330 | return PTR_ERR(req); |
331 | err = ceph_alloc_readdir_reply_buffer(req, inode); | ||
332 | if (err) { | ||
333 | ceph_mdsc_put_request(req); | ||
334 | return err; | ||
335 | } | ||
333 | req->r_inode = inode; | 336 | req->r_inode = inode; |
334 | ihold(inode); | 337 | ihold(inode); |
335 | req->r_dentry = dget(file->f_dentry); | 338 | req->r_dentry = dget(file->f_dentry); |
@@ -340,9 +343,6 @@ more: | |||
340 | req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); | 343 | req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); |
341 | req->r_readdir_offset = fi->next_offset; | 344 | req->r_readdir_offset = fi->next_offset; |
342 | req->r_args.readdir.frag = cpu_to_le32(frag); | 345 | req->r_args.readdir.frag = cpu_to_le32(frag); |
343 | req->r_args.readdir.max_entries = cpu_to_le32(max_entries); | ||
344 | req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes); | ||
345 | req->r_num_caps = max_entries + 1; | ||
346 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 346 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
347 | if (err < 0) { | 347 | if (err < 0) { |
348 | ceph_mdsc_put_request(req); | 348 | ceph_mdsc_put_request(req); |
@@ -369,9 +369,9 @@ more: | |||
369 | fi->next_offset = 0; | 369 | fi->next_offset = 0; |
370 | off = fi->next_offset; | 370 | off = fi->next_offset; |
371 | } | 371 | } |
372 | fi->frag = frag; | ||
372 | fi->offset = fi->next_offset; | 373 | fi->offset = fi->next_offset; |
373 | fi->last_readdir = req; | 374 | fi->last_readdir = req; |
374 | fi->frag = frag; | ||
375 | 375 | ||
376 | if (req->r_reply_info.dir_end) { | 376 | if (req->r_reply_info.dir_end) { |
377 | kfree(fi->last_name); | 377 | kfree(fi->last_name); |
@@ -454,7 +454,7 @@ more: | |||
454 | return 0; | 454 | return 0; |
455 | } | 455 | } |
456 | 456 | ||
457 | static void reset_readdir(struct ceph_file_info *fi) | 457 | static void reset_readdir(struct ceph_file_info *fi, unsigned frag) |
458 | { | 458 | { |
459 | if (fi->last_readdir) { | 459 | if (fi->last_readdir) { |
460 | ceph_mdsc_put_request(fi->last_readdir); | 460 | ceph_mdsc_put_request(fi->last_readdir); |
@@ -462,7 +462,10 @@ static void reset_readdir(struct ceph_file_info *fi) | |||
462 | } | 462 | } |
463 | kfree(fi->last_name); | 463 | kfree(fi->last_name); |
464 | fi->last_name = NULL; | 464 | fi->last_name = NULL; |
465 | fi->next_offset = 2; /* compensate for . and .. */ | 465 | if (ceph_frag_is_leftmost(frag)) |
466 | fi->next_offset = 2; /* compensate for . and .. */ | ||
467 | else | ||
468 | fi->next_offset = 0; | ||
466 | if (fi->dentry) { | 469 | if (fi->dentry) { |
467 | dput(fi->dentry); | 470 | dput(fi->dentry); |
468 | fi->dentry = NULL; | 471 | fi->dentry = NULL; |
@@ -474,7 +477,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) | |||
474 | { | 477 | { |
475 | struct ceph_file_info *fi = file->private_data; | 478 | struct ceph_file_info *fi = file->private_data; |
476 | struct inode *inode = file->f_mapping->host; | 479 | struct inode *inode = file->f_mapping->host; |
477 | loff_t old_offset = offset; | 480 | loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset); |
478 | loff_t retval; | 481 | loff_t retval; |
479 | 482 | ||
480 | mutex_lock(&inode->i_mutex); | 483 | mutex_lock(&inode->i_mutex); |
@@ -491,7 +494,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) | |||
491 | goto out; | 494 | goto out; |
492 | } | 495 | } |
493 | 496 | ||
494 | if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { | 497 | if (offset >= 0) { |
495 | if (offset != file->f_pos) { | 498 | if (offset != file->f_pos) { |
496 | file->f_pos = offset; | 499 | file->f_pos = offset; |
497 | file->f_version = 0; | 500 | file->f_version = 0; |
@@ -504,14 +507,14 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) | |||
504 | * seek to new frag, or seek prior to current chunk. | 507 | * seek to new frag, or seek prior to current chunk. |
505 | */ | 508 | */ |
506 | if (offset == 0 || | 509 | if (offset == 0 || |
507 | fpos_frag(offset) != fpos_frag(old_offset) || | 510 | fpos_frag(offset) != fi->frag || |
508 | fpos_off(offset) < fi->offset) { | 511 | fpos_off(offset) < fi->offset) { |
509 | dout("dir_llseek dropping %p content\n", file); | 512 | dout("dir_llseek dropping %p content\n", file); |
510 | reset_readdir(fi); | 513 | reset_readdir(fi, fpos_frag(offset)); |
511 | } | 514 | } |
512 | 515 | ||
513 | /* bump dir_release_count if we did a forward seek */ | 516 | /* bump dir_release_count if we did a forward seek */ |
514 | if (offset > old_offset) | 517 | if (fpos_cmp(offset, old_offset) > 0) |
515 | fi->dir_release_count--; | 518 | fi->dir_release_count--; |
516 | } | 519 | } |
517 | out: | 520 | out: |
@@ -812,8 +815,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, | |||
812 | } | 815 | } |
813 | req->r_dentry = dget(dentry); | 816 | req->r_dentry = dget(dentry); |
814 | req->r_num_caps = 2; | 817 | req->r_num_caps = 2; |
815 | req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */ | 818 | req->r_old_dentry = dget(old_dentry); |
816 | req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); | ||
817 | req->r_locked_dir = dir; | 819 | req->r_locked_dir = dir; |
818 | req->r_dentry_drop = CEPH_CAP_FILE_SHARED; | 820 | req->r_dentry_drop = CEPH_CAP_FILE_SHARED; |
819 | req->r_dentry_unless = CEPH_CAP_FILE_EXCL; | 821 | req->r_dentry_unless = CEPH_CAP_FILE_EXCL; |
@@ -911,10 +913,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
911 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS); | 913 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS); |
912 | if (IS_ERR(req)) | 914 | if (IS_ERR(req)) |
913 | return PTR_ERR(req); | 915 | return PTR_ERR(req); |
916 | ihold(old_dir); | ||
914 | req->r_dentry = dget(new_dentry); | 917 | req->r_dentry = dget(new_dentry); |
915 | req->r_num_caps = 2; | 918 | req->r_num_caps = 2; |
916 | req->r_old_dentry = dget(old_dentry); | 919 | req->r_old_dentry = dget(old_dentry); |
917 | req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); | 920 | req->r_old_dentry_dir = old_dir; |
918 | req->r_locked_dir = new_dir; | 921 | req->r_locked_dir = new_dir; |
919 | req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; | 922 | req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; |
920 | req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; | 923 | req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 16796be53ca5..00d6af6a32ec 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -8,23 +8,6 @@ | |||
8 | #include "mds_client.h" | 8 | #include "mds_client.h" |
9 | 9 | ||
10 | /* | 10 | /* |
11 | * NFS export support | ||
12 | * | ||
13 | * NFS re-export of a ceph mount is, at present, only semireliable. | ||
14 | * The basic issue is that the Ceph architectures doesn't lend itself | ||
15 | * well to generating filehandles that will remain valid forever. | ||
16 | * | ||
17 | * So, we do our best. If you're lucky, your inode will be in the | ||
18 | * client's cache. If it's not, and you have a connectable fh, then | ||
19 | * the MDS server may be able to find it for you. Otherwise, you get | ||
20 | * ESTALE. | ||
21 | * | ||
22 | * There are ways to this more reliable, but in the non-connectable fh | ||
23 | * case, we won't every work perfectly, and in the connectable case, | ||
24 | * some changes are needed on the MDS side to work better. | ||
25 | */ | ||
26 | |||
27 | /* | ||
28 | * Basic fh | 11 | * Basic fh |
29 | */ | 12 | */ |
30 | struct ceph_nfs_fh { | 13 | struct ceph_nfs_fh { |
@@ -32,22 +15,12 @@ struct ceph_nfs_fh { | |||
32 | } __attribute__ ((packed)); | 15 | } __attribute__ ((packed)); |
33 | 16 | ||
34 | /* | 17 | /* |
35 | * Larger 'connectable' fh that includes parent ino and name hash. | 18 | * Larger fh that includes parent ino. |
36 | * Use this whenever possible, as it works more reliably. | ||
37 | */ | 19 | */ |
38 | struct ceph_nfs_confh { | 20 | struct ceph_nfs_confh { |
39 | u64 ino, parent_ino; | 21 | u64 ino, parent_ino; |
40 | u32 parent_name_hash; | ||
41 | } __attribute__ ((packed)); | 22 | } __attribute__ ((packed)); |
42 | 23 | ||
43 | /* | ||
44 | * The presence of @parent_inode here tells us whether NFS wants a | ||
45 | * connectable file handle. However, we want to make a connectionable | ||
46 | * file handle unconditionally so that the MDS gets as much of a hint | ||
47 | * as possible. That means we only use @parent_dentry to indicate | ||
48 | * whether nfsd wants a connectable fh, and whether we should indicate | ||
49 | * failure from a too-small @max_len. | ||
50 | */ | ||
51 | static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, | 24 | static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, |
52 | struct inode *parent_inode) | 25 | struct inode *parent_inode) |
53 | { | 26 | { |
@@ -56,54 +29,36 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, | |||
56 | struct ceph_nfs_confh *cfh = (void *)rawfh; | 29 | struct ceph_nfs_confh *cfh = (void *)rawfh; |
57 | int connected_handle_length = sizeof(*cfh)/4; | 30 | int connected_handle_length = sizeof(*cfh)/4; |
58 | int handle_length = sizeof(*fh)/4; | 31 | int handle_length = sizeof(*fh)/4; |
59 | struct dentry *dentry; | ||
60 | struct dentry *parent; | ||
61 | 32 | ||
62 | /* don't re-export snaps */ | 33 | /* don't re-export snaps */ |
63 | if (ceph_snap(inode) != CEPH_NOSNAP) | 34 | if (ceph_snap(inode) != CEPH_NOSNAP) |
64 | return -EINVAL; | 35 | return -EINVAL; |
65 | 36 | ||
66 | dentry = d_find_alias(inode); | 37 | if (parent_inode && (*max_len < connected_handle_length)) { |
38 | *max_len = connected_handle_length; | ||
39 | return FILEID_INVALID; | ||
40 | } else if (*max_len < handle_length) { | ||
41 | *max_len = handle_length; | ||
42 | return FILEID_INVALID; | ||
43 | } | ||
67 | 44 | ||
68 | /* if we found an alias, generate a connectable fh */ | 45 | if (parent_inode) { |
69 | if (*max_len >= connected_handle_length && dentry) { | 46 | dout("encode_fh %llx with parent %llx\n", |
70 | dout("encode_fh %p connectable\n", dentry); | 47 | ceph_ino(inode), ceph_ino(parent_inode)); |
71 | spin_lock(&dentry->d_lock); | ||
72 | parent = dentry->d_parent; | ||
73 | cfh->ino = ceph_ino(inode); | 48 | cfh->ino = ceph_ino(inode); |
74 | cfh->parent_ino = ceph_ino(parent->d_inode); | 49 | cfh->parent_ino = ceph_ino(parent_inode); |
75 | cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode, | ||
76 | dentry); | ||
77 | *max_len = connected_handle_length; | 50 | *max_len = connected_handle_length; |
78 | type = 2; | 51 | type = FILEID_INO32_GEN_PARENT; |
79 | spin_unlock(&dentry->d_lock); | ||
80 | } else if (*max_len >= handle_length) { | ||
81 | if (parent_inode) { | ||
82 | /* nfsd wants connectable */ | ||
83 | *max_len = connected_handle_length; | ||
84 | type = FILEID_INVALID; | ||
85 | } else { | ||
86 | dout("encode_fh %p\n", dentry); | ||
87 | fh->ino = ceph_ino(inode); | ||
88 | *max_len = handle_length; | ||
89 | type = 1; | ||
90 | } | ||
91 | } else { | 52 | } else { |
53 | dout("encode_fh %llx\n", ceph_ino(inode)); | ||
54 | fh->ino = ceph_ino(inode); | ||
92 | *max_len = handle_length; | 55 | *max_len = handle_length; |
93 | type = FILEID_INVALID; | 56 | type = FILEID_INO32_GEN; |
94 | } | 57 | } |
95 | if (dentry) | ||
96 | dput(dentry); | ||
97 | return type; | 58 | return type; |
98 | } | 59 | } |
99 | 60 | ||
100 | /* | 61 | static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) |
101 | * convert regular fh to dentry | ||
102 | * | ||
103 | * FIXME: we should try harder by querying the mds for the ino. | ||
104 | */ | ||
105 | static struct dentry *__fh_to_dentry(struct super_block *sb, | ||
106 | struct ceph_nfs_fh *fh, int fh_len) | ||
107 | { | 62 | { |
108 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; | 63 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; |
109 | struct inode *inode; | 64 | struct inode *inode; |
@@ -111,11 +66,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
111 | struct ceph_vino vino; | 66 | struct ceph_vino vino; |
112 | int err; | 67 | int err; |
113 | 68 | ||
114 | if (fh_len < sizeof(*fh) / 4) | 69 | vino.ino = ino; |
115 | return ERR_PTR(-ESTALE); | ||
116 | |||
117 | dout("__fh_to_dentry %llx\n", fh->ino); | ||
118 | vino.ino = fh->ino; | ||
119 | vino.snap = CEPH_NOSNAP; | 70 | vino.snap = CEPH_NOSNAP; |
120 | inode = ceph_find_inode(sb, vino); | 71 | inode = ceph_find_inode(sb, vino); |
121 | if (!inode) { | 72 | if (!inode) { |
@@ -139,139 +90,161 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
139 | 90 | ||
140 | dentry = d_obtain_alias(inode); | 91 | dentry = d_obtain_alias(inode); |
141 | if (IS_ERR(dentry)) { | 92 | if (IS_ERR(dentry)) { |
142 | pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n", | ||
143 | fh->ino, inode); | ||
144 | iput(inode); | 93 | iput(inode); |
145 | return dentry; | 94 | return dentry; |
146 | } | 95 | } |
147 | err = ceph_init_dentry(dentry); | 96 | err = ceph_init_dentry(dentry); |
148 | if (err < 0) { | 97 | if (err < 0) { |
149 | iput(inode); | 98 | dput(dentry); |
150 | return ERR_PTR(err); | 99 | return ERR_PTR(err); |
151 | } | 100 | } |
152 | dout("__fh_to_dentry %llx %p dentry %p\n", fh->ino, inode, dentry); | 101 | dout("__fh_to_dentry %llx %p dentry %p\n", ino, inode, dentry); |
153 | return dentry; | 102 | return dentry; |
154 | } | 103 | } |
155 | 104 | ||
156 | /* | 105 | /* |
157 | * convert connectable fh to dentry | 106 | * convert regular fh to dentry |
158 | */ | 107 | */ |
159 | static struct dentry *__cfh_to_dentry(struct super_block *sb, | 108 | static struct dentry *ceph_fh_to_dentry(struct super_block *sb, |
160 | struct ceph_nfs_confh *cfh, int fh_len) | 109 | struct fid *fid, |
110 | int fh_len, int fh_type) | ||
111 | { | ||
112 | struct ceph_nfs_fh *fh = (void *)fid->raw; | ||
113 | |||
114 | if (fh_type != FILEID_INO32_GEN && | ||
115 | fh_type != FILEID_INO32_GEN_PARENT) | ||
116 | return NULL; | ||
117 | if (fh_len < sizeof(*fh) / 4) | ||
118 | return NULL; | ||
119 | |||
120 | dout("fh_to_dentry %llx\n", fh->ino); | ||
121 | return __fh_to_dentry(sb, fh->ino); | ||
122 | } | ||
123 | |||
124 | static struct dentry *__get_parent(struct super_block *sb, | ||
125 | struct dentry *child, u64 ino) | ||
161 | { | 126 | { |
162 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; | 127 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; |
128 | struct ceph_mds_request *req; | ||
163 | struct inode *inode; | 129 | struct inode *inode; |
164 | struct dentry *dentry; | 130 | struct dentry *dentry; |
165 | struct ceph_vino vino; | ||
166 | int err; | 131 | int err; |
167 | 132 | ||
168 | if (fh_len < sizeof(*cfh) / 4) | 133 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT, |
169 | return ERR_PTR(-ESTALE); | 134 | USE_ANY_MDS); |
170 | 135 | if (IS_ERR(req)) | |
171 | dout("__cfh_to_dentry %llx (%llx/%x)\n", | 136 | return ERR_CAST(req); |
172 | cfh->ino, cfh->parent_ino, cfh->parent_name_hash); | ||
173 | |||
174 | vino.ino = cfh->ino; | ||
175 | vino.snap = CEPH_NOSNAP; | ||
176 | inode = ceph_find_inode(sb, vino); | ||
177 | if (!inode) { | ||
178 | struct ceph_mds_request *req; | ||
179 | |||
180 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH, | ||
181 | USE_ANY_MDS); | ||
182 | if (IS_ERR(req)) | ||
183 | return ERR_CAST(req); | ||
184 | 137 | ||
185 | req->r_ino1 = vino; | 138 | if (child) { |
186 | req->r_ino2.ino = cfh->parent_ino; | 139 | req->r_inode = child->d_inode; |
187 | req->r_ino2.snap = CEPH_NOSNAP; | 140 | ihold(child->d_inode); |
188 | req->r_path2 = kmalloc(16, GFP_NOFS); | 141 | } else { |
189 | snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash); | 142 | req->r_ino1 = (struct ceph_vino) { |
190 | req->r_num_caps = 1; | 143 | .ino = ino, |
191 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 144 | .snap = CEPH_NOSNAP, |
192 | inode = req->r_target_inode; | 145 | }; |
193 | if (inode) | ||
194 | ihold(inode); | ||
195 | ceph_mdsc_put_request(req); | ||
196 | if (!inode) | ||
197 | return ERR_PTR(err ? err : -ESTALE); | ||
198 | } | 146 | } |
147 | req->r_num_caps = 1; | ||
148 | err = ceph_mdsc_do_request(mdsc, NULL, req); | ||
149 | inode = req->r_target_inode; | ||
150 | if (inode) | ||
151 | ihold(inode); | ||
152 | ceph_mdsc_put_request(req); | ||
153 | if (!inode) | ||
154 | return ERR_PTR(-ENOENT); | ||
199 | 155 | ||
200 | dentry = d_obtain_alias(inode); | 156 | dentry = d_obtain_alias(inode); |
201 | if (IS_ERR(dentry)) { | 157 | if (IS_ERR(dentry)) { |
202 | pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n", | ||
203 | cfh->ino, inode); | ||
204 | iput(inode); | 158 | iput(inode); |
205 | return dentry; | 159 | return dentry; |
206 | } | 160 | } |
207 | err = ceph_init_dentry(dentry); | 161 | err = ceph_init_dentry(dentry); |
208 | if (err < 0) { | 162 | if (err < 0) { |
209 | iput(inode); | 163 | dput(dentry); |
210 | return ERR_PTR(err); | 164 | return ERR_PTR(err); |
211 | } | 165 | } |
212 | dout("__cfh_to_dentry %llx %p dentry %p\n", cfh->ino, inode, dentry); | 166 | dout("__get_parent ino %llx parent %p ino %llx.%llx\n", |
167 | child ? ceph_ino(child->d_inode) : ino, | ||
168 | dentry, ceph_vinop(inode)); | ||
213 | return dentry; | 169 | return dentry; |
214 | } | 170 | } |
215 | 171 | ||
216 | static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid, | 172 | struct dentry *ceph_get_parent(struct dentry *child) |
217 | int fh_len, int fh_type) | ||
218 | { | 173 | { |
219 | if (fh_type == 1) | 174 | /* don't re-export snaps */ |
220 | return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw, | 175 | if (ceph_snap(child->d_inode) != CEPH_NOSNAP) |
221 | fh_len); | 176 | return ERR_PTR(-EINVAL); |
222 | else | 177 | |
223 | return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw, | 178 | dout("get_parent %p ino %llx.%llx\n", |
224 | fh_len); | 179 | child, ceph_vinop(child->d_inode)); |
180 | return __get_parent(child->d_sb, child, 0); | ||
225 | } | 181 | } |
226 | 182 | ||
227 | /* | 183 | /* |
228 | * get parent, if possible. | 184 | * convert regular fh to parent |
229 | * | ||
230 | * FIXME: we could do better by querying the mds to discover the | ||
231 | * parent. | ||
232 | */ | 185 | */ |
233 | static struct dentry *ceph_fh_to_parent(struct super_block *sb, | 186 | static struct dentry *ceph_fh_to_parent(struct super_block *sb, |
234 | struct fid *fid, | 187 | struct fid *fid, |
235 | int fh_len, int fh_type) | 188 | int fh_len, int fh_type) |
236 | { | 189 | { |
237 | struct ceph_nfs_confh *cfh = (void *)fid->raw; | 190 | struct ceph_nfs_confh *cfh = (void *)fid->raw; |
238 | struct ceph_vino vino; | ||
239 | struct inode *inode; | ||
240 | struct dentry *dentry; | 191 | struct dentry *dentry; |
241 | int err; | ||
242 | 192 | ||
243 | if (fh_type == 1) | 193 | if (fh_type != FILEID_INO32_GEN_PARENT) |
244 | return ERR_PTR(-ESTALE); | 194 | return NULL; |
245 | if (fh_len < sizeof(*cfh) / 4) | 195 | if (fh_len < sizeof(*cfh) / 4) |
246 | return ERR_PTR(-ESTALE); | 196 | return NULL; |
247 | 197 | ||
248 | pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino, | 198 | dout("fh_to_parent %llx\n", cfh->parent_ino); |
249 | cfh->parent_name_hash); | 199 | dentry = __get_parent(sb, NULL, cfh->ino); |
200 | if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT) | ||
201 | dentry = __fh_to_dentry(sb, cfh->parent_ino); | ||
202 | return dentry; | ||
203 | } | ||
250 | 204 | ||
251 | vino.ino = cfh->ino; | 205 | static int ceph_get_name(struct dentry *parent, char *name, |
252 | vino.snap = CEPH_NOSNAP; | 206 | struct dentry *child) |
253 | inode = ceph_find_inode(sb, vino); | 207 | { |
254 | if (!inode) | 208 | struct ceph_mds_client *mdsc; |
255 | return ERR_PTR(-ESTALE); | 209 | struct ceph_mds_request *req; |
210 | int err; | ||
256 | 211 | ||
257 | dentry = d_obtain_alias(inode); | 212 | mdsc = ceph_inode_to_client(child->d_inode)->mdsc; |
258 | if (IS_ERR(dentry)) { | 213 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME, |
259 | pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n", | 214 | USE_ANY_MDS); |
260 | cfh->ino, inode); | 215 | if (IS_ERR(req)) |
261 | iput(inode); | 216 | return PTR_ERR(req); |
262 | return dentry; | 217 | |
263 | } | 218 | mutex_lock(&parent->d_inode->i_mutex); |
264 | err = ceph_init_dentry(dentry); | 219 | |
265 | if (err < 0) { | 220 | req->r_inode = child->d_inode; |
266 | iput(inode); | 221 | ihold(child->d_inode); |
267 | return ERR_PTR(err); | 222 | req->r_ino2 = ceph_vino(parent->d_inode); |
223 | req->r_locked_dir = parent->d_inode; | ||
224 | req->r_num_caps = 2; | ||
225 | err = ceph_mdsc_do_request(mdsc, NULL, req); | ||
226 | |||
227 | mutex_unlock(&parent->d_inode->i_mutex); | ||
228 | |||
229 | if (!err) { | ||
230 | struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; | ||
231 | memcpy(name, rinfo->dname, rinfo->dname_len); | ||
232 | name[rinfo->dname_len] = 0; | ||
233 | dout("get_name %p ino %llx.%llx name %s\n", | ||
234 | child, ceph_vinop(child->d_inode), name); | ||
235 | } else { | ||
236 | dout("get_name %p ino %llx.%llx err %d\n", | ||
237 | child, ceph_vinop(child->d_inode), err); | ||
268 | } | 238 | } |
269 | dout("fh_to_parent %llx %p dentry %p\n", cfh->ino, inode, dentry); | 239 | |
270 | return dentry; | 240 | ceph_mdsc_put_request(req); |
241 | return err; | ||
271 | } | 242 | } |
272 | 243 | ||
273 | const struct export_operations ceph_export_ops = { | 244 | const struct export_operations ceph_export_ops = { |
274 | .encode_fh = ceph_encode_fh, | 245 | .encode_fh = ceph_encode_fh, |
275 | .fh_to_dentry = ceph_fh_to_dentry, | 246 | .fh_to_dentry = ceph_fh_to_dentry, |
276 | .fh_to_parent = ceph_fh_to_parent, | 247 | .fh_to_parent = ceph_fh_to_parent, |
248 | .get_parent = ceph_get_parent, | ||
249 | .get_name = ceph_get_name, | ||
277 | }; | 250 | }; |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 09c7afe32e49..39da1c2efa50 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -210,7 +210,7 @@ int ceph_open(struct inode *inode, struct file *file) | |||
210 | ihold(inode); | 210 | ihold(inode); |
211 | 211 | ||
212 | req->r_num_caps = 1; | 212 | req->r_num_caps = 1; |
213 | if (flags & (O_CREAT|O_TRUNC)) | 213 | if (flags & O_CREAT) |
214 | parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); | 214 | parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); |
215 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | 215 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); |
216 | iput(parent_inode); | 216 | iput(parent_inode); |
@@ -291,8 +291,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, | |||
291 | } | 291 | } |
292 | err = finish_open(file, dentry, ceph_open, opened); | 292 | err = finish_open(file, dentry, ceph_open, opened); |
293 | } | 293 | } |
294 | |||
295 | out_err: | 294 | out_err: |
295 | if (!req->r_err && req->r_target_inode) | ||
296 | ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode); | ||
296 | ceph_mdsc_put_request(req); | 297 | ceph_mdsc_put_request(req); |
297 | dout("atomic_open result=%d\n", err); | 298 | dout("atomic_open result=%d\n", err); |
298 | return err; | 299 | return err; |
@@ -600,7 +601,7 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
600 | false); | 601 | false); |
601 | if (IS_ERR(req)) { | 602 | if (IS_ERR(req)) { |
602 | ret = PTR_ERR(req); | 603 | ret = PTR_ERR(req); |
603 | goto out; | 604 | break; |
604 | } | 605 | } |
605 | 606 | ||
606 | num_pages = calc_pages_for(page_align, len); | 607 | num_pages = calc_pages_for(page_align, len); |
@@ -718,7 +719,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov, | |||
718 | false); | 719 | false); |
719 | if (IS_ERR(req)) { | 720 | if (IS_ERR(req)) { |
720 | ret = PTR_ERR(req); | 721 | ret = PTR_ERR(req); |
721 | goto out; | 722 | break; |
722 | } | 723 | } |
723 | 724 | ||
724 | /* | 725 | /* |
@@ -970,6 +971,8 @@ retry_snap: | |||
970 | goto retry_snap; | 971 | goto retry_snap; |
971 | } | 972 | } |
972 | } else { | 973 | } else { |
974 | loff_t old_size = inode->i_size; | ||
975 | struct iov_iter from; | ||
973 | /* | 976 | /* |
974 | * No need to acquire the i_truncate_mutex. Because | 977 | * No need to acquire the i_truncate_mutex. Because |
975 | * the MDS revokes Fwb caps before sending truncate | 978 | * the MDS revokes Fwb caps before sending truncate |
@@ -977,9 +980,12 @@ retry_snap: | |||
977 | * are pending vmtruncate. So write and vmtruncate | 980 | * are pending vmtruncate. So write and vmtruncate |
978 | * can not run at the same time | 981 | * can not run at the same time |
979 | */ | 982 | */ |
980 | written = generic_file_buffered_write(iocb, iov, nr_segs, | 983 | iov_iter_init(&from, iov, nr_segs, count, 0); |
981 | pos, &iocb->ki_pos, | 984 | written = generic_perform_write(file, &from, pos); |
982 | count, 0); | 985 | if (likely(written >= 0)) |
986 | iocb->ki_pos = pos + written; | ||
987 | if (inode->i_size > old_size) | ||
988 | ceph_fscache_update_objectsize(inode); | ||
983 | mutex_unlock(&inode->i_mutex); | 989 | mutex_unlock(&inode->i_mutex); |
984 | } | 990 | } |
985 | 991 | ||
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 32d519d8a2e2..0b0728e5be2d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -659,14 +659,6 @@ static int fill_inode(struct inode *inode, | |||
659 | le32_to_cpu(info->time_warp_seq), | 659 | le32_to_cpu(info->time_warp_seq), |
660 | &ctime, &mtime, &atime); | 660 | &ctime, &mtime, &atime); |
661 | 661 | ||
662 | /* only update max_size on auth cap */ | ||
663 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
664 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
665 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
666 | le64_to_cpu(info->max_size)); | ||
667 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
668 | } | ||
669 | |||
670 | ci->i_layout = info->layout; | 662 | ci->i_layout = info->layout; |
671 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | 663 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; |
672 | 664 | ||
@@ -755,6 +747,14 @@ static int fill_inode(struct inode *inode, | |||
755 | ci->i_max_offset = 2; | 747 | ci->i_max_offset = 2; |
756 | } | 748 | } |
757 | no_change: | 749 | no_change: |
750 | /* only update max_size on auth cap */ | ||
751 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
752 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
753 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
754 | le64_to_cpu(info->max_size)); | ||
755 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
756 | } | ||
757 | |||
758 | spin_unlock(&ci->i_ceph_lock); | 758 | spin_unlock(&ci->i_ceph_lock); |
759 | 759 | ||
760 | /* queue truncate if we saw i_size decrease */ | 760 | /* queue truncate if we saw i_size decrease */ |
@@ -1044,10 +1044,59 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1044 | session, req->r_request_started, -1, | 1044 | session, req->r_request_started, -1, |
1045 | &req->r_caps_reservation); | 1045 | &req->r_caps_reservation); |
1046 | if (err < 0) | 1046 | if (err < 0) |
1047 | return err; | 1047 | goto done; |
1048 | } else { | 1048 | } else { |
1049 | WARN_ON_ONCE(1); | 1049 | WARN_ON_ONCE(1); |
1050 | } | 1050 | } |
1051 | |||
1052 | if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME) { | ||
1053 | struct qstr dname; | ||
1054 | struct dentry *dn, *parent; | ||
1055 | |||
1056 | BUG_ON(!rinfo->head->is_target); | ||
1057 | BUG_ON(req->r_dentry); | ||
1058 | |||
1059 | parent = d_find_any_alias(dir); | ||
1060 | BUG_ON(!parent); | ||
1061 | |||
1062 | dname.name = rinfo->dname; | ||
1063 | dname.len = rinfo->dname_len; | ||
1064 | dname.hash = full_name_hash(dname.name, dname.len); | ||
1065 | vino.ino = le64_to_cpu(rinfo->targeti.in->ino); | ||
1066 | vino.snap = le64_to_cpu(rinfo->targeti.in->snapid); | ||
1067 | retry_lookup: | ||
1068 | dn = d_lookup(parent, &dname); | ||
1069 | dout("d_lookup on parent=%p name=%.*s got %p\n", | ||
1070 | parent, dname.len, dname.name, dn); | ||
1071 | |||
1072 | if (!dn) { | ||
1073 | dn = d_alloc(parent, &dname); | ||
1074 | dout("d_alloc %p '%.*s' = %p\n", parent, | ||
1075 | dname.len, dname.name, dn); | ||
1076 | if (dn == NULL) { | ||
1077 | dput(parent); | ||
1078 | err = -ENOMEM; | ||
1079 | goto done; | ||
1080 | } | ||
1081 | err = ceph_init_dentry(dn); | ||
1082 | if (err < 0) { | ||
1083 | dput(dn); | ||
1084 | dput(parent); | ||
1085 | goto done; | ||
1086 | } | ||
1087 | } else if (dn->d_inode && | ||
1088 | (ceph_ino(dn->d_inode) != vino.ino || | ||
1089 | ceph_snap(dn->d_inode) != vino.snap)) { | ||
1090 | dout(" dn %p points to wrong inode %p\n", | ||
1091 | dn, dn->d_inode); | ||
1092 | d_delete(dn); | ||
1093 | dput(dn); | ||
1094 | goto retry_lookup; | ||
1095 | } | ||
1096 | |||
1097 | req->r_dentry = dn; | ||
1098 | dput(parent); | ||
1099 | } | ||
1051 | } | 1100 | } |
1052 | 1101 | ||
1053 | if (rinfo->head->is_target) { | 1102 | if (rinfo->head->is_target) { |
@@ -1063,7 +1112,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1063 | 1112 | ||
1064 | err = fill_inode(in, &rinfo->targeti, NULL, | 1113 | err = fill_inode(in, &rinfo->targeti, NULL, |
1065 | session, req->r_request_started, | 1114 | session, req->r_request_started, |
1066 | (le32_to_cpu(rinfo->head->result) == 0) ? | 1115 | (!req->r_aborted && rinfo->head->result == 0) ? |
1067 | req->r_fmode : -1, | 1116 | req->r_fmode : -1, |
1068 | &req->r_caps_reservation); | 1117 | &req->r_caps_reservation); |
1069 | if (err < 0) { | 1118 | if (err < 0) { |
@@ -1616,8 +1665,6 @@ static const struct inode_operations ceph_symlink_iops = { | |||
1616 | .getxattr = ceph_getxattr, | 1665 | .getxattr = ceph_getxattr, |
1617 | .listxattr = ceph_listxattr, | 1666 | .listxattr = ceph_listxattr, |
1618 | .removexattr = ceph_removexattr, | 1667 | .removexattr = ceph_removexattr, |
1619 | .get_acl = ceph_get_acl, | ||
1620 | .set_acl = ceph_set_acl, | ||
1621 | }; | 1668 | }; |
1622 | 1669 | ||
1623 | /* | 1670 | /* |
@@ -1627,7 +1674,6 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1627 | { | 1674 | { |
1628 | struct inode *inode = dentry->d_inode; | 1675 | struct inode *inode = dentry->d_inode; |
1629 | struct ceph_inode_info *ci = ceph_inode(inode); | 1676 | struct ceph_inode_info *ci = ceph_inode(inode); |
1630 | struct inode *parent_inode; | ||
1631 | const unsigned int ia_valid = attr->ia_valid; | 1677 | const unsigned int ia_valid = attr->ia_valid; |
1632 | struct ceph_mds_request *req; | 1678 | struct ceph_mds_request *req; |
1633 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; | 1679 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; |
@@ -1819,9 +1865,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1819 | req->r_inode_drop = release; | 1865 | req->r_inode_drop = release; |
1820 | req->r_args.setattr.mask = cpu_to_le32(mask); | 1866 | req->r_args.setattr.mask = cpu_to_le32(mask); |
1821 | req->r_num_caps = 1; | 1867 | req->r_num_caps = 1; |
1822 | parent_inode = ceph_get_dentry_parent_inode(dentry); | 1868 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
1823 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | ||
1824 | iput(parent_inode); | ||
1825 | } | 1869 | } |
1826 | dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, | 1870 | dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, |
1827 | ceph_cap_string(dirtied), mask); | 1871 | ceph_cap_string(dirtied), mask); |
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index dc66c9e023e4..fdf941b44ff1 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -1,9 +1,8 @@ | |||
1 | #include <linux/ceph/ceph_debug.h> | ||
1 | #include <linux/in.h> | 2 | #include <linux/in.h> |
2 | 3 | ||
3 | #include "super.h" | 4 | #include "super.h" |
4 | #include "mds_client.h" | 5 | #include "mds_client.h" |
5 | #include <linux/ceph/ceph_debug.h> | ||
6 | |||
7 | #include "ioctl.h" | 6 | #include "ioctl.h" |
8 | 7 | ||
9 | 8 | ||
@@ -64,7 +63,6 @@ static long __validate_layout(struct ceph_mds_client *mdsc, | |||
64 | static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | 63 | static long ceph_ioctl_set_layout(struct file *file, void __user *arg) |
65 | { | 64 | { |
66 | struct inode *inode = file_inode(file); | 65 | struct inode *inode = file_inode(file); |
67 | struct inode *parent_inode; | ||
68 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | 66 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
69 | struct ceph_mds_request *req; | 67 | struct ceph_mds_request *req; |
70 | struct ceph_ioctl_layout l; | 68 | struct ceph_ioctl_layout l; |
@@ -121,9 +119,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||
121 | cpu_to_le32(l.object_size); | 119 | cpu_to_le32(l.object_size); |
122 | req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool); | 120 | req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool); |
123 | 121 | ||
124 | parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); | 122 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
125 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | ||
126 | iput(parent_inode); | ||
127 | ceph_mdsc_put_request(req); | 123 | ceph_mdsc_put_request(req); |
128 | return err; | 124 | return err; |
129 | } | 125 | } |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ae6d14e82b0f..d94ba0df9f4d 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -2,11 +2,31 @@ | |||
2 | 2 | ||
3 | #include <linux/file.h> | 3 | #include <linux/file.h> |
4 | #include <linux/namei.h> | 4 | #include <linux/namei.h> |
5 | #include <linux/random.h> | ||
5 | 6 | ||
6 | #include "super.h" | 7 | #include "super.h" |
7 | #include "mds_client.h" | 8 | #include "mds_client.h" |
8 | #include <linux/ceph/pagelist.h> | 9 | #include <linux/ceph/pagelist.h> |
9 | 10 | ||
11 | static u64 lock_secret; | ||
12 | |||
13 | static inline u64 secure_addr(void *addr) | ||
14 | { | ||
15 | u64 v = lock_secret ^ (u64)(unsigned long)addr; | ||
16 | /* | ||
17 | * Set the most significant bit, so that MDS knows the 'owner' | ||
18 | * is sufficient to identify the owner of lock. (old code uses | ||
19 | * both 'owner' and 'pid') | ||
20 | */ | ||
21 | v |= (1ULL << 63); | ||
22 | return v; | ||
23 | } | ||
24 | |||
25 | void __init ceph_flock_init(void) | ||
26 | { | ||
27 | get_random_bytes(&lock_secret, sizeof(lock_secret)); | ||
28 | } | ||
29 | |||
10 | /** | 30 | /** |
11 | * Implement fcntl and flock locking functions. | 31 | * Implement fcntl and flock locking functions. |
12 | */ | 32 | */ |
@@ -14,11 +34,11 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
14 | int cmd, u8 wait, struct file_lock *fl) | 34 | int cmd, u8 wait, struct file_lock *fl) |
15 | { | 35 | { |
16 | struct inode *inode = file_inode(file); | 36 | struct inode *inode = file_inode(file); |
17 | struct ceph_mds_client *mdsc = | 37 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
18 | ceph_sb_to_client(inode->i_sb)->mdsc; | ||
19 | struct ceph_mds_request *req; | 38 | struct ceph_mds_request *req; |
20 | int err; | 39 | int err; |
21 | u64 length = 0; | 40 | u64 length = 0; |
41 | u64 owner; | ||
22 | 42 | ||
23 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); | 43 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); |
24 | if (IS_ERR(req)) | 44 | if (IS_ERR(req)) |
@@ -32,25 +52,27 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
32 | else | 52 | else |
33 | length = fl->fl_end - fl->fl_start + 1; | 53 | length = fl->fl_end - fl->fl_start + 1; |
34 | 54 | ||
35 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | 55 | if (lock_type == CEPH_LOCK_FCNTL) |
36 | "length: %llu, wait: %d, type: %d", (int)lock_type, | 56 | owner = secure_addr(fl->fl_owner); |
37 | (int)operation, (u64)fl->fl_pid, fl->fl_start, | 57 | else |
38 | length, wait, fl->fl_type); | 58 | owner = secure_addr(fl->fl_file); |
59 | |||
60 | dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " | ||
61 | "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type, | ||
62 | (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length, | ||
63 | wait, fl->fl_type); | ||
39 | 64 | ||
40 | req->r_args.filelock_change.rule = lock_type; | 65 | req->r_args.filelock_change.rule = lock_type; |
41 | req->r_args.filelock_change.type = cmd; | 66 | req->r_args.filelock_change.type = cmd; |
67 | req->r_args.filelock_change.owner = cpu_to_le64(owner); | ||
42 | req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); | 68 | req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); |
43 | /* This should be adjusted, but I'm not sure if | ||
44 | namespaces actually get id numbers*/ | ||
45 | req->r_args.filelock_change.pid_namespace = | ||
46 | cpu_to_le64((u64)(unsigned long)fl->fl_nspid); | ||
47 | req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); | 69 | req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); |
48 | req->r_args.filelock_change.length = cpu_to_le64(length); | 70 | req->r_args.filelock_change.length = cpu_to_le64(length); |
49 | req->r_args.filelock_change.wait = wait; | 71 | req->r_args.filelock_change.wait = wait; |
50 | 72 | ||
51 | err = ceph_mdsc_do_request(mdsc, inode, req); | 73 | err = ceph_mdsc_do_request(mdsc, inode, req); |
52 | 74 | ||
53 | if ( operation == CEPH_MDS_OP_GETFILELOCK){ | 75 | if (operation == CEPH_MDS_OP_GETFILELOCK) { |
54 | fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); | 76 | fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); |
55 | if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) | 77 | if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) |
56 | fl->fl_type = F_RDLCK; | 78 | fl->fl_type = F_RDLCK; |
@@ -87,14 +109,19 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
87 | u8 wait = 0; | 109 | u8 wait = 0; |
88 | u16 op = CEPH_MDS_OP_SETFILELOCK; | 110 | u16 op = CEPH_MDS_OP_SETFILELOCK; |
89 | 111 | ||
90 | fl->fl_nspid = get_pid(task_tgid(current)); | 112 | if (!(fl->fl_flags & FL_POSIX)) |
91 | dout("ceph_lock, fl_pid:%d", fl->fl_pid); | 113 | return -ENOLCK; |
114 | /* No mandatory locks */ | ||
115 | if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) | ||
116 | return -ENOLCK; | ||
117 | |||
118 | dout("ceph_lock, fl_owner: %p", fl->fl_owner); | ||
92 | 119 | ||
93 | /* set wait bit as appropriate, then make command as Ceph expects it*/ | 120 | /* set wait bit as appropriate, then make command as Ceph expects it*/ |
94 | if (F_SETLKW == cmd) | 121 | if (IS_GETLK(cmd)) |
95 | wait = 1; | ||
96 | if (F_GETLK == cmd) | ||
97 | op = CEPH_MDS_OP_GETFILELOCK; | 122 | op = CEPH_MDS_OP_GETFILELOCK; |
123 | else if (IS_SETLKW(cmd)) | ||
124 | wait = 1; | ||
98 | 125 | ||
99 | if (F_RDLCK == fl->fl_type) | 126 | if (F_RDLCK == fl->fl_type) |
100 | lock_cmd = CEPH_LOCK_SHARED; | 127 | lock_cmd = CEPH_LOCK_SHARED; |
@@ -105,7 +132,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
105 | 132 | ||
106 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); | 133 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); |
107 | if (!err) { | 134 | if (!err) { |
108 | if ( op != CEPH_MDS_OP_GETFILELOCK ){ | 135 | if (op != CEPH_MDS_OP_GETFILELOCK) { |
109 | dout("mds locked, locking locally"); | 136 | dout("mds locked, locking locally"); |
110 | err = posix_lock_file(file, fl, NULL); | 137 | err = posix_lock_file(file, fl, NULL); |
111 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { | 138 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { |
@@ -131,20 +158,22 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
131 | { | 158 | { |
132 | u8 lock_cmd; | 159 | u8 lock_cmd; |
133 | int err; | 160 | int err; |
134 | u8 wait = 1; | 161 | u8 wait = 0; |
135 | 162 | ||
136 | fl->fl_nspid = get_pid(task_tgid(current)); | 163 | if (!(fl->fl_flags & FL_FLOCK)) |
137 | dout("ceph_flock, fl_pid:%d", fl->fl_pid); | 164 | return -ENOLCK; |
138 | 165 | /* No mandatory locks */ | |
139 | /* set wait bit, then clear it out of cmd*/ | 166 | if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) |
140 | if (cmd & LOCK_NB) | 167 | return -ENOLCK; |
141 | wait = 0; | 168 | |
142 | cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN); | 169 | dout("ceph_flock, fl_file: %p", fl->fl_file); |
143 | /* set command sequence that Ceph wants to see: | 170 | |
144 | shared lock, exclusive lock, or unlock */ | 171 | if (IS_SETLKW(cmd)) |
145 | if (LOCK_SH == cmd) | 172 | wait = 1; |
173 | |||
174 | if (F_RDLCK == fl->fl_type) | ||
146 | lock_cmd = CEPH_LOCK_SHARED; | 175 | lock_cmd = CEPH_LOCK_SHARED; |
147 | else if (LOCK_EX == cmd) | 176 | else if (F_WRLCK == fl->fl_type) |
148 | lock_cmd = CEPH_LOCK_EXCL; | 177 | lock_cmd = CEPH_LOCK_EXCL; |
149 | else | 178 | else |
150 | lock_cmd = CEPH_LOCK_UNLOCK; | 179 | lock_cmd = CEPH_LOCK_UNLOCK; |
@@ -280,13 +309,14 @@ int lock_to_ceph_filelock(struct file_lock *lock, | |||
280 | struct ceph_filelock *cephlock) | 309 | struct ceph_filelock *cephlock) |
281 | { | 310 | { |
282 | int err = 0; | 311 | int err = 0; |
283 | |||
284 | cephlock->start = cpu_to_le64(lock->fl_start); | 312 | cephlock->start = cpu_to_le64(lock->fl_start); |
285 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); | 313 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); |
286 | cephlock->client = cpu_to_le64(0); | 314 | cephlock->client = cpu_to_le64(0); |
287 | cephlock->pid = cpu_to_le64(lock->fl_pid); | 315 | cephlock->pid = cpu_to_le64((u64)lock->fl_pid); |
288 | cephlock->pid_namespace = | 316 | if (lock->fl_flags & FL_POSIX) |
289 | cpu_to_le64((u64)(unsigned long)lock->fl_nspid); | 317 | cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); |
318 | else | ||
319 | cephlock->owner = cpu_to_le64(secure_addr(lock->fl_file)); | ||
290 | 320 | ||
291 | switch (lock->fl_type) { | 321 | switch (lock->fl_type) { |
292 | case F_RDLCK: | 322 | case F_RDLCK: |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f4f050a69a48..2b4d093d0563 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
4 | #include <linux/wait.h> | 4 | #include <linux/wait.h> |
5 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
6 | #include <linux/gfp.h> | ||
6 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
7 | #include <linux/debugfs.h> | 8 | #include <linux/debugfs.h> |
8 | #include <linux/seq_file.h> | 9 | #include <linux/seq_file.h> |
@@ -165,21 +166,18 @@ static int parse_reply_info_dir(void **p, void *end, | |||
165 | if (num == 0) | 166 | if (num == 0) |
166 | goto done; | 167 | goto done; |
167 | 168 | ||
168 | /* alloc large array */ | 169 | BUG_ON(!info->dir_in); |
169 | info->dir_nr = num; | ||
170 | info->dir_in = kcalloc(num, sizeof(*info->dir_in) + | ||
171 | sizeof(*info->dir_dname) + | ||
172 | sizeof(*info->dir_dname_len) + | ||
173 | sizeof(*info->dir_dlease), | ||
174 | GFP_NOFS); | ||
175 | if (info->dir_in == NULL) { | ||
176 | err = -ENOMEM; | ||
177 | goto out_bad; | ||
178 | } | ||
179 | info->dir_dname = (void *)(info->dir_in + num); | 170 | info->dir_dname = (void *)(info->dir_in + num); |
180 | info->dir_dname_len = (void *)(info->dir_dname + num); | 171 | info->dir_dname_len = (void *)(info->dir_dname + num); |
181 | info->dir_dlease = (void *)(info->dir_dname_len + num); | 172 | info->dir_dlease = (void *)(info->dir_dname_len + num); |
173 | if ((unsigned long)(info->dir_dlease + num) > | ||
174 | (unsigned long)info->dir_in + info->dir_buf_size) { | ||
175 | pr_err("dir contents are larger than expected\n"); | ||
176 | WARN_ON(1); | ||
177 | goto bad; | ||
178 | } | ||
182 | 179 | ||
180 | info->dir_nr = num; | ||
183 | while (num) { | 181 | while (num) { |
184 | /* dentry */ | 182 | /* dentry */ |
185 | ceph_decode_need(p, end, sizeof(u32)*2, bad); | 183 | ceph_decode_need(p, end, sizeof(u32)*2, bad); |
@@ -327,7 +325,9 @@ out_bad: | |||
327 | 325 | ||
328 | static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) | 326 | static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) |
329 | { | 327 | { |
330 | kfree(info->dir_in); | 328 | if (!info->dir_in) |
329 | return; | ||
330 | free_pages((unsigned long)info->dir_in, get_order(info->dir_buf_size)); | ||
331 | } | 331 | } |
332 | 332 | ||
333 | 333 | ||
@@ -512,12 +512,11 @@ void ceph_mdsc_release_request(struct kref *kref) | |||
512 | struct ceph_mds_request *req = container_of(kref, | 512 | struct ceph_mds_request *req = container_of(kref, |
513 | struct ceph_mds_request, | 513 | struct ceph_mds_request, |
514 | r_kref); | 514 | r_kref); |
515 | destroy_reply_info(&req->r_reply_info); | ||
515 | if (req->r_request) | 516 | if (req->r_request) |
516 | ceph_msg_put(req->r_request); | 517 | ceph_msg_put(req->r_request); |
517 | if (req->r_reply) { | 518 | if (req->r_reply) |
518 | ceph_msg_put(req->r_reply); | 519 | ceph_msg_put(req->r_reply); |
519 | destroy_reply_info(&req->r_reply_info); | ||
520 | } | ||
521 | if (req->r_inode) { | 520 | if (req->r_inode) { |
522 | ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); | 521 | ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); |
523 | iput(req->r_inode); | 522 | iput(req->r_inode); |
@@ -528,7 +527,9 @@ void ceph_mdsc_release_request(struct kref *kref) | |||
528 | iput(req->r_target_inode); | 527 | iput(req->r_target_inode); |
529 | if (req->r_dentry) | 528 | if (req->r_dentry) |
530 | dput(req->r_dentry); | 529 | dput(req->r_dentry); |
531 | if (req->r_old_dentry) { | 530 | if (req->r_old_dentry) |
531 | dput(req->r_old_dentry); | ||
532 | if (req->r_old_dentry_dir) { | ||
532 | /* | 533 | /* |
533 | * track (and drop pins for) r_old_dentry_dir | 534 | * track (and drop pins for) r_old_dentry_dir |
534 | * separately, since r_old_dentry's d_parent may have | 535 | * separately, since r_old_dentry's d_parent may have |
@@ -537,7 +538,6 @@ void ceph_mdsc_release_request(struct kref *kref) | |||
537 | */ | 538 | */ |
538 | ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), | 539 | ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), |
539 | CEPH_CAP_PIN); | 540 | CEPH_CAP_PIN); |
540 | dput(req->r_old_dentry); | ||
541 | iput(req->r_old_dentry_dir); | 541 | iput(req->r_old_dentry_dir); |
542 | } | 542 | } |
543 | kfree(req->r_path1); | 543 | kfree(req->r_path1); |
@@ -1311,6 +1311,9 @@ static int trim_caps(struct ceph_mds_client *mdsc, | |||
1311 | trim_caps - session->s_trim_caps); | 1311 | trim_caps - session->s_trim_caps); |
1312 | session->s_trim_caps = 0; | 1312 | session->s_trim_caps = 0; |
1313 | } | 1313 | } |
1314 | |||
1315 | ceph_add_cap_releases(mdsc, session); | ||
1316 | ceph_send_cap_releases(mdsc, session); | ||
1314 | return 0; | 1317 | return 0; |
1315 | } | 1318 | } |
1316 | 1319 | ||
@@ -1461,15 +1464,18 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc, | |||
1461 | 1464 | ||
1462 | dout("discard_cap_releases mds%d\n", session->s_mds); | 1465 | dout("discard_cap_releases mds%d\n", session->s_mds); |
1463 | 1466 | ||
1464 | /* zero out the in-progress message */ | 1467 | if (!list_empty(&session->s_cap_releases)) { |
1465 | msg = list_first_entry(&session->s_cap_releases, | 1468 | /* zero out the in-progress message */ |
1466 | struct ceph_msg, list_head); | 1469 | msg = list_first_entry(&session->s_cap_releases, |
1467 | head = msg->front.iov_base; | 1470 | struct ceph_msg, list_head); |
1468 | num = le32_to_cpu(head->num); | 1471 | head = msg->front.iov_base; |
1469 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); | 1472 | num = le32_to_cpu(head->num); |
1470 | head->num = cpu_to_le32(0); | 1473 | dout("discard_cap_releases mds%d %p %u\n", |
1471 | msg->front.iov_len = sizeof(*head); | 1474 | session->s_mds, msg, num); |
1472 | session->s_num_cap_releases += num; | 1475 | head->num = cpu_to_le32(0); |
1476 | msg->front.iov_len = sizeof(*head); | ||
1477 | session->s_num_cap_releases += num; | ||
1478 | } | ||
1473 | 1479 | ||
1474 | /* requeue completed messages */ | 1480 | /* requeue completed messages */ |
1475 | while (!list_empty(&session->s_cap_releases_done)) { | 1481 | while (!list_empty(&session->s_cap_releases_done)) { |
@@ -1492,6 +1498,43 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc, | |||
1492 | * requests | 1498 | * requests |
1493 | */ | 1499 | */ |
1494 | 1500 | ||
1501 | int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, | ||
1502 | struct inode *dir) | ||
1503 | { | ||
1504 | struct ceph_inode_info *ci = ceph_inode(dir); | ||
1505 | struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; | ||
1506 | struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; | ||
1507 | size_t size = sizeof(*rinfo->dir_in) + sizeof(*rinfo->dir_dname_len) + | ||
1508 | sizeof(*rinfo->dir_dname) + sizeof(*rinfo->dir_dlease); | ||
1509 | int order, num_entries; | ||
1510 | |||
1511 | spin_lock(&ci->i_ceph_lock); | ||
1512 | num_entries = ci->i_files + ci->i_subdirs; | ||
1513 | spin_unlock(&ci->i_ceph_lock); | ||
1514 | num_entries = max(num_entries, 1); | ||
1515 | num_entries = min(num_entries, opt->max_readdir); | ||
1516 | |||
1517 | order = get_order(size * num_entries); | ||
1518 | while (order >= 0) { | ||
1519 | rinfo->dir_in = (void*)__get_free_pages(GFP_NOFS | __GFP_NOWARN, | ||
1520 | order); | ||
1521 | if (rinfo->dir_in) | ||
1522 | break; | ||
1523 | order--; | ||
1524 | } | ||
1525 | if (!rinfo->dir_in) | ||
1526 | return -ENOMEM; | ||
1527 | |||
1528 | num_entries = (PAGE_SIZE << order) / size; | ||
1529 | num_entries = min(num_entries, opt->max_readdir); | ||
1530 | |||
1531 | rinfo->dir_buf_size = PAGE_SIZE << order; | ||
1532 | req->r_num_caps = num_entries + 1; | ||
1533 | req->r_args.readdir.max_entries = cpu_to_le32(num_entries); | ||
1534 | req->r_args.readdir.max_bytes = cpu_to_le32(opt->max_readdir_bytes); | ||
1535 | return 0; | ||
1536 | } | ||
1537 | |||
1495 | /* | 1538 | /* |
1496 | * Create an mds request. | 1539 | * Create an mds request. |
1497 | */ | 1540 | */ |
@@ -2053,7 +2096,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | |||
2053 | ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); | 2096 | ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); |
2054 | if (req->r_locked_dir) | 2097 | if (req->r_locked_dir) |
2055 | ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); | 2098 | ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); |
2056 | if (req->r_old_dentry) | 2099 | if (req->r_old_dentry_dir) |
2057 | ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), | 2100 | ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), |
2058 | CEPH_CAP_PIN); | 2101 | CEPH_CAP_PIN); |
2059 | 2102 | ||
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 68288917c737..e90cfccf93bd 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -67,6 +67,7 @@ struct ceph_mds_reply_info_parsed { | |||
67 | /* for readdir results */ | 67 | /* for readdir results */ |
68 | struct { | 68 | struct { |
69 | struct ceph_mds_reply_dirfrag *dir_dir; | 69 | struct ceph_mds_reply_dirfrag *dir_dir; |
70 | size_t dir_buf_size; | ||
70 | int dir_nr; | 71 | int dir_nr; |
71 | char **dir_dname; | 72 | char **dir_dname; |
72 | u32 *dir_dname_len; | 73 | u32 *dir_dname_len; |
@@ -346,7 +347,8 @@ extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, | |||
346 | struct dentry *dn); | 347 | struct dentry *dn); |
347 | 348 | ||
348 | extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); | 349 | extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); |
349 | 350 | extern int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, | |
351 | struct inode *dir); | ||
350 | extern struct ceph_mds_request * | 352 | extern struct ceph_mds_request * |
351 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); | 353 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); |
352 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, | 354 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, |
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c index 4440f447fd3f..51cc23e48111 100644 --- a/fs/ceph/strings.c +++ b/fs/ceph/strings.c | |||
@@ -54,6 +54,7 @@ const char *ceph_mds_op_name(int op) | |||
54 | case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; | 54 | case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; |
55 | case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; | 55 | case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; |
56 | case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; | 56 | case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; |
57 | case CEPH_MDS_OP_LOOKUPNAME: return "lookupname"; | ||
57 | case CEPH_MDS_OP_GETATTR: return "getattr"; | 58 | case CEPH_MDS_OP_GETATTR: return "getattr"; |
58 | case CEPH_MDS_OP_SETXATTR: return "setxattr"; | 59 | case CEPH_MDS_OP_SETXATTR: return "setxattr"; |
59 | case CEPH_MDS_OP_SETATTR: return "setattr"; | 60 | case CEPH_MDS_OP_SETATTR: return "setattr"; |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 10a4ccbf38da..06150fd745ac 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -1026,6 +1026,7 @@ static int __init init_ceph(void) | |||
1026 | if (ret) | 1026 | if (ret) |
1027 | goto out; | 1027 | goto out; |
1028 | 1028 | ||
1029 | ceph_flock_init(); | ||
1029 | ceph_xattr_init(); | 1030 | ceph_xattr_init(); |
1030 | ret = register_filesystem(&ceph_fs_type); | 1031 | ret = register_filesystem(&ceph_fs_type); |
1031 | if (ret) | 1032 | if (ret) |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index d8801a95b685..7866cd05a6bb 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -577,7 +577,7 @@ struct ceph_file_info { | |||
577 | 577 | ||
578 | /* readdir: position within a frag */ | 578 | /* readdir: position within a frag */ |
579 | unsigned offset; /* offset of last chunk, adjusted for . and .. */ | 579 | unsigned offset; /* offset of last chunk, adjusted for . and .. */ |
580 | u64 next_offset; /* offset of next chunk (last_name's + 1) */ | 580 | unsigned next_offset; /* offset of next chunk (last_name's + 1) */ |
581 | char *last_name; /* last entry in previous chunk */ | 581 | char *last_name; /* last entry in previous chunk */ |
582 | struct dentry *dentry; /* next dentry (for dcache readdir) */ | 582 | struct dentry *dentry; /* next dentry (for dcache readdir) */ |
583 | int dir_release_count; | 583 | int dir_release_count; |
@@ -871,6 +871,7 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | |||
871 | extern const struct export_operations ceph_export_ops; | 871 | extern const struct export_operations ceph_export_ops; |
872 | 872 | ||
873 | /* locks.c */ | 873 | /* locks.c */ |
874 | extern __init void ceph_flock_init(void); | ||
874 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); | 875 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); |
875 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); | 876 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); |
876 | extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); | 877 | extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index a55ec37378c6..c9c2b887381e 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -64,32 +64,48 @@ static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) | |||
64 | } | 64 | } |
65 | 65 | ||
66 | static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, | 66 | static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, |
67 | size_t size) | 67 | size_t size) |
68 | { | 68 | { |
69 | int ret; | 69 | int ret; |
70 | struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); | 70 | struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); |
71 | struct ceph_osd_client *osdc = &fsc->client->osdc; | 71 | struct ceph_osd_client *osdc = &fsc->client->osdc; |
72 | s64 pool = ceph_file_layout_pg_pool(ci->i_layout); | 72 | s64 pool = ceph_file_layout_pg_pool(ci->i_layout); |
73 | const char *pool_name; | 73 | const char *pool_name; |
74 | char buf[128]; | ||
74 | 75 | ||
75 | dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); | 76 | dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); |
76 | down_read(&osdc->map_sem); | 77 | down_read(&osdc->map_sem); |
77 | pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); | 78 | pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); |
78 | if (pool_name) | 79 | if (pool_name) { |
79 | ret = snprintf(val, size, | 80 | size_t len = strlen(pool_name); |
80 | "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s", | 81 | ret = snprintf(buf, sizeof(buf), |
82 | "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=", | ||
81 | (unsigned long long)ceph_file_layout_su(ci->i_layout), | 83 | (unsigned long long)ceph_file_layout_su(ci->i_layout), |
82 | (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), | 84 | (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), |
83 | (unsigned long long)ceph_file_layout_object_size(ci->i_layout), | 85 | (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); |
84 | pool_name); | 86 | if (!size) { |
85 | else | 87 | ret += len; |
86 | ret = snprintf(val, size, | 88 | } else if (ret + len > size) { |
89 | ret = -ERANGE; | ||
90 | } else { | ||
91 | memcpy(val, buf, ret); | ||
92 | memcpy(val + ret, pool_name, len); | ||
93 | ret += len; | ||
94 | } | ||
95 | } else { | ||
96 | ret = snprintf(buf, sizeof(buf), | ||
87 | "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", | 97 | "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", |
88 | (unsigned long long)ceph_file_layout_su(ci->i_layout), | 98 | (unsigned long long)ceph_file_layout_su(ci->i_layout), |
89 | (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), | 99 | (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), |
90 | (unsigned long long)ceph_file_layout_object_size(ci->i_layout), | 100 | (unsigned long long)ceph_file_layout_object_size(ci->i_layout), |
91 | (unsigned long long)pool); | 101 | (unsigned long long)pool); |
92 | 102 | if (size) { | |
103 | if (ret <= size) | ||
104 | memcpy(val, buf, ret); | ||
105 | else | ||
106 | ret = -ERANGE; | ||
107 | } | ||
108 | } | ||
93 | up_read(&osdc->map_sem); | 109 | up_read(&osdc->map_sem); |
94 | return ret; | 110 | return ret; |
95 | } | 111 | } |
@@ -215,7 +231,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { | |||
215 | .name_size = sizeof("ceph.dir.layout"), | 231 | .name_size = sizeof("ceph.dir.layout"), |
216 | .getxattr_cb = ceph_vxattrcb_layout, | 232 | .getxattr_cb = ceph_vxattrcb_layout, |
217 | .readonly = false, | 233 | .readonly = false, |
218 | .hidden = false, | 234 | .hidden = true, |
219 | .exists_cb = ceph_vxattrcb_layout_exists, | 235 | .exists_cb = ceph_vxattrcb_layout_exists, |
220 | }, | 236 | }, |
221 | XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), | 237 | XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), |
@@ -242,7 +258,7 @@ static struct ceph_vxattr ceph_file_vxattrs[] = { | |||
242 | .name_size = sizeof("ceph.file.layout"), | 258 | .name_size = sizeof("ceph.file.layout"), |
243 | .getxattr_cb = ceph_vxattrcb_layout, | 259 | .getxattr_cb = ceph_vxattrcb_layout, |
244 | .readonly = false, | 260 | .readonly = false, |
245 | .hidden = false, | 261 | .hidden = true, |
246 | .exists_cb = ceph_vxattrcb_layout_exists, | 262 | .exists_cb = ceph_vxattrcb_layout_exists, |
247 | }, | 263 | }, |
248 | XATTR_LAYOUT_FIELD(file, layout, stripe_unit), | 264 | XATTR_LAYOUT_FIELD(file, layout, stripe_unit), |
@@ -842,7 +858,6 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | |||
842 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); | 858 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
843 | struct inode *inode = dentry->d_inode; | 859 | struct inode *inode = dentry->d_inode; |
844 | struct ceph_inode_info *ci = ceph_inode(inode); | 860 | struct ceph_inode_info *ci = ceph_inode(inode); |
845 | struct inode *parent_inode; | ||
846 | struct ceph_mds_request *req; | 861 | struct ceph_mds_request *req; |
847 | struct ceph_mds_client *mdsc = fsc->mdsc; | 862 | struct ceph_mds_client *mdsc = fsc->mdsc; |
848 | int err; | 863 | int err; |
@@ -893,9 +908,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | |||
893 | req->r_data_len = size; | 908 | req->r_data_len = size; |
894 | 909 | ||
895 | dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); | 910 | dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); |
896 | parent_inode = ceph_get_dentry_parent_inode(dentry); | 911 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
897 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | ||
898 | iput(parent_inode); | ||
899 | ceph_mdsc_put_request(req); | 912 | ceph_mdsc_put_request(req); |
900 | dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); | 913 | dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); |
901 | 914 | ||
@@ -1019,7 +1032,6 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name) | |||
1019 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); | 1032 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
1020 | struct ceph_mds_client *mdsc = fsc->mdsc; | 1033 | struct ceph_mds_client *mdsc = fsc->mdsc; |
1021 | struct inode *inode = dentry->d_inode; | 1034 | struct inode *inode = dentry->d_inode; |
1022 | struct inode *parent_inode; | ||
1023 | struct ceph_mds_request *req; | 1035 | struct ceph_mds_request *req; |
1024 | int err; | 1036 | int err; |
1025 | 1037 | ||
@@ -1033,9 +1045,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name) | |||
1033 | req->r_num_caps = 1; | 1045 | req->r_num_caps = 1; |
1034 | req->r_path2 = kstrdup(name, GFP_NOFS); | 1046 | req->r_path2 = kstrdup(name, GFP_NOFS); |
1035 | 1047 | ||
1036 | parent_inode = ceph_get_dentry_parent_inode(dentry); | 1048 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
1037 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | ||
1038 | iput(parent_inode); | ||
1039 | ceph_mdsc_put_request(req); | 1049 | ceph_mdsc_put_request(req); |
1040 | return err; | 1050 | return err; |
1041 | } | 1051 | } |