about summary refs log tree commit diff stats
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/cache.c 1
-rw-r--r-- fs/ceph/cache.h 10
-rw-r--r-- fs/ceph/caps.c 9
-rw-r--r-- fs/ceph/debugfs.c 5
-rw-r--r-- fs/ceph/dir.c 53
-rw-r--r-- fs/ceph/export.c 267
-rw-r--r-- fs/ceph/file.c 20
-rw-r--r-- fs/ceph/inode.c 76
-rw-r--r-- fs/ceph/ioctl.c 8
-rw-r--r-- fs/ceph/locks.c 98
-rw-r--r-- fs/ceph/mds_client.c 97
-rw-r--r-- fs/ceph/mds_client.h 4
-rw-r--r-- fs/ceph/strings.c 1
-rw-r--r-- fs/ceph/super.c 1
-rw-r--r-- fs/ceph/super.h 3
-rw-r--r-- fs/ceph/xattr.c 48
16 files changed, 415 insertions, 286 deletions
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 8c44fdd4e1c3..834f9f3723fb 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -205,6 +205,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
205 ci->fscache = fscache_acquire_cookie(fsc->fscache, 205 ci->fscache = fscache_acquire_cookie(fsc->fscache,
206 &ceph_fscache_inode_object_def, 206 &ceph_fscache_inode_object_def,
207 ci, true); 207 ci, true);
208 fscache_check_consistency(ci->fscache);
208done: 209done:
209 mutex_unlock(&inode->i_mutex); 210 mutex_unlock(&inode->i_mutex);
210 211
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
index da95f61b7a09..5ac591bd012b 100644
--- a/fs/ceph/cache.h
+++ b/fs/ceph/cache.h
@@ -48,6 +48,12 @@ void ceph_readpage_to_fscache(struct inode *inode, struct page *page);
48void ceph_invalidate_fscache_page(struct inode* inode, struct page *page); 48void ceph_invalidate_fscache_page(struct inode* inode, struct page *page);
49void ceph_queue_revalidate(struct inode *inode); 49void ceph_queue_revalidate(struct inode *inode);
50 50
51static inline void ceph_fscache_update_objectsize(struct inode *inode)
52{
53 struct ceph_inode_info *ci = ceph_inode(inode);
54 fscache_attr_changed(ci->fscache);
55}
56
51static inline void ceph_fscache_invalidate(struct inode *inode) 57static inline void ceph_fscache_invalidate(struct inode *inode)
52{ 58{
53 fscache_invalidate(ceph_inode(inode)->fscache); 59 fscache_invalidate(ceph_inode(inode)->fscache);
@@ -135,6 +141,10 @@ static inline void ceph_readpage_to_fscache(struct inode *inode,
135{ 141{
136} 142}
137 143
144static inline void ceph_fscache_update_objectsize(struct inode *inode)
145{
146}
147
138static inline void ceph_fscache_invalidate(struct inode *inode) 148static inline void ceph_fscache_invalidate(struct inode *inode)
139{ 149{
140} 150}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 17543383545c..2e5e648eb5c3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -622,8 +622,10 @@ retry:
622 622
623 if (flags & CEPH_CAP_FLAG_AUTH) { 623 if (flags & CEPH_CAP_FLAG_AUTH) {
624 if (ci->i_auth_cap == NULL || 624 if (ci->i_auth_cap == NULL ||
625 ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) 625 ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) {
626 ci->i_auth_cap = cap; 626 ci->i_auth_cap = cap;
627 cap->mds_wanted = wanted;
628 }
627 ci->i_cap_exporting_issued = 0; 629 ci->i_cap_exporting_issued = 0;
628 } else { 630 } else {
629 WARN_ON(ci->i_auth_cap == cap); 631 WARN_ON(ci->i_auth_cap == cap);
@@ -885,7 +887,10 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
885 cap = rb_entry(p, struct ceph_cap, ci_node); 887 cap = rb_entry(p, struct ceph_cap, ci_node);
886 if (!__cap_is_valid(cap)) 888 if (!__cap_is_valid(cap))
887 continue; 889 continue;
888 mds_wanted |= cap->mds_wanted; 890 if (cap == ci->i_auth_cap)
891 mds_wanted |= cap->mds_wanted;
892 else
893 mds_wanted |= (cap->mds_wanted & ~CEPH_CAP_ANY_FILE_WR);
889 } 894 }
890 return mds_wanted; 895 return mds_wanted;
891} 896}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 6d59006bfa27..16b54aa31f08 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -93,6 +93,8 @@ static int mdsc_show(struct seq_file *s, void *p)
93 } else if (req->r_path1) { 93 } else if (req->r_path1) {
94 seq_printf(s, " #%llx/%s", req->r_ino1.ino, 94 seq_printf(s, " #%llx/%s", req->r_ino1.ino,
95 req->r_path1); 95 req->r_path1);
96 } else {
97 seq_printf(s, " #%llx", req->r_ino1.ino);
96 } 98 }
97 99
98 if (req->r_old_dentry) { 100 if (req->r_old_dentry) {
@@ -102,7 +104,8 @@ static int mdsc_show(struct seq_file *s, void *p)
102 path = NULL; 104 path = NULL;
103 spin_lock(&req->r_old_dentry->d_lock); 105 spin_lock(&req->r_old_dentry->d_lock);
104 seq_printf(s, " #%llx/%.*s (%s)", 106 seq_printf(s, " #%llx/%.*s (%s)",
105 ceph_ino(req->r_old_dentry_dir), 107 req->r_old_dentry_dir ?
108 ceph_ino(req->r_old_dentry_dir) : 0,
106 req->r_old_dentry->d_name.len, 109 req->r_old_dentry->d_name.len,
107 req->r_old_dentry->d_name.name, 110 req->r_old_dentry->d_name.name,
108 path ? path : ""); 111 path ? path : "");
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 45eda6d7a40c..766410a12c2c 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -119,7 +119,8 @@ static int fpos_cmp(loff_t l, loff_t r)
119 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by 119 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
120 * the MDS if/when the directory is modified). 120 * the MDS if/when the directory is modified).
121 */ 121 */
122static int __dcache_readdir(struct file *file, struct dir_context *ctx) 122static int __dcache_readdir(struct file *file, struct dir_context *ctx,
123 u32 shared_gen)
123{ 124{
124 struct ceph_file_info *fi = file->private_data; 125 struct ceph_file_info *fi = file->private_data;
125 struct dentry *parent = file->f_dentry; 126 struct dentry *parent = file->f_dentry;
@@ -133,8 +134,8 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx)
133 last = fi->dentry; 134 last = fi->dentry;
134 fi->dentry = NULL; 135 fi->dentry = NULL;
135 136
136 dout("__dcache_readdir %p at %llu (last %p)\n", dir, ctx->pos, 137 dout("__dcache_readdir %p v%u at %llu (last %p)\n",
137 last); 138 dir, shared_gen, ctx->pos, last);
138 139
139 spin_lock(&parent->d_lock); 140 spin_lock(&parent->d_lock);
140 141
@@ -161,7 +162,8 @@ more:
161 goto out_unlock; 162 goto out_unlock;
162 } 163 }
163 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 164 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
164 if (!d_unhashed(dentry) && dentry->d_inode && 165 if (di->lease_shared_gen == shared_gen &&
166 !d_unhashed(dentry) && dentry->d_inode &&
165 ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && 167 ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
166 ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && 168 ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
167 fpos_cmp(ctx->pos, di->offset) <= 0) 169 fpos_cmp(ctx->pos, di->offset) <= 0)
@@ -190,7 +192,7 @@ more:
190 if (last) { 192 if (last) {
191 /* remember our position */ 193 /* remember our position */
192 fi->dentry = last; 194 fi->dentry = last;
193 fi->next_offset = di->offset; 195 fi->next_offset = fpos_off(di->offset);
194 } 196 }
195 dput(dentry); 197 dput(dentry);
196 return 0; 198 return 0;
@@ -252,8 +254,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
252 int err; 254 int err;
253 u32 ftype; 255 u32 ftype;
254 struct ceph_mds_reply_info_parsed *rinfo; 256 struct ceph_mds_reply_info_parsed *rinfo;
255 const int max_entries = fsc->mount_options->max_readdir;
256 const int max_bytes = fsc->mount_options->max_readdir_bytes;
257 257
258 dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off); 258 dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off);
259 if (fi->flags & CEPH_F_ATEND) 259 if (fi->flags & CEPH_F_ATEND)
@@ -291,8 +291,9 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
291 ceph_snap(inode) != CEPH_SNAPDIR && 291 ceph_snap(inode) != CEPH_SNAPDIR &&
292 __ceph_dir_is_complete(ci) && 292 __ceph_dir_is_complete(ci) &&
293 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { 293 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
294 u32 shared_gen = ci->i_shared_gen;
294 spin_unlock(&ci->i_ceph_lock); 295 spin_unlock(&ci->i_ceph_lock);
295 err = __dcache_readdir(file, ctx); 296 err = __dcache_readdir(file, ctx, shared_gen);
296 if (err != -EAGAIN) 297 if (err != -EAGAIN)
297 return err; 298 return err;
298 } else { 299 } else {
@@ -322,14 +323,16 @@ more:
322 fi->last_readdir = NULL; 323 fi->last_readdir = NULL;
323 } 324 }
324 325
325 /* requery frag tree, as the frag topology may have changed */
326 frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL);
327
328 dout("readdir fetching %llx.%llx frag %x offset '%s'\n", 326 dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
329 ceph_vinop(inode), frag, fi->last_name); 327 ceph_vinop(inode), frag, fi->last_name);
330 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 328 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
331 if (IS_ERR(req)) 329 if (IS_ERR(req))
332 return PTR_ERR(req); 330 return PTR_ERR(req);
331 err = ceph_alloc_readdir_reply_buffer(req, inode);
332 if (err) {
333 ceph_mdsc_put_request(req);
334 return err;
335 }
333 req->r_inode = inode; 336 req->r_inode = inode;
334 ihold(inode); 337 ihold(inode);
335 req->r_dentry = dget(file->f_dentry); 338 req->r_dentry = dget(file->f_dentry);
@@ -340,9 +343,6 @@ more:
340 req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); 343 req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
341 req->r_readdir_offset = fi->next_offset; 344 req->r_readdir_offset = fi->next_offset;
342 req->r_args.readdir.frag = cpu_to_le32(frag); 345 req->r_args.readdir.frag = cpu_to_le32(frag);
343 req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
344 req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes);
345 req->r_num_caps = max_entries + 1;
346 err = ceph_mdsc_do_request(mdsc, NULL, req); 346 err = ceph_mdsc_do_request(mdsc, NULL, req);
347 if (err < 0) { 347 if (err < 0) {
348 ceph_mdsc_put_request(req); 348 ceph_mdsc_put_request(req);
@@ -369,9 +369,9 @@ more:
369 fi->next_offset = 0; 369 fi->next_offset = 0;
370 off = fi->next_offset; 370 off = fi->next_offset;
371 } 371 }
372 fi->frag = frag;
372 fi->offset = fi->next_offset; 373 fi->offset = fi->next_offset;
373 fi->last_readdir = req; 374 fi->last_readdir = req;
374 fi->frag = frag;
375 375
376 if (req->r_reply_info.dir_end) { 376 if (req->r_reply_info.dir_end) {
377 kfree(fi->last_name); 377 kfree(fi->last_name);
@@ -454,7 +454,7 @@ more:
454 return 0; 454 return 0;
455} 455}
456 456
457static void reset_readdir(struct ceph_file_info *fi) 457static void reset_readdir(struct ceph_file_info *fi, unsigned frag)
458{ 458{
459 if (fi->last_readdir) { 459 if (fi->last_readdir) {
460 ceph_mdsc_put_request(fi->last_readdir); 460 ceph_mdsc_put_request(fi->last_readdir);
@@ -462,7 +462,10 @@ static void reset_readdir(struct ceph_file_info *fi)
462 } 462 }
463 kfree(fi->last_name); 463 kfree(fi->last_name);
464 fi->last_name = NULL; 464 fi->last_name = NULL;
465 fi->next_offset = 2; /* compensate for . and .. */ 465 if (ceph_frag_is_leftmost(frag))
466 fi->next_offset = 2; /* compensate for . and .. */
467 else
468 fi->next_offset = 0;
466 if (fi->dentry) { 469 if (fi->dentry) {
467 dput(fi->dentry); 470 dput(fi->dentry);
468 fi->dentry = NULL; 471 fi->dentry = NULL;
@@ -474,7 +477,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
474{ 477{
475 struct ceph_file_info *fi = file->private_data; 478 struct ceph_file_info *fi = file->private_data;
476 struct inode *inode = file->f_mapping->host; 479 struct inode *inode = file->f_mapping->host;
477 loff_t old_offset = offset; 480 loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset);
478 loff_t retval; 481 loff_t retval;
479 482
480 mutex_lock(&inode->i_mutex); 483 mutex_lock(&inode->i_mutex);
@@ -491,7 +494,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
491 goto out; 494 goto out;
492 } 495 }
493 496
494 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { 497 if (offset >= 0) {
495 if (offset != file->f_pos) { 498 if (offset != file->f_pos) {
496 file->f_pos = offset; 499 file->f_pos = offset;
497 file->f_version = 0; 500 file->f_version = 0;
@@ -504,14 +507,14 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
504 * seek to new frag, or seek prior to current chunk. 507 * seek to new frag, or seek prior to current chunk.
505 */ 508 */
506 if (offset == 0 || 509 if (offset == 0 ||
507 fpos_frag(offset) != fpos_frag(old_offset) || 510 fpos_frag(offset) != fi->frag ||
508 fpos_off(offset) < fi->offset) { 511 fpos_off(offset) < fi->offset) {
509 dout("dir_llseek dropping %p content\n", file); 512 dout("dir_llseek dropping %p content\n", file);
510 reset_readdir(fi); 513 reset_readdir(fi, fpos_frag(offset));
511 } 514 }
512 515
513 /* bump dir_release_count if we did a forward seek */ 516 /* bump dir_release_count if we did a forward seek */
514 if (offset > old_offset) 517 if (fpos_cmp(offset, old_offset) > 0)
515 fi->dir_release_count--; 518 fi->dir_release_count--;
516 } 519 }
517out: 520out:
@@ -812,8 +815,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
812 } 815 }
813 req->r_dentry = dget(dentry); 816 req->r_dentry = dget(dentry);
814 req->r_num_caps = 2; 817 req->r_num_caps = 2;
815 req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */ 818 req->r_old_dentry = dget(old_dentry);
816 req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
817 req->r_locked_dir = dir; 819 req->r_locked_dir = dir;
818 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 820 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
819 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 821 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -911,10 +913,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
911 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS); 913 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
912 if (IS_ERR(req)) 914 if (IS_ERR(req))
913 return PTR_ERR(req); 915 return PTR_ERR(req);
916 ihold(old_dir);
914 req->r_dentry = dget(new_dentry); 917 req->r_dentry = dget(new_dentry);
915 req->r_num_caps = 2; 918 req->r_num_caps = 2;
916 req->r_old_dentry = dget(old_dentry); 919 req->r_old_dentry = dget(old_dentry);
917 req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); 920 req->r_old_dentry_dir = old_dir;
918 req->r_locked_dir = new_dir; 921 req->r_locked_dir = new_dir;
919 req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; 922 req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
920 req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; 923 req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 16796be53ca5..00d6af6a32ec 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -8,23 +8,6 @@
8#include "mds_client.h" 8#include "mds_client.h"
9 9
10/* 10/*
11 * NFS export support
12 *
13 * NFS re-export of a ceph mount is, at present, only semireliable.
14 * The basic issue is that the Ceph architectures doesn't lend itself
15 * well to generating filehandles that will remain valid forever.
16 *
17 * So, we do our best. If you're lucky, your inode will be in the
18 * client's cache. If it's not, and you have a connectable fh, then
19 * the MDS server may be able to find it for you. Otherwise, you get
20 * ESTALE.
21 *
22 * There are ways to this more reliable, but in the non-connectable fh
23 * case, we won't every work perfectly, and in the connectable case,
24 * some changes are needed on the MDS side to work better.
25 */
26
27/*
28 * Basic fh 11 * Basic fh
29 */ 12 */
30struct ceph_nfs_fh { 13struct ceph_nfs_fh {
@@ -32,22 +15,12 @@ struct ceph_nfs_fh {
32} __attribute__ ((packed)); 15} __attribute__ ((packed));
33 16
34/* 17/*
35 * Larger 'connectable' fh that includes parent ino and name hash. 18 * Larger fh that includes parent ino.
36 * Use this whenever possible, as it works more reliably.
37 */ 19 */
38struct ceph_nfs_confh { 20struct ceph_nfs_confh {
39 u64 ino, parent_ino; 21 u64 ino, parent_ino;
40 u32 parent_name_hash;
41} __attribute__ ((packed)); 22} __attribute__ ((packed));
42 23
43/*
44 * The presence of @parent_inode here tells us whether NFS wants a
45 * connectable file handle. However, we want to make a connectionable
46 * file handle unconditionally so that the MDS gets as much of a hint
47 * as possible. That means we only use @parent_dentry to indicate
48 * whether nfsd wants a connectable fh, and whether we should indicate
49 * failure from a too-small @max_len.
50 */
51static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, 24static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
52 struct inode *parent_inode) 25 struct inode *parent_inode)
53{ 26{
@@ -56,54 +29,36 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
56 struct ceph_nfs_confh *cfh = (void *)rawfh; 29 struct ceph_nfs_confh *cfh = (void *)rawfh;
57 int connected_handle_length = sizeof(*cfh)/4; 30 int connected_handle_length = sizeof(*cfh)/4;
58 int handle_length = sizeof(*fh)/4; 31 int handle_length = sizeof(*fh)/4;
59 struct dentry *dentry;
60 struct dentry *parent;
61 32
62 /* don't re-export snaps */ 33 /* don't re-export snaps */
63 if (ceph_snap(inode) != CEPH_NOSNAP) 34 if (ceph_snap(inode) != CEPH_NOSNAP)
64 return -EINVAL; 35 return -EINVAL;
65 36
66 dentry = d_find_alias(inode); 37 if (parent_inode && (*max_len < connected_handle_length)) {
38 *max_len = connected_handle_length;
39 return FILEID_INVALID;
40 } else if (*max_len < handle_length) {
41 *max_len = handle_length;
42 return FILEID_INVALID;
43 }
67 44
68 /* if we found an alias, generate a connectable fh */ 45 if (parent_inode) {
69 if (*max_len >= connected_handle_length && dentry) { 46 dout("encode_fh %llx with parent %llx\n",
70 dout("encode_fh %p connectable\n", dentry); 47 ceph_ino(inode), ceph_ino(parent_inode));
71 spin_lock(&dentry->d_lock);
72 parent = dentry->d_parent;
73 cfh->ino = ceph_ino(inode); 48 cfh->ino = ceph_ino(inode);
74 cfh->parent_ino = ceph_ino(parent->d_inode); 49 cfh->parent_ino = ceph_ino(parent_inode);
75 cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode,
76 dentry);
77 *max_len = connected_handle_length; 50 *max_len = connected_handle_length;
78 type = 2; 51 type = FILEID_INO32_GEN_PARENT;
79 spin_unlock(&dentry->d_lock);
80 } else if (*max_len >= handle_length) {
81 if (parent_inode) {
82 /* nfsd wants connectable */
83 *max_len = connected_handle_length;
84 type = FILEID_INVALID;
85 } else {
86 dout("encode_fh %p\n", dentry);
87 fh->ino = ceph_ino(inode);
88 *max_len = handle_length;
89 type = 1;
90 }
91 } else { 52 } else {
53 dout("encode_fh %llx\n", ceph_ino(inode));
54 fh->ino = ceph_ino(inode);
92 *max_len = handle_length; 55 *max_len = handle_length;
93 type = FILEID_INVALID; 56 type = FILEID_INO32_GEN;
94 } 57 }
95 if (dentry)
96 dput(dentry);
97 return type; 58 return type;
98} 59}
99 60
100/* 61static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
101 * convert regular fh to dentry
102 *
103 * FIXME: we should try harder by querying the mds for the ino.
104 */
105static struct dentry *__fh_to_dentry(struct super_block *sb,
106 struct ceph_nfs_fh *fh, int fh_len)
107{ 62{
108 struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; 63 struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
109 struct inode *inode; 64 struct inode *inode;
@@ -111,11 +66,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
111 struct ceph_vino vino; 66 struct ceph_vino vino;
112 int err; 67 int err;
113 68
114 if (fh_len < sizeof(*fh) / 4) 69 vino.ino = ino;
115 return ERR_PTR(-ESTALE);
116
117 dout("__fh_to_dentry %llx\n", fh->ino);
118 vino.ino = fh->ino;
119 vino.snap = CEPH_NOSNAP; 70 vino.snap = CEPH_NOSNAP;
120 inode = ceph_find_inode(sb, vino); 71 inode = ceph_find_inode(sb, vino);
121 if (!inode) { 72 if (!inode) {
@@ -139,139 +90,161 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
139 90
140 dentry = d_obtain_alias(inode); 91 dentry = d_obtain_alias(inode);
141 if (IS_ERR(dentry)) { 92 if (IS_ERR(dentry)) {
142 pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n",
143 fh->ino, inode);
144 iput(inode); 93 iput(inode);
145 return dentry; 94 return dentry;
146 } 95 }
147 err = ceph_init_dentry(dentry); 96 err = ceph_init_dentry(dentry);
148 if (err < 0) { 97 if (err < 0) {
149 iput(inode); 98 dput(dentry);
150 return ERR_PTR(err); 99 return ERR_PTR(err);
151 } 100 }
152 dout("__fh_to_dentry %llx %p dentry %p\n", fh->ino, inode, dentry); 101 dout("__fh_to_dentry %llx %p dentry %p\n", ino, inode, dentry);
153 return dentry; 102 return dentry;
154} 103}
155 104
156/* 105/*
157 * convert connectable fh to dentry 106 * convert regular fh to dentry
158 */ 107 */
159static struct dentry *__cfh_to_dentry(struct super_block *sb, 108static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
160 struct ceph_nfs_confh *cfh, int fh_len) 109 struct fid *fid,
110 int fh_len, int fh_type)
111{
112 struct ceph_nfs_fh *fh = (void *)fid->raw;
113
114 if (fh_type != FILEID_INO32_GEN &&
115 fh_type != FILEID_INO32_GEN_PARENT)
116 return NULL;
117 if (fh_len < sizeof(*fh) / 4)
118 return NULL;
119
120 dout("fh_to_dentry %llx\n", fh->ino);
121 return __fh_to_dentry(sb, fh->ino);
122}
123
124static struct dentry *__get_parent(struct super_block *sb,
125 struct dentry *child, u64 ino)
161{ 126{
162 struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; 127 struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
128 struct ceph_mds_request *req;
163 struct inode *inode; 129 struct inode *inode;
164 struct dentry *dentry; 130 struct dentry *dentry;
165 struct ceph_vino vino;
166 int err; 131 int err;
167 132
168 if (fh_len < sizeof(*cfh) / 4) 133 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
169 return ERR_PTR(-ESTALE); 134 USE_ANY_MDS);
170 135 if (IS_ERR(req))
171 dout("__cfh_to_dentry %llx (%llx/%x)\n", 136 return ERR_CAST(req);
172 cfh->ino, cfh->parent_ino, cfh->parent_name_hash);
173
174 vino.ino = cfh->ino;
175 vino.snap = CEPH_NOSNAP;
176 inode = ceph_find_inode(sb, vino);
177 if (!inode) {
178 struct ceph_mds_request *req;
179
180 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH,
181 USE_ANY_MDS);
182 if (IS_ERR(req))
183 return ERR_CAST(req);
184 137
185 req->r_ino1 = vino; 138 if (child) {
186 req->r_ino2.ino = cfh->parent_ino; 139 req->r_inode = child->d_inode;
187 req->r_ino2.snap = CEPH_NOSNAP; 140 ihold(child->d_inode);
188 req->r_path2 = kmalloc(16, GFP_NOFS); 141 } else {
189 snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash); 142 req->r_ino1 = (struct ceph_vino) {
190 req->r_num_caps = 1; 143 .ino = ino,
191 err = ceph_mdsc_do_request(mdsc, NULL, req); 144 .snap = CEPH_NOSNAP,
192 inode = req->r_target_inode; 145 };
193 if (inode)
194 ihold(inode);
195 ceph_mdsc_put_request(req);
196 if (!inode)
197 return ERR_PTR(err ? err : -ESTALE);
198 } 146 }
147 req->r_num_caps = 1;
148 err = ceph_mdsc_do_request(mdsc, NULL, req);
149 inode = req->r_target_inode;
150 if (inode)
151 ihold(inode);
152 ceph_mdsc_put_request(req);
153 if (!inode)
154 return ERR_PTR(-ENOENT);
199 155
200 dentry = d_obtain_alias(inode); 156 dentry = d_obtain_alias(inode);
201 if (IS_ERR(dentry)) { 157 if (IS_ERR(dentry)) {
202 pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n",
203 cfh->ino, inode);
204 iput(inode); 158 iput(inode);
205 return dentry; 159 return dentry;
206 } 160 }
207 err = ceph_init_dentry(dentry); 161 err = ceph_init_dentry(dentry);
208 if (err < 0) { 162 if (err < 0) {
209 iput(inode); 163 dput(dentry);
210 return ERR_PTR(err); 164 return ERR_PTR(err);
211 } 165 }
212 dout("__cfh_to_dentry %llx %p dentry %p\n", cfh->ino, inode, dentry); 166 dout("__get_parent ino %llx parent %p ino %llx.%llx\n",
167 child ? ceph_ino(child->d_inode) : ino,
168 dentry, ceph_vinop(inode));
213 return dentry; 169 return dentry;
214} 170}
215 171
216static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid, 172struct dentry *ceph_get_parent(struct dentry *child)
217 int fh_len, int fh_type)
218{ 173{
219 if (fh_type == 1) 174 /* don't re-export snaps */
220 return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw, 175 if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
221 fh_len); 176 return ERR_PTR(-EINVAL);
222 else 177
223 return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw, 178 dout("get_parent %p ino %llx.%llx\n",
224 fh_len); 179 child, ceph_vinop(child->d_inode));
180 return __get_parent(child->d_sb, child, 0);
225} 181}
226 182
227/* 183/*
228 * get parent, if possible. 184 * convert regular fh to parent
229 *
230 * FIXME: we could do better by querying the mds to discover the
231 * parent.
232 */ 185 */
233static struct dentry *ceph_fh_to_parent(struct super_block *sb, 186static struct dentry *ceph_fh_to_parent(struct super_block *sb,
234 struct fid *fid, 187 struct fid *fid,
235 int fh_len, int fh_type) 188 int fh_len, int fh_type)
236{ 189{
237 struct ceph_nfs_confh *cfh = (void *)fid->raw; 190 struct ceph_nfs_confh *cfh = (void *)fid->raw;
238 struct ceph_vino vino;
239 struct inode *inode;
240 struct dentry *dentry; 191 struct dentry *dentry;
241 int err;
242 192
243 if (fh_type == 1) 193 if (fh_type != FILEID_INO32_GEN_PARENT)
244 return ERR_PTR(-ESTALE); 194 return NULL;
245 if (fh_len < sizeof(*cfh) / 4) 195 if (fh_len < sizeof(*cfh) / 4)
246 return ERR_PTR(-ESTALE); 196 return NULL;
247 197
248 pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino, 198 dout("fh_to_parent %llx\n", cfh->parent_ino);
249 cfh->parent_name_hash); 199 dentry = __get_parent(sb, NULL, cfh->ino);
200 if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT)
201 dentry = __fh_to_dentry(sb, cfh->parent_ino);
202 return dentry;
203}
250 204
251 vino.ino = cfh->ino; 205static int ceph_get_name(struct dentry *parent, char *name,
252 vino.snap = CEPH_NOSNAP; 206 struct dentry *child)
253 inode = ceph_find_inode(sb, vino); 207{
254 if (!inode) 208 struct ceph_mds_client *mdsc;
255 return ERR_PTR(-ESTALE); 209 struct ceph_mds_request *req;
210 int err;
256 211
257 dentry = d_obtain_alias(inode); 212 mdsc = ceph_inode_to_client(child->d_inode)->mdsc;
258 if (IS_ERR(dentry)) { 213 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
259 pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n", 214 USE_ANY_MDS);
260 cfh->ino, inode); 215 if (IS_ERR(req))
261 iput(inode); 216 return PTR_ERR(req);
262 return dentry; 217
263 } 218 mutex_lock(&parent->d_inode->i_mutex);
264 err = ceph_init_dentry(dentry); 219
265 if (err < 0) { 220 req->r_inode = child->d_inode;
266 iput(inode); 221 ihold(child->d_inode);
267 return ERR_PTR(err); 222 req->r_ino2 = ceph_vino(parent->d_inode);
223 req->r_locked_dir = parent->d_inode;
224 req->r_num_caps = 2;
225 err = ceph_mdsc_do_request(mdsc, NULL, req);
226
227 mutex_unlock(&parent->d_inode->i_mutex);
228
229 if (!err) {
230 struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
231 memcpy(name, rinfo->dname, rinfo->dname_len);
232 name[rinfo->dname_len] = 0;
233 dout("get_name %p ino %llx.%llx name %s\n",
234 child, ceph_vinop(child->d_inode), name);
235 } else {
236 dout("get_name %p ino %llx.%llx err %d\n",
237 child, ceph_vinop(child->d_inode), err);
268 } 238 }
269 dout("fh_to_parent %llx %p dentry %p\n", cfh->ino, inode, dentry); 239
270 return dentry; 240 ceph_mdsc_put_request(req);
241 return err;
271} 242}
272 243
273const struct export_operations ceph_export_ops = { 244const struct export_operations ceph_export_ops = {
274 .encode_fh = ceph_encode_fh, 245 .encode_fh = ceph_encode_fh,
275 .fh_to_dentry = ceph_fh_to_dentry, 246 .fh_to_dentry = ceph_fh_to_dentry,
276 .fh_to_parent = ceph_fh_to_parent, 247 .fh_to_parent = ceph_fh_to_parent,
248 .get_parent = ceph_get_parent,
249 .get_name = ceph_get_name,
277}; 250};
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 09c7afe32e49..39da1c2efa50 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -210,7 +210,7 @@ int ceph_open(struct inode *inode, struct file *file)
210 ihold(inode); 210 ihold(inode);
211 211
212 req->r_num_caps = 1; 212 req->r_num_caps = 1;
213 if (flags & (O_CREAT|O_TRUNC)) 213 if (flags & O_CREAT)
214 parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); 214 parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
215 err = ceph_mdsc_do_request(mdsc, parent_inode, req); 215 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
216 iput(parent_inode); 216 iput(parent_inode);
@@ -291,8 +291,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
291 } 291 }
292 err = finish_open(file, dentry, ceph_open, opened); 292 err = finish_open(file, dentry, ceph_open, opened);
293 } 293 }
294
295out_err: 294out_err:
295 if (!req->r_err && req->r_target_inode)
296 ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode);
296 ceph_mdsc_put_request(req); 297 ceph_mdsc_put_request(req);
297 dout("atomic_open result=%d\n", err); 298 dout("atomic_open result=%d\n", err);
298 return err; 299 return err;
@@ -600,7 +601,7 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
600 false); 601 false);
601 if (IS_ERR(req)) { 602 if (IS_ERR(req)) {
602 ret = PTR_ERR(req); 603 ret = PTR_ERR(req);
603 goto out; 604 break;
604 } 605 }
605 606
606 num_pages = calc_pages_for(page_align, len); 607 num_pages = calc_pages_for(page_align, len);
@@ -718,7 +719,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
718 false); 719 false);
719 if (IS_ERR(req)) { 720 if (IS_ERR(req)) {
720 ret = PTR_ERR(req); 721 ret = PTR_ERR(req);
721 goto out; 722 break;
722 } 723 }
723 724
724 /* 725 /*
@@ -970,6 +971,8 @@ retry_snap:
970 goto retry_snap; 971 goto retry_snap;
971 } 972 }
972 } else { 973 } else {
974 loff_t old_size = inode->i_size;
975 struct iov_iter from;
973 /* 976 /*
974 * No need to acquire the i_truncate_mutex. Because 977 * No need to acquire the i_truncate_mutex. Because
975 * the MDS revokes Fwb caps before sending truncate 978 * the MDS revokes Fwb caps before sending truncate
@@ -977,9 +980,12 @@ retry_snap:
977 * are pending vmtruncate. So write and vmtruncate 980 * are pending vmtruncate. So write and vmtruncate
978 * can not run at the same time 981 * can not run at the same time
979 */ 982 */
980 written = generic_file_buffered_write(iocb, iov, nr_segs, 983 iov_iter_init(&from, iov, nr_segs, count, 0);
981 pos, &iocb->ki_pos, 984 written = generic_perform_write(file, &from, pos);
982 count, 0); 985 if (likely(written >= 0))
986 iocb->ki_pos = pos + written;
987 if (inode->i_size > old_size)
988 ceph_fscache_update_objectsize(inode);
983 mutex_unlock(&inode->i_mutex); 989 mutex_unlock(&inode->i_mutex);
984 } 990 }
985 991
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 32d519d8a2e2..0b0728e5be2d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -659,14 +659,6 @@ static int fill_inode(struct inode *inode,
659 le32_to_cpu(info->time_warp_seq), 659 le32_to_cpu(info->time_warp_seq),
660 &ctime, &mtime, &atime); 660 &ctime, &mtime, &atime);
661 661
662 /* only update max_size on auth cap */
663 if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
664 ci->i_max_size != le64_to_cpu(info->max_size)) {
665 dout("max_size %lld -> %llu\n", ci->i_max_size,
666 le64_to_cpu(info->max_size));
667 ci->i_max_size = le64_to_cpu(info->max_size);
668 }
669
670 ci->i_layout = info->layout; 662 ci->i_layout = info->layout;
671 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; 663 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
672 664
@@ -755,6 +747,14 @@ static int fill_inode(struct inode *inode,
755 ci->i_max_offset = 2; 747 ci->i_max_offset = 2;
756 } 748 }
757no_change: 749no_change:
750 /* only update max_size on auth cap */
751 if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
752 ci->i_max_size != le64_to_cpu(info->max_size)) {
753 dout("max_size %lld -> %llu\n", ci->i_max_size,
754 le64_to_cpu(info->max_size));
755 ci->i_max_size = le64_to_cpu(info->max_size);
756 }
757
758 spin_unlock(&ci->i_ceph_lock); 758 spin_unlock(&ci->i_ceph_lock);
759 759
760 /* queue truncate if we saw i_size decrease */ 760 /* queue truncate if we saw i_size decrease */
@@ -1044,10 +1044,59 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1044 session, req->r_request_started, -1, 1044 session, req->r_request_started, -1,
1045 &req->r_caps_reservation); 1045 &req->r_caps_reservation);
1046 if (err < 0) 1046 if (err < 0)
1047 return err; 1047 goto done;
1048 } else { 1048 } else {
1049 WARN_ON_ONCE(1); 1049 WARN_ON_ONCE(1);
1050 } 1050 }
1051
1052 if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME) {
1053 struct qstr dname;
1054 struct dentry *dn, *parent;
1055
1056 BUG_ON(!rinfo->head->is_target);
1057 BUG_ON(req->r_dentry);
1058
1059 parent = d_find_any_alias(dir);
1060 BUG_ON(!parent);
1061
1062 dname.name = rinfo->dname;
1063 dname.len = rinfo->dname_len;
1064 dname.hash = full_name_hash(dname.name, dname.len);
1065 vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
1066 vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
1067retry_lookup:
1068 dn = d_lookup(parent, &dname);
1069 dout("d_lookup on parent=%p name=%.*s got %p\n",
1070 parent, dname.len, dname.name, dn);
1071
1072 if (!dn) {
1073 dn = d_alloc(parent, &dname);
1074 dout("d_alloc %p '%.*s' = %p\n", parent,
1075 dname.len, dname.name, dn);
1076 if (dn == NULL) {
1077 dput(parent);
1078 err = -ENOMEM;
1079 goto done;
1080 }
1081 err = ceph_init_dentry(dn);
1082 if (err < 0) {
1083 dput(dn);
1084 dput(parent);
1085 goto done;
1086 }
1087 } else if (dn->d_inode &&
1088 (ceph_ino(dn->d_inode) != vino.ino ||
1089 ceph_snap(dn->d_inode) != vino.snap)) {
1090 dout(" dn %p points to wrong inode %p\n",
1091 dn, dn->d_inode);
1092 d_delete(dn);
1093 dput(dn);
1094 goto retry_lookup;
1095 }
1096
1097 req->r_dentry = dn;
1098 dput(parent);
1099 }
1051 } 1100 }
1052 1101
1053 if (rinfo->head->is_target) { 1102 if (rinfo->head->is_target) {
@@ -1063,7 +1112,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1063 1112
1064 err = fill_inode(in, &rinfo->targeti, NULL, 1113 err = fill_inode(in, &rinfo->targeti, NULL,
1065 session, req->r_request_started, 1114 session, req->r_request_started,
1066 (le32_to_cpu(rinfo->head->result) == 0) ? 1115 (!req->r_aborted && rinfo->head->result == 0) ?
1067 req->r_fmode : -1, 1116 req->r_fmode : -1,
1068 &req->r_caps_reservation); 1117 &req->r_caps_reservation);
1069 if (err < 0) { 1118 if (err < 0) {
@@ -1616,8 +1665,6 @@ static const struct inode_operations ceph_symlink_iops = {
1616 .getxattr = ceph_getxattr, 1665 .getxattr = ceph_getxattr,
1617 .listxattr = ceph_listxattr, 1666 .listxattr = ceph_listxattr,
1618 .removexattr = ceph_removexattr, 1667 .removexattr = ceph_removexattr,
1619 .get_acl = ceph_get_acl,
1620 .set_acl = ceph_set_acl,
1621}; 1668};
1622 1669
1623/* 1670/*
@@ -1627,7 +1674,6 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1627{ 1674{
1628 struct inode *inode = dentry->d_inode; 1675 struct inode *inode = dentry->d_inode;
1629 struct ceph_inode_info *ci = ceph_inode(inode); 1676 struct ceph_inode_info *ci = ceph_inode(inode);
1630 struct inode *parent_inode;
1631 const unsigned int ia_valid = attr->ia_valid; 1677 const unsigned int ia_valid = attr->ia_valid;
1632 struct ceph_mds_request *req; 1678 struct ceph_mds_request *req;
1633 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; 1679 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
@@ -1819,9 +1865,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1819 req->r_inode_drop = release; 1865 req->r_inode_drop = release;
1820 req->r_args.setattr.mask = cpu_to_le32(mask); 1866 req->r_args.setattr.mask = cpu_to_le32(mask);
1821 req->r_num_caps = 1; 1867 req->r_num_caps = 1;
1822 parent_inode = ceph_get_dentry_parent_inode(dentry); 1868 err = ceph_mdsc_do_request(mdsc, NULL, req);
1823 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
1824 iput(parent_inode);
1825 } 1869 }
1826 dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, 1870 dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
1827 ceph_cap_string(dirtied), mask); 1871 ceph_cap_string(dirtied), mask);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index dc66c9e023e4..fdf941b44ff1 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -1,9 +1,8 @@
1#include <linux/ceph/ceph_debug.h>
1#include <linux/in.h> 2#include <linux/in.h>
2 3
3#include "super.h" 4#include "super.h"
4#include "mds_client.h" 5#include "mds_client.h"
5#include <linux/ceph/ceph_debug.h>
6
7#include "ioctl.h" 6#include "ioctl.h"
8 7
9 8
@@ -64,7 +63,6 @@ static long __validate_layout(struct ceph_mds_client *mdsc,
64static long ceph_ioctl_set_layout(struct file *file, void __user *arg) 63static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
65{ 64{
66 struct inode *inode = file_inode(file); 65 struct inode *inode = file_inode(file);
67 struct inode *parent_inode;
68 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 66 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
69 struct ceph_mds_request *req; 67 struct ceph_mds_request *req;
70 struct ceph_ioctl_layout l; 68 struct ceph_ioctl_layout l;
@@ -121,9 +119,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
121 cpu_to_le32(l.object_size); 119 cpu_to_le32(l.object_size);
122 req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool); 120 req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool);
123 121
124 parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); 122 err = ceph_mdsc_do_request(mdsc, NULL, req);
125 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
126 iput(parent_inode);
127 ceph_mdsc_put_request(req); 123 ceph_mdsc_put_request(req);
128 return err; 124 return err;
129} 125}
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index ae6d14e82b0f..d94ba0df9f4d 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -2,11 +2,31 @@
2 2
3#include <linux/file.h> 3#include <linux/file.h>
4#include <linux/namei.h> 4#include <linux/namei.h>
5#include <linux/random.h>
5 6
6#include "super.h" 7#include "super.h"
7#include "mds_client.h" 8#include "mds_client.h"
8#include <linux/ceph/pagelist.h> 9#include <linux/ceph/pagelist.h>
9 10
11static u64 lock_secret;
12
13static inline u64 secure_addr(void *addr)
14{
15 u64 v = lock_secret ^ (u64)(unsigned long)addr;
16 /*
17 * Set the most significant bit, so that MDS knows the 'owner'
18 * is sufficient to identify the owner of lock. (old code uses
19 * both 'owner' and 'pid')
20 */
21 v |= (1ULL << 63);
22 return v;
23}
24
25void __init ceph_flock_init(void)
26{
27 get_random_bytes(&lock_secret, sizeof(lock_secret));
28}
29
10/** 30/**
11 * Implement fcntl and flock locking functions. 31 * Implement fcntl and flock locking functions.
12 */ 32 */
@@ -14,11 +34,11 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
14 int cmd, u8 wait, struct file_lock *fl) 34 int cmd, u8 wait, struct file_lock *fl)
15{ 35{
16 struct inode *inode = file_inode(file); 36 struct inode *inode = file_inode(file);
17 struct ceph_mds_client *mdsc = 37 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
18 ceph_sb_to_client(inode->i_sb)->mdsc;
19 struct ceph_mds_request *req; 38 struct ceph_mds_request *req;
20 int err; 39 int err;
21 u64 length = 0; 40 u64 length = 0;
41 u64 owner;
22 42
23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 43 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
24 if (IS_ERR(req)) 44 if (IS_ERR(req))
@@ -32,25 +52,27 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
32 else 52 else
33 length = fl->fl_end - fl->fl_start + 1; 53 length = fl->fl_end - fl->fl_start + 1;
34 54
35 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 55 if (lock_type == CEPH_LOCK_FCNTL)
36 "length: %llu, wait: %d, type: %d", (int)lock_type, 56 owner = secure_addr(fl->fl_owner);
37 (int)operation, (u64)fl->fl_pid, fl->fl_start, 57 else
38 length, wait, fl->fl_type); 58 owner = secure_addr(fl->fl_file);
59
60 dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
61 "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type,
62 (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
63 wait, fl->fl_type);
39 64
40 req->r_args.filelock_change.rule = lock_type; 65 req->r_args.filelock_change.rule = lock_type;
41 req->r_args.filelock_change.type = cmd; 66 req->r_args.filelock_change.type = cmd;
67 req->r_args.filelock_change.owner = cpu_to_le64(owner);
42 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); 68 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
43 /* This should be adjusted, but I'm not sure if
44 namespaces actually get id numbers*/
45 req->r_args.filelock_change.pid_namespace =
46 cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
47 req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); 69 req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
48 req->r_args.filelock_change.length = cpu_to_le64(length); 70 req->r_args.filelock_change.length = cpu_to_le64(length);
49 req->r_args.filelock_change.wait = wait; 71 req->r_args.filelock_change.wait = wait;
50 72
51 err = ceph_mdsc_do_request(mdsc, inode, req); 73 err = ceph_mdsc_do_request(mdsc, inode, req);
52 74
53 if ( operation == CEPH_MDS_OP_GETFILELOCK){ 75 if (operation == CEPH_MDS_OP_GETFILELOCK) {
54 fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); 76 fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
55 if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) 77 if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
56 fl->fl_type = F_RDLCK; 78 fl->fl_type = F_RDLCK;
@@ -87,14 +109,19 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
87 u8 wait = 0; 109 u8 wait = 0;
88 u16 op = CEPH_MDS_OP_SETFILELOCK; 110 u16 op = CEPH_MDS_OP_SETFILELOCK;
89 111
90 fl->fl_nspid = get_pid(task_tgid(current)); 112 if (!(fl->fl_flags & FL_POSIX))
91 dout("ceph_lock, fl_pid:%d", fl->fl_pid); 113 return -ENOLCK;
114 /* No mandatory locks */
115 if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
116 return -ENOLCK;
117
118 dout("ceph_lock, fl_owner: %p", fl->fl_owner);
92 119
93 /* set wait bit as appropriate, then make command as Ceph expects it*/ 120 /* set wait bit as appropriate, then make command as Ceph expects it*/
94 if (F_SETLKW == cmd) 121 if (IS_GETLK(cmd))
95 wait = 1;
96 if (F_GETLK == cmd)
97 op = CEPH_MDS_OP_GETFILELOCK; 122 op = CEPH_MDS_OP_GETFILELOCK;
123 else if (IS_SETLKW(cmd))
124 wait = 1;
98 125
99 if (F_RDLCK == fl->fl_type) 126 if (F_RDLCK == fl->fl_type)
100 lock_cmd = CEPH_LOCK_SHARED; 127 lock_cmd = CEPH_LOCK_SHARED;
@@ -105,7 +132,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
105 132
106 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); 133 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
107 if (!err) { 134 if (!err) {
108 if ( op != CEPH_MDS_OP_GETFILELOCK ){ 135 if (op != CEPH_MDS_OP_GETFILELOCK) {
109 dout("mds locked, locking locally"); 136 dout("mds locked, locking locally");
110 err = posix_lock_file(file, fl, NULL); 137 err = posix_lock_file(file, fl, NULL);
111 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 138 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
@@ -131,20 +158,22 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
131{ 158{
132 u8 lock_cmd; 159 u8 lock_cmd;
133 int err; 160 int err;
134 u8 wait = 1; 161 u8 wait = 0;
135 162
136 fl->fl_nspid = get_pid(task_tgid(current)); 163 if (!(fl->fl_flags & FL_FLOCK))
137 dout("ceph_flock, fl_pid:%d", fl->fl_pid); 164 return -ENOLCK;
138 165 /* No mandatory locks */
139 /* set wait bit, then clear it out of cmd*/ 166 if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
140 if (cmd & LOCK_NB) 167 return -ENOLCK;
141 wait = 0; 168
142 cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN); 169 dout("ceph_flock, fl_file: %p", fl->fl_file);
143 /* set command sequence that Ceph wants to see: 170
144 shared lock, exclusive lock, or unlock */ 171 if (IS_SETLKW(cmd))
145 if (LOCK_SH == cmd) 172 wait = 1;
173
174 if (F_RDLCK == fl->fl_type)
146 lock_cmd = CEPH_LOCK_SHARED; 175 lock_cmd = CEPH_LOCK_SHARED;
147 else if (LOCK_EX == cmd) 176 else if (F_WRLCK == fl->fl_type)
148 lock_cmd = CEPH_LOCK_EXCL; 177 lock_cmd = CEPH_LOCK_EXCL;
149 else 178 else
150 lock_cmd = CEPH_LOCK_UNLOCK; 179 lock_cmd = CEPH_LOCK_UNLOCK;
@@ -280,13 +309,14 @@ int lock_to_ceph_filelock(struct file_lock *lock,
280 struct ceph_filelock *cephlock) 309 struct ceph_filelock *cephlock)
281{ 310{
282 int err = 0; 311 int err = 0;
283
284 cephlock->start = cpu_to_le64(lock->fl_start); 312 cephlock->start = cpu_to_le64(lock->fl_start);
285 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); 313 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
286 cephlock->client = cpu_to_le64(0); 314 cephlock->client = cpu_to_le64(0);
287 cephlock->pid = cpu_to_le64(lock->fl_pid); 315 cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
288 cephlock->pid_namespace = 316 if (lock->fl_flags & FL_POSIX)
289 cpu_to_le64((u64)(unsigned long)lock->fl_nspid); 317 cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
318 else
319 cephlock->owner = cpu_to_le64(secure_addr(lock->fl_file));
290 320
291 switch (lock->fl_type) { 321 switch (lock->fl_type) {
292 case F_RDLCK: 322 case F_RDLCK:
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f4f050a69a48..2b4d093d0563 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3,6 +3,7 @@
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/wait.h> 4#include <linux/wait.h>
5#include <linux/slab.h> 5#include <linux/slab.h>
6#include <linux/gfp.h>
6#include <linux/sched.h> 7#include <linux/sched.h>
7#include <linux/debugfs.h> 8#include <linux/debugfs.h>
8#include <linux/seq_file.h> 9#include <linux/seq_file.h>
@@ -165,21 +166,18 @@ static int parse_reply_info_dir(void **p, void *end,
165 if (num == 0) 166 if (num == 0)
166 goto done; 167 goto done;
167 168
168 /* alloc large array */ 169 BUG_ON(!info->dir_in);
169 info->dir_nr = num;
170 info->dir_in = kcalloc(num, sizeof(*info->dir_in) +
171 sizeof(*info->dir_dname) +
172 sizeof(*info->dir_dname_len) +
173 sizeof(*info->dir_dlease),
174 GFP_NOFS);
175 if (info->dir_in == NULL) {
176 err = -ENOMEM;
177 goto out_bad;
178 }
179 info->dir_dname = (void *)(info->dir_in + num); 170 info->dir_dname = (void *)(info->dir_in + num);
180 info->dir_dname_len = (void *)(info->dir_dname + num); 171 info->dir_dname_len = (void *)(info->dir_dname + num);
181 info->dir_dlease = (void *)(info->dir_dname_len + num); 172 info->dir_dlease = (void *)(info->dir_dname_len + num);
173 if ((unsigned long)(info->dir_dlease + num) >
174 (unsigned long)info->dir_in + info->dir_buf_size) {
175 pr_err("dir contents are larger than expected\n");
176 WARN_ON(1);
177 goto bad;
178 }
182 179
180 info->dir_nr = num;
183 while (num) { 181 while (num) {
184 /* dentry */ 182 /* dentry */
185 ceph_decode_need(p, end, sizeof(u32)*2, bad); 183 ceph_decode_need(p, end, sizeof(u32)*2, bad);
@@ -327,7 +325,9 @@ out_bad:
327 325
328static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) 326static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
329{ 327{
330 kfree(info->dir_in); 328 if (!info->dir_in)
329 return;
330 free_pages((unsigned long)info->dir_in, get_order(info->dir_buf_size));
331} 331}
332 332
333 333
@@ -512,12 +512,11 @@ void ceph_mdsc_release_request(struct kref *kref)
512 struct ceph_mds_request *req = container_of(kref, 512 struct ceph_mds_request *req = container_of(kref,
513 struct ceph_mds_request, 513 struct ceph_mds_request,
514 r_kref); 514 r_kref);
515 destroy_reply_info(&req->r_reply_info);
515 if (req->r_request) 516 if (req->r_request)
516 ceph_msg_put(req->r_request); 517 ceph_msg_put(req->r_request);
517 if (req->r_reply) { 518 if (req->r_reply)
518 ceph_msg_put(req->r_reply); 519 ceph_msg_put(req->r_reply);
519 destroy_reply_info(&req->r_reply_info);
520 }
521 if (req->r_inode) { 520 if (req->r_inode) {
522 ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); 521 ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
523 iput(req->r_inode); 522 iput(req->r_inode);
@@ -528,7 +527,9 @@ void ceph_mdsc_release_request(struct kref *kref)
528 iput(req->r_target_inode); 527 iput(req->r_target_inode);
529 if (req->r_dentry) 528 if (req->r_dentry)
530 dput(req->r_dentry); 529 dput(req->r_dentry);
531 if (req->r_old_dentry) { 530 if (req->r_old_dentry)
531 dput(req->r_old_dentry);
532 if (req->r_old_dentry_dir) {
532 /* 533 /*
533 * track (and drop pins for) r_old_dentry_dir 534 * track (and drop pins for) r_old_dentry_dir
534 * separately, since r_old_dentry's d_parent may have 535 * separately, since r_old_dentry's d_parent may have
@@ -537,7 +538,6 @@ void ceph_mdsc_release_request(struct kref *kref)
537 */ 538 */
538 ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), 539 ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
539 CEPH_CAP_PIN); 540 CEPH_CAP_PIN);
540 dput(req->r_old_dentry);
541 iput(req->r_old_dentry_dir); 541 iput(req->r_old_dentry_dir);
542 } 542 }
543 kfree(req->r_path1); 543 kfree(req->r_path1);
@@ -1311,6 +1311,9 @@ static int trim_caps(struct ceph_mds_client *mdsc,
1311 trim_caps - session->s_trim_caps); 1311 trim_caps - session->s_trim_caps);
1312 session->s_trim_caps = 0; 1312 session->s_trim_caps = 0;
1313 } 1313 }
1314
1315 ceph_add_cap_releases(mdsc, session);
1316 ceph_send_cap_releases(mdsc, session);
1314 return 0; 1317 return 0;
1315} 1318}
1316 1319
@@ -1461,15 +1464,18 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
1461 1464
1462 dout("discard_cap_releases mds%d\n", session->s_mds); 1465 dout("discard_cap_releases mds%d\n", session->s_mds);
1463 1466
1464 /* zero out the in-progress message */ 1467 if (!list_empty(&session->s_cap_releases)) {
1465 msg = list_first_entry(&session->s_cap_releases, 1468 /* zero out the in-progress message */
1466 struct ceph_msg, list_head); 1469 msg = list_first_entry(&session->s_cap_releases,
1467 head = msg->front.iov_base; 1470 struct ceph_msg, list_head);
1468 num = le32_to_cpu(head->num); 1471 head = msg->front.iov_base;
1469 dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); 1472 num = le32_to_cpu(head->num);
1470 head->num = cpu_to_le32(0); 1473 dout("discard_cap_releases mds%d %p %u\n",
1471 msg->front.iov_len = sizeof(*head); 1474 session->s_mds, msg, num);
1472 session->s_num_cap_releases += num; 1475 head->num = cpu_to_le32(0);
1476 msg->front.iov_len = sizeof(*head);
1477 session->s_num_cap_releases += num;
1478 }
1473 1479
1474 /* requeue completed messages */ 1480 /* requeue completed messages */
1475 while (!list_empty(&session->s_cap_releases_done)) { 1481 while (!list_empty(&session->s_cap_releases_done)) {
@@ -1492,6 +1498,43 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
1492 * requests 1498 * requests
1493 */ 1499 */
1494 1500
1501int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
1502 struct inode *dir)
1503{
1504 struct ceph_inode_info *ci = ceph_inode(dir);
1505 struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
1506 struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options;
1507 size_t size = sizeof(*rinfo->dir_in) + sizeof(*rinfo->dir_dname_len) +
1508 sizeof(*rinfo->dir_dname) + sizeof(*rinfo->dir_dlease);
1509 int order, num_entries;
1510
1511 spin_lock(&ci->i_ceph_lock);
1512 num_entries = ci->i_files + ci->i_subdirs;
1513 spin_unlock(&ci->i_ceph_lock);
1514 num_entries = max(num_entries, 1);
1515 num_entries = min(num_entries, opt->max_readdir);
1516
1517 order = get_order(size * num_entries);
1518 while (order >= 0) {
1519 rinfo->dir_in = (void*)__get_free_pages(GFP_NOFS | __GFP_NOWARN,
1520 order);
1521 if (rinfo->dir_in)
1522 break;
1523 order--;
1524 }
1525 if (!rinfo->dir_in)
1526 return -ENOMEM;
1527
1528 num_entries = (PAGE_SIZE << order) / size;
1529 num_entries = min(num_entries, opt->max_readdir);
1530
1531 rinfo->dir_buf_size = PAGE_SIZE << order;
1532 req->r_num_caps = num_entries + 1;
1533 req->r_args.readdir.max_entries = cpu_to_le32(num_entries);
1534 req->r_args.readdir.max_bytes = cpu_to_le32(opt->max_readdir_bytes);
1535 return 0;
1536}
1537
1495/* 1538/*
1496 * Create an mds request. 1539 * Create an mds request.
1497 */ 1540 */
@@ -2053,7 +2096,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
2053 ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); 2096 ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
2054 if (req->r_locked_dir) 2097 if (req->r_locked_dir)
2055 ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); 2098 ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
2056 if (req->r_old_dentry) 2099 if (req->r_old_dentry_dir)
2057 ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), 2100 ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
2058 CEPH_CAP_PIN); 2101 CEPH_CAP_PIN);
2059 2102
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 68288917c737..e90cfccf93bd 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -67,6 +67,7 @@ struct ceph_mds_reply_info_parsed {
67 /* for readdir results */ 67 /* for readdir results */
68 struct { 68 struct {
69 struct ceph_mds_reply_dirfrag *dir_dir; 69 struct ceph_mds_reply_dirfrag *dir_dir;
70 size_t dir_buf_size;
70 int dir_nr; 71 int dir_nr;
71 char **dir_dname; 72 char **dir_dname;
72 u32 *dir_dname_len; 73 u32 *dir_dname_len;
@@ -346,7 +347,8 @@ extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc,
346 struct dentry *dn); 347 struct dentry *dn);
347 348
348extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); 349extern void ceph_invalidate_dir_request(struct ceph_mds_request *req);
349 350extern int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
351 struct inode *dir);
350extern struct ceph_mds_request * 352extern struct ceph_mds_request *
351ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); 353ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode);
352extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, 354extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index 4440f447fd3f..51cc23e48111 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -54,6 +54,7 @@ const char *ceph_mds_op_name(int op)
54 case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; 54 case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash";
55 case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; 55 case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent";
56 case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; 56 case CEPH_MDS_OP_LOOKUPINO: return "lookupino";
57 case CEPH_MDS_OP_LOOKUPNAME: return "lookupname";
57 case CEPH_MDS_OP_GETATTR: return "getattr"; 58 case CEPH_MDS_OP_GETATTR: return "getattr";
58 case CEPH_MDS_OP_SETXATTR: return "setxattr"; 59 case CEPH_MDS_OP_SETXATTR: return "setxattr";
59 case CEPH_MDS_OP_SETATTR: return "setattr"; 60 case CEPH_MDS_OP_SETATTR: return "setattr";
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 10a4ccbf38da..06150fd745ac 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1026,6 +1026,7 @@ static int __init init_ceph(void)
1026 if (ret) 1026 if (ret)
1027 goto out; 1027 goto out;
1028 1028
1029 ceph_flock_init();
1029 ceph_xattr_init(); 1030 ceph_xattr_init();
1030 ret = register_filesystem(&ceph_fs_type); 1031 ret = register_filesystem(&ceph_fs_type);
1031 if (ret) 1032 if (ret)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index d8801a95b685..7866cd05a6bb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -577,7 +577,7 @@ struct ceph_file_info {
577 577
578 /* readdir: position within a frag */ 578 /* readdir: position within a frag */
579 unsigned offset; /* offset of last chunk, adjusted for . and .. */ 579 unsigned offset; /* offset of last chunk, adjusted for . and .. */
580 u64 next_offset; /* offset of next chunk (last_name's + 1) */ 580 unsigned next_offset; /* offset of next chunk (last_name's + 1) */
581 char *last_name; /* last entry in previous chunk */ 581 char *last_name; /* last entry in previous chunk */
582 struct dentry *dentry; /* next dentry (for dcache readdir) */ 582 struct dentry *dentry; /* next dentry (for dcache readdir) */
583 int dir_release_count; 583 int dir_release_count;
@@ -871,6 +871,7 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
871extern const struct export_operations ceph_export_ops; 871extern const struct export_operations ceph_export_ops;
872 872
873/* locks.c */ 873/* locks.c */
874extern __init void ceph_flock_init(void);
874extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); 875extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
875extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); 876extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
876extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); 877extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index a55ec37378c6..c9c2b887381e 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -64,32 +64,48 @@ static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
64} 64}
65 65
66static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, 66static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
67 size_t size) 67 size_t size)
68{ 68{
69 int ret; 69 int ret;
70 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); 70 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
71 struct ceph_osd_client *osdc = &fsc->client->osdc; 71 struct ceph_osd_client *osdc = &fsc->client->osdc;
72 s64 pool = ceph_file_layout_pg_pool(ci->i_layout); 72 s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
73 const char *pool_name; 73 const char *pool_name;
74 char buf[128];
74 75
75 dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); 76 dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
76 down_read(&osdc->map_sem); 77 down_read(&osdc->map_sem);
77 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); 78 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
78 if (pool_name) 79 if (pool_name) {
79 ret = snprintf(val, size, 80 size_t len = strlen(pool_name);
80 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s", 81 ret = snprintf(buf, sizeof(buf),
82 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=",
81 (unsigned long long)ceph_file_layout_su(ci->i_layout), 83 (unsigned long long)ceph_file_layout_su(ci->i_layout),
82 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), 84 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
83 (unsigned long long)ceph_file_layout_object_size(ci->i_layout), 85 (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
84 pool_name); 86 if (!size) {
85 else 87 ret += len;
86 ret = snprintf(val, size, 88 } else if (ret + len > size) {
89 ret = -ERANGE;
90 } else {
91 memcpy(val, buf, ret);
92 memcpy(val + ret, pool_name, len);
93 ret += len;
94 }
95 } else {
96 ret = snprintf(buf, sizeof(buf),
87 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", 97 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
88 (unsigned long long)ceph_file_layout_su(ci->i_layout), 98 (unsigned long long)ceph_file_layout_su(ci->i_layout),
89 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), 99 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
90 (unsigned long long)ceph_file_layout_object_size(ci->i_layout), 100 (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
91 (unsigned long long)pool); 101 (unsigned long long)pool);
92 102 if (size) {
103 if (ret <= size)
104 memcpy(val, buf, ret);
105 else
106 ret = -ERANGE;
107 }
108 }
93 up_read(&osdc->map_sem); 109 up_read(&osdc->map_sem);
94 return ret; 110 return ret;
95} 111}
@@ -215,7 +231,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
215 .name_size = sizeof("ceph.dir.layout"), 231 .name_size = sizeof("ceph.dir.layout"),
216 .getxattr_cb = ceph_vxattrcb_layout, 232 .getxattr_cb = ceph_vxattrcb_layout,
217 .readonly = false, 233 .readonly = false,
218 .hidden = false, 234 .hidden = true,
219 .exists_cb = ceph_vxattrcb_layout_exists, 235 .exists_cb = ceph_vxattrcb_layout_exists,
220 }, 236 },
221 XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), 237 XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
@@ -242,7 +258,7 @@ static struct ceph_vxattr ceph_file_vxattrs[] = {
242 .name_size = sizeof("ceph.file.layout"), 258 .name_size = sizeof("ceph.file.layout"),
243 .getxattr_cb = ceph_vxattrcb_layout, 259 .getxattr_cb = ceph_vxattrcb_layout,
244 .readonly = false, 260 .readonly = false,
245 .hidden = false, 261 .hidden = true,
246 .exists_cb = ceph_vxattrcb_layout_exists, 262 .exists_cb = ceph_vxattrcb_layout_exists,
247 }, 263 },
248 XATTR_LAYOUT_FIELD(file, layout, stripe_unit), 264 XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
@@ -842,7 +858,6 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
842 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 858 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
843 struct inode *inode = dentry->d_inode; 859 struct inode *inode = dentry->d_inode;
844 struct ceph_inode_info *ci = ceph_inode(inode); 860 struct ceph_inode_info *ci = ceph_inode(inode);
845 struct inode *parent_inode;
846 struct ceph_mds_request *req; 861 struct ceph_mds_request *req;
847 struct ceph_mds_client *mdsc = fsc->mdsc; 862 struct ceph_mds_client *mdsc = fsc->mdsc;
848 int err; 863 int err;
@@ -893,9 +908,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
893 req->r_data_len = size; 908 req->r_data_len = size;
894 909
895 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); 910 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
896 parent_inode = ceph_get_dentry_parent_inode(dentry); 911 err = ceph_mdsc_do_request(mdsc, NULL, req);
897 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
898 iput(parent_inode);
899 ceph_mdsc_put_request(req); 912 ceph_mdsc_put_request(req);
900 dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); 913 dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
901 914
@@ -1019,7 +1032,6 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
1019 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 1032 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
1020 struct ceph_mds_client *mdsc = fsc->mdsc; 1033 struct ceph_mds_client *mdsc = fsc->mdsc;
1021 struct inode *inode = dentry->d_inode; 1034 struct inode *inode = dentry->d_inode;
1022 struct inode *parent_inode;
1023 struct ceph_mds_request *req; 1035 struct ceph_mds_request *req;
1024 int err; 1036 int err;
1025 1037
@@ -1033,9 +1045,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
1033 req->r_num_caps = 1; 1045 req->r_num_caps = 1;
1034 req->r_path2 = kstrdup(name, GFP_NOFS); 1046 req->r_path2 = kstrdup(name, GFP_NOFS);
1035 1047
1036 parent_inode = ceph_get_dentry_parent_inode(dentry); 1048 err = ceph_mdsc_do_request(mdsc, NULL, req);
1037 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
1038 iput(parent_inode);
1039 ceph_mdsc_put_request(req); 1049 ceph_mdsc_put_request(req);
1040 return err; 1050 return err;
1041} 1051}