16 files changed, 415 insertions, 286 deletions
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 8c44fdd4e1c3..834f9f3723fb 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -205,6 +205,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
        ci->fscache = fscache_acquire_cookie(fsc->fscache,
                                             &ceph_fscache_inode_object_def,
                                             ci, true);
+        fscache_check_consistency(ci->fscache);
 done:
        mutex_unlock(&inode->i_mutex);
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
index da95f61b7a09..5ac591bd012b 100644
--- a/fs/ceph/cache.h
+++ b/fs/ceph/cache.h
@@ -48,6 +48,12 @@ void ceph_readpage_to_fscache(struct inode *inode, struct page *page);
 void ceph_invalidate_fscache_page(struct inode* inode, struct page *page);
 void ceph_queue_revalidate(struct inode *inode);
+static inline void ceph_fscache_update_objectsize(struct inode *inode)
+{ /* Report an attribute change on this inode's fscache cookie. */
+        struct ceph_inode_info *ci = ceph_inode(inode);
+        fscache_attr_changed(ci->fscache); /* ci->fscache is the cookie stored by fscache_acquire_cookie() */
+}
 static inline void ceph_fscache_invalidate(struct inode *inode)
 {
        fscache_invalidate(ceph_inode(inode)->fscache);
@@ -135,6 +141,10 @@ static inline void ceph_readpage_to_fscache(struct inode *inode,
 {
 }
+static inline void ceph_fscache_update_objectsize(struct inode *inode)
+{ /* NOTE(review): empty stub, presumably for builds without fscache; the guarding #ifdef is outside this hunk - confirm. */
+}
 static inline void ceph_fscache_invalidate(struct inode *inode)
 {
 }
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 17543383545c..2e5e648eb5c3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -622,8 +622,10 @@ retry:
        if (flags & CEPH_CAP_FLAG_AUTH) {
                if (ci->i_auth_cap == NULL ||
-                    ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0)
+                    ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) {
                        ci->i_auth_cap = cap;
+                        cap->mds_wanted = wanted;
+                }
                ci->i_cap_exporting_issued = 0;
        } else {
                WARN_ON(ci->i_auth_cap == cap);
@@ -885,7 +887,10 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
                cap = rb_entry(p, struct ceph_cap, ci_node);
                if (!__cap_is_valid(cap))
                        continue;
-                mds_wanted |= cap->mds_wanted;
+                if (cap == ci->i_auth_cap)
+                        mds_wanted |= cap->mds_wanted;
+                else
+                        mds_wanted |= (cap->mds_wanted & ~CEPH_CAP_ANY_FILE_WR);
        }
        return mds_wanted;
 }
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 6d59006bfa27..16b54aa31f08 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -93,6 +93,8 @@ static int mdsc_show(struct seq_file *s, void *p)
                } else if (req->r_path1) {
                        seq_printf(s, " #%llx/%s", req->r_ino1.ino,
                                   req->r_path1);
+                } else {
+                        seq_printf(s, " #%llx", req->r_ino1.ino);
                }
                if (req->r_old_dentry) {
@@ -102,7 +104,8 @@ static int mdsc_show(struct seq_file *s, void *p)
                                path = NULL;
                        spin_lock(&req->r_old_dentry->d_lock);
                        seq_printf(s, " #%llx/%.*s (%s)",
-                           ceph_ino(req->r_old_dentry_dir),
+                                   req->r_old_dentry_dir ?
+                                   ceph_ino(req->r_old_dentry_dir) : 0,
                                   req->r_old_dentry->d_name.len,
                                   req->r_old_dentry->d_name.name,
                                   path ? path : "");
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 45eda6d7a40c..766410a12c2c 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -119,7 +119,8 @@ static int fpos_cmp(loff_t l, loff_t r)
 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
 * the MDS if/when the directory is modified).
 */
-static int __dcache_readdir(struct file *file, struct dir_context *ctx)
+static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
+                            u32 shared_gen)
 {
        struct ceph_file_info *fi = file->private_data;
        struct dentry *parent = file->f_dentry;
@@ -133,8 +134,8 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx)
        last = fi->dentry;
        fi->dentry = NULL;
-        dout("__dcache_readdir %p at %llu (last %p)\n", dir, ctx->pos,
+        dout("__dcache_readdir %p v%u at %llu (last %p)\n",
-             last);
+             dir, shared_gen, ctx->pos, last);
        spin_lock(&parent->d_lock);
@@ -161,7 +162,8 @@ more:
                        goto out_unlock;
                }
                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-                if (!d_unhashed(dentry) && dentry->d_inode &&
+                if (di->lease_shared_gen == shared_gen &&
+                    !d_unhashed(dentry) && dentry->d_inode &&
                    ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
                    ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
                    fpos_cmp(ctx->pos, di->offset) <= 0)
@@ -190,7 +192,7 @@ more:
                if (last) {
                        /* remember our position */
                        fi->dentry = last;
-                        fi->next_offset = di->offset;
+                        fi->next_offset = fpos_off(di->offset);
                }
                dput(dentry);
                return 0;
@@ -252,8 +254,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
        int err;
        u32 ftype;
        struct ceph_mds_reply_info_parsed *rinfo;
-        const int max_entries = fsc->mount_options->max_readdir;
-        const int max_bytes = fsc->mount_options->max_readdir_bytes;
        dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off);
        if (fi->flags & CEPH_F_ATEND)
@@ -291,8 +291,9 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
            ceph_snap(inode) != CEPH_SNAPDIR &&
            __ceph_dir_is_complete(ci) &&
            __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
+                u32 shared_gen = ci->i_shared_gen;
                spin_unlock(&ci->i_ceph_lock);
-                err = __dcache_readdir(file, ctx);
+                err = __dcache_readdir(file, ctx, shared_gen);
                if (err != -EAGAIN)
                        return err;
        } else {
@@ -322,14 +323,16 @@ more:
                        fi->last_readdir = NULL;
                }
-                /* requery frag tree, as the frag topology may have changed */
-                frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL);
                dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
                     ceph_vinop(inode), frag, fi->last_name);
                req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
                if (IS_ERR(req))
                        return PTR_ERR(req);
+                err = ceph_alloc_readdir_reply_buffer(req, inode);
+                if (err) {
+                        ceph_mdsc_put_request(req);
+                        return err;
+                }
                req->r_inode = inode;
                ihold(inode);
                req->r_dentry = dget(file->f_dentry);
@@ -340,9 +343,6 @@ more:
                req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
                req->r_readdir_offset = fi->next_offset;
                req->r_args.readdir.frag = cpu_to_le32(frag);
-                req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
-                req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes);
-                req->r_num_caps = max_entries + 1;
                err = ceph_mdsc_do_request(mdsc, NULL, req);
                if (err < 0) {
                        ceph_mdsc_put_request(req);
@@ -369,9 +369,9 @@ more:
                                fi->next_offset = 0;
                        off = fi->next_offset;
                }
+                fi->frag = frag;
                fi->offset = fi->next_offset;
                fi->last_readdir = req;
-                fi->frag = frag;
                if (req->r_reply_info.dir_end) {
                        kfree(fi->last_name);
@@ -454,7 +454,7 @@ more:
        return 0;
 }
-static void reset_readdir(struct ceph_file_info *fi)
+static void reset_readdir(struct ceph_file_info *fi, unsigned frag)
 {
        if (fi->last_readdir) {
                ceph_mdsc_put_request(fi->last_readdir);
@@ -462,7 +462,10 @@ static void reset_readdir(struct ceph_file_info *fi)
        }
        kfree(fi->last_name);
        fi->last_name = NULL;
-        fi->next_offset = 2;  /* compensate for . and .. */
+        if (ceph_frag_is_leftmost(frag))
+                fi->next_offset = 2;  /* compensate for . and .. */
+        else
+                fi->next_offset = 0;
        if (fi->dentry) {
                dput(fi->dentry);
                fi->dentry = NULL;
@@ -474,7 +477,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 {
        struct ceph_file_info *fi = file->private_data;
        struct inode *inode = file->f_mapping->host;
-        loff_t old_offset = offset;
+        loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset);
        loff_t retval;
        mutex_lock(&inode->i_mutex);
@@ -491,7 +494,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
                goto out;
        }
-        if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
+        if (offset >= 0) {
                if (offset != file->f_pos) {
                        file->f_pos = offset;
                        file->f_version = 0;
@@ -504,14 +507,14 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
                 * seek to new frag, or seek prior to current chunk.
                 */
                if (offset == 0 ||
-                    fpos_frag(offset) != fpos_frag(old_offset) ||
+                    fpos_frag(offset) != fi->frag ||
                    fpos_off(offset) < fi->offset) {
                        dout("dir_llseek dropping %p content\n", file);
-                        reset_readdir(fi);
+                        reset_readdir(fi, fpos_frag(offset));
                }
                /* bump dir_release_count if we did a forward seek */
-                if (offset > old_offset)
+                if (fpos_cmp(offset, old_offset) > 0)
                        fi->dir_release_count--;
        }
 out:
@@ -812,8 +815,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
        }
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
-        req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */
+        req->r_old_dentry = dget(old_dentry);
-        req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
        req->r_locked_dir = dir;
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -911,10 +913,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
        if (IS_ERR(req))
                return PTR_ERR(req);
+        ihold(old_dir);
        req->r_dentry = dget(new_dentry);
        req->r_num_caps = 2;
        req->r_old_dentry = dget(old_dentry);
-        req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
+        req->r_old_dentry_dir = old_dir;
        req->r_locked_dir = new_dir;
        req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
        req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 16796be53ca5..00d6af6a32ec 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -8,23 +8,6 @@
 #include "mds_client.h"
 /*
- * NFS export support
- *
- * NFS re-export of a ceph mount is, at present, only semireliable.
- * The basic issue is that the Ceph architectures doesn't lend itself
- * well to generating filehandles that will remain valid forever.
- *
- * So, we do our best.  If you're lucky, your inode will be in the
- * client's cache.  If it's not, and you have a connectable fh, then
- * the MDS server may be able to find it for you.  Otherwise, you get
- * ESTALE.
- *
- * There are ways to this more reliable, but in the non-connectable fh
- * case, we won't every work perfectly, and in the connectable case,
- * some changes are needed on the MDS side to work better.
- */
-/*
 * Basic fh
 */
 struct ceph_nfs_fh {
@@ -32,22 +15,12 @@ struct ceph_nfs_fh {
 } __attribute__ ((packed));
 /*
- * Larger 'connectable' fh that includes parent ino and name hash.
+ * Larger fh that includes parent ino.
- * Use this whenever possible, as it works more reliably.
 */
 struct ceph_nfs_confh {
        u64 ino, parent_ino;
-        u32 parent_name_hash;
 } __attribute__ ((packed));
-/*
- * The presence of @parent_inode here tells us whether NFS wants a
- * connectable file handle.  However, we want to make a connectionable
- * file handle unconditionally so that the MDS gets as much of a hint
- * as possible.  That means we only use @parent_dentry to indicate
- * whether nfsd wants a connectable fh, and whether we should indicate
- * failure from a too-small @max_len.
- */
 static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
                          struct inode *parent_inode)
 {
@@ -56,54 +29,36 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
        struct ceph_nfs_confh *cfh = (void *)rawfh;
        int connected_handle_length = sizeof(*cfh)/4;
        int handle_length = sizeof(*fh)/4;
-        struct dentry *dentry;
-        struct dentry *parent;
        /* don't re-export snaps */
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EINVAL;
-        dentry = d_find_alias(inode);
+        if (parent_inode && (*max_len < connected_handle_length)) {
+                *max_len = connected_handle_length;
+                return FILEID_INVALID;
+        } else if (*max_len < handle_length) {
+                *max_len = handle_length;
+                return FILEID_INVALID;
+        }
-        /* if we found an alias, generate a connectable fh */
+        if (parent_inode) {
-        if (*max_len >= connected_handle_length && dentry) {
+                dout("encode_fh %llx with parent %llx\n",
-                dout("encode_fh %p connectable\n", dentry);
+                     ceph_ino(inode), ceph_ino(parent_inode));
-                spin_lock(&dentry->d_lock);
-                parent = dentry->d_parent;
                cfh->ino = ceph_ino(inode);
-                cfh->parent_ino = ceph_ino(parent->d_inode);
+                cfh->parent_ino = ceph_ino(parent_inode);
-                cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode,
-                                                         dentry);
                *max_len = connected_handle_length;
-                type = 2;
+                type = FILEID_INO32_GEN_PARENT;
-                spin_unlock(&dentry->d_lock);
-        } else if (*max_len >= handle_length) {
-                if (parent_inode) {
-                        /* nfsd wants connectable */
-                        *max_len = connected_handle_length;
-                        type = FILEID_INVALID;
-                } else {
-                        dout("encode_fh %p\n", dentry);
-                        fh->ino = ceph_ino(inode);
-                        *max_len = handle_length;
-                        type = 1;
-                }
        } else {
+                dout("encode_fh %llx\n", ceph_ino(inode));
+                fh->ino = ceph_ino(inode);
                *max_len = handle_length;
-                type = FILEID_INVALID;
+                type = FILEID_INO32_GEN;
        }
-        if (dentry)
-                dput(dentry);
        return type;
 }
-/*
+static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
- * convert regular fh to dentry
- *
- * FIXME: we should try harder by querying the mds for the ino.
- */
-static struct dentry *__fh_to_dentry(struct super_block *sb,
-                                     struct ceph_nfs_fh *fh, int fh_len)
 {
        struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
        struct inode *inode;
@@ -111,11 +66,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
        struct ceph_vino vino;
        int err;
-        if (fh_len < sizeof(*fh) / 4)
+        vino.ino = ino;
-                return ERR_PTR(-ESTALE);
-        dout("__fh_to_dentry %llx\n", fh->ino);
-        vino.ino = fh->ino;
        vino.snap = CEPH_NOSNAP;
        inode = ceph_find_inode(sb, vino);
        if (!inode) {
@@ -139,139 +90,161 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
        dentry = d_obtain_alias(inode);
        if (IS_ERR(dentry)) {
-                pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n",
-                       fh->ino, inode);
                iput(inode);
                return dentry;
        }
        err = ceph_init_dentry(dentry);
        if (err < 0) {
-                iput(inode);
+                dput(dentry);
                return ERR_PTR(err);
        }
-        dout("__fh_to_dentry %llx %p dentry %p\n", fh->ino, inode, dentry);
+        dout("__fh_to_dentry %llx %p dentry %p\n", ino, inode, dentry);
        return dentry;
 }
 /*
- * convert connectable fh to dentry
+ * convert regular fh to dentry
 */
-static struct dentry *__cfh_to_dentry(struct super_block *sb,
+static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
-                                      struct ceph_nfs_confh *cfh, int fh_len)
+                                        struct fid *fid,
+                                        int fh_len, int fh_type)
+{ /* Decode a file handle into a dentry; only the ino field of the handle is read here. */
+        struct ceph_nfs_fh *fh = (void *)fid->raw;
+        if (fh_type != FILEID_INO32_GEN  &&
+            fh_type != FILEID_INO32_GEN_PARENT)
+                return NULL; /* not one of the two handle types ceph_encode_fh() emits */
+        if (fh_len < sizeof(*fh) / 4) /* fh_len is compared in 4-byte units */
+                return NULL;
+        dout("fh_to_dentry %llx\n", fh->ino);
+        return __fh_to_dentry(sb, fh->ino); /* lookup is delegated, keyed by inode number only */
+}
+static struct dentry *__get_parent(struct super_block *sb,
+                                   struct dentry *child, u64 ino) /* Issue a LOOKUPPARENT request and return a dentry for its target inode; ino is consulted only when child is NULL. */
 {
        struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
+        struct ceph_mds_request *req;
        struct inode *inode;
        struct dentry *dentry;
-        struct ceph_vino vino;
        int err;
-        if (fh_len < sizeof(*cfh) / 4)
+        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
-                return ERR_PTR(-ESTALE);
+                                       USE_ANY_MDS);
+        if (IS_ERR(req))
-        dout("__cfh_to_dentry %llx (%llx/%x)\n",
+                return ERR_CAST(req);
-             cfh->ino, cfh->parent_ino, cfh->parent_name_hash);
-        vino.ino = cfh->ino;
-        vino.snap = CEPH_NOSNAP;
-        inode = ceph_find_inode(sb, vino);
-        if (!inode) {
-                struct ceph_mds_request *req;
-                req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH,
-                                               USE_ANY_MDS);
-                if (IS_ERR(req))
-                        return ERR_CAST(req);
-                req->r_ino1 = vino;
+        if (child) { /* caller has the child dentry: identify it by its inode */
-                req->r_ino2.ino = cfh->parent_ino;
+                req->r_inode = child->d_inode;
-                req->r_ino2.snap = CEPH_NOSNAP;
+                ihold(child->d_inode); /* reference paired with req->r_inode; presumably dropped when req is put - confirm */
-                req->r_path2 = kmalloc(16, GFP_NOFS);
+        } else { /* no dentry available: identify the child by inode number, non-snapshot */
-                snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
+                req->r_ino1 = (struct ceph_vino) {
-                req->r_num_caps = 1;
+                        .ino = ino,
-                err = ceph_mdsc_do_request(mdsc, NULL, req);
+                        .snap = CEPH_NOSNAP,
-                inode = req->r_target_inode;
+                };
-                if (inode)
-                        ihold(inode);
-                ceph_mdsc_put_request(req);
-                if (!inode)
-                        return ERR_PTR(err ? err : -ESTALE);
        }
+        req->r_num_caps = 1;
+        err = ceph_mdsc_do_request(mdsc, NULL, req);
+        inode = req->r_target_inode;
+        if (inode)
+                ihold(inode); /* take our own reference before the request is put below */
+        ceph_mdsc_put_request(req);
+        if (!inode)
+                return ERR_PTR(-ENOENT); /* always -ENOENT here; err from the request is not propagated */
        dentry = d_obtain_alias(inode);
        if (IS_ERR(dentry)) {
-                pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n",
-                       cfh->ino, inode);
                iput(inode);
                return dentry;
        }
        err = ceph_init_dentry(dentry);
        if (err < 0) {
-                iput(inode);
+                dput(dentry); /* init failed: release the dentry obtained above */
                return ERR_PTR(err);
        }
-        dout("__cfh_to_dentry %llx %p dentry %p\n", cfh->ino, inode, dentry);
+        dout("__get_parent ino %llx parent %p ino %llx.%llx\n",
+             child ? ceph_ino(child->d_inode) : ino,
+             dentry, ceph_vinop(inode));
        return dentry;
 }
-static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid,
+struct dentry *ceph_get_parent(struct dentry *child) /* NOTE(review): not static, yet the only user visible here is ceph_export_ops - confirm whether external linkage is intended */
-                                        int fh_len, int fh_type)
 {
-        if (fh_type == 1)
+        /* don't re-export snaps */
-                return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw,
+        if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
-                                                                fh_len);
+                return ERR_PTR(-EINVAL);
-        else
-                return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw,
+        dout("get_parent %p ino %llx.%llx\n",
-                                                                fh_len);
+             child, ceph_vinop(child->d_inode));
+        return __get_parent(child->d_sb, child, 0); /* ino argument is ignored by __get_parent() when child is non-NULL */
 }
 /*
- * get parent, if possible.
+ * convert larger fh (the one that includes parent ino) to parent
- *
- * FIXME: we could do better by querying the mds to discover the
- * parent.
 */
 static struct dentry *ceph_fh_to_parent(struct super_block *sb,
-                                         struct fid *fid,
+                                        struct fid *fid,
                                        int fh_len, int fh_type)
 {
        struct ceph_nfs_confh *cfh = (void *)fid->raw;
-        struct ceph_vino vino;
-        struct inode *inode;
        struct dentry *dentry;
-        int err;
-        if (fh_type == 1)
+        if (fh_type != FILEID_INO32_GEN_PARENT) /* only the handle type that carries parent_ino is accepted */
-                return ERR_PTR(-ESTALE);
+                return NULL;
        if (fh_len < sizeof(*cfh) / 4)
-                return ERR_PTR(-ESTALE);
+                return NULL;
-        pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino,
+        dout("fh_to_parent %llx\n", cfh->parent_ino);
-                 cfh->parent_name_hash);
+        dentry = __get_parent(sb, NULL, cfh->ino); /* first try a LOOKUPPARENT on the child's ino */
+        if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT)
+                dentry = __fh_to_dentry(sb, cfh->parent_ino); /* -ENOENT only: fall back to the parent ino stored in the handle */
+        return dentry;
+}
-        vino.ino = cfh->ino;
+static int ceph_get_name(struct dentry *parent, char *name,
-        vino.snap = CEPH_NOSNAP;
+                         struct dentry *child) /* Fill name with the dname from a LOOKUPNAME reply for child; returns 0 or the request's error. */
-        inode = ceph_find_inode(sb, vino);
+{
-        if (!inode)
+        struct ceph_mds_client *mdsc;
-                return ERR_PTR(-ESTALE);
+        struct ceph_mds_request *req;
+        int err;
-        dentry = d_obtain_alias(inode);
+        mdsc = ceph_inode_to_client(child->d_inode)->mdsc;
-        if (IS_ERR(dentry)) {
+        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
-                pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n",
+                                       USE_ANY_MDS);
-                       cfh->ino, inode);
+        if (IS_ERR(req))
-                iput(inode);
+                return PTR_ERR(req);
-                return dentry;
-        }
+        mutex_lock(&parent->d_inode->i_mutex); /* held across the request; the same inode is passed as r_locked_dir below */
-        err = ceph_init_dentry(dentry);
-        if (err < 0) {
+        req->r_inode = child->d_inode;
-                iput(inode);
+        ihold(child->d_inode); /* reference paired with req->r_inode; presumably dropped when req is put - confirm */
-                return ERR_PTR(err);
+        req->r_ino2 = ceph_vino(parent->d_inode);
+        req->r_locked_dir = parent->d_inode;
+        req->r_num_caps = 2;
+        err = ceph_mdsc_do_request(mdsc, NULL, req);
+        mutex_unlock(&parent->d_inode->i_mutex);
+        if (!err) {
+                struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+                memcpy(name, rinfo->dname, rinfo->dname_len); /* NOTE(review): dname_len is not checked against the caller's buffer size here - confirm the bound */
+                name[rinfo->dname_len] = 0; /* NUL-terminate; the copy above transfers exactly dname_len bytes */
+                dout("get_name %p ino %llx.%llx name %s\n",
+                     child, ceph_vinop(child->d_inode), name);
+        } else {
+                dout("get_name %p ino %llx.%llx err %d\n",
+                     child, ceph_vinop(child->d_inode), err);
        }
-        dout("fh_to_parent %llx %p dentry %p\n", cfh->ino, inode, dentry);
-        return dentry;
+        ceph_mdsc_put_request(req); /* released on both the success and the error path */
+        return err;
 }
 const struct export_operations ceph_export_ops = {
        .encode_fh = ceph_encode_fh,
        .fh_to_dentry = ceph_fh_to_dentry,
        .fh_to_parent = ceph_fh_to_parent,
+        .get_parent = ceph_get_parent, /* parent lookup via __get_parent() (LOOKUPPARENT request) */
+        .get_name = ceph_get_name, /* child name lookup via a LOOKUPNAME request */
 };
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 09c7afe32e49..39da1c2efa50 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -210,7 +210,7 @@ int ceph_open(struct inode *inode, struct file *file)
        ihold(inode);
        req->r_num_caps = 1;
-        if (flags & (O_CREAT|O_TRUNC))
+        if (flags & O_CREAT)
                parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
        err = ceph_mdsc_do_request(mdsc, parent_inode, req);
        iput(parent_inode);
@@ -291,8 +291,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
                }
                err = finish_open(file, dentry, ceph_open, opened);
        }
 out_err:
+        if (!req->r_err && req->r_target_inode)
+                ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode);
        ceph_mdsc_put_request(req);
        dout("atomic_open result=%d\n", err);
        return err;
@@ -600,7 +601,7 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
                                            false);
                if (IS_ERR(req)) {
                        ret = PTR_ERR(req);
-                        goto out;
+                        break;
                }
                num_pages = calc_pages_for(page_align, len);
@@ -718,7 +719,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
                                            false);
                if (IS_ERR(req)) {
                        ret = PTR_ERR(req);
-                        goto out;
+                        break;
                }
                /*
@@ -970,6 +971,8 @@ retry_snap:
                        goto retry_snap;
                }
        } else {
+                loff_t old_size = inode->i_size;
+                struct iov_iter from;
                /*
                 * No need to acquire the i_truncate_mutex. Because
                 * the MDS revokes Fwb caps before sending truncate
@@ -977,9 +980,12 @@ retry_snap:
                 * are pending vmtruncate. So write and vmtruncate
                 * can not run at the same time
                 */
-                written = generic_file_buffered_write(iocb, iov, nr_segs,
+                iov_iter_init(&from, iov, nr_segs, count, 0);
-                                                      pos, &iocb->ki_pos,
+                written = generic_perform_write(file, &from, pos);
-                                                      count, 0);
+                if (likely(written >= 0))
+                        iocb->ki_pos = pos + written;
+                if (inode->i_size > old_size)
+                        ceph_fscache_update_objectsize(inode);
                mutex_unlock(&inode->i_mutex);
        }
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 32d519d8a2e2..0b0728e5be2d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -659,14 +659,6 @@ static int fill_inode(struct inode *inode,
                            le32_to_cpu(info->time_warp_seq),
                            &ctime, &mtime, &atime);
-        /* only update max_size on auth cap */
-        if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
-            ci->i_max_size != le64_to_cpu(info->max_size)) {
-                dout("max_size %lld -> %llu\n", ci->i_max_size,
-                     le64_to_cpu(info->max_size));
-                ci->i_max_size = le64_to_cpu(info->max_size);
-        }
        ci->i_layout = info->layout;
        inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
@@ -755,6 +747,14 @@ static int fill_inode(struct inode *inode,
                ci->i_max_offset = 2;
        }
 no_change:
+        /* only update max_size on auth cap */
+        if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+            ci->i_max_size != le64_to_cpu(info->max_size)) {
+                dout("max_size %lld -> %llu\n", ci->i_max_size,
+                     le64_to_cpu(info->max_size));
+                ci->i_max_size = le64_to_cpu(info->max_size);
+        }
        spin_unlock(&ci->i_ceph_lock);
        /* queue truncate if we saw i_size decrease */
@@ -1044,10 +1044,59 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                                         session, req->r_request_started, -1,
                                         &req->r_caps_reservation);
                        if (err < 0)
-                                return err;
+                                goto done;
                } else {
                        WARN_ON_ONCE(1);
                }
+                if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME) {
+                        struct qstr dname;
+                        struct dentry *dn, *parent;
+                        BUG_ON(!rinfo->head->is_target);
+                        BUG_ON(req->r_dentry);
+                        parent = d_find_any_alias(dir);
+                        BUG_ON(!parent);
+                        dname.name = rinfo->dname;
+                        dname.len = rinfo->dname_len;
+                        dname.hash = full_name_hash(dname.name, dname.len);
+                        vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+                        vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+retry_lookup:
+                        dn = d_lookup(parent, &dname);
+                        dout("d_lookup on parent=%p name=%.*s got %p\n",
+                             parent, dname.len, dname.name, dn);
+                        if (!dn) {
+                                dn = d_alloc(parent, &dname);
+                                dout("d_alloc %p '%.*s' = %p\n", parent,
+                                     dname.len, dname.name, dn);
+                                if (dn == NULL) {
+                                        dput(parent);
+                                        err = -ENOMEM;
+                                        goto done;
+                                }
+                                err = ceph_init_dentry(dn);
+                                if (err < 0) {
+                                        dput(dn);
+                                        dput(parent);
+                                        goto done;
+                                }
+                        } else if (dn->d_inode &&
+                                   (ceph_ino(dn->d_inode) != vino.ino ||
+                                    ceph_snap(dn->d_inode) != vino.snap)) {
+                                dout(" dn %p points to wrong inode %p\n",
+                                     dn, dn->d_inode);
+                                d_delete(dn);
+                                dput(dn);
+                                goto retry_lookup;
+                        }
+                        req->r_dentry = dn;
+                        dput(parent);
+                }
        }
        if (rinfo->head->is_target) {
@@ -1063,7 +1112,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                err = fill_inode(in, &rinfo->targeti, NULL,
                                session, req->r_request_started,
-                                (le32_to_cpu(rinfo->head->result) == 0) ?
+                                (!req->r_aborted && rinfo->head->result == 0) ?
                                req->r_fmode : -1,
                                &req->r_caps_reservation);
                if (err < 0) {
@@ -1616,8 +1665,6 @@ static const struct inode_operations ceph_symlink_iops = {
        .getxattr = ceph_getxattr,
        .listxattr = ceph_listxattr,
        .removexattr = ceph_removexattr,
-        .get_acl = ceph_get_acl,
-        .set_acl = ceph_set_acl,
 };
 /*
@@ -1627,7 +1674,6 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = dentry->d_inode;
        struct ceph_inode_info *ci = ceph_inode(inode);
-        struct inode *parent_inode;
        const unsigned int ia_valid = attr->ia_valid;
        struct ceph_mds_request *req;
        struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
@@ -1819,9 +1865,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
                req->r_inode_drop = release;
                req->r_args.setattr.mask = cpu_to_le32(mask);
                req->r_num_caps = 1;
-                parent_inode = ceph_get_dentry_parent_inode(dentry);
+                err = ceph_mdsc_do_request(mdsc, NULL, req);
-                err = ceph_mdsc_do_request(mdsc, parent_inode, req);
-                iput(parent_inode);
        }
        dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
             ceph_cap_string(dirtied), mask);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index dc66c9e023e4..fdf941b44ff1 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -1,9 +1,8 @@
+#include <linux/ceph/ceph_debug.h>
 #include <linux/in.h>
 #include "super.h"
 #include "mds_client.h"
-#include <linux/ceph/ceph_debug.h>
 #include "ioctl.h"
@@ -64,7 +63,6 @@ static long __validate_layout(struct ceph_mds_client *mdsc,
 static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 {
        struct inode *inode = file_inode(file);
-        struct inode *parent_inode;
        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_mds_request *req;
        struct ceph_ioctl_layout l;
@@ -121,9 +119,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
                cpu_to_le32(l.object_size);
        req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool);
-        parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
+        err = ceph_mdsc_do_request(mdsc, NULL, req);
-        err = ceph_mdsc_do_request(mdsc, parent_inode, req);
-        iput(parent_inode);
        ceph_mdsc_put_request(req);
        return err;
 }
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index ae6d14e82b0f..d94ba0df9f4d 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -2,11 +2,31 @@
 #include <linux/file.h>
 #include <linux/namei.h>
+#include <linux/random.h>
 #include "super.h"
 #include "mds_client.h"
 #include <linux/ceph/pagelist.h>
+static u64 lock_secret;
+static inline u64 secure_addr(void *addr)
+{
+        u64 v = lock_secret ^ (u64)(unsigned long)addr;
+        /*
+         * Set the most significant bit, so that the MDS knows 'owner'
+         * alone is sufficient to identify the owner of the lock. (The
+         * old code used both 'owner' and 'pid'.)
+         *
+         * The value returned is the address XORed with lock_secret
+         * (computed above), not the raw pointer.
+         */
+        v |= (1ULL << 63);
+        return v;
+}
+void __init ceph_flock_init(void)
+{       /* fill lock_secret, the XOR mask applied by secure_addr() */
+        get_random_bytes(&lock_secret, sizeof(lock_secret));
+}
 /**
 * Implement fcntl and flock locking functions.
 */
@@ -14,11 +34,11 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
                             int cmd, u8 wait, struct file_lock *fl)
 {
        struct inode *inode = file_inode(file);
-        struct ceph_mds_client *mdsc =
+        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
-                ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_mds_request *req;
        int err;
        u64 length = 0;
+        u64 owner;
        req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
        if (IS_ERR(req))
@@ -32,25 +52,27 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
        else
                length = fl->fl_end - fl->fl_start + 1;
-        dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
+        if (lock_type == CEPH_LOCK_FCNTL)
-             "length: %llu, wait: %d, type: %d", (int)lock_type,
+                owner = secure_addr(fl->fl_owner);
-             (int)operation, (u64)fl->fl_pid, fl->fl_start,
+        else
-             length, wait, fl->fl_type);
+                owner = secure_addr(fl->fl_file);
+        dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
+             "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type,
+             (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
+             wait, fl->fl_type);
        req->r_args.filelock_change.rule = lock_type;
        req->r_args.filelock_change.type = cmd;
+        req->r_args.filelock_change.owner = cpu_to_le64(owner);
        req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
-        /* This should be adjusted, but I'm not sure if
-           namespaces actually get id numbers*/
-        req->r_args.filelock_change.pid_namespace =
-                cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
        req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
        req->r_args.filelock_change.length = cpu_to_le64(length);
        req->r_args.filelock_change.wait = wait;
        err = ceph_mdsc_do_request(mdsc, inode, req);
-        if ( operation == CEPH_MDS_OP_GETFILELOCK){
+        if (operation == CEPH_MDS_OP_GETFILELOCK) {
                fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
                if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
                        fl->fl_type = F_RDLCK;
@@ -87,14 +109,19 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
        u8 wait = 0;
        u16 op = CEPH_MDS_OP_SETFILELOCK;
-        fl->fl_nspid = get_pid(task_tgid(current));
+        if (!(fl->fl_flags & FL_POSIX))
-        dout("ceph_lock, fl_pid:%d", fl->fl_pid);
+                return -ENOLCK;
+        /* No mandatory locks */
+        if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
+                return -ENOLCK;
+        dout("ceph_lock, fl_owner: %p", fl->fl_owner);
        /* set wait bit as appropriate, then make command as Ceph expects it*/
-        if (F_SETLKW == cmd)
+        if (IS_GETLK(cmd))
-                wait = 1;
-        if (F_GETLK == cmd)
                op = CEPH_MDS_OP_GETFILELOCK;
+        else if (IS_SETLKW(cmd))
+                wait = 1;
        if (F_RDLCK == fl->fl_type)
                lock_cmd = CEPH_LOCK_SHARED;
@@ -105,7 +132,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
        err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
        if (!err) {
-                if ( op != CEPH_MDS_OP_GETFILELOCK ){
+                if (op != CEPH_MDS_OP_GETFILELOCK) {
                        dout("mds locked, locking locally");
                        err = posix_lock_file(file, fl, NULL);
                        if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
@@ -131,20 +158,22 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 {
        u8 lock_cmd;
        int err;
-        u8 wait = 1;
+        u8 wait = 0;
-        fl->fl_nspid = get_pid(task_tgid(current));
+        if (!(fl->fl_flags & FL_FLOCK))
-        dout("ceph_flock, fl_pid:%d", fl->fl_pid);
+                return -ENOLCK;
+        /* No mandatory locks */
-        /* set wait bit, then clear it out of cmd*/
+        if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
-        if (cmd & LOCK_NB)
+                return -ENOLCK;
-                wait = 0;
-        cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN);
+        dout("ceph_flock, fl_file: %p", fl->fl_file);
-        /* set command sequence that Ceph wants to see:
-           shared lock, exclusive lock, or unlock */
+        if (IS_SETLKW(cmd))
-        if (LOCK_SH == cmd)
+                wait = 1;
+        if (F_RDLCK == fl->fl_type)
                lock_cmd = CEPH_LOCK_SHARED;
-        else if (LOCK_EX == cmd)
+        else if (F_WRLCK == fl->fl_type)
                lock_cmd = CEPH_LOCK_EXCL;
        else
                lock_cmd = CEPH_LOCK_UNLOCK;
@@ -280,13 +309,14 @@ int lock_to_ceph_filelock(struct file_lock *lock,
                          struct ceph_filelock *cephlock)
 {
        int err = 0;
        cephlock->start = cpu_to_le64(lock->fl_start);
        cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
        cephlock->client = cpu_to_le64(0);
-        cephlock->pid = cpu_to_le64(lock->fl_pid);
+        cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
-        cephlock->pid_namespace =
+        if (lock->fl_flags & FL_POSIX)
-                cpu_to_le64((u64)(unsigned long)lock->fl_nspid);
+                cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
+        else
+                cephlock->owner = cpu_to_le64(secure_addr(lock->fl_file));
        switch (lock->fl_type) {
        case F_RDLCK:
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f4f050a69a48..2b4d093d0563 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3,6 +3,7 @@
 #include <linux/fs.h>
 #include <linux/wait.h>
 #include <linux/slab.h>
+#include <linux/gfp.h>
 #include <linux/sched.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
@@ -165,21 +166,18 @@ static int parse_reply_info_dir(void **p, void *end,
        if (num == 0)
                goto done;
-        /* alloc large array */
+        BUG_ON(!info->dir_in);
-        info->dir_nr = num;
-        info->dir_in = kcalloc(num, sizeof(*info->dir_in) +
-                               sizeof(*info->dir_dname) +
-                               sizeof(*info->dir_dname_len) +
-                               sizeof(*info->dir_dlease),
-                               GFP_NOFS);
-        if (info->dir_in == NULL) {
-                err = -ENOMEM;
-                goto out_bad;
-        }
        info->dir_dname = (void *)(info->dir_in + num);
        info->dir_dname_len = (void *)(info->dir_dname + num);
        info->dir_dlease = (void *)(info->dir_dname_len + num);
+        if ((unsigned long)(info->dir_dlease + num) >
+            (unsigned long)info->dir_in + info->dir_buf_size) {
+                pr_err("dir contents are larger than expected\n");
+                WARN_ON(1);
+                goto bad;
+        }
+        info->dir_nr = num;
        while (num) {
                /* dentry */
                ceph_decode_need(p, end, sizeof(u32)*2, bad);
@@ -327,7 +325,9 @@ out_bad:
 static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
 {
-        kfree(info->dir_in);
+        if (!info->dir_in)
+                return; /* no readdir buffer to release.  Otherwise dir_in
+                         * came from __get_free_pages() in
+                         * ceph_alloc_readdir_reply_buffer(), where
+                         * dir_buf_size was set to PAGE_SIZE << order, so
+                         * get_order(dir_buf_size) below is the order to
+                         * hand back to free_pages(). */
+        free_pages((unsigned long)info->dir_in, get_order(info->dir_buf_size));
 }
@@ -512,12 +512,11 @@ void ceph_mdsc_release_request(struct kref *kref)
        struct ceph_mds_request *req = container_of(kref,
                                                    struct ceph_mds_request,
                                                    r_kref);
+        destroy_reply_info(&req->r_reply_info);
        if (req->r_request)
                ceph_msg_put(req->r_request);
-        if (req->r_reply) {
+        if (req->r_reply)
                ceph_msg_put(req->r_reply);
-                destroy_reply_info(&req->r_reply_info);
-        }
        if (req->r_inode) {
                ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
                iput(req->r_inode);
@@ -528,7 +527,9 @@ void ceph_mdsc_release_request(struct kref *kref)
                iput(req->r_target_inode);
        if (req->r_dentry)
                dput(req->r_dentry);
-        if (req->r_old_dentry) {
+        if (req->r_old_dentry)
+                dput(req->r_old_dentry);
+        if (req->r_old_dentry_dir) {
                /*
                 * track (and drop pins for) r_old_dentry_dir
                 * separately, since r_old_dentry's d_parent may have
@@ -537,7 +538,6 @@ void ceph_mdsc_release_request(struct kref *kref)
                 */
                ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
                                  CEPH_CAP_PIN);
-                dput(req->r_old_dentry);
                iput(req->r_old_dentry_dir);
        }
        kfree(req->r_path1);
@@ -1311,6 +1311,9 @@ static int trim_caps(struct ceph_mds_client *mdsc,
                        trim_caps - session->s_trim_caps);
                session->s_trim_caps = 0;
        }
+        ceph_add_cap_releases(mdsc, session);
+        ceph_send_cap_releases(mdsc, session);
        return 0;
 }
@@ -1461,15 +1464,18 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
        dout("discard_cap_releases mds%d\n", session->s_mds);
-        /* zero out the in-progress message */
+        if (!list_empty(&session->s_cap_releases)) {
-        msg = list_first_entry(&session->s_cap_releases,
+                /* zero out the in-progress message */
-                               struct ceph_msg, list_head);
+                msg = list_first_entry(&session->s_cap_releases,
-        head = msg->front.iov_base;
+                                        struct ceph_msg, list_head);
-        num = le32_to_cpu(head->num);
+                head = msg->front.iov_base;
-        dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num);
+                num = le32_to_cpu(head->num);
-        head->num = cpu_to_le32(0);
+                dout("discard_cap_releases mds%d %p %u\n",
-        msg->front.iov_len = sizeof(*head);
+                     session->s_mds, msg, num);
-        session->s_num_cap_releases += num;
+                head->num = cpu_to_le32(0);
+                msg->front.iov_len = sizeof(*head);
+                session->s_num_cap_releases += num;
+        }
        /* requeue completed messages */
        while (!list_empty(&session->s_cap_releases_done)) {
@@ -1492,6 +1498,43 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
 * requests
 */
+int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
+                                    struct inode *dir)
+{       /*
+         * Allocate the pages that will hold the parsed readdir reply for
+         * @req and size the request to match.  One entry costs 'size'
+         * bytes: one element in each of dir_in, dir_dname, dir_dname_len
+         * and dir_dlease.  The entry count starts from the directory's
+         * i_files + i_subdirs, clamped to [1, opt->max_readdir], and is
+         * recomputed from the page order actually obtained.
+         *
+         * Returns 0 on success, -ENOMEM if no page order could be
+         * allocated.
+         */
+        struct ceph_inode_info *ci = ceph_inode(dir);
+        struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+        struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options;
+        size_t size = sizeof(*rinfo->dir_in) + sizeof(*rinfo->dir_dname_len) +
+                      sizeof(*rinfo->dir_dname) + sizeof(*rinfo->dir_dlease);
+        int order, num_entries;
+        spin_lock(&ci->i_ceph_lock);
+        num_entries = ci->i_files + ci->i_subdirs;
+        spin_unlock(&ci->i_ceph_lock);
+        num_entries = max(num_entries, 1);
+        num_entries = min(num_entries, opt->max_readdir);
+        order = get_order(size * num_entries);
+        while (order >= 0) { /* on failure, retry with a smaller order */
+                rinfo->dir_in = (void*)__get_free_pages(GFP_NOFS | __GFP_NOWARN,
+                                                        order);
+                if (rinfo->dir_in)
+                        break;
+                order--;
+        }
+        if (!rinfo->dir_in)
+                return -ENOMEM;
+        num_entries = (PAGE_SIZE << order) / size; /* entries that fit */
+        num_entries = min(num_entries, opt->max_readdir);
+        rinfo->dir_buf_size = PAGE_SIZE << order;
+        req->r_num_caps = num_entries + 1;
+        req->r_args.readdir.max_entries = cpu_to_le32(num_entries);
+        req->r_args.readdir.max_bytes = cpu_to_le32(opt->max_readdir_bytes);
+        return 0;
+}
 /*
 * Create an mds request.
 */
@@ -2053,7 +2096,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
                ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
        if (req->r_locked_dir)
                ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
-        if (req->r_old_dentry)
+        if (req->r_old_dentry_dir)
                ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
                                  CEPH_CAP_PIN);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 68288917c737..e90cfccf93bd 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -67,6 +67,7 @@ struct ceph_mds_reply_info_parsed {
                /* for readdir results */
                struct {
                        struct ceph_mds_reply_dirfrag *dir_dir;
+                        size_t                        dir_buf_size;
                        int                           dir_nr;
                        char                          **dir_dname;
                        u32                           *dir_dname_len;
@@ -346,7 +347,8 @@ extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc,
                                    struct dentry *dn);
 extern void ceph_invalidate_dir_request(struct ceph_mds_request *req);
+extern int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
+                                           struct inode *dir);
 extern struct ceph_mds_request *
 ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode);
 extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index 4440f447fd3f..51cc23e48111 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -54,6 +54,7 @@ const char *ceph_mds_op_name(int op)
        case CEPH_MDS_OP_LOOKUPHASH:  return "lookuphash";
        case CEPH_MDS_OP_LOOKUPPARENT:  return "lookupparent";
        case CEPH_MDS_OP_LOOKUPINO:  return "lookupino";
+        case CEPH_MDS_OP_LOOKUPNAME:  return "lookupname";
        case CEPH_MDS_OP_GETATTR:  return "getattr";
        case CEPH_MDS_OP_SETXATTR: return "setxattr";
        case CEPH_MDS_OP_SETATTR: return "setattr";
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 10a4ccbf38da..06150fd745ac 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1026,6 +1026,7 @@ static int __init init_ceph(void)
        if (ret)
                goto out;
+        ceph_flock_init();
        ceph_xattr_init();
        ret = register_filesystem(&ceph_fs_type);
        if (ret)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index d8801a95b685..7866cd05a6bb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -577,7 +577,7 @@ struct ceph_file_info {
        /* readdir: position within a frag */
        unsigned offset;       /* offset of last chunk, adjusted for . and .. */
-        u64 next_offset;       /* offset of next chunk (last_name's + 1) */
+        unsigned next_offset;  /* offset of next chunk (last_name's + 1) */
        char *last_name;       /* last entry in previous chunk */
        struct dentry *dentry; /* next dentry (for dcache readdir) */
        int dir_release_count;
@@ -871,6 +871,7 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 extern const struct export_operations ceph_export_ops;
 /* locks.c */
+extern __init void ceph_flock_init(void);
 extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
 extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
 extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index a55ec37378c6..c9c2b887381e 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -64,32 +64,48 @@ static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
 }
 static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
-                                        size_t size)
+                                   size_t size)
 {
        int ret;
        struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
        struct ceph_osd_client *osdc = &fsc->client->osdc;
        s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
        const char *pool_name;
+        char buf[128]; /* scratch for the formatted layout fields */
        dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
        down_read(&osdc->map_sem);
        pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
-        if (pool_name)
+        if (pool_name) {
-                ret = snprintf(val, size,
+                size_t len = strlen(pool_name);
-                "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s",
+                ret = snprintf(buf, sizeof(buf),
+                "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=",
                (unsigned long long)ceph_file_layout_su(ci->i_layout),
                (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
-                (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
+                (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
-                pool_name);
+                if (!size) { /* size == 0: nothing copied, length only */
-        else
+                        ret += len;
-                ret = snprintf(val, size,
+                } else if (ret + len > size) {
+                        ret = -ERANGE; /* NOTE(review): returned via size_t */
+                } else {
+                        memcpy(val, buf, ret); /* prefix, without the NUL */
+                        memcpy(val + ret, pool_name, len);
+                        ret += len;
+                }
+        } else {
+                ret = snprintf(buf, sizeof(buf),
                "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
                (unsigned long long)ceph_file_layout_su(ci->i_layout),
                (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
                (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
                (unsigned long long)pool);
+                if (size) { /* size == 0 skips the copy, length only */
+                        if (ret <= size)
+                                memcpy(val, buf, ret);
+                        else
+                                ret = -ERANGE;
+                }
+        }
        up_read(&osdc->map_sem);
        return ret;
 }
@@ -215,7 +231,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
                .name_size = sizeof("ceph.dir.layout"),
                .getxattr_cb = ceph_vxattrcb_layout,
                .readonly = false,
-                .hidden = false,
+                .hidden = true,
                .exists_cb = ceph_vxattrcb_layout_exists,
        },
        XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
@@ -242,7 +258,7 @@ static struct ceph_vxattr ceph_file_vxattrs[] = {
                .name_size = sizeof("ceph.file.layout"),
                .getxattr_cb = ceph_vxattrcb_layout,
                .readonly = false,
-                .hidden = false,
+                .hidden = true,
                .exists_cb = ceph_vxattrcb_layout_exists,
        },
        XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
@@ -842,7 +858,6 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
        struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
        struct inode *inode = dentry->d_inode;
        struct ceph_inode_info *ci = ceph_inode(inode);
-        struct inode *parent_inode;
        struct ceph_mds_request *req;
        struct ceph_mds_client *mdsc = fsc->mdsc;
        int err;
@@ -893,9 +908,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
        req->r_data_len = size;
        dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
-        parent_inode = ceph_get_dentry_parent_inode(dentry);
+        err = ceph_mdsc_do_request(mdsc, NULL, req);
-        err = ceph_mdsc_do_request(mdsc, parent_inode, req);
-        iput(parent_inode);
        ceph_mdsc_put_request(req);
        dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
@@ -1019,7 +1032,6 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
        struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
        struct ceph_mds_client *mdsc = fsc->mdsc;
        struct inode *inode = dentry->d_inode;
-        struct inode *parent_inode;
        struct ceph_mds_request *req;
        int err;
@@ -1033,9 +1045,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
        req->r_num_caps = 1;
        req->r_path2 = kstrdup(name, GFP_NOFS);
-        parent_inode = ceph_get_dentry_parent_inode(dentry);
+        err = ceph_mdsc_do_request(mdsc, NULL, req);
-        err = ceph_mdsc_do_request(mdsc, parent_inode, req);
-        iput(parent_inode);
        ceph_mdsc_put_request(req);
        return err;
 }