Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/vfs_file.c | 2
-rw-r--r--  fs/adfs/super.c | 2
-rw-r--r--  fs/affs/affs.h | 20
-rw-r--r--  fs/affs/amigaffs.c | 23
-rw-r--r--  fs/affs/dir.c | 28
-rw-r--r--  fs/affs/namei.c | 32
-rw-r--r--  fs/affs/super.c | 8
-rw-r--r--  fs/autofs4/dev-ioctl.c | 3
-rw-r--r--  fs/bfs/inode.c | 2
-rw-r--r--  fs/binfmt_elf.c | 4
-rw-r--r--  fs/bio-integrity.c | 22
-rw-r--r--  fs/btrfs/file.c | 1
-rw-r--r--  fs/ceph/cache.c | 1
-rw-r--r--  fs/ceph/cache.h | 10
-rw-r--r--  fs/ceph/caps.c | 9
-rw-r--r--  fs/ceph/debugfs.c | 5
-rw-r--r--  fs/ceph/dir.c | 53
-rw-r--r--  fs/ceph/export.c | 267
-rw-r--r--  fs/ceph/file.c | 8
-rw-r--r--  fs/ceph/inode.c | 76
-rw-r--r--  fs/ceph/ioctl.c | 5
-rw-r--r--  fs/ceph/locks.c | 98
-rw-r--r--  fs/ceph/mds_client.c | 97
-rw-r--r--  fs/ceph/mds_client.h | 4
-rw-r--r--  fs/ceph/strings.c | 1
-rw-r--r--  fs/ceph/super.c | 1
-rw-r--r--  fs/ceph/super.h | 3
-rw-r--r--  fs/ceph/xattr.c | 48
-rw-r--r--  fs/cifs/file.c | 1
-rw-r--r--  fs/dcache.c | 1
-rw-r--r--  fs/exec.c | 26
-rw-r--r--  fs/ext2/acl.c | 1
-rw-r--r--  fs/ext2/ialloc.c | 2
-rw-r--r--  fs/ext2/super.c | 2
-rw-r--r--  fs/ext2/xattr_security.c | 4
-rw-r--r--  fs/ext3/balloc.c | 5
-rw-r--r--  fs/ext3/dir.c | 2
-rw-r--r--  fs/ext3/ialloc.c | 2
-rw-r--r--  fs/ext3/inode.c | 86
-rw-r--r--  fs/ext3/super.c | 2
-rw-r--r--  fs/ext3/xattr_security.c | 5
-rw-r--r--  fs/ext4/file.c | 1
-rw-r--r--  fs/f2fs/acl.c | 8
-rw-r--r--  fs/f2fs/checkpoint.c | 208
-rw-r--r--  fs/f2fs/data.c | 106
-rw-r--r--  fs/f2fs/debug.c | 12
-rw-r--r--  fs/f2fs/dir.c | 85
-rw-r--r--  fs/f2fs/f2fs.h | 105
-rw-r--r--  fs/f2fs/file.c | 32
-rw-r--r--  fs/f2fs/gc.c | 16
-rw-r--r--  fs/f2fs/inline.c | 4
-rw-r--r--  fs/f2fs/inode.c | 27
-rw-r--r--  fs/f2fs/namei.c | 9
-rw-r--r--  fs/f2fs/node.c | 334
-rw-r--r--  fs/f2fs/node.h | 25
-rw-r--r--  fs/f2fs/recovery.c | 37
-rw-r--r--  fs/f2fs/segment.c | 222
-rw-r--r--  fs/f2fs/segment.h | 75
-rw-r--r--  fs/f2fs/super.c | 97
-rw-r--r--  fs/f2fs/xattr.c | 7
-rw-r--r--  fs/fuse/cuse.c | 4
-rw-r--r--  fs/fuse/file.c | 1
-rw-r--r--  fs/gfs2/file.c | 1
-rw-r--r--  fs/isofs/inode.c | 2
-rw-r--r--  fs/jffs2/compr_rtime.c | 4
-rw-r--r--  fs/jffs2/fs.c | 9
-rw-r--r--  fs/jffs2/nodelist.h | 2
-rw-r--r--  fs/jffs2/nodemgmt.c | 14
-rw-r--r--  fs/lockd/svc.c | 1
-rw-r--r--  fs/ncpfs/dir.c | 69
-rw-r--r--  fs/ncpfs/file.c | 24
-rw-r--r--  fs/ncpfs/getopt.c | 12
-rw-r--r--  fs/ncpfs/inode.c | 32
-rw-r--r--  fs/ncpfs/ioctl.c | 17
-rw-r--r--  fs/ncpfs/mmap.c | 2
-rw-r--r--  fs/ncpfs/ncp_fs.h | 30
-rw-r--r--  fs/ncpfs/ncplib_kernel.c | 28
-rw-r--r--  fs/ncpfs/sock.c | 49
-rw-r--r--  fs/ncpfs/symlink.c | 2
-rw-r--r--  fs/nfs/callback_proc.c | 19
-rw-r--r--  fs/nfs/dir.c | 62
-rw-r--r--  fs/nfs/file.c | 1
-rw-r--r--  fs/nfs/inode.c | 34
-rw-r--r--  fs/nfs/internal.h | 8
-rw-r--r--  fs/nfs/nfs3proc.c | 36
-rw-r--r--  fs/nfs/nfs4_fs.h | 11
-rw-r--r--  fs/nfs/nfs4client.c | 7
-rw-r--r--  fs/nfs/nfs4proc.c | 197
-rw-r--r--  fs/nfs/nfs4state.c | 6
-rw-r--r--  fs/nfs/nfs4xdr.c | 3
-rw-r--r--  fs/nfs/pnfs.c | 17
-rw-r--r--  fs/nfs/proc.c | 25
-rw-r--r--  fs/nfs/unlink.c | 35
-rw-r--r--  fs/nfsd/acl.h | 10
-rw-r--r--  fs/nfsd/nfs4acl.c | 13
-rw-r--r--  fs/nfsd/nfs4callback.c | 19
-rw-r--r--  fs/nfsd/nfs4proc.c | 39
-rw-r--r--  fs/nfsd/nfs4state.c | 28
-rw-r--r--  fs/nfsd/nfs4xdr.c | 30
-rw-r--r--  fs/nfsd/nfsctl.c | 5
-rw-r--r--  fs/nfsd/nfsd.h | 2
-rw-r--r--  fs/nfsd/nfsfh.h | 14
-rw-r--r--  fs/nfsd/nfsxdr.c | 2
-rw-r--r--  fs/nfsd/vfs.c | 15
-rw-r--r--  fs/nfsd/xdr4.h | 2
-rw-r--r--  fs/nilfs2/file.c | 1
-rw-r--r--  fs/ntfs/debug.c | 58
-rw-r--r--  fs/ntfs/debug.h | 7
-rw-r--r--  fs/ntfs/super.c | 28
-rw-r--r--  fs/ocfs2/cluster/sys.c | 2
-rw-r--r--  fs/ocfs2/stackglue.c | 8
-rw-r--r--  fs/proc/array.c | 4
-rw-r--r--  fs/proc/base.c | 19
-rw-r--r--  fs/proc/fd.c | 6
-rw-r--r--  fs/proc/inode.c | 2
-rw-r--r--  fs/proc/meminfo.c | 2
-rw-r--r--  fs/proc/task_mmu.c | 3
-rw-r--r--  fs/proc/vmcore.c | 3
-rw-r--r--  fs/quota/Kconfig | 7
-rw-r--r--  fs/reiserfs/dir.c | 6
-rw-r--r--  fs/ubifs/file.c | 1
-rw-r--r--  fs/udf/super.c | 8
-rw-r--r--  fs/ufs/balloc.c | 12
-rw-r--r--  fs/ufs/ialloc.c | 4
-rw-r--r--  fs/ufs/super.c | 8
-rw-r--r--  fs/xfs/xfs_file.c | 1
126 files changed, 2089 insertions, 1426 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index a16b0ff497ca..d8223209d4b1 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -832,6 +832,7 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
 
 static const struct vm_operations_struct v9fs_file_vm_ops = {
 	.fault = filemap_fault,
+	.map_pages = filemap_map_pages,
 	.page_mkwrite = v9fs_vm_page_mkwrite,
 	.remap_pages = generic_file_remap_pages,
 };
@@ -839,6 +840,7 @@ static const struct vm_operations_struct v9fs_file_vm_ops = {
 static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
 	.close = v9fs_mmap_vm_close,
 	.fault = filemap_fault,
+	.map_pages = filemap_map_pages,
 	.page_mkwrite = v9fs_vm_page_mkwrite,
 	.remap_pages = generic_file_remap_pages,
 };
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 952aeb048349..9852bdf34d76 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -266,7 +266,7 @@ static void init_once(void *foo)
 	inode_init_once(&ei->vfs_inode);
 }
 
-static int init_inodecache(void)
+static int __init init_inodecache(void)
 {
 	adfs_inode_cachep = kmem_cache_create("adfs_inode_cache",
 					     sizeof(struct adfs_inode_info),
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 3952121f2f28..25b23b1e7f22 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -5,14 +5,6 @@
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
 
-/* AmigaOS allows file names with up to 30 characters length.
- * Names longer than that will be silently truncated. If you
- * want to disallow this, comment out the following #define.
- * Creating filesystem objects with longer names will then
- * result in an error (ENAMETOOLONG).
- */
-/*#define AFFS_NO_TRUNCATE */
-
 /* Ugly macros make the code more pretty. */
 
 #define GET_END_PTR(st,p,sz)		((st *)((char *)(p)+((sz)-sizeof(st))))
@@ -28,7 +20,6 @@
 
 #define AFFS_CACHE_SIZE		PAGE_SIZE
 
-#define AFFS_MAX_PREALLOC	32
 #define AFFS_LC_SIZE		(AFFS_CACHE_SIZE/sizeof(u32)/2)
 #define AFFS_AC_SIZE		(AFFS_CACHE_SIZE/sizeof(struct affs_ext_key)/2)
 #define AFFS_AC_MASK		(AFFS_AC_SIZE-1)
@@ -118,6 +109,7 @@ struct affs_sb_info {
 #define SF_OFS		0x0200		/* Old filesystem */
 #define SF_PREFIX	0x0400		/* Buffer for prefix is allocated */
 #define SF_VERBOSE	0x0800		/* Talk about fs when mounting */
+#define SF_NO_TRUNCATE	0x1000		/* Don't truncate filenames */
 
 /* short cut to get to the affs specific sb data */
 static inline struct affs_sb_info *AFFS_SB(struct super_block *sb)
@@ -137,9 +129,13 @@ extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh);
 extern void secs_to_datestamp(time_t secs, struct affs_date *ds);
 extern umode_t prot_to_mode(u32 prot);
 extern void mode_to_prot(struct inode *inode);
-extern void affs_error(struct super_block *sb, const char *function, const char *fmt, ...);
-extern void affs_warning(struct super_block *sb, const char *function, const char *fmt, ...);
-extern int affs_check_name(const unsigned char *name, int len);
+extern void affs_error(struct super_block *sb, const char *function,
+		const char *fmt, ...);
+extern void affs_warning(struct super_block *sb, const char *function,
+		const char *fmt, ...);
+extern bool affs_nofilenametruncate(const struct dentry *dentry);
+extern int affs_check_name(const unsigned char *name, int len,
+		bool notruncate);
 extern int affs_copy_name(unsigned char *bstr, struct dentry *dentry);
 
 /* bitmap. c */
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index d9a43674cb94..533a322c41c0 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -471,20 +471,27 @@ affs_warning(struct super_block *sb, const char *function, const char *fmt, ...)
 		function,ErrorBuffer);
 }
 
+bool
+affs_nofilenametruncate(const struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	return AFFS_SB(inode->i_sb)->s_flags & SF_NO_TRUNCATE;
+
+}
+
 /* Check if the name is valid for a affs object. */
 
 int
-affs_check_name(const unsigned char *name, int len)
+affs_check_name(const unsigned char *name, int len, bool notruncate)
 {
 	int i;
 
-	if (len > 30)
-#ifdef AFFS_NO_TRUNCATE
-		return -ENAMETOOLONG;
-#else
-		len = 30;
-#endif
-
+	if (len > 30) {
+		if (notruncate)
+			return -ENAMETOOLONG;
+		else
+			len = 30;
+	}
 	for (i = 0; i < len; i++) {
 		if (name[i] < ' ' || name[i] == ':'
 		    || (name[i] > 0x7e && name[i] < 0xa0))
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index f1eba8c3644e..cbbda476a805 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -52,8 +52,10 @@ affs_readdir(struct file *file, struct dir_context *ctx)
 	int			 hash_pos;
 	int			 chain_pos;
 	u32			 ino;
+	int			 error = 0;
 
-	pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",inode->i_ino,(unsigned long)ctx->pos);
+	pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",
+		 inode->i_ino, (unsigned long)ctx->pos);
 
 	if (ctx->pos < 2) {
 		file->private_data = (void *)0;
@@ -72,7 +74,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
 	}
 	dir_bh = affs_bread(sb, inode->i_ino);
 	if (!dir_bh)
-		goto readdir_out;
+		goto out_unlock_dir;
 
 	/* If the directory hasn't changed since the last call to readdir(),
 	 * we can jump directly to where we left off.
@@ -88,7 +90,8 @@ affs_readdir(struct file *file, struct dir_context *ctx)
 		fh_bh = affs_bread(sb, ino);
 		if (!fh_bh) {
 			affs_error(sb, "readdir","Cannot read block %d", i);
-			return -EIO;
+			error = -EIO;
+			goto out_brelse_dir;
 		}
 		ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain);
 		affs_brelse(fh_bh);
@@ -107,29 +110,34 @@ inside:
 		do {
 			fh_bh = affs_bread(sb, ino);
 			if (!fh_bh) {
-				affs_error(sb, "readdir","Cannot read block %d", ino);
+				affs_error(sb, "readdir",
+					   "Cannot read block %d", ino);
 				break;
 			}
 
 			namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30);
 			name = AFFS_TAIL(sb, fh_bh)->name + 1;
-			pr_debug("AFFS: readdir(): filldir(\"%.*s\", ino=%u), hash=%d, f_pos=%x\n",
+			pr_debug("AFFS: readdir(): dir_emit(\"%.*s\", "
+				 "ino=%u), hash=%d, f_pos=%x\n",
 				namelen, name, ino, hash_pos, (u32)ctx->pos);
+
 			if (!dir_emit(ctx, name, namelen, ino, DT_UNKNOWN))
-				goto readdir_done;
+				goto done;
 			ctx->pos++;
 			ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain);
 			affs_brelse(fh_bh);
 			fh_bh = NULL;
 		} while (ino);
 	}
-readdir_done:
+done:
 	file->f_version = inode->i_version;
 	file->private_data = (void *)(long)ino;
+	affs_brelse(fh_bh);
 
-readdir_out:
+out_brelse_dir:
 	affs_brelse(dir_bh);
-	affs_brelse(fh_bh);
+
+out_unlock_dir:
 	affs_unlock_dir(inode);
-	return 0;
+	return error;
 }
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index c36cbb4537a2..6dae1ccd176d 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -60,13 +60,13 @@ affs_get_toupper(struct super_block *sb)
  * Note: the dentry argument is the parent dentry.
  */
 static inline int
-__affs_hash_dentry(struct qstr *qstr, toupper_t toupper)
+__affs_hash_dentry(struct qstr *qstr, toupper_t toupper, bool notruncate)
 {
 	const u8 *name = qstr->name;
 	unsigned long hash;
 	int i;
 
-	i = affs_check_name(qstr->name, qstr->len);
+	i = affs_check_name(qstr->name, qstr->len, notruncate);
 	if (i)
 		return i;
 
@@ -82,16 +82,22 @@ __affs_hash_dentry(struct qstr *qstr, toupper_t toupper)
 static int
 affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
 {
-	return __affs_hash_dentry(qstr, affs_toupper);
+	return __affs_hash_dentry(qstr, affs_toupper,
+				  affs_nofilenametruncate(dentry));
+
 }
+
 static int
 affs_intl_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
 {
-	return __affs_hash_dentry(qstr, affs_intl_toupper);
+	return __affs_hash_dentry(qstr, affs_intl_toupper,
+				  affs_nofilenametruncate(dentry));
+
 }
 
 static inline int __affs_compare_dentry(unsigned int len,
-		const char *str, const struct qstr *name, toupper_t toupper)
+		const char *str, const struct qstr *name, toupper_t toupper,
+		bool notruncate)
 {
 	const u8 *aname = str;
 	const u8 *bname = name->name;
@@ -101,7 +107,7 @@ static inline int __affs_compare_dentry(unsigned int len,
 	 * must be valid. 'name' must be validated first.
 	 */
 
-	if (affs_check_name(name->name, name->len))
+	if (affs_check_name(name->name, name->len, notruncate))
 		return 1;
 
 	/*
@@ -126,13 +132,18 @@ static int
 affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
 		unsigned int len, const char *str, const struct qstr *name)
 {
-	return __affs_compare_dentry(len, str, name, affs_toupper);
+
+	return __affs_compare_dentry(len, str, name, affs_toupper,
+				     affs_nofilenametruncate(parent));
 }
+
 static int
 affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
 		unsigned int len, const char *str, const struct qstr *name)
 {
-	return __affs_compare_dentry(len, str, name, affs_intl_toupper);
+	return __affs_compare_dentry(len, str, name, affs_intl_toupper,
+				     affs_nofilenametruncate(parent));
+
 }
 
 /*
@@ -411,7 +422,10 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		(u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name,
 		(u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name);
 
-	retval = affs_check_name(new_dentry->d_name.name,new_dentry->d_name.len);
+	retval = affs_check_name(new_dentry->d_name.name,
+				 new_dentry->d_name.len,
+				 affs_nofilenametruncate(old_dentry));
+
 	if (retval)
 		return retval;
 
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 307453086c3f..6d589f28bf9b 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -128,7 +128,7 @@ static void init_once(void *foo)
 	inode_init_once(&ei->vfs_inode);
 }
 
-static int init_inodecache(void)
+static int __init init_inodecache(void)
 {
 	affs_inode_cachep = kmem_cache_create("affs_inode_cache",
 					     sizeof(struct affs_inode_info),
@@ -163,7 +163,7 @@ static const struct super_operations affs_sops = {
 };
 
 enum {
-	Opt_bs, Opt_mode, Opt_mufs, Opt_prefix, Opt_protect,
+	Opt_bs, Opt_mode, Opt_mufs, Opt_notruncate, Opt_prefix, Opt_protect,
 	Opt_reserved, Opt_root, Opt_setgid, Opt_setuid,
 	Opt_verbose, Opt_volume, Opt_ignore, Opt_err,
 };
@@ -172,6 +172,7 @@ static const match_table_t tokens = {
 	{Opt_bs, "bs=%u"},
 	{Opt_mode, "mode=%o"},
 	{Opt_mufs, "mufs"},
+	{Opt_notruncate, "nofilenametruncate"},
 	{Opt_prefix, "prefix=%s"},
 	{Opt_protect, "protect"},
 	{Opt_reserved, "reserved=%u"},
@@ -233,6 +234,9 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
 		case Opt_mufs:
 			*mount_opts |= SF_MUFS;
 			break;
+		case Opt_notruncate:
+			*mount_opts |= SF_NO_TRUNCATE;
+			break;
 		case Opt_prefix:
 			*prefix = match_strdup(&args[0]);
 			if (!*prefix)
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 3182c0e68b42..232e03d4780d 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -103,6 +103,9 @@ static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *i
 	if (tmp.size < sizeof(tmp))
 		return ERR_PTR(-EINVAL);
 
+	if (tmp.size > (PATH_MAX + sizeof(tmp)))
+		return ERR_PTR(-ENAMETOOLONG);
+
 	return memdup_user(in, tmp.size);
 }
 
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 29aa5cf6639b..7041ac35ace8 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -266,7 +266,7 @@ static void init_once(void *foo)
 	inode_init_once(&bi->vfs_inode);
 }
 
-static int init_inodecache(void)
+static int __init init_inodecache(void)
 {
 	bfs_inode_cachep = kmem_cache_create("bfs_inode_cache",
 					     sizeof(struct bfs_inode_info),
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0f59799fa105..aa3cb626671e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -584,7 +584,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long reloc_func_desc __maybe_unused = 0;
 	int executable_stack = EXSTACK_DEFAULT;
-	unsigned long def_flags = 0;
 	struct pt_regs *regs = current_pt_regs();
 	struct {
 		struct elfhdr elf_ex;
@@ -724,9 +723,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	if (retval)
 		goto out_free_dentry;
 
-	/* OK, This is the point of no return */
-	current->mm->def_flags = def_flags;
-
 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
 	   may depend on the personality. */
 	SET_PERSONALITY(loc->elf_ex);
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 29696b78d1f4..1c2ce0c87711 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -182,6 +182,9 @@ static int bdev_integrity_enabled(struct block_device *bdev, int rw)
  */
 int bio_integrity_enabled(struct bio *bio)
 {
+	if (!bio_is_rw(bio))
+		return 0;
+
 	/* Already protected? */
 	if (bio_integrity(bio))
 		return 0;
@@ -309,10 +312,9 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
 {
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 	struct blk_integrity_exchg bix;
-	struct bio_vec bv;
-	struct bvec_iter iter;
+	struct bio_vec *bv;
 	sector_t sector;
-	unsigned int sectors, ret = 0;
+	unsigned int sectors, ret = 0, i;
 	void *prot_buf = bio->bi_integrity->bip_buf;
 
 	if (operate)
@@ -323,16 +325,16 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
 	bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
 	bix.sector_size = bi->sector_size;
 
-	bio_for_each_segment(bv, bio, iter) {
-		void *kaddr = kmap_atomic(bv.bv_page);
-		bix.data_buf = kaddr + bv.bv_offset;
-		bix.data_size = bv.bv_len;
+	bio_for_each_segment_all(bv, bio, i) {
+		void *kaddr = kmap_atomic(bv->bv_page);
+		bix.data_buf = kaddr + bv->bv_offset;
+		bix.data_size = bv->bv_len;
 		bix.prot_buf = prot_buf;
 		bix.sector = sector;
 
-		if (operate) {
+		if (operate)
 			bi->generate_fn(&bix);
-		} else {
+		else {
 			ret = bi->verify_fn(&bix);
 			if (ret) {
 				kunmap_atomic(kaddr);
@@ -340,7 +342,7 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
 			}
 		}
 
-		sectors = bv.bv_len / bi->sector_size;
+		sectors = bv->bv_len / bi->sector_size;
 		sector += sectors;
 		prot_buf += sectors * bi->tuple_size;
 
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e1ffb1e22898..c660527af838 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2025,6 +2025,7 @@ out:
 
 static const struct vm_operations_struct btrfs_file_vm_ops = {
 	.fault = filemap_fault,
+	.map_pages = filemap_map_pages,
 	.page_mkwrite = btrfs_page_mkwrite,
 	.remap_pages = generic_file_remap_pages,
 };
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 8c44fdd4e1c3..834f9f3723fb 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -205,6 +205,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
 	ci->fscache = fscache_acquire_cookie(fsc->fscache,
 					     &ceph_fscache_inode_object_def,
 					     ci, true);
+	fscache_check_consistency(ci->fscache);
 done:
 	mutex_unlock(&inode->i_mutex);
 
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
index da95f61b7a09..5ac591bd012b 100644
--- a/fs/ceph/cache.h
+++ b/fs/ceph/cache.h
@@ -48,6 +48,12 @@ void ceph_readpage_to_fscache(struct inode *inode, struct page *page);
 void ceph_invalidate_fscache_page(struct inode* inode, struct page *page);
 void ceph_queue_revalidate(struct inode *inode);
 
+static inline void ceph_fscache_update_objectsize(struct inode *inode)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	fscache_attr_changed(ci->fscache);
+}
+
 static inline void ceph_fscache_invalidate(struct inode *inode)
 {
 	fscache_invalidate(ceph_inode(inode)->fscache);
@@ -135,6 +141,10 @@ static inline void ceph_readpage_to_fscache(struct inode *inode,
 {
 }
 
+static inline void ceph_fscache_update_objectsize(struct inode *inode)
+{
+}
+
 static inline void ceph_fscache_invalidate(struct inode *inode)
 {
 }
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 17543383545c..2e5e648eb5c3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -622,8 +622,10 @@ retry:
 
 	if (flags & CEPH_CAP_FLAG_AUTH) {
 		if (ci->i_auth_cap == NULL ||
-		    ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0)
+		    ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) {
 			ci->i_auth_cap = cap;
+			cap->mds_wanted = wanted;
+		}
 		ci->i_cap_exporting_issued = 0;
 	} else {
 		WARN_ON(ci->i_auth_cap == cap);
@@ -885,7 +887,10 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
 		cap = rb_entry(p, struct ceph_cap, ci_node);
 		if (!__cap_is_valid(cap))
 			continue;
-		mds_wanted |= cap->mds_wanted;
+		if (cap == ci->i_auth_cap)
+			mds_wanted |= cap->mds_wanted;
+		else
+			mds_wanted |= (cap->mds_wanted & ~CEPH_CAP_ANY_FILE_WR);
 	}
 	return mds_wanted;
 }
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 6d59006bfa27..16b54aa31f08 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -93,6 +93,8 @@ static int mdsc_show(struct seq_file *s, void *p)
 	} else if (req->r_path1) {
 		seq_printf(s, " #%llx/%s", req->r_ino1.ino,
 			   req->r_path1);
+	} else {
+		seq_printf(s, " #%llx", req->r_ino1.ino);
 	}
 
 	if (req->r_old_dentry) {
@@ -102,7 +104,8 @@ static int mdsc_show(struct seq_file *s, void *p)
 			path = NULL;
 		spin_lock(&req->r_old_dentry->d_lock);
 		seq_printf(s, " #%llx/%.*s (%s)",
-			   ceph_ino(req->r_old_dentry_dir),
+			   req->r_old_dentry_dir ?
+			   ceph_ino(req->r_old_dentry_dir) : 0,
 			   req->r_old_dentry->d_name.len,
 			   req->r_old_dentry->d_name.name,
 			   path ? path : "");
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 45eda6d7a40c..766410a12c2c 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -119,7 +119,8 @@ static int fpos_cmp(loff_t l, loff_t r)
 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
 * the MDS if/when the directory is modified).
 */
-static int __dcache_readdir(struct file *file, struct dir_context *ctx)
+static int __dcache_readdir(struct file *file, struct dir_context *ctx,
+			    u32 shared_gen)
 {
 	struct ceph_file_info *fi = file->private_data;
 	struct dentry *parent = file->f_dentry;
@@ -133,8 +134,8 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx)
 	last = fi->dentry;
 	fi->dentry = NULL;
 
-	dout("__dcache_readdir %p at %llu (last %p)\n", dir, ctx->pos,
-	     last);
+	dout("__dcache_readdir %p v%u at %llu (last %p)\n",
+	     dir, shared_gen, ctx->pos, last);
 
 	spin_lock(&parent->d_lock);
 
@@ -161,7 +162,8 @@ more:
 		goto out_unlock;
 	}
 	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-	if (!d_unhashed(dentry) && dentry->d_inode &&
+	if (di->lease_shared_gen == shared_gen &&
+	    !d_unhashed(dentry) && dentry->d_inode &&
 	    ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
 	    ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
 	    fpos_cmp(ctx->pos, di->offset) <= 0)
@@ -190,7 +192,7 @@ more:
 	if (last) {
 		/* remember our position */
 		fi->dentry = last;
-		fi->next_offset = di->offset;
+		fi->next_offset = fpos_off(di->offset);
 	}
 	dput(dentry);
 	return 0;
@@ -252,8 +254,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	int err;
 	u32 ftype;
 	struct ceph_mds_reply_info_parsed *rinfo;
-	const int max_entries = fsc->mount_options->max_readdir;
-	const int max_bytes = fsc->mount_options->max_readdir_bytes;
 
 	dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off);
 	if (fi->flags & CEPH_F_ATEND)
@@ -291,8 +291,9 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	    ceph_snap(inode) != CEPH_SNAPDIR &&
 	    __ceph_dir_is_complete(ci) &&
 	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
+		u32 shared_gen = ci->i_shared_gen;
 		spin_unlock(&ci->i_ceph_lock);
-		err = __dcache_readdir(file, ctx);
+		err = __dcache_readdir(file, ctx, shared_gen);
 		if (err != -EAGAIN)
 			return err;
 	} else {
@@ -322,14 +323,16 @@ more:
 		fi->last_readdir = NULL;
 	}
 
-	/* requery frag tree, as the frag topology may have changed */
-	frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL);
-
 	dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
 	     ceph_vinop(inode), frag, fi->last_name);
 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
+	err = ceph_alloc_readdir_reply_buffer(req, inode);
+	if (err) {
+		ceph_mdsc_put_request(req);
+		return err;
+	}
 	req->r_inode = inode;
 	ihold(inode);
 	req->r_dentry = dget(file->f_dentry);
@@ -340,9 +343,6 @@ more:
 	req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
 	req->r_readdir_offset = fi->next_offset;
 	req->r_args.readdir.frag = cpu_to_le32(frag);
-	req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
-	req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes);
-	req->r_num_caps = max_entries + 1;
 	err = ceph_mdsc_do_request(mdsc, NULL, req);
 	if (err < 0) {
 		ceph_mdsc_put_request(req);
@@ -369,9 +369,9 @@ more:
 			fi->next_offset = 0;
 		off = fi->next_offset;
 	}
+	fi->frag = frag;
 	fi->offset = fi->next_offset;
 	fi->last_readdir = req;
-	fi->frag = frag;
 
 	if (req->r_reply_info.dir_end) {
 		kfree(fi->last_name);
@@ -454,7 +454,7 @@ more:
 	return 0;
 }
 
-static void reset_readdir(struct ceph_file_info *fi)
+static void reset_readdir(struct ceph_file_info *fi, unsigned frag)
 {
 	if (fi->last_readdir) {
 		ceph_mdsc_put_request(fi->last_readdir);
@@ -462,7 +462,10 @@ static void reset_readdir(struct ceph_file_info *fi)
 	}
 	kfree(fi->last_name);
 	fi->last_name = NULL;
-	fi->next_offset = 2;  /* compensate for . and .. */
+	if (ceph_frag_is_leftmost(frag))
+		fi->next_offset = 2;  /* compensate for . and .. */
+	else
+		fi->next_offset = 0;
 	if (fi->dentry) {
 		dput(fi->dentry);
 		fi->dentry = NULL;
@@ -474,7 +477,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct ceph_file_info *fi = file->private_data;
 	struct inode *inode = file->f_mapping->host;
-	loff_t old_offset = offset;
+	loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset);
 	loff_t retval;
 
 	mutex_lock(&inode->i_mutex);
@@ -491,7 +494,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 		goto out;
 	}
 
-	if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
+	if (offset >= 0) {
 		if (offset != file->f_pos) {
 			file->f_pos = offset;
 			file->f_version = 0;
@@ -504,14 +507,14 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 		 * seek to new frag, or seek prior to current chunk.
 		 */
 		if (offset == 0 ||
-		    fpos_frag(offset) != fpos_frag(old_offset) ||
+		    fpos_frag(offset) != fi->frag ||
 		    fpos_off(offset) < fi->offset) {
 			dout("dir_llseek dropping %p content\n", file);
-			reset_readdir(fi);
+			reset_readdir(fi, fpos_frag(offset));
 		}
 
 		/* bump dir_release_count if we did a forward seek */
-		if (offset > old_offset)
+		if (fpos_cmp(offset, old_offset) > 0)
 			fi->dir_release_count--;
 	}
 out:
@@ -812,8 +815,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
 	}
 	req->r_dentry = dget(dentry);
 	req->r_num_caps = 2;
-	req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */
-	req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
+	req->r_old_dentry = dget(old_dentry);
 	req->r_locked_dir = dir;
 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -911,10 +913,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
+	ihold(old_dir);
 	req->r_dentry = dget(new_dentry);
 	req->r_num_caps = 2;
 	req->r_old_dentry = dget(old_dentry);
-	req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
+	req->r_old_dentry_dir = old_dir;
 	req->r_locked_dir = new_dir;
 	req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 16796be53ca5..00d6af6a32ec 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -8,23 +8,6 @@
 #include "mds_client.h"
 
 /*
- * NFS export support
- *
- * NFS re-export of a ceph mount is, at present, only semireliable.
- * The basic issue is that the Ceph architectures doesn't lend itself
- * well to generating filehandles that will remain valid forever.
- *
- * So, we do our best. If you're lucky, your inode will be in the
- * client's cache. If it's not, and you have a connectable fh, then
- * the MDS server may be able to find it for you. Otherwise, you get
- * ESTALE.
- *
- * There are ways to this more reliable, but in the non-connectable fh
- * case, we won't every work perfectly, and in the connectable case,
- * some changes are needed on the MDS side to work better.
- */
-
-/*
  * Basic fh
  */
 struct ceph_nfs_fh {
@@ -32,22 +15,12 @@ struct ceph_nfs_fh {
 } __attribute__ ((packed));
 
 /*
- * Larger 'connectable' fh that includes parent ino and name hash.
- * Use this whenever possible, as it works more reliably.
+ * Larger fh that includes parent ino.
 */
 struct ceph_nfs_confh {
 	u64 ino, parent_ino;
-	u32 parent_name_hash;
 } __attribute__ ((packed));
 
-/*
- * The presence of @parent_inode here tells us whether NFS wants a
- * connectable file handle. However, we want to make a connectionable
- * file handle unconditionally so that the MDS gets as much of a hint
- * as possible. That means we only use @parent_dentry to indicate
- * whether nfsd wants a connectable fh, and whether we should indicate
- * failure from a too-small @max_len.
- */
 static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
 			  struct inode *parent_inode)
 {
@@ -56,54 +29,36 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
 	struct ceph_nfs_confh *cfh = (void *)rawfh;
 	int connected_handle_length = sizeof(*cfh)/4;
 	int handle_length = sizeof(*fh)/4;
-	struct dentry *dentry;
-	struct dentry *parent;
 
 	/* don't re-export snaps */
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EINVAL;
 
-	dentry = d_find_alias(inode);
+	if (parent_inode && (*max_len < connected_handle_length)) {
+		*max_len = connected_handle_length;
+		return FILEID_INVALID;
+	} else if (*max_len < handle_length) {
+		*max_len = handle_length;
+		return FILEID_INVALID;
+	}
 
-	/* if we found an alias, generate a connectable fh */
-	if (*max_len >= connected_handle_length && dentry) {
-		dout("encode_fh %p connectable\n", dentry);
-		spin_lock(&dentry->d_lock);
-		parent = dentry->d_parent;
+	if (parent_inode) {
+		dout("encode_fh %llx with parent %llx\n",
+		     ceph_ino(inode), ceph_ino(parent_inode));
 		cfh->ino = ceph_ino(inode);
-		cfh->parent_ino = ceph_ino(parent->d_inode);
-		cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode,
-							 dentry);
+		cfh->parent_ino = ceph_ino(parent_inode);
 		*max_len = connected_handle_length;
-		type = 2;
-		spin_unlock(&dentry->d_lock);
-	} else if (*max_len >= handle_length) {
-		if (parent_inode) {
-			/* nfsd wants connectable */
-			*max_len = connected_handle_length;
-			type = FILEID_INVALID;
-		} else {
-			dout("encode_fh %p\n", dentry);
-			fh->ino = ceph_ino(inode);
-			*max_len = handle_length;
-			type = 1;
-		}
+		type = FILEID_INO32_GEN_PARENT;
 	} else {
+		dout("encode_fh %llx\n", ceph_ino(inode));
+		fh->ino = ceph_ino(inode);
 		*max_len = handle_length;
-		type = FILEID_INVALID;
+		type = FILEID_INO32_GEN;
 	}
-	if (dentry)
-		dput(dentry);
 	return type;
 }
 
-/*
- * convert regular fh to dentry
- *
- * FIXME: we should try harder by querying the mds for the ino.
- */
-static struct dentry *__fh_to_dentry(struct super_block *sb,
-				     struct ceph_nfs_fh *fh, int fh_len)
+static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
 {
 	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
 	struct inode *inode;
@@ -111,11 +66,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
 	struct ceph_vino vino;
 	int err;
 
-	if (fh_len < sizeof(*fh) / 4)
-		return ERR_PTR(-ESTALE);
-
-	dout("__fh_to_dentry %llx\n", fh->ino);
-	vino.ino = fh->ino;
+	vino.ino = ino;
 	vino.snap = CEPH_NOSNAP;
 	inode = ceph_find_inode(sb, vino);
 	if (!inode) {
@@ -139,139 +90,161 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
 
 	dentry = d_obtain_alias(inode);
 	if (IS_ERR(dentry)) {
-		pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n",
-		       fh->ino, inode);
 		iput(inode);
 		return dentry;
 	}
 	err = ceph_init_dentry(dentry);
 	if (err < 0) {
-		iput(inode);
+		dput(dentry);
 		return ERR_PTR(err);
 	}
-	dout("__fh_to_dentry %llx %p dentry %p\n", fh->ino, inode, dentry);
+	dout("__fh_to_dentry %llx %p dentry %p\n", ino, inode, dentry);
 	return dentry;
 }
 
 /*
- * convert connectable fh to dentry
+ * convert regular fh to dentry
 */
-static struct dentry *__cfh_to_dentry(struct super_block *sb,
-				      struct ceph_nfs_confh *cfh, int fh_len)
+static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
+					struct fid *fid,
+					int fh_len, int fh_type)
+{
+	struct ceph_nfs_fh *fh = (void *)fid->raw;
+
+	if (fh_type != FILEID_INO32_GEN &&
+	    fh_type != FILEID_INO32_GEN_PARENT)
+		return NULL;
+	if (fh_len < sizeof(*fh) / 4)
+		return NULL;
+
+	dout("fh_to_dentry %llx\n", fh->ino);
+	return __fh_to_dentry(sb, fh->ino);
+}
+
+static struct dentry *__get_parent(struct super_block *sb,
+				   struct dentry *child, u64 ino)
 {
 	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
+	struct ceph_mds_request *req;
 	struct inode *inode;
 	struct dentry *dentry;
-	struct ceph_vino vino;
 	int err;
 
-	if (fh_len < sizeof(*cfh) / 4)
-		return ERR_PTR(-ESTALE);
-
-	dout("__cfh_to_dentry %llx (%llx/%x)\n",
-	     cfh->ino, cfh->parent_ino, cfh->parent_name_hash);
-
-	vino.ino = cfh->ino;
-	vino.snap = CEPH_NOSNAP;
-	inode = ceph_find_inode(sb, vino);
-	if (!inode) {
-		struct ceph_mds_request *req;
-
-		req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH,
-					       USE_ANY_MDS);
-		if (IS_ERR(req))
-			return ERR_CAST(req);
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
+				       USE_ANY_MDS);
+	if (IS_ERR(req))
+		return ERR_CAST(req);
 
-		req->r_ino1 = vino;
-		req->r_ino2.ino = cfh->parent_ino;
-		req->r_ino2.snap = CEPH_NOSNAP;
-		req->r_path2 = kmalloc(16, GFP_NOFS);
-		snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
-		req->r_num_caps = 1;
-		err = ceph_mdsc_do_request(mdsc, NULL, req);
-		inode = req->r_target_inode;
-		if (inode)
-			ihold(inode);
-		ceph_mdsc_put_request(req);
-		if (!inode)
-			return ERR_PTR(err ? err : -ESTALE);
+	if (child) {
+		req->r_inode = child->d_inode;
+		ihold(child->d_inode);
+	} else {
+		req->r_ino1 = (struct ceph_vino) {
+			.ino = ino,
+			.snap = CEPH_NOSNAP,
+		};
 	}
+	req->r_num_caps = 1;
+	err = ceph_mdsc_do_request(mdsc, NULL, req);
+	inode = req->r_target_inode;
+	if (inode)
+		ihold(inode);
+	ceph_mdsc_put_request(req);
+	if (!inode)
+		return ERR_PTR(-ENOENT);
 
 	dentry = d_obtain_alias(inode);
 	if (IS_ERR(dentry)) {
-		pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n",
-		       cfh->ino, inode);
 		iput(inode);
 		return dentry;
 	}
 	err = ceph_init_dentry(dentry);
 	if (err < 0) {
-		iput(inode);
+		dput(dentry);
 		return ERR_PTR(err);
 	}
-	dout("__cfh_to_dentry %llx %p dentry %p\n", cfh->ino, inode, dentry);
+	dout("__get_parent ino %llx parent %p ino %llx.%llx\n",
+	     child ? ceph_ino(child->d_inode) : ino,
+	     dentry, ceph_vinop(inode));
 	return dentry;
 }
 
-static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid,
-					int fh_len, int fh_type)
+struct dentry *ceph_get_parent(struct dentry *child)
 {
-	if (fh_type == 1)
-		return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw,
-								fh_len);
-	else
-		return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw,
-								fh_len);
+	/* don't re-export snaps */
+	if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
+		return ERR_PTR(-EINVAL);
+
+	dout("get_parent %p ino %llx.%llx\n",
+	     child, ceph_vinop(child->d_inode));
+	return __get_parent(child->d_sb, child, 0);
 }
 
 /*
- * get parent, if possible.
- *
- * FIXME: we could do better by querying the mds to discover the
- * parent.
+ * convert regular fh to parent
 */
 static struct dentry *ceph_fh_to_parent(struct super_block *sb,
					struct fid *fid,
					int fh_len, int fh_type)
 {
 	struct ceph_nfs_confh *cfh = (void *)fid->raw;
-	struct ceph_vino vino;
-	struct inode *inode;
 	struct dentry *dentry;
-	int err;
 
-	if (fh_type == 1)
-		return ERR_PTR(-ESTALE);
+	if (fh_type != FILEID_INO32_GEN_PARENT)
+		return NULL;
 	if (fh_len < sizeof(*cfh) / 4)
-		return ERR_PTR(-ESTALE);
+		return NULL;
 
-	pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino,
-		 cfh->parent_name_hash);
+	dout("fh_to_parent %llx\n", cfh->parent_ino);
+	dentry = __get_parent(sb, NULL, cfh->ino);
+	if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT)
+		dentry = __fh_to_dentry(sb, cfh->parent_ino);
+	return dentry;
+}
 
-	vino.ino = cfh->ino;
-	vino.snap = CEPH_NOSNAP;
-	inode = ceph_find_inode(sb, vino);
-	if (!inode)
-		return ERR_PTR(-ESTALE);
+static int ceph_get_name(struct dentry *parent, char *name,
			 struct dentry *child)
+{
+	struct ceph_mds_client *mdsc;
+	struct ceph_mds_request *req;
+	int err;
 
-	dentry = d_obtain_alias(inode);
-	if (IS_ERR(dentry)) {
-		pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n",
-		       cfh->ino, inode);
-		iput(inode);
-		return dentry;
-	}
-	err = ceph_init_dentry(dentry);
-	if (err < 0) {
-		iput(inode);
-		return ERR_PTR(err);
+	mdsc = ceph_inode_to_client(child->d_inode)->mdsc;
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
+				       USE_ANY_MDS);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	mutex_lock(&parent->d_inode->i_mutex);
+
+	req->r_inode = child->d_inode;
+	ihold(child->d_inode);
+	req->r_ino2 = ceph_vino(parent->d_inode);
+	req->r_locked_dir = parent->d_inode;
+	req->r_num_caps = 2;
+	err = ceph_mdsc_do_request(mdsc, NULL, req);
+
+	mutex_unlock(&parent->d_inode->i_mutex);
+
+	if (!err) {
+		struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+		memcpy(name, rinfo->dname, rinfo->dname_len);
+		name[rinfo->dname_len] = 0;
+		dout("get_name %p ino %llx.%llx name %s\n",
+		     child, ceph_vinop(child->d_inode), name);
+	} else {
+		dout("get_name %p ino %llx.%llx err %d\n",
+		     child, ceph_vinop(child->d_inode), err);
 	}
-	dout("fh_to_parent %llx %p dentry %p\n", cfh->ino, inode, dentry);
-	return dentry;
+
+	ceph_mdsc_put_request(req);
+	return err;
 }
 
 const struct export_operations ceph_export_ops = {
 	.encode_fh = ceph_encode_fh,
 	.fh_to_dentry = ceph_fh_to_dentry,
 	.fh_to_parent = ceph_fh_to_parent,
+	.get_parent = ceph_get_parent,
+	.get_name = ceph_get_name,
 };
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 09c7afe32e49..66075a4ad979 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -210,7 +210,7 @@ int ceph_open(struct inode *inode, struct file *file)
 	ihold(inode);
 
 	req->r_num_caps = 1;
-	if (flags & (O_CREAT|O_TRUNC))
+	if (flags & O_CREAT)
 		parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
 	err = ceph_mdsc_do_request(mdsc, parent_inode, req);
 	iput(parent_inode);
@@ -291,8 +291,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 	}
 		err = finish_open(file, dentry, ceph_open, opened);
 	}
-
 out_err:
+	if (!req->r_err && req->r_target_inode)
+		ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode);
 	ceph_mdsc_put_request(req);
 	dout("atomic_open result=%d\n", err);
 	return err;
@@ -970,6 +971,7 @@ retry_snap:
 			goto retry_snap;
 		}
 	} else {
+		loff_t old_size = inode->i_size;
 		/*
 		 * No need to acquire the i_truncate_mutex. Because
 		 * the MDS revokes Fwb caps before sending truncate
@@ -980,6 +982,8 @@ retry_snap:
 		written = generic_file_buffered_write(iocb, iov, nr_segs,
						      pos, &iocb->ki_pos,
						      count, 0);
+		if (inode->i_size > old_size)
+			ceph_fscache_update_objectsize(inode);
 		mutex_unlock(&inode->i_mutex);
 	}
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 32d519d8a2e2..0b0728e5be2d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -659,14 +659,6 @@ static int fill_inode(struct inode *inode,
659 le32_to_cpu(info->time_warp_seq), 659 le32_to_cpu(info->time_warp_seq),
660 &ctime, &mtime, &atime); 660 &ctime, &mtime, &atime);
661 661
662 /* only update max_size on auth cap */
663 if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
664 ci->i_max_size != le64_to_cpu(info->max_size)) {
665 dout("max_size %lld -> %llu\n", ci->i_max_size,
666 le64_to_cpu(info->max_size));
667 ci->i_max_size = le64_to_cpu(info->max_size);
668 }
669
670 ci->i_layout = info->layout; 662 ci->i_layout = info->layout;
671 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; 663 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
672 664
@@ -755,6 +747,14 @@ static int fill_inode(struct inode *inode,
755 ci->i_max_offset = 2; 747 ci->i_max_offset = 2;
756 } 748 }
757no_change: 749no_change:
750 /* only update max_size on auth cap */
751 if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
752 ci->i_max_size != le64_to_cpu(info->max_size)) {
753 dout("max_size %lld -> %llu\n", ci->i_max_size,
754 le64_to_cpu(info->max_size));
755 ci->i_max_size = le64_to_cpu(info->max_size);
756 }
757
758 spin_unlock(&ci->i_ceph_lock); 758 spin_unlock(&ci->i_ceph_lock);
759 759
760 /* queue truncate if we saw i_size decrease */ 760 /* queue truncate if we saw i_size decrease */
@@ -1044,10 +1044,59 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1044 session, req->r_request_started, -1, 1044 session, req->r_request_started, -1,
1045 &req->r_caps_reservation); 1045 &req->r_caps_reservation);
1046 if (err < 0) 1046 if (err < 0)
1047 return err; 1047 goto done;
1048 } else { 1048 } else {
1049 WARN_ON_ONCE(1); 1049 WARN_ON_ONCE(1);
1050 } 1050 }
1051
1052 if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME) {
1053 struct qstr dname;
1054 struct dentry *dn, *parent;
1055
1056 BUG_ON(!rinfo->head->is_target);
1057 BUG_ON(req->r_dentry);
1058
1059 parent = d_find_any_alias(dir);
1060 BUG_ON(!parent);
1061
1062 dname.name = rinfo->dname;
1063 dname.len = rinfo->dname_len;
1064 dname.hash = full_name_hash(dname.name, dname.len);
1065 vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
1066 vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
1067retry_lookup:
1068 dn = d_lookup(parent, &dname);
1069 dout("d_lookup on parent=%p name=%.*s got %p\n",
1070 parent, dname.len, dname.name, dn);
1071
1072 if (!dn) {
1073 dn = d_alloc(parent, &dname);
1074 dout("d_alloc %p '%.*s' = %p\n", parent,
1075 dname.len, dname.name, dn);
1076 if (dn == NULL) {
1077 dput(parent);
1078 err = -ENOMEM;
1079 goto done;
1080 }
1081 err = ceph_init_dentry(dn);
1082 if (err < 0) {
1083 dput(dn);
1084 dput(parent);
1085 goto done;
1086 }
1087 } else if (dn->d_inode &&
1088 (ceph_ino(dn->d_inode) != vino.ino ||
1089 ceph_snap(dn->d_inode) != vino.snap)) {
1090 dout(" dn %p points to wrong inode %p\n",
1091 dn, dn->d_inode);
1092 d_delete(dn);
1093 dput(dn);
1094 goto retry_lookup;
1095 }
1096
1097 req->r_dentry = dn;
1098 dput(parent);
1099 }
1051 } 1100 }
1052 1101
1053 if (rinfo->head->is_target) { 1102 if (rinfo->head->is_target) {
@@ -1063,7 +1112,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1063 1112
1064 err = fill_inode(in, &rinfo->targeti, NULL, 1113 err = fill_inode(in, &rinfo->targeti, NULL,
1065 session, req->r_request_started, 1114 session, req->r_request_started,
1066 (le32_to_cpu(rinfo->head->result) == 0) ? 1115 (!req->r_aborted && rinfo->head->result == 0) ?
1067 req->r_fmode : -1, 1116 req->r_fmode : -1,
1068 &req->r_caps_reservation); 1117 &req->r_caps_reservation);
1069 if (err < 0) { 1118 if (err < 0) {
@@ -1616,8 +1665,6 @@ static const struct inode_operations ceph_symlink_iops = {
1616 .getxattr = ceph_getxattr, 1665 .getxattr = ceph_getxattr,
1617 .listxattr = ceph_listxattr, 1666 .listxattr = ceph_listxattr,
1618 .removexattr = ceph_removexattr, 1667 .removexattr = ceph_removexattr,
1619 .get_acl = ceph_get_acl,
1620 .set_acl = ceph_set_acl,
1621}; 1668};
1622 1669
1623/* 1670/*
@@ -1627,7 +1674,6 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1627{ 1674{
1628 struct inode *inode = dentry->d_inode; 1675 struct inode *inode = dentry->d_inode;
1629 struct ceph_inode_info *ci = ceph_inode(inode); 1676 struct ceph_inode_info *ci = ceph_inode(inode);
1630 struct inode *parent_inode;
1631 const unsigned int ia_valid = attr->ia_valid; 1677 const unsigned int ia_valid = attr->ia_valid;
1632 struct ceph_mds_request *req; 1678 struct ceph_mds_request *req;
1633 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; 1679 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
@@ -1819,9 +1865,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1819 req->r_inode_drop = release; 1865 req->r_inode_drop = release;
1820 req->r_args.setattr.mask = cpu_to_le32(mask); 1866 req->r_args.setattr.mask = cpu_to_le32(mask);
1821 req->r_num_caps = 1; 1867 req->r_num_caps = 1;
1822 parent_inode = ceph_get_dentry_parent_inode(dentry); 1868 err = ceph_mdsc_do_request(mdsc, NULL, req);
1823 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
1824 iput(parent_inode);
1825 } 1869 }
1826 dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, 1870 dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
1827 ceph_cap_string(dirtied), mask); 1871 ceph_cap_string(dirtied), mask);
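The retry_lookup loop added to ceph_fill_trace() above follows a familiar pattern: look the name up in the cache, and if the cached entry points at the wrong target, invalidate it and try again with a freshly allocated entry. A minimal userspace sketch of that pattern follows; the tiny fixed-size cache is invented for the example and only stands in for the dcache.

/* Sketch of the lookup-validate-retry pattern; the cache below is invented. */
#include <stdio.h>
#include <string.h>

struct entry {
	char name[32];
	long ino;			/* 0 means the slot is unused */
};

static struct entry cache[8];

static struct entry *cache_lookup(const char *name)
{
	for (size_t i = 0; i < sizeof(cache) / sizeof(cache[0]); i++)
		if (cache[i].ino && strcmp(cache[i].name, name) == 0)
			return &cache[i];
	return NULL;
}

static struct entry *lookup_or_alloc(const char *name, long want_ino)
{
	struct entry *e;

retry:
	e = cache_lookup(name);
	if (!e) {
		e = &cache[0];		/* pretend slot 0 is the free one */
		snprintf(e->name, sizeof(e->name), "%s", name);
		e->ino = want_ino;
	} else if (e->ino != want_ino) {
		e->ino = 0;		/* stale entry: drop it and retry */
		goto retry;
	}
	return e;
}

int main(void)
{
	cache[0] = (struct entry){ .name = "foo", .ino = 41 };	/* stale target */
	struct entry *e = lookup_or_alloc("foo", 42);

	printf("%s -> %ld\n", e->name, e->ino);			/* prints "foo -> 42" */
	return 0;
}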
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index dc66c9e023e4..efbe08289292 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -64,7 +64,6 @@ static long __validate_layout(struct ceph_mds_client *mdsc,
64static long ceph_ioctl_set_layout(struct file *file, void __user *arg) 64static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
65{ 65{
66 struct inode *inode = file_inode(file); 66 struct inode *inode = file_inode(file);
67 struct inode *parent_inode;
68 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 67 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
69 struct ceph_mds_request *req; 68 struct ceph_mds_request *req;
70 struct ceph_ioctl_layout l; 69 struct ceph_ioctl_layout l;
@@ -121,9 +120,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
121 cpu_to_le32(l.object_size); 120 cpu_to_le32(l.object_size);
122 req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool); 121 req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool);
123 122
124 parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); 123 err = ceph_mdsc_do_request(mdsc, NULL, req);
125 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
126 iput(parent_inode);
127 ceph_mdsc_put_request(req); 124 ceph_mdsc_put_request(req);
128 return err; 125 return err;
129} 126}
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index ae6d14e82b0f..d94ba0df9f4d 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -2,11 +2,31 @@
2 2
3#include <linux/file.h> 3#include <linux/file.h>
4#include <linux/namei.h> 4#include <linux/namei.h>
5#include <linux/random.h>
5 6
6#include "super.h" 7#include "super.h"
7#include "mds_client.h" 8#include "mds_client.h"
8#include <linux/ceph/pagelist.h> 9#include <linux/ceph/pagelist.h>
9 10
11static u64 lock_secret;
12
13static inline u64 secure_addr(void *addr)
14{
15 u64 v = lock_secret ^ (u64)(unsigned long)addr;
16 /*
17 * Set the most significant bit, so that MDS knows the 'owner'
18 * is sufficient to identify the owner of lock. (old code uses
19 * both 'owner' and 'pid')
20 */
21 v |= (1ULL << 63);
22 return v;
23}
24
25void __init ceph_flock_init(void)
26{
27 get_random_bytes(&lock_secret, sizeof(lock_secret));
28}
29
10/** 30/**
11 * Implement fcntl and flock locking functions. 31 * Implement fcntl and flock locking functions.
12 */ 32 */
@@ -14,11 +34,11 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
14 int cmd, u8 wait, struct file_lock *fl) 34 int cmd, u8 wait, struct file_lock *fl)
15{ 35{
16 struct inode *inode = file_inode(file); 36 struct inode *inode = file_inode(file);
17 struct ceph_mds_client *mdsc = 37 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
18 ceph_sb_to_client(inode->i_sb)->mdsc;
19 struct ceph_mds_request *req; 38 struct ceph_mds_request *req;
20 int err; 39 int err;
21 u64 length = 0; 40 u64 length = 0;
41 u64 owner;
22 42
23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 43 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
24 if (IS_ERR(req)) 44 if (IS_ERR(req))
@@ -32,25 +52,27 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
32 else 52 else
33 length = fl->fl_end - fl->fl_start + 1; 53 length = fl->fl_end - fl->fl_start + 1;
34 54
35 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 55 if (lock_type == CEPH_LOCK_FCNTL)
36 "length: %llu, wait: %d, type: %d", (int)lock_type, 56 owner = secure_addr(fl->fl_owner);
37 (int)operation, (u64)fl->fl_pid, fl->fl_start, 57 else
38 length, wait, fl->fl_type); 58 owner = secure_addr(fl->fl_file);
59
60 dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
61 "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type,
62 (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
63 wait, fl->fl_type);
39 64
40 req->r_args.filelock_change.rule = lock_type; 65 req->r_args.filelock_change.rule = lock_type;
41 req->r_args.filelock_change.type = cmd; 66 req->r_args.filelock_change.type = cmd;
67 req->r_args.filelock_change.owner = cpu_to_le64(owner);
42 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); 68 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
43 /* This should be adjusted, but I'm not sure if
44 namespaces actually get id numbers*/
45 req->r_args.filelock_change.pid_namespace =
46 cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
47 req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); 69 req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
48 req->r_args.filelock_change.length = cpu_to_le64(length); 70 req->r_args.filelock_change.length = cpu_to_le64(length);
49 req->r_args.filelock_change.wait = wait; 71 req->r_args.filelock_change.wait = wait;
50 72
51 err = ceph_mdsc_do_request(mdsc, inode, req); 73 err = ceph_mdsc_do_request(mdsc, inode, req);
52 74
53 if ( operation == CEPH_MDS_OP_GETFILELOCK){ 75 if (operation == CEPH_MDS_OP_GETFILELOCK) {
54 fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); 76 fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
55 if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) 77 if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
56 fl->fl_type = F_RDLCK; 78 fl->fl_type = F_RDLCK;
@@ -87,14 +109,19 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
87 u8 wait = 0; 109 u8 wait = 0;
88 u16 op = CEPH_MDS_OP_SETFILELOCK; 110 u16 op = CEPH_MDS_OP_SETFILELOCK;
89 111
90 fl->fl_nspid = get_pid(task_tgid(current)); 112 if (!(fl->fl_flags & FL_POSIX))
91 dout("ceph_lock, fl_pid:%d", fl->fl_pid); 113 return -ENOLCK;
114 /* No mandatory locks */
115 if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
116 return -ENOLCK;
117
118 dout("ceph_lock, fl_owner: %p", fl->fl_owner);
92 119
93 /* set wait bit as appropriate, then make command as Ceph expects it*/ 120 /* set wait bit as appropriate, then make command as Ceph expects it*/
94 if (F_SETLKW == cmd) 121 if (IS_GETLK(cmd))
95 wait = 1;
96 if (F_GETLK == cmd)
97 op = CEPH_MDS_OP_GETFILELOCK; 122 op = CEPH_MDS_OP_GETFILELOCK;
123 else if (IS_SETLKW(cmd))
124 wait = 1;
98 125
99 if (F_RDLCK == fl->fl_type) 126 if (F_RDLCK == fl->fl_type)
100 lock_cmd = CEPH_LOCK_SHARED; 127 lock_cmd = CEPH_LOCK_SHARED;
@@ -105,7 +132,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
105 132
106 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); 133 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
107 if (!err) { 134 if (!err) {
108 if ( op != CEPH_MDS_OP_GETFILELOCK ){ 135 if (op != CEPH_MDS_OP_GETFILELOCK) {
109 dout("mds locked, locking locally"); 136 dout("mds locked, locking locally");
110 err = posix_lock_file(file, fl, NULL); 137 err = posix_lock_file(file, fl, NULL);
111 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 138 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
@@ -131,20 +158,22 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
131{ 158{
132 u8 lock_cmd; 159 u8 lock_cmd;
133 int err; 160 int err;
134 u8 wait = 1; 161 u8 wait = 0;
135 162
136 fl->fl_nspid = get_pid(task_tgid(current)); 163 if (!(fl->fl_flags & FL_FLOCK))
137 dout("ceph_flock, fl_pid:%d", fl->fl_pid); 164 return -ENOLCK;
138 165 /* No mandatory locks */
139 /* set wait bit, then clear it out of cmd*/ 166 if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
140 if (cmd & LOCK_NB) 167 return -ENOLCK;
141 wait = 0; 168
142 cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN); 169 dout("ceph_flock, fl_file: %p", fl->fl_file);
143 /* set command sequence that Ceph wants to see: 170
144 shared lock, exclusive lock, or unlock */ 171 if (IS_SETLKW(cmd))
145 if (LOCK_SH == cmd) 172 wait = 1;
173
174 if (F_RDLCK == fl->fl_type)
146 lock_cmd = CEPH_LOCK_SHARED; 175 lock_cmd = CEPH_LOCK_SHARED;
147 else if (LOCK_EX == cmd) 176 else if (F_WRLCK == fl->fl_type)
148 lock_cmd = CEPH_LOCK_EXCL; 177 lock_cmd = CEPH_LOCK_EXCL;
149 else 178 else
150 lock_cmd = CEPH_LOCK_UNLOCK; 179 lock_cmd = CEPH_LOCK_UNLOCK;
@@ -280,13 +309,14 @@ int lock_to_ceph_filelock(struct file_lock *lock,
280 struct ceph_filelock *cephlock) 309 struct ceph_filelock *cephlock)
281{ 310{
282 int err = 0; 311 int err = 0;
283
284 cephlock->start = cpu_to_le64(lock->fl_start); 312 cephlock->start = cpu_to_le64(lock->fl_start);
285 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); 313 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
286 cephlock->client = cpu_to_le64(0); 314 cephlock->client = cpu_to_le64(0);
287 cephlock->pid = cpu_to_le64(lock->fl_pid); 315 cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
288 cephlock->pid_namespace = 316 if (lock->fl_flags & FL_POSIX)
289 cpu_to_le64((u64)(unsigned long)lock->fl_nspid); 317 cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
318 else
319 cephlock->owner = cpu_to_le64(secure_addr(lock->fl_file));
290 320
291 switch (lock->fl_type) { 321 switch (lock->fl_type) {
292 case F_RDLCK: 322 case F_RDLCK:
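The owner field introduced in locks.c is computed by secure_addr(): XOR a per-boot random secret with the lock owner's kernel address and set the top bit, so the MDS can identify the holder from 'owner' alone. A standalone illustration of just that arithmetic follows; the rand()-seeded secret is only a stand-in for get_random_bytes().

/* Standalone illustration of the secure_addr() owner-token arithmetic. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static uint64_t lock_secret;

static uint64_t secure_addr(const void *addr)
{
	uint64_t v = lock_secret ^ (uint64_t)(uintptr_t)addr;

	return v | (1ULL << 63);	/* MSB set: 'owner' alone identifies the holder */
}

int main(void)
{
	int some_owner;

	srand((unsigned)time(NULL));
	lock_secret = ((uint64_t)rand() << 32) | (uint64_t)rand();
	printf("owner token: %#llx\n", (unsigned long long)secure_addr(&some_owner));
	return 0;
}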
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f4f050a69a48..2b4d093d0563 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3,6 +3,7 @@
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/wait.h> 4#include <linux/wait.h>
5#include <linux/slab.h> 5#include <linux/slab.h>
6#include <linux/gfp.h>
6#include <linux/sched.h> 7#include <linux/sched.h>
7#include <linux/debugfs.h> 8#include <linux/debugfs.h>
8#include <linux/seq_file.h> 9#include <linux/seq_file.h>
@@ -165,21 +166,18 @@ static int parse_reply_info_dir(void **p, void *end,
165 if (num == 0) 166 if (num == 0)
166 goto done; 167 goto done;
167 168
168 /* alloc large array */ 169 BUG_ON(!info->dir_in);
169 info->dir_nr = num;
170 info->dir_in = kcalloc(num, sizeof(*info->dir_in) +
171 sizeof(*info->dir_dname) +
172 sizeof(*info->dir_dname_len) +
173 sizeof(*info->dir_dlease),
174 GFP_NOFS);
175 if (info->dir_in == NULL) {
176 err = -ENOMEM;
177 goto out_bad;
178 }
179 info->dir_dname = (void *)(info->dir_in + num); 170 info->dir_dname = (void *)(info->dir_in + num);
180 info->dir_dname_len = (void *)(info->dir_dname + num); 171 info->dir_dname_len = (void *)(info->dir_dname + num);
181 info->dir_dlease = (void *)(info->dir_dname_len + num); 172 info->dir_dlease = (void *)(info->dir_dname_len + num);
173 if ((unsigned long)(info->dir_dlease + num) >
174 (unsigned long)info->dir_in + info->dir_buf_size) {
175 pr_err("dir contents are larger than expected\n");
176 WARN_ON(1);
177 goto bad;
178 }
182 179
180 info->dir_nr = num;
183 while (num) { 181 while (num) {
184 /* dentry */ 182 /* dentry */
185 ceph_decode_need(p, end, sizeof(u32)*2, bad); 183 ceph_decode_need(p, end, sizeof(u32)*2, bad);
@@ -327,7 +325,9 @@ out_bad:
327 325
328static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) 326static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
329{ 327{
330 kfree(info->dir_in); 328 if (!info->dir_in)
329 return;
330 free_pages((unsigned long)info->dir_in, get_order(info->dir_buf_size));
331} 331}
332 332
333 333
@@ -512,12 +512,11 @@ void ceph_mdsc_release_request(struct kref *kref)
512 struct ceph_mds_request *req = container_of(kref, 512 struct ceph_mds_request *req = container_of(kref,
513 struct ceph_mds_request, 513 struct ceph_mds_request,
514 r_kref); 514 r_kref);
515 destroy_reply_info(&req->r_reply_info);
515 if (req->r_request) 516 if (req->r_request)
516 ceph_msg_put(req->r_request); 517 ceph_msg_put(req->r_request);
517 if (req->r_reply) { 518 if (req->r_reply)
518 ceph_msg_put(req->r_reply); 519 ceph_msg_put(req->r_reply);
519 destroy_reply_info(&req->r_reply_info);
520 }
521 if (req->r_inode) { 520 if (req->r_inode) {
522 ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); 521 ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
523 iput(req->r_inode); 522 iput(req->r_inode);
@@ -528,7 +527,9 @@ void ceph_mdsc_release_request(struct kref *kref)
528 iput(req->r_target_inode); 527 iput(req->r_target_inode);
529 if (req->r_dentry) 528 if (req->r_dentry)
530 dput(req->r_dentry); 529 dput(req->r_dentry);
531 if (req->r_old_dentry) { 530 if (req->r_old_dentry)
531 dput(req->r_old_dentry);
532 if (req->r_old_dentry_dir) {
532 /* 533 /*
533 * track (and drop pins for) r_old_dentry_dir 534 * track (and drop pins for) r_old_dentry_dir
534 * separately, since r_old_dentry's d_parent may have 535 * separately, since r_old_dentry's d_parent may have
@@ -537,7 +538,6 @@ void ceph_mdsc_release_request(struct kref *kref)
537 */ 538 */
538 ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), 539 ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
539 CEPH_CAP_PIN); 540 CEPH_CAP_PIN);
540 dput(req->r_old_dentry);
541 iput(req->r_old_dentry_dir); 541 iput(req->r_old_dentry_dir);
542 } 542 }
543 kfree(req->r_path1); 543 kfree(req->r_path1);
@@ -1311,6 +1311,9 @@ static int trim_caps(struct ceph_mds_client *mdsc,
1311 trim_caps - session->s_trim_caps); 1311 trim_caps - session->s_trim_caps);
1312 session->s_trim_caps = 0; 1312 session->s_trim_caps = 0;
1313 } 1313 }
1314
1315 ceph_add_cap_releases(mdsc, session);
1316 ceph_send_cap_releases(mdsc, session);
1314 return 0; 1317 return 0;
1315} 1318}
1316 1319
@@ -1461,15 +1464,18 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
1461 1464
1462 dout("discard_cap_releases mds%d\n", session->s_mds); 1465 dout("discard_cap_releases mds%d\n", session->s_mds);
1463 1466
1464 /* zero out the in-progress message */ 1467 if (!list_empty(&session->s_cap_releases)) {
1465 msg = list_first_entry(&session->s_cap_releases, 1468 /* zero out the in-progress message */
1466 struct ceph_msg, list_head); 1469 msg = list_first_entry(&session->s_cap_releases,
1467 head = msg->front.iov_base; 1470 struct ceph_msg, list_head);
1468 num = le32_to_cpu(head->num); 1471 head = msg->front.iov_base;
1469 dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); 1472 num = le32_to_cpu(head->num);
1470 head->num = cpu_to_le32(0); 1473 dout("discard_cap_releases mds%d %p %u\n",
1471 msg->front.iov_len = sizeof(*head); 1474 session->s_mds, msg, num);
1472 session->s_num_cap_releases += num; 1475 head->num = cpu_to_le32(0);
1476 msg->front.iov_len = sizeof(*head);
1477 session->s_num_cap_releases += num;
1478 }
1473 1479
1474 /* requeue completed messages */ 1480 /* requeue completed messages */
1475 while (!list_empty(&session->s_cap_releases_done)) { 1481 while (!list_empty(&session->s_cap_releases_done)) {
@@ -1492,6 +1498,43 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
1492 * requests 1498 * requests
1493 */ 1499 */
1494 1500
1501int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
1502 struct inode *dir)
1503{
1504 struct ceph_inode_info *ci = ceph_inode(dir);
1505 struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
1506 struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options;
1507 size_t size = sizeof(*rinfo->dir_in) + sizeof(*rinfo->dir_dname_len) +
1508 sizeof(*rinfo->dir_dname) + sizeof(*rinfo->dir_dlease);
1509 int order, num_entries;
1510
1511 spin_lock(&ci->i_ceph_lock);
1512 num_entries = ci->i_files + ci->i_subdirs;
1513 spin_unlock(&ci->i_ceph_lock);
1514 num_entries = max(num_entries, 1);
1515 num_entries = min(num_entries, opt->max_readdir);
1516
1517 order = get_order(size * num_entries);
1518 while (order >= 0) {
1519 rinfo->dir_in = (void*)__get_free_pages(GFP_NOFS | __GFP_NOWARN,
1520 order);
1521 if (rinfo->dir_in)
1522 break;
1523 order--;
1524 }
1525 if (!rinfo->dir_in)
1526 return -ENOMEM;
1527
1528 num_entries = (PAGE_SIZE << order) / size;
1529 num_entries = min(num_entries, opt->max_readdir);
1530
1531 rinfo->dir_buf_size = PAGE_SIZE << order;
1532 req->r_num_caps = num_entries + 1;
1533 req->r_args.readdir.max_entries = cpu_to_le32(num_entries);
1534 req->r_args.readdir.max_bytes = cpu_to_le32(opt->max_readdir_bytes);
1535 return 0;
1536}
1537
1495/* 1538/*
1496 * Create an mds request. 1539 * Create an mds request.
1497 */ 1540 */
@@ -2053,7 +2096,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
2053 ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); 2096 ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
2054 if (req->r_locked_dir) 2097 if (req->r_locked_dir)
2055 ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); 2098 ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
2056 if (req->r_old_dentry) 2099 if (req->r_old_dentry_dir)
2057 ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), 2100 ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
2058 CEPH_CAP_PIN); 2101 CEPH_CAP_PIN);
2059 2102
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 68288917c737..e90cfccf93bd 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -67,6 +67,7 @@ struct ceph_mds_reply_info_parsed {
67 /* for readdir results */ 67 /* for readdir results */
68 struct { 68 struct {
69 struct ceph_mds_reply_dirfrag *dir_dir; 69 struct ceph_mds_reply_dirfrag *dir_dir;
70 size_t dir_buf_size;
70 int dir_nr; 71 int dir_nr;
71 char **dir_dname; 72 char **dir_dname;
72 u32 *dir_dname_len; 73 u32 *dir_dname_len;
@@ -346,7 +347,8 @@ extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc,
346 struct dentry *dn); 347 struct dentry *dn);
347 348
348extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); 349extern void ceph_invalidate_dir_request(struct ceph_mds_request *req);
349 350extern int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
351 struct inode *dir);
350extern struct ceph_mds_request * 352extern struct ceph_mds_request *
351ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); 353ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode);
352extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, 354extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index 4440f447fd3f..51cc23e48111 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -54,6 +54,7 @@ const char *ceph_mds_op_name(int op)
54 case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; 54 case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash";
55 case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; 55 case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent";
56 case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; 56 case CEPH_MDS_OP_LOOKUPINO: return "lookupino";
57 case CEPH_MDS_OP_LOOKUPNAME: return "lookupname";
57 case CEPH_MDS_OP_GETATTR: return "getattr"; 58 case CEPH_MDS_OP_GETATTR: return "getattr";
58 case CEPH_MDS_OP_SETXATTR: return "setxattr"; 59 case CEPH_MDS_OP_SETXATTR: return "setxattr";
59 case CEPH_MDS_OP_SETATTR: return "setattr"; 60 case CEPH_MDS_OP_SETATTR: return "setattr";
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 10a4ccbf38da..06150fd745ac 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1026,6 +1026,7 @@ static int __init init_ceph(void)
1026 if (ret) 1026 if (ret)
1027 goto out; 1027 goto out;
1028 1028
1029 ceph_flock_init();
1029 ceph_xattr_init(); 1030 ceph_xattr_init();
1030 ret = register_filesystem(&ceph_fs_type); 1031 ret = register_filesystem(&ceph_fs_type);
1031 if (ret) 1032 if (ret)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index d8801a95b685..7866cd05a6bb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -577,7 +577,7 @@ struct ceph_file_info {
577 577
578 /* readdir: position within a frag */ 578 /* readdir: position within a frag */
579 unsigned offset; /* offset of last chunk, adjusted for . and .. */ 579 unsigned offset; /* offset of last chunk, adjusted for . and .. */
580 u64 next_offset; /* offset of next chunk (last_name's + 1) */ 580 unsigned next_offset; /* offset of next chunk (last_name's + 1) */
581 char *last_name; /* last entry in previous chunk */ 581 char *last_name; /* last entry in previous chunk */
582 struct dentry *dentry; /* next dentry (for dcache readdir) */ 582 struct dentry *dentry; /* next dentry (for dcache readdir) */
583 int dir_release_count; 583 int dir_release_count;
@@ -871,6 +871,7 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
871extern const struct export_operations ceph_export_ops; 871extern const struct export_operations ceph_export_ops;
872 872
873/* locks.c */ 873/* locks.c */
874extern __init void ceph_flock_init(void);
874extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); 875extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
875extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); 876extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
876extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); 877extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index a55ec37378c6..c9c2b887381e 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -64,32 +64,48 @@ static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
64} 64}
65 65
66static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, 66static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
67 size_t size) 67 size_t size)
68{ 68{
69 int ret; 69 int ret;
70 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); 70 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
71 struct ceph_osd_client *osdc = &fsc->client->osdc; 71 struct ceph_osd_client *osdc = &fsc->client->osdc;
72 s64 pool = ceph_file_layout_pg_pool(ci->i_layout); 72 s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
73 const char *pool_name; 73 const char *pool_name;
74 char buf[128];
74 75
75 dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); 76 dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
76 down_read(&osdc->map_sem); 77 down_read(&osdc->map_sem);
77 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); 78 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
78 if (pool_name) 79 if (pool_name) {
79 ret = snprintf(val, size, 80 size_t len = strlen(pool_name);
80 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s", 81 ret = snprintf(buf, sizeof(buf),
82 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=",
81 (unsigned long long)ceph_file_layout_su(ci->i_layout), 83 (unsigned long long)ceph_file_layout_su(ci->i_layout),
82 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), 84 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
83 (unsigned long long)ceph_file_layout_object_size(ci->i_layout), 85 (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
84 pool_name); 86 if (!size) {
85 else 87 ret += len;
86 ret = snprintf(val, size, 88 } else if (ret + len > size) {
89 ret = -ERANGE;
90 } else {
91 memcpy(val, buf, ret);
92 memcpy(val + ret, pool_name, len);
93 ret += len;
94 }
95 } else {
96 ret = snprintf(buf, sizeof(buf),
87 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", 97 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
88 (unsigned long long)ceph_file_layout_su(ci->i_layout), 98 (unsigned long long)ceph_file_layout_su(ci->i_layout),
89 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), 99 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
90 (unsigned long long)ceph_file_layout_object_size(ci->i_layout), 100 (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
91 (unsigned long long)pool); 101 (unsigned long long)pool);
92 102 if (size) {
103 if (ret <= size)
104 memcpy(val, buf, ret);
105 else
106 ret = -ERANGE;
107 }
108 }
93 up_read(&osdc->map_sem); 109 up_read(&osdc->map_sem);
94 return ret; 110 return ret;
95} 111}
@@ -215,7 +231,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
215 .name_size = sizeof("ceph.dir.layout"), 231 .name_size = sizeof("ceph.dir.layout"),
216 .getxattr_cb = ceph_vxattrcb_layout, 232 .getxattr_cb = ceph_vxattrcb_layout,
217 .readonly = false, 233 .readonly = false,
218 .hidden = false, 234 .hidden = true,
219 .exists_cb = ceph_vxattrcb_layout_exists, 235 .exists_cb = ceph_vxattrcb_layout_exists,
220 }, 236 },
221 XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), 237 XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
@@ -242,7 +258,7 @@ static struct ceph_vxattr ceph_file_vxattrs[] = {
242 .name_size = sizeof("ceph.file.layout"), 258 .name_size = sizeof("ceph.file.layout"),
243 .getxattr_cb = ceph_vxattrcb_layout, 259 .getxattr_cb = ceph_vxattrcb_layout,
244 .readonly = false, 260 .readonly = false,
245 .hidden = false, 261 .hidden = true,
246 .exists_cb = ceph_vxattrcb_layout_exists, 262 .exists_cb = ceph_vxattrcb_layout_exists,
247 }, 263 },
248 XATTR_LAYOUT_FIELD(file, layout, stripe_unit), 264 XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
@@ -842,7 +858,6 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
842 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 858 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
843 struct inode *inode = dentry->d_inode; 859 struct inode *inode = dentry->d_inode;
844 struct ceph_inode_info *ci = ceph_inode(inode); 860 struct ceph_inode_info *ci = ceph_inode(inode);
845 struct inode *parent_inode;
846 struct ceph_mds_request *req; 861 struct ceph_mds_request *req;
847 struct ceph_mds_client *mdsc = fsc->mdsc; 862 struct ceph_mds_client *mdsc = fsc->mdsc;
848 int err; 863 int err;
@@ -893,9 +908,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
893 req->r_data_len = size; 908 req->r_data_len = size;
894 909
895 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); 910 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
896 parent_inode = ceph_get_dentry_parent_inode(dentry); 911 err = ceph_mdsc_do_request(mdsc, NULL, req);
897 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
898 iput(parent_inode);
899 ceph_mdsc_put_request(req); 912 ceph_mdsc_put_request(req);
900 dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); 913 dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
901 914
@@ -1019,7 +1032,6 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
1019 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 1032 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
1020 struct ceph_mds_client *mdsc = fsc->mdsc; 1033 struct ceph_mds_client *mdsc = fsc->mdsc;
1021 struct inode *inode = dentry->d_inode; 1034 struct inode *inode = dentry->d_inode;
1022 struct inode *parent_inode;
1023 struct ceph_mds_request *req; 1035 struct ceph_mds_request *req;
1024 int err; 1036 int err;
1025 1037
@@ -1033,9 +1045,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
1033 req->r_num_caps = 1; 1045 req->r_num_caps = 1;
1034 req->r_path2 = kstrdup(name, GFP_NOFS); 1046 req->r_path2 = kstrdup(name, GFP_NOFS);
1035 1047
1036 parent_inode = ceph_get_dentry_parent_inode(dentry); 1048 err = ceph_mdsc_do_request(mdsc, NULL, req);
1037 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
1038 iput(parent_inode);
1039 ceph_mdsc_put_request(req); 1049 ceph_mdsc_put_request(req);
1040 return err; 1050 return err;
1041} 1051}
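The reworked ceph.*.layout getter above follows the usual getxattr contract: a zero size reports the length the caller needs, a buffer that is too small yields -ERANGE, and otherwise the value is copied out. A minimal userspace sketch of that contract; the value string and function name are invented for the example.

/* Minimal sketch of the getxattr-style size contract used by the layout vxattr. */
#include <errno.h>
#include <stdio.h>
#include <string.h>

static ssize_t layout_get(char *val, size_t size)
{
	const char *s = "stripe_unit=4194304 stripe_count=1 object_size=4194304 pool=rbd";
	size_t len = strlen(s);

	if (size == 0)
		return (ssize_t)len;	/* probe for the required size */
	if (len > size)
		return -ERANGE;		/* caller's buffer is too small */
	memcpy(val, s, len);
	return (ssize_t)len;
}

int main(void)
{
	char buf[128];
	ssize_t need = layout_get(NULL, 0);
	ssize_t got = layout_get(buf, sizeof(buf));

	printf("need=%zd got=%zd value=%.*s\n", need, got, (int)got, buf);
	return 0;
}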
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 834fce759d80..216d7e99f921 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3113,6 +3113,7 @@ cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3113 3113
3114static struct vm_operations_struct cifs_file_vm_ops = { 3114static struct vm_operations_struct cifs_file_vm_ops = {
3115 .fault = filemap_fault, 3115 .fault = filemap_fault,
3116 .map_pages = filemap_map_pages,
3116 .page_mkwrite = cifs_page_mkwrite, 3117 .page_mkwrite = cifs_page_mkwrite,
3117 .remap_pages = generic_file_remap_pages, 3118 .remap_pages = generic_file_remap_pages,
3118}; 3119};
diff --git a/fs/dcache.c b/fs/dcache.c
index 66cba5a8a346..40707d88a945 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3144,6 +3144,7 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
3144 end = ERR_PTR(-ENAMETOOLONG); 3144 end = ERR_PTR(-ENAMETOOLONG);
3145 return end; 3145 return end;
3146} 3146}
3147EXPORT_SYMBOL(simple_dname);
3147 3148
3148/* 3149/*
3149 * Write full pathname from the root of the filesystem into the buffer. 3150 * Write full pathname from the root of the filesystem into the buffer.
diff --git a/fs/exec.c b/fs/exec.c
index 25dfeba6d55f..9e81c630dfa7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -26,6 +26,7 @@
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h> 27#include <linux/fdtable.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/vmacache.h>
29#include <linux/stat.h> 30#include <linux/stat.h>
30#include <linux/fcntl.h> 31#include <linux/fcntl.h>
31#include <linux/swap.h> 32#include <linux/swap.h>
@@ -822,7 +823,7 @@ EXPORT_SYMBOL(read_code);
822static int exec_mmap(struct mm_struct *mm) 823static int exec_mmap(struct mm_struct *mm)
823{ 824{
824 struct task_struct *tsk; 825 struct task_struct *tsk;
825 struct mm_struct * old_mm, *active_mm; 826 struct mm_struct *old_mm, *active_mm;
826 827
827 /* Notify parent that we're no longer interested in the old VM */ 828 /* Notify parent that we're no longer interested in the old VM */
828 tsk = current; 829 tsk = current;
@@ -848,6 +849,8 @@ static int exec_mmap(struct mm_struct *mm)
848 tsk->mm = mm; 849 tsk->mm = mm;
849 tsk->active_mm = mm; 850 tsk->active_mm = mm;
850 activate_mm(active_mm, mm); 851 activate_mm(active_mm, mm);
852 tsk->mm->vmacache_seqnum = 0;
853 vmacache_flush(tsk);
851 task_unlock(tsk); 854 task_unlock(tsk);
852 if (old_mm) { 855 if (old_mm) {
853 up_read(&old_mm->mmap_sem); 856 up_read(&old_mm->mmap_sem);
@@ -1043,7 +1046,7 @@ EXPORT_SYMBOL_GPL(get_task_comm);
1043 * so that a new one can be started 1046 * so that a new one can be started
1044 */ 1047 */
1045 1048
1046void set_task_comm(struct task_struct *tsk, char *buf) 1049void set_task_comm(struct task_struct *tsk, const char *buf)
1047{ 1050{
1048 task_lock(tsk); 1051 task_lock(tsk);
1049 trace_task_rename(tsk, buf); 1052 trace_task_rename(tsk, buf);
@@ -1052,21 +1055,6 @@ void set_task_comm(struct task_struct *tsk, char *buf)
1052 perf_event_comm(tsk); 1055 perf_event_comm(tsk);
1053} 1056}
1054 1057
1055static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len)
1056{
1057 int i, ch;
1058
1059 /* Copies the binary name from after last slash */
1060 for (i = 0; (ch = *(fn++)) != '\0';) {
1061 if (ch == '/')
1062 i = 0; /* overwrite what we wrote */
1063 else
1064 if (i < len - 1)
1065 tcomm[i++] = ch;
1066 }
1067 tcomm[i] = '\0';
1068}
1069
1070int flush_old_exec(struct linux_binprm * bprm) 1058int flush_old_exec(struct linux_binprm * bprm)
1071{ 1059{
1072 int retval; 1060 int retval;
@@ -1080,8 +1068,6 @@ int flush_old_exec(struct linux_binprm * bprm)
1080 goto out; 1068 goto out;
1081 1069
1082 set_mm_exe_file(bprm->mm, bprm->file); 1070 set_mm_exe_file(bprm->mm, bprm->file);
1083
1084 filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm));
1085 /* 1071 /*
1086 * Release all of the old mmap stuff 1072 * Release all of the old mmap stuff
1087 */ 1073 */
@@ -1124,7 +1110,7 @@ void setup_new_exec(struct linux_binprm * bprm)
1124 else 1110 else
1125 set_dumpable(current->mm, suid_dumpable); 1111 set_dumpable(current->mm, suid_dumpable);
1126 1112
1127 set_task_comm(current, bprm->tcomm); 1113 set_task_comm(current, kbasename(bprm->filename));
1128 1114
1129 /* Set the new mm task size. We have to do that late because it may 1115 /* Set the new mm task size. We have to do that late because it may
1130 * depend on TIF_32BIT which is only updated in flush_thread() on 1116 * depend on TIF_32BIT which is only updated in flush_thread() on
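The removed filename_to_taskname() helper in exec.c is replaced by a kbasename() call; the visible effect is simply "take everything after the last slash" before it is truncated into the task comm. A tiny strrchr-based userspace equivalent, purely illustrative:

/* Userspace equivalent of the kbasename() result that now feeds set_task_comm(). */
#include <stdio.h>
#include <string.h>

static const char *basename_of(const char *path)
{
	const char *slash = strrchr(path, '/');

	return slash ? slash + 1 : path;
}

int main(void)
{
	printf("%s\n", basename_of("/usr/bin/ls"));	/* prints "ls" */
	printf("%s\n", basename_of("ls"));		/* prints "ls" */
	return 0;
}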
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 1b8001bbe947..27695e6f4e46 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -4,7 +4,6 @@
4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5 */ 5 */
6 6
7#include <linux/capability.h>
8#include <linux/init.h> 7#include <linux/init.h>
9#include <linux/sched.h> 8#include <linux/sched.h>
10#include <linux/slab.h> 9#include <linux/slab.h>
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 7cadd823bb31..7d66fb0e4cca 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -284,7 +284,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
284 int best_ndir = inodes_per_group; 284 int best_ndir = inodes_per_group;
285 int best_group = -1; 285 int best_group = -1;
286 286
287 get_random_bytes(&group, sizeof(group)); 287 group = prandom_u32();
288 parent_group = (unsigned)group % ngroups; 288 parent_group = (unsigned)group % ngroups;
289 for (i = 0; i < ngroups; i++) { 289 for (i = 0; i < ngroups; i++) {
290 group = (parent_group + i) % ngroups; 290 group = (parent_group + i) % ngroups;
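The ext2 (and matching ext3) hunk only swaps get_random_bytes() for prandom_u32() when picking the starting group, but the surrounding Orlov scan is worth seeing in one piece: start at a random group, then probe groups modulo ngroups. A userspace sketch with rand() standing in for prandom_u32() and the per-group scoring omitted:

/* Sketch of the random-start, modulo-ngroups group scan around the changed line. */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int main(void)
{
	unsigned int ngroups = 16;	/* assumed block-group count */
	unsigned int parent_group, group;

	srand((unsigned)time(NULL));
	parent_group = (unsigned)rand() % ngroups;

	for (unsigned int i = 0; i < ngroups; i++) {
		group = (parent_group + i) % ngroups;
		printf("probe group %u\n", group);	/* real code scores each group here */
	}
	return 0;
}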
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d260115c0350..3750031cfa2f 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -192,7 +192,7 @@ static void init_once(void *foo)
192 inode_init_once(&ei->vfs_inode); 192 inode_init_once(&ei->vfs_inode);
193} 193}
194 194
195static int init_inodecache(void) 195static int __init init_inodecache(void)
196{ 196{
197 ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", 197 ext2_inode_cachep = kmem_cache_create("ext2_inode_cache",
198 sizeof(struct ext2_inode_info), 198 sizeof(struct ext2_inode_info),
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index cfedb2cb0d8c..c0ebc4db8849 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -42,8 +42,8 @@ ext2_xattr_security_set(struct dentry *dentry, const char *name,
42 value, size, flags); 42 value, size, flags);
43} 43}
44 44
45int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 45static int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
46 void *fs_info) 46 void *fs_info)
47{ 47{
48 const struct xattr *xattr; 48 const struct xattr *xattr;
49 int err = 0; 49 int err = 0;
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 22548f56197b..158b5d4ce067 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1727,10 +1727,7 @@ allocated:
1727 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1727 percpu_counter_sub(&sbi->s_freeblocks_counter, num);
1728 1728
1729 BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); 1729 BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
1730 err = ext3_journal_dirty_metadata(handle, gdp_bh); 1730 fatal = ext3_journal_dirty_metadata(handle, gdp_bh);
1731 if (!fatal)
1732 fatal = err;
1733
1734 if (fatal) 1731 if (fatal)
1735 goto out; 1732 goto out;
1736 1733
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index e66e4808719f..17742eed2c16 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -275,7 +275,7 @@ static inline loff_t ext3_get_htree_eof(struct file *filp)
275 * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX) 275 * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
276 * will be invalid once the directory was converted into a dx directory 276 * will be invalid once the directory was converted into a dx directory
277 */ 277 */
278loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence) 278static loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence)
279{ 279{
280 struct inode *inode = file->f_mapping->host; 280 struct inode *inode = file->f_mapping->host;
281 int dx_dir = is_dx_dir(inode); 281 int dx_dir = is_dx_dir(inode);
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 082afd78b107..a1b810230cc5 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -215,7 +215,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
215 int best_ndir = inodes_per_group; 215 int best_ndir = inodes_per_group;
216 int best_group = -1; 216 int best_group = -1;
217 217
218 get_random_bytes(&group, sizeof(group)); 218 group = prandom_u32();
219 parent_group = (unsigned)group % ngroups; 219 parent_group = (unsigned)group % ngroups;
220 for (i = 0; i < ngroups; i++) { 220 for (i = 0; i < ngroups; i++) {
221 group = (parent_group + i) % ngroups; 221 group = (parent_group + i) % ngroups;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index efce2bbfb5e5..f5157d0d1b43 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1559,56 +1559,17 @@ static int buffer_unmapped(handle_t *handle, struct buffer_head *bh)
1559} 1559}
1560 1560
1561/* 1561/*
1562 * Note that we always start a transaction even if we're not journalling 1562 * Note that whenever we need to map blocks we start a transaction even if
1563 * data. This is to preserve ordering: any hole instantiation within 1563 * we're not journalling data. This is to preserve ordering: any hole
1564 * __block_write_full_page -> ext3_get_block() should be journalled 1564 * instantiation within __block_write_full_page -> ext3_get_block() should be
1565 * along with the data so we don't crash and then get metadata which 1565 * journalled along with the data so we don't crash and then get metadata which
1566 * refers to old data. 1566 * refers to old data.
1567 * 1567 *
1568 * In all journalling modes block_write_full_page() will start the I/O. 1568 * In all journalling modes block_write_full_page() will start the I/O.
1569 * 1569 *
1570 * Problem:
1571 *
1572 * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
1573 * ext3_writepage()
1574 *
1575 * Similar for:
1576 *
1577 * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
1578 *
1579 * Same applies to ext3_get_block(). We will deadlock on various things like
1580 * lock_journal and i_truncate_mutex.
1581 *
1582 * Setting PF_MEMALLOC here doesn't work - too many internal memory
1583 * allocations fail.
1584 *
1585 * 16May01: If we're reentered then journal_current_handle() will be
1586 * non-zero. We simply *return*.
1587 *
1588 * 1 July 2001: @@@ FIXME:
1589 * In journalled data mode, a data buffer may be metadata against the
1590 * current transaction. But the same file is part of a shared mapping
1591 * and someone does a writepage() on it.
1592 *
1593 * We will move the buffer onto the async_data list, but *after* it has
1594 * been dirtied. So there's a small window where we have dirty data on
1595 * BJ_Metadata.
1596 *
1597 * Note that this only applies to the last partial page in the file. The
1598 * bit which block_write_full_page() uses prepare/commit for. (That's
1599 * broken code anyway: it's wrong for msync()).
1600 *
1601 * It's a rare case: affects the final partial page, for journalled data
1602 * where the file is subject to bith write() and writepage() in the same
1603 * transction. To fix it we'll need a custom block_write_full_page().
1604 * We'll probably need that anyway for journalling writepage() output.
1605 *
1606 * We don't honour synchronous mounts for writepage(). That would be 1570 * We don't honour synchronous mounts for writepage(). That would be
1607 * disastrous. Any write() or metadata operation will sync the fs for 1571 * disastrous. Any write() or metadata operation will sync the fs for
1608 * us. 1572 * us.
1609 *
1610 * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
1611 * we don't need to open a transaction here.
1612 */ 1573 */
1613static int ext3_ordered_writepage(struct page *page, 1574static int ext3_ordered_writepage(struct page *page,
1614 struct writeback_control *wbc) 1575 struct writeback_control *wbc)
@@ -1673,12 +1634,9 @@ static int ext3_ordered_writepage(struct page *page,
1673 * block_write_full_page() succeeded. Otherwise they are unmapped, 1634 * block_write_full_page() succeeded. Otherwise they are unmapped,
1674 * and generally junk. 1635 * and generally junk.
1675 */ 1636 */
1676 if (ret == 0) { 1637 if (ret == 0)
1677 err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, 1638 ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
1678 NULL, journal_dirty_data_fn); 1639 NULL, journal_dirty_data_fn);
1679 if (!ret)
1680 ret = err;
1681 }
1682 walk_page_buffers(handle, page_bufs, 0, 1640 walk_page_buffers(handle, page_bufs, 0,
1683 PAGE_CACHE_SIZE, NULL, bput_one); 1641 PAGE_CACHE_SIZE, NULL, bput_one);
1684 err = ext3_journal_stop(handle); 1642 err = ext3_journal_stop(handle);
@@ -1925,6 +1883,8 @@ retry:
1925 * and pretend the write failed... */ 1883 * and pretend the write failed... */
1926 ext3_truncate_failed_direct_write(inode); 1884 ext3_truncate_failed_direct_write(inode);
1927 ret = PTR_ERR(handle); 1885 ret = PTR_ERR(handle);
1886 if (inode->i_nlink)
1887 ext3_orphan_del(NULL, inode);
1928 goto out; 1888 goto out;
1929 } 1889 }
1930 if (inode->i_nlink) 1890 if (inode->i_nlink)
@@ -3212,21 +3172,20 @@ out_brelse:
3212 * 3172 *
3213 * We are called from a few places: 3173 * We are called from a few places:
3214 * 3174 *
3215 * - Within generic_file_write() for O_SYNC files. 3175 * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
3216 * Here, there will be no transaction running. We wait for any running 3176 * Here, there will be no transaction running. We wait for any running
3217 * transaction to commit. 3177 * transaction to commit.
3218 * 3178 *
3219 * - Within sys_sync(), kupdate and such. 3179 * - Within flush work (for sys_sync(), kupdate and such).
3220 * We wait on commit, if tol to. 3180 * We wait on commit, if told to.
3221 * 3181 *
3222 * - Within prune_icache() (PF_MEMALLOC == true) 3182 * - Within iput_final() -> write_inode_now()
3223 * Here we simply return. We can't afford to block kswapd on the 3183 * We wait on commit, if told to.
3224 * journal commit.
3225 * 3184 *
3226 * In all cases it is actually safe for us to return without doing anything, 3185 * In all cases it is actually safe for us to return without doing anything,
3227 * because the inode has been copied into a raw inode buffer in 3186 * because the inode has been copied into a raw inode buffer in
3228 * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for 3187 * ext3_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL
3229 * knfsd. 3188 * writeback.
3230 * 3189 *
3231 * Note that we are absolutely dependent upon all inode dirtiers doing the 3190 * Note that we are absolutely dependent upon all inode dirtiers doing the
3232 * right thing: they *must* call mark_inode_dirty() after dirtying info in 3191 * right thing: they *must* call mark_inode_dirty() after dirtying info in
@@ -3238,13 +3197,13 @@ out_brelse:
3238 * stuff(); 3197 * stuff();
3239 * inode->i_size = expr; 3198 * inode->i_size = expr;
3240 * 3199 *
3241 * is in error because a kswapd-driven write_inode() could occur while 3200 * is in error because write_inode() could occur while `stuff()' is running,
3242 * `stuff()' is running, and the new i_size will be lost. Plus the inode 3201 * and the new i_size will be lost. Plus the inode will no longer be on the
3243 * will no longer be on the superblock's dirty inode list. 3202 * superblock's dirty inode list.
3244 */ 3203 */
3245int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) 3204int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
3246{ 3205{
3247 if (current->flags & PF_MEMALLOC) 3206 if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
3248 return 0; 3207 return 0;
3249 3208
3250 if (ext3_journal_current_handle()) { 3209 if (ext3_journal_current_handle()) {
@@ -3253,7 +3212,12 @@ int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
3253 return -EIO; 3212 return -EIO;
3254 } 3213 }
3255 3214
3256 if (wbc->sync_mode != WB_SYNC_ALL) 3215 /*
3216 * No need to force transaction in WB_SYNC_NONE mode. Also
3217 * ext3_sync_fs() will force the commit after everything is
3218 * written.
3219 */
3220 if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
3257 return 0; 3221 return 0;
3258 3222
3259 return ext3_force_commit(inode->i_sb); 3223 return ext3_force_commit(inode->i_sb);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 95c6c5a6d0c5..08cdfe5461e3 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -527,7 +527,7 @@ static void init_once(void *foo)
527 inode_init_once(&ei->vfs_inode); 527 inode_init_once(&ei->vfs_inode);
528} 528}
529 529
530static int init_inodecache(void) 530static int __init init_inodecache(void)
531{ 531{
532 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", 532 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
533 sizeof(struct ext3_inode_info), 533 sizeof(struct ext3_inode_info),
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 3387664ad70e..722c2bf9645d 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -43,8 +43,9 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
43 name, value, size, flags); 43 name, value, size, flags);
44} 44}
45 45
46int ext3_initxattrs(struct inode *inode, const struct xattr *xattr_array, 46static int ext3_initxattrs(struct inode *inode,
47 void *fs_info) 47 const struct xattr *xattr_array,
48 void *fs_info)
48{ 49{
49 const struct xattr *xattr; 50 const struct xattr *xattr;
50 handle_t *handle = fs_info; 51 handle_t *handle = fs_info;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 6db7f7db7777..4e508fc83dcf 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -200,6 +200,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
200 200
201static const struct vm_operations_struct ext4_file_vm_ops = { 201static const struct vm_operations_struct ext4_file_vm_ops = {
202 .fault = filemap_fault, 202 .fault = filemap_fault,
203 .map_pages = filemap_map_pages,
203 .page_mkwrite = ext4_page_mkwrite, 204 .page_mkwrite = ext4_page_mkwrite,
204 .remap_pages = generic_file_remap_pages, 205 .remap_pages = generic_file_remap_pages,
205}; 206};
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index fa8da4cb8c4b..e93e4ec7d165 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -174,7 +174,7 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
174 174
175 retval = f2fs_getxattr(inode, name_index, "", NULL, 0); 175 retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
176 if (retval > 0) { 176 if (retval > 0) {
177 value = kmalloc(retval, GFP_KERNEL); 177 value = kmalloc(retval, GFP_F2FS_ZERO);
178 if (!value) 178 if (!value)
179 return ERR_PTR(-ENOMEM); 179 return ERR_PTR(-ENOMEM);
180 retval = f2fs_getxattr(inode, name_index, "", value, retval); 180 retval = f2fs_getxattr(inode, name_index, "", value, retval);
@@ -203,6 +203,12 @@ static int __f2fs_set_acl(struct inode *inode, int type,
203 size_t size = 0; 203 size_t size = 0;
204 int error; 204 int error;
205 205
206 if (acl) {
207 error = posix_acl_valid(acl);
208 if (error < 0)
209 return error;
210 }
211
206 switch (type) { 212 switch (type) {
207 case ACL_TYPE_ACCESS: 213 case ACL_TYPE_ACCESS:
208 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; 214 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 293d0486a40f..4aa521aa9bc3 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -33,14 +33,12 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
33 struct address_space *mapping = META_MAPPING(sbi); 33 struct address_space *mapping = META_MAPPING(sbi);
34 struct page *page = NULL; 34 struct page *page = NULL;
35repeat: 35repeat:
36 page = grab_cache_page(mapping, index); 36 page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
37 if (!page) { 37 if (!page) {
38 cond_resched(); 38 cond_resched();
39 goto repeat; 39 goto repeat;
40 } 40 }
41 41
42 /* We wait writeback only inside grab_meta_page() */
43 wait_on_page_writeback(page);
44 SetPageUptodate(page); 42 SetPageUptodate(page);
45 return page; 43 return page;
46} 44}
@@ -75,23 +73,102 @@ out:
75 return page; 73 return page;
76} 74}
77 75
76inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
77{
78 switch (type) {
79 case META_NAT:
80 return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
81 case META_SIT:
82 return SIT_BLK_CNT(sbi);
83 case META_SSA:
84 case META_CP:
85 return 0;
86 default:
87 BUG();
88 }
89}
90
91/*
92 * Readahead CP/NAT/SIT/SSA pages
93 */
94int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
95{
96 block_t prev_blk_addr = 0;
97 struct page *page;
98 int blkno = start;
99 int max_blks = get_max_meta_blks(sbi, type);
100
101 struct f2fs_io_info fio = {
102 .type = META,
103 .rw = READ_SYNC | REQ_META | REQ_PRIO
104 };
105
106 for (; nrpages-- > 0; blkno++) {
107 block_t blk_addr;
108
109 switch (type) {
110 case META_NAT:
111 /* get nat block addr */
112 if (unlikely(blkno >= max_blks))
113 blkno = 0;
114 blk_addr = current_nat_addr(sbi,
115 blkno * NAT_ENTRY_PER_BLOCK);
116 break;
117 case META_SIT:
118 /* get sit block addr */
119 if (unlikely(blkno >= max_blks))
120 goto out;
121 blk_addr = current_sit_addr(sbi,
122 blkno * SIT_ENTRY_PER_BLOCK);
123 if (blkno != start && prev_blk_addr + 1 != blk_addr)
124 goto out;
125 prev_blk_addr = blk_addr;
126 break;
127 case META_SSA:
128 case META_CP:
129 /* get ssa/cp block addr */
130 blk_addr = blkno;
131 break;
132 default:
133 BUG();
134 }
135
136 page = grab_cache_page(META_MAPPING(sbi), blk_addr);
137 if (!page)
138 continue;
139 if (PageUptodate(page)) {
140 mark_page_accessed(page);
141 f2fs_put_page(page, 1);
142 continue;
143 }
144
145 f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
146 mark_page_accessed(page);
147 f2fs_put_page(page, 0);
148 }
149out:
150 f2fs_submit_merged_bio(sbi, META, READ);
151 return blkno - start;
152}
153
78static int f2fs_write_meta_page(struct page *page, 154static int f2fs_write_meta_page(struct page *page,
79 struct writeback_control *wbc) 155 struct writeback_control *wbc)
80{ 156{
81 struct inode *inode = page->mapping->host; 157 struct inode *inode = page->mapping->host;
82 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 158 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
83 159
84 /* Should not write any meta pages, if any IO error was occurred */ 160 if (unlikely(sbi->por_doing))
85 if (unlikely(sbi->por_doing ||
86 is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
87 goto redirty_out; 161 goto redirty_out;
88
89 if (wbc->for_reclaim) 162 if (wbc->for_reclaim)
90 goto redirty_out; 163 goto redirty_out;
91 164
92 wait_on_page_writeback(page); 165 /* Should not write any meta pages, if any IO error was occurred */
166 if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
167 goto no_write;
93 168
169 f2fs_wait_on_page_writeback(page, META);
94 write_meta_page(sbi, page); 170 write_meta_page(sbi, page);
171no_write:
95 dec_page_count(sbi, F2FS_DIRTY_META); 172 dec_page_count(sbi, F2FS_DIRTY_META);
96 unlock_page(page); 173 unlock_page(page);
97 return 0; 174 return 0;
@@ -99,6 +176,7 @@ static int f2fs_write_meta_page(struct page *page,
99redirty_out: 176redirty_out:
100 dec_page_count(sbi, F2FS_DIRTY_META); 177 dec_page_count(sbi, F2FS_DIRTY_META);
101 wbc->pages_skipped++; 178 wbc->pages_skipped++;
179 account_page_redirty(page);
102 set_page_dirty(page); 180 set_page_dirty(page);
103 return AOP_WRITEPAGE_ACTIVATE; 181 return AOP_WRITEPAGE_ACTIVATE;
104} 182}
@@ -107,21 +185,23 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
107 struct writeback_control *wbc) 185 struct writeback_control *wbc)
108{ 186{
109 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 187 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
110 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 188 long diff, written;
111 long written;
112
113 if (wbc->for_kupdate)
114 return 0;
115 189
116 /* collect a number of dirty meta pages and write together */ 190 /* collect a number of dirty meta pages and write together */
117 if (get_pages(sbi, F2FS_DIRTY_META) < nrpages) 191 if (wbc->for_kupdate ||
118 return 0; 192 get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
193 goto skip_write;
119 194
120 /* if mounting is failed, skip writing node pages */ 195 /* if mounting is failed, skip writing node pages */
121 mutex_lock(&sbi->cp_mutex); 196 mutex_lock(&sbi->cp_mutex);
122 written = sync_meta_pages(sbi, META, nrpages); 197 diff = nr_pages_to_write(sbi, META, wbc);
198 written = sync_meta_pages(sbi, META, wbc->nr_to_write);
123 mutex_unlock(&sbi->cp_mutex); 199 mutex_unlock(&sbi->cp_mutex);
124 wbc->nr_to_write -= written; 200 wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
201 return 0;
202
203skip_write:
204 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
125 return 0; 205 return 0;
126} 206}
127 207
@@ -148,10 +228,22 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
148 228
149 for (i = 0; i < nr_pages; i++) { 229 for (i = 0; i < nr_pages; i++) {
150 struct page *page = pvec.pages[i]; 230 struct page *page = pvec.pages[i];
231
151 lock_page(page); 232 lock_page(page);
152 f2fs_bug_on(page->mapping != mapping); 233
153 f2fs_bug_on(!PageDirty(page)); 234 if (unlikely(page->mapping != mapping)) {
154 clear_page_dirty_for_io(page); 235continue_unlock:
236 unlock_page(page);
237 continue;
238 }
239 if (!PageDirty(page)) {
240 /* someone wrote it for us */
241 goto continue_unlock;
242 }
243
244 if (!clear_page_dirty_for_io(page))
245 goto continue_unlock;
246
155 if (f2fs_write_meta_page(page, &wbc)) { 247 if (f2fs_write_meta_page(page, &wbc)) {
156 unlock_page(page); 248 unlock_page(page);
157 break; 249 break;
@@ -216,16 +308,15 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
216 308
217void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 309void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
218{ 310{
219 struct list_head *head, *this; 311 struct list_head *head;
220 struct orphan_inode_entry *new = NULL, *orphan = NULL; 312 struct orphan_inode_entry *new, *orphan;
221 313
222 new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); 314 new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
223 new->ino = ino; 315 new->ino = ino;
224 316
225 spin_lock(&sbi->orphan_inode_lock); 317 spin_lock(&sbi->orphan_inode_lock);
226 head = &sbi->orphan_inode_list; 318 head = &sbi->orphan_inode_list;
227 list_for_each(this, head) { 319 list_for_each_entry(orphan, head, list) {
228 orphan = list_entry(this, struct orphan_inode_entry, list);
229 if (orphan->ino == ino) { 320 if (orphan->ino == ino) {
230 spin_unlock(&sbi->orphan_inode_lock); 321 spin_unlock(&sbi->orphan_inode_lock);
231 kmem_cache_free(orphan_entry_slab, new); 322 kmem_cache_free(orphan_entry_slab, new);
@@ -234,14 +325,10 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
234 325
235 if (orphan->ino > ino) 326 if (orphan->ino > ino)
236 break; 327 break;
237 orphan = NULL;
238 } 328 }
239 329
240 /* add new_oentry into list which is sorted by inode number */ 330 /* add new orphan entry into list which is sorted by inode number */
241 if (orphan) 331 list_add_tail(&new->list, &orphan->list);
242 list_add(&new->list, this->prev);
243 else
244 list_add_tail(&new->list, head);
245 spin_unlock(&sbi->orphan_inode_lock); 332 spin_unlock(&sbi->orphan_inode_lock);
246} 333}
247 334
@@ -255,10 +342,11 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
255 list_for_each_entry(orphan, head, list) { 342 list_for_each_entry(orphan, head, list) {
256 if (orphan->ino == ino) { 343 if (orphan->ino == ino) {
257 list_del(&orphan->list); 344 list_del(&orphan->list);
258 kmem_cache_free(orphan_entry_slab, orphan);
259 f2fs_bug_on(sbi->n_orphans == 0); 345 f2fs_bug_on(sbi->n_orphans == 0);
260 sbi->n_orphans--; 346 sbi->n_orphans--;
261 break; 347 spin_unlock(&sbi->orphan_inode_lock);
348 kmem_cache_free(orphan_entry_slab, orphan);
349 return;
262 } 350 }
263 } 351 }
264 spin_unlock(&sbi->orphan_inode_lock); 352 spin_unlock(&sbi->orphan_inode_lock);
@@ -285,6 +373,8 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
285 start_blk = __start_cp_addr(sbi) + 1; 373 start_blk = __start_cp_addr(sbi) + 1;
286 orphan_blkaddr = __start_sum_addr(sbi) - 1; 374 orphan_blkaddr = __start_sum_addr(sbi) - 1;
287 375
376 ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
377
288 for (i = 0; i < orphan_blkaddr; i++) { 378 for (i = 0; i < orphan_blkaddr; i++) {
289 struct page *page = get_meta_page(sbi, start_blk + i); 379 struct page *page = get_meta_page(sbi, start_blk + i);
290 struct f2fs_orphan_block *orphan_blk; 380 struct f2fs_orphan_block *orphan_blk;
@@ -466,14 +556,12 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
466{ 556{
467 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 557 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
468 struct list_head *head = &sbi->dir_inode_list; 558 struct list_head *head = &sbi->dir_inode_list;
469 struct list_head *this; 559 struct dir_inode_entry *entry;
470 560
471 list_for_each(this, head) { 561 list_for_each_entry(entry, head, list)
472 struct dir_inode_entry *entry;
473 entry = list_entry(this, struct dir_inode_entry, list);
474 if (unlikely(entry->inode == inode)) 562 if (unlikely(entry->inode == inode))
475 return -EEXIST; 563 return -EEXIST;
476 } 564
477 list_add_tail(&new->list, head); 565 list_add_tail(&new->list, head);
478 stat_inc_dirty_dir(sbi); 566 stat_inc_dirty_dir(sbi);
479 return 0; 567 return 0;
@@ -483,6 +571,7 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
483{ 571{
484 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 572 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
485 struct dir_inode_entry *new; 573 struct dir_inode_entry *new;
574 int ret = 0;
486 575
487 if (!S_ISDIR(inode->i_mode)) 576 if (!S_ISDIR(inode->i_mode))
488 return; 577 return;
@@ -492,13 +581,13 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
492 INIT_LIST_HEAD(&new->list); 581 INIT_LIST_HEAD(&new->list);
493 582
494 spin_lock(&sbi->dir_inode_lock); 583 spin_lock(&sbi->dir_inode_lock);
495 if (__add_dirty_inode(inode, new)) 584 ret = __add_dirty_inode(inode, new);
496 kmem_cache_free(inode_entry_slab, new);
497
498 inc_page_count(sbi, F2FS_DIRTY_DENTS);
499 inode_inc_dirty_dents(inode); 585 inode_inc_dirty_dents(inode);
500 SetPagePrivate(page); 586 SetPagePrivate(page);
501 spin_unlock(&sbi->dir_inode_lock); 587 spin_unlock(&sbi->dir_inode_lock);
588
589 if (ret)
590 kmem_cache_free(inode_entry_slab, new);
502} 591}
503 592
504void add_dirty_dir_inode(struct inode *inode) 593void add_dirty_dir_inode(struct inode *inode)
@@ -506,44 +595,47 @@ void add_dirty_dir_inode(struct inode *inode)
506 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 595 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
507 struct dir_inode_entry *new = 596 struct dir_inode_entry *new =
508 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); 597 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
598 int ret = 0;
509 599
510 new->inode = inode; 600 new->inode = inode;
511 INIT_LIST_HEAD(&new->list); 601 INIT_LIST_HEAD(&new->list);
512 602
513 spin_lock(&sbi->dir_inode_lock); 603 spin_lock(&sbi->dir_inode_lock);
514 if (__add_dirty_inode(inode, new)) 604 ret = __add_dirty_inode(inode, new);
515 kmem_cache_free(inode_entry_slab, new);
516 spin_unlock(&sbi->dir_inode_lock); 605 spin_unlock(&sbi->dir_inode_lock);
606
607 if (ret)
608 kmem_cache_free(inode_entry_slab, new);
517} 609}
518 610
519void remove_dirty_dir_inode(struct inode *inode) 611void remove_dirty_dir_inode(struct inode *inode)
520{ 612{
521 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 613 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
522 614 struct list_head *head;
523 struct list_head *this, *head; 615 struct dir_inode_entry *entry;
524 616
525 if (!S_ISDIR(inode->i_mode)) 617 if (!S_ISDIR(inode->i_mode))
526 return; 618 return;
527 619
528 spin_lock(&sbi->dir_inode_lock); 620 spin_lock(&sbi->dir_inode_lock);
529 if (atomic_read(&F2FS_I(inode)->dirty_dents)) { 621 if (get_dirty_dents(inode)) {
530 spin_unlock(&sbi->dir_inode_lock); 622 spin_unlock(&sbi->dir_inode_lock);
531 return; 623 return;
532 } 624 }
533 625
534 head = &sbi->dir_inode_list; 626 head = &sbi->dir_inode_list;
535 list_for_each(this, head) { 627 list_for_each_entry(entry, head, list) {
536 struct dir_inode_entry *entry;
537 entry = list_entry(this, struct dir_inode_entry, list);
538 if (entry->inode == inode) { 628 if (entry->inode == inode) {
539 list_del(&entry->list); 629 list_del(&entry->list);
540 kmem_cache_free(inode_entry_slab, entry);
541 stat_dec_dirty_dir(sbi); 630 stat_dec_dirty_dir(sbi);
542 break; 631 spin_unlock(&sbi->dir_inode_lock);
632 kmem_cache_free(inode_entry_slab, entry);
633 goto done;
543 } 634 }
544 } 635 }
545 spin_unlock(&sbi->dir_inode_lock); 636 spin_unlock(&sbi->dir_inode_lock);
546 637
638done:
547 /* Only from the recovery routine */ 639 /* Only from the recovery routine */
548 if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { 640 if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
549 clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); 641 clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
@@ -554,15 +646,14 @@ void remove_dirty_dir_inode(struct inode *inode)
554struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) 646struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
555{ 647{
556 648
557 struct list_head *this, *head; 649 struct list_head *head;
558 struct inode *inode = NULL; 650 struct inode *inode = NULL;
651 struct dir_inode_entry *entry;
559 652
560 spin_lock(&sbi->dir_inode_lock); 653 spin_lock(&sbi->dir_inode_lock);
561 654
562 head = &sbi->dir_inode_list; 655 head = &sbi->dir_inode_list;
563 list_for_each(this, head) { 656 list_for_each_entry(entry, head, list) {
564 struct dir_inode_entry *entry;
565 entry = list_entry(this, struct dir_inode_entry, list);
566 if (entry->inode->i_ino == ino) { 657 if (entry->inode->i_ino == ino) {
567 inode = entry->inode; 658 inode = entry->inode;
568 break; 659 break;
@@ -589,7 +680,7 @@ retry:
589 inode = igrab(entry->inode); 680 inode = igrab(entry->inode);
590 spin_unlock(&sbi->dir_inode_lock); 681 spin_unlock(&sbi->dir_inode_lock);
591 if (inode) { 682 if (inode) {
592 filemap_flush(inode->i_mapping); 683 filemap_fdatawrite(inode->i_mapping);
593 iput(inode); 684 iput(inode);
594 } else { 685 } else {
595 /* 686 /*
@@ -824,6 +915,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
824 unblock_operations(sbi); 915 unblock_operations(sbi);
825 mutex_unlock(&sbi->cp_mutex); 916 mutex_unlock(&sbi->cp_mutex);
826 917
918 stat_inc_cp_count(sbi->stat_info);
827 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); 919 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
828} 920}
829 921
@@ -845,11 +937,11 @@ void init_orphan_info(struct f2fs_sb_info *sbi)
845int __init create_checkpoint_caches(void) 937int __init create_checkpoint_caches(void)
846{ 938{
847 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", 939 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
848 sizeof(struct orphan_inode_entry), NULL); 940 sizeof(struct orphan_inode_entry));
849 if (!orphan_entry_slab) 941 if (!orphan_entry_slab)
850 return -ENOMEM; 942 return -ENOMEM;
851 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", 943 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
852 sizeof(struct dir_inode_entry), NULL); 944 sizeof(struct dir_inode_entry));
853 if (!inode_entry_slab) { 945 if (!inode_entry_slab) {
854 kmem_cache_destroy(orphan_entry_slab); 946 kmem_cache_destroy(orphan_entry_slab);
855 return -ENOMEM; 947 return -ENOMEM;
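Most of the checkpoint.c changes above swap open-coded list_for_each() walks for list_for_each_entry() and keep the orphan list sorted by inode number; when the walk falls off the end without hitting the break, &orphan->list aliases the list head, so the single list_add_tail() call covers both the insert-before-a-bigger-ino case and the append-at-the-tail case. A minimal userspace sketch of the same sorted-insert-with-duplicate-check idea, using a plain singly linked list instead of <linux/list.h> (types and names here are illustrative only):

#include <stdio.h>
#include <stdlib.h>

struct orphan {
	unsigned int ino;
	struct orphan *next;
};

/* Insert ino in ascending order; silently drop duplicates. */
static void orphan_add(struct orphan **head, unsigned int ino)
{
	struct orphan **pp = head;
	struct orphan *new;

	while (*pp && (*pp)->ino < ino)
		pp = &(*pp)->next;
	if (*pp && (*pp)->ino == ino)
		return;				/* already recorded */

	new = malloc(sizeof(*new));
	if (!new)
		return;
	new->ino = ino;
	new->next = *pp;
	*pp = new;
}

int main(void)
{
	struct orphan *head = NULL, *p;

	orphan_add(&head, 7);
	orphan_add(&head, 3);
	orphan_add(&head, 9);
	orphan_add(&head, 3);			/* duplicate is dropped */

	for (p = head; p; p = p->next)
		printf("%u ", p->ino);		/* prints: 3 7 9 */
	printf("\n");

	while (head) {
		p = head;
		head = head->next;
		free(p);
	}
	return 0;
}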
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 2261ccdd0b5f..45abd60e2bff 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -45,7 +45,7 @@ static void f2fs_read_end_io(struct bio *bio, int err)
45 45
46static void f2fs_write_end_io(struct bio *bio, int err) 46static void f2fs_write_end_io(struct bio *bio, int err)
47{ 47{
48 struct f2fs_sb_info *sbi = F2FS_SB(bio->bi_io_vec->bv_page->mapping->host->i_sb); 48 struct f2fs_sb_info *sbi = bio->bi_private;
49 struct bio_vec *bvec; 49 struct bio_vec *bvec;
50 int i; 50 int i;
51 51
@@ -55,15 +55,16 @@ static void f2fs_write_end_io(struct bio *bio, int err)
55 if (unlikely(err)) { 55 if (unlikely(err)) {
56 SetPageError(page); 56 SetPageError(page);
57 set_bit(AS_EIO, &page->mapping->flags); 57 set_bit(AS_EIO, &page->mapping->flags);
58 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 58 f2fs_stop_checkpoint(sbi);
59 sbi->sb->s_flags |= MS_RDONLY;
60 } 59 }
61 end_page_writeback(page); 60 end_page_writeback(page);
62 dec_page_count(sbi, F2FS_WRITEBACK); 61 dec_page_count(sbi, F2FS_WRITEBACK);
63 } 62 }
64 63
65 if (bio->bi_private) 64 if (sbi->wait_io) {
66 complete(bio->bi_private); 65 complete(sbi->wait_io);
66 sbi->wait_io = NULL;
67 }
67 68
68 if (!get_pages(sbi, F2FS_WRITEBACK) && 69 if (!get_pages(sbi, F2FS_WRITEBACK) &&
69 !list_empty(&sbi->cp_wait.task_list)) 70 !list_empty(&sbi->cp_wait.task_list))
@@ -86,6 +87,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
86 bio->bi_bdev = sbi->sb->s_bdev; 87 bio->bi_bdev = sbi->sb->s_bdev;
87 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); 88 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
88 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; 89 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
90 bio->bi_private = sbi;
89 91
90 return bio; 92 return bio;
91} 93}
@@ -113,7 +115,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
113 */ 115 */
114 if (fio->type == META_FLUSH) { 116 if (fio->type == META_FLUSH) {
115 DECLARE_COMPLETION_ONSTACK(wait); 117 DECLARE_COMPLETION_ONSTACK(wait);
116 io->bio->bi_private = &wait; 118 io->sbi->wait_io = &wait;
117 submit_bio(rw, io->bio); 119 submit_bio(rw, io->bio);
118 wait_for_completion(&wait); 120 wait_for_completion(&wait);
119 } else { 121 } else {
@@ -132,7 +134,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
132 134
133 io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; 135 io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
134 136
135 mutex_lock(&io->io_mutex); 137 down_write(&io->io_rwsem);
136 138
137 /* change META to META_FLUSH in the checkpoint procedure */ 139 /* change META to META_FLUSH in the checkpoint procedure */
138 if (type >= META_FLUSH) { 140 if (type >= META_FLUSH) {
@@ -140,7 +142,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
140 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; 142 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
141 } 143 }
142 __submit_merged_bio(io); 144 __submit_merged_bio(io);
143 mutex_unlock(&io->io_mutex); 145 up_write(&io->io_rwsem);
144} 146}
145 147
146/* 148/*
@@ -178,7 +180,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
178 180
179 verify_block_addr(sbi, blk_addr); 181 verify_block_addr(sbi, blk_addr);
180 182
181 mutex_lock(&io->io_mutex); 183 down_write(&io->io_rwsem);
182 184
183 if (!is_read) 185 if (!is_read)
184 inc_page_count(sbi, F2FS_WRITEBACK); 186 inc_page_count(sbi, F2FS_WRITEBACK);
@@ -202,7 +204,7 @@ alloc_new:
202 204
203 io->last_block_in_bio = blk_addr; 205 io->last_block_in_bio = blk_addr;
204 206
205 mutex_unlock(&io->io_mutex); 207 up_write(&io->io_rwsem);
206 trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); 208 trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
207} 209}
208 210
@@ -797,48 +799,36 @@ static int f2fs_write_data_page(struct page *page,
797 */ 799 */
798 offset = i_size & (PAGE_CACHE_SIZE - 1); 800 offset = i_size & (PAGE_CACHE_SIZE - 1);
799 if ((page->index >= end_index + 1) || !offset) { 801 if ((page->index >= end_index + 1) || !offset) {
800 if (S_ISDIR(inode->i_mode)) { 802 inode_dec_dirty_dents(inode);
801 dec_page_count(sbi, F2FS_DIRTY_DENTS);
802 inode_dec_dirty_dents(inode);
803 }
804 goto out; 803 goto out;
805 } 804 }
806 805
807 zero_user_segment(page, offset, PAGE_CACHE_SIZE); 806 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
808write: 807write:
809 if (unlikely(sbi->por_doing)) { 808 if (unlikely(sbi->por_doing))
810 err = AOP_WRITEPAGE_ACTIVATE;
811 goto redirty_out; 809 goto redirty_out;
812 }
813 810
814 /* Dentry blocks are controlled by checkpoint */ 811 /* Dentry blocks are controlled by checkpoint */
815 if (S_ISDIR(inode->i_mode)) { 812 if (S_ISDIR(inode->i_mode)) {
816 dec_page_count(sbi, F2FS_DIRTY_DENTS);
817 inode_dec_dirty_dents(inode); 813 inode_dec_dirty_dents(inode);
818 err = do_write_data_page(page, &fio); 814 err = do_write_data_page(page, &fio);
819 } else { 815 goto done;
820 f2fs_lock_op(sbi); 816 }
821
822 if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) {
823 err = f2fs_write_inline_data(inode, page, offset);
824 f2fs_unlock_op(sbi);
825 goto out;
826 } else {
827 err = do_write_data_page(page, &fio);
828 }
829 817
830 f2fs_unlock_op(sbi); 818 if (!wbc->for_reclaim)
831 need_balance_fs = true; 819 need_balance_fs = true;
832 } 820 else if (has_not_enough_free_secs(sbi, 0))
833 if (err == -ENOENT)
834 goto out;
835 else if (err)
836 goto redirty_out; 821 goto redirty_out;
837 822
838 if (wbc->for_reclaim) { 823 f2fs_lock_op(sbi);
839 f2fs_submit_merged_bio(sbi, DATA, WRITE); 824 if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
840 need_balance_fs = false; 825 err = f2fs_write_inline_data(inode, page, offset);
841 } 826 else
827 err = do_write_data_page(page, &fio);
828 f2fs_unlock_op(sbi);
829done:
830 if (err && err != -ENOENT)
831 goto redirty_out;
842 832
843 clear_cold_data(page); 833 clear_cold_data(page);
844out: 834out:
@@ -849,12 +839,11 @@ out:
849 839
850redirty_out: 840redirty_out:
851 wbc->pages_skipped++; 841 wbc->pages_skipped++;
842 account_page_redirty(page);
852 set_page_dirty(page); 843 set_page_dirty(page);
853 return err; 844 return AOP_WRITEPAGE_ACTIVATE;
854} 845}
855 846
856#define MAX_DESIRED_PAGES_WP 4096
857
858static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, 847static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
859 void *data) 848 void *data)
860{ 849{
@@ -871,17 +860,17 @@ static int f2fs_write_data_pages(struct address_space *mapping,
871 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 860 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
872 bool locked = false; 861 bool locked = false;
873 int ret; 862 int ret;
874 long excess_nrtw = 0, desired_nrtw; 863 long diff;
875 864
876 /* deal with chardevs and other special file */ 865 /* deal with chardevs and other special file */
877 if (!mapping->a_ops->writepage) 866 if (!mapping->a_ops->writepage)
878 return 0; 867 return 0;
879 868
880 if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { 869 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
881 desired_nrtw = MAX_DESIRED_PAGES_WP; 870 get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA))
882 excess_nrtw = desired_nrtw - wbc->nr_to_write; 871 goto skip_write;
883 wbc->nr_to_write = desired_nrtw; 872
884 } 873 diff = nr_pages_to_write(sbi, DATA, wbc);
885 874
886 if (!S_ISDIR(inode->i_mode)) { 875 if (!S_ISDIR(inode->i_mode)) {
887 mutex_lock(&sbi->writepages); 876 mutex_lock(&sbi->writepages);
@@ -895,8 +884,12 @@ static int f2fs_write_data_pages(struct address_space *mapping,
895 884
896 remove_dirty_dir_inode(inode); 885 remove_dirty_dir_inode(inode);
897 886
898 wbc->nr_to_write -= excess_nrtw; 887 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
899 return ret; 888 return ret;
889
890skip_write:
891 wbc->pages_skipped += get_dirty_dents(inode);
892 return 0;
900} 893}
901 894
902static int f2fs_write_begin(struct file *file, struct address_space *mapping, 895static int f2fs_write_begin(struct file *file, struct address_space *mapping,
@@ -949,13 +942,19 @@ inline_data:
949 if (dn.data_blkaddr == NEW_ADDR) { 942 if (dn.data_blkaddr == NEW_ADDR) {
950 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 943 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
951 } else { 944 } else {
952 if (f2fs_has_inline_data(inode)) 945 if (f2fs_has_inline_data(inode)) {
953 err = f2fs_read_inline_data(inode, page); 946 err = f2fs_read_inline_data(inode, page);
954 else 947 if (err) {
948 page_cache_release(page);
949 return err;
950 }
951 } else {
955 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 952 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
956 READ_SYNC); 953 READ_SYNC);
957 if (err) 954 if (err)
958 return err; 955 return err;
956 }
957
959 lock_page(page); 958 lock_page(page);
960 if (unlikely(!PageUptodate(page))) { 959 if (unlikely(!PageUptodate(page))) {
961 f2fs_put_page(page, 1); 960 f2fs_put_page(page, 1);
@@ -1031,11 +1030,8 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
1031 unsigned int length) 1030 unsigned int length)
1032{ 1031{
1033 struct inode *inode = page->mapping->host; 1032 struct inode *inode = page->mapping->host;
1034 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1033 if (PageDirty(page))
1035 if (S_ISDIR(inode->i_mode) && PageDirty(page)) {
1036 dec_page_count(sbi, F2FS_DIRTY_DENTS);
1037 inode_dec_dirty_dents(inode); 1034 inode_dec_dirty_dents(inode);
1038 }
1039 ClearPagePrivate(page); 1035 ClearPagePrivate(page);
1040} 1036}
1041 1037
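In data.c the write end_io handler now recovers the superblock from bio->bi_private and, for META_FLUSH writes, signals a completion parked in sbi->wait_io rather than one hung off the bio. The underlying pattern — publish a pointer to an on-stack completion, submit, wait, and let the asynchronous side complete it exactly once — can be sketched in userspace roughly as below (pthread-based analogue, not kernel code; build with -pthread):

#include <pthread.h>
#include <stdio.h>

struct completion {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
};

static struct completion *wait_io;	/* stands in for sbi->wait_io */

static void complete(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	c->done = 1;
	pthread_cond_signal(&c->cond);
	pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	while (!c->done)
		pthread_cond_wait(&c->cond, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

static void *end_io(void *arg)
{
	(void)arg;
	/* ...pretend the queued write-back just finished... */
	if (wait_io) {
		complete(wait_io);
		wait_io = NULL;
	}
	return NULL;
}

int main(void)
{
	struct completion wait = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
		.done = 0,
	};
	pthread_t t;

	wait_io = &wait;		/* publish before "submitting" the bio */
	pthread_create(&t, NULL, end_io, NULL);
	wait_for_completion(&wait);
	pthread_join(t, NULL);
	printf("flush completed\n");
	return 0;
}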
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 3de9d20d0c14..b52c12cf5873 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -86,7 +86,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
86{ 86{
87 struct f2fs_stat_info *si = F2FS_STAT(sbi); 87 struct f2fs_stat_info *si = F2FS_STAT(sbi);
88 unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; 88 unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist;
89 struct sit_info *sit_i = SIT_I(sbi);
90 unsigned int segno, vblocks; 89 unsigned int segno, vblocks;
91 int ndirty = 0; 90 int ndirty = 0;
92 91
@@ -94,7 +93,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
94 total_vblocks = 0; 93 total_vblocks = 0;
95 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); 94 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
96 hblks_per_sec = blks_per_sec / 2; 95 hblks_per_sec = blks_per_sec / 2;
97 mutex_lock(&sit_i->sentry_lock);
98 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { 96 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
99 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); 97 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
100 dist = abs(vblocks - hblks_per_sec); 98 dist = abs(vblocks - hblks_per_sec);
@@ -105,7 +103,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
105 ndirty++; 103 ndirty++;
106 } 104 }
107 } 105 }
108 mutex_unlock(&sit_i->sentry_lock);
109 dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; 106 dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
110 si->bimodal = bimodal / dist; 107 si->bimodal = bimodal / dist;
111 if (si->dirty_count) 108 if (si->dirty_count)
@@ -236,6 +233,7 @@ static int stat_show(struct seq_file *s, void *v)
236 si->dirty_count); 233 si->dirty_count);
237 seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", 234 seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
238 si->prefree_count, si->free_segs, si->free_secs); 235 si->prefree_count, si->free_segs, si->free_secs);
236 seq_printf(s, "CP calls: %d\n", si->cp_count);
239 seq_printf(s, "GC calls: %d (BG: %d)\n", 237 seq_printf(s, "GC calls: %d (BG: %d)\n",
240 si->call_count, si->bg_gc); 238 si->call_count, si->bg_gc);
241 seq_printf(s, " - data segments : %d\n", si->data_segs); 239 seq_printf(s, " - data segments : %d\n", si->data_segs);
@@ -252,10 +250,10 @@ static int stat_show(struct seq_file *s, void *v)
252 si->ndirty_dent, si->ndirty_dirs); 250 si->ndirty_dent, si->ndirty_dirs);
253 seq_printf(s, " - meta: %4d in %4d\n", 251 seq_printf(s, " - meta: %4d in %4d\n",
254 si->ndirty_meta, si->meta_pages); 252 si->ndirty_meta, si->meta_pages);
255 seq_printf(s, " - NATs: %5d > %lu\n", 253 seq_printf(s, " - NATs: %9d\n - SITs: %9d\n",
256 si->nats, NM_WOUT_THRESHOLD); 254 si->nats, si->sits);
257 seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", 255 seq_printf(s, " - free_nids: %9d\n",
258 si->sits, si->fnids); 256 si->fnids);
259 seq_puts(s, "\nDistribution of User Blocks:"); 257 seq_puts(s, "\nDistribution of User Blocks:");
260 seq_puts(s, " [ valid | invalid | free ]\n"); 258 seq_puts(s, " [ valid | invalid | free ]\n");
261 seq_puts(s, " ["); 259 seq_puts(s, " [");
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 2b7c255bcbdf..972fd0ef230f 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -21,12 +21,12 @@ static unsigned long dir_blocks(struct inode *inode)
21 >> PAGE_CACHE_SHIFT; 21 >> PAGE_CACHE_SHIFT;
22} 22}
23 23
24static unsigned int dir_buckets(unsigned int level) 24static unsigned int dir_buckets(unsigned int level, int dir_level)
25{ 25{
26 if (level < MAX_DIR_HASH_DEPTH / 2) 26 if (level < MAX_DIR_HASH_DEPTH / 2)
27 return 1 << level; 27 return 1 << (level + dir_level);
28 else 28 else
29 return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1); 29 return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1);
30} 30}
31 31
32static unsigned int bucket_blocks(unsigned int level) 32static unsigned int bucket_blocks(unsigned int level)
@@ -65,13 +65,14 @@ static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
65 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; 65 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
66} 66}
67 67
68static unsigned long dir_block_index(unsigned int level, unsigned int idx) 68static unsigned long dir_block_index(unsigned int level,
69 int dir_level, unsigned int idx)
69{ 70{
70 unsigned long i; 71 unsigned long i;
71 unsigned long bidx = 0; 72 unsigned long bidx = 0;
72 73
73 for (i = 0; i < level; i++) 74 for (i = 0; i < level; i++)
74 bidx += dir_buckets(i) * bucket_blocks(i); 75 bidx += dir_buckets(i, dir_level) * bucket_blocks(i);
75 bidx += idx * bucket_blocks(level); 76 bidx += idx * bucket_blocks(level);
76 return bidx; 77 return bidx;
77} 78}
@@ -93,16 +94,21 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
93 f2fs_hash_t namehash, struct page **res_page) 94 f2fs_hash_t namehash, struct page **res_page)
94{ 95{
95 struct f2fs_dir_entry *de; 96 struct f2fs_dir_entry *de;
96 unsigned long bit_pos, end_pos, next_pos; 97 unsigned long bit_pos = 0;
97 struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); 98 struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
98 int slots; 99 const void *dentry_bits = &dentry_blk->dentry_bitmap;
100 int max_len = 0;
99 101
100 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
101 NR_DENTRY_IN_BLOCK, 0);
102 while (bit_pos < NR_DENTRY_IN_BLOCK) { 102 while (bit_pos < NR_DENTRY_IN_BLOCK) {
103 if (!test_bit_le(bit_pos, dentry_bits)) {
104 if (bit_pos == 0)
105 max_len = 1;
106 else if (!test_bit_le(bit_pos - 1, dentry_bits))
107 max_len++;
108 bit_pos++;
109 continue;
110 }
103 de = &dentry_blk->dentry[bit_pos]; 111 de = &dentry_blk->dentry[bit_pos];
104 slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
105
106 if (early_match_name(name, namelen, namehash, de)) { 112 if (early_match_name(name, namelen, namehash, de)) {
107 if (!memcmp(dentry_blk->filename[bit_pos], 113 if (!memcmp(dentry_blk->filename[bit_pos],
108 name, namelen)) { 114 name, namelen)) {
@@ -110,20 +116,18 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
110 goto found; 116 goto found;
111 } 117 }
112 } 118 }
113 next_pos = bit_pos + slots; 119 if (max_len > *max_slots) {
114 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, 120 *max_slots = max_len;
115 NR_DENTRY_IN_BLOCK, next_pos); 121 max_len = 0;
116 if (bit_pos >= NR_DENTRY_IN_BLOCK) 122 }
117 end_pos = NR_DENTRY_IN_BLOCK; 123 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
118 else
119 end_pos = bit_pos;
120 if (*max_slots < end_pos - next_pos)
121 *max_slots = end_pos - next_pos;
122 } 124 }
123 125
124 de = NULL; 126 de = NULL;
125 kunmap(dentry_page); 127 kunmap(dentry_page);
126found: 128found:
129 if (max_len > *max_slots)
130 *max_slots = max_len;
127 return de; 131 return de;
128} 132}
129 133
@@ -141,10 +145,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
141 145
142 f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); 146 f2fs_bug_on(level > MAX_DIR_HASH_DEPTH);
143 147
144 nbucket = dir_buckets(level); 148 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
145 nblock = bucket_blocks(level); 149 nblock = bucket_blocks(level);
146 150
147 bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket); 151 bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
152 le32_to_cpu(namehash) % nbucket);
148 end_block = bidx + nblock; 153 end_block = bidx + nblock;
149 154
150 for (; bidx < end_block; bidx++) { 155 for (; bidx < end_block; bidx++) {
@@ -248,7 +253,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
248 struct page *page, struct inode *inode) 253 struct page *page, struct inode *inode)
249{ 254{
250 lock_page(page); 255 lock_page(page);
251 wait_on_page_writeback(page); 256 f2fs_wait_on_page_writeback(page, DATA);
252 de->ino = cpu_to_le32(inode->i_ino); 257 de->ino = cpu_to_le32(inode->i_ino);
253 set_de_type(de, inode); 258 set_de_type(de, inode);
254 kunmap(page); 259 kunmap(page);
@@ -347,14 +352,11 @@ static struct page *init_inode_metadata(struct inode *inode,
347 err = f2fs_init_security(inode, dir, name, page); 352 err = f2fs_init_security(inode, dir, name, page);
348 if (err) 353 if (err)
349 goto put_error; 354 goto put_error;
350
351 wait_on_page_writeback(page);
352 } else { 355 } else {
353 page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); 356 page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
354 if (IS_ERR(page)) 357 if (IS_ERR(page))
355 return page; 358 return page;
356 359
357 wait_on_page_writeback(page);
358 set_cold_node(inode, page); 360 set_cold_node(inode, page);
359 } 361 }
360 362
@@ -372,6 +374,10 @@ static struct page *init_inode_metadata(struct inode *inode,
372 374
373put_error: 375put_error:
374 f2fs_put_page(page, 1); 376 f2fs_put_page(page, 1);
377 /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
378 truncate_inode_pages(&inode->i_data, 0);
379 truncate_blocks(inode, 0);
380 remove_dirty_dir_inode(inode);
375error: 381error:
376 remove_inode_page(inode); 382 remove_inode_page(inode);
377 return ERR_PTR(err); 383 return ERR_PTR(err);
@@ -395,9 +401,6 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
395 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 401 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
396 } 402 }
397 403
398 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR))
399 update_inode_page(dir);
400
401 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) 404 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
402 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 405 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
403} 406}
@@ -464,10 +467,11 @@ start:
464 if (level == current_depth) 467 if (level == current_depth)
465 ++current_depth; 468 ++current_depth;
466 469
467 nbucket = dir_buckets(level); 470 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
468 nblock = bucket_blocks(level); 471 nblock = bucket_blocks(level);
469 472
470 bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); 473 bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
474 (le32_to_cpu(dentry_hash) % nbucket));
471 475
472 for (block = bidx; block <= (bidx + nblock - 1); block++) { 476 for (block = bidx; block <= (bidx + nblock - 1); block++) {
473 dentry_page = get_new_data_page(dir, NULL, block, true); 477 dentry_page = get_new_data_page(dir, NULL, block, true);
@@ -487,8 +491,9 @@ start:
487 ++level; 491 ++level;
488 goto start; 492 goto start;
489add_dentry: 493add_dentry:
490 wait_on_page_writeback(dentry_page); 494 f2fs_wait_on_page_writeback(dentry_page, DATA);
491 495
496 down_write(&F2FS_I(inode)->i_sem);
492 page = init_inode_metadata(inode, dir, name); 497 page = init_inode_metadata(inode, dir, name);
493 if (IS_ERR(page)) { 498 if (IS_ERR(page)) {
494 err = PTR_ERR(page); 499 err = PTR_ERR(page);
@@ -511,7 +516,12 @@ add_dentry:
511 516
512 update_parent_metadata(dir, inode, current_depth); 517 update_parent_metadata(dir, inode, current_depth);
513fail: 518fail:
514 clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 519 up_write(&F2FS_I(inode)->i_sem);
520
521 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
522 update_inode_page(dir);
523 clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
524 }
515 kunmap(dentry_page); 525 kunmap(dentry_page);
516 f2fs_put_page(dentry_page, 1); 526 f2fs_put_page(dentry_page, 1);
517 return err; 527 return err;
@@ -528,13 +538,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
528 unsigned int bit_pos; 538 unsigned int bit_pos;
529 struct address_space *mapping = page->mapping; 539 struct address_space *mapping = page->mapping;
530 struct inode *dir = mapping->host; 540 struct inode *dir = mapping->host;
531 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
532 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); 541 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
533 void *kaddr = page_address(page); 542 void *kaddr = page_address(page);
534 int i; 543 int i;
535 544
536 lock_page(page); 545 lock_page(page);
537 wait_on_page_writeback(page); 546 f2fs_wait_on_page_writeback(page, DATA);
538 547
539 dentry_blk = (struct f2fs_dentry_block *)kaddr; 548 dentry_blk = (struct f2fs_dentry_block *)kaddr;
540 bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; 549 bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
@@ -551,6 +560,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
551 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 560 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
552 561
553 if (inode) { 562 if (inode) {
563 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
564
565 down_write(&F2FS_I(inode)->i_sem);
566
554 if (S_ISDIR(inode->i_mode)) { 567 if (S_ISDIR(inode->i_mode)) {
555 drop_nlink(dir); 568 drop_nlink(dir);
556 update_inode_page(dir); 569 update_inode_page(dir);
@@ -561,6 +574,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
561 drop_nlink(inode); 574 drop_nlink(inode);
562 i_size_write(inode, 0); 575 i_size_write(inode, 0);
563 } 576 }
577 up_write(&F2FS_I(inode)->i_sem);
564 update_inode_page(inode); 578 update_inode_page(inode);
565 579
566 if (inode->i_nlink == 0) 580 if (inode->i_nlink == 0)
@@ -573,7 +587,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
573 truncate_hole(dir, page->index, page->index + 1); 587 truncate_hole(dir, page->index, page->index + 1);
574 clear_page_dirty_for_io(page); 588 clear_page_dirty_for_io(page);
575 ClearPageUptodate(page); 589 ClearPageUptodate(page);
576 dec_page_count(sbi, F2FS_DIRTY_DENTS);
577 inode_dec_dirty_dents(dir); 590 inode_dec_dirty_dents(dir);
578 } 591 }
579 f2fs_put_page(page, 1); 592 f2fs_put_page(page, 1);
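The reworked find_in_block() above walks the dentry bitmap once, growing a counter across free slots and remembering the longest run seen, so the caller learns whether the block still has room for a new multi-slot entry even when the name lookup misses. A simplified userspace model of that single pass (slot count and bitmap contents are invented for the example):

#include <stdio.h>

#define NR_SLOTS 16

/* Return the longest run of clear (free) bits in a little-endian bitmap. */
static int max_free_run(const unsigned char *bitmap)
{
	int i, run = 0, max = 0;

	for (i = 0; i < NR_SLOTS; i++) {
		int used = bitmap[i / 8] >> (i % 8) & 1;

		if (!used) {
			run++;			/* free slot: grow the current run */
		} else {
			if (run > max)
				max = run;	/* used slot: close the run */
			run = 0;
		}
	}
	return run > max ? run : max;
}

int main(void)
{
	/* slots 0,1,5,6,7,12 are used -> longest free run is slots 8..11 */
	unsigned char bitmap[2] = { 0xe3, 0x10 };

	printf("longest free run: %d slots\n", max_free_run(bitmap));
	return 0;
}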
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index fc3c558cb4f3..2ecac8312359 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -40,6 +40,7 @@
40#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 40#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
41#define F2FS_MOUNT_INLINE_XATTR 0x00000080 41#define F2FS_MOUNT_INLINE_XATTR 0x00000080
42#define F2FS_MOUNT_INLINE_DATA 0x00000100 42#define F2FS_MOUNT_INLINE_DATA 0x00000100
43#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
43 44
44#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 45#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
45#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) 46#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -88,6 +89,16 @@ enum {
88 SIT_BITMAP 89 SIT_BITMAP
89}; 90};
90 91
92/*
93 * For CP/NAT/SIT/SSA readahead
94 */
95enum {
96 META_CP,
97 META_NAT,
98 META_SIT,
99 META_SSA
100};
101
91/* for the list of orphan inodes */ 102/* for the list of orphan inodes */
92struct orphan_inode_entry { 103struct orphan_inode_entry {
93 struct list_head list; /* list head */ 104 struct list_head list; /* list head */
@@ -187,16 +198,20 @@ struct extent_info {
187#define FADVISE_COLD_BIT 0x01 198#define FADVISE_COLD_BIT 0x01
188#define FADVISE_LOST_PINO_BIT 0x02 199#define FADVISE_LOST_PINO_BIT 0x02
189 200
201#define DEF_DIR_LEVEL 0
202
190struct f2fs_inode_info { 203struct f2fs_inode_info {
191 struct inode vfs_inode; /* serve a vfs inode */ 204 struct inode vfs_inode; /* serve a vfs inode */
192 unsigned long i_flags; /* keep an inode flags for ioctl */ 205 unsigned long i_flags; /* keep an inode flags for ioctl */
193 unsigned char i_advise; /* use to give file attribute hints */ 206 unsigned char i_advise; /* use to give file attribute hints */
207 unsigned char i_dir_level; /* use for dentry level for large dir */
194 unsigned int i_current_depth; /* use only in directory structure */ 208 unsigned int i_current_depth; /* use only in directory structure */
195 unsigned int i_pino; /* parent inode number */ 209 unsigned int i_pino; /* parent inode number */
196 umode_t i_acl_mode; /* keep file acl mode temporarily */ 210 umode_t i_acl_mode; /* keep file acl mode temporarily */
197 211
198 /* Use below internally in f2fs*/ 212 /* Use below internally in f2fs*/
199 unsigned long flags; /* use to pass per-file flags */ 213 unsigned long flags; /* use to pass per-file flags */
214 struct rw_semaphore i_sem; /* protect fi info */
200 atomic_t dirty_dents; /* # of dirty dentry pages */ 215 atomic_t dirty_dents; /* # of dirty dentry pages */
201 f2fs_hash_t chash; /* hash value of given file name */ 216 f2fs_hash_t chash; /* hash value of given file name */
202 unsigned int clevel; /* maximum level of given file name */ 217 unsigned int clevel; /* maximum level of given file name */
@@ -229,6 +244,7 @@ struct f2fs_nm_info {
229 block_t nat_blkaddr; /* base disk address of NAT */ 244 block_t nat_blkaddr; /* base disk address of NAT */
230 nid_t max_nid; /* maximum possible node ids */ 245 nid_t max_nid; /* maximum possible node ids */
231 nid_t next_scan_nid; /* the next nid to be scanned */ 246 nid_t next_scan_nid; /* the next nid to be scanned */
247 unsigned int ram_thresh; /* control the memory footprint */
232 248
233 /* NAT cache management */ 249 /* NAT cache management */
234 struct radix_tree_root nat_root;/* root of the nat entry cache */ 250 struct radix_tree_root nat_root;/* root of the nat entry cache */
@@ -238,6 +254,7 @@ struct f2fs_nm_info {
238 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ 254 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
239 255
240 /* free node ids management */ 256 /* free node ids management */
257 struct radix_tree_root free_nid_root;/* root of the free_nid cache */
241 struct list_head free_nid_list; /* a list for free nids */ 258 struct list_head free_nid_list; /* a list for free nids */
242 spinlock_t free_nid_list_lock; /* protect free nid list */ 259 spinlock_t free_nid_list_lock; /* protect free nid list */
243 unsigned int fcnt; /* the number of free node id */ 260 unsigned int fcnt; /* the number of free node id */
@@ -300,6 +317,12 @@ enum {
300 NO_CHECK_TYPE 317 NO_CHECK_TYPE
301}; 318};
302 319
320struct flush_cmd {
321 struct flush_cmd *next;
322 struct completion wait;
323 int ret;
324};
325
303struct f2fs_sm_info { 326struct f2fs_sm_info {
304 struct sit_info *sit_info; /* whole segment information */ 327 struct sit_info *sit_info; /* whole segment information */
305 struct free_segmap_info *free_info; /* free segment information */ 328 struct free_segmap_info *free_info; /* free segment information */
@@ -328,6 +351,14 @@ struct f2fs_sm_info {
328 351
329 unsigned int ipu_policy; /* in-place-update policy */ 352 unsigned int ipu_policy; /* in-place-update policy */
330 unsigned int min_ipu_util; /* in-place-update threshold */ 353 unsigned int min_ipu_util; /* in-place-update threshold */
354
355 /* for flush command control */
356 struct task_struct *f2fs_issue_flush; /* flush thread */
357 wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
358 struct flush_cmd *issue_list; /* list for command issue */
359 struct flush_cmd *dispatch_list; /* list for command dispatch */
360 spinlock_t issue_lock; /* for issue list lock */
361 struct flush_cmd *issue_tail; /* list tail of issue list */
331}; 362};
332 363
333/* 364/*
@@ -378,7 +409,7 @@ struct f2fs_bio_info {
378 struct bio *bio; /* bios to merge */ 409 struct bio *bio; /* bios to merge */
379 sector_t last_block_in_bio; /* last block number */ 410 sector_t last_block_in_bio; /* last block number */
380 struct f2fs_io_info fio; /* store buffered io info. */ 411 struct f2fs_io_info fio; /* store buffered io info. */
381 struct mutex io_mutex; /* mutex for bio */ 412 struct rw_semaphore io_rwsem; /* blocking op for bio */
382}; 413};
383 414
384struct f2fs_sb_info { 415struct f2fs_sb_info {
@@ -398,6 +429,7 @@ struct f2fs_sb_info {
398 /* for bio operations */ 429 /* for bio operations */
399 struct f2fs_bio_info read_io; /* for read bios */ 430 struct f2fs_bio_info read_io; /* for read bios */
400 struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ 431 struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
432 struct completion *wait_io; /* for completion bios */
401 433
402 /* for checkpoint */ 434 /* for checkpoint */
403 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ 435 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
@@ -407,7 +439,6 @@ struct f2fs_sb_info {
407 struct mutex node_write; /* locking node writes */ 439 struct mutex node_write; /* locking node writes */
408 struct mutex writepages; /* mutex for writepages() */ 440 struct mutex writepages; /* mutex for writepages() */
409 bool por_doing; /* recovery is doing or not */ 441 bool por_doing; /* recovery is doing or not */
410 bool on_build_free_nids; /* build_free_nids is doing */
411 wait_queue_head_t cp_wait; 442 wait_queue_head_t cp_wait;
412 443
413 /* for orphan inode management */ 444 /* for orphan inode management */
@@ -436,6 +467,7 @@ struct f2fs_sb_info {
436 unsigned int total_valid_node_count; /* valid node block count */ 467 unsigned int total_valid_node_count; /* valid node block count */
437 unsigned int total_valid_inode_count; /* valid inode count */ 468 unsigned int total_valid_inode_count; /* valid inode count */
438 int active_logs; /* # of active logs */ 469 int active_logs; /* # of active logs */
470 int dir_level; /* directory level */
439 471
440 block_t user_block_count; /* # of user blocks */ 472 block_t user_block_count; /* # of user blocks */
441 block_t total_valid_block_count; /* # of valid blocks */ 473 block_t total_valid_block_count; /* # of valid blocks */
@@ -622,6 +654,11 @@ static inline int F2FS_HAS_BLOCKS(struct inode *inode)
622 return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS; 654 return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
623} 655}
624 656
657static inline bool f2fs_has_xattr_block(unsigned int ofs)
658{
659 return ofs == XATTR_NODE_OFFSET;
660}
661
625static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, 662static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
626 struct inode *inode, blkcnt_t count) 663 struct inode *inode, blkcnt_t count)
627{ 664{
@@ -661,6 +698,7 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
661 698
662static inline void inode_inc_dirty_dents(struct inode *inode) 699static inline void inode_inc_dirty_dents(struct inode *inode)
663{ 700{
701 inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
664 atomic_inc(&F2FS_I(inode)->dirty_dents); 702 atomic_inc(&F2FS_I(inode)->dirty_dents);
665} 703}
666 704
@@ -671,6 +709,10 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
671 709
672static inline void inode_dec_dirty_dents(struct inode *inode) 710static inline void inode_dec_dirty_dents(struct inode *inode)
673{ 711{
712 if (!S_ISDIR(inode->i_mode))
713 return;
714
715 dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
674 atomic_dec(&F2FS_I(inode)->dirty_dents); 716 atomic_dec(&F2FS_I(inode)->dirty_dents);
675} 717}
676 718
@@ -679,6 +721,11 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
679 return atomic_read(&sbi->nr_pages[count_type]); 721 return atomic_read(&sbi->nr_pages[count_type]);
680} 722}
681 723
724static inline int get_dirty_dents(struct inode *inode)
725{
726 return atomic_read(&F2FS_I(inode)->dirty_dents);
727}
728
682static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) 729static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
683{ 730{
684 unsigned int pages_per_sec = sbi->segs_per_sec * 731 unsigned int pages_per_sec = sbi->segs_per_sec *
@@ -689,11 +736,7 @@ static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
689 736
690static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) 737static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
691{ 738{
692 block_t ret; 739 return sbi->total_valid_block_count;
693 spin_lock(&sbi->stat_lock);
694 ret = sbi->total_valid_block_count;
695 spin_unlock(&sbi->stat_lock);
696 return ret;
697} 740}
698 741
699static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) 742static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
@@ -789,11 +832,7 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
789 832
790static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) 833static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
791{ 834{
792 unsigned int ret; 835 return sbi->total_valid_node_count;
793 spin_lock(&sbi->stat_lock);
794 ret = sbi->total_valid_node_count;
795 spin_unlock(&sbi->stat_lock);
796 return ret;
797} 836}
798 837
799static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) 838static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
@@ -814,11 +853,7 @@ static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
814 853
815static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) 854static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
816{ 855{
817 unsigned int ret; 856 return sbi->total_valid_inode_count;
818 spin_lock(&sbi->stat_lock);
819 ret = sbi->total_valid_inode_count;
820 spin_unlock(&sbi->stat_lock);
821 return ret;
822} 857}
823 858
824static inline void f2fs_put_page(struct page *page, int unlock) 859static inline void f2fs_put_page(struct page *page, int unlock)
@@ -844,9 +879,9 @@ static inline void f2fs_put_dnode(struct dnode_of_data *dn)
844} 879}
845 880
846static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, 881static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
847 size_t size, void (*ctor)(void *)) 882 size_t size)
848{ 883{
849 return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); 884 return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL);
850} 885}
851 886
852static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, 887static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
@@ -983,24 +1018,28 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
983 ri->i_inline |= F2FS_INLINE_DATA; 1018 ri->i_inline |= F2FS_INLINE_DATA;
984} 1019}
985 1020
1021static inline int f2fs_has_inline_xattr(struct inode *inode)
1022{
1023 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR);
1024}
1025
986static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) 1026static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
987{ 1027{
988 if (is_inode_flag_set(fi, FI_INLINE_XATTR)) 1028 if (f2fs_has_inline_xattr(&fi->vfs_inode))
989 return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; 1029 return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;
990 return DEF_ADDRS_PER_INODE; 1030 return DEF_ADDRS_PER_INODE;
991} 1031}
992 1032
993static inline void *inline_xattr_addr(struct page *page) 1033static inline void *inline_xattr_addr(struct page *page)
994{ 1034{
995 struct f2fs_inode *ri; 1035 struct f2fs_inode *ri = F2FS_INODE(page);
996 ri = (struct f2fs_inode *)page_address(page);
997 return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - 1036 return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
998 F2FS_INLINE_XATTR_ADDRS]); 1037 F2FS_INLINE_XATTR_ADDRS]);
999} 1038}
1000 1039
1001static inline int inline_xattr_size(struct inode *inode) 1040static inline int inline_xattr_size(struct inode *inode)
1002{ 1041{
1003 if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR)) 1042 if (f2fs_has_inline_xattr(inode))
1004 return F2FS_INLINE_XATTR_ADDRS << 2; 1043 return F2FS_INLINE_XATTR_ADDRS << 2;
1005 else 1044 else
1006 return 0; 1045 return 0;
@@ -1013,8 +1052,7 @@ static inline int f2fs_has_inline_data(struct inode *inode)
1013 1052
1014static inline void *inline_data_addr(struct page *page) 1053static inline void *inline_data_addr(struct page *page)
1015{ 1054{
1016 struct f2fs_inode *ri; 1055 struct f2fs_inode *ri = F2FS_INODE(page);
1017 ri = (struct f2fs_inode *)page_address(page);
1018 return (void *)&(ri->i_addr[1]); 1056 return (void *)&(ri->i_addr[1]);
1019} 1057}
1020 1058
@@ -1023,6 +1061,12 @@ static inline int f2fs_readonly(struct super_block *sb)
1023 return sb->s_flags & MS_RDONLY; 1061 return sb->s_flags & MS_RDONLY;
1024} 1062}
1025 1063
1064static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
1065{
1066 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
1067 sbi->sb->s_flags |= MS_RDONLY;
1068}
1069
1026#define get_inode_mode(i) \ 1070#define get_inode_mode(i) \
1027 ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ 1071 ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
1028 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) 1072 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -1048,7 +1092,7 @@ void f2fs_set_inode_flags(struct inode *);
1048struct inode *f2fs_iget(struct super_block *, unsigned long); 1092struct inode *f2fs_iget(struct super_block *, unsigned long);
1049int try_to_free_nats(struct f2fs_sb_info *, int); 1093int try_to_free_nats(struct f2fs_sb_info *, int);
1050void update_inode(struct inode *, struct page *); 1094void update_inode(struct inode *, struct page *);
1051int update_inode_page(struct inode *); 1095void update_inode_page(struct inode *);
1052int f2fs_write_inode(struct inode *, struct writeback_control *); 1096int f2fs_write_inode(struct inode *, struct writeback_control *);
1053void f2fs_evict_inode(struct inode *); 1097void f2fs_evict_inode(struct inode *);
1054 1098
@@ -1097,6 +1141,7 @@ struct dnode_of_data;
1097struct node_info; 1141struct node_info;
1098 1142
1099int is_checkpointed_node(struct f2fs_sb_info *, nid_t); 1143int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
1144bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
1100void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); 1145void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
1101int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); 1146int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
1102int truncate_inode_blocks(struct inode *, pgoff_t); 1147int truncate_inode_blocks(struct inode *, pgoff_t);
@@ -1115,6 +1160,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);
1115void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 1160void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
1116void recover_node_page(struct f2fs_sb_info *, struct page *, 1161void recover_node_page(struct f2fs_sb_info *, struct page *,
1117 struct f2fs_summary *, struct node_info *, block_t); 1162 struct f2fs_summary *, struct node_info *, block_t);
1163bool recover_xattr_data(struct inode *, struct page *, block_t);
1118int recover_inode_page(struct f2fs_sb_info *, struct page *); 1164int recover_inode_page(struct f2fs_sb_info *, struct page *);
1119int restore_node_summary(struct f2fs_sb_info *, unsigned int, 1165int restore_node_summary(struct f2fs_sb_info *, unsigned int,
1120 struct f2fs_summary_block *); 1166 struct f2fs_summary_block *);
@@ -1129,7 +1175,9 @@ void destroy_node_manager_caches(void);
1129 */ 1175 */
1130void f2fs_balance_fs(struct f2fs_sb_info *); 1176void f2fs_balance_fs(struct f2fs_sb_info *);
1131void f2fs_balance_fs_bg(struct f2fs_sb_info *); 1177void f2fs_balance_fs_bg(struct f2fs_sb_info *);
1178int f2fs_issue_flush(struct f2fs_sb_info *);
1132void invalidate_blocks(struct f2fs_sb_info *, block_t); 1179void invalidate_blocks(struct f2fs_sb_info *, block_t);
1180void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
1133void clear_prefree_segments(struct f2fs_sb_info *); 1181void clear_prefree_segments(struct f2fs_sb_info *);
1134int npages_for_summary_flush(struct f2fs_sb_info *); 1182int npages_for_summary_flush(struct f2fs_sb_info *);
1135void allocate_new_segments(struct f2fs_sb_info *); 1183void allocate_new_segments(struct f2fs_sb_info *);
@@ -1162,6 +1210,7 @@ void destroy_segment_manager_caches(void);
1162 */ 1210 */
1163struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); 1211struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
1164struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1212struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
1213int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
1165long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1214long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1166int acquire_orphan_inode(struct f2fs_sb_info *); 1215int acquire_orphan_inode(struct f2fs_sb_info *);
1167void release_orphan_inode(struct f2fs_sb_info *); 1216void release_orphan_inode(struct f2fs_sb_info *);
@@ -1231,7 +1280,7 @@ struct f2fs_stat_info {
1231 int util_free, util_valid, util_invalid; 1280 int util_free, util_valid, util_invalid;
1232 int rsvd_segs, overp_segs; 1281 int rsvd_segs, overp_segs;
1233 int dirty_count, node_pages, meta_pages; 1282 int dirty_count, node_pages, meta_pages;
1234 int prefree_count, call_count; 1283 int prefree_count, call_count, cp_count;
1235 int tot_segs, node_segs, data_segs, free_segs, free_secs; 1284 int tot_segs, node_segs, data_segs, free_segs, free_secs;
1236 int tot_blks, data_blks, node_blks; 1285 int tot_blks, data_blks, node_blks;
1237 int curseg[NR_CURSEG_TYPE]; 1286 int curseg[NR_CURSEG_TYPE];
@@ -1248,6 +1297,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1248 return (struct f2fs_stat_info *)sbi->stat_info; 1297 return (struct f2fs_stat_info *)sbi->stat_info;
1249} 1298}
1250 1299
1300#define stat_inc_cp_count(si) ((si)->cp_count++)
1251#define stat_inc_call_count(si) ((si)->call_count++) 1301#define stat_inc_call_count(si) ((si)->call_count++)
1252#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) 1302#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
1253#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) 1303#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
@@ -1302,6 +1352,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *);
1302void __init f2fs_create_root_stats(void); 1352void __init f2fs_create_root_stats(void);
1303void f2fs_destroy_root_stats(void); 1353void f2fs_destroy_root_stats(void);
1304#else 1354#else
1355#define stat_inc_cp_count(si)
1305#define stat_inc_call_count(si) 1356#define stat_inc_call_count(si)
1306#define stat_inc_bggc_count(si) 1357#define stat_inc_bggc_count(si)
1307#define stat_inc_dirty_dir(sbi) 1358#define stat_inc_dirty_dir(sbi)
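The flush_cmd plumbing added to f2fs_sm_info above backs the new FLUSH_MERGE option: fsync callers queue a command and sleep on it, while a single issuing thread drains the whole list and satisfies every waiter with one device cache flush. A compact userspace sketch of that merge pattern (pthread-based; flush_cmd and issue_flush are reused as names for readability, everything else is invented; build with -pthread):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct flush_cmd {
	struct flush_cmd *next;
	int done;
	int ret;
};

static struct flush_cmd *issue_list;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t queued = PTHREAD_COND_INITIALIZER;
static pthread_cond_t completed = PTHREAD_COND_INITIALIZER;

static int do_expensive_flush(void)
{
	usleep(1000);			/* stand-in for a device cache flush */
	return 0;
}

static void *flusher(void *arg)
{
	(void)arg;
	for (;;) {
		struct flush_cmd *head, *cmd;
		int ret;

		pthread_mutex_lock(&lock);
		while (!issue_list)
			pthread_cond_wait(&queued, &lock);
		head = issue_list;	/* grab every queued command at once */
		issue_list = NULL;
		pthread_mutex_unlock(&lock);

		ret = do_expensive_flush();	/* one flush serves them all */

		pthread_mutex_lock(&lock);
		for (cmd = head; cmd; cmd = cmd->next) {
			cmd->ret = ret;
			cmd->done = 1;
		}
		pthread_cond_broadcast(&completed);
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

static int issue_flush(void)
{
	struct flush_cmd cmd = { .next = NULL, .done = 0, .ret = 0 };

	pthread_mutex_lock(&lock);
	cmd.next = issue_list;		/* LIFO: completion order does not matter */
	issue_list = &cmd;
	pthread_cond_signal(&queued);
	while (!cmd.done)
		pthread_cond_wait(&completed, &lock);
	pthread_mutex_unlock(&lock);
	return cmd.ret;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, flusher, NULL);
	printf("merged flush returned %d\n", issue_flush());
	return 0;
}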
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0dfcef53a6ed..60e7d5448a1d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -76,7 +76,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
76 trace_f2fs_vm_page_mkwrite(page, DATA); 76 trace_f2fs_vm_page_mkwrite(page, DATA);
77mapped: 77mapped:
78 /* fill the page */ 78 /* fill the page */
79 wait_on_page_writeback(page); 79 f2fs_wait_on_page_writeback(page, DATA);
80out: 80out:
81 sb_end_pagefault(inode->i_sb); 81 sb_end_pagefault(inode->i_sb);
82 return block_page_mkwrite_return(err); 82 return block_page_mkwrite_return(err);
@@ -84,6 +84,7 @@ out:
84 84
85static const struct vm_operations_struct f2fs_file_vm_ops = { 85static const struct vm_operations_struct f2fs_file_vm_ops = {
86 .fault = filemap_fault, 86 .fault = filemap_fault,
87 .map_pages = filemap_map_pages,
87 .page_mkwrite = f2fs_vm_page_mkwrite, 88 .page_mkwrite = f2fs_vm_page_mkwrite,
88 .remap_pages = generic_file_remap_pages, 89 .remap_pages = generic_file_remap_pages,
89}; 90};
@@ -111,11 +112,12 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
111int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 112int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
112{ 113{
113 struct inode *inode = file->f_mapping->host; 114 struct inode *inode = file->f_mapping->host;
115 struct f2fs_inode_info *fi = F2FS_I(inode);
114 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 116 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
115 int ret = 0; 117 int ret = 0;
116 bool need_cp = false; 118 bool need_cp = false;
117 struct writeback_control wbc = { 119 struct writeback_control wbc = {
118 .sync_mode = WB_SYNC_NONE, 120 .sync_mode = WB_SYNC_ALL,
119 .nr_to_write = LONG_MAX, 121 .nr_to_write = LONG_MAX,
120 .for_reclaim = 0, 122 .for_reclaim = 0,
121 }; 123 };
@@ -133,7 +135,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
133 /* guarantee free sections for fsync */ 135 /* guarantee free sections for fsync */
134 f2fs_balance_fs(sbi); 136 f2fs_balance_fs(sbi);
135 137
136 mutex_lock(&inode->i_mutex); 138 down_read(&fi->i_sem);
137 139
138 /* 140 /*
139 * Both of fdatasync() and fsync() are able to be recovered from 141 * Both of fdatasync() and fsync() are able to be recovered from
@@ -150,25 +152,33 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
150 else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) 152 else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
151 need_cp = true; 153 need_cp = true;
152 154
155 up_read(&fi->i_sem);
156
153 if (need_cp) { 157 if (need_cp) {
154 nid_t pino; 158 nid_t pino;
155 159
156 F2FS_I(inode)->xattr_ver = 0;
157
158 /* all the dirty node pages should be flushed for POR */ 160 /* all the dirty node pages should be flushed for POR */
159 ret = f2fs_sync_fs(inode->i_sb, 1); 161 ret = f2fs_sync_fs(inode->i_sb, 1);
162
163 down_write(&fi->i_sem);
164 F2FS_I(inode)->xattr_ver = 0;
160 if (file_wrong_pino(inode) && inode->i_nlink == 1 && 165 if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
161 get_parent_ino(inode, &pino)) { 166 get_parent_ino(inode, &pino)) {
162 F2FS_I(inode)->i_pino = pino; 167 F2FS_I(inode)->i_pino = pino;
163 file_got_pino(inode); 168 file_got_pino(inode);
169 up_write(&fi->i_sem);
164 mark_inode_dirty_sync(inode); 170 mark_inode_dirty_sync(inode);
165 ret = f2fs_write_inode(inode, NULL); 171 ret = f2fs_write_inode(inode, NULL);
166 if (ret) 172 if (ret)
167 goto out; 173 goto out;
174 } else {
175 up_write(&fi->i_sem);
168 } 176 }
169 } else { 177 } else {
170 /* if there is no written node page, write its inode page */ 178 /* if there is no written node page, write its inode page */
171 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { 179 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
180 if (fsync_mark_done(sbi, inode->i_ino))
181 goto out;
172 mark_inode_dirty_sync(inode); 182 mark_inode_dirty_sync(inode);
173 ret = f2fs_write_inode(inode, NULL); 183 ret = f2fs_write_inode(inode, NULL);
174 if (ret) 184 if (ret)
@@ -177,10 +187,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
177 ret = wait_on_node_pages_writeback(sbi, inode->i_ino); 187 ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
178 if (ret) 188 if (ret)
179 goto out; 189 goto out;
180 ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 190 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
181 } 191 }
182out: 192out:
183 mutex_unlock(&inode->i_mutex);
184 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 193 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
185 return ret; 194 return ret;
186} 195}
@@ -245,7 +254,7 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
245 f2fs_put_page(page, 1); 254 f2fs_put_page(page, 1);
246 return; 255 return;
247 } 256 }
248 wait_on_page_writeback(page); 257 f2fs_wait_on_page_writeback(page, DATA);
249 zero_user(page, offset, PAGE_CACHE_SIZE - offset); 258 zero_user(page, offset, PAGE_CACHE_SIZE - offset);
250 set_page_dirty(page); 259 set_page_dirty(page);
251 f2fs_put_page(page, 1); 260 f2fs_put_page(page, 1);
@@ -422,7 +431,7 @@ static void fill_zero(struct inode *inode, pgoff_t index,
422 f2fs_unlock_op(sbi); 431 f2fs_unlock_op(sbi);
423 432
424 if (!IS_ERR(page)) { 433 if (!IS_ERR(page)) {
425 wait_on_page_writeback(page); 434 f2fs_wait_on_page_writeback(page, DATA);
426 zero_user(page, start, len); 435 zero_user(page, start, len);
427 set_page_dirty(page); 436 set_page_dirty(page);
428 f2fs_put_page(page, 1); 437 f2fs_put_page(page, 1);
@@ -560,6 +569,8 @@ static long f2fs_fallocate(struct file *file, int mode,
560 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 569 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
561 return -EOPNOTSUPP; 570 return -EOPNOTSUPP;
562 571
572 mutex_lock(&inode->i_mutex);
573
563 if (mode & FALLOC_FL_PUNCH_HOLE) 574 if (mode & FALLOC_FL_PUNCH_HOLE)
564 ret = punch_hole(inode, offset, len); 575 ret = punch_hole(inode, offset, len);
565 else 576 else
@@ -569,6 +580,9 @@ static long f2fs_fallocate(struct file *file, int mode,
569 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 580 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
570 mark_inode_dirty(inode); 581 mark_inode_dirty(inode);
571 } 582 }
583
584 mutex_unlock(&inode->i_mutex);
585
572 trace_f2fs_fallocate(inode, mode, offset, len, ret); 586 trace_f2fs_fallocate(inode, mode, offset, len, ret);
573 return ret; 587 return ret;
574} 588}
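
The f2fs_fallocate() hunk above now takes i_mutex around the whole operation and still accepts only FALLOC_FL_KEEP_SIZE and FALLOC_FL_PUNCH_HOLE, while f2fs_sync_file() switches from i_mutex to fi->i_sem and ends its no-checkpoint path with f2fs_issue_flush(). A small userspace program that exercises exactly these entry points follows; the mount point and the sizes are assumptions made for the example.

/* Userspace exercise of the paths changed above: fallocate() with the two
 * modes f2fs_fallocate() accepts, followed by fsync(), which lands in
 * f2fs_sync_file().  Path and sizes are assumptions for this example only. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/f2fs/example", O_RDWR | O_CREAT, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* preallocate 1 MiB without changing i_size */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) < 0)
		perror("fallocate(KEEP_SIZE)");

	/* punch a 64 KiB hole; PUNCH_HOLE must be combined with KEEP_SIZE */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      0, 64 << 10) < 0)
		perror("fallocate(PUNCH_HOLE)");

	if (fsync(fd) < 0)		/* triggers f2fs_sync_file() */
		perror("fsync");

	close(fd);
	return 0;
}
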
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ea0371e854b4..b90dbe55403a 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -531,15 +531,10 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
531 set_page_dirty(page); 531 set_page_dirty(page);
532 set_cold_data(page); 532 set_cold_data(page);
533 } else { 533 } else {
534 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
535
536 f2fs_wait_on_page_writeback(page, DATA); 534 f2fs_wait_on_page_writeback(page, DATA);
537 535
538 if (clear_page_dirty_for_io(page) && 536 if (clear_page_dirty_for_io(page))
539 S_ISDIR(inode->i_mode)) {
540 dec_page_count(sbi, F2FS_DIRTY_DENTS);
541 inode_dec_dirty_dents(inode); 537 inode_dec_dirty_dents(inode);
542 }
543 set_cold_data(page); 538 set_cold_data(page);
544 do_write_data_page(page, &fio); 539 do_write_data_page(page, &fio);
545 clear_cold_data(page); 540 clear_cold_data(page);
@@ -701,6 +696,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
701gc_more: 696gc_more:
702 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) 697 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
703 goto stop; 698 goto stop;
699 if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
700 goto stop;
704 701
705 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { 702 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
706 gc_type = FG_GC; 703 gc_type = FG_GC;
@@ -711,6 +708,11 @@ gc_more:
711 goto stop; 708 goto stop;
712 ret = 0; 709 ret = 0;
713 710
711 /* readahead multi ssa blocks those have contiguous address */
712 if (sbi->segs_per_sec > 1)
713 ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
714 META_SSA);
715
714 for (i = 0; i < sbi->segs_per_sec; i++) 716 for (i = 0; i < sbi->segs_per_sec; i++)
715 do_garbage_collect(sbi, segno + i, &ilist, gc_type); 717 do_garbage_collect(sbi, segno + i, &ilist, gc_type);
716 718
@@ -740,7 +742,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
740int __init create_gc_caches(void) 742int __init create_gc_caches(void)
741{ 743{
742 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", 744 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
743 sizeof(struct inode_entry), NULL); 745 sizeof(struct inode_entry));
744 if (!winode_slab) 746 if (!winode_slab)
745 return -ENOMEM; 747 return -ENOMEM;
746 return 0; 748 return 0;
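
f2fs_gc() above now bails out when the checkpoint carries CP_ERROR_FLAG and, for multi-segment sections, reads ahead the segs_per_sec summary (SSA) blocks in one request before do_garbage_collect() walks the segments, relying on their contiguous meta addresses. A userspace analogy of that readahead, with posix_fadvise(POSIX_FADV_WILLNEED) standing in for ra_meta_pages(); the file, offsets and section geometry are made up for the illustration.

/* Analogy only: prefetch one contiguous range covering everything about to
 * be read, instead of faulting blocks in one by one. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/f2fs/some_big_file", O_RDONLY);
	unsigned int segs_per_sec = 4;	/* segments per section (assumed) */
	off_t first_blk = 5000;		/* first summary block index (made up) */
	size_t blksz = 4096;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* one request for segs_per_sec contiguous 4 KiB blocks */
	posix_fadvise(fd, first_blk * blksz, segs_per_sec * blksz,
		      POSIX_FADV_WILLNEED);

	close(fd);
	return 0;
}
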
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 31ee5b164ff9..383db1fabcf4 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -45,8 +45,10 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
45 } 45 }
46 46
47 ipage = get_node_page(sbi, inode->i_ino); 47 ipage = get_node_page(sbi, inode->i_ino);
48 if (IS_ERR(ipage)) 48 if (IS_ERR(ipage)) {
49 unlock_page(page);
49 return PTR_ERR(ipage); 50 return PTR_ERR(ipage);
51 }
50 52
51 zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); 53 zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
52 54
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 28cea76d78c6..ee829d360468 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -107,6 +107,7 @@ static int do_read_inode(struct inode *inode)
107 fi->flags = 0; 107 fi->flags = 0;
108 fi->i_advise = ri->i_advise; 108 fi->i_advise = ri->i_advise;
109 fi->i_pino = le32_to_cpu(ri->i_pino); 109 fi->i_pino = le32_to_cpu(ri->i_pino);
110 fi->i_dir_level = ri->i_dir_level;
110 111
111 get_extent_info(&fi->ext, ri->i_ext); 112 get_extent_info(&fi->ext, ri->i_ext);
112 get_inline_info(fi, ri); 113 get_inline_info(fi, ri);
@@ -204,6 +205,7 @@ void update_inode(struct inode *inode, struct page *node_page)
204 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); 205 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
205 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); 206 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
206 ri->i_generation = cpu_to_le32(inode->i_generation); 207 ri->i_generation = cpu_to_le32(inode->i_generation);
208 ri->i_dir_level = F2FS_I(inode)->i_dir_level;
207 209
208 __set_inode_rdev(inode, ri); 210 __set_inode_rdev(inode, ri);
209 set_cold_node(inode, node_page); 211 set_cold_node(inode, node_page);
@@ -212,24 +214,29 @@ void update_inode(struct inode *inode, struct page *node_page)
212 clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); 214 clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
213} 215}
214 216
215int update_inode_page(struct inode *inode) 217void update_inode_page(struct inode *inode)
216{ 218{
217 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 219 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
218 struct page *node_page; 220 struct page *node_page;
219 221retry:
220 node_page = get_node_page(sbi, inode->i_ino); 222 node_page = get_node_page(sbi, inode->i_ino);
221 if (IS_ERR(node_page)) 223 if (IS_ERR(node_page)) {
222 return PTR_ERR(node_page); 224 int err = PTR_ERR(node_page);
223 225 if (err == -ENOMEM) {
226 cond_resched();
227 goto retry;
228 } else if (err != -ENOENT) {
229 f2fs_stop_checkpoint(sbi);
230 }
231 return;
232 }
224 update_inode(inode, node_page); 233 update_inode(inode, node_page);
225 f2fs_put_page(node_page, 1); 234 f2fs_put_page(node_page, 1);
226 return 0;
227} 235}
228 236
229int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) 237int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
230{ 238{
231 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 239 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
232 int ret;
233 240
234 if (inode->i_ino == F2FS_NODE_INO(sbi) || 241 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
235 inode->i_ino == F2FS_META_INO(sbi)) 242 inode->i_ino == F2FS_META_INO(sbi))
@@ -243,13 +250,13 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
243 * during the urgent cleaning time when runing out of free sections. 250 * during the urgent cleaning time when runing out of free sections.
244 */ 251 */
245 f2fs_lock_op(sbi); 252 f2fs_lock_op(sbi);
246 ret = update_inode_page(inode); 253 update_inode_page(inode);
247 f2fs_unlock_op(sbi); 254 f2fs_unlock_op(sbi);
248 255
249 if (wbc) 256 if (wbc)
250 f2fs_balance_fs(sbi); 257 f2fs_balance_fs(sbi);
251 258
252 return ret; 259 return 0;
253} 260}
254 261
255/* 262/*
@@ -266,7 +273,7 @@ void f2fs_evict_inode(struct inode *inode)
266 inode->i_ino == F2FS_META_INO(sbi)) 273 inode->i_ino == F2FS_META_INO(sbi))
267 goto no_delete; 274 goto no_delete;
268 275
269 f2fs_bug_on(atomic_read(&F2FS_I(inode)->dirty_dents)); 276 f2fs_bug_on(get_dirty_dents(inode));
270 remove_dirty_dir_inode(inode); 277 remove_dirty_dir_inode(inode);
271 278
272 if (inode->i_nlink || is_bad_inode(inode)) 279 if (inode->i_nlink || is_bad_inode(inode))
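
update_inode_page() above becomes void: a get_node_page() failure with -ENOMEM is treated as transient and retried after cond_resched(), while any other error except -ENOENT stops checkpointing via f2fs_stop_checkpoint(). A standalone sketch of that retry shape; try_get() and its canned failures are inventions for the example, only the control flow mirrors the change.

/* Standalone illustration of the retry-on-ENOMEM pattern adopted above. */
#include <errno.h>
#include <sched.h>
#include <stdio.h>

static int try_get(void)
{
	static int calls;
	if (++calls < 3)
		return -ENOMEM;		/* pretend allocation failed twice */
	return 42;			/* "page" obtained */
}

int main(void)
{
	int ret;

retry:
	ret = try_get();
	if (ret < 0) {
		if (ret == -ENOMEM) {
			sched_yield();	/* rough analogue of cond_resched() */
			goto retry;	/* transient failure: try again */
		}
		return 1;		/* anything else would be fatal here */
	}
	printf("got %d after retries\n", ret);
	return 0;
}
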
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 397d459e97bf..a9409d19dfd4 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -207,6 +207,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
207 inode = f2fs_iget(dir->i_sb, ino); 207 inode = f2fs_iget(dir->i_sb, ino);
208 if (IS_ERR(inode)) 208 if (IS_ERR(inode))
209 return ERR_CAST(inode); 209 return ERR_CAST(inode);
210
211 stat_inc_inline_inode(inode);
210 } 212 }
211 213
212 return d_splice_alias(inode, dentry); 214 return d_splice_alias(inode, dentry);
@@ -424,12 +426,17 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
424 } 426 }
425 427
426 f2fs_set_link(new_dir, new_entry, new_page, old_inode); 428 f2fs_set_link(new_dir, new_entry, new_page, old_inode);
429 down_write(&F2FS_I(old_inode)->i_sem);
427 F2FS_I(old_inode)->i_pino = new_dir->i_ino; 430 F2FS_I(old_inode)->i_pino = new_dir->i_ino;
431 up_write(&F2FS_I(old_inode)->i_sem);
428 432
429 new_inode->i_ctime = CURRENT_TIME; 433 new_inode->i_ctime = CURRENT_TIME;
434 down_write(&F2FS_I(new_inode)->i_sem);
430 if (old_dir_entry) 435 if (old_dir_entry)
431 drop_nlink(new_inode); 436 drop_nlink(new_inode);
432 drop_nlink(new_inode); 437 drop_nlink(new_inode);
438 up_write(&F2FS_I(new_inode)->i_sem);
439
433 mark_inode_dirty(new_inode); 440 mark_inode_dirty(new_inode);
434 441
435 if (!new_inode->i_nlink) 442 if (!new_inode->i_nlink)
@@ -459,7 +466,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
459 if (old_dir != new_dir) { 466 if (old_dir != new_dir) {
460 f2fs_set_link(old_inode, old_dir_entry, 467 f2fs_set_link(old_inode, old_dir_entry,
461 old_dir_page, new_dir); 468 old_dir_page, new_dir);
469 down_write(&F2FS_I(old_inode)->i_sem);
462 F2FS_I(old_inode)->i_pino = new_dir->i_ino; 470 F2FS_I(old_inode)->i_pino = new_dir->i_ino;
471 up_write(&F2FS_I(old_inode)->i_sem);
463 update_inode_page(old_inode); 472 update_inode_page(old_inode);
464 } else { 473 } else {
465 kunmap(old_dir_page); 474 kunmap(old_dir_page);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b0649b76eb4f..a161e955c4c8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -21,9 +21,27 @@
21#include "segment.h" 21#include "segment.h"
22#include <trace/events/f2fs.h> 22#include <trace/events/f2fs.h>
23 23
24#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock)
25
24static struct kmem_cache *nat_entry_slab; 26static struct kmem_cache *nat_entry_slab;
25static struct kmem_cache *free_nid_slab; 27static struct kmem_cache *free_nid_slab;
26 28
29static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type)
30{
31 struct sysinfo val;
32 unsigned long mem_size = 0;
33
34 si_meminfo(&val);
35 if (type == FREE_NIDS)
36 mem_size = nm_i->fcnt * sizeof(struct free_nid);
37 else if (type == NAT_ENTRIES)
38 mem_size += nm_i->nat_cnt * sizeof(struct nat_entry);
39 mem_size >>= 12;
40
41 /* give 50:50 memory for free nids and nat caches respectively */
42 return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11));
43}
44
27static void clear_node_page_dirty(struct page *page) 45static void clear_node_page_dirty(struct page *page)
28{ 46{
29 struct address_space *mapping = page->mapping; 47 struct address_space *mapping = page->mapping;
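
available_free_memory() above converts the byte footprint of the cached objects into 4 KiB pages (mem_size >>= 12) and allows each of the two caches up to (totalram * ram_thresh) >> 11 pages. With the default threshold of 10 that is roughly 0.5% of RAM per cache, which matches the "10MB per 1GB ram" note added to node.h further down. A standalone check of the arithmetic; the totalram figure is an assumption for a machine with 1 GiB of 4 KiB pages.

/* Standalone check of the threshold arithmetic in available_free_memory(). */
#include <stdio.h>

int main(void)
{
	unsigned long totalram = 262144;	/* 1 GiB / 4 KiB pages (assumed) */
	unsigned long ram_thresh = 10;		/* DEF_RAM_THRESHOLD */
	unsigned long limit = (totalram * ram_thresh) >> 11;	/* per-cache cap, pages */

	/* 262144 * 10 / 2048 = 1280 pages = 5 MiB per cache, ~10 MiB combined */
	printf("per-cache cap: %lu pages (~%lu MiB), both caches: ~%lu MiB\n",
	       limit, limit * 4 / 1024, 2 * limit * 4 / 1024);
	return 0;
}
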
@@ -82,42 +100,6 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
82 return dst_page; 100 return dst_page;
83} 101}
84 102
85/*
86 * Readahead NAT pages
87 */
88static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
89{
90 struct address_space *mapping = META_MAPPING(sbi);
91 struct f2fs_nm_info *nm_i = NM_I(sbi);
92 struct page *page;
93 pgoff_t index;
94 int i;
95 struct f2fs_io_info fio = {
96 .type = META,
97 .rw = READ_SYNC | REQ_META | REQ_PRIO
98 };
99
100
101 for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
102 if (unlikely(nid >= nm_i->max_nid))
103 nid = 0;
104 index = current_nat_addr(sbi, nid);
105
106 page = grab_cache_page(mapping, index);
107 if (!page)
108 continue;
109 if (PageUptodate(page)) {
110 mark_page_accessed(page);
111 f2fs_put_page(page, 1);
112 continue;
113 }
114 f2fs_submit_page_mbio(sbi, page, index, &fio);
115 mark_page_accessed(page);
116 f2fs_put_page(page, 0);
117 }
118 f2fs_submit_merged_bio(sbi, META, READ);
119}
120
121static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) 103static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
122{ 104{
123 return radix_tree_lookup(&nm_i->nat_root, n); 105 return radix_tree_lookup(&nm_i->nat_root, n);
@@ -151,6 +133,20 @@ int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
151 return is_cp; 133 return is_cp;
152} 134}
153 135
136bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
137{
138 struct f2fs_nm_info *nm_i = NM_I(sbi);
139 struct nat_entry *e;
140 bool fsync_done = false;
141
142 read_lock(&nm_i->nat_tree_lock);
143 e = __lookup_nat_cache(nm_i, nid);
144 if (e)
145 fsync_done = e->fsync_done;
146 read_unlock(&nm_i->nat_tree_lock);
147 return fsync_done;
148}
149
154static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) 150static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
155{ 151{
156 struct nat_entry *new; 152 struct nat_entry *new;
@@ -164,6 +160,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
164 } 160 }
165 memset(new, 0, sizeof(struct nat_entry)); 161 memset(new, 0, sizeof(struct nat_entry));
166 nat_set_nid(new, nid); 162 nat_set_nid(new, nid);
163 new->checkpointed = true;
167 list_add_tail(&new->list, &nm_i->nat_entries); 164 list_add_tail(&new->list, &nm_i->nat_entries);
168 nm_i->nat_cnt++; 165 nm_i->nat_cnt++;
169 return new; 166 return new;
@@ -185,13 +182,12 @@ retry:
185 nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); 182 nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
186 nat_set_ino(e, le32_to_cpu(ne->ino)); 183 nat_set_ino(e, le32_to_cpu(ne->ino));
187 nat_set_version(e, ne->version); 184 nat_set_version(e, ne->version);
188 e->checkpointed = true;
189 } 185 }
190 write_unlock(&nm_i->nat_tree_lock); 186 write_unlock(&nm_i->nat_tree_lock);
191} 187}
192 188
193static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, 189static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
194 block_t new_blkaddr) 190 block_t new_blkaddr, bool fsync_done)
195{ 191{
196 struct f2fs_nm_info *nm_i = NM_I(sbi); 192 struct f2fs_nm_info *nm_i = NM_I(sbi);
197 struct nat_entry *e; 193 struct nat_entry *e;
@@ -205,7 +201,6 @@ retry:
205 goto retry; 201 goto retry;
206 } 202 }
207 e->ni = *ni; 203 e->ni = *ni;
208 e->checkpointed = true;
209 f2fs_bug_on(ni->blk_addr == NEW_ADDR); 204 f2fs_bug_on(ni->blk_addr == NEW_ADDR);
210 } else if (new_blkaddr == NEW_ADDR) { 205 } else if (new_blkaddr == NEW_ADDR) {
211 /* 206 /*
@@ -217,9 +212,6 @@ retry:
217 f2fs_bug_on(ni->blk_addr != NULL_ADDR); 212 f2fs_bug_on(ni->blk_addr != NULL_ADDR);
218 } 213 }
219 214
220 if (new_blkaddr == NEW_ADDR)
221 e->checkpointed = false;
222
223 /* sanity check */ 215 /* sanity check */
224 f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); 216 f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr);
225 f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR && 217 f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR &&
@@ -239,6 +231,11 @@ retry:
239 /* change address */ 231 /* change address */
240 nat_set_blkaddr(e, new_blkaddr); 232 nat_set_blkaddr(e, new_blkaddr);
241 __set_nat_cache_dirty(nm_i, e); 233 __set_nat_cache_dirty(nm_i, e);
234
235 /* update fsync_mark if its inode nat entry is still alive */
236 e = __lookup_nat_cache(nm_i, ni->ino);
237 if (e)
238 e->fsync_done = fsync_done;
242 write_unlock(&nm_i->nat_tree_lock); 239 write_unlock(&nm_i->nat_tree_lock);
243} 240}
244 241
@@ -246,7 +243,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
246{ 243{
247 struct f2fs_nm_info *nm_i = NM_I(sbi); 244 struct f2fs_nm_info *nm_i = NM_I(sbi);
248 245
249 if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD) 246 if (available_free_memory(nm_i, NAT_ENTRIES))
250 return 0; 247 return 0;
251 248
252 write_lock(&nm_i->nat_tree_lock); 249 write_lock(&nm_i->nat_tree_lock);
@@ -505,7 +502,7 @@ static void truncate_node(struct dnode_of_data *dn)
505 /* Deallocate node address */ 502 /* Deallocate node address */
506 invalidate_blocks(sbi, ni.blk_addr); 503 invalidate_blocks(sbi, ni.blk_addr);
507 dec_valid_node_count(sbi, dn->inode); 504 dec_valid_node_count(sbi, dn->inode);
508 set_node_addr(sbi, &ni, NULL_ADDR); 505 set_node_addr(sbi, &ni, NULL_ADDR, false);
509 506
510 if (dn->nid == dn->inode->i_ino) { 507 if (dn->nid == dn->inode->i_ino) {
511 remove_orphan_inode(sbi, dn->nid); 508 remove_orphan_inode(sbi, dn->nid);
@@ -763,7 +760,7 @@ skip_partial:
763 f2fs_put_page(page, 1); 760 f2fs_put_page(page, 1);
764 goto restart; 761 goto restart;
765 } 762 }
766 wait_on_page_writeback(page); 763 f2fs_wait_on_page_writeback(page, NODE);
767 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; 764 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
768 set_page_dirty(page); 765 set_page_dirty(page);
769 unlock_page(page); 766 unlock_page(page);
@@ -852,7 +849,8 @@ struct page *new_node_page(struct dnode_of_data *dn,
852 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 849 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
853 return ERR_PTR(-EPERM); 850 return ERR_PTR(-EPERM);
854 851
855 page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); 852 page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
853 dn->nid, AOP_FLAG_NOFS);
856 if (!page) 854 if (!page)
857 return ERR_PTR(-ENOMEM); 855 return ERR_PTR(-ENOMEM);
858 856
@@ -867,14 +865,14 @@ struct page *new_node_page(struct dnode_of_data *dn,
867 f2fs_bug_on(old_ni.blk_addr != NULL_ADDR); 865 f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);
868 new_ni = old_ni; 866 new_ni = old_ni;
869 new_ni.ino = dn->inode->i_ino; 867 new_ni.ino = dn->inode->i_ino;
870 set_node_addr(sbi, &new_ni, NEW_ADDR); 868 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
871 869
872 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); 870 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
873 set_cold_node(dn->inode, page); 871 set_cold_node(dn->inode, page);
874 SetPageUptodate(page); 872 SetPageUptodate(page);
875 set_page_dirty(page); 873 set_page_dirty(page);
876 874
877 if (ofs == XATTR_NODE_OFFSET) 875 if (f2fs_has_xattr_block(ofs))
878 F2FS_I(dn->inode)->i_xattr_nid = dn->nid; 876 F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
879 877
880 dn->node_page = page; 878 dn->node_page = page;
@@ -948,7 +946,8 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
948 struct page *page; 946 struct page *page;
949 int err; 947 int err;
950repeat: 948repeat:
951 page = grab_cache_page(NODE_MAPPING(sbi), nid); 949 page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
950 nid, AOP_FLAG_NOFS);
952 if (!page) 951 if (!page)
953 return ERR_PTR(-ENOMEM); 952 return ERR_PTR(-ENOMEM);
954 953
@@ -959,7 +958,7 @@ repeat:
959 goto got_it; 958 goto got_it;
960 959
961 lock_page(page); 960 lock_page(page);
962 if (unlikely(!PageUptodate(page))) { 961 if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
963 f2fs_put_page(page, 1); 962 f2fs_put_page(page, 1);
964 return ERR_PTR(-EIO); 963 return ERR_PTR(-EIO);
965 } 964 }
@@ -968,7 +967,6 @@ repeat:
968 goto repeat; 967 goto repeat;
969 } 968 }
970got_it: 969got_it:
971 f2fs_bug_on(nid != nid_of_node(page));
972 mark_page_accessed(page); 970 mark_page_accessed(page);
973 return page; 971 return page;
974} 972}
@@ -1168,7 +1166,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1168 continue; 1166 continue;
1169 1167
1170 if (ino && ino_of_node(page) == ino) { 1168 if (ino && ino_of_node(page) == ino) {
1171 wait_on_page_writeback(page); 1169 f2fs_wait_on_page_writeback(page, NODE);
1172 if (TestClearPageError(page)) 1170 if (TestClearPageError(page))
1173 ret = -EIO; 1171 ret = -EIO;
1174 } 1172 }
@@ -1201,7 +1199,7 @@ static int f2fs_write_node_page(struct page *page,
1201 if (unlikely(sbi->por_doing)) 1199 if (unlikely(sbi->por_doing))
1202 goto redirty_out; 1200 goto redirty_out;
1203 1201
1204 wait_on_page_writeback(page); 1202 f2fs_wait_on_page_writeback(page, NODE);
1205 1203
1206 /* get old block addr of this node page */ 1204 /* get old block addr of this node page */
1207 nid = nid_of_node(page); 1205 nid = nid_of_node(page);
@@ -1222,7 +1220,7 @@ static int f2fs_write_node_page(struct page *page,
1222 mutex_lock(&sbi->node_write); 1220 mutex_lock(&sbi->node_write);
1223 set_page_writeback(page); 1221 set_page_writeback(page);
1224 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); 1222 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
1225 set_node_addr(sbi, &ni, new_addr); 1223 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
1226 dec_page_count(sbi, F2FS_DIRTY_NODES); 1224 dec_page_count(sbi, F2FS_DIRTY_NODES);
1227 mutex_unlock(&sbi->node_write); 1225 mutex_unlock(&sbi->node_write);
1228 unlock_page(page); 1226 unlock_page(page);
@@ -1231,35 +1229,32 @@ static int f2fs_write_node_page(struct page *page,
1231redirty_out: 1229redirty_out:
1232 dec_page_count(sbi, F2FS_DIRTY_NODES); 1230 dec_page_count(sbi, F2FS_DIRTY_NODES);
1233 wbc->pages_skipped++; 1231 wbc->pages_skipped++;
1232 account_page_redirty(page);
1234 set_page_dirty(page); 1233 set_page_dirty(page);
1235 return AOP_WRITEPAGE_ACTIVATE; 1234 return AOP_WRITEPAGE_ACTIVATE;
1236} 1235}
1237 1236
1238/*
1239 * It is very important to gather dirty pages and write at once, so that we can
1240 * submit a big bio without interfering other data writes.
1241 * Be default, 512 pages (2MB) * 3 node types, is more reasonable.
1242 */
1243#define COLLECT_DIRTY_NODES 1536
1244static int f2fs_write_node_pages(struct address_space *mapping, 1237static int f2fs_write_node_pages(struct address_space *mapping,
1245 struct writeback_control *wbc) 1238 struct writeback_control *wbc)
1246{ 1239{
1247 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 1240 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1248 long nr_to_write = wbc->nr_to_write; 1241 long diff;
1249 1242
1250 /* balancing f2fs's metadata in background */ 1243 /* balancing f2fs's metadata in background */
1251 f2fs_balance_fs_bg(sbi); 1244 f2fs_balance_fs_bg(sbi);
1252 1245
1253 /* collect a number of dirty node pages and write together */ 1246 /* collect a number of dirty node pages and write together */
1254 if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES) 1247 if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
1255 return 0; 1248 goto skip_write;
1256 1249
1257 /* if mounting is failed, skip writing node pages */ 1250 diff = nr_pages_to_write(sbi, NODE, wbc);
1258 wbc->nr_to_write = 3 * max_hw_blocks(sbi);
1259 wbc->sync_mode = WB_SYNC_NONE; 1251 wbc->sync_mode = WB_SYNC_NONE;
1260 sync_node_pages(sbi, 0, wbc); 1252 sync_node_pages(sbi, 0, wbc);
1261 wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) - 1253 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1262 wbc->nr_to_write); 1254 return 0;
1255
1256skip_write:
1257 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
1263 return 0; 1258 return 0;
1264} 1259}
1265 1260
@@ -1307,22 +1302,17 @@ const struct address_space_operations f2fs_node_aops = {
1307 .releasepage = f2fs_release_node_page, 1302 .releasepage = f2fs_release_node_page,
1308}; 1303};
1309 1304
1310static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) 1305static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
1306 nid_t n)
1311{ 1307{
1312 struct list_head *this; 1308 return radix_tree_lookup(&nm_i->free_nid_root, n);
1313 struct free_nid *i;
1314 list_for_each(this, head) {
1315 i = list_entry(this, struct free_nid, list);
1316 if (i->nid == n)
1317 return i;
1318 }
1319 return NULL;
1320} 1309}
1321 1310
1322static void __del_from_free_nid_list(struct free_nid *i) 1311static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
1312 struct free_nid *i)
1323{ 1313{
1324 list_del(&i->list); 1314 list_del(&i->list);
1325 kmem_cache_free(free_nid_slab, i); 1315 radix_tree_delete(&nm_i->free_nid_root, i->nid);
1326} 1316}
1327 1317
1328static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) 1318static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
@@ -1331,7 +1321,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1331 struct nat_entry *ne; 1321 struct nat_entry *ne;
1332 bool allocated = false; 1322 bool allocated = false;
1333 1323
1334 if (nm_i->fcnt > 2 * MAX_FREE_NIDS) 1324 if (!available_free_memory(nm_i, FREE_NIDS))
1335 return -1; 1325 return -1;
1336 1326
1337 /* 0 nid should not be used */ 1327 /* 0 nid should not be used */
@@ -1342,7 +1332,8 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1342 /* do not add allocated nids */ 1332 /* do not add allocated nids */
1343 read_lock(&nm_i->nat_tree_lock); 1333 read_lock(&nm_i->nat_tree_lock);
1344 ne = __lookup_nat_cache(nm_i, nid); 1334 ne = __lookup_nat_cache(nm_i, nid);
1345 if (ne && nat_get_blkaddr(ne) != NULL_ADDR) 1335 if (ne &&
1336 (!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR))
1346 allocated = true; 1337 allocated = true;
1347 read_unlock(&nm_i->nat_tree_lock); 1338 read_unlock(&nm_i->nat_tree_lock);
1348 if (allocated) 1339 if (allocated)
@@ -1354,7 +1345,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1354 i->state = NID_NEW; 1345 i->state = NID_NEW;
1355 1346
1356 spin_lock(&nm_i->free_nid_list_lock); 1347 spin_lock(&nm_i->free_nid_list_lock);
1357 if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) { 1348 if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
1358 spin_unlock(&nm_i->free_nid_list_lock); 1349 spin_unlock(&nm_i->free_nid_list_lock);
1359 kmem_cache_free(free_nid_slab, i); 1350 kmem_cache_free(free_nid_slab, i);
1360 return 0; 1351 return 0;
@@ -1368,13 +1359,19 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1368static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) 1359static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1369{ 1360{
1370 struct free_nid *i; 1361 struct free_nid *i;
1362 bool need_free = false;
1363
1371 spin_lock(&nm_i->free_nid_list_lock); 1364 spin_lock(&nm_i->free_nid_list_lock);
1372 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1365 i = __lookup_free_nid_list(nm_i, nid);
1373 if (i && i->state == NID_NEW) { 1366 if (i && i->state == NID_NEW) {
1374 __del_from_free_nid_list(i); 1367 __del_from_free_nid_list(nm_i, i);
1375 nm_i->fcnt--; 1368 nm_i->fcnt--;
1369 need_free = true;
1376 } 1370 }
1377 spin_unlock(&nm_i->free_nid_list_lock); 1371 spin_unlock(&nm_i->free_nid_list_lock);
1372
1373 if (need_free)
1374 kmem_cache_free(free_nid_slab, i);
1378} 1375}
1379 1376
1380static void scan_nat_page(struct f2fs_nm_info *nm_i, 1377static void scan_nat_page(struct f2fs_nm_info *nm_i,
@@ -1413,7 +1410,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
1413 return; 1410 return;
1414 1411
1415 /* readahead nat pages to be scanned */ 1412 /* readahead nat pages to be scanned */
1416 ra_nat_pages(sbi, nid); 1413 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
1417 1414
1418 while (1) { 1415 while (1) {
1419 struct page *page = get_current_nat_page(sbi, nid); 1416 struct page *page = get_current_nat_page(sbi, nid);
@@ -1454,7 +1451,6 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1454{ 1451{
1455 struct f2fs_nm_info *nm_i = NM_I(sbi); 1452 struct f2fs_nm_info *nm_i = NM_I(sbi);
1456 struct free_nid *i = NULL; 1453 struct free_nid *i = NULL;
1457 struct list_head *this;
1458retry: 1454retry:
1459 if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid)) 1455 if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid))
1460 return false; 1456 return false;
@@ -1462,13 +1458,11 @@ retry:
1462 spin_lock(&nm_i->free_nid_list_lock); 1458 spin_lock(&nm_i->free_nid_list_lock);
1463 1459
1464 /* We should not use stale free nids created by build_free_nids */ 1460 /* We should not use stale free nids created by build_free_nids */
1465 if (nm_i->fcnt && !sbi->on_build_free_nids) { 1461 if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
1466 f2fs_bug_on(list_empty(&nm_i->free_nid_list)); 1462 f2fs_bug_on(list_empty(&nm_i->free_nid_list));
1467 list_for_each(this, &nm_i->free_nid_list) { 1463 list_for_each_entry(i, &nm_i->free_nid_list, list)
1468 i = list_entry(this, struct free_nid, list);
1469 if (i->state == NID_NEW) 1464 if (i->state == NID_NEW)
1470 break; 1465 break;
1471 }
1472 1466
1473 f2fs_bug_on(i->state != NID_NEW); 1467 f2fs_bug_on(i->state != NID_NEW);
1474 *nid = i->nid; 1468 *nid = i->nid;
@@ -1481,9 +1475,7 @@ retry:
1481 1475
1482 /* Let's scan nat pages and its caches to get free nids */ 1476 /* Let's scan nat pages and its caches to get free nids */
1483 mutex_lock(&nm_i->build_lock); 1477 mutex_lock(&nm_i->build_lock);
1484 sbi->on_build_free_nids = true;
1485 build_free_nids(sbi); 1478 build_free_nids(sbi);
1486 sbi->on_build_free_nids = false;
1487 mutex_unlock(&nm_i->build_lock); 1479 mutex_unlock(&nm_i->build_lock);
1488 goto retry; 1480 goto retry;
1489} 1481}
@@ -1497,10 +1489,12 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1497 struct free_nid *i; 1489 struct free_nid *i;
1498 1490
1499 spin_lock(&nm_i->free_nid_list_lock); 1491 spin_lock(&nm_i->free_nid_list_lock);
1500 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1492 i = __lookup_free_nid_list(nm_i, nid);
1501 f2fs_bug_on(!i || i->state != NID_ALLOC); 1493 f2fs_bug_on(!i || i->state != NID_ALLOC);
1502 __del_from_free_nid_list(i); 1494 __del_from_free_nid_list(nm_i, i);
1503 spin_unlock(&nm_i->free_nid_list_lock); 1495 spin_unlock(&nm_i->free_nid_list_lock);
1496
1497 kmem_cache_free(free_nid_slab, i);
1504} 1498}
1505 1499
1506/* 1500/*
@@ -1510,20 +1504,25 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1510{ 1504{
1511 struct f2fs_nm_info *nm_i = NM_I(sbi); 1505 struct f2fs_nm_info *nm_i = NM_I(sbi);
1512 struct free_nid *i; 1506 struct free_nid *i;
1507 bool need_free = false;
1513 1508
1514 if (!nid) 1509 if (!nid)
1515 return; 1510 return;
1516 1511
1517 spin_lock(&nm_i->free_nid_list_lock); 1512 spin_lock(&nm_i->free_nid_list_lock);
1518 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1513 i = __lookup_free_nid_list(nm_i, nid);
1519 f2fs_bug_on(!i || i->state != NID_ALLOC); 1514 f2fs_bug_on(!i || i->state != NID_ALLOC);
1520 if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { 1515 if (!available_free_memory(nm_i, FREE_NIDS)) {
1521 __del_from_free_nid_list(i); 1516 __del_from_free_nid_list(nm_i, i);
1517 need_free = true;
1522 } else { 1518 } else {
1523 i->state = NID_NEW; 1519 i->state = NID_NEW;
1524 nm_i->fcnt++; 1520 nm_i->fcnt++;
1525 } 1521 }
1526 spin_unlock(&nm_i->free_nid_list_lock); 1522 spin_unlock(&nm_i->free_nid_list_lock);
1523
1524 if (need_free)
1525 kmem_cache_free(free_nid_slab, i);
1527} 1526}
1528 1527
1529void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, 1528void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -1531,10 +1530,83 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1531 block_t new_blkaddr) 1530 block_t new_blkaddr)
1532{ 1531{
1533 rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); 1532 rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
1534 set_node_addr(sbi, ni, new_blkaddr); 1533 set_node_addr(sbi, ni, new_blkaddr, false);
1535 clear_node_page_dirty(page); 1534 clear_node_page_dirty(page);
1536} 1535}
1537 1536
1537void recover_inline_xattr(struct inode *inode, struct page *page)
1538{
1539 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1540 void *src_addr, *dst_addr;
1541 size_t inline_size;
1542 struct page *ipage;
1543 struct f2fs_inode *ri;
1544
1545 if (!f2fs_has_inline_xattr(inode))
1546 return;
1547
1548 if (!IS_INODE(page))
1549 return;
1550
1551 ri = F2FS_INODE(page);
1552 if (!(ri->i_inline & F2FS_INLINE_XATTR))
1553 return;
1554
1555 ipage = get_node_page(sbi, inode->i_ino);
1556 f2fs_bug_on(IS_ERR(ipage));
1557
1558 dst_addr = inline_xattr_addr(ipage);
1559 src_addr = inline_xattr_addr(page);
1560 inline_size = inline_xattr_size(inode);
1561
1562 memcpy(dst_addr, src_addr, inline_size);
1563
1564 update_inode(inode, ipage);
1565 f2fs_put_page(ipage, 1);
1566}
1567
1568bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1569{
1570 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1571 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
1572 nid_t new_xnid = nid_of_node(page);
1573 struct node_info ni;
1574
1575 recover_inline_xattr(inode, page);
1576
1577 if (!f2fs_has_xattr_block(ofs_of_node(page)))
1578 return false;
1579
1580 /* 1: invalidate the previous xattr nid */
1581 if (!prev_xnid)
1582 goto recover_xnid;
1583
1584 /* Deallocate node address */
1585 get_node_info(sbi, prev_xnid, &ni);
1586 f2fs_bug_on(ni.blk_addr == NULL_ADDR);
1587 invalidate_blocks(sbi, ni.blk_addr);
1588 dec_valid_node_count(sbi, inode);
1589 set_node_addr(sbi, &ni, NULL_ADDR, false);
1590
1591recover_xnid:
1592 /* 2: allocate new xattr nid */
1593 if (unlikely(!inc_valid_node_count(sbi, inode)))
1594 f2fs_bug_on(1);
1595
1596 remove_free_nid(NM_I(sbi), new_xnid);
1597 get_node_info(sbi, new_xnid, &ni);
1598 ni.ino = inode->i_ino;
1599 set_node_addr(sbi, &ni, NEW_ADDR, false);
1600 F2FS_I(inode)->i_xattr_nid = new_xnid;
1601
1602 /* 3: update xattr blkaddr */
1603 refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
1604 set_node_addr(sbi, &ni, blkaddr, false);
1605
1606 update_inode_page(inode);
1607 return true;
1608}
1609
1538int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) 1610int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1539{ 1611{
1540 struct f2fs_inode *src, *dst; 1612 struct f2fs_inode *src, *dst;
@@ -1567,7 +1639,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1567 1639
1568 if (unlikely(!inc_valid_node_count(sbi, NULL))) 1640 if (unlikely(!inc_valid_node_count(sbi, NULL)))
1569 WARN_ON(1); 1641 WARN_ON(1);
1570 set_node_addr(sbi, &new_ni, NEW_ADDR); 1642 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1571 inc_valid_inode_count(sbi); 1643 inc_valid_inode_count(sbi);
1572 f2fs_put_page(ipage, 1); 1644 f2fs_put_page(ipage, 1);
1573 return 0; 1645 return 0;
@@ -1590,15 +1662,8 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
1590 for (; page_idx < start + nrpages; page_idx++) { 1662 for (; page_idx < start + nrpages; page_idx++) {
1591 /* alloc temporal page for read node summary info*/ 1663 /* alloc temporal page for read node summary info*/
1592 page = alloc_page(GFP_F2FS_ZERO); 1664 page = alloc_page(GFP_F2FS_ZERO);
1593 if (!page) { 1665 if (!page)
1594 struct page *tmp; 1666 break;
1595 list_for_each_entry_safe(page, tmp, pages, lru) {
1596 list_del(&page->lru);
1597 unlock_page(page);
1598 __free_pages(page, 0);
1599 }
1600 return -ENOMEM;
1601 }
1602 1667
1603 lock_page(page); 1668 lock_page(page);
1604 page->index = page_idx; 1669 page->index = page_idx;
@@ -1609,7 +1674,8 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
1609 f2fs_submit_page_mbio(sbi, page, page->index, &fio); 1674 f2fs_submit_page_mbio(sbi, page, page->index, &fio);
1610 1675
1611 f2fs_submit_merged_bio(sbi, META, READ); 1676 f2fs_submit_merged_bio(sbi, META, READ);
1612 return 0; 1677
1678 return page_idx - start;
1613} 1679}
1614 1680
1615int restore_node_summary(struct f2fs_sb_info *sbi, 1681int restore_node_summary(struct f2fs_sb_info *sbi,
@@ -1628,15 +1694,17 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1628 addr = START_BLOCK(sbi, segno); 1694 addr = START_BLOCK(sbi, segno);
1629 sum_entry = &sum->entries[0]; 1695 sum_entry = &sum->entries[0];
1630 1696
1631 for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { 1697 for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
1632 nrpages = min(last_offset - i, bio_blocks); 1698 nrpages = min(last_offset - i, bio_blocks);
1633 1699
1634 /* read ahead node pages */ 1700 /* read ahead node pages */
1635 err = ra_sum_pages(sbi, &page_list, addr, nrpages); 1701 nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages);
1636 if (err) 1702 if (!nrpages)
1637 return err; 1703 return -ENOMEM;
1638 1704
1639 list_for_each_entry_safe(page, tmp, &page_list, lru) { 1705 list_for_each_entry_safe(page, tmp, &page_list, lru) {
1706 if (err)
1707 goto skip;
1640 1708
1641 lock_page(page); 1709 lock_page(page);
1642 if (unlikely(!PageUptodate(page))) { 1710 if (unlikely(!PageUptodate(page))) {
@@ -1648,9 +1716,9 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1648 sum_entry->ofs_in_node = 0; 1716 sum_entry->ofs_in_node = 0;
1649 sum_entry++; 1717 sum_entry++;
1650 } 1718 }
1651
1652 list_del(&page->lru);
1653 unlock_page(page); 1719 unlock_page(page);
1720skip:
1721 list_del(&page->lru);
1654 __free_pages(page, 0); 1722 __free_pages(page, 0);
1655 } 1723 }
1656 } 1724 }
@@ -1709,7 +1777,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1709 struct f2fs_nm_info *nm_i = NM_I(sbi); 1777 struct f2fs_nm_info *nm_i = NM_I(sbi);
1710 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1778 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1711 struct f2fs_summary_block *sum = curseg->sum_blk; 1779 struct f2fs_summary_block *sum = curseg->sum_blk;
1712 struct list_head *cur, *n; 1780 struct nat_entry *ne, *cur;
1713 struct page *page = NULL; 1781 struct page *page = NULL;
1714 struct f2fs_nat_block *nat_blk = NULL; 1782 struct f2fs_nat_block *nat_blk = NULL;
1715 nid_t start_nid = 0, end_nid = 0; 1783 nid_t start_nid = 0, end_nid = 0;
@@ -1721,18 +1789,17 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1721 mutex_lock(&curseg->curseg_mutex); 1789 mutex_lock(&curseg->curseg_mutex);
1722 1790
1723 /* 1) flush dirty nat caches */ 1791 /* 1) flush dirty nat caches */
1724 list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) { 1792 list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
1725 struct nat_entry *ne;
1726 nid_t nid; 1793 nid_t nid;
1727 struct f2fs_nat_entry raw_ne; 1794 struct f2fs_nat_entry raw_ne;
1728 int offset = -1; 1795 int offset = -1;
1729 block_t new_blkaddr; 1796 block_t new_blkaddr;
1730 1797
1731 ne = list_entry(cur, struct nat_entry, list);
1732 nid = nat_get_nid(ne);
1733
1734 if (nat_get_blkaddr(ne) == NEW_ADDR) 1798 if (nat_get_blkaddr(ne) == NEW_ADDR)
1735 continue; 1799 continue;
1800
1801 nid = nat_get_nid(ne);
1802
1736 if (flushed) 1803 if (flushed)
1737 goto to_nat_page; 1804 goto to_nat_page;
1738 1805
@@ -1783,16 +1850,12 @@ flush_now:
1783 } else { 1850 } else {
1784 write_lock(&nm_i->nat_tree_lock); 1851 write_lock(&nm_i->nat_tree_lock);
1785 __clear_nat_cache_dirty(nm_i, ne); 1852 __clear_nat_cache_dirty(nm_i, ne);
1786 ne->checkpointed = true;
1787 write_unlock(&nm_i->nat_tree_lock); 1853 write_unlock(&nm_i->nat_tree_lock);
1788 } 1854 }
1789 } 1855 }
1790 if (!flushed) 1856 if (!flushed)
1791 mutex_unlock(&curseg->curseg_mutex); 1857 mutex_unlock(&curseg->curseg_mutex);
1792 f2fs_put_page(page, 1); 1858 f2fs_put_page(page, 1);
1793
1794 /* 2) shrink nat caches if necessary */
1795 try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);
1796} 1859}
1797 1860
1798static int init_node_manager(struct f2fs_sb_info *sbi) 1861static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1807,10 +1870,14 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1807 /* segment_count_nat includes pair segment so divide to 2. */ 1870 /* segment_count_nat includes pair segment so divide to 2. */
1808 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; 1871 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
1809 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); 1872 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
1810 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; 1873
1874 /* not used nids: 0, node, meta, (and root counted as valid node) */
1875 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3;
1811 nm_i->fcnt = 0; 1876 nm_i->fcnt = 0;
1812 nm_i->nat_cnt = 0; 1877 nm_i->nat_cnt = 0;
1878 nm_i->ram_thresh = DEF_RAM_THRESHOLD;
1813 1879
1880 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
1814 INIT_LIST_HEAD(&nm_i->free_nid_list); 1881 INIT_LIST_HEAD(&nm_i->free_nid_list);
1815 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); 1882 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1816 INIT_LIST_HEAD(&nm_i->nat_entries); 1883 INIT_LIST_HEAD(&nm_i->nat_entries);
@@ -1864,8 +1931,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1864 spin_lock(&nm_i->free_nid_list_lock); 1931 spin_lock(&nm_i->free_nid_list_lock);
1865 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { 1932 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
1866 f2fs_bug_on(i->state == NID_ALLOC); 1933 f2fs_bug_on(i->state == NID_ALLOC);
1867 __del_from_free_nid_list(i); 1934 __del_from_free_nid_list(nm_i, i);
1868 nm_i->fcnt--; 1935 nm_i->fcnt--;
1936 spin_unlock(&nm_i->free_nid_list_lock);
1937 kmem_cache_free(free_nid_slab, i);
1938 spin_lock(&nm_i->free_nid_list_lock);
1869 } 1939 }
1870 f2fs_bug_on(nm_i->fcnt); 1940 f2fs_bug_on(nm_i->fcnt);
1871 spin_unlock(&nm_i->free_nid_list_lock); 1941 spin_unlock(&nm_i->free_nid_list_lock);
@@ -1875,11 +1945,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1875 while ((found = __gang_lookup_nat_cache(nm_i, 1945 while ((found = __gang_lookup_nat_cache(nm_i,
1876 nid, NATVEC_SIZE, natvec))) { 1946 nid, NATVEC_SIZE, natvec))) {
1877 unsigned idx; 1947 unsigned idx;
1878 for (idx = 0; idx < found; idx++) { 1948 nid = nat_get_nid(natvec[found - 1]) + 1;
1879 struct nat_entry *e = natvec[idx]; 1949 for (idx = 0; idx < found; idx++)
1880 nid = nat_get_nid(e) + 1; 1950 __del_from_nat_cache(nm_i, natvec[idx]);
1881 __del_from_nat_cache(nm_i, e);
1882 }
1883 } 1951 }
1884 f2fs_bug_on(nm_i->nat_cnt); 1952 f2fs_bug_on(nm_i->nat_cnt);
1885 write_unlock(&nm_i->nat_tree_lock); 1953 write_unlock(&nm_i->nat_tree_lock);
@@ -1892,12 +1960,12 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1892int __init create_node_manager_caches(void) 1960int __init create_node_manager_caches(void)
1893{ 1961{
1894 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 1962 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1895 sizeof(struct nat_entry), NULL); 1963 sizeof(struct nat_entry));
1896 if (!nat_entry_slab) 1964 if (!nat_entry_slab)
1897 return -ENOMEM; 1965 return -ENOMEM;
1898 1966
1899 free_nid_slab = f2fs_kmem_cache_create("free_nid", 1967 free_nid_slab = f2fs_kmem_cache_create("free_nid",
1900 sizeof(struct free_nid), NULL); 1968 sizeof(struct free_nid));
1901 if (!free_nid_slab) { 1969 if (!free_nid_slab) {
1902 kmem_cache_destroy(nat_entry_slab); 1970 kmem_cache_destroy(nat_entry_slab);
1903 return -ENOMEM; 1971 return -ENOMEM;
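
Two structural changes run through the free-nid code above: lookups now go through a radix tree keyed by nid (radix_tree_insert/lookup/delete on free_nid_root) while the list is kept for scanning, and every kmem_cache_free() of a free_nid happens only after free_nid_list_lock has been dropped, with a need_free flag remembering the decision made under the lock. A generic userspace illustration of that unlink-under-the-lock, free-after-unlock pattern; the types and the lock below are invented for the example.

/* Decide and unlink while holding the lock, release memory afterwards. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry { int nid; struct entry *next; };

static pthread_spinlock_t list_lock;
static struct entry *free_list;

static void remove_entry(int nid)
{
	struct entry **pp, *victim = NULL;

	pthread_spin_lock(&list_lock);
	for (pp = &free_list; *pp; pp = &(*pp)->next)
		if ((*pp)->nid == nid) {
			victim = *pp;
			*pp = victim->next;	/* unlink under the lock */
			break;
		}
	pthread_spin_unlock(&list_lock);

	free(victim);		/* free only after dropping the lock */
}

int main(void)
{
	struct entry *e = malloc(sizeof(*e));

	pthread_spin_init(&list_lock, PTHREAD_PROCESS_PRIVATE);
	e->nid = 7;
	e->next = NULL;
	free_list = e;

	remove_entry(7);
	printf("free_list is now %s\n", free_list ? "non-empty" : "empty");
	return 0;
}
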
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index c4c79885c993..5decc1a375f0 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -17,14 +17,11 @@
17/* # of pages to perform readahead before building free nids */ 17/* # of pages to perform readahead before building free nids */
18#define FREE_NID_PAGES 4 18#define FREE_NID_PAGES 4
19 19
20/* maximum # of free node ids to produce during build_free_nids */
21#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
22
23/* maximum readahead size for node during getting data blocks */ 20/* maximum readahead size for node during getting data blocks */
24#define MAX_RA_NODE 128 21#define MAX_RA_NODE 128
25 22
26/* maximum cached nat entries to manage memory footprint */ 23/* control the memory footprint threshold (10MB per 1GB ram) */
27#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK) 24#define DEF_RAM_THRESHOLD 10
28 25
29/* vector size for gang look-up from nat cache that consists of radix tree */ 26/* vector size for gang look-up from nat cache that consists of radix tree */
30#define NATVEC_SIZE 64 27#define NATVEC_SIZE 64
@@ -45,6 +42,7 @@ struct node_info {
45struct nat_entry { 42struct nat_entry {
46 struct list_head list; /* for clean or dirty nat list */ 43 struct list_head list; /* for clean or dirty nat list */
47 bool checkpointed; /* whether it is checkpointed or not */ 44 bool checkpointed; /* whether it is checkpointed or not */
45 bool fsync_done; /* whether the latest node has fsync mark */
48 struct node_info ni; /* in-memory node information */ 46 struct node_info ni; /* in-memory node information */
49}; 47};
50 48
@@ -58,9 +56,15 @@ struct nat_entry {
58#define nat_set_version(nat, v) (nat->ni.version = v) 56#define nat_set_version(nat, v) (nat->ni.version = v)
59 57
60#define __set_nat_cache_dirty(nm_i, ne) \ 58#define __set_nat_cache_dirty(nm_i, ne) \
61 list_move_tail(&ne->list, &nm_i->dirty_nat_entries); 59 do { \
60 ne->checkpointed = false; \
61 list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \
62 } while (0);
62#define __clear_nat_cache_dirty(nm_i, ne) \ 63#define __clear_nat_cache_dirty(nm_i, ne) \
63 list_move_tail(&ne->list, &nm_i->nat_entries); 64 do { \
65 ne->checkpointed = true; \
66 list_move_tail(&ne->list, &nm_i->nat_entries); \
67 } while (0);
64#define inc_node_version(version) (++version) 68#define inc_node_version(version) (++version)
65 69
66static inline void node_info_from_raw_nat(struct node_info *ni, 70static inline void node_info_from_raw_nat(struct node_info *ni,
@@ -71,6 +75,11 @@ static inline void node_info_from_raw_nat(struct node_info *ni,
71 ni->version = raw_ne->version; 75 ni->version = raw_ne->version;
72} 76}
73 77
78enum nid_type {
79 FREE_NIDS, /* indicates the free nid list */
80 NAT_ENTRIES /* indicates the cached nat entry */
81};
82
74/* 83/*
75 * For free nid mangement 84 * For free nid mangement
76 */ 85 */
@@ -236,7 +245,7 @@ static inline bool IS_DNODE(struct page *node_page)
236{ 245{
237 unsigned int ofs = ofs_of_node(node_page); 246 unsigned int ofs = ofs_of_node(node_page);
238 247
239 if (ofs == XATTR_NODE_OFFSET) 248 if (f2fs_has_xattr_block(ofs))
240 return false; 249 return false;
241 250
242 if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || 251 if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
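
__set_nat_cache_dirty() and __clear_nat_cache_dirty() above grow a second statement (toggling ne->checkpointed), so they are wrapped in do { ... } while (0). The point of that wrapper is that a multi-statement macro then behaves as a single statement at the call site, in particular inside an unbraced if/else. The sketch below shows that case with simplified stand-in macros, not the real f2fs ones; the stand-ins omit the trailing semicolon so that the caller supplies it.

/* Why multi-statement macros are wrapped in do { ... } while (0). */
#include <stdio.h>

#define MARK_DIRTY_BAD(x)	{ (x)->flag = 1; (x)->count++; }
#define MARK_DIRTY_GOOD(x)	do { (x)->flag = 1; (x)->count++; } while (0)

struct obj { int flag; int count; };

int main(void)
{
	struct obj o = { 0, 0 };
	int dirty = 1;

	/* With MARK_DIRTY_BAD(&o); here, the ';' after the expanded block
	 * would end the if statement and the else below would not compile. */
	if (dirty)
		MARK_DIRTY_GOOD(&o);
	else
		printf("clean\n");

	printf("flag=%d count=%d\n", o.flag, o.count);
	return 0;
}
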
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 976a7a934db5..b1ae89f0f44e 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -27,14 +27,12 @@ bool space_for_roll_forward(struct f2fs_sb_info *sbi)
27static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, 27static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
28 nid_t ino) 28 nid_t ino)
29{ 29{
30 struct list_head *this;
31 struct fsync_inode_entry *entry; 30 struct fsync_inode_entry *entry;
32 31
33 list_for_each(this, head) { 32 list_for_each_entry(entry, head, list)
34 entry = list_entry(this, struct fsync_inode_entry, list);
35 if (entry->inode->i_ino == ino) 33 if (entry->inode->i_ino == ino)
36 return entry; 34 return entry;
37 } 35
38 return NULL; 36 return NULL;
39} 37}
40 38
@@ -136,7 +134,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
136 134
137 /* get node pages in the current segment */ 135 /* get node pages in the current segment */
138 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 136 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
139 blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff; 137 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
140 138
141 /* read node page */ 139 /* read node page */
142 page = alloc_page(GFP_F2FS_ZERO); 140 page = alloc_page(GFP_F2FS_ZERO);
@@ -218,13 +216,12 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
218{ 216{
219 struct seg_entry *sentry; 217 struct seg_entry *sentry;
220 unsigned int segno = GET_SEGNO(sbi, blkaddr); 218 unsigned int segno = GET_SEGNO(sbi, blkaddr);
221 unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & 219 unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
222 (sbi->blocks_per_seg - 1); 220 struct f2fs_summary_block *sum_node;
223 struct f2fs_summary sum; 221 struct f2fs_summary sum;
222 struct page *sum_page, *node_page;
224 nid_t ino, nid; 223 nid_t ino, nid;
225 void *kaddr;
226 struct inode *inode; 224 struct inode *inode;
227 struct page *node_page;
228 unsigned int offset; 225 unsigned int offset;
229 block_t bidx; 226 block_t bidx;
230 int i; 227 int i;
@@ -238,18 +235,15 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
238 struct curseg_info *curseg = CURSEG_I(sbi, i); 235 struct curseg_info *curseg = CURSEG_I(sbi, i);
239 if (curseg->segno == segno) { 236 if (curseg->segno == segno) {
240 sum = curseg->sum_blk->entries[blkoff]; 237 sum = curseg->sum_blk->entries[blkoff];
241 break; 238 goto got_it;
242 } 239 }
243 } 240 }
244 if (i > CURSEG_COLD_DATA) {
245 struct page *sum_page = get_sum_page(sbi, segno);
246 struct f2fs_summary_block *sum_node;
247 kaddr = page_address(sum_page);
248 sum_node = (struct f2fs_summary_block *)kaddr;
249 sum = sum_node->entries[blkoff];
250 f2fs_put_page(sum_page, 1);
251 }
252 241
242 sum_page = get_sum_page(sbi, segno);
243 sum_node = (struct f2fs_summary_block *)page_address(sum_page);
244 sum = sum_node->entries[blkoff];
245 f2fs_put_page(sum_page, 1);
246got_it:
253 /* Use the locked dnode page and inode */ 247 /* Use the locked dnode page and inode */
254 nid = le32_to_cpu(sum.nid); 248 nid = le32_to_cpu(sum.nid);
255 if (dn->inode->i_ino == nid) { 249 if (dn->inode->i_ino == nid) {
@@ -301,6 +295,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
301 if (recover_inline_data(inode, page)) 295 if (recover_inline_data(inode, page))
302 goto out; 296 goto out;
303 297
298 if (recover_xattr_data(inode, page, blkaddr))
299 goto out;
300
304 start = start_bidx_of_node(ofs_of_node(page), fi); 301 start = start_bidx_of_node(ofs_of_node(page), fi);
305 if (IS_INODE(page)) 302 if (IS_INODE(page))
306 end = start + ADDRS_PER_INODE(fi); 303 end = start + ADDRS_PER_INODE(fi);
@@ -317,7 +314,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
317 goto out; 314 goto out;
318 } 315 }
319 316
320 wait_on_page_writeback(dn.node_page); 317 f2fs_wait_on_page_writeback(dn.node_page, NODE);
321 318
322 get_node_info(sbi, dn.nid, &ni); 319 get_node_info(sbi, dn.nid, &ni);
323 f2fs_bug_on(ni.ino != ino_of_node(page)); 320 f2fs_bug_on(ni.ino != ino_of_node(page));
@@ -437,7 +434,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
437 bool need_writecp = false; 434 bool need_writecp = false;
438 435
439 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", 436 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
440 sizeof(struct fsync_inode_entry), NULL); 437 sizeof(struct fsync_inode_entry));
441 if (!fsync_entry_slab) 438 if (!fsync_entry_slab)
442 return -ENOMEM; 439 return -ENOMEM;
443 440
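
get_fsync_inode() above, like several loops later in this series, switches from list_for_each() plus list_entry() to list_for_each_entry(), which folds the container_of() step into the iterator itself. A minimal userspace rendition of what that iterator does; the list plumbing below re-implements just enough of list.h to compile (and relies on the GCC typeof extension), it is an illustration rather than the kernel header.

/* Minimal rendition of list_for_each_entry(): the container_of() step the
 * open-coded loops used to spell out is hidden inside the iterator. */
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

#define list_for_each_entry(pos, head, member)				\
	for (pos = container_of((head)->next, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = container_of(pos->member.next, typeof(*pos), member))

struct fsync_inode_entry { unsigned int ino; struct list_head list; };

int main(void)
{
	struct list_head head = { &head, &head };
	struct fsync_inode_entry a = { .ino = 3 }, b = { .ino = 5 };
	struct fsync_inode_entry *entry;

	/* hand-rolled list_add_tail() calls, just enough for the demo */
	a.list.prev = &head;	a.list.next = &b.list;
	b.list.prev = &a.list;	b.list.next = &head;
	head.next = &a.list;	head.prev = &b.list;

	list_for_each_entry(entry, &head, list)
		if (entry->ino == 5)
			printf("found ino %u\n", entry->ino);
	return 0;
}
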
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 7caac5f2ca9e..085f548be7a3 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -13,6 +13,7 @@
13#include <linux/bio.h> 13#include <linux/bio.h>
14#include <linux/blkdev.h> 14#include <linux/blkdev.h>
15#include <linux/prefetch.h> 15#include <linux/prefetch.h>
16#include <linux/kthread.h>
16#include <linux/vmalloc.h> 17#include <linux/vmalloc.h>
17#include <linux/swap.h> 18#include <linux/swap.h>
18 19
@@ -24,6 +25,7 @@
24#define __reverse_ffz(x) __reverse_ffs(~(x)) 25#define __reverse_ffz(x) __reverse_ffs(~(x))
25 26
26static struct kmem_cache *discard_entry_slab; 27static struct kmem_cache *discard_entry_slab;
28static struct kmem_cache *flush_cmd_slab;
27 29
28/* 30/*
29 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since 31 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
@@ -195,6 +197,73 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
195 f2fs_sync_fs(sbi->sb, true); 197 f2fs_sync_fs(sbi->sb, true);
196} 198}
197 199
200static int issue_flush_thread(void *data)
201{
202 struct f2fs_sb_info *sbi = data;
203 struct f2fs_sm_info *sm_i = SM_I(sbi);
204 wait_queue_head_t *q = &sm_i->flush_wait_queue;
205repeat:
206 if (kthread_should_stop())
207 return 0;
208
209 spin_lock(&sm_i->issue_lock);
210 if (sm_i->issue_list) {
211 sm_i->dispatch_list = sm_i->issue_list;
212 sm_i->issue_list = sm_i->issue_tail = NULL;
213 }
214 spin_unlock(&sm_i->issue_lock);
215
216 if (sm_i->dispatch_list) {
217 struct bio *bio = bio_alloc(GFP_NOIO, 0);
218 struct flush_cmd *cmd, *next;
219 int ret;
220
221 bio->bi_bdev = sbi->sb->s_bdev;
222 ret = submit_bio_wait(WRITE_FLUSH, bio);
223
224 for (cmd = sm_i->dispatch_list; cmd; cmd = next) {
225 cmd->ret = ret;
226 next = cmd->next;
227 complete(&cmd->wait);
228 }
229 sm_i->dispatch_list = NULL;
230 }
231
232 wait_event_interruptible(*q, kthread_should_stop() || sm_i->issue_list);
233 goto repeat;
234}
235
236int f2fs_issue_flush(struct f2fs_sb_info *sbi)
237{
238 struct f2fs_sm_info *sm_i = SM_I(sbi);
239 struct flush_cmd *cmd;
240 int ret;
241
242 if (!test_opt(sbi, FLUSH_MERGE))
243 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
244
245 cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC);
246 cmd->next = NULL;
247 cmd->ret = 0;
248 init_completion(&cmd->wait);
249
250 spin_lock(&sm_i->issue_lock);
251 if (sm_i->issue_list)
252 sm_i->issue_tail->next = cmd;
253 else
254 sm_i->issue_list = cmd;
255 sm_i->issue_tail = cmd;
256 spin_unlock(&sm_i->issue_lock);
257
258 if (!sm_i->dispatch_list)
259 wake_up(&sm_i->flush_wait_queue);
260
261 wait_for_completion(&cmd->wait);
262 ret = cmd->ret;
263 kmem_cache_free(flush_cmd_slab, cmd);
264 return ret;
265}
266
198static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, 267static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
199 enum dirty_type dirty_type) 268 enum dirty_type dirty_type)
200{ 269{
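
issue_flush_thread() and f2fs_issue_flush() above let concurrent fsync callers share one cache flush: each caller queues a flush_cmd and sleeps on a completion, and a single kernel thread drains the whole queue and issues one WRITE_FLUSH bio for the batch; when the merge option is off, f2fs_issue_flush() simply falls back to blkdev_issue_flush(). A pthreads sketch of the same batching idea; every name in it, and the use of sync(), is an analogy invented for the example, not the kernel implementation.

/* Callers enqueue a command and wait; one worker drains the queue and
 * issues a single flush on behalf of the whole batch. */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct cmd { struct cmd *next; int done; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t need_flush = PTHREAD_COND_INITIALIZER;
static pthread_cond_t flushed = PTHREAD_COND_INITIALIZER;
static struct cmd *issue_list;

static void *flush_worker(void *unused)
{
	(void)unused;
	for (;;) {
		struct cmd *batch, *c;

		pthread_mutex_lock(&lock);
		while (!issue_list)
			pthread_cond_wait(&need_flush, &lock);
		batch = issue_list;		/* take every queued command */
		issue_list = NULL;
		pthread_mutex_unlock(&lock);

		sync();				/* one flush serves the batch */

		pthread_mutex_lock(&lock);
		for (c = batch; c; c = c->next)
			c->done = 1;
		pthread_cond_broadcast(&flushed);
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

static void issue_flush(void)
{
	struct cmd c = { .next = NULL, .done = 0 };

	pthread_mutex_lock(&lock);
	c.next = issue_list;			/* enqueue */
	issue_list = &c;
	pthread_cond_signal(&need_flush);
	while (!c.done)				/* like wait_for_completion() */
		pthread_cond_wait(&flushed, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, flush_worker, NULL);
	issue_flush();
	puts("flush completed");
	return 0;
}
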
@@ -340,8 +409,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
340void clear_prefree_segments(struct f2fs_sb_info *sbi) 409void clear_prefree_segments(struct f2fs_sb_info *sbi)
341{ 410{
342 struct list_head *head = &(SM_I(sbi)->discard_list); 411 struct list_head *head = &(SM_I(sbi)->discard_list);
343 struct list_head *this, *next; 412 struct discard_entry *entry, *this;
344 struct discard_entry *entry;
345 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 413 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
346 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; 414 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
347 unsigned int total_segs = TOTAL_SEGS(sbi); 415 unsigned int total_segs = TOTAL_SEGS(sbi);
@@ -370,8 +438,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
370 mutex_unlock(&dirty_i->seglist_lock); 438 mutex_unlock(&dirty_i->seglist_lock);
371 439
372 /* send small discards */ 440 /* send small discards */
373 list_for_each_safe(this, next, head) { 441 list_for_each_entry_safe(entry, this, head, list) {
374 entry = list_entry(this, struct discard_entry, list);
375 f2fs_issue_discard(sbi, entry->blkaddr, entry->len); 442 f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
376 list_del(&entry->list); 443 list_del(&entry->list);
377 SM_I(sbi)->nr_discards -= entry->len; 444 SM_I(sbi)->nr_discards -= entry->len;
@@ -405,7 +472,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
405 472
406 se = get_seg_entry(sbi, segno); 473 se = get_seg_entry(sbi, segno);
407 new_vblocks = se->valid_blocks + del; 474 new_vblocks = se->valid_blocks + del;
408 offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); 475 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
409 476
410 f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) || 477 f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) ||
411 (new_vblocks > sbi->blocks_per_seg))); 478 (new_vblocks > sbi->blocks_per_seg)));
@@ -434,12 +501,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
434 get_sec_entry(sbi, segno)->valid_blocks += del; 501 get_sec_entry(sbi, segno)->valid_blocks += del;
435} 502}
436 503
437static void refresh_sit_entry(struct f2fs_sb_info *sbi, 504void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
438 block_t old_blkaddr, block_t new_blkaddr)
439{ 505{
440 update_sit_entry(sbi, new_blkaddr, 1); 506 update_sit_entry(sbi, new, 1);
441 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) 507 if (GET_SEGNO(sbi, old) != NULL_SEGNO)
442 update_sit_entry(sbi, old_blkaddr, -1); 508 update_sit_entry(sbi, old, -1);
509
510 locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
511 locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
443} 512}
444 513
445void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) 514void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
@@ -881,17 +950,15 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
881 950
882 stat_inc_block_count(sbi, curseg); 951 stat_inc_block_count(sbi, curseg);
883 952
953 if (!__has_curseg_space(sbi, type))
954 sit_i->s_ops->allocate_segment(sbi, type, false);
884 /* 955 /*
885 * SIT information should be updated before segment allocation, 956 * SIT information should be updated before segment allocation,
886 * since SSR needs latest valid block information. 957 * since SSR needs latest valid block information.
887 */ 958 */
888 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 959 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
889
890 if (!__has_curseg_space(sbi, type))
891 sit_i->s_ops->allocate_segment(sbi, type, false);
892
893 locate_dirty_segment(sbi, old_cursegno); 960 locate_dirty_segment(sbi, old_cursegno);
894 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); 961
895 mutex_unlock(&sit_i->sentry_lock); 962 mutex_unlock(&sit_i->sentry_lock);
896 963
897 if (page && IS_NODESEG(type)) 964 if (page && IS_NODESEG(type))
@@ -987,14 +1054,11 @@ void recover_data_page(struct f2fs_sb_info *sbi,
987 change_curseg(sbi, type, true); 1054 change_curseg(sbi, type, true);
988 } 1055 }
989 1056
990 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & 1057 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
991 (sbi->blocks_per_seg - 1);
992 __add_sum_entry(sbi, type, sum); 1058 __add_sum_entry(sbi, type, sum);
993 1059
994 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); 1060 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
995
996 locate_dirty_segment(sbi, old_cursegno); 1061 locate_dirty_segment(sbi, old_cursegno);
997 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
998 1062
999 mutex_unlock(&sit_i->sentry_lock); 1063 mutex_unlock(&sit_i->sentry_lock);
1000 mutex_unlock(&curseg->curseg_mutex); 1064 mutex_unlock(&curseg->curseg_mutex);
@@ -1028,8 +1092,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
1028 curseg->next_segno = segno; 1092 curseg->next_segno = segno;
1029 change_curseg(sbi, type, true); 1093 change_curseg(sbi, type, true);
1030 } 1094 }
1031 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & 1095 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1032 (sbi->blocks_per_seg - 1);
1033 __add_sum_entry(sbi, type, sum); 1096 __add_sum_entry(sbi, type, sum);
1034 1097
1035 /* change the current log to the next block addr in advance */ 1098 /* change the current log to the next block addr in advance */
@@ -1037,28 +1100,50 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
1037 curseg->next_segno = next_segno; 1100 curseg->next_segno = next_segno;
1038 change_curseg(sbi, type, true); 1101 change_curseg(sbi, type, true);
1039 } 1102 }
1040 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) & 1103 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr);
1041 (sbi->blocks_per_seg - 1);
1042 1104
1043 /* rewrite node page */ 1105 /* rewrite node page */
1044 set_page_writeback(page); 1106 set_page_writeback(page);
1045 f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio); 1107 f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
1046 f2fs_submit_merged_bio(sbi, NODE, WRITE); 1108 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1047 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); 1109 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
1048
1049 locate_dirty_segment(sbi, old_cursegno); 1110 locate_dirty_segment(sbi, old_cursegno);
1050 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
1051 1111
1052 mutex_unlock(&sit_i->sentry_lock); 1112 mutex_unlock(&sit_i->sentry_lock);
1053 mutex_unlock(&curseg->curseg_mutex); 1113 mutex_unlock(&curseg->curseg_mutex);
1054} 1114}
1055 1115
1116static inline bool is_merged_page(struct f2fs_sb_info *sbi,
1117 struct page *page, enum page_type type)
1118{
1119 enum page_type btype = PAGE_TYPE_OF_BIO(type);
1120 struct f2fs_bio_info *io = &sbi->write_io[btype];
1121 struct bio_vec *bvec;
1122 int i;
1123
1124 down_read(&io->io_rwsem);
1125 if (!io->bio)
1126 goto out;
1127
1128 bio_for_each_segment_all(bvec, io->bio, i) {
1129 if (page == bvec->bv_page) {
1130 up_read(&io->io_rwsem);
1131 return true;
1132 }
1133 }
1134
1135out:
1136 up_read(&io->io_rwsem);
1137 return false;
1138}
1139
1056void f2fs_wait_on_page_writeback(struct page *page, 1140void f2fs_wait_on_page_writeback(struct page *page,
1057 enum page_type type) 1141 enum page_type type)
1058{ 1142{
1059 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 1143 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1060 if (PageWriteback(page)) { 1144 if (PageWriteback(page)) {
1061 f2fs_submit_merged_bio(sbi, type, WRITE); 1145 if (is_merged_page(sbi, page, type))
1146 f2fs_submit_merged_bio(sbi, type, WRITE);
1062 wait_on_page_writeback(page); 1147 wait_on_page_writeback(page);
1063 } 1148 }
1064} 1149}
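Previously f2fs_wait_on_page_writeback() forced the merged write bio out unconditionally before waiting; with is_merged_page() it submits the pending bio only when the page being waited on is actually part of it, so unrelated waits no longer break up a batch. A small user-space analogue of that "flush only if the item is in the pending batch" rule follows; the batch structure and names are invented for illustration.

#include <stdbool.h>
#include <stdio.h>

#define BATCH_MAX 8

struct write_batch {
	const void *pages[BATCH_MAX];
	int nr;
};

/* analogue of is_merged_page(): is this page queued in the pending batch? */
static bool batch_contains(const struct write_batch *b, const void *page)
{
	for (int i = 0; i < b->nr; i++)
		if (b->pages[i] == page)
			return true;
	return false;
}

static void submit_batch(struct write_batch *b)
{
	printf("submitting %d queued pages\n", b->nr);
	b->nr = 0;
}

/*
 * Analogue of f2fs_wait_on_page_writeback(): only force-submit the pending
 * batch when the page we are about to wait on is actually part of it.
 */
static void wait_on_page(struct write_batch *b, const void *page)
{
	if (batch_contains(b, page))
		submit_batch(b);
	/* ... then wait for writeback on 'page' to finish (omitted) ... */
}

int main(void)
{
	int a, c;
	struct write_batch b = { .pages = { &a }, .nr = 1 };

	wait_on_page(&b, &c);	/* not queued: no needless submit */
	wait_on_page(&b, &a);	/* queued: submit so the wait can finish */
	return 0;
}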
@@ -1167,9 +1252,12 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1167 ns->ofs_in_node = 0; 1252 ns->ofs_in_node = 0;
1168 } 1253 }
1169 } else { 1254 } else {
1170 if (restore_node_summary(sbi, segno, sum)) { 1255 int err;
1256
1257 err = restore_node_summary(sbi, segno, sum);
1258 if (err) {
1171 f2fs_put_page(new, 1); 1259 f2fs_put_page(new, 1);
1172 return -EINVAL; 1260 return err;
1173 } 1261 }
1174 } 1262 }
1175 } 1263 }
@@ -1190,6 +1278,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1190static int restore_curseg_summaries(struct f2fs_sb_info *sbi) 1278static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1191{ 1279{
1192 int type = CURSEG_HOT_DATA; 1280 int type = CURSEG_HOT_DATA;
1281 int err;
1193 1282
1194 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { 1283 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1195 /* restore for compacted data summary */ 1284 /* restore for compacted data summary */
@@ -1198,9 +1287,12 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1198 type = CURSEG_HOT_NODE; 1287 type = CURSEG_HOT_NODE;
1199 } 1288 }
1200 1289
1201 for (; type <= CURSEG_COLD_NODE; type++) 1290 for (; type <= CURSEG_COLD_NODE; type++) {
1202 if (read_normal_summaries(sbi, type)) 1291 err = read_normal_summaries(sbi, type);
1203 return -EINVAL; 1292 if (err)
1293 return err;
1294 }
1295
1204 return 0; 1296 return 0;
1205} 1297}
1206 1298
@@ -1583,47 +1675,6 @@ static int build_curseg(struct f2fs_sb_info *sbi)
1583 return restore_curseg_summaries(sbi); 1675 return restore_curseg_summaries(sbi);
1584} 1676}
1585 1677
1586static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages)
1587{
1588 struct address_space *mapping = META_MAPPING(sbi);
1589 struct page *page;
1590 block_t blk_addr, prev_blk_addr = 0;
1591 int sit_blk_cnt = SIT_BLK_CNT(sbi);
1592 int blkno = start;
1593 struct f2fs_io_info fio = {
1594 .type = META,
1595 .rw = READ_SYNC | REQ_META | REQ_PRIO
1596 };
1597
1598 for (; blkno < start + nrpages && blkno < sit_blk_cnt; blkno++) {
1599
1600 blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK);
1601
1602 if (blkno != start && prev_blk_addr + 1 != blk_addr)
1603 break;
1604 prev_blk_addr = blk_addr;
1605repeat:
1606 page = grab_cache_page(mapping, blk_addr);
1607 if (!page) {
1608 cond_resched();
1609 goto repeat;
1610 }
1611 if (PageUptodate(page)) {
1612 mark_page_accessed(page);
1613 f2fs_put_page(page, 1);
1614 continue;
1615 }
1616
1617 f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
1618
1619 mark_page_accessed(page);
1620 f2fs_put_page(page, 0);
1621 }
1622
1623 f2fs_submit_merged_bio(sbi, META, READ);
1624 return blkno - start;
1625}
1626
1627static void build_sit_entries(struct f2fs_sb_info *sbi) 1678static void build_sit_entries(struct f2fs_sb_info *sbi)
1628{ 1679{
1629 struct sit_info *sit_i = SIT_I(sbi); 1680 struct sit_info *sit_i = SIT_I(sbi);
@@ -1635,7 +1686,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
1635 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 1686 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
1636 1687
1637 do { 1688 do {
1638 readed = ra_sit_pages(sbi, start_blk, nrpages); 1689 readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
1639 1690
1640 start = start_blk * sit_i->sents_per_block; 1691 start = start_blk * sit_i->sents_per_block;
1641 end = (start_blk + readed) * sit_i->sents_per_block; 1692 end = (start_blk + readed) * sit_i->sents_per_block;
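ra_sit_pages() is removed in favour of the shared ra_meta_pages() helper (apparently introduced elsewhere in this series, in checkpoint.c), so SIT, NAT and checkpoint readahead go through one path. The stopping rule stays the same: keep reading ahead while the translated block addresses remain contiguous. A stand-alone sketch of that rule, with a made-up address map:

#include <stdio.h>

/*
 * Fake "logical meta block -> device block" mapping; the jump in the device
 * address between index 2 and 3 should stop the readahead batch early.
 */
static unsigned int blk_of(unsigned int index)
{
	static const unsigned int map[] = { 100, 101, 102, 200, 201 };
	return map[index];
}

/*
 * Read ahead up to nrpages meta blocks starting at 'start', but stop as soon
 * as the device addresses stop being contiguous (the rule ra_sit_pages used).
 */
static int ra_contiguous(unsigned int start, unsigned int nrpages, unsigned int total)
{
	unsigned int prev = 0, blkno;

	for (blkno = start; blkno < start + nrpages && blkno < total; blkno++) {
		unsigned int addr = blk_of(blkno);

		if (blkno != start && prev + 1 != addr)
			break;
		prev = addr;
		printf("queue read of device block %u\n", addr);
	}
	return blkno - start;	/* how many were actually issued */
}

int main(void)
{
	printf("read ahead %d pages\n", ra_contiguous(0, 5, 5));	/* stops after 3 */
	return 0;
}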
@@ -1781,6 +1832,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1781{ 1832{
1782 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 1833 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1783 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1834 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1835 dev_t dev = sbi->sb->s_bdev->bd_dev;
1784 struct f2fs_sm_info *sm_info; 1836 struct f2fs_sm_info *sm_info;
1785 int err; 1837 int err;
1786 1838
@@ -1799,7 +1851,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1799 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); 1851 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
1800 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); 1852 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
1801 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); 1853 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1802 sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS; 1854 sm_info->rec_prefree_segments = sm_info->main_segments *
1855 DEF_RECLAIM_PREFREE_SEGMENTS / 100;
1803 sm_info->ipu_policy = F2FS_IPU_DISABLE; 1856 sm_info->ipu_policy = F2FS_IPU_DISABLE;
1804 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 1857 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
1805 1858
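rec_prefree_segments switches from a fixed count (100 segments) to a percentage of the main area, with DEF_RECLAIM_PREFREE_SEGMENTS redefined to 5 in segment.h further down. The computation is plain integer percentage arithmetic; a quick check with an illustrative volume size (2MB segments assumed):

#include <stdio.h>

#define DEF_RECLAIM_PREFREE_SEGMENTS 5	/* 5% over total segments */

int main(void)
{
	unsigned int main_segments = 2048;	/* illustrative: ~4GB with 2MB segments */
	unsigned int rec_prefree =
		main_segments * DEF_RECLAIM_PREFREE_SEGMENTS / 100;

	printf("reclaim prefree threshold: %u segments\n", rec_prefree);	/* 102 */
	return 0;
}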
@@ -1807,6 +1860,16 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1807 sm_info->nr_discards = 0; 1860 sm_info->nr_discards = 0;
1808 sm_info->max_discards = 0; 1861 sm_info->max_discards = 0;
1809 1862
1863 if (test_opt(sbi, FLUSH_MERGE)) {
1864 spin_lock_init(&sm_info->issue_lock);
1865 init_waitqueue_head(&sm_info->flush_wait_queue);
1866
1867 sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
1868 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
1869 if (IS_ERR(sm_info->f2fs_issue_flush))
1870 return PTR_ERR(sm_info->f2fs_issue_flush);
1871 }
1872
1810 err = build_sit_info(sbi); 1873 err = build_sit_info(sbi);
1811 if (err) 1874 if (err)
1812 return err; 1875 return err;
@@ -1915,6 +1978,8 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
1915 struct f2fs_sm_info *sm_info = SM_I(sbi); 1978 struct f2fs_sm_info *sm_info = SM_I(sbi);
1916 if (!sm_info) 1979 if (!sm_info)
1917 return; 1980 return;
1981 if (sm_info->f2fs_issue_flush)
1982 kthread_stop(sm_info->f2fs_issue_flush);
1918 destroy_dirty_segmap(sbi); 1983 destroy_dirty_segmap(sbi);
1919 destroy_curseg(sbi); 1984 destroy_curseg(sbi);
1920 destroy_free_segmap(sbi); 1985 destroy_free_segmap(sbi);
@@ -1926,13 +1991,20 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
1926int __init create_segment_manager_caches(void) 1991int __init create_segment_manager_caches(void)
1927{ 1992{
1928 discard_entry_slab = f2fs_kmem_cache_create("discard_entry", 1993 discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
1929 sizeof(struct discard_entry), NULL); 1994 sizeof(struct discard_entry));
1930 if (!discard_entry_slab) 1995 if (!discard_entry_slab)
1931 return -ENOMEM; 1996 return -ENOMEM;
1997 flush_cmd_slab = f2fs_kmem_cache_create("flush_command",
1998 sizeof(struct flush_cmd));
1999 if (!flush_cmd_slab) {
2000 kmem_cache_destroy(discard_entry_slab);
2001 return -ENOMEM;
2002 }
1932 return 0; 2003 return 0;
1933} 2004}
1934 2005
1935void destroy_segment_manager_caches(void) 2006void destroy_segment_manager_caches(void)
1936{ 2007{
1937 kmem_cache_destroy(discard_entry_slab); 2008 kmem_cache_destroy(discard_entry_slab);
2009 kmem_cache_destroy(flush_cmd_slab);
1938} 2010}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 5731682d7516..7091204680f4 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -14,7 +14,7 @@
14#define NULL_SEGNO ((unsigned int)(~0)) 14#define NULL_SEGNO ((unsigned int)(~0))
15#define NULL_SECNO ((unsigned int)(~0)) 15#define NULL_SECNO ((unsigned int)(~0))
16 16
17#define DEF_RECLAIM_PREFREE_SEGMENTS 100 /* 200MB of prefree segments */ 17#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
18 18
19/* L: Logical segment # in volume, R: Relative segment # in main area */ 19/* L: Logical segment # in volume, R: Relative segment # in main area */
20#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) 20#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
@@ -57,6 +57,9 @@
57 ((blk_addr) - SM_I(sbi)->seg0_blkaddr) 57 ((blk_addr) - SM_I(sbi)->seg0_blkaddr)
58#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ 58#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
59 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) 59 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
60#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
61 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
62
60#define GET_SEGNO(sbi, blk_addr) \ 63#define GET_SEGNO(sbi, blk_addr) \
61 (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ 64 (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
62 NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ 65 NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
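GET_BLKOFF_FROM_SEG0 centralizes the "& (blocks_per_seg - 1)" masking that update_sit_entry(), recover_data_page() and rewrite_node_page() used to open-code. The mask is equivalent to a modulo only because blocks_per_seg is a power of two (512 in f2fs); a tiny stand-alone check of that identity with illustrative addresses:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int blocks_per_seg = 512;		/* power of two, as in f2fs */
	unsigned int seg0_blkaddr = 1024;		/* illustrative base address */
	unsigned int blk_addr = 1024 + 3 * 512 + 37;	/* 3 segments + 37 blocks in */

	unsigned int off_in_seg0 = blk_addr - seg0_blkaddr;
	unsigned int segno  = off_in_seg0 >> 9;			  /* log2(512) */
	unsigned int blkoff = off_in_seg0 & (blocks_per_seg - 1); /* GET_BLKOFF_FROM_SEG0 */

	assert(blkoff == off_in_seg0 % blocks_per_seg);
	printf("segment %u, block offset %u\n", segno, blkoff);	/* 3, 37 */
	return 0;
}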
@@ -377,26 +380,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
377 380
378static inline block_t written_block_count(struct f2fs_sb_info *sbi) 381static inline block_t written_block_count(struct f2fs_sb_info *sbi)
379{ 382{
380 struct sit_info *sit_i = SIT_I(sbi); 383 return SIT_I(sbi)->written_valid_blocks;
381 block_t vblocks;
382
383 mutex_lock(&sit_i->sentry_lock);
384 vblocks = sit_i->written_valid_blocks;
385 mutex_unlock(&sit_i->sentry_lock);
386
387 return vblocks;
388} 384}
389 385
390static inline unsigned int free_segments(struct f2fs_sb_info *sbi) 386static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
391{ 387{
392 struct free_segmap_info *free_i = FREE_I(sbi); 388 return FREE_I(sbi)->free_segments;
393 unsigned int free_segs;
394
395 read_lock(&free_i->segmap_lock);
396 free_segs = free_i->free_segments;
397 read_unlock(&free_i->segmap_lock);
398
399 return free_segs;
400} 389}
401 390
402static inline int reserved_segments(struct f2fs_sb_info *sbi) 391static inline int reserved_segments(struct f2fs_sb_info *sbi)
@@ -406,14 +395,7 @@ static inline int reserved_segments(struct f2fs_sb_info *sbi)
406 395
407static inline unsigned int free_sections(struct f2fs_sb_info *sbi) 396static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
408{ 397{
409 struct free_segmap_info *free_i = FREE_I(sbi); 398 return FREE_I(sbi)->free_sections;
410 unsigned int free_secs;
411
412 read_lock(&free_i->segmap_lock);
413 free_secs = free_i->free_sections;
414 read_unlock(&free_i->segmap_lock);
415
416 return free_secs;
417} 399}
418 400
419static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) 401static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi)
@@ -682,3 +664,46 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
682 struct request_queue *q = bdev_get_queue(bdev); 664 struct request_queue *q = bdev_get_queue(bdev);
683 return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); 665 return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q));
684} 666}
667
668/*
669 * It is very important to gather dirty pages and write at once, so that we can
670 * submit a big bio without interfering other data writes.
671 * By default, 512 pages for directory data,
672 * 512 pages (2MB) * 3 for three types of nodes, and
673 * max_bio_blocks for meta are set.
674 */
675static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
676{
677 if (type == DATA)
678 return sbi->blocks_per_seg;
679 else if (type == NODE)
680 return 3 * sbi->blocks_per_seg;
681 else if (type == META)
682 return MAX_BIO_BLOCKS(max_hw_blocks(sbi));
683 else
684 return 0;
685}
686
687/*
688 * When writing pages, it'd better align nr_to_write for segment size.
689 */
690static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
691 struct writeback_control *wbc)
692{
693 long nr_to_write, desired;
694
695 if (wbc->sync_mode != WB_SYNC_NONE)
696 return 0;
697
698 nr_to_write = wbc->nr_to_write;
699
700 if (type == DATA)
701 desired = 4096;
702 else if (type == NODE)
703 desired = 3 * max_hw_blocks(sbi);
704 else
705 desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
706
707 wbc->nr_to_write = desired;
708 return desired - nr_to_write;
709}
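nr_pages_to_write() inflates wbc->nr_to_write to a per-type batch for background (WB_SYNC_NONE) writeback and returns the padding it added, so the caller can subtract that padding again once the pages are out. A minimal user-space model of the adjust-and-repay pattern; the batch size and request size are illustrative.

#include <stdbool.h>
#include <stdio.h>

struct wbc_model {
	long nr_to_write;	/* pages the flusher asked for */
	bool sync;		/* WB_SYNC_ALL analogue */
};

/*
 * Inflate nr_to_write to a full batch for background writeback and return
 * the padding that was added (0 for sync writeback).
 */
static long pages_to_write(struct wbc_model *wbc, long desired_batch)
{
	long requested;

	if (wbc->sync)
		return 0;

	requested = wbc->nr_to_write;
	wbc->nr_to_write = desired_batch;
	return desired_batch - requested;
}

int main(void)
{
	struct wbc_model wbc = { .nr_to_write = 1000, .sync = false };
	long padding = pages_to_write(&wbc, 4096);	/* e.g. a DATA-sized batch */

	/* ... write up to wbc.nr_to_write pages here ... */

	wbc.nr_to_write -= padding;	/* repay the padding afterwards */
	printf("budget inflated by %ld pages; %ld left after repaying it\n",
	       padding, wbc.nr_to_write);
	return 0;
}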
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 856bdf994c0a..c756923a7302 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -51,6 +51,7 @@ enum {
51 Opt_disable_ext_identify, 51 Opt_disable_ext_identify,
52 Opt_inline_xattr, 52 Opt_inline_xattr,
53 Opt_inline_data, 53 Opt_inline_data,
54 Opt_flush_merge,
54 Opt_err, 55 Opt_err,
55}; 56};
56 57
@@ -67,6 +68,7 @@ static match_table_t f2fs_tokens = {
67 {Opt_disable_ext_identify, "disable_ext_identify"}, 68 {Opt_disable_ext_identify, "disable_ext_identify"},
68 {Opt_inline_xattr, "inline_xattr"}, 69 {Opt_inline_xattr, "inline_xattr"},
69 {Opt_inline_data, "inline_data"}, 70 {Opt_inline_data, "inline_data"},
71 {Opt_flush_merge, "flush_merge"},
70 {Opt_err, NULL}, 72 {Opt_err, NULL},
71}; 73};
72 74
@@ -74,6 +76,7 @@ static match_table_t f2fs_tokens = {
74enum { 76enum {
75 GC_THREAD, /* struct f2fs_gc_thread */ 77 GC_THREAD, /* struct f2fs_gc_thread */
76 SM_INFO, /* struct f2fs_sm_info */ 78 SM_INFO, /* struct f2fs_sm_info */
79 NM_INFO, /* struct f2fs_nm_info */
77 F2FS_SBI, /* struct f2fs_sb_info */ 80 F2FS_SBI, /* struct f2fs_sb_info */
78}; 81};
79 82
@@ -92,6 +95,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
92 return (unsigned char *)sbi->gc_thread; 95 return (unsigned char *)sbi->gc_thread;
93 else if (struct_type == SM_INFO) 96 else if (struct_type == SM_INFO)
94 return (unsigned char *)SM_I(sbi); 97 return (unsigned char *)SM_I(sbi);
98 else if (struct_type == NM_INFO)
99 return (unsigned char *)NM_I(sbi);
95 else if (struct_type == F2FS_SBI) 100 else if (struct_type == F2FS_SBI)
96 return (unsigned char *)sbi; 101 return (unsigned char *)sbi;
97 return NULL; 102 return NULL;
@@ -183,7 +188,9 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
183F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); 188F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
184F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); 189F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
185F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); 190F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
191F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
186F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); 192F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
193F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
187 194
188#define ATTR_LIST(name) (&f2fs_attr_##name.attr) 195#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
189static struct attribute *f2fs_attrs[] = { 196static struct attribute *f2fs_attrs[] = {
@@ -196,6 +203,8 @@ static struct attribute *f2fs_attrs[] = {
196 ATTR_LIST(ipu_policy), 203 ATTR_LIST(ipu_policy),
197 ATTR_LIST(min_ipu_util), 204 ATTR_LIST(min_ipu_util),
198 ATTR_LIST(max_victim_search), 205 ATTR_LIST(max_victim_search),
206 ATTR_LIST(dir_level),
207 ATTR_LIST(ram_thresh),
199 NULL, 208 NULL,
200}; 209};
201 210
@@ -256,9 +265,9 @@ static int parse_options(struct super_block *sb, char *options)
256 265
257 if (!name) 266 if (!name)
258 return -ENOMEM; 267 return -ENOMEM;
259 if (!strncmp(name, "on", 2)) 268 if (strlen(name) == 2 && !strncmp(name, "on", 2))
260 set_opt(sbi, BG_GC); 269 set_opt(sbi, BG_GC);
261 else if (!strncmp(name, "off", 3)) 270 else if (strlen(name) == 3 && !strncmp(name, "off", 3))
262 clear_opt(sbi, BG_GC); 271 clear_opt(sbi, BG_GC);
263 else { 272 else {
264 kfree(name); 273 kfree(name);
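The background_gc parser now insists on an exact-length match: the old bare strncmp() accepted any argument that merely starts with "on" or "off" (for example background_gc=online). A quick stand-alone demonstration of the difference; only the option strings mirror parse_options, the rest is illustrative.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool old_match_on(const char *name)
{
	return !strncmp(name, "on", 2);				/* prefix match only */
}

static bool new_match_on(const char *name)
{
	return strlen(name) == 2 && !strncmp(name, "on", 2);	/* exact match */
}

int main(void)
{
	const char *args[] = { "on", "online", "off" };

	for (int i = 0; i < 3; i++)
		printf("%-7s old:%d new:%d\n", args[i],
		       old_match_on(args[i]), new_match_on(args[i]));
	/* "online" matched the old test but is rejected by the new one */
	return 0;
}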
@@ -327,6 +336,9 @@ static int parse_options(struct super_block *sb, char *options)
327 case Opt_inline_data: 336 case Opt_inline_data:
328 set_opt(sbi, INLINE_DATA); 337 set_opt(sbi, INLINE_DATA);
329 break; 338 break;
339 case Opt_flush_merge:
340 set_opt(sbi, FLUSH_MERGE);
341 break;
330 default: 342 default:
331 f2fs_msg(sb, KERN_ERR, 343 f2fs_msg(sb, KERN_ERR,
332 "Unrecognized mount option \"%s\" or missing value", 344 "Unrecognized mount option \"%s\" or missing value",
@@ -353,12 +365,16 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
353 fi->i_current_depth = 1; 365 fi->i_current_depth = 1;
354 fi->i_advise = 0; 366 fi->i_advise = 0;
355 rwlock_init(&fi->ext.ext_lock); 367 rwlock_init(&fi->ext.ext_lock);
368 init_rwsem(&fi->i_sem);
356 369
357 set_inode_flag(fi, FI_NEW_INODE); 370 set_inode_flag(fi, FI_NEW_INODE);
358 371
359 if (test_opt(F2FS_SB(sb), INLINE_XATTR)) 372 if (test_opt(F2FS_SB(sb), INLINE_XATTR))
360 set_inode_flag(fi, FI_INLINE_XATTR); 373 set_inode_flag(fi, FI_INLINE_XATTR);
361 374
375 /* Will be used by directory only */
376 fi->i_dir_level = F2FS_SB(sb)->dir_level;
377
362 return &fi->vfs_inode; 378 return &fi->vfs_inode;
363} 379}
364 380
@@ -526,6 +542,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
526 seq_puts(seq, ",disable_ext_identify"); 542 seq_puts(seq, ",disable_ext_identify");
527 if (test_opt(sbi, INLINE_DATA)) 543 if (test_opt(sbi, INLINE_DATA))
528 seq_puts(seq, ",inline_data"); 544 seq_puts(seq, ",inline_data");
545 if (test_opt(sbi, FLUSH_MERGE))
546 seq_puts(seq, ",flush_merge");
529 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 547 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
530 548
531 return 0; 549 return 0;
@@ -539,13 +557,22 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
539 le32_to_cpu(sbi->raw_super->segment_count_main); 557 le32_to_cpu(sbi->raw_super->segment_count_main);
540 int i; 558 int i;
541 559
560 seq_puts(seq, "format: segment_type|valid_blocks\n"
561 "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
562
542 for (i = 0; i < total_segs; i++) { 563 for (i = 0; i < total_segs; i++) {
543 seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); 564 struct seg_entry *se = get_seg_entry(sbi, i);
544 if (i != 0 && (i % 10) == 0) 565
545 seq_puts(seq, "\n"); 566 if ((i % 10) == 0)
567 seq_printf(seq, "%-5d", i);
568 seq_printf(seq, "%d|%-3u", se->type,
569 get_valid_blocks(sbi, i, 1));
570 if ((i % 10) == 9 || i == (total_segs - 1))
571 seq_putc(seq, '\n');
546 else 572 else
547 seq_puts(seq, " "); 573 seq_putc(seq, ' ');
548 } 574 }
575
549 return 0; 576 return 0;
550} 577}
551 578
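segment_info_seq_show() now emits a two-line header, a row label every ten segments, and a segment_type|valid_blocks pair per entry instead of a bare block count. A user-space loop reproducing that layout over made-up per-segment data:

#include <stdio.h>

int main(void)
{
	int total_segs = 23;	/* illustrative */

	printf("format: segment_type|valid_blocks\n"
	       "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");

	for (int i = 0; i < total_segs; i++) {
		int type = i % 6;			/* fake seg_entry->type */
		unsigned int valid = (i * 37) % 512;	/* fake valid block count */

		if ((i % 10) == 0)
			printf("%-5d", i);	/* row label every ten segments */
		printf("%d|%-3u", type, valid);
		if ((i % 10) == 9 || i == total_segs - 1)
			putchar('\n');
		else
			putchar(' ');
	}
	return 0;
}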
@@ -640,6 +667,8 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
640 667
641 if (unlikely(ino < F2FS_ROOT_INO(sbi))) 668 if (unlikely(ino < F2FS_ROOT_INO(sbi)))
642 return ERR_PTR(-ESTALE); 669 return ERR_PTR(-ESTALE);
670 if (unlikely(ino >= NM_I(sbi)->max_nid))
671 return ERR_PTR(-ESTALE);
643 672
644 /* 673 /*
645 * f2fs_iget isn't quite right if the inode is currently unallocated! 674 * f2fs_iget isn't quite right if the inode is currently unallocated!
@@ -787,6 +816,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
787 816
788 for (i = 0; i < NR_COUNT_TYPE; i++) 817 for (i = 0; i < NR_COUNT_TYPE; i++)
789 atomic_set(&sbi->nr_pages[i], 0); 818 atomic_set(&sbi->nr_pages[i], 0);
819
820 sbi->dir_level = DEF_DIR_LEVEL;
790} 821}
791 822
792/* 823/*
@@ -898,11 +929,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
898 sbi->por_doing = false; 929 sbi->por_doing = false;
899 spin_lock_init(&sbi->stat_lock); 930 spin_lock_init(&sbi->stat_lock);
900 931
901 mutex_init(&sbi->read_io.io_mutex); 932 init_rwsem(&sbi->read_io.io_rwsem);
902 sbi->read_io.sbi = sbi; 933 sbi->read_io.sbi = sbi;
903 sbi->read_io.bio = NULL; 934 sbi->read_io.bio = NULL;
904 for (i = 0; i < NR_PAGE_TYPE; i++) { 935 for (i = 0; i < NR_PAGE_TYPE; i++) {
905 mutex_init(&sbi->write_io[i].io_mutex); 936 init_rwsem(&sbi->write_io[i].io_rwsem);
906 sbi->write_io[i].sbi = sbi; 937 sbi->write_io[i].sbi = sbi;
907 sbi->write_io[i].bio = NULL; 938 sbi->write_io[i].bio = NULL;
908 } 939 }
@@ -991,28 +1022,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
991 goto free_root_inode; 1022 goto free_root_inode;
992 } 1023 }
993 1024
994 /* recover fsynced data */
995 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
996 err = recover_fsync_data(sbi);
997 if (err)
998 f2fs_msg(sb, KERN_ERR,
999 "Cannot recover all fsync data errno=%ld", err);
1000 }
1001
1002 /*
1003 * If filesystem is not mounted as read-only then
1004 * do start the gc_thread.
1005 */
1006 if (!(sb->s_flags & MS_RDONLY)) {
1007 /* After POR, we can run background GC thread.*/
1008 err = start_gc_thread(sbi);
1009 if (err)
1010 goto free_gc;
1011 }
1012
1013 err = f2fs_build_stats(sbi); 1025 err = f2fs_build_stats(sbi);
1014 if (err) 1026 if (err)
1015 goto free_gc; 1027 goto free_root_inode;
1016 1028
1017 if (f2fs_proc_root) 1029 if (f2fs_proc_root)
1018 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); 1030 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
@@ -1034,17 +1046,36 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1034 err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, 1046 err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
1035 "%s", sb->s_id); 1047 "%s", sb->s_id);
1036 if (err) 1048 if (err)
1037 goto fail; 1049 goto free_proc;
1050
1051 /* recover fsynced data */
1052 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
1053 err = recover_fsync_data(sbi);
1054 if (err)
1055 f2fs_msg(sb, KERN_ERR,
1056 "Cannot recover all fsync data errno=%ld", err);
1057 }
1038 1058
1059 /*
1060 * If filesystem is not mounted as read-only then
1061 * do start the gc_thread.
1062 */
1063 if (!(sb->s_flags & MS_RDONLY)) {
1064 /* After POR, we can run background GC thread.*/
1065 err = start_gc_thread(sbi);
1066 if (err)
1067 goto free_kobj;
1068 }
1039 return 0; 1069 return 0;
1040fail: 1070
1071free_kobj:
1072 kobject_del(&sbi->s_kobj);
1073free_proc:
1041 if (sbi->s_proc) { 1074 if (sbi->s_proc) {
1042 remove_proc_entry("segment_info", sbi->s_proc); 1075 remove_proc_entry("segment_info", sbi->s_proc);
1043 remove_proc_entry(sb->s_id, f2fs_proc_root); 1076 remove_proc_entry(sb->s_id, f2fs_proc_root);
1044 } 1077 }
1045 f2fs_destroy_stats(sbi); 1078 f2fs_destroy_stats(sbi);
1046free_gc:
1047 stop_gc_thread(sbi);
1048free_root_inode: 1079free_root_inode:
1049 dput(sb->s_root); 1080 dput(sb->s_root);
1050 sb->s_root = NULL; 1081 sb->s_root = NULL;
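The tail of f2fs_fill_super() is reordered so the proc entries and the sysfs kobject exist before roll-forward recovery and the GC thread start, and the old catch-all fail label becomes staged free_kobj/free_proc labels that undo only what has already been set up. That is the usual goto-unwinding idiom; a minimal self-contained sketch with hypothetical setup steps:

#include <stdio.h>

static int setup_a(void) { return 0; }
static int setup_b(void) { return 0; }
static int setup_c(void) { return -1; }		/* pretend the last step fails */
static void undo_a(void) { puts("undo a"); }
static void undo_b(void) { puts("undo b"); }

/* Each label undoes only the steps that already succeeded, in reverse order. */
static int fill_super_like(void)
{
	int err;

	err = setup_a();
	if (err)
		goto out;
	err = setup_b();
	if (err)
		goto free_a;
	err = setup_c();
	if (err)
		goto free_b;
	return 0;

free_b:
	undo_b();
free_a:
	undo_a();
out:
	return err;
}

int main(void)
{
	printf("mount result: %d\n", fill_super_like());
	return 0;
}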
@@ -1084,7 +1115,7 @@ MODULE_ALIAS_FS("f2fs");
1084static int __init init_inodecache(void) 1115static int __init init_inodecache(void)
1085{ 1116{
1086 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", 1117 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
1087 sizeof(struct f2fs_inode_info), NULL); 1118 sizeof(struct f2fs_inode_info));
1088 if (!f2fs_inode_cachep) 1119 if (!f2fs_inode_cachep)
1089 return -ENOMEM; 1120 return -ENOMEM;
1090 return 0; 1121 return 0;
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 89d0422a91a8..503c2451131e 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -275,7 +275,7 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage)
275 275
276 inline_size = inline_xattr_size(inode); 276 inline_size = inline_xattr_size(inode);
277 277
278 txattr_addr = kzalloc(inline_size + size, GFP_KERNEL); 278 txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
279 if (!txattr_addr) 279 if (!txattr_addr)
280 return NULL; 280 return NULL;
281 281
@@ -407,6 +407,8 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
407 if (name == NULL) 407 if (name == NULL)
408 return -EINVAL; 408 return -EINVAL;
409 name_len = strlen(name); 409 name_len = strlen(name);
410 if (name_len > F2FS_NAME_LEN)
411 return -ERANGE;
410 412
411 base_addr = read_all_xattrs(inode, NULL); 413 base_addr = read_all_xattrs(inode, NULL);
412 if (!base_addr) 414 if (!base_addr)
@@ -590,7 +592,10 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
590 f2fs_balance_fs(sbi); 592 f2fs_balance_fs(sbi);
591 593
592 f2fs_lock_op(sbi); 594 f2fs_lock_op(sbi);
595 /* protect xattr_ver */
596 down_write(&F2FS_I(inode)->i_sem);
593 err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); 597 err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage);
598 up_write(&F2FS_I(inode)->i_sem);
594 f2fs_unlock_op(sbi); 599 f2fs_unlock_op(sbi);
595 600
596 return err; 601 return err;
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 23e363f38302..13b691a8a7d2 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -569,7 +569,7 @@ static ssize_t cuse_class_waiting_show(struct device *dev,
569 569
570 return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting)); 570 return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting));
571} 571}
572static DEVICE_ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL); 572static DEVICE_ATTR(waiting, 0400, cuse_class_waiting_show, NULL);
573 573
574static ssize_t cuse_class_abort_store(struct device *dev, 574static ssize_t cuse_class_abort_store(struct device *dev,
575 struct device_attribute *attr, 575 struct device_attribute *attr,
@@ -580,7 +580,7 @@ static ssize_t cuse_class_abort_store(struct device *dev,
580 fuse_abort_conn(&cc->fc); 580 fuse_abort_conn(&cc->fc);
581 return count; 581 return count;
582} 582}
583static DEVICE_ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store); 583static DEVICE_ATTR(abort, 0200, NULL, cuse_class_abort_store);
584 584
585static struct attribute *cuse_class_dev_attrs[] = { 585static struct attribute *cuse_class_dev_attrs[] = {
586 &dev_attr_waiting.attr, 586 &dev_attr_waiting.attr,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 65df7d8be4f5..48992cac714b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2117,6 +2117,7 @@ static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2117static const struct vm_operations_struct fuse_file_vm_ops = { 2117static const struct vm_operations_struct fuse_file_vm_ops = {
2118 .close = fuse_vma_close, 2118 .close = fuse_vma_close,
2119 .fault = filemap_fault, 2119 .fault = filemap_fault,
2120 .map_pages = filemap_map_pages,
2120 .page_mkwrite = fuse_page_mkwrite, 2121 .page_mkwrite = fuse_page_mkwrite,
2121 .remap_pages = generic_file_remap_pages, 2122 .remap_pages = generic_file_remap_pages,
2122}; 2123};
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6c794085abac..80d67253623c 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -494,6 +494,7 @@ out:
494 494
495static const struct vm_operations_struct gfs2_vm_ops = { 495static const struct vm_operations_struct gfs2_vm_ops = {
496 .fault = filemap_fault, 496 .fault = filemap_fault,
497 .map_pages = filemap_map_pages,
497 .page_mkwrite = gfs2_page_mkwrite, 498 .page_mkwrite = gfs2_page_mkwrite,
498 .remap_pages = generic_file_remap_pages, 499 .remap_pages = generic_file_remap_pages,
499}; 500};
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 6af66ee56390..4556ce1af5b0 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -93,7 +93,7 @@ static void init_once(void *foo)
93 inode_init_once(&ei->vfs_inode); 93 inode_init_once(&ei->vfs_inode);
94} 94}
95 95
96static int init_inodecache(void) 96static int __init init_inodecache(void)
97{ 97{
98 isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", 98 isofs_inode_cachep = kmem_cache_create("isofs_inode_cache",
99 sizeof(struct iso_inode_info), 99 sizeof(struct iso_inode_info),
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 16a5047903a6..406d9cc84ba8 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -33,7 +33,7 @@ static int jffs2_rtime_compress(unsigned char *data_in,
33 unsigned char *cpage_out, 33 unsigned char *cpage_out,
34 uint32_t *sourcelen, uint32_t *dstlen) 34 uint32_t *sourcelen, uint32_t *dstlen)
35{ 35{
36 short positions[256]; 36 unsigned short positions[256];
37 int outpos = 0; 37 int outpos = 0;
38 int pos=0; 38 int pos=0;
39 39
@@ -74,7 +74,7 @@ static int jffs2_rtime_decompress(unsigned char *data_in,
74 unsigned char *cpage_out, 74 unsigned char *cpage_out,
75 uint32_t srclen, uint32_t destlen) 75 uint32_t srclen, uint32_t destlen)
76{ 76{
77 short positions[256]; 77 unsigned short positions[256];
78 int outpos = 0; 78 int outpos = 0;
79 int pos=0; 79 int pos=0;
80 80
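positions[] records the last offset at which each byte value occurred; with a signed short, offsets beyond 32767 (possible once the chunk being compressed exceeds 32KB, e.g. with 64KB pages) would wrap negative, which is presumably what the switch to unsigned short addresses. A small demonstration of the wrap with an illustrative offset (typical two's-complement behaviour assumed):

#include <stdio.h>

int main(void)
{
	int offset = 40000;		/* an input offset past 32767 */

	short          s_pos = (short)offset;		/* old table element type */
	unsigned short u_pos = (unsigned short)offset;	/* new table element type */

	printf("stored as signed short:   %d\n", s_pos);	/* -25536 */
	printf("stored as unsigned short: %u\n", u_pos);	/* 40000  */
	return 0;
}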
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index f73991522672..601afd1afddf 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -457,12 +457,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
457 The umask is only applied if there's no default ACL */ 457 The umask is only applied if there's no default ACL */
458 ret = jffs2_init_acl_pre(dir_i, inode, &mode); 458 ret = jffs2_init_acl_pre(dir_i, inode, &mode);
459 if (ret) { 459 if (ret) {
460 make_bad_inode(inode); 460 mutex_unlock(&f->sem);
461 iput(inode); 461 make_bad_inode(inode);
462 return ERR_PTR(ret); 462 iput(inode);
463 return ERR_PTR(ret);
463 } 464 }
464 ret = jffs2_do_new_inode (c, f, mode, ri); 465 ret = jffs2_do_new_inode (c, f, mode, ri);
465 if (ret) { 466 if (ret) {
467 mutex_unlock(&f->sem);
466 make_bad_inode(inode); 468 make_bad_inode(inode);
467 iput(inode); 469 iput(inode);
468 return ERR_PTR(ret); 470 return ERR_PTR(ret);
@@ -479,6 +481,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
479 inode->i_size = 0; 481 inode->i_size = 0;
480 482
481 if (insert_inode_locked(inode) < 0) { 483 if (insert_inode_locked(inode) < 0) {
484 mutex_unlock(&f->sem);
482 make_bad_inode(inode); 485 make_bad_inode(inode);
483 iput(inode); 486 iput(inode);
484 return ERR_PTR(-EINVAL); 487 return ERR_PTR(-EINVAL);
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index e4619b00f7c5..fa35ff79ab35 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -231,7 +231,7 @@ struct jffs2_tmp_dnode_info
231 uint32_t version; 231 uint32_t version;
232 uint32_t data_crc; 232 uint32_t data_crc;
233 uint32_t partial_crc; 233 uint32_t partial_crc;
234 uint16_t csize; 234 uint32_t csize;
235 uint16_t overlapped; 235 uint16_t overlapped;
236}; 236};
237 237
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 03310721712f..b6bd4affd9ad 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -179,6 +179,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
179 spin_unlock(&c->erase_completion_lock); 179 spin_unlock(&c->erase_completion_lock);
180 180
181 schedule(); 181 schedule();
182 remove_wait_queue(&c->erase_wait, &wait);
182 } else 183 } else
183 spin_unlock(&c->erase_completion_lock); 184 spin_unlock(&c->erase_completion_lock);
184 } else if (ret) 185 } else if (ret)
@@ -211,20 +212,25 @@ out:
211int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, 212int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize,
212 uint32_t *len, uint32_t sumsize) 213 uint32_t *len, uint32_t sumsize)
213{ 214{
214 int ret = -EAGAIN; 215 int ret;
215 minsize = PAD(minsize); 216 minsize = PAD(minsize);
216 217
217 jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize); 218 jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize);
218 219
219 spin_lock(&c->erase_completion_lock); 220 while (true) {
220 while(ret == -EAGAIN) { 221 spin_lock(&c->erase_completion_lock);
221 ret = jffs2_do_reserve_space(c, minsize, len, sumsize); 222 ret = jffs2_do_reserve_space(c, minsize, len, sumsize);
222 if (ret) { 223 if (ret) {
223 jffs2_dbg(1, "%s(): looping, ret is %d\n", 224 jffs2_dbg(1, "%s(): looping, ret is %d\n",
224 __func__, ret); 225 __func__, ret);
225 } 226 }
227 spin_unlock(&c->erase_completion_lock);
228
229 if (ret == -EAGAIN)
230 cond_resched();
231 else
232 break;
226 } 233 }
227 spin_unlock(&c->erase_completion_lock);
228 if (!ret) 234 if (!ret)
229 ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); 235 ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1);
230 236
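jffs2_reserve_space_gc() now takes and releases erase_completion_lock once per attempt and yields between -EAGAIN retries, rather than looping on the result while holding the lock. A user-space sketch of that retry shape, substituting a pthread mutex for the spinlock and sched_yield() for cond_resched(); the simulated failures are made up.

#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Pretend the first two attempts fail with -EAGAIN, then succeed. */
static int do_reserve_space(void)
{
	static int attempts;
	return ++attempts < 3 ? -EAGAIN : 0;
}

static int reserve_space_gc(void)
{
	int ret;

	while (1) {
		pthread_mutex_lock(&lock);	/* lock held for one attempt only */
		ret = do_reserve_space();
		pthread_mutex_unlock(&lock);

		if (ret == -EAGAIN)
			sched_yield();		/* stand-in for cond_resched() */
		else
			break;
	}
	return ret;
}

int main(void)
{
	printf("reserve returned %d\n", reserve_space_gc());
	return 0;
}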
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 10d6c41aecad..6bf06a07f3e0 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -235,6 +235,7 @@ out_err:
235 if (warned++ == 0) 235 if (warned++ == 0)
236 printk(KERN_WARNING 236 printk(KERN_WARNING
237 "lockd_up: makesock failed, error=%d\n", err); 237 "lockd_up: makesock failed, error=%d\n", err);
238 svc_shutdown_net(serv, net);
238 return err; 239 return err;
239} 240}
240 241
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index c320ac52353e..08b8ea8c353e 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -339,7 +339,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
339 if (val) 339 if (val)
340 goto finished; 340 goto finished;
341 341
342 DDPRINTK("ncp_lookup_validate: %pd2 not valid, age=%ld, server lookup\n", 342 ncp_dbg(2, "%pd2 not valid, age=%ld, server lookup\n",
343 dentry, NCP_GET_AGE(dentry)); 343 dentry, NCP_GET_AGE(dentry));
344 344
345 len = sizeof(__name); 345 len = sizeof(__name);
@@ -358,7 +358,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
358 res = ncp_obtain_info(server, dir, __name, &(finfo.i)); 358 res = ncp_obtain_info(server, dir, __name, &(finfo.i));
359 } 359 }
360 finfo.volume = finfo.i.volNumber; 360 finfo.volume = finfo.i.volNumber;
361 DDPRINTK("ncp_lookup_validate: looked for %pd/%s, res=%d\n", 361 ncp_dbg(2, "looked for %pd/%s, res=%d\n",
362 dentry->d_parent, __name, res); 362 dentry->d_parent, __name, res);
363 /* 363 /*
364 * If we didn't find it, or if it has a different dirEntNum to 364 * If we didn't find it, or if it has a different dirEntNum to
@@ -372,14 +372,14 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
372 ncp_new_dentry(dentry); 372 ncp_new_dentry(dentry);
373 val=1; 373 val=1;
374 } else 374 } else
375 DDPRINTK("ncp_lookup_validate: found, but dirEntNum changed\n"); 375 ncp_dbg(2, "found, but dirEntNum changed\n");
376 376
377 ncp_update_inode2(inode, &finfo); 377 ncp_update_inode2(inode, &finfo);
378 mutex_unlock(&inode->i_mutex); 378 mutex_unlock(&inode->i_mutex);
379 } 379 }
380 380
381finished: 381finished:
382 DDPRINTK("ncp_lookup_validate: result=%d\n", val); 382 ncp_dbg(2, "result=%d\n", val);
383 dput(parent); 383 dput(parent);
384 return val; 384 return val;
385} 385}
@@ -453,8 +453,7 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx)
453 ctl.page = NULL; 453 ctl.page = NULL;
454 ctl.cache = NULL; 454 ctl.cache = NULL;
455 455
456 DDPRINTK("ncp_readdir: reading %pD2, pos=%d\n", file, 456 ncp_dbg(2, "reading %pD2, pos=%d\n", file, (int)ctx->pos);
457 (int) ctx->pos);
458 457
459 result = -EIO; 458 result = -EIO;
460 /* Do not generate '.' and '..' when server is dead. */ 459 /* Do not generate '.' and '..' when server is dead. */
@@ -697,8 +696,7 @@ ncp_read_volume_list(struct file *file, struct dir_context *ctx,
697 struct ncp_entry_info entry; 696 struct ncp_entry_info entry;
698 int i; 697 int i;
699 698
700 DPRINTK("ncp_read_volume_list: pos=%ld\n", 699 ncp_dbg(1, "pos=%ld\n", (unsigned long)ctx->pos);
701 (unsigned long) ctx->pos);
702 700
703 for (i = 0; i < NCP_NUMBER_OF_VOLUMES; i++) { 701 for (i = 0; i < NCP_NUMBER_OF_VOLUMES; i++) {
704 int inval_dentry; 702 int inval_dentry;
@@ -708,12 +706,11 @@ ncp_read_volume_list(struct file *file, struct dir_context *ctx,
708 if (!strlen(info.volume_name)) 706 if (!strlen(info.volume_name))
709 continue; 707 continue;
710 708
711 DPRINTK("ncp_read_volume_list: found vol: %s\n", 709 ncp_dbg(1, "found vol: %s\n", info.volume_name);
712 info.volume_name);
713 710
714 if (ncp_lookup_volume(server, info.volume_name, 711 if (ncp_lookup_volume(server, info.volume_name,
715 &entry.i)) { 712 &entry.i)) {
716 DPRINTK("ncpfs: could not lookup vol %s\n", 713 ncp_dbg(1, "could not lookup vol %s\n",
717 info.volume_name); 714 info.volume_name);
718 continue; 715 continue;
719 } 716 }
@@ -738,14 +735,13 @@ ncp_do_readdir(struct file *file, struct dir_context *ctx,
738 int more; 735 int more;
739 size_t bufsize; 736 size_t bufsize;
740 737
741 DPRINTK("ncp_do_readdir: %pD2, fpos=%ld\n", file, 738 ncp_dbg(1, "%pD2, fpos=%ld\n", file, (unsigned long)ctx->pos);
742 (unsigned long) ctx->pos); 739 ncp_vdbg("init %pD, volnum=%d, dirent=%u\n",
743 PPRINTK("ncp_do_readdir: init %pD, volnum=%d, dirent=%u\n", 740 file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum);
744 file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum);
745 741
746 err = ncp_initialize_search(server, dir, &seq); 742 err = ncp_initialize_search(server, dir, &seq);
747 if (err) { 743 if (err) {
748 DPRINTK("ncp_do_readdir: init failed, err=%d\n", err); 744 ncp_dbg(1, "init failed, err=%d\n", err);
749 return; 745 return;
750 } 746 }
751 /* We MUST NOT use server->buffer_size handshaked with server if we are 747 /* We MUST NOT use server->buffer_size handshaked with server if we are
@@ -808,8 +804,7 @@ int ncp_conn_logged_in(struct super_block *sb)
808 goto out; 804 goto out;
809 result = -ENOENT; 805 result = -ENOENT;
810 if (ncp_get_volume_root(server, __name, &volNumber, &dirEntNum, &DosDirNum)) { 806 if (ncp_get_volume_root(server, __name, &volNumber, &dirEntNum, &DosDirNum)) {
811 PPRINTK("ncp_conn_logged_in: %s not found\n", 807 ncp_vdbg("%s not found\n", server->m.mounted_vol);
812 server->m.mounted_vol);
813 goto out; 808 goto out;
814 } 809 }
815 dent = sb->s_root; 810 dent = sb->s_root;
@@ -822,10 +817,10 @@ int ncp_conn_logged_in(struct super_block *sb)
822 NCP_FINFO(ino)->DosDirNum = DosDirNum; 817 NCP_FINFO(ino)->DosDirNum = DosDirNum;
823 result = 0; 818 result = 0;
824 } else { 819 } else {
825 DPRINTK("ncpfs: sb->s_root->d_inode == NULL!\n"); 820 ncp_dbg(1, "sb->s_root->d_inode == NULL!\n");
826 } 821 }
827 } else { 822 } else {
828 DPRINTK("ncpfs: sb->s_root == NULL!\n"); 823 ncp_dbg(1, "sb->s_root == NULL!\n");
829 } 824 }
830 } else 825 } else
831 result = 0; 826 result = 0;
@@ -846,7 +841,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
846 if (!ncp_conn_valid(server)) 841 if (!ncp_conn_valid(server))
847 goto finished; 842 goto finished;
848 843
849 PPRINTK("ncp_lookup: server lookup for %pd2\n", dentry); 844 ncp_vdbg("server lookup for %pd2\n", dentry);
850 845
851 len = sizeof(__name); 846 len = sizeof(__name);
852 if (ncp_is_server_root(dir)) { 847 if (ncp_is_server_root(dir)) {
@@ -854,15 +849,15 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
854 dentry->d_name.len, 1); 849 dentry->d_name.len, 1);
855 if (!res) 850 if (!res)
856 res = ncp_lookup_volume(server, __name, &(finfo.i)); 851 res = ncp_lookup_volume(server, __name, &(finfo.i));
857 if (!res) 852 if (!res)
858 ncp_update_known_namespace(server, finfo.i.volNumber, NULL); 853 ncp_update_known_namespace(server, finfo.i.volNumber, NULL);
859 } else { 854 } else {
860 res = ncp_io2vol(server, __name, &len, dentry->d_name.name, 855 res = ncp_io2vol(server, __name, &len, dentry->d_name.name,
861 dentry->d_name.len, !ncp_preserve_case(dir)); 856 dentry->d_name.len, !ncp_preserve_case(dir));
862 if (!res) 857 if (!res)
863 res = ncp_obtain_info(server, dir, __name, &(finfo.i)); 858 res = ncp_obtain_info(server, dir, __name, &(finfo.i));
864 } 859 }
865 PPRINTK("ncp_lookup: looked for %pd2, res=%d\n", dentry, res); 860 ncp_vdbg("looked for %pd2, res=%d\n", dentry, res);
866 /* 861 /*
867 * If we didn't find an entry, make a negative dentry. 862 * If we didn't find an entry, make a negative dentry.
868 */ 863 */
@@ -886,7 +881,7 @@ add_entry:
886 } 881 }
887 882
888finished: 883finished:
889 PPRINTK("ncp_lookup: result=%d\n", error); 884 ncp_vdbg("result=%d\n", error);
890 return ERR_PTR(error); 885 return ERR_PTR(error);
891} 886}
892 887
@@ -909,7 +904,7 @@ out:
909 return error; 904 return error;
910 905
911out_close: 906out_close:
912 PPRINTK("ncp_instantiate: %pd2 failed, closing file\n", dentry); 907 ncp_vdbg("%pd2 failed, closing file\n", dentry);
913 ncp_close_file(NCP_SERVER(dir), finfo->file_handle); 908 ncp_close_file(NCP_SERVER(dir), finfo->file_handle);
914 goto out; 909 goto out;
915} 910}
@@ -923,7 +918,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
923 int opmode; 918 int opmode;
924 __u8 __name[NCP_MAXPATHLEN + 1]; 919 __u8 __name[NCP_MAXPATHLEN + 1];
925 920
926 PPRINTK("ncp_create_new: creating %pd2, mode=%hx\n", dentry, mode); 921 ncp_vdbg("creating %pd2, mode=%hx\n", dentry, mode);
927 922
928 ncp_age_dentry(server, dentry); 923 ncp_age_dentry(server, dentry);
929 len = sizeof(__name); 924 len = sizeof(__name);
@@ -952,7 +947,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
952 error = -ENAMETOOLONG; 947 error = -ENAMETOOLONG;
953 else if (result < 0) 948 else if (result < 0)
954 error = result; 949 error = result;
955 DPRINTK("ncp_create: %pd2 failed\n", dentry); 950 ncp_dbg(1, "%pd2 failed\n", dentry);
956 goto out; 951 goto out;
957 } 952 }
958 opmode = O_WRONLY; 953 opmode = O_WRONLY;
@@ -985,7 +980,7 @@ static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
985 int error, len; 980 int error, len;
986 __u8 __name[NCP_MAXPATHLEN + 1]; 981 __u8 __name[NCP_MAXPATHLEN + 1];
987 982
988 DPRINTK("ncp_mkdir: making %pd2\n", dentry); 983 ncp_dbg(1, "making %pd2\n", dentry);
989 984
990 ncp_age_dentry(server, dentry); 985 ncp_age_dentry(server, dentry);
991 len = sizeof(__name); 986 len = sizeof(__name);
@@ -1022,7 +1017,7 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry)
1022 int error, result, len; 1017 int error, result, len;
1023 __u8 __name[NCP_MAXPATHLEN + 1]; 1018 __u8 __name[NCP_MAXPATHLEN + 1];
1024 1019
1025 DPRINTK("ncp_rmdir: removing %pd2\n", dentry); 1020 ncp_dbg(1, "removing %pd2\n", dentry);
1026 1021
1027 len = sizeof(__name); 1022 len = sizeof(__name);
1028 error = ncp_io2vol(server, __name, &len, dentry->d_name.name, 1023 error = ncp_io2vol(server, __name, &len, dentry->d_name.name,
@@ -1067,13 +1062,13 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
1067 int error; 1062 int error;
1068 1063
1069 server = NCP_SERVER(dir); 1064 server = NCP_SERVER(dir);
1070 DPRINTK("ncp_unlink: unlinking %pd2\n", dentry); 1065 ncp_dbg(1, "unlinking %pd2\n", dentry);
1071 1066
1072 /* 1067 /*
1073 * Check whether to close the file ... 1068 * Check whether to close the file ...
1074 */ 1069 */
1075 if (inode) { 1070 if (inode) {
1076 PPRINTK("ncp_unlink: closing file\n"); 1071 ncp_vdbg("closing file\n");
1077 ncp_make_closed(inode); 1072 ncp_make_closed(inode);
1078 } 1073 }
1079 1074
@@ -1087,7 +1082,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
1087#endif 1082#endif
1088 switch (error) { 1083 switch (error) {
1089 case 0x00: 1084 case 0x00:
1090 DPRINTK("ncp: removed %pd2\n", dentry); 1085 ncp_dbg(1, "removed %pd2\n", dentry);
1091 break; 1086 break;
1092 case 0x85: 1087 case 0x85:
1093 case 0x8A: 1088 case 0x8A:
@@ -1120,7 +1115,7 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
1120 int old_len, new_len; 1115 int old_len, new_len;
1121 __u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1]; 1116 __u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1];
1122 1117
1123 DPRINTK("ncp_rename: %pd2 to %pd2\n", old_dentry, new_dentry); 1118 ncp_dbg(1, "%pd2 to %pd2\n", old_dentry, new_dentry);
1124 1119
1125 ncp_age_dentry(server, old_dentry); 1120 ncp_age_dentry(server, old_dentry);
1126 ncp_age_dentry(server, new_dentry); 1121 ncp_age_dentry(server, new_dentry);
@@ -1150,8 +1145,8 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
1150#endif 1145#endif
1151 switch (error) { 1146 switch (error) {
1152 case 0x00: 1147 case 0x00:
1153 DPRINTK("ncp renamed %pd -> %pd.\n", 1148 ncp_dbg(1, "renamed %pd -> %pd\n",
1154 old_dentry, new_dentry); 1149 old_dentry, new_dentry);
1155 break; 1150 break;
1156 case 0x9E: 1151 case 0x9E:
1157 error = -ENAMETOOLONG; 1152 error = -ENAMETOOLONG;
@@ -1173,7 +1168,7 @@ static int ncp_mknod(struct inode * dir, struct dentry *dentry,
1173 if (!new_valid_dev(rdev)) 1168 if (!new_valid_dev(rdev))
1174 return -EINVAL; 1169 return -EINVAL;
1175 if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) { 1170 if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) {
1176 DPRINTK(KERN_DEBUG "ncp_mknod: mode = 0%ho\n", mode); 1171 ncp_dbg(1, "mode = 0%ho\n", mode);
1177 return ncp_create_new(dir, dentry, mode, rdev, 0); 1172 return ncp_create_new(dir, dentry, mode, rdev, 0);
1178 } 1173 }
1179 return -EPERM; /* Strange, but true */ 1174 return -EPERM; /* Strange, but true */
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 8f5074e1ecb9..77640a8bfb87 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -6,6 +6,8 @@
6 * 6 *
7 */ 7 */
8 8
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
9#include <asm/uaccess.h> 11#include <asm/uaccess.h>
10 12
11#include <linux/time.h> 13#include <linux/time.h>
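Defining pr_fmt ahead of the includes is what lets the pr_err/pr_info calls introduced throughout fs/ncpfs drop their hand-written "ncp_xxx:" prefixes: the kernel's pr_* macros paste pr_fmt() in front of every format string. A user-space imitation of that layering (KBUILD_MODNAME is faked here, and the GNU-style variadic macros match what kernel code relies on):

#include <stdio.h>

/* Fake what the kernel build system normally provides. */
#define KBUILD_MODNAME "ncpfs"

/* In the kernel this must precede the printk.h include; here it only has to
 * precede the call sites below. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)
#define pr_err(fmt, ...)  fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	pr_info("%s: got NULL inode\n", "ncp_make_open");
	/* prints: ncpfs: ncp_make_open: got NULL inode */
	return 0;
}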
@@ -34,11 +36,11 @@ int ncp_make_open(struct inode *inode, int right)
34 36
35 error = -EINVAL; 37 error = -EINVAL;
36 if (!inode) { 38 if (!inode) {
37 printk(KERN_ERR "ncp_make_open: got NULL inode\n"); 39 pr_err("%s: got NULL inode\n", __func__);
38 goto out; 40 goto out;
39 } 41 }
40 42
41 DPRINTK("ncp_make_open: opened=%d, volume # %u, dir entry # %u\n", 43 ncp_dbg(1, "opened=%d, volume # %u, dir entry # %u\n",
42 atomic_read(&NCP_FINFO(inode)->opened), 44 atomic_read(&NCP_FINFO(inode)->opened),
43 NCP_FINFO(inode)->volNumber, 45 NCP_FINFO(inode)->volNumber,
44 NCP_FINFO(inode)->dirEntNum); 46 NCP_FINFO(inode)->dirEntNum);
@@ -71,7 +73,7 @@ int ncp_make_open(struct inode *inode, int right)
71 break; 73 break;
72 } 74 }
73 if (result) { 75 if (result) {
74 PPRINTK("ncp_make_open: failed, result=%d\n", result); 76 ncp_vdbg("failed, result=%d\n", result);
75 goto out_unlock; 77 goto out_unlock;
76 } 78 }
77 /* 79 /*
@@ -83,7 +85,7 @@ int ncp_make_open(struct inode *inode, int right)
83 } 85 }
84 86
85 access = NCP_FINFO(inode)->access; 87 access = NCP_FINFO(inode)->access;
86 PPRINTK("ncp_make_open: file open, access=%x\n", access); 88 ncp_vdbg("file open, access=%x\n", access);
87 if (access == right || access == O_RDWR) { 89 if (access == right || access == O_RDWR) {
88 atomic_inc(&NCP_FINFO(inode)->opened); 90 atomic_inc(&NCP_FINFO(inode)->opened);
89 error = 0; 91 error = 0;
@@ -107,7 +109,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
107 void* freepage; 109 void* freepage;
108 size_t freelen; 110 size_t freelen;
109 111
110 DPRINTK("ncp_file_read: enter %pd2\n", dentry); 112 ncp_dbg(1, "enter %pd2\n", dentry);
111 113
112 pos = *ppos; 114 pos = *ppos;
113 115
@@ -124,7 +126,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
124 126
125 error = ncp_make_open(inode, O_RDONLY); 127 error = ncp_make_open(inode, O_RDONLY);
126 if (error) { 128 if (error) {
127 DPRINTK(KERN_ERR "ncp_file_read: open failed, error=%d\n", error); 129 ncp_dbg(1, "open failed, error=%d\n", error);
128 return error; 130 return error;
129 } 131 }
130 132
@@ -165,7 +167,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
165 167
166 file_accessed(file); 168 file_accessed(file);
167 169
168 DPRINTK("ncp_file_read: exit %pd2\n", dentry); 170 ncp_dbg(1, "exit %pd2\n", dentry);
169outrel: 171outrel:
170 ncp_inode_close(inode); 172 ncp_inode_close(inode);
171 return already_read ? already_read : error; 173 return already_read ? already_read : error;
@@ -182,7 +184,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
182 int errno; 184 int errno;
183 void* bouncebuffer; 185 void* bouncebuffer;
184 186
185 DPRINTK("ncp_file_write: enter %pd2\n", dentry); 187 ncp_dbg(1, "enter %pd2\n", dentry);
186 if ((ssize_t) count < 0) 188 if ((ssize_t) count < 0)
187 return -EINVAL; 189 return -EINVAL;
188 pos = *ppos; 190 pos = *ppos;
@@ -211,7 +213,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
211 return 0; 213 return 0;
212 errno = ncp_make_open(inode, O_WRONLY); 214 errno = ncp_make_open(inode, O_WRONLY);
213 if (errno) { 215 if (errno) {
214 DPRINTK(KERN_ERR "ncp_file_write: open failed, error=%d\n", errno); 216 ncp_dbg(1, "open failed, error=%d\n", errno);
215 return errno; 217 return errno;
216 } 218 }
217 bufsize = NCP_SERVER(inode)->buffer_size; 219 bufsize = NCP_SERVER(inode)->buffer_size;
@@ -261,7 +263,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
261 i_size_write(inode, pos); 263 i_size_write(inode, pos);
262 mutex_unlock(&inode->i_mutex); 264 mutex_unlock(&inode->i_mutex);
263 } 265 }
264 DPRINTK("ncp_file_write: exit %pd2\n", dentry); 266 ncp_dbg(1, "exit %pd2\n", dentry);
265outrel: 267outrel:
266 ncp_inode_close(inode); 268 ncp_inode_close(inode);
267 return already_written ? already_written : errno; 269 return already_written ? already_written : errno;
@@ -269,7 +271,7 @@ outrel:
269 271
270static int ncp_release(struct inode *inode, struct file *file) { 272static int ncp_release(struct inode *inode, struct file *file) {
271 if (ncp_make_closed(inode)) { 273 if (ncp_make_closed(inode)) {
272 DPRINTK("ncp_release: failed to close\n"); 274 ncp_dbg(1, "failed to close\n");
273 } 275 }
274 return 0; 276 return 0;
275} 277}
diff --git a/fs/ncpfs/getopt.c b/fs/ncpfs/getopt.c
index 0af3349de851..03ffde1f44d6 100644
--- a/fs/ncpfs/getopt.c
+++ b/fs/ncpfs/getopt.c
@@ -2,6 +2,8 @@
2 * getopt.c 2 * getopt.c
3 */ 3 */
4 4
5#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
6
5#include <linux/kernel.h> 7#include <linux/kernel.h>
6#include <linux/string.h> 8#include <linux/string.h>
7 9
@@ -46,8 +48,8 @@ int ncp_getopt(const char *caller, char **options, const struct ncp_option *opts
46 if (opts->has_arg & OPT_NOPARAM) { 48 if (opts->has_arg & OPT_NOPARAM) {
47 return opts->val; 49 return opts->val;
48 } 50 }
49 printk(KERN_INFO "%s: the %s option requires an argument\n", 51 pr_info("%s: the %s option requires an argument\n",
50 caller, token); 52 caller, token);
51 return -EINVAL; 53 return -EINVAL;
52 } 54 }
53 if (opts->has_arg & OPT_INT) { 55 if (opts->has_arg & OPT_INT) {
@@ -57,18 +59,18 @@ int ncp_getopt(const char *caller, char **options, const struct ncp_option *opts
57 if (!*v) { 59 if (!*v) {
58 return opts->val; 60 return opts->val;
59 } 61 }
60 printk(KERN_INFO "%s: invalid numeric value in %s=%s\n", 62 pr_info("%s: invalid numeric value in %s=%s\n",
61 caller, token, val); 63 caller, token, val);
62 return -EDOM; 64 return -EDOM;
63 } 65 }
64 if (opts->has_arg & OPT_STRING) { 66 if (opts->has_arg & OPT_STRING) {
65 return opts->val; 67 return opts->val;
66 } 68 }
67 printk(KERN_INFO "%s: unexpected argument %s to the %s option\n", 69 pr_info("%s: unexpected argument %s to the %s option\n",
68 caller, val, token); 70 caller, val, token);
69 return -EINVAL; 71 return -EINVAL;
70 } 72 }
71 } 73 }
72 printk(KERN_INFO "%s: Unrecognized mount option %s\n", caller, token); 74 pr_info("%s: Unrecognized mount option %s\n", caller, token);
73 return -EOPNOTSUPP; 75 return -EOPNOTSUPP;
74} 76}
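
These getopt.c hunks follow the pattern used throughout the series: a pr_fmt() definition is added ahead of the includes and the raw printk(KERN_INFO ...) calls become pr_info(), so every message from the file picks up a "ncpfs: " prefix automatically. A minimal userspace sketch of the same prefixing trick (KBUILD_MODNAME and the pr_info() wrapper are stubbed out here; in the kernel both come from kbuild and printk.h):

#include <stdio.h>

/* stand-in for the kbuild-provided module name */
#define KBUILD_MODNAME "ncpfs"

/* must appear before the printk helpers are pulled in,
 * exactly as the patch places it before the #includes */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

/* toy pr_info(): routes the format through pr_fmt() first */
#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
    /* prints "ncpfs: Unrecognized mount option foo" */
    pr_info("Unrecognized mount option %s\n", "foo");
    return 0;
}
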
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 647d86d2db39..81b4f643ecef 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/module.h> 14#include <linux/module.h>
13 15
14#include <asm/uaccess.h> 16#include <asm/uaccess.h>
@@ -133,7 +135,7 @@ void ncp_update_inode(struct inode *inode, struct ncp_entry_info *nwinfo)
133 NCP_FINFO(inode)->access = nwinfo->access; 135 NCP_FINFO(inode)->access = nwinfo->access;
134 memcpy(NCP_FINFO(inode)->file_handle, nwinfo->file_handle, 136 memcpy(NCP_FINFO(inode)->file_handle, nwinfo->file_handle,
135 sizeof(nwinfo->file_handle)); 137 sizeof(nwinfo->file_handle));
136 DPRINTK("ncp_update_inode: updated %s, volnum=%d, dirent=%u\n", 138 ncp_dbg(1, "updated %s, volnum=%d, dirent=%u\n",
137 nwinfo->i.entryName, NCP_FINFO(inode)->volNumber, 139 nwinfo->i.entryName, NCP_FINFO(inode)->volNumber,
138 NCP_FINFO(inode)->dirEntNum); 140 NCP_FINFO(inode)->dirEntNum);
139} 141}
@@ -141,8 +143,7 @@ void ncp_update_inode(struct inode *inode, struct ncp_entry_info *nwinfo)
141static void ncp_update_dates(struct inode *inode, struct nw_info_struct *nwi) 143static void ncp_update_dates(struct inode *inode, struct nw_info_struct *nwi)
142{ 144{
143 /* NFS namespace mode overrides others if it's set. */ 145 /* NFS namespace mode overrides others if it's set. */
144 DPRINTK(KERN_DEBUG "ncp_update_dates_and_mode: (%s) nfs.mode=0%o\n", 146 ncp_dbg(1, "(%s) nfs.mode=0%o\n", nwi->entryName, nwi->nfs.mode);
145 nwi->entryName, nwi->nfs.mode);
146 if (nwi->nfs.mode) { 147 if (nwi->nfs.mode) {
147 /* XXX Security? */ 148 /* XXX Security? */
148 inode->i_mode = nwi->nfs.mode; 149 inode->i_mode = nwi->nfs.mode;
@@ -230,7 +231,7 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
230 231
231 ncp_update_attrs(inode, nwinfo); 232 ncp_update_attrs(inode, nwinfo);
232 233
233 DDPRINTK("ncp_read_inode: inode->i_mode = %u\n", inode->i_mode); 234 ncp_dbg(2, "inode->i_mode = %u\n", inode->i_mode);
234 235
235 set_nlink(inode, 1); 236 set_nlink(inode, 1);
236 inode->i_uid = server->m.uid; 237 inode->i_uid = server->m.uid;
@@ -258,7 +259,7 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
258 struct inode *inode; 259 struct inode *inode;
259 260
260 if (info == NULL) { 261 if (info == NULL) {
261 printk(KERN_ERR "ncp_iget: info is NULL\n"); 262 pr_err("%s: info is NULL\n", __func__);
262 return NULL; 263 return NULL;
263 } 264 }
264 265
@@ -290,7 +291,7 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
290 } 291 }
291 insert_inode_hash(inode); 292 insert_inode_hash(inode);
292 } else 293 } else
293 printk(KERN_ERR "ncp_iget: iget failed!\n"); 294 pr_err("%s: iget failed!\n", __func__);
294 return inode; 295 return inode;
295} 296}
296 297
@@ -301,12 +302,12 @@ ncp_evict_inode(struct inode *inode)
301 clear_inode(inode); 302 clear_inode(inode);
302 303
303 if (S_ISDIR(inode->i_mode)) { 304 if (S_ISDIR(inode->i_mode)) {
304 DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino); 305 ncp_dbg(2, "put directory %ld\n", inode->i_ino);
305 } 306 }
306 307
307 if (ncp_make_closed(inode) != 0) { 308 if (ncp_make_closed(inode) != 0) {
308 /* We can't do anything but complain. */ 309 /* We can't do anything but complain. */
309 printk(KERN_ERR "ncp_evict_inode: could not close\n"); 310 pr_err("%s: could not close\n", __func__);
310 } 311 }
311} 312}
312 313
@@ -621,7 +622,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
621 now because of PATH_MAX changes.. */ 622 now because of PATH_MAX changes.. */
622 if (server->m.time_out < 1) { 623 if (server->m.time_out < 1) {
623 server->m.time_out = 10; 624 server->m.time_out = 10;
624 printk(KERN_INFO "You need to recompile your ncpfs utils..\n"); 625 pr_info("You need to recompile your ncpfs utils..\n");
625 } 626 }
626 server->m.time_out = server->m.time_out * HZ / 100; 627 server->m.time_out = server->m.time_out * HZ / 100;
627 server->m.file_mode = (server->m.file_mode & S_IRWXUGO) | S_IFREG; 628 server->m.file_mode = (server->m.file_mode & S_IRWXUGO) | S_IFREG;
@@ -682,7 +683,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
682 ncp_unlock_server(server); 683 ncp_unlock_server(server);
683 if (error < 0) 684 if (error < 0)
684 goto out_rxbuf; 685 goto out_rxbuf;
685 DPRINTK("ncp_fill_super: NCP_SBP(sb) = %x\n", (int) NCP_SBP(sb)); 686 ncp_dbg(1, "NCP_SBP(sb) = %p\n", NCP_SBP(sb));
686 687
687 error = -EMSGSIZE; /* -EREMOTESIDEINCOMPATIBLE */ 688 error = -EMSGSIZE; /* -EREMOTESIDEINCOMPATIBLE */
688#ifdef CONFIG_NCPFS_PACKET_SIGNING 689#ifdef CONFIG_NCPFS_PACKET_SIGNING
@@ -710,7 +711,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
710 if (ncp_negotiate_buffersize(server, default_bufsize, 711 if (ncp_negotiate_buffersize(server, default_bufsize,
711 &(server->buffer_size)) != 0) 712 &(server->buffer_size)) != 0)
712 goto out_disconnect; 713 goto out_disconnect;
713 DPRINTK("ncpfs: bufsize = %d\n", server->buffer_size); 714 ncp_dbg(1, "bufsize = %d\n", server->buffer_size);
714 715
715 memset(&finfo, 0, sizeof(finfo)); 716 memset(&finfo, 0, sizeof(finfo));
716 finfo.i.attributes = aDIR; 717 finfo.i.attributes = aDIR;
@@ -739,7 +740,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
739 root_inode = ncp_iget(sb, &finfo); 740 root_inode = ncp_iget(sb, &finfo);
740 if (!root_inode) 741 if (!root_inode)
741 goto out_disconnect; 742 goto out_disconnect;
742 DPRINTK("ncp_fill_super: root vol=%d\n", NCP_FINFO(root_inode)->volNumber); 743 ncp_dbg(1, "root vol=%d\n", NCP_FINFO(root_inode)->volNumber);
743 sb->s_root = d_make_root(root_inode); 744 sb->s_root = d_make_root(root_inode);
744 if (!sb->s_root) 745 if (!sb->s_root)
745 goto out_disconnect; 746 goto out_disconnect;
@@ -985,8 +986,7 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
985 if ((attr->ia_valid & ATTR_SIZE) != 0) { 986 if ((attr->ia_valid & ATTR_SIZE) != 0) {
986 int written; 987 int written;
987 988
988 DPRINTK("ncpfs: trying to change size to %ld\n", 989 ncp_dbg(1, "trying to change size to %llu\n", attr->ia_size);
989 attr->ia_size);
990 990
991 if ((result = ncp_make_open(inode, O_WRONLY)) < 0) { 991 if ((result = ncp_make_open(inode, O_WRONLY)) < 0) {
992 result = -EACCES; 992 result = -EACCES;
@@ -1072,7 +1072,7 @@ MODULE_ALIAS_FS("ncpfs");
1072static int __init init_ncp_fs(void) 1072static int __init init_ncp_fs(void)
1073{ 1073{
1074 int err; 1074 int err;
1075 DPRINTK("ncpfs: init_ncp_fs called\n"); 1075 ncp_dbg(1, "called\n");
1076 1076
1077 err = init_inodecache(); 1077 err = init_inodecache();
1078 if (err) 1078 if (err)
@@ -1089,7 +1089,7 @@ out1:
1089 1089
1090static void __exit exit_ncp_fs(void) 1090static void __exit exit_ncp_fs(void)
1091{ 1091{
1092 DPRINTK("ncpfs: exit_ncp_fs called\n"); 1092 ncp_dbg(1, "called\n");
1093 unregister_filesystem(&ncp_fs_type); 1093 unregister_filesystem(&ncp_fs_type);
1094 destroy_inodecache(); 1094 destroy_inodecache();
1095} 1095}
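
Two printf-format fixes ride along with the inode.c conversion: NCP_SBP(sb) is now printed with %p instead of being cast to int, and attr->ia_size, a 64-bit loff_t, is printed with %llu instead of the previously wrong %ld. A small standalone illustration (the typedef and the explicit cast exist only for this userspace sketch):

#include <stdio.h>

/* userspace stand-in for the kernel's loff_t */
typedef long long loff_t;

int main(void)
{
    loff_t ia_size = 5368709120LL;  /* 5 GiB: would overflow a 32-bit %ld */
    void *sbp = &ia_size;

    /* 64-bit sizes need %llu (cast kept for portability) ... */
    printf("trying to change size to %llu\n",
           (unsigned long long)ia_size);
    /* ... and pointers should be printed with %p, not cast to int */
    printf("NCP_SBP(sb) = %p\n", sbp);
    return 0;
}
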
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 60426ccb3b65..d5659d96ee7f 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -41,7 +41,7 @@ ncp_get_fs_info(struct ncp_server * server, struct inode *inode,
41 return -EFAULT; 41 return -EFAULT;
42 42
43 if (info.version != NCP_GET_FS_INFO_VERSION) { 43 if (info.version != NCP_GET_FS_INFO_VERSION) {
44 DPRINTK("info.version invalid: %d\n", info.version); 44 ncp_dbg(1, "info.version invalid: %d\n", info.version);
45 return -EINVAL; 45 return -EINVAL;
46 } 46 }
47 /* TODO: info.addr = server->m.serv_addr; */ 47 /* TODO: info.addr = server->m.serv_addr; */
@@ -66,7 +66,7 @@ ncp_get_fs_info_v2(struct ncp_server * server, struct inode *inode,
66 return -EFAULT; 66 return -EFAULT;
67 67
68 if (info2.version != NCP_GET_FS_INFO_VERSION_V2) { 68 if (info2.version != NCP_GET_FS_INFO_VERSION_V2) {
69 DPRINTK("info.version invalid: %d\n", info2.version); 69 ncp_dbg(1, "info.version invalid: %d\n", info2.version);
70 return -EINVAL; 70 return -EINVAL;
71 } 71 }
72 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid); 72 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
@@ -132,7 +132,7 @@ ncp_get_compat_fs_info_v2(struct ncp_server * server, struct inode *inode,
132 return -EFAULT; 132 return -EFAULT;
133 133
134 if (info2.version != NCP_GET_FS_INFO_VERSION_V2) { 134 if (info2.version != NCP_GET_FS_INFO_VERSION_V2) {
135 DPRINTK("info.version invalid: %d\n", info2.version); 135 ncp_dbg(1, "info.version invalid: %d\n", info2.version);
136 return -EINVAL; 136 return -EINVAL;
137 } 137 }
138 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid); 138 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
@@ -308,8 +308,7 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
308 else 308 else
309 result = server->reply_size; 309 result = server->reply_size;
310 ncp_unlock_server(server); 310 ncp_unlock_server(server);
311 DPRINTK("ncp_ioctl: copy %d bytes\n", 311 ncp_dbg(1, "copy %d bytes\n", result);
312 result);
313 if (result >= 0) 312 if (result >= 0)
314 if (copy_to_user(request.data, bouncebuffer, result)) 313 if (copy_to_user(request.data, bouncebuffer, result))
315 result = -EFAULT; 314 result = -EFAULT;
@@ -385,9 +384,9 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
385 sr.namespace = server->name_space[sr.volNumber]; 384 sr.namespace = server->name_space[sr.volNumber];
386 result = 0; 385 result = 0;
387 } else 386 } else
388 DPRINTK("ncpfs: s_root->d_inode==NULL\n"); 387 ncp_dbg(1, "s_root->d_inode==NULL\n");
389 } else 388 } else
390 DPRINTK("ncpfs: s_root==NULL\n"); 389 ncp_dbg(1, "s_root==NULL\n");
391 } else { 390 } else {
392 sr.volNumber = -1; 391 sr.volNumber = -1;
393 sr.namespace = 0; 392 sr.namespace = 0;
@@ -440,11 +439,11 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
440 NCP_FINFO(s_inode)->DosDirNum = dosde; 439 NCP_FINFO(s_inode)->DosDirNum = dosde;
441 server->root_setuped = 1; 440 server->root_setuped = 1;
442 } else { 441 } else {
443 DPRINTK("ncpfs: s_root->d_inode==NULL\n"); 442 ncp_dbg(1, "s_root->d_inode==NULL\n");
444 result = -EIO; 443 result = -EIO;
445 } 444 }
446 } else { 445 } else {
447 DPRINTK("ncpfs: s_root==NULL\n"); 446 ncp_dbg(1, "s_root==NULL\n");
448 result = -EIO; 447 result = -EIO;
449 } 448 }
450 } 449 }
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 3c5dd55d284c..b359d12eb359 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -107,7 +107,7 @@ int ncp_mmap(struct file *file, struct vm_area_struct *vma)
107{ 107{
108 struct inode *inode = file_inode(file); 108 struct inode *inode = file_inode(file);
109 109
110 DPRINTK("ncp_mmap: called\n"); 110 ncp_dbg(1, "called\n");
111 111
112 if (!ncp_conn_valid(NCP_SERVER(inode))) 112 if (!ncp_conn_valid(NCP_SERVER(inode)))
113 return -EIO; 113 return -EIO;
diff --git a/fs/ncpfs/ncp_fs.h b/fs/ncpfs/ncp_fs.h
index 31831afe1c3b..b9f69e1b1f43 100644
--- a/fs/ncpfs/ncp_fs.h
+++ b/fs/ncpfs/ncp_fs.h
@@ -2,30 +2,32 @@
2#include "ncp_fs_i.h" 2#include "ncp_fs_i.h"
3#include "ncp_fs_sb.h" 3#include "ncp_fs_sb.h"
4 4
5/* define because it is easy to change PRINTK to {*}PRINTK */
6#define PRINTK(format, args...) printk(KERN_DEBUG format , ## args)
7
8#undef NCPFS_PARANOIA 5#undef NCPFS_PARANOIA
9#ifdef NCPFS_PARANOIA 6#ifdef NCPFS_PARANOIA
10#define PPRINTK(format, args...) PRINTK(format , ## args) 7#define ncp_vdbg(fmt, ...) \
8 pr_debug(fmt, ##__VA_ARGS__)
11#else 9#else
12#define PPRINTK(format, args...) 10#define ncp_vdbg(fmt, ...) \
11do { \
12 if (0) \
13 pr_debug(fmt, ##__VA_ARGS__); \
14} while (0)
13#endif 15#endif
14 16
15#ifndef DEBUG_NCP 17#ifndef DEBUG_NCP
16#define DEBUG_NCP 0 18#define DEBUG_NCP 0
17#endif 19#endif
18#if DEBUG_NCP > 0 20
19#define DPRINTK(format, args...) PRINTK(format , ## args) 21#if DEBUG_NCP > 0 && !defined(DEBUG)
20#else 22#define DEBUG
21#define DPRINTK(format, args...)
22#endif
23#if DEBUG_NCP > 1
24#define DDPRINTK(format, args...) PRINTK(format , ## args)
25#else
26#define DDPRINTK(format, args...)
27#endif 23#endif
28 24
25#define ncp_dbg(level, fmt, ...) \
26do { \
27 if (level <= DEBUG_NCP) \
28 pr_debug(fmt, ##__VA_ARGS__); \
29} while (0)
30
29#define NCP_MAX_RPC_TIMEOUT (6*HZ) 31#define NCP_MAX_RPC_TIMEOUT (6*HZ)
30 32
31 33
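
This ncp_fs.h hunk is the core of the ncpfs part of the series: the PRINTK/DPRINTK/DDPRINTK/PPRINTK family is replaced by ncp_dbg(level, ...), gated on DEBUG_NCP, and ncp_vdbg(...), gated on NCPFS_PARANOIA, both built on pr_debug() (which in the kernel is additionally controlled by DEBUG and dynamic debug). The disabled variant still expands to "if (0) pr_debug(...)", so format strings and arguments keep being compile-checked even when no output is produced. A self-contained approximation with pr_debug() mapped to stderr:

#include <stdio.h>

#define DEBUG_NCP 1             /* compile-time verbosity, as in ncp_fs.h */

#define pr_debug(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)

/* level-gated debug: messages above DEBUG_NCP disappear, but the
 * format string and arguments are still checked by the compiler */
#define ncp_dbg(level, fmt, ...)            \
do {                                        \
    if ((level) <= DEBUG_NCP)               \
        pr_debug(fmt, ##__VA_ARGS__);       \
} while (0)

/* "paranoia" variant: emitted only if NCPFS_PARANOIA is defined */
#undef NCPFS_PARANOIA
#ifdef NCPFS_PARANOIA
#define ncp_vdbg(fmt, ...) pr_debug(fmt, ##__VA_ARGS__)
#else
#define ncp_vdbg(fmt, ...)                  \
do {                                        \
    if (0)                                  \
        pr_debug(fmt, ##__VA_ARGS__);       \
} while (0)
#endif

int main(void)
{
    ncp_dbg(1, "bufsize = %d\n", 1024);     /* printed */
    ncp_dbg(2, "verbose detail %d\n", 7);   /* suppressed, still type-checked */
    ncp_vdbg("completion code=%x\n", 0);    /* suppressed, still type-checked */
    return 0;
}
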
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index 981a95617fc9..482387532f54 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -9,14 +9,14 @@
9 * 9 *
10 */ 10 */
11 11
12 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 13
14#include "ncp_fs.h" 14#include "ncp_fs.h"
15 15
16static inline void assert_server_locked(struct ncp_server *server) 16static inline void assert_server_locked(struct ncp_server *server)
17{ 17{
18 if (server->lock == 0) { 18 if (server->lock == 0) {
19 DPRINTK("ncpfs: server not locked!\n"); 19 ncp_dbg(1, "server not locked!\n");
20 } 20 }
21} 21}
22 22
@@ -75,7 +75,7 @@ static void ncp_add_pstring(struct ncp_server *server, const char *s)
75 int len = strlen(s); 75 int len = strlen(s);
76 assert_server_locked(server); 76 assert_server_locked(server);
77 if (len > 255) { 77 if (len > 255) {
78 DPRINTK("ncpfs: string too long: %s\n", s); 78 ncp_dbg(1, "string too long: %s\n", s);
79 len = 255; 79 len = 255;
80 } 80 }
81 ncp_add_byte(server, len); 81 ncp_add_byte(server, len);
@@ -225,7 +225,7 @@ int ncp_get_volume_info_with_number(struct ncp_server* server,
225 result = -EIO; 225 result = -EIO;
226 len = ncp_reply_byte(server, 29); 226 len = ncp_reply_byte(server, 29);
227 if (len > NCP_VOLNAME_LEN) { 227 if (len > NCP_VOLNAME_LEN) {
228 DPRINTK("ncpfs: volume name too long: %d\n", len); 228 ncp_dbg(1, "volume name too long: %d\n", len);
229 goto out; 229 goto out;
230 } 230 }
231 memcpy(&(target->volume_name), ncp_reply_data(server, 30), len); 231 memcpy(&(target->volume_name), ncp_reply_data(server, 30), len);
@@ -259,7 +259,7 @@ int ncp_get_directory_info(struct ncp_server* server, __u8 n,
259 result = -EIO; 259 result = -EIO;
260 len = ncp_reply_byte(server, 21); 260 len = ncp_reply_byte(server, 21);
261 if (len > NCP_VOLNAME_LEN) { 261 if (len > NCP_VOLNAME_LEN) {
262 DPRINTK("ncpfs: volume name too long: %d\n", len); 262 ncp_dbg(1, "volume name too long: %d\n", len);
263 goto out; 263 goto out;
264 } 264 }
265 memcpy(&(target->volume_name), ncp_reply_data(server, 22), len); 265 memcpy(&(target->volume_name), ncp_reply_data(server, 22), len);
@@ -295,9 +295,9 @@ ncp_make_closed(struct inode *inode)
295 err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle); 295 err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle);
296 296
297 if (!err) 297 if (!err)
298 PPRINTK("ncp_make_closed: volnum=%d, dirent=%u, error=%d\n", 298 ncp_vdbg("volnum=%d, dirent=%u, error=%d\n",
299 NCP_FINFO(inode)->volNumber, 299 NCP_FINFO(inode)->volNumber,
300 NCP_FINFO(inode)->dirEntNum, err); 300 NCP_FINFO(inode)->dirEntNum, err);
301 } 301 }
302 mutex_unlock(&NCP_FINFO(inode)->open_mutex); 302 mutex_unlock(&NCP_FINFO(inode)->open_mutex);
303 return err; 303 return err;
@@ -394,8 +394,7 @@ int ncp_obtain_nfs_info(struct ncp_server *server,
394 394
395 if ((result = ncp_request(server, 87)) == 0) { 395 if ((result = ncp_request(server, 87)) == 0) {
396 ncp_extract_nfs_info(ncp_reply_data(server, 0), &target->nfs); 396 ncp_extract_nfs_info(ncp_reply_data(server, 0), &target->nfs);
397 DPRINTK(KERN_DEBUG 397 ncp_dbg(1, "(%s) mode=0%o, rdev=0x%x\n",
398 "ncp_obtain_nfs_info: (%s) mode=0%o, rdev=0x%x\n",
399 target->entryName, target->nfs.mode, 398 target->entryName, target->nfs.mode,
400 target->nfs.rdev); 399 target->nfs.rdev);
401 } else { 400 } else {
@@ -425,7 +424,7 @@ int ncp_obtain_info(struct ncp_server *server, struct inode *dir, const char *pa
425 int result; 424 int result;
426 425
427 if (target == NULL) { 426 if (target == NULL) {
428 printk(KERN_ERR "ncp_obtain_info: invalid call\n"); 427 pr_err("%s: invalid call\n", __func__);
429 return -EINVAL; 428 return -EINVAL;
430 } 429 }
431 ncp_init_request(server); 430 ncp_init_request(server);
@@ -498,7 +497,7 @@ ncp_get_known_namespace(struct ncp_server *server, __u8 volume)
498 namespace = ncp_reply_data(server, 2); 497 namespace = ncp_reply_data(server, 2);
499 498
500 while (no_namespaces > 0) { 499 while (no_namespaces > 0) {
501 DPRINTK("get_namespaces: found %d on %d\n", *namespace, volume); 500 ncp_dbg(1, "found %d on %d\n", *namespace, volume);
502 501
503#ifdef CONFIG_NCPFS_NFS_NS 502#ifdef CONFIG_NCPFS_NFS_NS
504 if ((*namespace == NW_NS_NFS) && !(server->m.flags&NCP_MOUNT_NO_NFS)) 503 if ((*namespace == NW_NS_NFS) && !(server->m.flags&NCP_MOUNT_NO_NFS))
@@ -531,8 +530,7 @@ ncp_update_known_namespace(struct ncp_server *server, __u8 volume, int *ret_ns)
531 if (ret_ns) 530 if (ret_ns)
532 *ret_ns = ns; 531 *ret_ns = ns;
533 532
534 DPRINTK("lookup_vol: namespace[%d] = %d\n", 533 ncp_dbg(1, "namespace[%d] = %d\n", volume, server->name_space[volume]);
535 volume, server->name_space[volume]);
536 534
537 if (server->name_space[volume] == ns) 535 if (server->name_space[volume] == ns)
538 return 0; 536 return 0;
@@ -596,7 +594,7 @@ ncp_get_volume_root(struct ncp_server *server,
596{ 594{
597 int result; 595 int result;
598 596
599 DPRINTK("ncp_get_volume_root: looking up vol %s\n", volname); 597 ncp_dbg(1, "looking up vol %s\n", volname);
600 598
601 ncp_init_request(server); 599 ncp_init_request(server);
602 ncp_add_byte(server, 22); /* Subfunction: Generate dir handle */ 600 ncp_add_byte(server, 22); /* Subfunction: Generate dir handle */
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c
index 3a1587222c8a..04a69a4d8e96 100644
--- a/fs/ncpfs/sock.c
+++ b/fs/ncpfs/sock.c
@@ -8,6 +8,7 @@
8 * 8 *
9 */ 9 */
10 10
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 12
12#include <linux/time.h> 13#include <linux/time.h>
13#include <linux/errno.h> 14#include <linux/errno.h>
@@ -231,7 +232,7 @@ static void __ncptcp_try_send(struct ncp_server *server)
231 return; 232 return;
232 233
233 if (result < 0) { 234 if (result < 0) {
234 printk(KERN_ERR "ncpfs: tcp: Send failed: %d\n", result); 235 pr_err("tcp: Send failed: %d\n", result);
235 __ncp_abort_request(server, rq, result); 236 __ncp_abort_request(server, rq, result);
236 return; 237 return;
237 } 238 }
@@ -332,7 +333,7 @@ static int ncp_add_request(struct ncp_server *server, struct ncp_request_reply *
332 mutex_lock(&server->rcv.creq_mutex); 333 mutex_lock(&server->rcv.creq_mutex);
333 if (!ncp_conn_valid(server)) { 334 if (!ncp_conn_valid(server)) {
334 mutex_unlock(&server->rcv.creq_mutex); 335 mutex_unlock(&server->rcv.creq_mutex);
335 printk(KERN_ERR "ncpfs: tcp: Server died\n"); 336 pr_err("tcp: Server died\n");
336 return -EIO; 337 return -EIO;
337 } 338 }
338 ncp_req_get(req); 339 ncp_req_get(req);
@@ -405,15 +406,15 @@ void ncpdgram_rcv_proc(struct work_struct *work)
405 } 406 }
406 result = _recv(sock, buf, sizeof(buf), MSG_DONTWAIT); 407 result = _recv(sock, buf, sizeof(buf), MSG_DONTWAIT);
407 if (result < 0) { 408 if (result < 0) {
408 DPRINTK("recv failed with %d\n", result); 409 ncp_dbg(1, "recv failed with %d\n", result);
409 continue; 410 continue;
410 } 411 }
411 if (result < 10) { 412 if (result < 10) {
412 DPRINTK("too short (%u) watchdog packet\n", result); 413 ncp_dbg(1, "too short (%u) watchdog packet\n", result);
413 continue; 414 continue;
414 } 415 }
415 if (buf[9] != '?') { 416 if (buf[9] != '?') {
416 DPRINTK("bad signature (%02X) in watchdog packet\n", buf[9]); 417 ncp_dbg(1, "bad signature (%02X) in watchdog packet\n", buf[9]);
417 continue; 418 continue;
418 } 419 }
419 buf[9] = 'Y'; 420 buf[9] = 'Y';
@@ -448,7 +449,7 @@ void ncpdgram_rcv_proc(struct work_struct *work)
448 result -= 8; 449 result -= 8;
449 hdrl = sock->sk->sk_family == AF_INET ? 8 : 6; 450 hdrl = sock->sk->sk_family == AF_INET ? 8 : 6;
450 if (sign_verify_reply(server, server->rxbuf + hdrl, result - hdrl, cpu_to_le32(result), server->rxbuf + result)) { 451 if (sign_verify_reply(server, server->rxbuf + hdrl, result - hdrl, cpu_to_le32(result), server->rxbuf + result)) {
451 printk(KERN_INFO "ncpfs: Signature violation\n"); 452 pr_info("Signature violation\n");
452 result = -EIO; 453 result = -EIO;
453 } 454 }
454 } 455 }
@@ -524,7 +525,7 @@ static int do_tcp_rcv(struct ncp_server *server, void *buffer, size_t len)
524 return result; 525 return result;
525 } 526 }
526 if (result > len) { 527 if (result > len) {
527 printk(KERN_ERR "ncpfs: tcp: bug in recvmsg (%u > %Zu)\n", result, len); 528 pr_err("tcp: bug in recvmsg (%u > %Zu)\n", result, len);
528 return -EIO; 529 return -EIO;
529 } 530 }
530 return result; 531 return result;
@@ -552,9 +553,9 @@ static int __ncptcp_rcv_proc(struct ncp_server *server)
552 __ncptcp_abort(server); 553 __ncptcp_abort(server);
553 } 554 }
554 if (result < 0) { 555 if (result < 0) {
555 printk(KERN_ERR "ncpfs: tcp: error in recvmsg: %d\n", result); 556 pr_err("tcp: error in recvmsg: %d\n", result);
556 } else { 557 } else {
557 DPRINTK(KERN_ERR "ncpfs: tcp: EOF\n"); 558 ncp_dbg(1, "tcp: EOF\n");
558 } 559 }
559 return -EIO; 560 return -EIO;
560 } 561 }
@@ -566,20 +567,20 @@ static int __ncptcp_rcv_proc(struct ncp_server *server)
566 switch (server->rcv.state) { 567 switch (server->rcv.state) {
567 case 0: 568 case 0:
568 if (server->rcv.buf.magic != htonl(NCP_TCP_RCVD_MAGIC)) { 569 if (server->rcv.buf.magic != htonl(NCP_TCP_RCVD_MAGIC)) {
569 printk(KERN_ERR "ncpfs: tcp: Unexpected reply type %08X\n", ntohl(server->rcv.buf.magic)); 570 pr_err("tcp: Unexpected reply type %08X\n", ntohl(server->rcv.buf.magic));
570 __ncptcp_abort(server); 571 __ncptcp_abort(server);
571 return -EIO; 572 return -EIO;
572 } 573 }
573 datalen = ntohl(server->rcv.buf.len) & 0x0FFFFFFF; 574 datalen = ntohl(server->rcv.buf.len) & 0x0FFFFFFF;
574 if (datalen < 10) { 575 if (datalen < 10) {
575 printk(KERN_ERR "ncpfs: tcp: Unexpected reply len %d\n", datalen); 576 pr_err("tcp: Unexpected reply len %d\n", datalen);
576 __ncptcp_abort(server); 577 __ncptcp_abort(server);
577 return -EIO; 578 return -EIO;
578 } 579 }
579#ifdef CONFIG_NCPFS_PACKET_SIGNING 580#ifdef CONFIG_NCPFS_PACKET_SIGNING
580 if (server->sign_active) { 581 if (server->sign_active) {
581 if (datalen < 18) { 582 if (datalen < 18) {
582 printk(KERN_ERR "ncpfs: tcp: Unexpected reply len %d\n", datalen); 583 pr_err("tcp: Unexpected reply len %d\n", datalen);
583 __ncptcp_abort(server); 584 __ncptcp_abort(server);
584 return -EIO; 585 return -EIO;
585 } 586 }
@@ -604,7 +605,7 @@ cont:;
604 server->rcv.len = datalen - 10; 605 server->rcv.len = datalen - 10;
605 break; 606 break;
606 } 607 }
607 DPRINTK("ncpfs: tcp: Unexpected NCP type %02X\n", type); 608 ncp_dbg(1, "tcp: Unexpected NCP type %02X\n", type);
608skipdata2:; 609skipdata2:;
609 server->rcv.state = 2; 610 server->rcv.state = 2;
610skipdata:; 611skipdata:;
@@ -614,11 +615,11 @@ skipdata:;
614 } 615 }
615 req = server->rcv.creq; 616 req = server->rcv.creq;
616 if (!req) { 617 if (!req) {
617 DPRINTK(KERN_ERR "ncpfs: Reply without appropriate request\n"); 618 ncp_dbg(1, "Reply without appropriate request\n");
618 goto skipdata2; 619 goto skipdata2;
619 } 620 }
620 if (datalen > req->datalen + 8) { 621 if (datalen > req->datalen + 8) {
621 printk(KERN_ERR "ncpfs: tcp: Unexpected reply len %d (expected at most %Zd)\n", datalen, req->datalen + 8); 622 pr_err("tcp: Unexpected reply len %d (expected at most %Zd)\n", datalen, req->datalen + 8);
622 server->rcv.state = 3; 623 server->rcv.state = 3;
623 goto skipdata; 624 goto skipdata;
624 } 625 }
@@ -638,12 +639,12 @@ skipdata:;
638 req = server->rcv.creq; 639 req = server->rcv.creq;
639 if (req->tx_type != NCP_ALLOC_SLOT_REQUEST) { 640 if (req->tx_type != NCP_ALLOC_SLOT_REQUEST) {
640 if (((struct ncp_reply_header*)server->rxbuf)->sequence != server->sequence) { 641 if (((struct ncp_reply_header*)server->rxbuf)->sequence != server->sequence) {
641 printk(KERN_ERR "ncpfs: tcp: Bad sequence number\n"); 642 pr_err("tcp: Bad sequence number\n");
642 __ncp_abort_request(server, req, -EIO); 643 __ncp_abort_request(server, req, -EIO);
643 return -EIO; 644 return -EIO;
644 } 645 }
645 if ((((struct ncp_reply_header*)server->rxbuf)->conn_low | (((struct ncp_reply_header*)server->rxbuf)->conn_high << 8)) != server->connection) { 646 if ((((struct ncp_reply_header*)server->rxbuf)->conn_low | (((struct ncp_reply_header*)server->rxbuf)->conn_high << 8)) != server->connection) {
646 printk(KERN_ERR "ncpfs: tcp: Connection number mismatch\n"); 647 pr_err("tcp: Connection number mismatch\n");
647 __ncp_abort_request(server, req, -EIO); 648 __ncp_abort_request(server, req, -EIO);
648 return -EIO; 649 return -EIO;
649 } 650 }
@@ -651,7 +652,7 @@ skipdata:;
651#ifdef CONFIG_NCPFS_PACKET_SIGNING 652#ifdef CONFIG_NCPFS_PACKET_SIGNING
652 if (server->sign_active && req->tx_type != NCP_DEALLOC_SLOT_REQUEST) { 653 if (server->sign_active && req->tx_type != NCP_DEALLOC_SLOT_REQUEST) {
653 if (sign_verify_reply(server, server->rxbuf + 6, req->datalen - 6, cpu_to_be32(req->datalen + 16), &server->rcv.buf.type)) { 654 if (sign_verify_reply(server, server->rxbuf + 6, req->datalen - 6, cpu_to_be32(req->datalen + 16), &server->rcv.buf.type)) {
654 printk(KERN_ERR "ncpfs: tcp: Signature violation\n"); 655 pr_err("tcp: Signature violation\n");
655 __ncp_abort_request(server, req, -EIO); 656 __ncp_abort_request(server, req, -EIO);
656 return -EIO; 657 return -EIO;
657 } 658 }
@@ -742,7 +743,7 @@ static int ncp_do_request(struct ncp_server *server, int size,
742 int result; 743 int result;
743 744
744 if (server->lock == 0) { 745 if (server->lock == 0) {
745 printk(KERN_ERR "ncpfs: Server not locked!\n"); 746 pr_err("Server not locked!\n");
746 return -EIO; 747 return -EIO;
747 } 748 }
748 if (!ncp_conn_valid(server)) { 749 if (!ncp_conn_valid(server)) {
@@ -781,7 +782,7 @@ static int ncp_do_request(struct ncp_server *server, int size,
781 spin_unlock_irqrestore(&current->sighand->siglock, flags); 782 spin_unlock_irqrestore(&current->sighand->siglock, flags);
782 } 783 }
783 784
784 DDPRINTK("do_ncp_rpc_call returned %d\n", result); 785 ncp_dbg(2, "do_ncp_rpc_call returned %d\n", result);
785 786
786 return result; 787 return result;
787} 788}
@@ -811,7 +812,7 @@ int ncp_request2(struct ncp_server *server, int function,
811 812
812 result = ncp_do_request(server, server->current_size, reply, size); 813 result = ncp_do_request(server, server->current_size, reply, size);
813 if (result < 0) { 814 if (result < 0) {
814 DPRINTK("ncp_request_error: %d\n", result); 815 ncp_dbg(1, "ncp_request_error: %d\n", result);
815 goto out; 816 goto out;
816 } 817 }
817 server->completion = reply->completion_code; 818 server->completion = reply->completion_code;
@@ -822,7 +823,7 @@ int ncp_request2(struct ncp_server *server, int function,
822 result = reply->completion_code; 823 result = reply->completion_code;
823 824
824 if (result != 0) 825 if (result != 0)
825 PPRINTK("ncp_request: completion code=%x\n", result); 826 ncp_vdbg("completion code=%x\n", result);
826out: 827out:
827 return result; 828 return result;
828} 829}
@@ -865,14 +866,14 @@ void ncp_lock_server(struct ncp_server *server)
865{ 866{
866 mutex_lock(&server->mutex); 867 mutex_lock(&server->mutex);
867 if (server->lock) 868 if (server->lock)
868 printk(KERN_WARNING "ncp_lock_server: was locked!\n"); 869 pr_warn("%s: was locked!\n", __func__);
869 server->lock = 1; 870 server->lock = 1;
870} 871}
871 872
872void ncp_unlock_server(struct ncp_server *server) 873void ncp_unlock_server(struct ncp_server *server)
873{ 874{
874 if (!server->lock) { 875 if (!server->lock) {
875 printk(KERN_WARNING "ncp_unlock_server: was not locked!\n"); 876 pr_warn("%s: was not locked!\n", __func__);
876 return; 877 return;
877 } 878 }
878 server->lock = 0; 879 server->lock = 0;
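
sock.c keeps its error reporting at printk severity but switches to pr_err()/pr_warn() and drops the hand-written "ncpfs: " and function-name prefixes: the module prefix now comes from pr_fmt() and the function name from "%s", __func__, so neither can drift out of date after a rename. A tiny demonstration of the __func__ form (the helper name below is invented for the sketch):

#include <stdio.h>

#define KBUILD_MODNAME "ncpfs"
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#define pr_warn(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

static void lock_server_demo(int already_locked)
{
    if (already_locked)
        /* prints "ncpfs: lock_server_demo: was locked!" */
        pr_warn("%s: was locked!\n", __func__);
}

int main(void)
{
    lock_server_demo(1);
    return 0;
}
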
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c
index 52439ddc8de0..1a63bfdb4a65 100644
--- a/fs/ncpfs/symlink.c
+++ b/fs/ncpfs/symlink.c
@@ -112,7 +112,7 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) {
112 __le32 attr; 112 __le32 attr;
113 unsigned int hdr; 113 unsigned int hdr;
114 114
115 DPRINTK("ncp_symlink(dir=%p,dentry=%p,symname=%s)\n",dir,dentry,symname); 115 ncp_dbg(1, "dir=%p, dentry=%p, symname=%s\n", dir, dentry, symname);
116 116
117 if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) 117 if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber))
118 kludge = 0; 118 kludge = 0;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index ae2e87b95453..41db5258e7a7 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -112,7 +112,8 @@ out:
112 * TODO: keep track of all layouts (and delegations) in a hash table 112 * TODO: keep track of all layouts (and delegations) in a hash table
113 * hashed by filehandle. 113 * hashed by filehandle.
114 */ 114 */
115static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh) 115static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
116 struct nfs_fh *fh, nfs4_stateid *stateid)
116{ 117{
117 struct nfs_server *server; 118 struct nfs_server *server;
118 struct inode *ino; 119 struct inode *ino;
@@ -120,17 +121,19 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
120 121
121 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 122 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
122 list_for_each_entry(lo, &server->layouts, plh_layouts) { 123 list_for_each_entry(lo, &server->layouts, plh_layouts) {
124 if (!nfs4_stateid_match_other(&lo->plh_stateid, stateid))
125 continue;
123 if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh)) 126 if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh))
124 continue; 127 continue;
125 ino = igrab(lo->plh_inode); 128 ino = igrab(lo->plh_inode);
126 if (!ino) 129 if (!ino)
127 continue; 130 break;
128 spin_lock(&ino->i_lock); 131 spin_lock(&ino->i_lock);
129 /* Is this layout in the process of being freed? */ 132 /* Is this layout in the process of being freed? */
130 if (NFS_I(ino)->layout != lo) { 133 if (NFS_I(ino)->layout != lo) {
131 spin_unlock(&ino->i_lock); 134 spin_unlock(&ino->i_lock);
132 iput(ino); 135 iput(ino);
133 continue; 136 break;
134 } 137 }
135 pnfs_get_layout_hdr(lo); 138 pnfs_get_layout_hdr(lo);
136 spin_unlock(&ino->i_lock); 139 spin_unlock(&ino->i_lock);
@@ -141,13 +144,14 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
141 return NULL; 144 return NULL;
142} 145}
143 146
144static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh) 147static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
148 struct nfs_fh *fh, nfs4_stateid *stateid)
145{ 149{
146 struct pnfs_layout_hdr *lo; 150 struct pnfs_layout_hdr *lo;
147 151
148 spin_lock(&clp->cl_lock); 152 spin_lock(&clp->cl_lock);
149 rcu_read_lock(); 153 rcu_read_lock();
150 lo = get_layout_by_fh_locked(clp, fh); 154 lo = get_layout_by_fh_locked(clp, fh, stateid);
151 rcu_read_unlock(); 155 rcu_read_unlock();
152 spin_unlock(&clp->cl_lock); 156 spin_unlock(&clp->cl_lock);
153 157
@@ -162,9 +166,9 @@ static u32 initiate_file_draining(struct nfs_client *clp,
162 u32 rv = NFS4ERR_NOMATCHING_LAYOUT; 166 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
163 LIST_HEAD(free_me_list); 167 LIST_HEAD(free_me_list);
164 168
165 lo = get_layout_by_fh(clp, &args->cbl_fh); 169 lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid);
166 if (!lo) 170 if (!lo)
167 return NFS4ERR_NOMATCHING_LAYOUT; 171 goto out;
168 172
169 ino = lo->plh_inode; 173 ino = lo->plh_inode;
170 spin_lock(&ino->i_lock); 174 spin_lock(&ino->i_lock);
@@ -179,6 +183,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
179 pnfs_free_lseg_list(&free_me_list); 183 pnfs_free_lseg_list(&free_me_list);
180 pnfs_put_layout_hdr(lo); 184 pnfs_put_layout_hdr(lo);
181 iput(ino); 185 iput(ino);
186out:
182 return rv; 187 return rv;
183} 188}
184 189
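
The CB_LAYOUTRECALL lookup in callback_proc.c now takes the recalled stateid as well as the filehandle: get_layout_by_fh_locked() first demands nfs4_stateid_match_other() (added in the nfs4_fs.h hunk further down) and, once that unique match is found, bails out with break rather than continue if the inode is being torn down; initiate_file_draining() keeps returning NFS4ERR_NOMATCHING_LAYOUT through the new out label. The "other" comparison itself is just a memcmp over the 12-byte opaque part of the stateid, ignoring seqid; a standalone sketch (struct layout and values simplified for illustration):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define NFS4_STATEID_OTHER_SIZE 12

/* simplified stateid: 4-byte sequence + 12-byte opaque "other" part */
typedef struct {
    uint32_t seqid;
    char other[NFS4_STATEID_OTHER_SIZE];
} nfs4_stateid;

/* same idea as the helper added in nfs4_fs.h: two stateids name the
 * same state if their "other" fields match, regardless of seqid */
static int nfs4_stateid_match_other(const nfs4_stateid *a,
                                    const nfs4_stateid *b)
{
    return memcmp(a->other, b->other, NFS4_STATEID_OTHER_SIZE) == 0;
}

int main(void)
{
    nfs4_stateid recalled = { .seqid = 3, .other = "layout-abc" };
    nfs4_stateid held     = { .seqid = 5, .other = "layout-abc" };

    /* different seqids, same underlying layout state */
    printf("match: %d\n", nfs4_stateid_match_other(&recalled, &held));
    return 0;
}
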
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4a48fe4b84b6..d9f3d067cd15 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -69,21 +69,28 @@ const struct address_space_operations nfs_dir_aops = {
69 69
70static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred) 70static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred)
71{ 71{
72 struct nfs_inode *nfsi = NFS_I(dir);
72 struct nfs_open_dir_context *ctx; 73 struct nfs_open_dir_context *ctx;
73 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 74 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
74 if (ctx != NULL) { 75 if (ctx != NULL) {
75 ctx->duped = 0; 76 ctx->duped = 0;
76 ctx->attr_gencount = NFS_I(dir)->attr_gencount; 77 ctx->attr_gencount = nfsi->attr_gencount;
77 ctx->dir_cookie = 0; 78 ctx->dir_cookie = 0;
78 ctx->dup_cookie = 0; 79 ctx->dup_cookie = 0;
79 ctx->cred = get_rpccred(cred); 80 ctx->cred = get_rpccred(cred);
81 spin_lock(&dir->i_lock);
82 list_add(&ctx->list, &nfsi->open_files);
83 spin_unlock(&dir->i_lock);
80 return ctx; 84 return ctx;
81 } 85 }
82 return ERR_PTR(-ENOMEM); 86 return ERR_PTR(-ENOMEM);
83} 87}
84 88
85static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx) 89static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx)
86{ 90{
91 spin_lock(&dir->i_lock);
92 list_del(&ctx->list);
93 spin_unlock(&dir->i_lock);
87 put_rpccred(ctx->cred); 94 put_rpccred(ctx->cred);
88 kfree(ctx); 95 kfree(ctx);
89} 96}
@@ -126,7 +133,7 @@ out:
126static int 133static int
127nfs_closedir(struct inode *inode, struct file *filp) 134nfs_closedir(struct inode *inode, struct file *filp)
128{ 135{
129 put_nfs_open_dir_context(filp->private_data); 136 put_nfs_open_dir_context(filp->f_path.dentry->d_inode, filp->private_data);
130 return 0; 137 return 0;
131} 138}
132 139
@@ -306,10 +313,9 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
306 if (printk_ratelimit()) { 313 if (printk_ratelimit()) {
307 pr_notice("NFS: directory %pD2 contains a readdir loop." 314 pr_notice("NFS: directory %pD2 contains a readdir loop."
308 "Please contact your server vendor. " 315 "Please contact your server vendor. "
309 "The file: %s has duplicate cookie %llu\n", 316 "The file: %.*s has duplicate cookie %llu\n",
310 desc->file, 317 desc->file, array->array[i].string.len,
311 array->array[i].string.name, 318 array->array[i].string.name, *desc->dir_cookie);
312 *desc->dir_cookie);
313 } 319 }
314 status = -ELOOP; 320 status = -ELOOP;
315 goto out; 321 goto out;
@@ -437,6 +443,22 @@ void nfs_advise_use_readdirplus(struct inode *dir)
437 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags); 443 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
438} 444}
439 445
446/*
447 * This function is mainly for use by nfs_getattr().
448 *
449 * If this is an 'ls -l', we want to force use of readdirplus.
450 * Do this by checking if there is an active file descriptor
451 * and calling nfs_advise_use_readdirplus, then forcing a
452 * cache flush.
453 */
454void nfs_force_use_readdirplus(struct inode *dir)
455{
456 if (!list_empty(&NFS_I(dir)->open_files)) {
457 nfs_advise_use_readdirplus(dir);
458 nfs_zap_mapping(dir, dir->i_mapping);
459 }
460}
461
440static 462static
441void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) 463void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
442{ 464{
@@ -815,6 +837,17 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
815 goto out; 837 goto out;
816} 838}
817 839
840static bool nfs_dir_mapping_need_revalidate(struct inode *dir)
841{
842 struct nfs_inode *nfsi = NFS_I(dir);
843
844 if (nfs_attribute_cache_expired(dir))
845 return true;
846 if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
847 return true;
848 return false;
849}
850
818/* The file offset position represents the dirent entry number. A 851/* The file offset position represents the dirent entry number. A
819 last cookie cache takes care of the common case of reading the 852 last cookie cache takes care of the common case of reading the
820 whole directory. 853 whole directory.
@@ -847,7 +880,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
847 desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; 880 desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
848 881
849 nfs_block_sillyrename(dentry); 882 nfs_block_sillyrename(dentry);
850 if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) 883 if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode))
851 res = nfs_revalidate_mapping(inode, file->f_mapping); 884 res = nfs_revalidate_mapping(inode, file->f_mapping);
852 if (res < 0) 885 if (res < 0)
853 goto out; 886 goto out;
@@ -1911,6 +1944,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1911 struct inode *old_inode = old_dentry->d_inode; 1944 struct inode *old_inode = old_dentry->d_inode;
1912 struct inode *new_inode = new_dentry->d_inode; 1945 struct inode *new_inode = new_dentry->d_inode;
1913 struct dentry *dentry = NULL, *rehash = NULL; 1946 struct dentry *dentry = NULL, *rehash = NULL;
1947 struct rpc_task *task;
1914 int error = -EBUSY; 1948 int error = -EBUSY;
1915 1949
1916 dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n", 1950 dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
@@ -1958,8 +1992,16 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1958 if (new_inode != NULL) 1992 if (new_inode != NULL)
1959 NFS_PROTO(new_inode)->return_delegation(new_inode); 1993 NFS_PROTO(new_inode)->return_delegation(new_inode);
1960 1994
1961 error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, 1995 task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
1962 new_dir, &new_dentry->d_name); 1996 if (IS_ERR(task)) {
1997 error = PTR_ERR(task);
1998 goto out;
1999 }
2000
2001 error = rpc_wait_for_completion_task(task);
2002 if (error == 0)
2003 error = task->tk_status;
2004 rpc_put_task(task);
1963 nfs_mark_for_revalidate(old_inode); 2005 nfs_mark_for_revalidate(old_inode);
1964out: 2006out:
1965 if (rehash) 2007 if (rehash)
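
nfs_rename() stops calling the per-version synchronous ->rename() method (the NFSv3 and NFSv4 copies of it are deleted later in this series) and instead submits the rename as an asynchronous RPC via nfs_async_rename(), waits for the task, and reads the outcome from task->tk_status before dropping the reference. A rough userspace analogue of that submit/wait/collect pattern, with a thread standing in for the rpc_task (all names in this sketch are invented):

#include <stdio.h>
#include <pthread.h>

/* userspace analogue of an async rpc_task: a thread plus a status */
struct task {
    pthread_t thread;
    int tk_status;
};

static void *do_rename(void *arg)
{
    struct task *t = arg;
    t->tk_status = 0;           /* pretend the RENAME RPC succeeded */
    return NULL;
}

/* analogue of nfs_async_rename(): kick off the work, return a handle */
static int async_rename(struct task *t)
{
    return pthread_create(&t->thread, NULL, do_rename, t);
}

int main(void)
{
    struct task t;
    int error = async_rename(&t);

    if (error)
        return 1;
    /* analogue of rpc_wait_for_completion_task(): block until done,
     * then take the operation's own result from the task */
    pthread_join(t.thread, NULL);
    error = t.tk_status;
    printf("rename result: %d\n", error);
    return error ? 1 : 0;
}
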
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5bb790a69c71..284ca901fe16 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -617,6 +617,7 @@ out:
617 617
618static const struct vm_operations_struct nfs_file_vm_ops = { 618static const struct vm_operations_struct nfs_file_vm_ops = {
619 .fault = filemap_fault, 619 .fault = filemap_fault,
620 .map_pages = filemap_map_pages,
620 .page_mkwrite = nfs_vm_page_mkwrite, 621 .page_mkwrite = nfs_vm_page_mkwrite,
621 .remap_pages = generic_file_remap_pages, 622 .remap_pages = generic_file_remap_pages,
622}; 623};
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c4702baa22b8..0c438973f3c8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -588,6 +588,25 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
588} 588}
589EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); 589EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
590 590
591static void nfs_request_parent_use_readdirplus(struct dentry *dentry)
592{
593 struct dentry *parent;
594
595 parent = dget_parent(dentry);
596 nfs_force_use_readdirplus(parent->d_inode);
597 dput(parent);
598}
599
600static bool nfs_need_revalidate_inode(struct inode *inode)
601{
602 if (NFS_I(inode)->cache_validity &
603 (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
604 return true;
605 if (nfs_attribute_cache_expired(inode))
606 return true;
607 return false;
608}
609
591int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 610int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
592{ 611{
593 struct inode *inode = dentry->d_inode; 612 struct inode *inode = dentry->d_inode;
@@ -616,10 +635,13 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
616 ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) 635 ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
617 need_atime = 0; 636 need_atime = 0;
618 637
619 if (need_atime) 638 if (need_atime || nfs_need_revalidate_inode(inode)) {
620 err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); 639 struct nfs_server *server = NFS_SERVER(inode);
621 else 640
622 err = nfs_revalidate_inode(NFS_SERVER(inode), inode); 641 if (server->caps & NFS_CAP_READDIRPLUS)
642 nfs_request_parent_use_readdirplus(dentry);
643 err = __nfs_revalidate_inode(server, inode);
644 }
623 if (!err) { 645 if (!err) {
624 generic_fillattr(inode, stat); 646 generic_fillattr(inode, stat);
625 stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); 647 stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
@@ -961,9 +983,7 @@ int nfs_attribute_cache_expired(struct inode *inode)
961 */ 983 */
962int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) 984int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
963{ 985{
964 if (!(NFS_I(inode)->cache_validity & 986 if (!nfs_need_revalidate_inode(inode))
965 (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
966 && !nfs_attribute_cache_expired(inode))
967 return NFS_STALE(inode) ? -ESTALE : 0; 987 return NFS_STALE(inode) ? -ESTALE : 0;
968 return __nfs_revalidate_inode(server, inode); 988 return __nfs_revalidate_inode(server, inode);
969} 989}
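
On the inode side, nfs_getattr() now routes its cache check through nfs_need_revalidate_inode() and, whenever it is about to revalidate and the server supports READDIRPLUS, calls nfs_force_use_readdirplus() on the parent directory (which only acts if that directory has entries on the open_files list maintained in dir.c above). The heuristic: a burst of per-file GETATTRs underneath an open directory looks like an "ls -l", which is served far more cheaply by one READDIRPLUS sweep. A compressed, userspace view of the decision, with the kernel's validity flags reduced to booleans (all names here are illustrative):

#include <stdio.h>
#include <stdbool.h>

struct inode_state {
    bool invalid_attr;          /* NFS_INO_INVALID_ATTR / _LABEL set */
    bool cache_expired;         /* attribute cache timeout elapsed */
    bool parent_has_open_files; /* parent dir on the open_files list */
    bool server_has_readdirplus;
};

static bool need_revalidate(const struct inode_state *i)
{
    return i->invalid_attr || i->cache_expired;
}

/* sketch of the nfs_getattr() decision added by this series */
static void getattr(const struct inode_state *i, bool need_atime)
{
    if (need_atime || need_revalidate(i)) {
        if (i->server_has_readdirplus && i->parent_has_open_files)
            printf("force READDIRPLUS on parent, flush its cache\n");
        printf("revalidate inode over the wire\n");
    } else {
        printf("serve getattr from cache\n");
    }
}

int main(void)
{
    struct inode_state i = { true, false, true, true };
    getattr(&i, false);
    return 0;
}
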
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b46cf5a67329..dd8bfc2e2464 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -301,6 +301,7 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
301 const char *ip_addr); 301 const char *ip_addr);
302 302
303/* dir.c */ 303/* dir.c */
304extern void nfs_force_use_readdirplus(struct inode *dir);
304extern unsigned long nfs_access_cache_count(struct shrinker *shrink, 305extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
305 struct shrink_control *sc); 306 struct shrink_control *sc);
306extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, 307extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
@@ -474,6 +475,13 @@ extern int nfs_migrate_page(struct address_space *,
474#define nfs_migrate_page NULL 475#define nfs_migrate_page NULL
475#endif 476#endif
476 477
478/* unlink.c */
479extern struct rpc_task *
480nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
481 struct dentry *old_dentry, struct dentry *new_dentry,
482 void (*complete)(struct rpc_task *, struct nfs_renamedata *));
483extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
484
477/* direct.c */ 485/* direct.c */
478void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, 486void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
479 struct nfs_direct_req *dreq); 487 struct nfs_direct_req *dreq);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index a462ef0fb5d6..db60149c4579 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -479,41 +479,6 @@ nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
479} 479}
480 480
481static int 481static int
482nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
483 struct inode *new_dir, struct qstr *new_name)
484{
485 struct nfs_renameargs arg = {
486 .old_dir = NFS_FH(old_dir),
487 .old_name = old_name,
488 .new_dir = NFS_FH(new_dir),
489 .new_name = new_name,
490 };
491 struct nfs_renameres res;
492 struct rpc_message msg = {
493 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
494 .rpc_argp = &arg,
495 .rpc_resp = &res,
496 };
497 int status = -ENOMEM;
498
499 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
500
501 res.old_fattr = nfs_alloc_fattr();
502 res.new_fattr = nfs_alloc_fattr();
503 if (res.old_fattr == NULL || res.new_fattr == NULL)
504 goto out;
505
506 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
507 nfs_post_op_update_inode(old_dir, res.old_fattr);
508 nfs_post_op_update_inode(new_dir, res.new_fattr);
509out:
510 nfs_free_fattr(res.old_fattr);
511 nfs_free_fattr(res.new_fattr);
512 dprintk("NFS reply rename: %d\n", status);
513 return status;
514}
515
516static int
517nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) 482nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
518{ 483{
519 struct nfs3_linkargs arg = { 484 struct nfs3_linkargs arg = {
@@ -968,7 +933,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
968 .unlink_setup = nfs3_proc_unlink_setup, 933 .unlink_setup = nfs3_proc_unlink_setup,
969 .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare, 934 .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare,
970 .unlink_done = nfs3_proc_unlink_done, 935 .unlink_done = nfs3_proc_unlink_done,
971 .rename = nfs3_proc_rename,
972 .rename_setup = nfs3_proc_rename_setup, 936 .rename_setup = nfs3_proc_rename_setup,
973 .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare, 937 .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare,
974 .rename_done = nfs3_proc_rename_done, 938 .rename_done = nfs3_proc_rename_done,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a5b27c2d9689..e1d1badbe53c 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -427,6 +427,7 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
427extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 427extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
428extern void nfs_inode_find_state_and_recover(struct inode *inode, 428extern void nfs_inode_find_state_and_recover(struct inode *inode,
429 const nfs4_stateid *stateid); 429 const nfs4_stateid *stateid);
430extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *, struct nfs4_state *);
430extern void nfs4_schedule_lease_recovery(struct nfs_client *); 431extern void nfs4_schedule_lease_recovery(struct nfs_client *);
431extern int nfs4_wait_clnt_recover(struct nfs_client *clp); 432extern int nfs4_wait_clnt_recover(struct nfs_client *clp);
432extern int nfs4_client_recover_expired_lease(struct nfs_client *clp); 433extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
@@ -500,6 +501,16 @@ static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_statei
500 return memcmp(dst, src, sizeof(*dst)) == 0; 501 return memcmp(dst, src, sizeof(*dst)) == 0;
501} 502}
502 503
504static inline bool nfs4_stateid_match_other(const nfs4_stateid *dst, const nfs4_stateid *src)
505{
506 return memcmp(dst->other, src->other, NFS4_STATEID_OTHER_SIZE) == 0;
507}
508
509static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stateid *s2)
510{
511 return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0;
512}
513
503static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state) 514static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
504{ 515{
505 return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0; 516 return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
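
Alongside nfs4_stateid_match_other(), the header gains nfs4_stateid_is_newer(), which compares sequence IDs with serial-number arithmetic: the unsigned difference is reinterpreted as signed, so the ordering stays correct even when the seqid wraps past 0xffffffff (the kernel first converts the on-the-wire big-endian seqid with be32_to_cpu()). A quick standalone check of the wraparound behaviour:

#include <stdio.h>
#include <stdint.h>

/* serial-number comparison, as in nfs4_stateid_is_newer() */
static int is_newer(uint32_t s1, uint32_t s2)
{
    return (int32_t)(s1 - s2) > 0;
}

int main(void)
{
    /* ordinary case: 7 is newer than 5 */
    printf("%d\n", is_newer(7, 5));                 /* 1 */
    /* wraparound: 2 follows 0xfffffffe, so it is "newer"
     * despite being numerically smaller */
    printf("%d\n", is_newer(2, 0xfffffffeu));       /* 1 */
    /* and the reverse direction correctly reports "older" */
    printf("%d\n", is_newer(0xfffffffeu, 2));       /* 0 */
    return 0;
}
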
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 0e46d3d1b6cc..aa9ef4876046 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -531,6 +531,13 @@ int nfs40_walk_client_list(struct nfs_client *new,
531 *result = pos; 531 *result = pos;
532 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", 532 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
533 __func__, pos, atomic_read(&pos->cl_count)); 533 __func__, pos, atomic_read(&pos->cl_count));
534 goto out;
535 case -ERESTARTSYS:
536 case -ETIMEDOUT:
537 /* The callback path may have been inadvertently
538 * changed. Schedule recovery!
539 */
540 nfs4_schedule_path_down_recovery(pos);
534 default: 541 default:
535 goto out; 542 goto out;
536 } 543 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 450bfedbe2f4..397be39c6dc8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1068,6 +1068,7 @@ static void nfs4_opendata_free(struct kref *kref)
1068 dput(p->dentry); 1068 dput(p->dentry);
1069 nfs_sb_deactive(sb); 1069 nfs_sb_deactive(sb);
1070 nfs_fattr_free_names(&p->f_attr); 1070 nfs_fattr_free_names(&p->f_attr);
1071 kfree(p->f_attr.mdsthreshold);
1071 kfree(p); 1072 kfree(p);
1072} 1073}
1073 1074
@@ -1137,12 +1138,71 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
1137 nfs4_state_set_mode_locked(state, state->state | fmode); 1138 nfs4_state_set_mode_locked(state, state->state | fmode);
1138} 1139}
1139 1140
1140static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) 1141static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
1142{
1143 struct nfs_client *clp = state->owner->so_server->nfs_client;
1144 bool need_recover = false;
1145
1146 if (test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags) && state->n_rdonly)
1147 need_recover = true;
1148 if (test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags) && state->n_wronly)
1149 need_recover = true;
1150 if (test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags) && state->n_rdwr)
1151 need_recover = true;
1152 if (need_recover)
1153 nfs4_state_mark_reclaim_nograce(clp, state);
1154}
1155
1156static bool nfs_need_update_open_stateid(struct nfs4_state *state,
1157 nfs4_stateid *stateid)
1158{
1159 if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0)
1160 return true;
1161 if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) {
1162 nfs_test_and_clear_all_open_stateid(state);
1163 return true;
1164 }
1165 if (nfs4_stateid_is_newer(stateid, &state->open_stateid))
1166 return true;
1167 return false;
1168}
1169
1170static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
1171 nfs4_stateid *stateid, fmode_t fmode)
1141{ 1172{
1173 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1174 switch (fmode & (FMODE_READ|FMODE_WRITE)) {
1175 case FMODE_WRITE:
1176 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1177 break;
1178 case FMODE_READ:
1179 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1180 break;
1181 case 0:
1182 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1183 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1184 clear_bit(NFS_OPEN_STATE, &state->flags);
1185 }
1186 if (stateid == NULL)
1187 return;
1188 if (!nfs_need_update_open_stateid(state, stateid))
1189 return;
1142 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) 1190 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
1143 nfs4_stateid_copy(&state->stateid, stateid); 1191 nfs4_stateid_copy(&state->stateid, stateid);
1144 nfs4_stateid_copy(&state->open_stateid, stateid); 1192 nfs4_stateid_copy(&state->open_stateid, stateid);
1145 set_bit(NFS_OPEN_STATE, &state->flags); 1193}
1194
1195static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
1196{
1197 write_seqlock(&state->seqlock);
1198 nfs_clear_open_stateid_locked(state, stateid, fmode);
1199 write_sequnlock(&state->seqlock);
1200 if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
1201 nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
1202}
1203
1204static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
1205{
1146 switch (fmode) { 1206 switch (fmode) {
1147 case FMODE_READ: 1207 case FMODE_READ:
1148 set_bit(NFS_O_RDONLY_STATE, &state->flags); 1208 set_bit(NFS_O_RDONLY_STATE, &state->flags);
@@ -1153,13 +1213,11 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *
1153 case FMODE_READ|FMODE_WRITE: 1213 case FMODE_READ|FMODE_WRITE:
1154 set_bit(NFS_O_RDWR_STATE, &state->flags); 1214 set_bit(NFS_O_RDWR_STATE, &state->flags);
1155 } 1215 }
1156} 1216 if (!nfs_need_update_open_stateid(state, stateid))
1157 1217 return;
1158static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) 1218 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
1159{ 1219 nfs4_stateid_copy(&state->stateid, stateid);
1160 write_seqlock(&state->seqlock); 1220 nfs4_stateid_copy(&state->open_stateid, stateid);
1161 nfs_set_open_stateid_locked(state, stateid, fmode);
1162 write_sequnlock(&state->seqlock);
1163} 1221}
1164 1222
1165static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode) 1223static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode)
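
The open-stateid rework in nfs4proc.c splits the old nfs_set_open_stateid_locked() behaviour in two. nfs_need_update_open_stateid() decides whether a stateid returned by the server should be recorded at all: yes if it is the first one seen, yes if its "other" field names different state (in which case any still-referenced open modes are scheduled for no-grace reclaim via nfs_test_and_clear_all_open_stateid()), and otherwise only if its seqid is newer, so delayed or replayed OPEN replies can no longer clobber newer state. CLOSE replies now go through the new nfs_clear_open_stateid() instead of the removed nfs4_close_clear_stateid_flags(). A compressed model of that accept/ignore policy (structure and names simplified for the sketch; the kernel uses atomic bit operations and a seqlock around this):

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>

typedef struct {
    uint32_t seqid;
    char other[12];
} stateid_t;

struct open_state {
    bool have_stateid;          /* stands in for NFS_OPEN_STATE */
    stateid_t open_stateid;
};

static bool is_newer(uint32_t a, uint32_t b)
{
    return (int32_t)(a - b) > 0;
}

/* roughly the policy of nfs_need_update_open_stateid(): take the new
 * stateid if we had none, if it names different state (the kernel also
 * schedules state recovery in that case), or if it is strictly newer */
static bool need_update(struct open_state *st, const stateid_t *new)
{
    if (!st->have_stateid) {
        st->have_stateid = true;
        return true;
    }
    if (memcmp(st->open_stateid.other, new->other, sizeof(new->other)) != 0)
        return true;
    return is_newer(new->seqid, st->open_stateid.seqid);
}

int main(void)
{
    struct open_state st = { 0 };
    stateid_t first = { .seqid = 1, .other = "abc" };
    stateid_t stale = { .seqid = 1, .other = "abc" };

    printf("%d\n", need_update(&st, &first));   /* 1: first stateid seen */
    st.open_stateid = first;
    printf("%d\n", need_update(&st, &stale));   /* 0: replay, ignored */
    return 0;
}
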
@@ -1217,6 +1275,8 @@ no_delegation:
1217 __update_open_stateid(state, open_stateid, NULL, fmode); 1275 __update_open_stateid(state, open_stateid, NULL, fmode);
1218 ret = 1; 1276 ret = 1;
1219 } 1277 }
1278 if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
1279 nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
1220 1280
1221 return ret; 1281 return ret;
1222} 1282}
@@ -1450,12 +1510,15 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1450 struct nfs4_state *newstate; 1510 struct nfs4_state *newstate;
1451 int ret; 1511 int ret;
1452 1512
1513 /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */
1514 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1515 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1516 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1453 /* memory barrier prior to reading state->n_* */ 1517 /* memory barrier prior to reading state->n_* */
1454 clear_bit(NFS_DELEGATED_STATE, &state->flags); 1518 clear_bit(NFS_DELEGATED_STATE, &state->flags);
1455 clear_bit(NFS_OPEN_STATE, &state->flags); 1519 clear_bit(NFS_OPEN_STATE, &state->flags);
1456 smp_rmb(); 1520 smp_rmb();
1457 if (state->n_rdwr != 0) { 1521 if (state->n_rdwr != 0) {
1458 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1459 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); 1522 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
1460 if (ret != 0) 1523 if (ret != 0)
1461 return ret; 1524 return ret;
@@ -1463,7 +1526,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1463 return -ESTALE; 1526 return -ESTALE;
1464 } 1527 }
1465 if (state->n_wronly != 0) { 1528 if (state->n_wronly != 0) {
1466 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1467 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); 1529 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
1468 if (ret != 0) 1530 if (ret != 0)
1469 return ret; 1531 return ret;
@@ -1471,7 +1533,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1471 return -ESTALE; 1533 return -ESTALE;
1472 } 1534 }
1473 if (state->n_rdonly != 0) { 1535 if (state->n_rdonly != 0) {
1474 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1475 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); 1536 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
1476 if (ret != 0) 1537 if (ret != 0)
1477 return ret; 1538 return ret;
@@ -2244,10 +2305,12 @@ static int _nfs4_do_open(struct inode *dir,
2244 } 2305 }
2245 } 2306 }
2246 2307
2247 if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { 2308 if (server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
2248 opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); 2309 if (!opendata->f_attr.mdsthreshold) {
2249 if (!opendata->f_attr.mdsthreshold) 2310 opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
2250 goto err_free_label; 2311 if (!opendata->f_attr.mdsthreshold)
2312 goto err_free_label;
2313 }
2251 opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; 2314 opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0];
2252 } 2315 }
2253 if (dentry->d_inode != NULL) 2316 if (dentry->d_inode != NULL)
@@ -2275,11 +2338,10 @@ static int _nfs4_do_open(struct inode *dir,
2275 if (opendata->file_created) 2338 if (opendata->file_created)
2276 *opened |= FILE_CREATED; 2339 *opened |= FILE_CREATED;
2277 2340
2278 if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) 2341 if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) {
2279 *ctx_th = opendata->f_attr.mdsthreshold; 2342 *ctx_th = opendata->f_attr.mdsthreshold;
2280 else 2343 opendata->f_attr.mdsthreshold = NULL;
2281 kfree(opendata->f_attr.mdsthreshold); 2344 }
2282 opendata->f_attr.mdsthreshold = NULL;
2283 2345
2284 nfs4_label_free(olabel); 2346 nfs4_label_free(olabel);
2285 2347
@@ -2289,7 +2351,6 @@ static int _nfs4_do_open(struct inode *dir,
2289err_free_label: 2351err_free_label:
2290 nfs4_label_free(olabel); 2352 nfs4_label_free(olabel);
2291err_opendata_put: 2353err_opendata_put:
2292 kfree(opendata->f_attr.mdsthreshold);
2293 nfs4_opendata_put(opendata); 2354 nfs4_opendata_put(opendata);
2294err_put_state_owner: 2355err_put_state_owner:
2295 nfs4_put_state_owner(sp); 2356 nfs4_put_state_owner(sp);
@@ -2479,26 +2540,6 @@ static void nfs4_free_closedata(void *data)
2479 kfree(calldata); 2540 kfree(calldata);
2480} 2541}
2481 2542
2482static void nfs4_close_clear_stateid_flags(struct nfs4_state *state,
2483 fmode_t fmode)
2484{
2485 spin_lock(&state->owner->so_lock);
2486 clear_bit(NFS_O_RDWR_STATE, &state->flags);
2487 switch (fmode & (FMODE_READ|FMODE_WRITE)) {
2488 case FMODE_WRITE:
2489 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
2490 break;
2491 case FMODE_READ:
2492 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
2493 break;
2494 case 0:
2495 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
2496 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
2497 clear_bit(NFS_OPEN_STATE, &state->flags);
2498 }
2499 spin_unlock(&state->owner->so_lock);
2500}
2501
2502static void nfs4_close_done(struct rpc_task *task, void *data) 2543static void nfs4_close_done(struct rpc_task *task, void *data)
2503{ 2544{
2504 struct nfs4_closedata *calldata = data; 2545 struct nfs4_closedata *calldata = data;
@@ -2517,9 +2558,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2517 if (calldata->roc) 2558 if (calldata->roc)
2518 pnfs_roc_set_barrier(state->inode, 2559 pnfs_roc_set_barrier(state->inode,
2519 calldata->roc_barrier); 2560 calldata->roc_barrier);
2520 nfs_set_open_stateid(state, &calldata->res.stateid, 0); 2561 nfs_clear_open_stateid(state, &calldata->res.stateid, 0);
2521 renew_lease(server, calldata->timestamp); 2562 renew_lease(server, calldata->timestamp);
2522 break; 2563 goto out_release;
2523 case -NFS4ERR_ADMIN_REVOKED: 2564 case -NFS4ERR_ADMIN_REVOKED:
2524 case -NFS4ERR_STALE_STATEID: 2565 case -NFS4ERR_STALE_STATEID:
2525 case -NFS4ERR_OLD_STATEID: 2566 case -NFS4ERR_OLD_STATEID:
@@ -2533,7 +2574,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2533 goto out_release; 2574 goto out_release;
2534 } 2575 }
2535 } 2576 }
2536 nfs4_close_clear_stateid_flags(state, calldata->arg.fmode); 2577 nfs_clear_open_stateid(state, NULL, calldata->arg.fmode);
2537out_release: 2578out_release:
2538 nfs_release_seqid(calldata->arg.seqid); 2579 nfs_release_seqid(calldata->arg.seqid);
2539 nfs_refresh_inode(calldata->inode, calldata->res.fattr); 2580 nfs_refresh_inode(calldata->inode, calldata->res.fattr);
@@ -3507,49 +3548,6 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
3507 return 1; 3548 return 1;
3508} 3549}
3509 3550
3510static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
3511 struct inode *new_dir, struct qstr *new_name)
3512{
3513 struct nfs_server *server = NFS_SERVER(old_dir);
3514 struct nfs_renameargs arg = {
3515 .old_dir = NFS_FH(old_dir),
3516 .new_dir = NFS_FH(new_dir),
3517 .old_name = old_name,
3518 .new_name = new_name,
3519 };
3520 struct nfs_renameres res = {
3521 .server = server,
3522 };
3523 struct rpc_message msg = {
3524 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
3525 .rpc_argp = &arg,
3526 .rpc_resp = &res,
3527 };
3528 int status = -ENOMEM;
3529
3530 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
3531 if (!status) {
3532 update_changeattr(old_dir, &res.old_cinfo);
3533 update_changeattr(new_dir, &res.new_cinfo);
3534 }
3535 return status;
3536}
3537
3538static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
3539 struct inode *new_dir, struct qstr *new_name)
3540{
3541 struct nfs4_exception exception = { };
3542 int err;
3543 do {
3544 err = _nfs4_proc_rename(old_dir, old_name,
3545 new_dir, new_name);
3546 trace_nfs4_rename(old_dir, old_name, new_dir, new_name, err);
3547 err = nfs4_handle_exception(NFS_SERVER(old_dir), err,
3548 &exception);
3549 } while (exception.retry);
3550 return err;
3551}
3552
3553static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) 3551static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
3554{ 3552{
3555 struct nfs_server *server = NFS_SERVER(inode); 3553 struct nfs_server *server = NFS_SERVER(inode);
@@ -4884,6 +4882,20 @@ nfs4_init_uniform_client_string(const struct nfs_client *clp,
4884 nodename); 4882 nodename);
4885} 4883}
4886 4884
4885/*
4886 * nfs4_callback_up_net() starts only "tcp" and "tcp6" callback
4887 * services. Advertise one based on the address family of the
4888 * clientaddr.
4889 */
4890static unsigned int
4891nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len)
4892{
4893 if (strchr(clp->cl_ipaddr, ':') != NULL)
4894 return scnprintf(buf, len, "tcp6");
4895 else
4896 return scnprintf(buf, len, "tcp");
4897}
4898
4887/** 4899/**
4888 * nfs4_proc_setclientid - Negotiate client ID 4900 * nfs4_proc_setclientid - Negotiate client ID
4889 * @clp: state data structure 4901 * @clp: state data structure
@@ -4925,12 +4937,10 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
4925 setclientid.sc_name, 4937 setclientid.sc_name,
4926 sizeof(setclientid.sc_name)); 4938 sizeof(setclientid.sc_name));
4927 /* cb_client4 */ 4939 /* cb_client4 */
4928 rcu_read_lock(); 4940 setclientid.sc_netid_len =
4929 setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, 4941 nfs4_init_callback_netid(clp,
4930 sizeof(setclientid.sc_netid), "%s", 4942 setclientid.sc_netid,
4931 rpc_peeraddr2str(clp->cl_rpcclient, 4943 sizeof(setclientid.sc_netid));
4932 RPC_DISPLAY_NETID));
4933 rcu_read_unlock();
4934 setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, 4944 setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
4935 sizeof(setclientid.sc_uaddr), "%s.%u.%u", 4945 sizeof(setclientid.sc_uaddr), "%s.%u.%u",
4936 clp->cl_ipaddr, port >> 8, port & 255); 4946 clp->cl_ipaddr, port >> 8, port & 255);
@@ -8408,7 +8418,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
8408 .unlink_setup = nfs4_proc_unlink_setup, 8418 .unlink_setup = nfs4_proc_unlink_setup,
8409 .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare, 8419 .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare,
8410 .unlink_done = nfs4_proc_unlink_done, 8420 .unlink_done = nfs4_proc_unlink_done,
8411 .rename = nfs4_proc_rename,
8412 .rename_setup = nfs4_proc_rename_setup, 8421 .rename_setup = nfs4_proc_rename_setup,
8413 .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare, 8422 .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare,
8414 .rename_done = nfs4_proc_rename_done, 8423 .rename_done = nfs4_proc_rename_done,
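[Editor's note] The new nfs4_init_callback_netid() helper in the hunk above replaces the rpc_peeraddr2str() lookup: since nfs4_callback_up_net() only registers "tcp" and "tcp6" listeners, the netid advertised in SETCLIENTID can be derived purely from the address family of clp->cl_ipaddr. A minimal userspace sketch of the same decision (the helper name here is illustrative, not the kernel code itself):

#include <stdio.h>
#include <string.h>

/* An IPv6 presentation address always contains ':', so it maps to
 * "tcp6"; anything else is treated as IPv4 and maps to "tcp". */
static const char *callback_netid(const char *clientaddr)
{
	return strchr(clientaddr, ':') ? "tcp6" : "tcp";
}

int main(void)
{
	printf("%s\n", callback_netid("192.0.2.1"));   /* tcp  */
	printf("%s\n", callback_netid("2001:db8::1")); /* tcp6 */
	return 0;
}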
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 0deb32105ccf..2349518eef2c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1316,7 +1316,7 @@ static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_st
1316 return 1; 1316 return 1;
1317} 1317}
1318 1318
1319static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) 1319int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
1320{ 1320{
1321 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); 1321 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
1322 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1322 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -2075,8 +2075,10 @@ again:
2075 switch (status) { 2075 switch (status) {
2076 case 0: 2076 case 0:
2077 break; 2077 break;
2078 case -NFS4ERR_DELAY:
2079 case -ETIMEDOUT: 2078 case -ETIMEDOUT:
2079 if (clnt->cl_softrtry)
2080 break;
2081 case -NFS4ERR_DELAY:
2080 case -EAGAIN: 2082 case -EAGAIN:
2081 ssleep(1); 2083 ssleep(1);
2082 case -NFS4ERR_STALE_CLIENTID: 2084 case -NFS4ERR_STALE_CLIENTID:
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 72f3bf1754ef..73ce8d4fe2c8 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -203,8 +203,7 @@ static int nfs4_stat_to_errno(int);
203 2 + encode_verifier_maxsz + 5 + \ 203 2 + encode_verifier_maxsz + 5 + \
204 nfs4_label_maxsz) 204 nfs4_label_maxsz)
205#define decode_readdir_maxsz (op_decode_hdr_maxsz + \ 205#define decode_readdir_maxsz (op_decode_hdr_maxsz + \
206 decode_verifier_maxsz + \ 206 decode_verifier_maxsz)
207 nfs4_label_maxsz + nfs4_fattr_maxsz)
208#define encode_readlink_maxsz (op_encode_hdr_maxsz) 207#define encode_readlink_maxsz (op_encode_hdr_maxsz)
209#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1) 208#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
210#define encode_write_maxsz (op_encode_hdr_maxsz + \ 209#define encode_write_maxsz (op_encode_hdr_maxsz + \
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 4755858e37a0..cb53d450ae32 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -662,7 +662,18 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
662 */ 662 */
663static bool pnfs_seqid_is_newer(u32 s1, u32 s2) 663static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
664{ 664{
665 return (s32)s1 - (s32)s2 > 0; 665 return (s32)(s1 - s2) > 0;
666}
667
668static void
669pnfs_verify_layout_stateid(struct pnfs_layout_hdr *lo,
670 const nfs4_stateid *new,
671 struct list_head *free_me_list)
672{
673 if (nfs4_stateid_match_other(&lo->plh_stateid, new))
674 return;
675 /* Layout is new! Kill existing layout segments */
676 pnfs_mark_matching_lsegs_invalid(lo, free_me_list, NULL);
666} 677}
667 678
668/* update lo->plh_stateid with new if is more recent */ 679/* update lo->plh_stateid with new if is more recent */
@@ -1315,6 +1326,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
1315 struct nfs4_layoutget_res *res = &lgp->res; 1326 struct nfs4_layoutget_res *res = &lgp->res;
1316 struct pnfs_layout_segment *lseg; 1327 struct pnfs_layout_segment *lseg;
1317 struct inode *ino = lo->plh_inode; 1328 struct inode *ino = lo->plh_inode;
1329 LIST_HEAD(free_me);
1318 int status = 0; 1330 int status = 0;
1319 1331
1320 /* Inject layout blob into I/O device driver */ 1332 /* Inject layout blob into I/O device driver */
@@ -1341,6 +1353,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
1341 goto out_forget_reply; 1353 goto out_forget_reply;
1342 } 1354 }
1343 1355
1356 /* Check that the new stateid matches the old stateid */
1357 pnfs_verify_layout_stateid(lo, &res->stateid, &free_me);
1344 /* Done processing layoutget. Set the layout stateid */ 1358 /* Done processing layoutget. Set the layout stateid */
1345 pnfs_set_layout_stateid(lo, &res->stateid, false); 1359 pnfs_set_layout_stateid(lo, &res->stateid, false);
1346 1360
@@ -1355,6 +1369,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
1355 } 1369 }
1356 1370
1357 spin_unlock(&ino->i_lock); 1371 spin_unlock(&ino->i_lock);
1372 pnfs_free_lseg_list(&free_me);
1358 return lseg; 1373 return lseg;
1359out: 1374out:
1360 return ERR_PTR(status); 1375 return ERR_PTR(status);
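[Editor's note] The one-character change to pnfs_seqid_is_newer() is easy to miss: the subtraction now happens in unsigned arithmetic and only the result is cast, so the comparison stays well defined (no signed overflow on distant values) and still treats a wrapped 32-bit sequence counter as newer. A standalone sketch of the same idiom:

#include <stdint.h>
#include <stdio.h>

/* Serial-number style comparison: s1 is "newer" than s2 if the
 * unsigned difference, reinterpreted as signed, is positive. */
static int seqid_is_newer(uint32_t s1, uint32_t s2)
{
	return (int32_t)(s1 - s2) > 0;
}

int main(void)
{
	/* s1 has just wrapped past zero but is still newer than s2. */
	printf("%d\n", seqid_is_newer(2u, 0xfffffffeu)); /* prints 1 */
	printf("%d\n", seqid_is_newer(0xfffffffeu, 2u)); /* prints 0 */
	return 0;
}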
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index fddbba2d9eff..e55ce9e8b034 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -357,30 +357,6 @@ nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
357} 357}
358 358
359static int 359static int
360nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
361 struct inode *new_dir, struct qstr *new_name)
362{
363 struct nfs_renameargs arg = {
364 .old_dir = NFS_FH(old_dir),
365 .old_name = old_name,
366 .new_dir = NFS_FH(new_dir),
367 .new_name = new_name,
368 };
369 struct rpc_message msg = {
370 .rpc_proc = &nfs_procedures[NFSPROC_RENAME],
371 .rpc_argp = &arg,
372 };
373 int status;
374
375 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
376 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
377 nfs_mark_for_revalidate(old_dir);
378 nfs_mark_for_revalidate(new_dir);
379 dprintk("NFS reply rename: %d\n", status);
380 return status;
381}
382
383static int
384nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) 360nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
385{ 361{
386 struct nfs_linkargs arg = { 362 struct nfs_linkargs arg = {
@@ -745,7 +721,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
745 .unlink_setup = nfs_proc_unlink_setup, 721 .unlink_setup = nfs_proc_unlink_setup,
746 .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare, 722 .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare,
747 .unlink_done = nfs_proc_unlink_done, 723 .unlink_done = nfs_proc_unlink_done,
748 .rename = nfs_proc_rename,
749 .rename_setup = nfs_proc_rename_setup, 724 .rename_setup = nfs_proc_rename_setup,
750 .rename_rpc_prepare = nfs_proc_rename_rpc_prepare, 725 .rename_rpc_prepare = nfs_proc_rename_rpc_prepare,
751 .rename_done = nfs_proc_rename_done, 726 .rename_done = nfs_proc_rename_done,
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 11d78944de79..de54129336c6 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -14,6 +14,7 @@
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/wait.h> 15#include <linux/wait.h>
16#include <linux/namei.h> 16#include <linux/namei.h>
17#include <linux/fsnotify.h>
17 18
18#include "internal.h" 19#include "internal.h"
19#include "nfs4_fs.h" 20#include "nfs4_fs.h"
@@ -353,8 +354,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
353 return; 354 return;
354 } 355 }
355 356
356 if (task->tk_status != 0) 357 if (data->complete)
357 nfs_cancel_async_unlink(old_dentry); 358 data->complete(task, data);
358} 359}
359 360
360/** 361/**
@@ -399,9 +400,10 @@ static const struct rpc_call_ops nfs_rename_ops = {
399 * 400 *
400 * It's expected that valid references to the dentries and inodes are held 401 * It's expected that valid references to the dentries and inodes are held
401 */ 402 */
402static struct rpc_task * 403struct rpc_task *
403nfs_async_rename(struct inode *old_dir, struct inode *new_dir, 404nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
404 struct dentry *old_dentry, struct dentry *new_dentry) 405 struct dentry *old_dentry, struct dentry *new_dentry,
406 void (*complete)(struct rpc_task *, struct nfs_renamedata *))
405{ 407{
406 struct nfs_renamedata *data; 408 struct nfs_renamedata *data;
407 struct rpc_message msg = { }; 409 struct rpc_message msg = { };
@@ -438,6 +440,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
438 data->new_dentry = dget(new_dentry); 440 data->new_dentry = dget(new_dentry);
439 nfs_fattr_init(&data->old_fattr); 441 nfs_fattr_init(&data->old_fattr);
440 nfs_fattr_init(&data->new_fattr); 442 nfs_fattr_init(&data->new_fattr);
443 data->complete = complete;
441 444
442 /* set up nfs_renameargs */ 445 /* set up nfs_renameargs */
443 data->args.old_dir = NFS_FH(old_dir); 446 data->args.old_dir = NFS_FH(old_dir);
@@ -456,6 +459,27 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
456 return rpc_run_task(&task_setup_data); 459 return rpc_run_task(&task_setup_data);
457} 460}
458 461
462/*
463 * Perform tasks needed when a sillyrename is done such as cancelling the
464 * queued async unlink if it failed.
465 */
466static void
467nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data)
468{
469 struct dentry *dentry = data->old_dentry;
470
471 if (task->tk_status != 0) {
472 nfs_cancel_async_unlink(dentry);
473 return;
474 }
475
476 /*
477 * vfs_unlink and the like do not issue this when a file is
478 * sillyrenamed, so do it here.
479 */
480 fsnotify_nameremove(dentry, 0);
481}
482
459#define SILLYNAME_PREFIX ".nfs" 483#define SILLYNAME_PREFIX ".nfs"
460#define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1) 484#define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1)
461#define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1) 485#define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1)
@@ -548,7 +572,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
548 } 572 }
549 573
550 /* run the rename task, undo unlink if it fails */ 574 /* run the rename task, undo unlink if it fails */
551 task = nfs_async_rename(dir, dir, dentry, sdentry); 575 task = nfs_async_rename(dir, dir, dentry, sdentry,
576 nfs_complete_sillyrename);
552 if (IS_ERR(task)) { 577 if (IS_ERR(task)) {
553 error = -EBUSY; 578 error = -EBUSY;
554 nfs_cancel_async_unlink(dentry); 579 nfs_cancel_async_unlink(dentry);
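[Editor's note] The unlink.c change turns nfs_async_rename() into a small callback-driven interface: the caller passes a completion function, and the sillyrename path supplies nfs_complete_sillyrename() to cancel the queued unlink on failure and raise the fsnotify delete event on success. A compact sketch of that shape (all names and types below are illustrative, not the kernel structures):

#include <stdio.h>

struct renamedata {
	int status;           /* result of the RENAME call */
	const char *old_name; /* name being sillyrenamed away */
};

typedef void (*rename_complete_fn)(struct renamedata *data);

/* Stand-in for the async RENAME: run it, then invoke the completion. */
static void async_rename(struct renamedata *data, rename_complete_fn complete)
{
	if (complete)
		complete(data);
}

static void complete_sillyrename(struct renamedata *data)
{
	if (data->status != 0) {
		printf("cancel queued unlink of %s\n", data->old_name);
		return;
	}
	printf("notify watchers that %s was removed\n", data->old_name);
}

int main(void)
{
	struct renamedata d = { .status = 0, .old_name = "file.txt" };
	async_rename(&d, complete_sillyrename);
	return 0;
}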
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index a812fd1b92a4..b481e1f5eecc 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -39,9 +39,13 @@ struct nfs4_acl;
39struct svc_fh; 39struct svc_fh;
40struct svc_rqst; 40struct svc_rqst;
41 41
42/* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to 42/*
43 * fit in a page: */ 43 * Maximum ACL we'll accept from a client; chosen (somewhat
44#define NFS4_ACL_MAX 170 44 * arbitrarily) so that kmalloc'ing the ACL shouldn't require a
45 * high-order allocation. This allows 204 ACEs on x86_64:
46 */
47#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
48 / sizeof(struct nfs4_ace))
45 49
46struct nfs4_acl *nfs4_acl_new(int); 50struct nfs4_acl *nfs4_acl_new(int);
47int nfs4_acl_get_whotype(char *, u32); 51int nfs4_acl_get_whotype(char *, u32);
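[Editor's note] The rewritten NFS4_ACL_MAX ties the limit to what fits in a single page rather than the hard-coded 170. With the sizes the comment alludes to on x86_64 (4096-byte pages, a 4-byte struct nfs4_acl header and 20-byte ACEs, assumed here purely for illustration), the arithmetic comes out to the quoted 204 entries:

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096; /* assumed PAGE_SIZE */
	unsigned long acl_hdr   = 4;    /* assumed sizeof(struct nfs4_acl) */
	unsigned long ace_size  = 20;   /* assumed sizeof(struct nfs4_ace) */

	/* (4096 - 4) / 20 = 204 */
	printf("NFS4_ACL_MAX = %lu\n", (page_size - acl_hdr) / ace_size);
	return 0;
}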
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index d190e33d0ec2..6f3f392d48af 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -542,7 +542,10 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
542 * up setting a 3-element effective posix ACL with all 542 * up setting a 3-element effective posix ACL with all
543 * permissions zero. 543 * permissions zero.
544 */ 544 */
545 nace = 4 + state->users->n + state->groups->n; 545 if (!state->users->n && !state->groups->n)
546 nace = 3;
547 else /* Note we also include a MASK ACE in this case: */
548 nace = 4 + state->users->n + state->groups->n;
546 pacl = posix_acl_alloc(nace, GFP_KERNEL); 549 pacl = posix_acl_alloc(nace, GFP_KERNEL);
547 if (!pacl) 550 if (!pacl)
548 return ERR_PTR(-ENOMEM); 551 return ERR_PTR(-ENOMEM);
@@ -586,9 +589,11 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
586 add_to_mask(state, &state->groups->aces[i].perms); 589 add_to_mask(state, &state->groups->aces[i].perms);
587 } 590 }
588 591
589 pace++; 592 if (!state->users->n && !state->groups->n) {
590 pace->e_tag = ACL_MASK; 593 pace++;
591 low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); 594 pace->e_tag = ACL_MASK;
595 low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
596 }
592 597
593 pace++; 598 pace++;
594 pace->e_tag = ACL_OTHER; 599 pace->e_tag = ACL_OTHER;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7f05cd140de3..39c8ef875f91 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -32,6 +32,7 @@
32 */ 32 */
33 33
34#include <linux/sunrpc/clnt.h> 34#include <linux/sunrpc/clnt.h>
35#include <linux/sunrpc/xprt.h>
35#include <linux/sunrpc/svc_xprt.h> 36#include <linux/sunrpc/svc_xprt.h>
36#include <linux/slab.h> 37#include <linux/slab.h>
37#include "nfsd.h" 38#include "nfsd.h"
@@ -635,6 +636,22 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc
635 } 636 }
636} 637}
637 638
639static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args)
640{
641 struct rpc_xprt *xprt;
642
643 if (args->protocol != XPRT_TRANSPORT_BC_TCP)
644 return rpc_create(args);
645
646 xprt = args->bc_xprt->xpt_bc_xprt;
647 if (xprt) {
648 xprt_get(xprt);
649 return rpc_create_xprt(args, xprt);
650 }
651
652 return rpc_create(args);
653}
654
638static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) 655static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
639{ 656{
640 struct rpc_timeout timeparms = { 657 struct rpc_timeout timeparms = {
@@ -674,7 +691,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
674 args.authflavor = ses->se_cb_sec.flavor; 691 args.authflavor = ses->se_cb_sec.flavor;
675 } 692 }
676 /* Create RPC client */ 693 /* Create RPC client */
677 client = rpc_create(&args); 694 client = create_backchannel_client(&args);
678 if (IS_ERR(client)) { 695 if (IS_ERR(client)) {
679 dprintk("NFSD: couldn't create callback client: %ld\n", 696 dprintk("NFSD: couldn't create callback client: %ld\n",
680 PTR_ERR(client)); 697 PTR_ERR(client));
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 82189b208af3..d543222babf3 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1273,6 +1273,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1273 struct nfsd4_op *op; 1273 struct nfsd4_op *op;
1274 struct nfsd4_operation *opdesc; 1274 struct nfsd4_operation *opdesc;
1275 struct nfsd4_compound_state *cstate = &resp->cstate; 1275 struct nfsd4_compound_state *cstate = &resp->cstate;
1276 struct svc_fh *current_fh = &cstate->current_fh;
1277 struct svc_fh *save_fh = &cstate->save_fh;
1276 int slack_bytes; 1278 int slack_bytes;
1277 u32 plen = 0; 1279 u32 plen = 0;
1278 __be32 status; 1280 __be32 status;
@@ -1288,11 +1290,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1288 resp->tag = args->tag; 1290 resp->tag = args->tag;
1289 resp->opcnt = 0; 1291 resp->opcnt = 0;
1290 resp->rqstp = rqstp; 1292 resp->rqstp = rqstp;
1291 resp->cstate.minorversion = args->minorversion; 1293 cstate->minorversion = args->minorversion;
1292 resp->cstate.replay_owner = NULL; 1294 cstate->replay_owner = NULL;
1293 resp->cstate.session = NULL; 1295 cstate->session = NULL;
1294 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); 1296 fh_init(current_fh, NFS4_FHSIZE);
1295 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); 1297 fh_init(save_fh, NFS4_FHSIZE);
1296 /* 1298 /*
1297 * Don't use the deferral mechanism for NFSv4; compounds make it 1299 * Don't use the deferral mechanism for NFSv4; compounds make it
1298 * too hard to avoid non-idempotency problems. 1300 * too hard to avoid non-idempotency problems.
@@ -1345,20 +1347,28 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1345 1347
1346 opdesc = OPDESC(op); 1348 opdesc = OPDESC(op);
1347 1349
1348 if (!cstate->current_fh.fh_dentry) { 1350 if (!current_fh->fh_dentry) {
1349 if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { 1351 if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
1350 op->status = nfserr_nofilehandle; 1352 op->status = nfserr_nofilehandle;
1351 goto encode_op; 1353 goto encode_op;
1352 } 1354 }
1353 } else if (cstate->current_fh.fh_export->ex_fslocs.migrated && 1355 } else if (current_fh->fh_export->ex_fslocs.migrated &&
1354 !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { 1356 !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {
1355 op->status = nfserr_moved; 1357 op->status = nfserr_moved;
1356 goto encode_op; 1358 goto encode_op;
1357 } 1359 }
1358 1360
1361 fh_clear_wcc(current_fh);
1362
1359 /* If op is non-idempotent */ 1363 /* If op is non-idempotent */
1360 if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { 1364 if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
1361 plen = opdesc->op_rsize_bop(rqstp, op); 1365 plen = opdesc->op_rsize_bop(rqstp, op);
1366 /*
1367 * If there's still another operation, make sure
1368 * we'll have space to at least encode an error:
1369 */
1370 if (resp->opcnt < args->opcnt)
1371 plen += COMPOUND_ERR_SLACK_SPACE;
1362 op->status = nfsd4_check_resp_size(resp, plen); 1372 op->status = nfsd4_check_resp_size(resp, plen);
1363 } 1373 }
1364 1374
@@ -1377,12 +1387,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1377 clear_current_stateid(cstate); 1387 clear_current_stateid(cstate);
1378 1388
1379 if (need_wrongsec_check(rqstp)) 1389 if (need_wrongsec_check(rqstp))
1380 op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); 1390 op->status = check_nfsd_access(current_fh->fh_export, rqstp);
1381 } 1391 }
1382 1392
1383encode_op: 1393encode_op:
1384 /* Only from SEQUENCE */ 1394 /* Only from SEQUENCE */
1385 if (resp->cstate.status == nfserr_replay_cache) { 1395 if (cstate->status == nfserr_replay_cache) {
1386 dprintk("%s NFS4.1 replay from cache\n", __func__); 1396 dprintk("%s NFS4.1 replay from cache\n", __func__);
1387 status = op->status; 1397 status = op->status;
1388 goto out; 1398 goto out;
@@ -1411,10 +1421,10 @@ encode_op:
1411 nfsd4_increment_op_stats(op->opnum); 1421 nfsd4_increment_op_stats(op->opnum);
1412 } 1422 }
1413 1423
1414 resp->cstate.status = status; 1424 cstate->status = status;
1415 fh_put(&resp->cstate.current_fh); 1425 fh_put(current_fh);
1416 fh_put(&resp->cstate.save_fh); 1426 fh_put(save_fh);
1417 BUG_ON(resp->cstate.replay_owner); 1427 BUG_ON(cstate->replay_owner);
1418out: 1428out:
1419 /* Reset deferral mechanism for RPC deferrals */ 1429 /* Reset deferral mechanism for RPC deferrals */
1420 rqstp->rq_usedeferral = 1; 1430 rqstp->rq_usedeferral = 1;
@@ -1523,7 +1533,8 @@ static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
1523 1533
1524static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) 1534static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1525{ 1535{
1526 return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32); 1536 return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
1537 sizeof(__be32);
1527} 1538}
1528 1539
1529static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) 1540static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d5d070fbeb35..3ba65979a3cd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1538,7 +1538,7 @@ out_err:
1538} 1538}
1539 1539
1540/* 1540/*
1541 * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size. 1541 * Cache a reply. nfsd4_check_resp_size() has bounded the cache size.
1542 */ 1542 */
1543void 1543void
1544nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) 1544nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
@@ -1596,7 +1596,7 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
1596 * The sequence operation is not cached because we can use the slot and 1596 * The sequence operation is not cached because we can use the slot and
1597 * session values. 1597 * session values.
1598 */ 1598 */
1599__be32 1599static __be32
1600nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, 1600nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
1601 struct nfsd4_sequence *seq) 1601 struct nfsd4_sequence *seq)
1602{ 1602{
@@ -1605,9 +1605,8 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
1605 1605
1606 dprintk("--> %s slot %p\n", __func__, slot); 1606 dprintk("--> %s slot %p\n", __func__, slot);
1607 1607
1608 /* Either returns 0 or nfserr_retry_uncached */
1609 status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp); 1608 status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
1610 if (status == nfserr_retry_uncached_rep) 1609 if (status)
1611 return status; 1610 return status;
1612 1611
1613 /* The sequence operation has been encoded, cstate->datap set. */ 1612 /* The sequence operation has been encoded, cstate->datap set. */
@@ -2287,7 +2286,8 @@ out:
2287 if (!list_empty(&clp->cl_revoked)) 2286 if (!list_empty(&clp->cl_revoked))
2288 seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED; 2287 seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
2289out_no_session: 2288out_no_session:
2290 kfree(conn); 2289 if (conn)
2290 free_conn(conn);
2291 spin_unlock(&nn->client_lock); 2291 spin_unlock(&nn->client_lock);
2292 return status; 2292 return status;
2293out_put_session: 2293out_put_session:
@@ -3627,8 +3627,11 @@ static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask,
3627 return nfserr_bad_stateid; 3627 return nfserr_bad_stateid;
3628 status = lookup_clientid(&stateid->si_opaque.so_clid, sessions, 3628 status = lookup_clientid(&stateid->si_opaque.so_clid, sessions,
3629 nn, &cl); 3629 nn, &cl);
3630 if (status == nfserr_stale_clientid) 3630 if (status == nfserr_stale_clientid) {
3631 if (sessions)
3632 return nfserr_bad_stateid;
3631 return nfserr_stale_stateid; 3633 return nfserr_stale_stateid;
3634 }
3632 if (status) 3635 if (status)
3633 return status; 3636 return status;
3634 *s = find_stateid_by_type(cl, stateid, typemask); 3637 *s = find_stateid_by_type(cl, stateid, typemask);
@@ -5062,7 +5065,6 @@ nfs4_state_destroy_net(struct net *net)
5062 int i; 5065 int i;
5063 struct nfs4_client *clp = NULL; 5066 struct nfs4_client *clp = NULL;
5064 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 5067 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
5065 struct rb_node *node, *tmp;
5066 5068
5067 for (i = 0; i < CLIENT_HASH_SIZE; i++) { 5069 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
5068 while (!list_empty(&nn->conf_id_hashtbl[i])) { 5070 while (!list_empty(&nn->conf_id_hashtbl[i])) {
@@ -5071,13 +5073,11 @@ nfs4_state_destroy_net(struct net *net)
5071 } 5073 }
5072 } 5074 }
5073 5075
5074 node = rb_first(&nn->unconf_name_tree); 5076 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
5075 while (node != NULL) { 5077 while (!list_empty(&nn->unconf_id_hashtbl[i])) {
5076 tmp = node; 5078 clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
5077 node = rb_next(tmp); 5079 destroy_client(clp);
5078 clp = rb_entry(tmp, struct nfs4_client, cl_namenode); 5080 }
5079 rb_erase(tmp, &nn->unconf_name_tree);
5080 destroy_client(clp);
5081 } 5081 }
5082 5082
5083 kfree(nn->sessionid_hashtbl); 5083 kfree(nn->sessionid_hashtbl);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 63f2395c57ed..2723c1badd01 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -294,7 +294,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
294 READ32(nace); 294 READ32(nace);
295 295
296 if (nace > NFS4_ACL_MAX) 296 if (nace > NFS4_ACL_MAX)
297 return nfserr_resource; 297 return nfserr_fbig;
298 298
299 *acl = nfs4_acl_new(nace); 299 *acl = nfs4_acl_new(nace);
300 if (*acl == NULL) 300 if (*acl == NULL)
@@ -1222,7 +1222,6 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
1222 } 1222 }
1223 write->wr_head.iov_base = p; 1223 write->wr_head.iov_base = p;
1224 write->wr_head.iov_len = avail; 1224 write->wr_head.iov_len = avail;
1225 WARN_ON(avail != (XDR_QUADLEN(avail) << 2));
1226 write->wr_pagelist = argp->pagelist; 1225 write->wr_pagelist = argp->pagelist;
1227 1226
1228 len = XDR_QUADLEN(write->wr_buflen) << 2; 1227 len = XDR_QUADLEN(write->wr_buflen) << 2;
@@ -2483,6 +2482,8 @@ out_acl:
2483 goto out; 2482 goto out;
2484 } 2483 }
2485 if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { 2484 if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
2485 if ((buflen -= 16) < 0)
2486 goto out_resource;
2486 WRITE32(3); 2487 WRITE32(3);
2487 WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); 2488 WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
2488 WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1); 2489 WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
@@ -2499,8 +2500,10 @@ out:
2499 security_release_secctx(context, contextlen); 2500 security_release_secctx(context, contextlen);
2500#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ 2501#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
2501 kfree(acl); 2502 kfree(acl);
2502 if (tempfh) 2503 if (tempfh) {
2503 fh_put(tempfh); 2504 fh_put(tempfh);
2505 kfree(tempfh);
2506 }
2504 return status; 2507 return status;
2505out_nfserr: 2508out_nfserr:
2506 status = nfserrno(err); 2509 status = nfserrno(err);
@@ -3471,6 +3474,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
3471 struct nfsd4_test_stateid_id *stateid, *next; 3474 struct nfsd4_test_stateid_id *stateid, *next;
3472 __be32 *p; 3475 __be32 *p;
3473 3476
3477 if (nfserr)
3478 return nfserr;
3479
3474 RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids)); 3480 RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids));
3475 *p++ = htonl(test_stateid->ts_num_ids); 3481 *p++ = htonl(test_stateid->ts_num_ids);
3476 3482
@@ -3579,8 +3585,6 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad)
3579 return 0; 3585 return 0;
3580 3586
3581 session = resp->cstate.session; 3587 session = resp->cstate.session;
3582 if (session == NULL)
3583 return 0;
3584 3588
3585 if (xb->page_len == 0) { 3589 if (xb->page_len == 0) {
3586 length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; 3590 length = (char *)resp->p - (char *)xb->head[0].iov_base + pad;
@@ -3620,9 +3624,17 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
3620 BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || 3624 BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
3621 !nfsd4_enc_ops[op->opnum]); 3625 !nfsd4_enc_ops[op->opnum]);
3622 op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); 3626 op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
3623 /* nfsd4_check_drc_limit guarantees enough room for error status */ 3627 /* nfsd4_check_resp_size guarantees enough room for error status */
3624 if (!op->status) 3628 if (!op->status)
3625 op->status = nfsd4_check_resp_size(resp, 0); 3629 op->status = nfsd4_check_resp_size(resp, 0);
3630 if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) {
3631 struct nfsd4_slot *slot = resp->cstate.slot;
3632
3633 if (slot->sl_flags & NFSD4_SLOT_CACHETHIS)
3634 op->status = nfserr_rep_too_big_to_cache;
3635 else
3636 op->status = nfserr_rep_too_big;
3637 }
3626 if (so) { 3638 if (so) {
3627 so->so_replay.rp_status = op->status; 3639 so->so_replay.rp_status = op->status;
3628 so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1); 3640 so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1);
@@ -3691,6 +3703,12 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
3691int 3703int
3692nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args) 3704nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args)
3693{ 3705{
3706 if (rqstp->rq_arg.head[0].iov_len % 4) {
3707 /* client is nuts */
3708 dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)",
3709 __func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid));
3710 return 0;
3711 }
3694 args->p = p; 3712 args->p = p;
3695 args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; 3713 args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len;
3696 args->pagelist = rqstp->rq_arg.pages; 3714 args->pagelist = rqstp->rq_arg.pages;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7f555179bf81..f34d9de802ab 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -699,6 +699,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net)
699 if (err != 0 || fd < 0) 699 if (err != 0 || fd < 0)
700 return -EINVAL; 700 return -EINVAL;
701 701
702 if (svc_alien_sock(net, fd)) {
703 printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__);
704 return -EINVAL;
705 }
706
702 err = nfsd_create_serv(net); 707 err = nfsd_create_serv(net);
703 if (err != 0) 708 if (err != 0)
704 return err; 709 return err;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 30f34ab02137..479eb681c27c 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -282,7 +282,7 @@ void nfsd_lockd_shutdown(void);
282 * reason. 282 * reason.
283 */ 283 */
284#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ 284#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
285#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ 285#define COMPOUND_ERR_SLACK_SPACE 16 /* OP_SETATTR */
286 286
287#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ 287#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
288 288
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 4775bc4896c8..ad67964d0bb1 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -133,6 +133,17 @@ fh_init(struct svc_fh *fhp, int maxsize)
133 133
134#ifdef CONFIG_NFSD_V3 134#ifdef CONFIG_NFSD_V3
135/* 135/*
136 * The wcc data stored in current_fh should be cleared
137 * between compound ops.
138 */
139static inline void
140fh_clear_wcc(struct svc_fh *fhp)
141{
142 fhp->fh_post_saved = 0;
143 fhp->fh_pre_saved = 0;
144}
145
146/*
136 * Fill in the pre_op attr for the wcc data 147 * Fill in the pre_op attr for the wcc data
137 */ 148 */
138static inline void 149static inline void
@@ -152,7 +163,8 @@ fill_pre_wcc(struct svc_fh *fhp)
152 163
153extern void fill_post_wcc(struct svc_fh *); 164extern void fill_post_wcc(struct svc_fh *);
154#else 165#else
155#define fill_pre_wcc(ignored) 166#define fh_clear_wcc(ignored)
167#define fill_pre_wcc(ignored)
156#define fill_post_wcc(notused) 168#define fill_post_wcc(notused)
157#endif /* CONFIG_NFSD_V3 */ 169#endif /* CONFIG_NFSD_V3 */
158 170
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index b17d93214d01..9c769a47ac5a 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -152,7 +152,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
152 type = (stat->mode & S_IFMT); 152 type = (stat->mode & S_IFMT);
153 153
154 *p++ = htonl(nfs_ftypes[type >> 12]); 154 *p++ = htonl(nfs_ftypes[type >> 12]);
155 *p++ = htonl((u32) (stat->mode & S_IALLUGO)); 155 *p++ = htonl((u32) stat->mode);
156 *p++ = htonl((u32) stat->nlink); 156 *p++ = htonl((u32) stat->nlink);
157 *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid)); 157 *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
158 *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); 158 *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 915808b36df7..16f0673a423c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -404,6 +404,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
404 umode_t ftype = 0; 404 umode_t ftype = 0;
405 __be32 err; 405 __be32 err;
406 int host_err; 406 int host_err;
407 bool get_write_count;
407 int size_change = 0; 408 int size_change = 0;
408 409
409 if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) 410 if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
@@ -411,10 +412,18 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
411 if (iap->ia_valid & ATTR_SIZE) 412 if (iap->ia_valid & ATTR_SIZE)
412 ftype = S_IFREG; 413 ftype = S_IFREG;
413 414
415 /* Callers that do fh_verify should do the fh_want_write: */
416 get_write_count = !fhp->fh_dentry;
417
414 /* Get inode */ 418 /* Get inode */
415 err = fh_verify(rqstp, fhp, ftype, accmode); 419 err = fh_verify(rqstp, fhp, ftype, accmode);
416 if (err) 420 if (err)
417 goto out; 421 goto out;
422 if (get_write_count) {
423 host_err = fh_want_write(fhp);
424 if (host_err)
425 return nfserrno(host_err);
426 }
418 427
419 dentry = fhp->fh_dentry; 428 dentry = fhp->fh_dentry;
420 inode = dentry->d_inode; 429 inode = dentry->d_inode;
@@ -1706,10 +1715,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1706 dput(odentry); 1715 dput(odentry);
1707 out_nfserr: 1716 out_nfserr:
1708 err = nfserrno(host_err); 1717 err = nfserrno(host_err);
1709 1718 /*
1710 /* we cannot reply on fh_unlock on the two filehandles, 1719 * We cannot rely on fh_unlock on the two filehandles,
1711 * as that would do the wrong thing if the two directories 1720 * as that would do the wrong thing if the two directories
1712 * were the same, so again we do it by hand 1721 * were the same, so again we do it by hand.
1713 */ 1722 */
1714 fill_post_wcc(ffhp); 1723 fill_post_wcc(ffhp);
1715 fill_post_wcc(tfhp); 1724 fill_post_wcc(tfhp);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d278a0d03496..5ea7df305083 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -574,8 +574,6 @@ extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
574 struct nfsd4_compound_state *, 574 struct nfsd4_compound_state *,
575 struct nfsd4_setclientid_confirm *setclientid_confirm); 575 struct nfsd4_setclientid_confirm *setclientid_confirm);
576extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); 576extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
577extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
578 struct nfsd4_sequence *seq);
579extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, 577extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
580 struct nfsd4_compound_state *, struct nfsd4_exchange_id *); 578 struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
581extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *); 579extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 08fdb77852ac..f3a82fbcae02 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -134,6 +134,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
134 134
135static const struct vm_operations_struct nilfs_file_vm_ops = { 135static const struct vm_operations_struct nilfs_file_vm_ops = {
136 .fault = filemap_fault, 136 .fault = filemap_fault,
137 .map_pages = filemap_map_pages,
137 .page_mkwrite = nilfs_page_mkwrite, 138 .page_mkwrite = nilfs_page_mkwrite,
138 .remap_pages = generic_file_remap_pages, 139 .remap_pages = generic_file_remap_pages,
139}; 140};
diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c
index 807150e2c2b9..dd6103cc93c1 100644
--- a/fs/ntfs/debug.c
+++ b/fs/ntfs/debug.c
@@ -18,16 +18,9 @@
18 * distribution in the file COPYING); if not, write to the Free Software 18 * distribution in the file COPYING); if not, write to the Free Software
19 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */ 20 */
21 21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22#include "debug.h" 22#include "debug.h"
23 23
24/*
25 * A static buffer to hold the error string being displayed and a spinlock
26 * to protect concurrent accesses to it.
27 */
28static char err_buf[1024];
29static DEFINE_SPINLOCK(err_buf_lock);
30
31/** 24/**
32 * __ntfs_warning - output a warning to the syslog 25 * __ntfs_warning - output a warning to the syslog
33 * @function: name of function outputting the warning 26 * @function: name of function outputting the warning
@@ -50,6 +43,7 @@ static DEFINE_SPINLOCK(err_buf_lock);
50void __ntfs_warning(const char *function, const struct super_block *sb, 43void __ntfs_warning(const char *function, const struct super_block *sb,
51 const char *fmt, ...) 44 const char *fmt, ...)
52{ 45{
46 struct va_format vaf;
53 va_list args; 47 va_list args;
54 int flen = 0; 48 int flen = 0;
55 49
@@ -59,17 +53,15 @@ void __ntfs_warning(const char *function, const struct super_block *sb,
59#endif 53#endif
60 if (function) 54 if (function)
61 flen = strlen(function); 55 flen = strlen(function);
62 spin_lock(&err_buf_lock);
63 va_start(args, fmt); 56 va_start(args, fmt);
64 vsnprintf(err_buf, sizeof(err_buf), fmt, args); 57 vaf.fmt = fmt;
65 va_end(args); 58 vaf.va = &args;
66 if (sb) 59 if (sb)
67 printk(KERN_ERR "NTFS-fs warning (device %s): %s(): %s\n", 60 pr_warn("(device %s): %s(): %pV\n",
68 sb->s_id, flen ? function : "", err_buf); 61 sb->s_id, flen ? function : "", &vaf);
69 else 62 else
70 printk(KERN_ERR "NTFS-fs warning: %s(): %s\n", 63 pr_warn("%s(): %pV\n", flen ? function : "", &vaf);
71 flen ? function : "", err_buf); 64 va_end(args);
72 spin_unlock(&err_buf_lock);
73} 65}
74 66
75/** 67/**
@@ -94,6 +86,7 @@ void __ntfs_warning(const char *function, const struct super_block *sb,
94void __ntfs_error(const char *function, const struct super_block *sb, 86void __ntfs_error(const char *function, const struct super_block *sb,
95 const char *fmt, ...) 87 const char *fmt, ...)
96{ 88{
89 struct va_format vaf;
97 va_list args; 90 va_list args;
98 int flen = 0; 91 int flen = 0;
99 92
@@ -103,17 +96,15 @@ void __ntfs_error(const char *function, const struct super_block *sb,
103#endif 96#endif
104 if (function) 97 if (function)
105 flen = strlen(function); 98 flen = strlen(function);
106 spin_lock(&err_buf_lock);
107 va_start(args, fmt); 99 va_start(args, fmt);
108 vsnprintf(err_buf, sizeof(err_buf), fmt, args); 100 vaf.fmt = fmt;
109 va_end(args); 101 vaf.va = &args;
110 if (sb) 102 if (sb)
111 printk(KERN_ERR "NTFS-fs error (device %s): %s(): %s\n", 103 pr_err("(device %s): %s(): %pV\n",
112 sb->s_id, flen ? function : "", err_buf); 104 sb->s_id, flen ? function : "", &vaf);
113 else 105 else
114 printk(KERN_ERR "NTFS-fs error: %s(): %s\n", 106 pr_err("%s(): %pV\n", flen ? function : "", &vaf);
115 flen ? function : "", err_buf); 107 va_end(args);
116 spin_unlock(&err_buf_lock);
117} 108}
118 109
119#ifdef DEBUG 110#ifdef DEBUG
@@ -124,6 +115,7 @@ int debug_msgs = 0;
124void __ntfs_debug (const char *file, int line, const char *function, 115void __ntfs_debug (const char *file, int line, const char *function,
125 const char *fmt, ...) 116 const char *fmt, ...)
126{ 117{
118 struct va_format vaf;
127 va_list args; 119 va_list args;
128 int flen = 0; 120 int flen = 0;
129 121
@@ -131,13 +123,11 @@ void __ntfs_debug (const char *file, int line, const char *function,
131 return; 123 return;
132 if (function) 124 if (function)
133 flen = strlen(function); 125 flen = strlen(function);
134 spin_lock(&err_buf_lock);
135 va_start(args, fmt); 126 va_start(args, fmt);
136 vsnprintf(err_buf, sizeof(err_buf), fmt, args); 127 vaf.fmt = fmt;
128 vaf.va = &args;
129 pr_debug("(%s, %d): %s(): %pV", file, line, flen ? function : "", &vaf);
137 va_end(args); 130 va_end(args);
138 printk(KERN_DEBUG "NTFS-fs DEBUG (%s, %d): %s(): %s\n", file, line,
139 flen ? function : "", err_buf);
140 spin_unlock(&err_buf_lock);
141} 131}
142 132
143/* Dump a runlist. Caller has to provide synchronisation for @rl. */ 133/* Dump a runlist. Caller has to provide synchronisation for @rl. */
@@ -149,12 +139,12 @@ void ntfs_debug_dump_runlist(const runlist_element *rl)
149 139
150 if (!debug_msgs) 140 if (!debug_msgs)
151 return; 141 return;
152 printk(KERN_DEBUG "NTFS-fs DEBUG: Dumping runlist (values in hex):\n"); 142 pr_debug("Dumping runlist (values in hex):\n");
153 if (!rl) { 143 if (!rl) {
154 printk(KERN_DEBUG "Run list not present.\n"); 144 pr_debug("Run list not present.\n");
155 return; 145 return;
156 } 146 }
157 printk(KERN_DEBUG "VCN LCN Run length\n"); 147 pr_debug("VCN LCN Run length\n");
158 for (i = 0; ; i++) { 148 for (i = 0; ; i++) {
159 LCN lcn = (rl + i)->lcn; 149 LCN lcn = (rl + i)->lcn;
160 150
@@ -163,13 +153,13 @@ void ntfs_debug_dump_runlist(const runlist_element *rl)
163 153
164 if (index > -LCN_ENOENT - 1) 154 if (index > -LCN_ENOENT - 1)
165 index = 3; 155 index = 3;
166 printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n", 156 pr_debug("%-16Lx %s %-16Lx%s\n",
167 (long long)(rl + i)->vcn, lcn_str[index], 157 (long long)(rl + i)->vcn, lcn_str[index],
168 (long long)(rl + i)->length, 158 (long long)(rl + i)->length,
169 (rl + i)->length ? "" : 159 (rl + i)->length ? "" :
170 " (runlist end)"); 160 " (runlist end)");
171 } else 161 } else
172 printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n", 162 pr_debug("%-16Lx %-16Lx %-16Lx%s\n",
173 (long long)(rl + i)->vcn, 163 (long long)(rl + i)->vcn,
174 (long long)(rl + i)->lcn, 164 (long long)(rl + i)->lcn,
175 (long long)(rl + i)->length, 165 (long long)(rl + i)->length,
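[Editor's note] The debug.c rewrite drops the shared err_buf/err_buf_lock pair by handing the va_list to printk through struct va_format and the %pV specifier, so nothing has to be formatted into an intermediate buffer under a spinlock. The same idea in plain userspace C, with vfprintf standing in for %pV:

#include <stdarg.h>
#include <stdio.h>

static void ntfs_style_warning(const char *function, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	fprintf(stderr, "ntfs: %s(): ", function);
	vfprintf(stderr, fmt, args); /* forward the va_list, no temp buffer */
	fputc('\n', stderr);
	va_end(args);
}

int main(void)
{
	ntfs_style_warning("load_system_files", "volume is dirty (flags 0x%x)", 1);
	return 0;
}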
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 53c27eaf2307..61bf091e32a8 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -48,7 +48,12 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl);
48 48
49#else /* !DEBUG */ 49#else /* !DEBUG */
50 50
51#define ntfs_debug(f, a...) do {} while (0) 51#define ntfs_debug(fmt, ...) \
52do { \
53 if (0) \
54 no_printk(fmt, ##__VA_ARGS__); \
55} while (0)
56
52#define ntfs_debug_dump_runlist(rl) do {} while (0) 57#define ntfs_debug_dump_runlist(rl) do {} while (0)
53 58
54#endif /* !DEBUG */ 59#endif /* !DEBUG */
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index bd5610d48242..9de2491f2926 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -19,6 +19,7 @@
19 * distribution in the file COPYING); if not, write to the Free Software 19 * distribution in the file COPYING); if not, write to the Free Software
20 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */ 21 */
22#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 23
23#include <linux/stddef.h> 24#include <linux/stddef.h>
24#include <linux/init.h> 25#include <linux/init.h>
@@ -1896,7 +1897,7 @@ get_ctx_vol_failed:
1896 vol->minor_ver = vi->minor_ver; 1897 vol->minor_ver = vi->minor_ver;
1897 ntfs_attr_put_search_ctx(ctx); 1898 ntfs_attr_put_search_ctx(ctx);
1898 unmap_mft_record(NTFS_I(vol->vol_ino)); 1899 unmap_mft_record(NTFS_I(vol->vol_ino));
1899 printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver, 1900 pr_info("volume version %i.%i.\n", vol->major_ver,
1900 vol->minor_ver); 1901 vol->minor_ver);
1901 if (vol->major_ver < 3 && NVolSparseEnabled(vol)) { 1902 if (vol->major_ver < 3 && NVolSparseEnabled(vol)) {
1902 ntfs_warning(vol->sb, "Disabling sparse support due to NTFS " 1903 ntfs_warning(vol->sb, "Disabling sparse support due to NTFS "
@@ -3095,7 +3096,7 @@ static int __init init_ntfs_fs(void)
3095 int err = 0; 3096 int err = 0;
3096 3097
3097 /* This may be ugly but it results in pretty output so who cares. (-8 */ 3098 /* This may be ugly but it results in pretty output so who cares. (-8 */
3098 printk(KERN_INFO "NTFS driver " NTFS_VERSION " [Flags: R/" 3099 pr_info("driver " NTFS_VERSION " [Flags: R/"
3099#ifdef NTFS_RW 3100#ifdef NTFS_RW
3100 "W" 3101 "W"
3101#else 3102#else
@@ -3115,16 +3116,15 @@ static int __init init_ntfs_fs(void)
3115 sizeof(ntfs_index_context), 0 /* offset */, 3116 sizeof(ntfs_index_context), 0 /* offset */,
3116 SLAB_HWCACHE_ALIGN, NULL /* ctor */); 3117 SLAB_HWCACHE_ALIGN, NULL /* ctor */);
3117 if (!ntfs_index_ctx_cache) { 3118 if (!ntfs_index_ctx_cache) {
3118 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3119 pr_crit("Failed to create %s!\n", ntfs_index_ctx_cache_name);
3119 ntfs_index_ctx_cache_name);
3120 goto ictx_err_out; 3120 goto ictx_err_out;
3121 } 3121 }
3122 ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name, 3122 ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name,
3123 sizeof(ntfs_attr_search_ctx), 0 /* offset */, 3123 sizeof(ntfs_attr_search_ctx), 0 /* offset */,
3124 SLAB_HWCACHE_ALIGN, NULL /* ctor */); 3124 SLAB_HWCACHE_ALIGN, NULL /* ctor */);
3125 if (!ntfs_attr_ctx_cache) { 3125 if (!ntfs_attr_ctx_cache) {
3126 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3126 pr_crit("NTFS: Failed to create %s!\n",
3127 ntfs_attr_ctx_cache_name); 3127 ntfs_attr_ctx_cache_name);
3128 goto actx_err_out; 3128 goto actx_err_out;
3129 } 3129 }
3130 3130
@@ -3132,8 +3132,7 @@ static int __init init_ntfs_fs(void)
3132 (NTFS_MAX_NAME_LEN+1) * sizeof(ntfschar), 0, 3132 (NTFS_MAX_NAME_LEN+1) * sizeof(ntfschar), 0,
3133 SLAB_HWCACHE_ALIGN, NULL); 3133 SLAB_HWCACHE_ALIGN, NULL);
3134 if (!ntfs_name_cache) { 3134 if (!ntfs_name_cache) {
3135 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3135 pr_crit("Failed to create %s!\n", ntfs_name_cache_name);
3136 ntfs_name_cache_name);
3137 goto name_err_out; 3136 goto name_err_out;
3138 } 3137 }
3139 3138
@@ -3141,8 +3140,7 @@ static int __init init_ntfs_fs(void)
3141 sizeof(ntfs_inode), 0, 3140 sizeof(ntfs_inode), 0,
3142 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); 3141 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
3143 if (!ntfs_inode_cache) { 3142 if (!ntfs_inode_cache) {
3144 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3143 pr_crit("Failed to create %s!\n", ntfs_inode_cache_name);
3145 ntfs_inode_cache_name);
3146 goto inode_err_out; 3144 goto inode_err_out;
3147 } 3145 }
3148 3146
@@ -3151,15 +3149,14 @@ static int __init init_ntfs_fs(void)
3151 SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, 3149 SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
3152 ntfs_big_inode_init_once); 3150 ntfs_big_inode_init_once);
3153 if (!ntfs_big_inode_cache) { 3151 if (!ntfs_big_inode_cache) {
3154 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3152 pr_crit("Failed to create %s!\n", ntfs_big_inode_cache_name);
3155 ntfs_big_inode_cache_name);
3156 goto big_inode_err_out; 3153 goto big_inode_err_out;
3157 } 3154 }
3158 3155
3159 /* Register the ntfs sysctls. */ 3156 /* Register the ntfs sysctls. */
3160 err = ntfs_sysctl(1); 3157 err = ntfs_sysctl(1);
3161 if (err) { 3158 if (err) {
3162 printk(KERN_CRIT "NTFS: Failed to register NTFS sysctls!\n"); 3159 pr_crit("Failed to register NTFS sysctls!\n");
3163 goto sysctl_err_out; 3160 goto sysctl_err_out;
3164 } 3161 }
3165 3162
@@ -3168,7 +3165,7 @@ static int __init init_ntfs_fs(void)
3168 ntfs_debug("NTFS driver registered successfully."); 3165 ntfs_debug("NTFS driver registered successfully.");
3169 return 0; /* Success! */ 3166 return 0; /* Success! */
3170 } 3167 }
3171 printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n"); 3168 pr_crit("Failed to register NTFS filesystem driver!\n");
3172 3169
3173 /* Unregister the ntfs sysctls. */ 3170 /* Unregister the ntfs sysctls. */
3174 ntfs_sysctl(0); 3171 ntfs_sysctl(0);
@@ -3184,8 +3181,7 @@ actx_err_out:
3184 kmem_cache_destroy(ntfs_index_ctx_cache); 3181 kmem_cache_destroy(ntfs_index_ctx_cache);
3185ictx_err_out: 3182ictx_err_out:
3186 if (!err) { 3183 if (!err) {
3187 printk(KERN_CRIT "NTFS: Aborting NTFS filesystem driver " 3184 pr_crit("Aborting NTFS filesystem driver registration...\n");
3188 "registration...\n");
3189 err = -ENOMEM; 3185 err = -ENOMEM;
3190 } 3186 }
3191 return err; 3187 return err;
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index a4b07730b2e1..b7f57271d49c 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
@@ -41,7 +41,7 @@ static ssize_t version_show(struct kobject *kobj, struct kobj_attribute *attr,
41 return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION); 41 return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION);
42} 42}
43static struct kobj_attribute attr_version = 43static struct kobj_attribute attr_version =
44 __ATTR(interface_revision, S_IFREG | S_IRUGO, version_show, NULL); 44 __ATTR(interface_revision, S_IRUGO, version_show, NULL);
45 45
46static struct attribute *o2cb_attrs[] = { 46static struct attribute *o2cb_attrs[] = {
47 &attr_version.attr, 47 &attr_version.attr,
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 5c8343fe7438..83f1a665ae97 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -496,7 +496,7 @@ static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj,
496} 496}
497 497
498static struct kobj_attribute ocfs2_attr_max_locking_protocol = 498static struct kobj_attribute ocfs2_attr_max_locking_protocol =
499 __ATTR(max_locking_protocol, S_IFREG | S_IRUGO, 499 __ATTR(max_locking_protocol, S_IRUGO,
500 ocfs2_max_locking_protocol_show, NULL); 500 ocfs2_max_locking_protocol_show, NULL);
501 501
502static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, 502static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
@@ -528,7 +528,7 @@ static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
528} 528}
529 529
530static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = 530static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins =
531 __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, 531 __ATTR(loaded_cluster_plugins, S_IRUGO,
532 ocfs2_loaded_cluster_plugins_show, NULL); 532 ocfs2_loaded_cluster_plugins_show, NULL);
533 533
534static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, 534static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
@@ -550,7 +550,7 @@ static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
550} 550}
551 551
552static struct kobj_attribute ocfs2_attr_active_cluster_plugin = 552static struct kobj_attribute ocfs2_attr_active_cluster_plugin =
553 __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, 553 __ATTR(active_cluster_plugin, S_IRUGO,
554 ocfs2_active_cluster_plugin_show, NULL); 554 ocfs2_active_cluster_plugin_show, NULL);
555 555
556static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, 556static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj,
@@ -599,7 +599,7 @@ static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj,
599 599
600 600
601static struct kobj_attribute ocfs2_attr_cluster_stack = 601static struct kobj_attribute ocfs2_attr_cluster_stack =
602 __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR, 602 __ATTR(cluster_stack, S_IRUGO | S_IWUSR,
603 ocfs2_cluster_stack_show, 603 ocfs2_cluster_stack_show,
604 ocfs2_cluster_stack_store); 604 ocfs2_cluster_stack_store);
605 605
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 656e401794de..64db2bceac59 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -138,8 +138,8 @@ static const char * const task_state_array[] = {
138 "D (disk sleep)", /* 2 */ 138 "D (disk sleep)", /* 2 */
139 "T (stopped)", /* 4 */ 139 "T (stopped)", /* 4 */
140 "t (tracing stop)", /* 8 */ 140 "t (tracing stop)", /* 8 */
141 "Z (zombie)", /* 16 */ 141 "X (dead)", /* 16 */
142 "X (dead)", /* 32 */ 142 "Z (zombie)", /* 32 */
143}; 143};
144 144
145static inline const char *get_task_state(struct task_struct *tsk) 145static inline const char *get_task_state(struct task_struct *tsk)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b9760628e1fd..6b7087e2e8fb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1236,6 +1236,9 @@ static ssize_t proc_fault_inject_write(struct file * file,
1236 make_it_fail = simple_strtol(strstrip(buffer), &end, 0); 1236 make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
1237 if (*end) 1237 if (*end)
1238 return -EINVAL; 1238 return -EINVAL;
1239 if (make_it_fail < 0 || make_it_fail > 1)
1240 return -EINVAL;
1241
1239 task = get_proc_task(file_inode(file)); 1242 task = get_proc_task(file_inode(file));
1240 if (!task) 1243 if (!task)
1241 return -ESRCH; 1244 return -ESRCH;
@@ -2588,7 +2591,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2588 REG("environ", S_IRUSR, proc_environ_operations), 2591 REG("environ", S_IRUSR, proc_environ_operations),
2589 INF("auxv", S_IRUSR, proc_pid_auxv), 2592 INF("auxv", S_IRUSR, proc_pid_auxv),
2590 ONE("status", S_IRUGO, proc_pid_status), 2593 ONE("status", S_IRUGO, proc_pid_status),
2591 ONE("personality", S_IRUGO, proc_pid_personality), 2594 ONE("personality", S_IRUSR, proc_pid_personality),
2592 INF("limits", S_IRUGO, proc_pid_limits), 2595 INF("limits", S_IRUGO, proc_pid_limits),
2593#ifdef CONFIG_SCHED_DEBUG 2596#ifdef CONFIG_SCHED_DEBUG
2594 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2597 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
@@ -2598,7 +2601,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2598#endif 2601#endif
2599 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2602 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2600#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2603#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2601 INF("syscall", S_IRUGO, proc_pid_syscall), 2604 INF("syscall", S_IRUSR, proc_pid_syscall),
2602#endif 2605#endif
2603 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2606 INF("cmdline", S_IRUGO, proc_pid_cmdline),
2604 ONE("stat", S_IRUGO, proc_tgid_stat), 2607 ONE("stat", S_IRUGO, proc_tgid_stat),
@@ -2617,7 +2620,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2617#ifdef CONFIG_PROC_PAGE_MONITOR 2620#ifdef CONFIG_PROC_PAGE_MONITOR
2618 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2621 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
2619 REG("smaps", S_IRUGO, proc_pid_smaps_operations), 2622 REG("smaps", S_IRUGO, proc_pid_smaps_operations),
2620 REG("pagemap", S_IRUGO, proc_pagemap_operations), 2623 REG("pagemap", S_IRUSR, proc_pagemap_operations),
2621#endif 2624#endif
2622#ifdef CONFIG_SECURITY 2625#ifdef CONFIG_SECURITY
2623 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2626 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -2626,7 +2629,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2626 INF("wchan", S_IRUGO, proc_pid_wchan), 2629 INF("wchan", S_IRUGO, proc_pid_wchan),
2627#endif 2630#endif
2628#ifdef CONFIG_STACKTRACE 2631#ifdef CONFIG_STACKTRACE
2629 ONE("stack", S_IRUGO, proc_pid_stack), 2632 ONE("stack", S_IRUSR, proc_pid_stack),
2630#endif 2633#endif
2631#ifdef CONFIG_SCHEDSTATS 2634#ifdef CONFIG_SCHEDSTATS
2632 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2635 INF("schedstat", S_IRUGO, proc_pid_schedstat),
@@ -2927,14 +2930,14 @@ static const struct pid_entry tid_base_stuff[] = {
2927 REG("environ", S_IRUSR, proc_environ_operations), 2930 REG("environ", S_IRUSR, proc_environ_operations),
2928 INF("auxv", S_IRUSR, proc_pid_auxv), 2931 INF("auxv", S_IRUSR, proc_pid_auxv),
2929 ONE("status", S_IRUGO, proc_pid_status), 2932 ONE("status", S_IRUGO, proc_pid_status),
2930 ONE("personality", S_IRUGO, proc_pid_personality), 2933 ONE("personality", S_IRUSR, proc_pid_personality),
2931 INF("limits", S_IRUGO, proc_pid_limits), 2934 INF("limits", S_IRUGO, proc_pid_limits),
2932#ifdef CONFIG_SCHED_DEBUG 2935#ifdef CONFIG_SCHED_DEBUG
2933 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2936 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2934#endif 2937#endif
2935 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2938 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2936#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2939#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2937 INF("syscall", S_IRUGO, proc_pid_syscall), 2940 INF("syscall", S_IRUSR, proc_pid_syscall),
2938#endif 2941#endif
2939 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2942 INF("cmdline", S_IRUGO, proc_pid_cmdline),
2940 ONE("stat", S_IRUGO, proc_tid_stat), 2943 ONE("stat", S_IRUGO, proc_tid_stat),
@@ -2955,7 +2958,7 @@ static const struct pid_entry tid_base_stuff[] = {
2955#ifdef CONFIG_PROC_PAGE_MONITOR 2958#ifdef CONFIG_PROC_PAGE_MONITOR
2956 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2959 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
2957 REG("smaps", S_IRUGO, proc_tid_smaps_operations), 2960 REG("smaps", S_IRUGO, proc_tid_smaps_operations),
2958 REG("pagemap", S_IRUGO, proc_pagemap_operations), 2961 REG("pagemap", S_IRUSR, proc_pagemap_operations),
2959#endif 2962#endif
2960#ifdef CONFIG_SECURITY 2963#ifdef CONFIG_SECURITY
2961 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2964 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -2964,7 +2967,7 @@ static const struct pid_entry tid_base_stuff[] = {
2964 INF("wchan", S_IRUGO, proc_pid_wchan), 2967 INF("wchan", S_IRUGO, proc_pid_wchan),
2965#endif 2968#endif
2966#ifdef CONFIG_STACKTRACE 2969#ifdef CONFIG_STACKTRACE
2967 ONE("stack", S_IRUGO, proc_pid_stack), 2970 ONE("stack", S_IRUSR, proc_pid_stack),
2968#endif 2971#endif
2969#ifdef CONFIG_SCHEDSTATS 2972#ifdef CONFIG_SCHEDSTATS
2970 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2973 INF("schedstat", S_IRUGO, proc_pid_schedstat),
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 985ea881b5bc..0788d093f5d8 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/proc_fs.h> 12#include <linux/proc_fs.h>
13 13
14#include "../mount.h"
14#include "internal.h" 15#include "internal.h"
15#include "fd.h" 16#include "fd.h"
16 17
@@ -48,8 +49,9 @@ static int seq_show(struct seq_file *m, void *v)
48 } 49 }
49 50
50 if (!ret) { 51 if (!ret) {
51 seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", 52 seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n",
52 (long long)file->f_pos, f_flags); 53 (long long)file->f_pos, f_flags,
54 real_mount(file->f_path.mnt)->mnt_id);
53 if (file->f_op->show_fdinfo) 55 if (file->f_op->show_fdinfo)
54 ret = file->f_op->show_fdinfo(m, file); 56 ret = file->f_op->show_fdinfo(m, file);
55 fput(file); 57 fput(file);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 8f20e3404fd2..0adbc02d60e3 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -47,7 +47,7 @@ static void proc_evict_inode(struct inode *inode)
47 pde_put(de); 47 pde_put(de);
48 head = PROC_I(inode)->sysctl; 48 head = PROC_I(inode)->sysctl;
49 if (head) { 49 if (head) {
50 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); 50 RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
51 sysctl_head_put(head); 51 sysctl_head_put(head);
52 } 52 }
53 /* Release any associated namespace */ 53 /* Release any associated namespace */
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 136e548d9567..7445af0b1aa3 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -73,7 +73,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
73 available += pagecache; 73 available += pagecache;
74 74
75 /* 75 /*
76 * Part of the reclaimable swap consists of items that are in use, 76 * Part of the reclaimable slab consists of items that are in use,
77 * and cannot be freed. Cap this estimate at the low watermark. 77 * and cannot be freed. Cap this estimate at the low watermark.
78 */ 78 */
79 available += global_page_state(NR_SLAB_RECLAIMABLE) - 79 available += global_page_state(NR_SLAB_RECLAIMABLE) -
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fb52b548080d..442177b1119a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,4 +1,5 @@
1#include <linux/mm.h> 1#include <linux/mm.h>
2#include <linux/vmacache.h>
2#include <linux/hugetlb.h> 3#include <linux/hugetlb.h>
3#include <linux/huge_mm.h> 4#include <linux/huge_mm.h>
4#include <linux/mount.h> 5#include <linux/mount.h>
@@ -152,7 +153,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
152 153
153 /* 154 /*
154 * We remember last_addr rather than next_addr to hit with 155 * We remember last_addr rather than next_addr to hit with
155 * mmap_cache most of the time. We have zero last_addr at 156 * vmacache most of the time. We have zero last_addr at
156 * the beginning and also after lseek. We will have -1 last_addr 157 * the beginning and also after lseek. We will have -1 last_addr
157 * after the end of the vmas. 158 * after the end of the vmas.
158 */ 159 */
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 88d4585b30f1..6a8e785b29da 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -484,7 +484,6 @@ static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr)
484 phdr_ptr->p_memsz = real_sz; 484 phdr_ptr->p_memsz = real_sz;
485 if (real_sz == 0) { 485 if (real_sz == 0) {
486 pr_warn("Warning: Zero PT_NOTE entries found\n"); 486 pr_warn("Warning: Zero PT_NOTE entries found\n");
487 return -EINVAL;
488 } 487 }
489 } 488 }
490 489
@@ -671,7 +670,6 @@ static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr)
671 phdr_ptr->p_memsz = real_sz; 670 phdr_ptr->p_memsz = real_sz;
672 if (real_sz == 0) { 671 if (real_sz == 0) {
673 pr_warn("Warning: Zero PT_NOTE entries found\n"); 672 pr_warn("Warning: Zero PT_NOTE entries found\n");
674 return -EINVAL;
675 } 673 }
676 } 674 }
677 675
@@ -1118,4 +1116,3 @@ void vmcore_cleanup(void)
1118 } 1116 }
1119 free_elfcorebuf(); 1117 free_elfcorebuf();
1120} 1118}
1121EXPORT_SYMBOL_GPL(vmcore_cleanup);
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index 880fd9884366..c51df1dd237e 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -8,9 +8,10 @@ config QUOTA
8 help 8 help
9 If you say Y here, you will be able to set per user limits for disk 9 If you say Y here, you will be able to set per user limits for disk
10 usage (also called disk quotas). Currently, it works for the 10 usage (also called disk quotas). Currently, it works for the
11 ext2, ext3, and reiserfs file system. ext3 also supports journalled 11 ext2, ext3, ext4, jfs, ocfs2 and reiserfs file systems.
12 quotas for which you don't need to run quotacheck(8) after an unclean 12 Note that gfs2 and xfs use their own quota system.
13 shutdown. 13 Ext3, ext4 and reiserfs also support journaled quotas for which
14 you don't need to run quotacheck(8) after an unclean shutdown.
14 For further details, read the Quota mini-HOWTO, available from 15 For further details, read the Quota mini-HOWTO, available from
15 <http://www.tldp.org/docs.html#howto>, or the documentation provided 16 <http://www.tldp.org/docs.html#howto>, or the documentation provided
16 with the quota tools. Probably the quota support is only useful for 17 with the quota tools. Probably the quota support is only useful for
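
For illustration, a typical setup matching the revised help text might look like the following (a sketch only, not part of this patch; CONFIG_QUOTA/CONFIG_QFMT_V2 and the usrjquota/grpjquota/jqfmt mount options are the standard quota and ext3/ext4 journaled-quota knobs, while the device path and mount point are hypothetical):

    # kernel .config fragment: core quota support plus the vfsv0 quota format
    CONFIG_QUOTA=y
    CONFIG_QFMT_V2=y

    # journaled quotas on ext4 -- no quotacheck(8) run needed after an unclean shutdown
    mount -o usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0 /dev/sdb1 /mnt
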
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 1fd2051109a3..af677353a3f5 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -125,6 +125,7 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
125 int d_reclen; 125 int d_reclen;
126 char *d_name; 126 char *d_name;
127 ino_t d_ino; 127 ino_t d_ino;
128 loff_t cur_pos = deh_offset(deh);
128 129
129 if (!de_visible(deh)) 130 if (!de_visible(deh))
130 /* it is hidden entry */ 131 /* it is hidden entry */
@@ -196,8 +197,9 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
196 if (local_buf != small_buf) { 197 if (local_buf != small_buf) {
197 kfree(local_buf); 198 kfree(local_buf);
198 } 199 }
199 // next entry should be looked for with such offset 200
200 next_pos = deh_offset(deh) + 1; 201 /* deh_offset(deh) may be invalid now. */
202 next_pos = cur_pos + 1;
201 203
202 if (item_moved(&tmp_ih, &path_to_entry)) { 204 if (item_moved(&tmp_ih, &path_to_entry)) {
203 set_cpu_key_k_offset(&pos_key, 205 set_cpu_key_k_offset(&pos_key,
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 123c79b7261e..4f34dbae823d 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1538,6 +1538,7 @@ out_unlock:
1538 1538
1539static const struct vm_operations_struct ubifs_file_vm_ops = { 1539static const struct vm_operations_struct ubifs_file_vm_ops = {
1540 .fault = filemap_fault, 1540 .fault = filemap_fault,
1541 .map_pages = filemap_map_pages,
1541 .page_mkwrite = ubifs_vm_page_mkwrite, 1542 .page_mkwrite = ubifs_vm_page_mkwrite,
1542 .remap_pages = generic_file_remap_pages, 1543 .remap_pages = generic_file_remap_pages,
1543}; 1544};
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 64f2b7334d08..3286db047a40 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -175,7 +175,7 @@ static void init_once(void *foo)
175 inode_init_once(&ei->vfs_inode); 175 inode_init_once(&ei->vfs_inode);
176} 176}
177 177
178static int init_inodecache(void) 178static int __init init_inodecache(void)
179{ 179{
180 udf_inode_cachep = kmem_cache_create("udf_inode_cache", 180 udf_inode_cachep = kmem_cache_create("udf_inode_cache",
181 sizeof(struct udf_inode_info), 181 sizeof(struct udf_inode_info),
@@ -505,6 +505,7 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
505 while ((p = strsep(&options, ",")) != NULL) { 505 while ((p = strsep(&options, ",")) != NULL) {
506 substring_t args[MAX_OPT_ARGS]; 506 substring_t args[MAX_OPT_ARGS];
507 int token; 507 int token;
508 unsigned n;
508 if (!*p) 509 if (!*p)
509 continue; 510 continue;
510 511
@@ -516,7 +517,10 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
516 case Opt_bs: 517 case Opt_bs:
517 if (match_int(&args[0], &option)) 518 if (match_int(&args[0], &option))
518 return 0; 519 return 0;
519 uopt->blocksize = option; 520 n = option;
521 if (n != 512 && n != 1024 && n != 2048 && n != 4096)
522 return 0;
523 uopt->blocksize = n;
520 uopt->flags |= (1 << UDF_FLAG_BLOCKSIZE_SET); 524 uopt->flags |= (1 << UDF_FLAG_BLOCKSIZE_SET);
521 break; 525 break;
522 case Opt_unhide: 526 case Opt_unhide:
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index a7ea492ae660..0ab1de4b39a5 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -38,7 +38,6 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
38{ 38{
39 struct super_block * sb; 39 struct super_block * sb;
40 struct ufs_sb_private_info * uspi; 40 struct ufs_sb_private_info * uspi;
41 struct ufs_super_block_first * usb1;
42 struct ufs_cg_private_info * ucpi; 41 struct ufs_cg_private_info * ucpi;
43 struct ufs_cylinder_group * ucg; 42 struct ufs_cylinder_group * ucg;
44 unsigned cgno, bit, end_bit, bbase, blkmap, i; 43 unsigned cgno, bit, end_bit, bbase, blkmap, i;
@@ -46,7 +45,6 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
46 45
47 sb = inode->i_sb; 46 sb = inode->i_sb;
48 uspi = UFS_SB(sb)->s_uspi; 47 uspi = UFS_SB(sb)->s_uspi;
49 usb1 = ubh_get_usb_first(uspi);
50 48
51 UFSD("ENTER, fragment %llu, count %u\n", 49 UFSD("ENTER, fragment %llu, count %u\n",
52 (unsigned long long)fragment, count); 50 (unsigned long long)fragment, count);
@@ -135,7 +133,6 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count)
135{ 133{
136 struct super_block * sb; 134 struct super_block * sb;
137 struct ufs_sb_private_info * uspi; 135 struct ufs_sb_private_info * uspi;
138 struct ufs_super_block_first * usb1;
139 struct ufs_cg_private_info * ucpi; 136 struct ufs_cg_private_info * ucpi;
140 struct ufs_cylinder_group * ucg; 137 struct ufs_cylinder_group * ucg;
141 unsigned overflow, cgno, bit, end_bit, i; 138 unsigned overflow, cgno, bit, end_bit, i;
@@ -143,7 +140,6 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count)
143 140
144 sb = inode->i_sb; 141 sb = inode->i_sb;
145 uspi = UFS_SB(sb)->s_uspi; 142 uspi = UFS_SB(sb)->s_uspi;
146 usb1 = ubh_get_usb_first(uspi);
147 143
148 UFSD("ENTER, fragment %llu, count %u\n", 144 UFSD("ENTER, fragment %llu, count %u\n",
149 (unsigned long long)fragment, count); 145 (unsigned long long)fragment, count);
@@ -499,7 +495,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
499{ 495{
500 struct super_block * sb; 496 struct super_block * sb;
501 struct ufs_sb_private_info * uspi; 497 struct ufs_sb_private_info * uspi;
502 struct ufs_super_block_first * usb1;
503 struct ufs_cg_private_info * ucpi; 498 struct ufs_cg_private_info * ucpi;
504 struct ufs_cylinder_group * ucg; 499 struct ufs_cylinder_group * ucg;
505 unsigned cgno, fragno, fragoff, count, fragsize, i; 500 unsigned cgno, fragno, fragoff, count, fragsize, i;
@@ -509,7 +504,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
509 504
510 sb = inode->i_sb; 505 sb = inode->i_sb;
511 uspi = UFS_SB(sb)->s_uspi; 506 uspi = UFS_SB(sb)->s_uspi;
512 usb1 = ubh_get_usb_first (uspi);
513 count = newcount - oldcount; 507 count = newcount - oldcount;
514 508
515 cgno = ufs_dtog(uspi, fragment); 509 cgno = ufs_dtog(uspi, fragment);
@@ -577,7 +571,6 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
577{ 571{
578 struct super_block * sb; 572 struct super_block * sb;
579 struct ufs_sb_private_info * uspi; 573 struct ufs_sb_private_info * uspi;
580 struct ufs_super_block_first * usb1;
581 struct ufs_cg_private_info * ucpi; 574 struct ufs_cg_private_info * ucpi;
582 struct ufs_cylinder_group * ucg; 575 struct ufs_cylinder_group * ucg;
583 unsigned oldcg, i, j, k, allocsize; 576 unsigned oldcg, i, j, k, allocsize;
@@ -588,7 +581,6 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
588 581
589 sb = inode->i_sb; 582 sb = inode->i_sb;
590 uspi = UFS_SB(sb)->s_uspi; 583 uspi = UFS_SB(sb)->s_uspi;
591 usb1 = ubh_get_usb_first(uspi);
592 oldcg = cgno; 584 oldcg = cgno;
593 585
594 /* 586 /*
@@ -690,7 +682,6 @@ static u64 ufs_alloccg_block(struct inode *inode,
690{ 682{
691 struct super_block * sb; 683 struct super_block * sb;
692 struct ufs_sb_private_info * uspi; 684 struct ufs_sb_private_info * uspi;
693 struct ufs_super_block_first * usb1;
694 struct ufs_cylinder_group * ucg; 685 struct ufs_cylinder_group * ucg;
695 u64 result, blkno; 686 u64 result, blkno;
696 687
@@ -698,7 +689,6 @@ static u64 ufs_alloccg_block(struct inode *inode,
698 689
699 sb = inode->i_sb; 690 sb = inode->i_sb;
700 uspi = UFS_SB(sb)->s_uspi; 691 uspi = UFS_SB(sb)->s_uspi;
701 usb1 = ubh_get_usb_first(uspi);
702 ucg = ubh_get_ucg(UCPI_UBH(ucpi)); 692 ucg = ubh_get_ucg(UCPI_UBH(ucpi));
703 693
704 if (goal == 0) { 694 if (goal == 0) {
@@ -794,7 +784,6 @@ static u64 ufs_bitmap_search(struct super_block *sb,
794 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe 784 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe
795 }; 785 };
796 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; 786 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
797 struct ufs_super_block_first *usb1;
798 struct ufs_cylinder_group *ucg; 787 struct ufs_cylinder_group *ucg;
799 unsigned start, length, loc; 788 unsigned start, length, loc;
800 unsigned pos, want, blockmap, mask, end; 789 unsigned pos, want, blockmap, mask, end;
@@ -803,7 +792,6 @@ static u64 ufs_bitmap_search(struct super_block *sb,
803 UFSD("ENTER, cg %u, goal %llu, count %u\n", ucpi->c_cgx, 792 UFSD("ENTER, cg %u, goal %llu, count %u\n", ucpi->c_cgx,
804 (unsigned long long)goal, count); 793 (unsigned long long)goal, count);
805 794
806 usb1 = ubh_get_usb_first (uspi);
807 ucg = ubh_get_ucg(UCPI_UBH(ucpi)); 795 ucg = ubh_get_ucg(UCPI_UBH(ucpi));
808 796
809 if (goal) 797 if (goal)
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index d0426d74817b..98f7211599ff 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -57,7 +57,6 @@ void ufs_free_inode (struct inode * inode)
57{ 57{
58 struct super_block * sb; 58 struct super_block * sb;
59 struct ufs_sb_private_info * uspi; 59 struct ufs_sb_private_info * uspi;
60 struct ufs_super_block_first * usb1;
61 struct ufs_cg_private_info * ucpi; 60 struct ufs_cg_private_info * ucpi;
62 struct ufs_cylinder_group * ucg; 61 struct ufs_cylinder_group * ucg;
63 int is_directory; 62 int is_directory;
@@ -67,7 +66,6 @@ void ufs_free_inode (struct inode * inode)
67 66
68 sb = inode->i_sb; 67 sb = inode->i_sb;
69 uspi = UFS_SB(sb)->s_uspi; 68 uspi = UFS_SB(sb)->s_uspi;
70 usb1 = ubh_get_usb_first(uspi);
71 69
72 ino = inode->i_ino; 70 ino = inode->i_ino;
73 71
@@ -175,7 +173,6 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode)
175 struct super_block * sb; 173 struct super_block * sb;
176 struct ufs_sb_info * sbi; 174 struct ufs_sb_info * sbi;
177 struct ufs_sb_private_info * uspi; 175 struct ufs_sb_private_info * uspi;
178 struct ufs_super_block_first * usb1;
179 struct ufs_cg_private_info * ucpi; 176 struct ufs_cg_private_info * ucpi;
180 struct ufs_cylinder_group * ucg; 177 struct ufs_cylinder_group * ucg;
181 struct inode * inode; 178 struct inode * inode;
@@ -195,7 +192,6 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode)
195 ufsi = UFS_I(inode); 192 ufsi = UFS_I(inode);
196 sbi = UFS_SB(sb); 193 sbi = UFS_SB(sb);
197 uspi = sbi->s_uspi; 194 uspi = sbi->s_uspi;
198 usb1 = ubh_get_usb_first(uspi);
199 195
200 mutex_lock(&sbi->s_lock); 196 mutex_lock(&sbi->s_lock);
201 197
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index b8c6791f046f..c1183f9f69dc 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -524,11 +524,9 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
524 struct ufs_buffer_head * ubh; 524 struct ufs_buffer_head * ubh;
525 unsigned char * base, * space; 525 unsigned char * base, * space;
526 unsigned size, blks, i; 526 unsigned size, blks, i;
527 struct ufs_super_block_third *usb3;
528 527
529 UFSD("ENTER\n"); 528 UFSD("ENTER\n");
530 529
531 usb3 = ubh_get_usb_third(uspi);
532 /* 530 /*
533 * Read cs structures from (usually) first data block 531 * Read cs structures from (usually) first data block
534 * on the device. 532 * on the device.
@@ -1390,15 +1388,11 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
1390 struct super_block *sb = dentry->d_sb; 1388 struct super_block *sb = dentry->d_sb;
1391 struct ufs_sb_private_info *uspi= UFS_SB(sb)->s_uspi; 1389 struct ufs_sb_private_info *uspi= UFS_SB(sb)->s_uspi;
1392 unsigned flags = UFS_SB(sb)->s_flags; 1390 unsigned flags = UFS_SB(sb)->s_flags;
1393 struct ufs_super_block_first *usb1;
1394 struct ufs_super_block_second *usb2;
1395 struct ufs_super_block_third *usb3; 1391 struct ufs_super_block_third *usb3;
1396 u64 id = huge_encode_dev(sb->s_bdev->bd_dev); 1392 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
1397 1393
1398 lock_ufs(sb); 1394 lock_ufs(sb);
1399 1395
1400 usb1 = ubh_get_usb_first(uspi);
1401 usb2 = ubh_get_usb_second(uspi);
1402 usb3 = ubh_get_usb_third(uspi); 1396 usb3 = ubh_get_usb_third(uspi);
1403 1397
1404 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { 1398 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
@@ -1454,7 +1448,7 @@ static void init_once(void *foo)
1454 inode_init_once(&ei->vfs_inode); 1448 inode_init_once(&ei->vfs_inode);
1455} 1449}
1456 1450
1457static int init_inodecache(void) 1451static int __init init_inodecache(void)
1458{ 1452{
1459 ufs_inode_cachep = kmem_cache_create("ufs_inode_cache", 1453 ufs_inode_cachep = kmem_cache_create("ufs_inode_cache",
1460 sizeof(struct ufs_inode_info), 1454 sizeof(struct ufs_inode_info),
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f7abff8c16ca..003c0051b62f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1483,6 +1483,7 @@ const struct file_operations xfs_dir_file_operations = {
1483 1483
1484static const struct vm_operations_struct xfs_file_vm_ops = { 1484static const struct vm_operations_struct xfs_file_vm_ops = {
1485 .fault = filemap_fault, 1485 .fault = filemap_fault,
1486 .map_pages = filemap_map_pages,
1486 .page_mkwrite = xfs_vm_page_mkwrite, 1487 .page_mkwrite = xfs_vm_page_mkwrite,
1487 .remap_pages = generic_file_remap_pages, 1488 .remap_pages = generic_file_remap_pages,
1488}; 1489};