158 files changed, 2677 insertions, 1971 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 15690bb1d3b5..789b3afb3423 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -140,6 +140,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
        candidate->first = candidate->last = index;
        candidate->offset_first = from;
        candidate->to_last = to;
+        INIT_LIST_HEAD(&candidate->link);
        candidate->usage = 1;
        candidate->state = AFS_WBACK_PENDING;
        init_waitqueue_head(&candidate->waitq);
diff --git a/fs/aio.c b/fs/aio.c
index fc557a3be0a9..26869cde3953 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -239,15 +239,23 @@ static void __put_ioctx(struct kioctx *ctx)
        call_rcu(&ctx->rcu_head, ctx_rcu_free);
 }
-#define get_ioctx(kioctx) do {                                          \
+static inline void get_ioctx(struct kioctx *kioctx)
-        BUG_ON(atomic_read(&(kioctx)->users) <= 0);                     \
+{
-        atomic_inc(&(kioctx)->users);                                   \
+        BUG_ON(atomic_read(&kioctx->users) <= 0);
-} while (0)
+        atomic_inc(&kioctx->users);
-#define put_ioctx(kioctx) do {                                          \
+}
-        BUG_ON(atomic_read(&(kioctx)->users) <= 0);                     \
-        if (unlikely(atomic_dec_and_test(&(kioctx)->users)))            \
+static inline int try_get_ioctx(struct kioctx *kioctx)
-                __put_ioctx(kioctx);                                    \
+{
-} while (0)
+        return atomic_inc_not_zero(&kioctx->users);
+}
+static inline void put_ioctx(struct kioctx *kioctx)
+{
+        BUG_ON(atomic_read(&kioctx->users) <= 0);
+        if (unlikely(atomic_dec_and_test(&kioctx->users)))
+                __put_ioctx(kioctx);
+}
 /* ioctx_alloc
 *      Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
@@ -601,8 +609,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
        rcu_read_lock();
        hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
-                if (ctx->user_id == ctx_id && !ctx->dead) {
+                /*
-                        get_ioctx(ctx);
+                 * RCU protects us against accessing freed memory but
+                 * we have to be careful not to get a reference when the
+                 * reference count already dropped to 0 (ctx->dead test
+                 * is unreliable because of races).
+                 */
+                if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
                        ret = ctx;
                        break;
                }
@@ -1629,6 +1642,23 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
                goto out_put_req;
        spin_lock_irq(&ctx->ctx_lock);
+        /*
+         * We could have raced with io_destroy() and are currently holding a
+         * reference to ctx which should be destroyed. We cannot submit IO
+         * since ctx gets freed as soon as io_submit() puts its reference.  The
+         * check here is reliable: io_destroy() sets ctx->dead before waiting
+         * for outstanding IO and the barrier between these two is realized by
+         * unlock of mm->ioctx_lock and lock of ctx->ctx_lock.  Analogously we
+         * increment ctx->reqs_active before checking for ctx->dead and the
+         * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
+         * don't see ctx->dead set here, io_destroy() waits for our IO to
+         * finish.
+         */
+        if (ctx->dead) {
+                spin_unlock_irq(&ctx->ctx_lock);
+                ret = -EINVAL;
+                goto out_put_req;
+        }
        aio_run_iocb(req);
        if (!list_empty(&ctx->run_list)) {
                /* drain the run list */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 333a7bb4cb9c..889287019599 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -873,6 +873,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
        ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
        if (ret)
                goto out_del;
+        /*
+         * bdev could be deleted beneath us which would implicitly destroy
+         * the holder directory.  Hold on to it.
+         */
+        kobject_get(bdev->bd_part->holder_dir);
        list_add(&holder->list, &bdev->bd_holder_disks);
        goto out_unlock;
@@ -909,6 +914,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
                del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
                del_symlink(bdev->bd_part->holder_dir,
                            &disk_to_dev(disk)->kobj);
+                kobject_put(bdev->bd_part->holder_dir);
                list_del_init(&holder->list);
                kfree(holder);
        }
@@ -922,14 +928,15 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
 * flush_disk - invalidates all buffer-cache entries on a disk
 *
 * @bdev:      struct block device to be flushed
+ * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Invalidates all buffer-cache entries on a disk. It should be called
 * when a disk has been changed -- either by a media change or online
 * resize.
 */
-static void flush_disk(struct block_device *bdev)
+static void flush_disk(struct block_device *bdev, bool kill_dirty)
 {
-        if (__invalidate_device(bdev)) {
+        if (__invalidate_device(bdev, kill_dirty)) {
                char name[BDEVNAME_SIZE] = "";
                if (bdev->bd_disk)
@@ -966,7 +973,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
                       "%s: detected capacity change from %lld to %lld\n",
                       name, bdev_size, disk_size);
                i_size_write(bdev->bd_inode, disk_size);
-                flush_disk(bdev);
+                flush_disk(bdev, false);
        }
 }
 EXPORT_SYMBOL(check_disk_size_change);
@@ -1019,7 +1026,7 @@ int check_disk_change(struct block_device *bdev)
        if (!(events & DISK_EVENT_MEDIA_CHANGE))
                return 0;
-        flush_disk(bdev);
+        flush_disk(bdev, true);
        if (bdops->revalidate_disk)
                bdops->revalidate_disk(bdev->bd_disk);
        return 1;
@@ -1215,12 +1222,6 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
        res = __blkdev_get(bdev, mode, 0);
-        /* __blkdev_get() may alter read only status, check it afterwards */
-        if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
-                __blkdev_put(bdev, mode, 0);
-                res = -EACCES;
-        }
        if (whole) {
                /* finish claiming */
                mutex_lock(&bdev->bd_mutex);
@@ -1298,6 +1299,11 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
        if (err)
                return ERR_PTR(err);
+        if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+                blkdev_put(bdev, mode);
+                return ERR_PTR(-EACCES);
+        }
        return bdev;
 }
 EXPORT_SYMBOL(blkdev_get_by_path);
@@ -1601,7 +1607,7 @@ fail:
 }
 EXPORT_SYMBOL(lookup_bdev);
-int __invalidate_device(struct block_device *bdev)
+int __invalidate_device(struct block_device *bdev, bool kill_dirty)
 {
        struct super_block *sb = get_super(bdev);
        int res = 0;
@@ -1614,7 +1620,7 @@ int __invalidate_device(struct block_device *bdev)
                 * hold).
                 */
                shrink_dcache_sb(sb);
-                res = invalidate_inodes(sb);
+                res = invalidate_inodes(sb, kill_dirty);
                drop_super(sb);
        }
        invalidate_bdev(bdev);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 15b5ca2a2606..9c949348510b 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -37,6 +37,9 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
        char *value = NULL;
        struct posix_acl *acl;
+        if (!IS_POSIXACL(inode))
+                return NULL;
        acl = get_cached_acl(inode, type);
        if (acl != ACL_NOT_CACHED)
                return acl;
@@ -84,6 +87,9 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
        struct posix_acl *acl;
        int ret = 0;
+        if (!IS_POSIXACL(dentry->d_inode))
+                return -EOPNOTSUPP;
        acl = btrfs_get_acl(dentry->d_inode, type);
        if (IS_ERR(acl))
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index f745287fbf2e..4d2110eafe29 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -562,7 +562,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        u64 em_len;
        u64 em_start;
        struct extent_map *em;
-        int ret;
+        int ret = -ENOMEM;
        u32 *sums;
        tree = &BTRFS_I(inode)->io_tree;
@@ -577,6 +577,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        compressed_len = em->block_len;
        cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+        if (!cb)
+                goto out;
        atomic_set(&cb->pending_bios, 0);
        cb->errors = 0;
        cb->inode = inode;
@@ -597,13 +600,18 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
                                 PAGE_CACHE_SIZE;
-        cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages,
+        cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
                                       GFP_NOFS);
+        if (!cb->compressed_pages)
+                goto fail1;
        bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
        for (page_index = 0; page_index < nr_pages; page_index++) {
                cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
                                                              __GFP_HIGHMEM);
+                if (!cb->compressed_pages[page_index])
+                        goto fail2;
        }
        cb->nr_pages = nr_pages;
@@ -614,6 +622,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        cb->len = uncompressed_len;
        comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
+        if (!comp_bio)
+                goto fail2;
        comp_bio->bi_private = cb;
        comp_bio->bi_end_io = end_compressed_bio_read;
        atomic_inc(&cb->pending_bios);
@@ -681,6 +691,17 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        bio_put(comp_bio);
        return 0;
+fail2:
+        /*
+         * compressed_pages[] holds struct page pointers obtained from
+         * alloc_page(), so they have to be released with __free_page();
+         * free_page() takes a kernel virtual address instead.  The array
+         * was kzalloc()ed, so slots that were never filled are still NULL
+         * and are skipped.
+         */
+        for (page_index = 0; page_index < nr_pages; page_index++) {
+                if (cb->compressed_pages[page_index])
+                        __free_page(cb->compressed_pages[page_index]);
+        }
+        kfree(cb->compressed_pages);
+fail1:
+        kfree(cb);
+out:
+        /* every failure path drops the extent map before returning ret */
+        free_extent_map(em);
+        return ret;
 }
 static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
@@ -900,7 +921,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
        return ret;
 }
-void __exit btrfs_exit_compress(void)
+void btrfs_exit_compress(void)
 {
        free_workspaces();
 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3af6052..6f820fa23df4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1254,6 +1254,7 @@ struct btrfs_root {
 #define BTRFS_MOUNT_SPACE_CACHE         (1 << 12)
 #define BTRFS_MOUNT_CLEAR_CACHE         (1 << 13)
 #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
+#define BTRFS_MOUNT_ENOSPC_DEBUG         (1 << 15)
 #define btrfs_clear_opt(o, opt)         ((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)           ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2219,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
                                   u64 start, u64 end);
 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
                               u64 num_bytes);
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root, u64 type);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b531c36455d8..e1aa8d607bc7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -359,10 +359,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
        tree = &BTRFS_I(page->mapping->host)->io_tree;
-        if (page->private == EXTENT_PAGE_PRIVATE)
+        if (page->private == EXTENT_PAGE_PRIVATE) {
+                WARN_ON(1);
                goto out;
-        if (!page->private)
+        }
+        if (!page->private) {
+                WARN_ON(1);
                goto out;
+        }
        len = page->private >> 2;
        WARN_ON(len == 0);
@@ -1550,6 +1554,7 @@ static int transaction_kthread(void *arg)
                spin_unlock(&root->fs_info->new_trans_lock);
                trans = btrfs_join_transaction(root, 1);
+                BUG_ON(IS_ERR(trans));
                if (transid == trans->transid) {
                        ret = btrfs_commit_transaction(trans, root);
                        BUG_ON(ret);
@@ -2453,10 +2458,14 @@ int btrfs_commit_super(struct btrfs_root *root)
        up_write(&root->fs_info->cleanup_work_sem);
        trans = btrfs_join_transaction(root, 1);
+        if (IS_ERR(trans))
+                return PTR_ERR(trans);
        ret = btrfs_commit_transaction(trans, root);
        BUG_ON(ret);
        /* run commit again to drop the original snapshot */
        trans = btrfs_join_transaction(root, 1);
+        if (IS_ERR(trans))
+                return PTR_ERR(trans);
        btrfs_commit_transaction(trans, root);
        ret = btrfs_write_and_wait_transaction(NULL, root);
        BUG_ON(ret);
@@ -2554,6 +2563,8 @@ int close_ctree(struct btrfs_root *root)
        kfree(fs_info->chunk_root);
        kfree(fs_info->dev_root);
        kfree(fs_info->csum_root);
+        kfree(fs_info);
        return 0;
 }
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 9786963b07e5..ff27d7a477b2 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -171,6 +171,8 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
        int ret;
        path = btrfs_alloc_path();
+        if (!path)
+                return ERR_PTR(-ENOMEM);
        if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
                key.objectid = root->root_key.objectid;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b55269340cec..588ff9849873 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -320,11 +320,6 @@ static int caching_kthread(void *data)
        if (!path)
                return -ENOMEM;
-        exclude_super_stripes(extent_root, block_group);
-        spin_lock(&block_group->space_info->lock);
-        block_group->space_info->bytes_readonly += block_group->bytes_super;
-        spin_unlock(&block_group->space_info->lock);
        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
        /*
@@ -467,8 +462,10 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
                        cache->cached = BTRFS_CACHE_NO;
                }
                spin_unlock(&cache->lock);
-                if (ret == 1)
+                if (ret == 1) {
+                        free_excluded_extents(fs_info->extent_root, cache);
                        return 0;
+                }
        }
        if (load_cache_only)
@@ -3344,8 +3341,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        u64 reserved;
        u64 max_reclaim;
        u64 reclaimed = 0;
+        long time_left;
        int pause = 1;
        int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
+        int loops = 0;
        block_rsv = &root->fs_info->delalloc_block_rsv;
        space_info = block_rsv->space_info;
@@ -3358,7 +3357,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        max_reclaim = min(reserved, to_reclaim);
-        while (1) {
+        while (loops < 1024) {
                /* have the flusher threads jump in and do some IO */
                smp_mb();
                nr_pages = min_t(unsigned long, nr_pages,
@@ -3366,8 +3365,12 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
                writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
                spin_lock(&space_info->lock);
-                if (reserved > space_info->bytes_reserved)
+                if (reserved > space_info->bytes_reserved) {
+                        loops = 0;
                        reclaimed += reserved - space_info->bytes_reserved;
+                } else {
+                        loops++;
+                }
                reserved = space_info->bytes_reserved;
                spin_unlock(&space_info->lock);
@@ -3378,7 +3381,12 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
                        return -EAGAIN;
                __set_current_state(TASK_INTERRUPTIBLE);
-                schedule_timeout(pause);
+                time_left = schedule_timeout(pause);
+                /* We were interrupted, exit */
+                if (time_left)
+                        break;
                pause <<= 1;
                if (pause > HZ / 10)
                        pause = HZ / 10;
@@ -3588,8 +3596,20 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
        if (num_bytes > 0) {
                if (dest) {
-                        block_rsv_add_bytes(dest, num_bytes, 0);
+                        spin_lock(&dest->lock);
-                } else {
+                        if (!dest->full) {
+                                u64 bytes_to_add;
+                                bytes_to_add = dest->size - dest->reserved;
+                                bytes_to_add = min(num_bytes, bytes_to_add);
+                                dest->reserved += bytes_to_add;
+                                if (dest->reserved >= dest->size)
+                                        dest->full = 1;
+                                num_bytes -= bytes_to_add;
+                        }
+                        spin_unlock(&dest->lock);
+                }
+                if (num_bytes) {
                        spin_lock(&space_info->lock);
                        space_info->bytes_reserved -= num_bytes;
                        spin_unlock(&space_info->lock);
@@ -4012,6 +4032,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
        num_bytes = ALIGN(num_bytes, root->sectorsize);
        atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+        WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
        spin_lock(&BTRFS_I(inode)->accounting_lock);
        nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
@@ -5355,7 +5376,7 @@ again:
                               num_bytes, data, 1);
                goto again;
        }
-        if (ret == -ENOSPC) {
+        if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
                struct btrfs_space_info *sinfo;
                sinfo = __find_space_info(root->fs_info, data);
@@ -5633,6 +5654,7 @@ use_block_rsv(struct btrfs_trans_handle *trans,
              struct btrfs_root *root, u32 blocksize)
 {
        struct btrfs_block_rsv *block_rsv;
+        struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
        int ret;
        block_rsv = get_block_rsv(trans, root);
@@ -5640,14 +5662,39 @@ use_block_rsv(struct btrfs_trans_handle *trans,
        if (block_rsv->size == 0) {
                ret = reserve_metadata_bytes(trans, root, block_rsv,
                                             blocksize, 0);
-                if (ret)
+                /*
+                 * If we couldn't reserve metadata bytes try and use some from
+                 * the global reserve.
+                 */
+                if (ret && block_rsv != global_rsv) {
+                        ret = block_rsv_use_bytes(global_rsv, blocksize);
+                        if (!ret)
+                                return global_rsv;
+                        return ERR_PTR(ret);
+                } else if (ret) {
                        return ERR_PTR(ret);
+                }
                return block_rsv;
        }
        ret = block_rsv_use_bytes(block_rsv, blocksize);
        if (!ret)
                return block_rsv;
+        if (ret) {
+                WARN_ON(1);
+                ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize,
+                                             0);
+                if (!ret) {
+                        spin_lock(&block_rsv->lock);
+                        block_rsv->size += blocksize;
+                        spin_unlock(&block_rsv->lock);
+                        return block_rsv;
+                } else if (ret && block_rsv != global_rsv) {
+                        ret = block_rsv_use_bytes(global_rsv, blocksize);
+                        if (!ret)
+                                return global_rsv;
+                }
+        }
        return ERR_PTR(-ENOSPC);
 }
@@ -6221,6 +6268,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
        BUG_ON(!wc);
        trans = btrfs_start_transaction(tree_root, 0);
+        BUG_ON(IS_ERR(trans));
        if (block_rsv)
                trans->block_rsv = block_rsv;
@@ -6318,6 +6367,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                        btrfs_end_transaction_throttle(trans, tree_root);
                        trans = btrfs_start_transaction(tree_root, 0);
+                        BUG_ON(IS_ERR(trans));
                        if (block_rsv)
                                trans->block_rsv = block_rsv;
                }
@@ -6446,6 +6496,8 @@ static noinline int relocate_inode_pages(struct inode *inode, u64 start,
        int ret = 0;
        ra = kzalloc(sizeof(*ra), GFP_NOFS);
+        if (!ra)
+                return -ENOMEM;
        mutex_lock(&inode->i_mutex);
        first_index = start >> PAGE_CACHE_SHIFT;
@@ -6531,7 +6583,7 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
        u64 end = start + extent_key->offset - 1;
        em = alloc_extent_map(GFP_NOFS);
-        BUG_ON(!em || IS_ERR(em));
+        BUG_ON(!em);
        em->start = start;
        em->len = extent_key->offset;
@@ -7477,7 +7529,7 @@ int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
                BUG_ON(reloc_root->commit_root != NULL);
                while (1) {
                        trans = btrfs_join_transaction(root, 1);
-                        BUG_ON(!trans);
+                        BUG_ON(IS_ERR(trans));
                        mutex_lock(&root->fs_info->drop_mutex);
                        ret = btrfs_drop_snapshot(trans, reloc_root);
@@ -7535,7 +7587,7 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
        if (found) {
                trans = btrfs_start_transaction(root, 1);
-                BUG_ON(!trans);
+                BUG_ON(IS_ERR(trans));
                ret = btrfs_commit_transaction(trans, root);
                BUG_ON(ret);
        }
@@ -7779,7 +7831,7 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
        trans = btrfs_start_transaction(extent_root, 1);
-        BUG_ON(!trans);
+        BUG_ON(IS_ERR(trans));
        if (extent_key->objectid == 0) {
                ret = del_extent_zero(trans, extent_root, path, extent_key);
@@ -8013,6 +8065,13 @@ out:
        return ret;
 }
+/*
+ * Request a chunk allocation for block groups of @type.
+ *
+ * The allocation profile for @type is looked up with get_alloc_profile()
+ * and handed to do_chunk_alloc() together with a 2MiB byte count.  The
+ * final argument of 1 is presumably the "force" flag - confirm against
+ * do_chunk_alloc().
+ *
+ * Returns whatever do_chunk_alloc() returns.
+ */
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root, u64 type)
+{
+        return do_chunk_alloc(trans, root, 2 * 1024 * 1024,
+                              get_alloc_profile(root, type), 1);
+}
 /*
 * helper to account the unused space of all the readonly block group in the
 * list. takes mirrors into account.
@@ -8270,6 +8329,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                if (block_group->cached == BTRFS_CACHE_STARTED)
                        wait_block_group_cache_done(block_group);
+                /*
+                 * We haven't cached this block group, which means we could
+                 * possibly have excluded extents on this block group.
+                 */
+                if (block_group->cached == BTRFS_CACHE_NO)
+                        free_excluded_extents(info->extent_root, block_group);
                btrfs_remove_free_space_cache(block_group);
                btrfs_put_block_group(block_group);
@@ -8385,6 +8451,13 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                cache->sectorsize = root->sectorsize;
                /*
+                 * We need to exclude the super stripes now so that the space
+                 * info has super bytes accounted for, otherwise we'll think
+                 * we have more space than we actually do.
+                 */
+                exclude_super_stripes(root, cache);
+                /*
                 * check for two cases, either we are full, and therefore
                 * don't need to bother with the caching work since we won't
                 * find any space, or we are empty, and we can just add all
@@ -8392,12 +8465,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                 * time, particularly in the full case.
                 */
                if (found_key.offset == btrfs_block_group_used(&cache->item)) {
-                        exclude_super_stripes(root, cache);
                        cache->last_byte_to_unpin = (u64)-1;
                        cache->cached = BTRFS_CACHE_FINISHED;
                        free_excluded_extents(root, cache);
                } else if (btrfs_block_group_used(&cache->item) == 0) {
-                        exclude_super_stripes(root, cache);
                        cache->last_byte_to_unpin = (u64)-1;
                        cache->cached = BTRFS_CACHE_FINISHED;
                        add_new_free_space(cache, root->fs_info,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e993cf1766e..fd3f172e94e6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
 */
 u64 count_range_bits(struct extent_io_tree *tree,
                     u64 *start, u64 search_end, u64 max_bytes,
-                     unsigned long bits)
+                     unsigned long bits, int contig)
 {
        struct rb_node *node;
        struct extent_state *state;
        u64 cur_start = *start;
        u64 total_bytes = 0;
+        u64 last = 0;
        int found = 0;
        if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
                state = rb_entry(node, struct extent_state, rb_node);
                if (state->start > search_end)
                        break;
-                if (state->end >= cur_start && (state->state & bits)) {
+                if (contig && found && state->start > last + 1)
+                        break;
+                if (state->end >= cur_start && (state->state & bits) == bits) {
                        total_bytes += min(search_end, state->end) + 1 -
                                       max(cur_start, state->start);
                        if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
                                *start = state->start;
                                found = 1;
                        }
+                        last = state->end;
+                } else if (contig && found) {
+                        break;
                }
                node = rb_next(node);
                if (!node)
@@ -1865,7 +1871,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
        bio_get(bio);
        if (tree->ops && tree->ops->submit_bio_hook)
-                tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
+                ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
                                           mirror_num, bio_flags, start);
        else
                submit_bio(rw, bio);
@@ -1920,6 +1926,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
                nr = bio_get_nr_vecs(bdev);
        bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+        if (!bio)
+                return -ENOMEM;
        bio_add_page(bio, page, page_size, offset);
        bio->bi_end_io = end_io_func;
@@ -1944,6 +1952,7 @@ void set_page_extent_mapped(struct page *page)
 static void set_page_extent_head(struct page *page, unsigned long len)
 {
+        WARN_ON(!PagePrivate(page));
        set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
 }
@@ -2126,7 +2135,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
        ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
                                      &bio_flags);
        if (bio)
-                submit_one_bio(READ, bio, 0, bio_flags);
+                ret = submit_one_bio(READ, bio, 0, bio_flags);
        return ret;
 }
@@ -2819,9 +2828,17 @@ int try_release_extent_state(struct extent_map_tree *map,
                 * at this point we can safely clear everything except the
                 * locked bit and the nodatasum bit
                 */
-                clear_extent_bit(tree, start, end,
+                ret = clear_extent_bit(tree, start, end,
                                 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
                                 0, 0, NULL, mask);
+                /* if clear_extent_bit failed for enomem reasons,
+                 * we can't allow the release to continue.
+                 */
+                if (ret < 0)
+                        ret = 0;
+                else
+                        ret = 1;
        }
        return ret;
 }
@@ -2901,6 +2918,46 @@ out:
        return sector;
 }
+/*
+ * Helper for fiemap, which does not want to see any holes.
+ *
+ * Starting at @offset, extent maps are looked up through @get_extent until
+ * one is found that is neither flagged EXTENT_FLAG_VACANCY nor starts at
+ * EXTENT_MAP_HOLE; that map is returned.  Holes are released with
+ * free_extent_map() and the search resumes at the end of the hole.
+ *
+ * Returns NULL when @offset reaches @last without finding such an extent.
+ * If @get_extent itself yields NULL or an ERR_PTR value, that value is
+ * passed straight back to the caller.
+ */
+static struct extent_map *get_extent_skip_holes(struct inode *inode,
+                                                u64 offset,
+                                                u64 last,
+                                                get_extent_t *get_extent)
+{
+        u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
+        struct extent_map *em;
+        u64 len;
+        while (offset < last) {
+                /*
+                 * round the remaining range up to a multiple of sectorsize
+                 * (the mask form assumes sectorsize is a power of two)
+                 */
+                len = (last - offset + sectorsize - 1) & ~(sectorsize - 1);
+                em = get_extent(inode, NULL, 0, offset, len, 0);
+                if (!em || IS_ERR(em))
+                        return em;
+                /* anything that is not a hole is what the caller wants */
+                if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
+                    em->block_start != EXTENT_MAP_HOLE)
+                        return em;
+                /* this is a hole, drop it and continue right after it */
+                offset = extent_map_end(em);
+                free_extent_map(em);
+        }
+        return NULL;
+}
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len, get_extent_t *get_extent)
 {
@@ -2910,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        u32 flags = 0;
        u32 found_type;
        u64 last;
+        u64 last_for_get_extent = 0;
        u64 disko = 0;
+        u64 isize = i_size_read(inode);
        struct btrfs_key found_key;
        struct extent_map *em = NULL;
        struct extent_state *cached_state = NULL;
        struct btrfs_path *path;
        struct btrfs_file_extent_item *item;
        int end = 0;
-        u64 em_start = 0, em_len = 0;
+        u64 em_start = 0;
+        u64 em_len = 0;
+        u64 em_end = 0;
        unsigned long emflags;
-        int hole = 0;
        if (len == 0)
                return -EINVAL;
@@ -2929,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                return -ENOMEM;
        path->leave_spinning = 1;
+        /*
+         * lookup the last file extent.  We're not using i_size here
+         * because there might be preallocation past i_size
+         */
        ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
                                       path, inode->i_ino, -1, 0);
        if (ret < 0) {
@@ -2942,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
        found_type = btrfs_key_type(&found_key);
-        /* No extents, just return */
+        /* No extents, but there might be delalloc bits */
        if (found_key.objectid != inode->i_ino ||
            found_type != BTRFS_EXTENT_DATA_KEY) {
-                btrfs_free_path(path);
+                /* have to trust i_size as the end */
-                return 0;
+                last = (u64)-1;
+                last_for_get_extent = isize;
+        } else {
+                /*
+                 * remember the start of the last extent.  There are a
+                 * bunch of different factors that go into the length of the
+                 * extent, so it's much less complex to remember where it started
+                 */
+                last = found_key.offset;
+                last_for_get_extent = last + 1;
        }
-        last = found_key.offset;
        btrfs_free_path(path);
+        /*
+         * we might have some extents allocated but more delalloc past those
+         * extents.  so, we trust isize unless the start of the last extent is
+         * beyond isize
+         */
+        if (last < isize) {
+                last = (u64)-1;
+                last_for_get_extent = isize;
+        }
        lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
                         &cached_state, GFP_NOFS);
-        em = get_extent(inode, NULL, 0, off, max - off, 0);
+        em = get_extent_skip_holes(inode, off, last_for_get_extent,
+                                   get_extent);
        if (!em)
                goto out;
        if (IS_ERR(em)) {
@@ -2962,19 +3046,14 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        }
        while (!end) {
-                hole = 0;
+                off = extent_map_end(em);
-                off = em->start + em->len;
                if (off >= max)
                        end = 1;
-                if (em->block_start == EXTENT_MAP_HOLE) {
-                        hole = 1;
-                        goto next;
-                }
                em_start = em->start;
                em_len = em->len;
+                em_end = extent_map_end(em);
+                emflags = em->flags;
                disko = 0;
                flags = 0;
@@ -2993,37 +3072,29 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
                        flags |= FIEMAP_EXTENT_ENCODED;
-next:
-                emflags = em->flags;
                free_extent_map(em);
                em = NULL;
-                if (!end) {
+                if ((em_start >= last) || em_len == (u64)-1 ||
-                        em = get_extent(inode, NULL, 0, off, max - off, 0);
+                   (last == (u64)-1 && isize <= em_end)) {
-                        if (!em)
-                                goto out;
-                        if (IS_ERR(em)) {
-                                ret = PTR_ERR(em);
-                                goto out;
-                        }
-                        emflags = em->flags;
-                }
-                if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
                        flags |= FIEMAP_EXTENT_LAST;
                        end = 1;
                }
-                if (em_start == last) {
+                /* now scan forward to see if this is really the last extent. */
+                em = get_extent_skip_holes(inode, off, last_for_get_extent,
+                                           get_extent);
+                if (IS_ERR(em)) {
+                        ret = PTR_ERR(em);
+                        goto out;
+                }
+                if (!em) {
                        flags |= FIEMAP_EXTENT_LAST;
                        end = 1;
                }
+                ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
-                if (!hole) {
+                                              em_len, flags);
-                        ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+                if (ret)
-                                                em_len, flags);
+                        goto out_free;
-                        if (ret)
-                                goto out_free;
-                }
        }
 out_free:
        free_extent_map(em);
@@ -3192,7 +3263,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                }
                if (!PageUptodate(p))
                        uptodate = 0;
-                unlock_page(p);
+                /*
+                 * see below about how we avoid a nasty race with release page
+                 * and why we unlock later
+                 */
+                if (i != 0)
+                        unlock_page(p);
        }
        if (uptodate)
                set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3216,9 +3293,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
        atomic_inc(&eb->refs);
        spin_unlock(&tree->buffer_lock);
        radix_tree_preload_end();
+        /*
+         * there is a race where release page may have
+         * tried to find this extent buffer in the radix
+         * but failed.  It will tell the VM it is safe to
+         * reclaim the page, and it will clear the page private bit.
+         * We must make sure to set the page private bit properly
+         * after the extent buffer is in the radix tree so
+         * it doesn't get lost
+         */
+        set_page_extent_mapped(eb->first_page);
+        set_page_extent_head(eb->first_page, eb->len);
+        if (!page0)
+                unlock_page(eb->first_page);
        return eb;
 free_eb:
+        if (eb->first_page && !page0)
+                unlock_page(eb->first_page);
        if (!atomic_dec_and_test(&eb->refs))
                return exists;
        btrfs_release_extent_buffer(eb);
@@ -3269,10 +3363,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
                        continue;
                lock_page(page);
+                WARN_ON(!PagePrivate(page));
+                set_page_extent_mapped(page);
                if (i == 0)
                        set_page_extent_head(page, eb->len);
-                else
-                        set_page_private(page, EXTENT_PAGE_PRIVATE);
                clear_page_dirty_for_io(page);
                spin_lock_irq(&page->mapping->tree_lock);
@@ -3462,6 +3557,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        for (i = start_i; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
+                WARN_ON(!PagePrivate(page));
+                set_page_extent_mapped(page);
+                if (i == 0)
+                        set_page_extent_head(page, eb->len);
                if (inc_all_pages)
                        page_cache_get(page);
                if (!PageUptodate(page)) {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd061..9318dfefd59c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
 u64 count_range_bits(struct extent_io_tree *tree,
                     u64 *start, u64 search_end,
-                     u64 max_bytes, unsigned long bits);
+                     u64 max_bytes, unsigned long bits, int contig);
 void free_extent_state(struct extent_state *state);
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b0e1fce12530..2b6c12e983b3 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -51,8 +51,8 @@ struct extent_map *alloc_extent_map(gfp_t mask)
 {
        struct extent_map *em;
        em = kmem_cache_alloc(extent_map_cache, mask);
-        if (!em || IS_ERR(em))
+        if (!em)
-                return em;
+                return NULL;
        em->in_tree = 0;
        em->flags = 0;
        em->compress_type = BTRFS_COMPRESS_NONE;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a562a250ae77..4f19a3e1bf32 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -536,6 +536,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
        root = root->fs_info->csum_root;
        path = btrfs_alloc_path();
+        if (!path)
+                return -ENOMEM;
        while (1) {
                key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -548,7 +550,10 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
                        if (path->slots[0] == 0)
                                goto out;
                        path->slots[0]--;
+                } else if (ret < 0) {
+                        goto out;
                }
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c800d58f3013..7084140d5940 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -186,6 +186,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                        split = alloc_extent_map(GFP_NOFS);
                if (!split2)
                        split2 = alloc_extent_map(GFP_NOFS);
+                BUG_ON(!split || !split2);
                write_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, start, len);
@@ -793,8 +794,12 @@ again:
        for (i = 0; i < num_pages; i++) {
                pages[i] = grab_cache_page(inode->i_mapping, index + i);
                if (!pages[i]) {
-                        err = -ENOMEM;
+                        int c;
-                        BUG_ON(1);
+                        for (c = i - 1; c >= 0; c--) {
+                                unlock_page(pages[c]);
+                                page_cache_release(pages[c]);
+                        }
+                        return -ENOMEM;
                }
                wait_on_page_writeback(pages[i]);
        }
@@ -946,6 +951,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                     PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
                     (sizeof(struct page *)));
        pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
+        if (!pages) {
+                ret = -ENOMEM;
+                goto out;
+        }
        /* generic_write_checks can change our pos */
        start_pos = pos;
@@ -984,8 +993,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                size_t write_bytes = min(iov_iter_count(&i),
                                         nrptrs * (size_t)PAGE_CACHE_SIZE -
                                         offset);
-                size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
+                size_t num_pages = (write_bytes + offset +
-                                        PAGE_CACHE_SHIFT;
+                                    PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
                WARN_ON(num_pages > nrptrs);
                memset(pages, 0, sizeof(struct page *) * nrptrs);
@@ -1015,8 +1024,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                copied = btrfs_copy_from_user(pos, num_pages,
                                           write_bytes, pages, &i);
-                dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
+                dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
-                                        PAGE_CACHE_SHIFT;
+                                PAGE_CACHE_SHIFT;
                if (num_pages > dirty_pages) {
                        if (copied > 0)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 60d684266959..a0390657451b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -987,11 +987,18 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
        return entry;
 }
-static void unlink_free_space(struct btrfs_block_group_cache *block_group,
+static inline void
-                              struct btrfs_free_space *info)
+__unlink_free_space(struct btrfs_block_group_cache *block_group,
+                    struct btrfs_free_space *info)
 {
        rb_erase(&info->offset_index, &block_group->free_space_offset);
        block_group->free_extents--;
+}
+static void unlink_free_space(struct btrfs_block_group_cache *block_group,
+                              struct btrfs_free_space *info)
+{
+        __unlink_free_space(block_group, info);
        block_group->free_space -= info->bytes;
 }
@@ -1016,14 +1023,18 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
        u64 max_bytes;
        u64 bitmap_bytes;
        u64 extent_bytes;
+        u64 size = block_group->key.offset;
        /*
         * The goal is to keep the total amount of memory used per 1gb of space
         * at or below 32k, so we need to adjust how much memory we allow to be
         * used by extent based free space tracking
         */
-        max_bytes = MAX_CACHE_BYTES_PER_GIG *
+        if (size < 1024 * 1024 * 1024)
-                (div64_u64(block_group->key.offset, 1024 * 1024 * 1024));
+                max_bytes = MAX_CACHE_BYTES_PER_GIG;
+        else
+                max_bytes = MAX_CACHE_BYTES_PER_GIG *
+                        div64_u64(size, 1024 * 1024 * 1024);
        /*
         * we want to account for 1 more bitmap than what we have so we can make
@@ -1171,6 +1182,16 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
        recalculate_thresholds(block_group);
 }
+/*
+ * Tear down a bitmap entry: unlink it from the block group with
+ * unlink_free_space(), free both the bitmap itself and the entry that
+ * describes it, drop the block group's total_bitmaps count and call
+ * recalculate_thresholds() so the limits reflect the removed bitmap.
+ *
+ * bitmap_info is freed here and must not be touched after this returns.
+ * The callers visible in this change only invoke it once the entry's
+ * byte count has dropped to zero.
+ */
+static void free_bitmap(struct btrfs_block_group_cache *block_group,
+                        struct btrfs_free_space *bitmap_info)
+{
+        unlink_free_space(block_group, bitmap_info);
+        kfree(bitmap_info->bitmap);
+        kfree(bitmap_info);
+        block_group->total_bitmaps--;
+        recalculate_thresholds(block_group);
+}
 static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
                              struct btrfs_free_space *bitmap_info,
                              u64 *offset, u64 *bytes)
@@ -1195,6 +1216,7 @@ again:
         */
        search_start = *offset;
        search_bytes = *bytes;
+        search_bytes = min(search_bytes, end - search_start + 1);
        ret = search_bitmap(block_group, bitmap_info, &search_start,
                            &search_bytes);
        BUG_ON(ret < 0 || search_start != *offset);
@@ -1211,13 +1233,8 @@ again:
        if (*bytes) {
                struct rb_node *next = rb_next(&bitmap_info->offset_index);
-                if (!bitmap_info->bytes) {
+                if (!bitmap_info->bytes)
-                        unlink_free_space(block_group, bitmap_info);
+                        free_bitmap(block_group, bitmap_info);
-                        kfree(bitmap_info->bitmap);
-                        kfree(bitmap_info);
-                        block_group->total_bitmaps--;
-                        recalculate_thresholds(block_group);
-                }
                /*
                 * no entry after this bitmap, but we still have bytes to
@@ -1250,13 +1267,8 @@ again:
                        return -EAGAIN;
                goto again;
-        } else if (!bitmap_info->bytes) {
+        } else if (!bitmap_info->bytes)
-                unlink_free_space(block_group, bitmap_info);
+                free_bitmap(block_group, bitmap_info);
-                kfree(bitmap_info->bitmap);
-                kfree(bitmap_info);
-                block_group->total_bitmaps--;
-                recalculate_thresholds(block_group);
-        }
        return 0;
 }
@@ -1359,22 +1371,14 @@ out:
        return ret;
 }
-int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
+bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
-                         u64 offset, u64 bytes)
+                          struct btrfs_free_space *info, bool update_stat)
 {
-        struct btrfs_free_space *right_info = NULL;
+        struct btrfs_free_space *left_info;
-        struct btrfs_free_space *left_info = NULL;
+        struct btrfs_free_space *right_info;
-        struct btrfs_free_space *info = NULL;
+        bool merged = false;
-        int ret = 0;
+        u64 offset = info->offset;
+        u64 bytes = info->bytes;
-        info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
-        if (!info)
-                return -ENOMEM;
-        info->offset = offset;
-        info->bytes = bytes;
-        spin_lock(&block_group->tree_lock);
        /*
         * first we want to see if there is free space adjacent to the range we
@@ -1388,37 +1392,62 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
        else
                left_info = tree_search_offset(block_group, offset - 1, 0, 0);
-        /*
-         * If there was no extent directly to the left or right of this new
-         * extent then we know we're going to have to allocate a new extent, so
-         * before we do that see if we need to drop this into a bitmap
-         */
-        if ((!left_info || left_info->bitmap) &&
-            (!right_info || right_info->bitmap)) {
-                ret = insert_into_bitmap(block_group, info);
-                if (ret < 0) {
-                        goto out;
-                } else if (ret) {
-                        ret = 0;
-                        goto out;
-                }
-        }
        if (right_info && !right_info->bitmap) {
-                unlink_free_space(block_group, right_info);
+                if (update_stat)
+                        unlink_free_space(block_group, right_info);
+                else
+                        __unlink_free_space(block_group, right_info);
                info->bytes += right_info->bytes;
                kfree(right_info);
+                merged = true;
        }
        if (left_info && !left_info->bitmap &&
            left_info->offset + left_info->bytes == offset) {
-                unlink_free_space(block_group, left_info);
+                if (update_stat)
+                        unlink_free_space(block_group, left_info);
+                else
+                        __unlink_free_space(block_group, left_info);
                info->offset = left_info->offset;
                info->bytes += left_info->bytes;
                kfree(left_info);
+                merged = true;
        }
+        return merged;
+}
+int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
+                         u64 offset, u64 bytes)
+{
+        struct btrfs_free_space *info;
+        int ret = 0;
+        info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+        if (!info)
+                return -ENOMEM;
+        info->offset = offset;
+        info->bytes = bytes;
+        spin_lock(&block_group->tree_lock);
+        if (try_merge_free_space(block_group, info, true))
+                goto link;
+        /*
+         * There was no extent directly to the left or right of this new
+         * extent, so we know we're going to have to allocate a new extent;
+         * before we do that, see if we need to drop this into a bitmap
+         */
+        ret = insert_into_bitmap(block_group, info);
+        if (ret < 0) {
+                goto out;
+        } else if (ret) {
+                ret = 0;
+                goto out;
+        }
+link:
        ret = link_free_space(block_group, info);
        if (ret)
                kfree(info);
@@ -1621,6 +1650,7 @@ __btrfs_return_cluster_to_free_space(
                node = rb_next(&entry->offset_index);
                rb_erase(&entry->offset_index, &cluster->root);
                BUG_ON(entry->bitmap);
+                try_merge_free_space(block_group, entry, false);
                tree_insert_offset(&block_group->free_space_offset,
                                   entry->offset, &entry->offset_index, 0);
        }
@@ -1685,13 +1715,8 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
        ret = offset;
        if (entry->bitmap) {
                bitmap_clear_bits(block_group, entry, offset, bytes);
-                if (!entry->bytes) {
+                if (!entry->bytes)
-                        unlink_free_space(block_group, entry);
+                        free_bitmap(block_group, entry);
-                        kfree(entry->bitmap);
-                        kfree(entry);
-                        block_group->total_bitmaps--;
-                        recalculate_thresholds(block_group);
-                }
        } else {
                unlink_free_space(block_group, entry);
                entry->offset += bytes;
@@ -1789,6 +1814,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
        ret = search_start;
        bitmap_clear_bits(block_group, entry, ret, bytes);
+        if (entry->bytes == 0)
+                free_bitmap(block_group, entry);
 out:
        spin_unlock(&cluster->lock);
        spin_unlock(&block_group->tree_lock);
@@ -1842,15 +1869,26 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
                entry->offset += bytes;
                entry->bytes -= bytes;
-                if (entry->bytes == 0) {
+                if (entry->bytes == 0)
                        rb_erase(&entry->offset_index, &cluster->root);
-                        kfree(entry);
-                }
                break;
        }
 out:
        spin_unlock(&cluster->lock);
+        if (!ret)
+                return 0;
+        spin_lock(&block_group->tree_lock);
+        block_group->free_space -= bytes;
+        if (entry->bytes == 0) {
+                block_group->free_extents--;
+                kfree(entry);
+        }
+        spin_unlock(&block_group->tree_lock);
        return ret;
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 160b55b3e132..0efdb65953c5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -416,7 +416,7 @@ again:
        }
        if (start == 0) {
                trans = btrfs_join_transaction(root, 1);
-                BUG_ON(!trans);
+                BUG_ON(IS_ERR(trans));
                btrfs_set_trans_block_group(trans, inode);
                trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -612,6 +612,7 @@ retry:
                            GFP_NOFS);
                trans = btrfs_join_transaction(root, 1);
+                BUG_ON(IS_ERR(trans));
                ret = btrfs_reserve_extent(trans, root,
                                           async_extent->compressed_size,
                                           async_extent->compressed_size,
@@ -643,6 +644,7 @@ retry:
                                        async_extent->ram_size - 1, 0);
                em = alloc_extent_map(GFP_NOFS);
+                BUG_ON(!em);
                em->start = async_extent->start;
                em->len = async_extent->ram_size;
                em->orig_start = em->start;
@@ -771,7 +773,7 @@ static noinline int cow_file_range(struct inode *inode,
        BUG_ON(root == root->fs_info->tree_root);
        trans = btrfs_join_transaction(root, 1);
-        BUG_ON(!trans);
+        BUG_ON(IS_ERR(trans));
        btrfs_set_trans_block_group(trans, inode);
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -819,6 +821,7 @@ static noinline int cow_file_range(struct inode *inode,
                BUG_ON(ret);
                em = alloc_extent_map(GFP_NOFS);
+                BUG_ON(!em);
                em->start = start;
                em->orig_start = em->start;
                ram_size = ins.offset;
@@ -1049,7 +1052,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        } else {
                trans = btrfs_join_transaction(root, 1);
        }
-        BUG_ON(!trans);
+        BUG_ON(IS_ERR(trans));
        cow_start = (u64)-1;
        cur_offset = start;
@@ -1168,6 +1171,7 @@ out_check:
                        struct extent_map_tree *em_tree;
                        em_tree = &BTRFS_I(inode)->extent_tree;
                        em = alloc_extent_map(GFP_NOFS);
+                        BUG_ON(!em);
                        em->start = cur_offset;
                        em->orig_start = em->start;
                        em->len = num_bytes;
@@ -1557,6 +1561,7 @@ out:
 out_page:
        unlock_page(page);
        page_cache_release(page);
+        kfree(fixup);
 }
 /*
@@ -1703,7 +1708,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                                trans = btrfs_join_transaction_nolock(root, 1);
                        else
                                trans = btrfs_join_transaction(root, 1);
-                        BUG_ON(!trans);
+                        BUG_ON(IS_ERR(trans));
                        btrfs_set_trans_block_group(trans, inode);
                        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
                        ret = btrfs_update_inode(trans, root, inode);
@@ -1720,6 +1725,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                trans = btrfs_join_transaction_nolock(root, 1);
        else
                trans = btrfs_join_transaction(root, 1);
+        BUG_ON(IS_ERR(trans));
        btrfs_set_trans_block_group(trans, inode);
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -1907,7 +1913,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
        private = 0;
        if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
-                             (u64)-1, 1, EXTENT_DIRTY)) {
+                             (u64)-1, 1, EXTENT_DIRTY, 0)) {
                ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
                                        start, &private_failure);
                if (ret == 0) {
@@ -2354,6 +2360,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
                 */
                if (is_bad_inode(inode)) {
                        trans = btrfs_start_transaction(root, 0);
+                        BUG_ON(IS_ERR(trans));
                        btrfs_orphan_del(trans, inode);
                        btrfs_end_transaction(trans, root);
                        iput(inode);
@@ -2381,6 +2388,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
        if (root->orphan_block_rsv || root->orphan_item_inserted) {
                trans = btrfs_join_transaction(root, 1);
+                BUG_ON(IS_ERR(trans));
                btrfs_end_transaction(trans, root);
        }
@@ -2641,7 +2649,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
-                goto err;
+                goto out;
        }
        path->leave_spinning = 1;
@@ -2714,9 +2722,10 @@ static int check_path_shared(struct btrfs_root *root,
        struct extent_buffer *eb;
        int level;
        u64 refs = 1;
-        int uninitialized_var(ret);
        for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+                int ret;
                if (!path->nodes[level])
                        break;
                eb = path->nodes[level];
@@ -2727,7 +2736,7 @@ static int check_path_shared(struct btrfs_root *root,
                if (refs > 1)
                        return 1;
        }
-        return ret; /* XXX callers? */
+        return 0;
 }
 /*
@@ -4134,7 +4143,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
        }
        srcu_read_unlock(&root->fs_info->subvol_srcu, index);
-        if (root != sub_root) {
+        if (!IS_ERR(inode) && root != sub_root) {
                down_read(&root->fs_info->cleanup_work_sem);
                if (!(inode->i_sb->s_flags & MS_RDONLY))
                        btrfs_orphan_cleanup(sub_root);
@@ -4347,6 +4356,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
                        trans = btrfs_join_transaction_nolock(root, 1);
                else
                        trans = btrfs_join_transaction(root, 1);
+                if (IS_ERR(trans))
+                        return PTR_ERR(trans);
                btrfs_set_trans_block_group(trans, inode);
                if (nolock)
                        ret = btrfs_end_transaction_nolock(trans, root);
@@ -4372,6 +4383,7 @@ void btrfs_dirty_inode(struct inode *inode)
                return;
        trans = btrfs_join_transaction(root, 1);
+        BUG_ON(IS_ERR(trans));
        btrfs_set_trans_block_group(trans, inode);
        ret = btrfs_update_inode(trans, root, inode);
@@ -5176,6 +5188,8 @@ again:
                                em = NULL;
                                btrfs_release_path(root, path);
                                trans = btrfs_join_transaction(root, 1);
+                                if (IS_ERR(trans))
+                                        return ERR_CAST(trans);
                                goto again;
                        }
                        map = kmap(page);
@@ -5266,6 +5280,128 @@ out:
        return em;
 }
+struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+                                           size_t pg_offset, u64 start, u64 len,
+                                           int create)
+{       /*
+         * Extent lookup for fiemap: behaves like btrfs_get_extent(), except
+         * that when the result is a hole (or nothing at all) the inode's
+         * io_tree is also searched for EXTENT_DELALLOC bits inside
+         * [start, start + len), so that such a range is reported with
+         * block_start == EXTENT_MAP_DELALLOC instead of as a hole.
+         *
+         * inode, page, pg_offset, start, len and create are handed to
+         * btrfs_get_extent() unchanged; start and len also bound the
+         * delalloc search.
+         *
+         * Returns one of:
+         *  - the map from btrfs_get_extent() itself (a non-hole map, the
+         *    hole when no usable delalloc range was found, or NULL if
+         *    btrfs_get_extent() returned NULL),
+         *  - a newly allocated map covering the part of the hole that
+         *    precedes the delalloc range,
+         *  - a newly allocated map covering the delalloc range,
+         *  - an ERR_PTR(): the error from btrfs_get_extent(), or -ENOMEM
+         *    when the new map cannot be allocated.
+         */
+        struct extent_map *em;
+        struct extent_map *hole_em = NULL;      /* hole from btrfs_get_extent(), if any */
+        u64 range_start = start;        /* passed by address to count_range_bits() */
+        u64 end;                        /* last byte of the request, inclusive */
+        u64 found;                      /* length reported by count_range_bits() */
+        u64 found_end;                  /* range_start + found, clamped on wrap */
+        int err = 0;
+        em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
+        if (IS_ERR(em))
+                return em;
+        if (em) {
+                /*
+                 * if our em maps to a hole, there might
+                 * actually be delalloc bytes behind it
+                 *
+                 * anything other than a hole is returned untouched
+                 */
+                if (em->block_start != EXTENT_MAP_HOLE)
+                        return em;
+                else
+                        hole_em = em;
+        }
+        /*
+         * check to see if we've wrapped (len == -1 or similar)
+         *
+         * end is inclusive: on wrap it is clamped to the largest u64,
+         * otherwise it is start + len - 1
+         */
+        end = start + len;
+        if (end < start)
+                end = (u64)-1;
+        else
+                end -= 1;
+        em = NULL;
+        /*
+         * ok, we didn't find anything, let's look for delalloc
+         *
+         * range_start is passed by address; the checks below treat it as
+         * the start of what was found (presumably count_range_bits() moves
+         * it to the first delalloc byte -- confirm against its definition)
+         */
+        found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
+                                 end, len, EXTENT_DELALLOC, 1);
+        found_end = range_start + found;
+        if (found_end < range_start)
+                found_end = (u64)-1;
+        /*
+         * we didn't find anything useful, return
+         * the original results from get_extent()
+         *
+         * hole_em is cleared first so that the free_extent_map() at out:
+         * is not applied to the map being returned
+         */
+        if (range_start > end || found_end <= start) {
+                em = hole_em;
+                hole_em = NULL;
+                goto out;
+        }
+        /* adjust the range_start to make sure it doesn't
+         * go backwards from the start they passed in
+         */
+        range_start = max(start,range_start);
+        found = found_end - range_start;        /* length left after the clamp */
+        if (found > 0) {
+                u64 hole_start = start;
+                u64 hole_len = len;
+                em = alloc_extent_map(GFP_NOFS);
+                if (!em) {
+                        err = -ENOMEM;
+                        goto out;       /* out: releases hole_em, returns ERR_PTR(err) */
+                }
+                /*
+                 * when btrfs_get_extent can't find anything it
+                 * returns one huge hole
+                 *
+                 * make sure what it found really fits our range, and
+                 * adjust to make sure it is based on the start from
+                 * the caller
+                 *
+                 * a hole that does not overlap [start, end] is dropped
+                 * here; otherwise hole_start/hole_len describe the hole
+                 * from max(hole_em->start, start) to the hole's end
+                 */
+                if (hole_em) {
+                        u64 calc_end = extent_map_end(hole_em);
+                        if (calc_end <= start || (hole_em->start > end)) {
+                                free_extent_map(hole_em);
+                                hole_em = NULL;
+                        } else {
+                                hole_start = max(hole_em->start, start);
+                                hole_len = calc_end - hole_start;
+                        }
+                }
+                em->bdev = NULL;
+                if (hole_em && range_start > hole_start) {
+                        /* our hole starts before our delalloc, so we
+                         * have to return just the parts of the hole
+                         * that go until the delalloc starts
+                         */
+                        em->len = min(hole_len,
+                                      range_start - hole_start);
+                        em->start = hole_start;
+                        em->orig_start = hole_start;
+                        /*
+                         * don't adjust block start at all,
+                         * it is fixed at EXTENT_MAP_HOLE
+                         */
+                        em->block_start = hole_em->block_start;
+                        em->block_len = hole_len;       /* NOTE(review): untrimmed hole_len, not em->len -- confirm intended */
+                } else {
+                        em->start = range_start;        /* report the delalloc range itself */
+                        em->len = found;
+                        em->orig_start = range_start;
+                        em->block_start = EXTENT_MAP_DELALLOC;
+                        em->block_len = found;
+                }
+        } else if (hole_em) {
+                return hole_em; /* nothing left after the clamp: hand back the hole as is */
+        }
+out:
+        free_extent_map(hole_em);       /* hole_em is not returned on this path; it may be NULL (assumes free_extent_map() accepts NULL -- confirm) */
+        if (err) {
+                free_extent_map(em);
+                return ERR_PTR(err);
+        }
+        return em;
+}
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                                                  u64 start, u64 len)
 {
@@ -5280,8 +5416,8 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
        trans = btrfs_join_transaction(root, 0);
-        if (!trans)
+        if (IS_ERR(trans))
-                return ERR_PTR(-ENOMEM);
+                return ERR_CAST(trans);
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -5505,7 +5641,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                 * while we look for nocow cross refs
                 */
                trans = btrfs_join_transaction(root, 0);
-                if (!trans)
+                if (IS_ERR(trans))
                        goto must_cow;
                if (can_nocow_odirect(trans, inode, start, len) == 1) {
@@ -5640,7 +5776,7 @@ again:
        BUG_ON(!ordered);
        trans = btrfs_join_transaction(root, 1);
-        if (!trans) {
+        if (IS_ERR(trans)) {
                err = -ENOMEM;
                goto out;
        }
@@ -6088,7 +6224,7 @@ out:
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len)
 {
-        return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent);
+        return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
 }
 int btrfs_readpage(struct file *file, struct page *page)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a506a22b522a..5fdb2abc4fa7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -203,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        trans = btrfs_join_transaction(root, 1);
-        BUG_ON(!trans);
+        BUG_ON(IS_ERR(trans));
        ret = btrfs_update_inode(trans, root, inode);
        BUG_ON(ret);
@@ -907,6 +907,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
        if (new_size > old_size) {
                trans = btrfs_start_transaction(root, 0);
+                if (IS_ERR(trans)) {
+                        ret = PTR_ERR(trans);
+                        goto out_unlock;
+                }
                ret = btrfs_grow_device(trans, device, new_size);
                btrfs_commit_transaction(trans, root);
        } else {
@@ -1067,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
        if (copy_from_user(&flags, arg, sizeof(flags)))
                return -EFAULT;
-        if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC)
+        if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
                return -EINVAL;
        if (flags & ~BTRFS_SUBVOL_RDONLY)
                return -EOPNOTSUPP;
+        if (!is_owner_or_cap(inode))
+                return -EACCES;
        down_write(&root->fs_info->subvol_sem);
        /* nothing to do */
@@ -1093,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
                goto out_reset;
        }
-        ret = btrfs_update_root(trans, root,
+        ret = btrfs_update_root(trans, root->fs_info->tree_root,
                                &root->root_key, &root->root_item);
        btrfs_commit_transaction(trans, root);
@@ -1898,7 +1905,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                        memcpy(&new_key, &key, sizeof(new_key));
                        new_key.objectid = inode->i_ino;
-                        new_key.offset = key.offset + destoff - off;
+                        if (off <= key.offset)
+                                new_key.offset = key.offset + destoff - off;
+                        else
+                                new_key.offset = destoff;
                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
@@ -2082,7 +2092,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
        ret = -ENOMEM;
        trans = btrfs_start_ioctl_transaction(root, 0);
-        if (!trans)
+        if (IS_ERR(trans))
                goto out_drop;
        file->private_data = trans;
@@ -2138,9 +2148,9 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
        path->leave_spinning = 1;
        trans = btrfs_start_transaction(root, 1);
-        if (!trans) {
+        if (IS_ERR(trans)) {
                btrfs_free_path(path);
-                return -ENOMEM;
+                return PTR_ERR(trans);
        }
        dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
@@ -2201,7 +2211,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
        int num_types = 4;
        int alloc_size;
        int ret = 0;
-        int slot_count = 0;
+        u64 slot_count = 0;
        int i, c;
        if (copy_from_user(&space_args,
@@ -2240,7 +2250,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
                goto out;
        }
-        slot_count = min_t(int, space_args.space_slots, slot_count);
+        slot_count = min_t(u64, space_args.space_slots, slot_count);
        alloc_size = sizeof(*dest) * slot_count;
@@ -2260,6 +2270,9 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
        for (i = 0; i < num_types; i++) {
                struct btrfs_space_info *tmp;
+                if (!slot_count)
+                        break;
                info = NULL;
                rcu_read_lock();
                list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
@@ -2281,7 +2294,10 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
                                memcpy(dest, &space, sizeof(space));
                                dest++;
                                space_args.total_spaces++;
+                                slot_count--;
                        }
+                        if (!slot_count)
+                                break;
                }
                up_read(&info->groups_sem);
        }
@@ -2334,6 +2350,8 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp
        u64 transid;
        trans = btrfs_start_transaction(root, 0);
+        if (IS_ERR(trans))
+                return PTR_ERR(trans);
        transid = trans->transid;
        btrfs_commit_transaction_async(trans, root, 0);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450399df..a178f5ebea78 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
        unsigned long tot_out;
        unsigned long tot_len;
        char *buf;
+        bool may_late_unmap, need_unmap;
        data_in = kmap(pages_in[0]);
        tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
                tot_in += in_len;
                working_bytes = in_len;
+                may_late_unmap = need_unmap = false;
                /* fast path: avoid using the working buffer */
                if (in_page_bytes_left >= in_len) {
                        buf = data_in + in_offset;
                        bytes = in_len;
+                        may_late_unmap = true;
                        goto cont;
                }
@@ -329,14 +332,17 @@ cont:
                                if (working_bytes == 0 && tot_in >= tot_len)
                                        break;
-                                kunmap(pages_in[page_in_index]);
+                                if (page_in_index + 1 >= total_pages_in) {
-                                page_in_index++;
-                                if (page_in_index >= total_pages_in) {
                                        ret = -1;
-                                        data_in = NULL;
                                        goto done;
                                }
-                                data_in = kmap(pages_in[page_in_index]);
+                                if (may_late_unmap)
+                                        need_unmap = true;
+                                else
+                                        kunmap(pages_in[page_in_index]);
+                                data_in = kmap(pages_in[++page_in_index]);
                                in_page_bytes_left = PAGE_CACHE_SIZE;
                                in_offset = 0;
@@ -346,6 +352,8 @@ cont:
                out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
                ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
                                            &out_len);
+                if (need_unmap)
+                        kunmap(pages_in[page_in_index - 1]);
                if (ret != LZO_E_OK) {
                        printk(KERN_WARNING "btrfs decompress failed\n");
                        ret = -1;
@@ -363,8 +371,7 @@ cont:
                        break;
        }
 done:
-        if (data_in)
+        kunmap(pages_in[page_in_index]);
-                kunmap(pages_in[page_in_index]);
        return ret;
 }
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2b61e1ddcd99..083a55477375 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -141,7 +141,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
                                          u64 file_offset)
 {
        struct rb_root *root = &tree->tree;
-        struct rb_node *prev;
+        struct rb_node *prev = NULL;
        struct rb_node *ret;
        struct btrfs_ordered_extent *entry;
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 0d126be22b63..fb2605d998e9 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -260,6 +260,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
 #else
                        BUG();
 #endif
+                        break;
                case BTRFS_BLOCK_GROUP_ITEM_KEY:
                        bi = btrfs_item_ptr(l, i,
                                            struct btrfs_block_group_item);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 045c9c2b2d7e..31ade5802ae8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1157,6 +1157,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
        new_node->bytenr = dest->node->start;
        new_node->level = node->level;
        new_node->lowest = node->lowest;
+        new_node->checked = 1;
        new_node->root = dest;
        if (!node->lowest) {
@@ -2028,6 +2029,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
        while (1) {
                trans = btrfs_start_transaction(root, 0);
+                BUG_ON(IS_ERR(trans));
                trans->block_rsv = rc->block_rsv;
                ret = btrfs_block_rsv_check(trans, root, rc->block_rsv,
@@ -2147,6 +2149,12 @@ again:
        }
        trans = btrfs_join_transaction(rc->extent_root, 1);
+        if (IS_ERR(trans)) {
+                if (!err)
+                        btrfs_block_rsv_release(rc->extent_root,
+                                                rc->block_rsv, num_bytes);
+                return PTR_ERR(trans);
+        }
        if (!err) {
                if (num_bytes != rc->merging_rsv_size) {
@@ -3222,6 +3230,7 @@ truncate:
        trans = btrfs_join_transaction(root, 0);
        if (IS_ERR(trans)) {
                btrfs_free_path(path);
+                ret = PTR_ERR(trans);
                goto out;
        }
@@ -3628,6 +3637,7 @@ int prepare_to_relocate(struct reloc_control *rc)
        set_reloc_control(rc);
        trans = btrfs_join_transaction(rc->extent_root, 1);
+        BUG_ON(IS_ERR(trans));
        btrfs_commit_transaction(trans, rc->extent_root);
        return 0;
 }
@@ -3644,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
        u32 item_size;
        int ret;
        int err = 0;
+        int progress = 0;
        path = btrfs_alloc_path();
        if (!path)
@@ -3656,8 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
        }
        while (1) {
+                progress++;
                trans = btrfs_start_transaction(rc->extent_root, 0);
+                BUG_ON(IS_ERR(trans));
+restart:
                if (update_backref_cache(trans, &rc->backref_cache)) {
                        btrfs_end_transaction(trans, rc->extent_root);
                        continue;
@@ -3770,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
                        }
                }
        }
+        if (trans && progress && err == -ENOSPC) {
+                ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
+                                              rc->block_group->flags);
+                if (ret == 0) {
+                        err = 0;
+                        progress = 0;
+                        goto restart;
+                }
+        }
        btrfs_release_path(rc->extent_root, path);
        clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
@@ -3804,7 +3826,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
        /* get rid of pinned extents */
        trans = btrfs_join_transaction(rc->extent_root, 1);
-        btrfs_commit_transaction(trans, rc->extent_root);
+        if (IS_ERR(trans))
+                err = PTR_ERR(trans);
+        else
+                btrfs_commit_transaction(trans, rc->extent_root);
 out_free:
        btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
        btrfs_free_path(path);
@@ -4022,6 +4047,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
        int ret;
        trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
+        BUG_ON(IS_ERR(trans));
        memset(&root->root_item.drop_progress, 0,
                sizeof(root->root_item.drop_progress));
@@ -4125,6 +4151,11 @@ int btrfs_recover_relocation(struct btrfs_root *root)
        set_reloc_control(rc);
        trans = btrfs_join_transaction(rc->extent_root, 1);
+        if (IS_ERR(trans)) {
+                unset_reloc_control(rc);
+                err = PTR_ERR(trans);
+                goto out_free;
+        }
        rc->merge_reloc_tree = 1;
@@ -4154,9 +4185,13 @@ int btrfs_recover_relocation(struct btrfs_root *root)
        unset_reloc_control(rc);
        trans = btrfs_join_transaction(rc->extent_root, 1);
-        btrfs_commit_transaction(trans, rc->extent_root);
+        if (IS_ERR(trans))
-out:
+                err = PTR_ERR(trans);
+        else
+                btrfs_commit_transaction(trans, rc->extent_root);
+out_free:
        kfree(rc);
+out:
        while (!list_empty(&reloc_roots)) {
                reloc_root = list_entry(reloc_roots.next,
                                        struct btrfs_root, root_list);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b2130c46fdb5..d39a9895d932 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -155,7 +155,8 @@ enum {
        Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
        Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
        Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
-        Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
+        Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
+        Opt_enospc_debug, Opt_err,
 };
 static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
        {Opt_space_cache, "space_cache"},
        {Opt_clear_cache, "clear_cache"},
        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
+        {Opt_enospc_debug, "enospc_debug"},
        {Opt_err, NULL},
 };
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                case Opt_user_subvol_rm_allowed:
                        btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
                        break;
+                case Opt_enospc_debug:
+                        btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
+                        break;
                case Opt_err:
                        printk(KERN_INFO "btrfs: unrecognized mount option "
                               "'%s'\n", p);
@@ -383,7 +388,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
                struct btrfs_fs_devices **fs_devices)
 {
        substring_t args[MAX_OPT_ARGS];
-        char *opts, *p;
+        char *opts, *orig, *p;
        int error = 0;
        int intarg;
@@ -397,6 +402,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
        opts = kstrdup(options, GFP_KERNEL);
        if (!opts)
                return -ENOMEM;
+        orig = opts;
        while ((p = strsep(&opts, ",")) != NULL) {
                int token;
@@ -432,7 +438,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
        }
 out_free_opts:
-        kfree(opts);
+        kfree(orig);
 out:
        /*
         * If no subvolume name is specified we use the default one.  Allocate
@@ -623,6 +629,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
        btrfs_wait_ordered_extents(root, 0, 0);
        trans = btrfs_start_transaction(root, 0);
+        if (IS_ERR(trans))
+                return PTR_ERR(trans);
        ret = btrfs_commit_transaction(trans, root);
        return ret;
 }
@@ -761,6 +769,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                }
                btrfs_close_devices(fs_devices);
+                kfree(fs_info);
+                kfree(tree_root);
        } else {
                char b[BDEVNAME_SIZE];
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index bae5c7b8bbe2..3d73c8d93bbb 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1161,6 +1161,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
        INIT_DELAYED_WORK(&ac->work, do_async_commit);
        ac->root = root;
        ac->newtrans = btrfs_join_transaction(root, 0);
+        if (IS_ERR(ac->newtrans)) {
+                int err = PTR_ERR(ac->newtrans);
+                kfree(ac);
+                return err;
+        }
        /* take transaction reference */
        mutex_lock(&root->fs_info->trans_mutex);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 054744ac5719..a4bbb854dfd2 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -338,6 +338,12 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
                }
                dst_copy = kmalloc(item_size, GFP_NOFS);
                src_copy = kmalloc(item_size, GFP_NOFS);
+                if (!dst_copy || !src_copy) {
+                        btrfs_release_path(root, path);
+                        kfree(dst_copy);
+                        kfree(src_copy);
+                        return -ENOMEM;
+                }
                read_extent_buffer(eb, src_copy, src_ptr, item_size);
@@ -665,6 +671,9 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
        btrfs_dir_item_key_to_cpu(leaf, di, &location);
        name_len = btrfs_dir_name_len(leaf, di);
        name = kmalloc(name_len, GFP_NOFS);
+        if (!name)
+                return -ENOMEM;
        read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
        btrfs_release_path(root, path);
@@ -744,6 +753,9 @@ static noinline int backref_in_log(struct btrfs_root *log,
        int match = 0;
        path = btrfs_alloc_path();
+        if (!path)
+                return -ENOMEM;
        ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
        if (ret != 0)
                goto out;
@@ -967,6 +979,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
        key.offset = (u64)-1;
        path = btrfs_alloc_path();
+        if (!path)
+                return -ENOMEM;
        while (1) {
                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -1178,6 +1192,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
        name_len = btrfs_dir_name_len(eb, di);
        name = kmalloc(name_len, GFP_NOFS);
+        if (!name)
+                return -ENOMEM;
        log_type = btrfs_dir_type(eb, di);
        read_extent_buffer(eb, name, (unsigned long)(di + 1),
                   name_len);
@@ -1692,6 +1709,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                root_owner = btrfs_header_owner(parent);
                next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+                if (!next)
+                        return -ENOMEM;
                if (*level == 1) {
                        wc->process_func(root, next, wc, ptr_gen);
@@ -2032,6 +2051,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
                wait_log_commit(trans, log_root_tree,
                                log_root_tree->log_transid);
                mutex_unlock(&log_root_tree->log_mutex);
+                ret = 0;
                goto out;
        }
        atomic_set(&log_root_tree->log_commit[index2], 1);
@@ -2096,7 +2116,7 @@ out:
        smp_mb();
        if (waitqueue_active(&root->log_commit_wait[index1]))
                wake_up(&root->log_commit_wait[index1]);
-        return 0;
+        return ret;
 }
 static void free_log_tree(struct btrfs_trans_handle *trans,
@@ -2194,6 +2214,9 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
        log = root->log_root;
        path = btrfs_alloc_path();
+        if (!path)
+                return -ENOMEM;
        di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino,
                                   name, name_len, -1);
        if (IS_ERR(di)) {
@@ -2594,6 +2617,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
        ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
                           nr * sizeof(u32), GFP_NOFS);
+        if (!ins_data)
+                return -ENOMEM;
        ins_sizes = (u32 *)ins_data;
        ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
@@ -2725,7 +2751,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        log = root->log_root;
        path = btrfs_alloc_path();
+        if (!path)
+                return -ENOMEM;
        dst_path = btrfs_alloc_path();
+        if (!dst_path) {
+                btrfs_free_path(path);
+                return -ENOMEM;
+        }
        min_key.objectid = inode->i_ino;
        min_key.type = BTRFS_INODE_ITEM_KEY;
@@ -3080,6 +3112,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
        BUG_ON(!path);
        trans = btrfs_start_transaction(fs_info->tree_root, 0);
+        BUG_ON(IS_ERR(trans));
        wc.trans = trans;
        wc.pin = 1;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d158530233b7..dd13eb81ee40 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1213,6 +1213,10 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
                return -ENOMEM;
        trans = btrfs_start_transaction(root, 0);
+        if (IS_ERR(trans)) {
+                btrfs_free_path(path);
+                return PTR_ERR(trans);
+        }
        key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
        key.type = BTRFS_DEV_ITEM_KEY;
        key.offset = device->devid;
@@ -1334,11 +1338,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        ret = btrfs_shrink_device(device, 0);
        if (ret)
-                goto error_brelse;
+                goto error_undo;
        ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
        if (ret)
-                goto error_brelse;
+                goto error_undo;
        device->in_fs_metadata = 0;
@@ -1412,6 +1416,13 @@ out:
        mutex_unlock(&root->fs_info->volume_mutex);
        mutex_unlock(&uuid_mutex);
        return ret;
+error_undo:
+        if (device->writeable) {
+                list_add(&device->dev_alloc_list,
+                         &root->fs_info->fs_devices->alloc_list);
+                root->fs_info->fs_devices->rw_devices++;
+        }
+        goto error_brelse;
 }
 /*
@@ -1601,11 +1612,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        ret = find_next_devid(root, &device->devid);
        if (ret) {
+                kfree(device->name);
                kfree(device);
                goto error;
        }
        trans = btrfs_start_transaction(root, 0);
+        if (IS_ERR(trans)) {
+                kfree(device->name);
+                kfree(device);
+                ret = PTR_ERR(trans);
+                goto error;
+        }
        lock_chunks(root);
        device->writeable = 1;
@@ -1621,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        device->dev_root = root->fs_info->dev_root;
        device->bdev = bdev;
        device->in_fs_metadata = 1;
-        device->mode = 0;
+        device->mode = FMODE_EXCL;
        set_blocksize(device->bdev, 4096);
        if (seeding_dev) {
@@ -1873,7 +1892,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
                return ret;
        trans = btrfs_start_transaction(root, 0);
-        BUG_ON(!trans);
+        BUG_ON(IS_ERR(trans));
        lock_chunks(root);
@@ -2047,7 +2066,7 @@ int btrfs_balance(struct btrfs_root *dev_root)
                BUG_ON(ret);
                trans = btrfs_start_transaction(dev_root, 0);
-                BUG_ON(!trans);
+                BUG_ON(IS_ERR(trans));
                ret = btrfs_grow_device(trans, device, old_size);
                BUG_ON(ret);
@@ -2213,6 +2232,11 @@ again:
        /* Shrinking succeeded, else we would be at "done". */
        trans = btrfs_start_transaction(root, 0);
+        if (IS_ERR(trans)) {
+                ret = PTR_ERR(trans);
+                goto done;
+        }
        lock_chunks(root);
        device->disk_total_bytes = new_size;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 60d27bc9eb83..6b61ded701e1 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1560,9 +1560,10 @@ retry_locked:
                /* NOTE: no side-effects allowed, until we take s_mutex */
                revoking = cap->implemented & ~cap->issued;
-                if (revoking)
+                dout(" mds%d cap %p issued %s implemented %s revoking %s\n",
-                        dout(" mds%d revoking %s\n", cap->mds,
+                     cap->mds, cap, ceph_cap_string(cap->issued),
-                             ceph_cap_string(revoking));
+                     ceph_cap_string(cap->implemented),
+                     ceph_cap_string(revoking));
                if (cap == ci->i_auth_cap &&
                    (cap->issued & CEPH_CAP_FILE_WR)) {
@@ -1658,6 +1659,8 @@ ack:
                if (cap == ci->i_auth_cap && ci->i_dirty_caps)
                        flushing = __mark_caps_flushing(inode, session);
+                else
+                        flushing = 0;
                mds = cap->mds;  /* remember mds, so we don't repeat */
                sent++;
@@ -1940,6 +1943,35 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
        }
 }
+static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
+                                     struct ceph_mds_session *session,
+                                     struct inode *inode)
+{       /*
+         * Send a CEPH_CAP_OP_FLUSH for the caps in i_flushing_caps (if any)
+         * on the inode's auth cap, after calling __ceph_flush_snaps(); the
+         * inode's i_lock is taken first.
+         */
+        struct ceph_inode_info *ci = ceph_inode(inode);
+        struct ceph_cap *cap;
+        int delayed = 0;        /* return value of __send_cap(), if called */
+        spin_lock(&inode->i_lock);
+        cap = ci->i_auth_cap;   /* NOTE(review): dereferenced unchecked below */
+        dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
+             ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
+        __ceph_flush_snaps(ci, &session, 1);
+        if (ci->i_flushing_caps) {      /*
+                 * NOTE(review): no unlock follows __send_cap() and the
+                 * delayed path re-takes i_lock, so __send_cap() presumably
+                 * drops i_lock before returning -- confirm.
+                 */
+                delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
+                                     __ceph_caps_used(ci),
+                                     __ceph_caps_wanted(ci),
+                                     cap->issued | cap->implemented,
+                                     ci->i_flushing_caps, NULL);
+                if (delayed) {  /* non-zero: requeue the inode under i_lock */
+                        spin_lock(&inode->i_lock);
+                        __cap_delay_requeue(mdsc, ci);
+                        spin_unlock(&inode->i_lock);
+                }
+        } else {
+                spin_unlock(&inode->i_lock);    /* nothing flushing; unlock here */
+        }
+}
 /*
 * Take references to capabilities we hold, so that we don't release
@@ -2687,7 +2719,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
        ceph_add_cap(inode, session, cap_id, -1,
                     issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
                     NULL /* no caps context */);
-        try_flush_caps(inode, session, NULL);
+        kick_flushing_inode_caps(mdsc, session, inode);
        up_read(&mdsc->snap_rwsem);
        /* make sure we re-request max_size, if necessary */
@@ -2785,8 +2817,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
        case CEPH_CAP_OP_IMPORT:
                handle_cap_import(mdsc, inode, h, session,
                                  snaptrace, snaptrace_len);
-                ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY,
+                ceph_check_caps(ceph_inode(inode), 0, session);
-                                session);
                goto done_unlocked;
        }
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 0bc68de8edd7..099a58615b90 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -409,7 +409,7 @@ more:
        spin_lock(&inode->i_lock);
        if (ci->i_release_count == fi->dir_release_count) {
                dout(" marking %p complete\n", inode);
-                ci->i_ceph_flags |= CEPH_I_COMPLETE;
+                /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
                ci->i_max_offset = filp->f_pos;
        }
        spin_unlock(&inode->i_lock);
@@ -496,6 +496,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
        /* .snap dir? */
        if (err == -ENOENT &&
+            ceph_snap(parent) == CEPH_NOSNAP &&
            strcmp(dentry->d_name.name,
                   fsc->mount_options->snapdir_name) == 0) {
                struct inode *inode = ceph_get_snapdir(parent);
@@ -1029,28 +1030,8 @@ out_touch:
 static void ceph_dentry_release(struct dentry *dentry)
 {
        struct ceph_dentry_info *di = ceph_dentry(dentry);
-        struct inode *parent_inode = NULL;
-        u64 snapid = CEPH_NOSNAP;
-        if (!IS_ROOT(dentry)) {
+        dout("dentry_release %p\n", dentry);
-                parent_inode = dentry->d_parent->d_inode;
-                if (parent_inode)
-                        snapid = ceph_snap(parent_inode);
-        }
-        dout("dentry_release %p parent %p\n", dentry, parent_inode);
-        if (parent_inode && snapid != CEPH_SNAPDIR) {
-                struct ceph_inode_info *ci = ceph_inode(parent_inode);
-                spin_lock(&parent_inode->i_lock);
-                if (ci->i_shared_gen == di->lease_shared_gen ||
-                    snapid <= CEPH_MAXSNAP) {
-                        dout(" clearing %p complete (d_release)\n",
-                             parent_inode);
-                        ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
-                        ci->i_release_count++;
-                }
-                spin_unlock(&parent_inode->i_lock);
-        }
        if (di) {
                ceph_dentry_lru_del(dentry);
                if (di->lease_session)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e835eff551e3..193bfa5e9cbd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -707,13 +707,9 @@ static int fill_inode(struct inode *inode,
                    (issued & CEPH_CAP_FILE_EXCL) == 0 &&
                    (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
                        dout(" marking %p complete (empty)\n", inode);
-                        ci->i_ceph_flags |= CEPH_I_COMPLETE;
+                        /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
                        ci->i_max_offset = 2;
                }
-                /* it may be better to set st_size in getattr instead? */
-                if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
-                        inode->i_size = ci->i_rbytes;
                break;
        default:
                pr_err("fill_inode %llx.%llx BAD mode 0%o\n",
@@ -1819,7 +1815,11 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
                else
                        stat->dev = 0;
                if (S_ISDIR(inode->i_mode)) {
-                        stat->size = ci->i_rbytes;
+                        if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
+                                                RBYTES))
+                                stat->size = ci->i_rbytes;
+                        else
+                                stat->size = ci->i_files + ci->i_subdirs;
                        stat->blocks = 0;
                        stat->blksize = 65536;
                }
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1e30d194a8e3..a1ee8fa3a8e7 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -693,9 +693,11 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
                                dout("choose_mds %p %llx.%llx "
                                     "frag %u mds%d (%d/%d)\n",
                                     inode, ceph_vinop(inode),
-                                     frag.frag, frag.mds,
+                                     frag.frag, mds,
                                     (int)r, frag.ndist);
-                                return mds;
+                                if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
+                                    CEPH_MDS_STATE_ACTIVE)
+                                        return mds;
                        }
                        /* since this file/dir wasn't known to be
@@ -708,7 +710,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
                                dout("choose_mds %p %llx.%llx "
                                     "frag %u mds%d (auth)\n",
                                     inode, ceph_vinop(inode), frag.frag, mds);
-                                return mds;
+                                if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
+                                    CEPH_MDS_STATE_ACTIVE)
+                                        return mds;
                        }
                }
        }
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 39c243acd062..f40b9139e437 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -584,10 +584,14 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
        if (lastinode)
                iput(lastinode);
-        dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino);
+        list_for_each_entry(child, &realm->children, child_item) {
-        list_for_each_entry(child, &realm->children, child_item)
+                dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
-                queue_realm_cap_snaps(child);
+                     realm, realm->ino, child, child->ino);
+                list_del_init(&child->dirty_item);
+                list_add(&child->dirty_item, &realm->dirty_item);
+        }
+        list_del_init(&realm->dirty_item);
        dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
 }
@@ -683,7 +687,9 @@ more:
         * queue cap snaps _after_ we've built the new snap contexts,
         * so that i_head_snapc can be set appropriately.
         */
-        list_for_each_entry(realm, &dirty_realms, dirty_item) {
+        while (!list_empty(&dirty_realms)) {
+                realm = list_first_entry(&dirty_realms, struct ceph_snap_realm,
+                                         dirty_item);
                queue_realm_cap_snaps(realm);
        }
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index bf6f0f34082a..9c5085465a63 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -290,6 +290,8 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
        fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
        fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
+        fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
+        fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
        fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
        fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
        fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 6e12a6ba5f79..8c9eba6ef9df 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -219,6 +219,7 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct ceph_inode_xattr *xattr = NULL;
+        int name_len = strlen(name);
        int c;
        p = &ci->i_xattrs.index.rb_node;
@@ -226,6 +227,8 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
                parent = *p;
                xattr = rb_entry(parent, struct ceph_inode_xattr, node);
                c = strncmp(name, xattr->name, xattr->name_len);
+                if (c == 0 && name_len > xattr->name_len)
+                        c = 1;
                if (c < 0)
                        p = &(*p)->rb_left;
                else if (c > 0)
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index ee45648b0d1a..7cb0f7f847e4 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -3,6 +3,7 @@ config CIFS
        depends on INET
        select NLS
        select CRYPTO
+        select CRYPTO_MD4
        select CRYPTO_MD5
        select CRYPTO_HMAC
        select CRYPTO_ARC4
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 43b19dd39191..d87558448e3d 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o
 cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
          link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
-          md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
+          cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
          readdir.o ioctl.o sess.o export.o
 cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
diff --git a/fs/cifs/README b/fs/cifs/README
index 46af99ab3614..fe1683590828 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -452,6 +452,11 @@ A partial list of the supported mount options follows:
                if oplock (caching token) is granted and held. Note that
                direct allows write operations larger than page size
                to be sent to the server.
+  strictcache   Switch on strict cache mode. In this mode the client reads
+                from the cache as long as it holds an Oplock Level II;
+                otherwise it reads from the server. All written data is
+                stored in the cache, but if the client does not hold an
+                Exclusive Oplock, it writes the data to the server.
  acl           Allow setfacl and getfacl to manage posix ACLs if server
                supports them.  (default)
  noacl         Do not allow setfacl and getfacl calls on this mount
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 7ed36536e754..0a265ad9e426 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -282,8 +282,6 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
        cFYI(1, "in %s", __func__);
        BUG_ON(IS_ROOT(mntpt));
-        xid = GetXid();
        /*
         * The MSDFS spec states that paths in DFS referral requests and
         * responses must be prefixed by a single '\' character instead of
@@ -293,20 +291,21 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
        mnt = ERR_PTR(-ENOMEM);
        full_path = build_path_from_dentry(mntpt);
        if (full_path == NULL)
-                goto free_xid;
+                goto cdda_exit;
        cifs_sb = CIFS_SB(mntpt->d_inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
-        mnt = ERR_PTR(-EINVAL);
        if (IS_ERR(tlink)) {
                mnt = ERR_CAST(tlink);
                goto free_full_path;
        }
        ses = tlink_tcon(tlink)->ses;
+        xid = GetXid();
        rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls,
                &num_referrals, &referrals,
                cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+        FreeXid(xid);
        cifs_put_tlink(tlink);
@@ -339,8 +338,7 @@ success:
        free_dfs_info_array(referrals, num_referrals);
 free_full_path:
        kfree(full_path);
-free_xid:
+cdda_exit:
-        FreeXid(xid);
        cFYI(1, "leaving %s" , __func__);
        return mnt;
 }
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 1e7636b145a8..beeebf194234 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -372,6 +372,10 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
                ppace = kmalloc(num_aces * sizeof(struct cifs_ace *),
                                GFP_KERNEL);
+                if (!ppace) {
+                        cERROR(1, "DACL memory allocation error");
+                        return;
+                }
                for (i = 0; i < num_aces; ++i) {
                        ppace[i] = (struct cifs_ace *) (acl_base + acl_size);
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 66f3d50d0676..a51585f9852b 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -24,7 +24,6 @@
 #include "cifspdu.h"
 #include "cifsglob.h"
 #include "cifs_debug.h"
-#include "md5.h"
 #include "cifs_unicode.h"
 #include "cifsproto.h"
 #include "ntlmssp.h"
@@ -37,11 +36,6 @@
 /* Note that the smb header signature field on input contains the
        sequence number before this function is called */
-extern void mdfour(unsigned char *out, unsigned char *in, int n);
-extern void E_md4hash(const unsigned char *passwd, unsigned char *p16);
-extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
-                       unsigned char *p24);
 static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
                                struct TCP_Server_Info *server, char *signature)
 {
@@ -234,6 +228,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
 /* first calculate 24 bytes ntlm response and then 16 byte session key */
 int setup_ntlm_response(struct cifsSesInfo *ses)
 {
+        int rc = 0;
        unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
        char temp_key[CIFS_SESS_KEY_SIZE];
@@ -247,13 +242,26 @@ int setup_ntlm_response(struct cifsSesInfo *ses)
        }
        ses->auth_key.len = temp_len;
-        SMBNTencrypt(ses->password, ses->server->cryptkey,
+        rc = SMBNTencrypt(ses->password, ses->server->cryptkey,
                        ses->auth_key.response + CIFS_SESS_KEY_SIZE);
+        if (rc) {
+                cFYI(1, "%s Can't generate NTLM response, error: %d",
+                        __func__, rc);
+                return rc;
+        }
-        E_md4hash(ses->password, temp_key);
+        rc = E_md4hash(ses->password, temp_key);
-        mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
+        if (rc) {
+                cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
+                return rc;
+        }
-        return 0;
+        rc = mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
+        if (rc)
+                cFYI(1, "%s Can't generate NTLM session key, error: %d",
+                        __func__, rc);
+        return rc;
 }
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
@@ -649,9 +657,10 @@ calc_seckey(struct cifsSesInfo *ses)
        get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
        tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
-        if (!tfm_arc4 || IS_ERR(tfm_arc4)) {
+        if (IS_ERR(tfm_arc4)) {
+                rc = PTR_ERR(tfm_arc4);
                cERROR(1, "could not allocate crypto API arc4\n");
-                return PTR_ERR(tfm_arc4);
+                return rc;
        }
        desc.tfm = tfm_arc4;
@@ -700,14 +709,13 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
        unsigned int size;
        server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
-        if (!server->secmech.hmacmd5 ||
+        if (IS_ERR(server->secmech.hmacmd5)) {
-                        IS_ERR(server->secmech.hmacmd5)) {
                cERROR(1, "could not allocate crypto hmacmd5\n");
                return PTR_ERR(server->secmech.hmacmd5);
        }
        server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
-        if (!server->secmech.md5 || IS_ERR(server->secmech.md5)) {
+        if (IS_ERR(server->secmech.md5)) {
                cERROR(1, "could not allocate crypto md5\n");
                rc = PTR_ERR(server->secmech.md5);
                goto crypto_allocate_md5_fail;
diff --git a/fs/cifs/cifsencrypt.h b/fs/cifs/cifsencrypt.h
deleted file mode 100644
index 15d2ec006474..000000000000
--- a/fs/cifs/cifsencrypt.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- *   fs/cifs/cifsencrypt.h
- *
- *   Copyright (c) International Business Machines  Corp., 2005
- *   Author(s): Steve French (sfrench@us.ibm.com)
- *
- *   Externs for misc. small encryption routines
- *   so we do not have to put them in cifsproto.h
- *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-/* md4.c */
-extern void mdfour(unsigned char *out, unsigned char *in, int n);
-/* smbdes.c */
-extern void E_P16(unsigned char *p14, unsigned char *p16);
-extern void E_P24(unsigned char *p21, const unsigned char *c8,
-                  unsigned char *p24);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a8323f1dc1c4..f2970136d17d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -600,10 +600,17 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
        struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
        ssize_t written;
+        int rc;
        written = generic_file_aio_write(iocb, iov, nr_segs, pos);
-        if (!CIFS_I(inode)->clientCanCacheAll)
-                filemap_fdatawrite(inode->i_mapping);
+        if (CIFS_I(inode)->clientCanCacheAll)
+                return written;
+        rc = filemap_fdatawrite(inode->i_mapping);
+        if (rc)
+                cFYI(1, "cifs_file_aio_write: %d rc on %p inode", rc, inode);
        return written;
 }
@@ -737,7 +744,7 @@ const struct file_operations cifs_file_strict_ops = {
        .read = do_sync_read,
        .write = do_sync_write,
        .aio_read = cifs_strict_readv,
-        .aio_write = cifs_file_aio_write,
+        .aio_write = cifs_strict_writev,
        .open = cifs_open,
        .release = cifs_close,
        .lock = cifs_lock,
@@ -793,7 +800,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
        .read = do_sync_read,
        .write = do_sync_write,
        .aio_read = cifs_strict_readv,
-        .aio_write = cifs_file_aio_write,
+        .aio_write = cifs_strict_writev,
        .open = cifs_open,
        .release = cifs_close,
        .fsync = cifs_strict_fsync,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f23206d46531..a9371b6578c0 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -85,7 +85,9 @@ extern ssize_t cifs_user_read(struct file *file, char __user *read_data,
 extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
                                 unsigned long nr_segs, loff_t pos);
 extern ssize_t cifs_user_write(struct file *file, const char __user *write_data,
-                         size_t write_size, loff_t *poffset);
+                               size_t write_size, loff_t *poffset);
+extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
+                                  unsigned long nr_segs, loff_t pos);
 extern int cifs_lock(struct file *, int, struct file_lock *);
 extern int cifs_fsync(struct file *, int);
 extern int cifs_strict_fsync(struct file *, int);
@@ -125,5 +127,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* EXPERIMENTAL */
-#define CIFS_VERSION   "1.69"
+#define CIFS_VERSION   "1.71"
 #endif                          /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 5bfb75346cb0..17afb0fbcaed 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -166,6 +166,9 @@ struct TCP_Server_Info {
        struct socket *ssocket;
        struct sockaddr_storage dstaddr;
        struct sockaddr_storage srcaddr; /* locally bind to this IP */
+#ifdef CONFIG_NET_NS
+        struct net *net;
+#endif
        wait_queue_head_t response_q;
        wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/
        struct list_head pending_mid_q;
@@ -185,6 +188,8 @@ struct TCP_Server_Info {
        /* multiplexed reads or writes */
        unsigned int maxBuf;    /* maxBuf specifies the maximum */
        /* message size the server can send or receive for non-raw SMBs */
+        /* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */
+        /* when socket is setup (and during reconnect) before NegProt sent */
        unsigned int max_rw;    /* maxRw specifies the maximum */
        /* message size the server can send or receive for */
        /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */
@@ -217,6 +222,36 @@ struct TCP_Server_Info {
 };
 /*
+ * Inline helpers that allow the TCP_Server_Info->net field and related code
+ * to drop out when CONFIG_NET_NS isn't set.
+ */
+#ifdef CONFIG_NET_NS
+static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv)
+{
+        return srv->net;        /* namespace stored in the server struct */
+}
+static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
+{
+        srv->net = net;         /* stores the pointer only */
+}
+#else /* !CONFIG_NET_NS */
+static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv)
+{
+        return &init_net;       /* srv is ignored in this variant */
+}
+static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
+{                               /* intentionally empty: nothing is stored */
+}
+#endif /* CONFIG_NET_NS */
+/*
 * Session structure.  One of these for each uid session with a particular host
 */
 struct cifsSesInfo {
@@ -619,7 +654,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
 #define   MID_REQUEST_SUBMITTED 2
 #define   MID_RESPONSE_RECEIVED 4
 #define   MID_RETRY_NEEDED      8 /* session closed while this request out */
-#define   MID_NO_RESP_NEEDED 0x10
+#define   MID_RESPONSE_MALFORMED 0x10
 /* Types of response buffer returned from SendReceive2 */
 #define   CIFS_NO_BUFFER        0    /* Response buffer not returned */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 982895fa7615..8096f27ad9a8 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -85,6 +85,8 @@ extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length);
 extern bool is_valid_oplock_break(struct smb_hdr *smb,
                                  struct TCP_Server_Info *);
 extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
+extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
+                            unsigned int bytes_written);
 extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
 extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
 extern unsigned int smbCalcSize(struct smb_hdr *ptr);
@@ -373,7 +375,7 @@ extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
 extern int cifs_verify_signature(struct smb_hdr *,
                                 struct TCP_Server_Info *server,
                                __u32 expected_sequence_number);
-extern void SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
+extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
 extern int setup_ntlm_response(struct cifsSesInfo *);
 extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *);
 extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
@@ -423,4 +425,11 @@ extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr);
 extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
                const unsigned char *path,
                struct cifs_sb_info *cifs_sb, int xid);
+extern int mdfour(unsigned char *, unsigned char *, int);
+extern int E_md4hash(const unsigned char *passwd, unsigned char *p16);
+extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
+                        unsigned char *p24);
+extern void E_P16(unsigned char *p14, unsigned char *p16);
+extern void E_P24(unsigned char *p21, const unsigned char *c8,
+                        unsigned char *p24);
 #endif                  /* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 3106f5e5c633..904aa47e3515 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -136,9 +136,6 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
                }
        }
-        if (ses->status == CifsExiting)
-                return -EIO;
        /*
         * Give demultiplex thread up to 10 seconds to reconnect, should be
         * greater than cifs socket timeout which is 7 seconds
@@ -156,7 +153,7 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
                 * retrying until process is killed or server comes
                 * back on-line
                 */
-                if (!tcon->retry || ses->status == CifsExiting) {
+                if (!tcon->retry) {
                        cFYI(1, "gave up waiting on reconnect in smb_init");
                        return -EHOSTDOWN;
                }
@@ -4914,7 +4911,6 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
                   __u16 fid, __u32 pid_of_opener, bool SetAllocation)
 {
        struct smb_com_transaction2_sfi_req *pSMB  = NULL;
-        char *data_offset;
        struct file_end_of_file_info *parm_data;
        int rc = 0;
        __u16 params, param_offset, offset, byte_count, count;
@@ -4938,8 +4934,6 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
        param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4;
        offset = param_offset + params;
-        data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
        count = sizeof(struct file_end_of_file_info);
        pSMB->MaxParameterCount = cpu_to_le16(2);
        /* BB find exact max SMB PDU from sess structure BB */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 18d3c7724d6e..8d6c17ab593d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -55,9 +55,6 @@
 /* SMB echo "timeout" -- FIXME: tunable? */
 #define SMB_ECHO_INTERVAL (60 * HZ)
-extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
-                         unsigned char *p24);
 extern mempool_t *cifs_req_poolp;
 struct smb_vol {
@@ -87,6 +84,7 @@ struct smb_vol {
        bool no_xattr:1;   /* set if xattr (EA) support should be disabled*/
        bool server_ino:1; /* use inode numbers from server ie UniqueId */
        bool direct_io:1;
+        bool strict_io:1; /* strict cache behavior */
        bool remap:1;      /* set to remap seven reserved chars in filenames */
        bool posix_paths:1; /* unset to not ask for posix pathnames. */
        bool no_linux_ext:1;
@@ -339,8 +337,13 @@ cifs_echo_request(struct work_struct *work)
        struct TCP_Server_Info *server = container_of(work,
                                        struct TCP_Server_Info, echo.work);
-        /* no need to ping if we got a response recently */
+        /*
-        if (time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
+         * We cannot send an echo until the NEGOTIATE_PROTOCOL request is
+         * done, which is indicated by maxBuf != 0. Also, no need to ping if
+         * we got a response recently
+         */
+        if (server->maxBuf == 0 ||
+            time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
                goto requeue_echo;
        rc = CIFSSMBEcho(server);
@@ -580,14 +583,23 @@ incomplete_rcv:
                else if (reconnect == 1)
                        continue;
-                length += 4; /* account for rfc1002 hdr */
+                total_read += 4; /* account for rfc1002 hdr */
+                dump_smb(smb_buffer, total_read);
-                dump_smb(smb_buffer, length);
+                /*
-                if (checkSMB(smb_buffer, smb_buffer->Mid, total_read+4)) {
+                 * We know that we received enough to get to the MID as we
-                        cifs_dump_mem("Bad SMB: ", smb_buffer, 48);
+                 * checked the pdu_length earlier. Now check to see
-                        continue;
+                 * if the rest of the header is OK. We borrow the length
-                }
+                 * var for the rest of the loop to avoid a new stack var.
+                 *
+                 * 48 bytes is enough to display the header and a little bit
+                 * into the payload for debugging purposes.
+                 */
+                length = checkSMB(smb_buffer, smb_buffer->Mid, total_read);
+                if (length != 0)
+                        cifs_dump_mem("Bad SMB: ", smb_buffer,
+                                        min_t(unsigned int, total_read, 48));
                mid_entry = NULL;
                server->lstrp = jiffies;
@@ -599,7 +611,8 @@ incomplete_rcv:
                        if ((mid_entry->mid == smb_buffer->Mid) &&
                            (mid_entry->midState == MID_REQUEST_SUBMITTED) &&
                            (mid_entry->command == smb_buffer->Command)) {
-                                if (check2ndT2(smb_buffer,server->maxBuf) > 0) {
+                                if (length == 0 &&
+                                   check2ndT2(smb_buffer, server->maxBuf) > 0) {
                                        /* We have a multipart transact2 resp */
                                        isMultiRsp = true;
                                        if (mid_entry->resp_buf) {
@@ -634,12 +647,17 @@ incomplete_rcv:
                                mid_entry->resp_buf = smb_buffer;
                                mid_entry->largeBuf = isLargeBuf;
 multi_t2_fnd:
-                                mid_entry->midState = MID_RESPONSE_RECEIVED;
+                                if (length == 0)
-                                list_del_init(&mid_entry->qhead);
+                                        mid_entry->midState =
-                                mid_entry->callback(mid_entry);
+                                                        MID_RESPONSE_RECEIVED;
+                                else
+                                        mid_entry->midState =
+                                                        MID_RESPONSE_MALFORMED;
 #ifdef CONFIG_CIFS_STATS2
                                mid_entry->when_received = jiffies;
 #endif
+                                list_del_init(&mid_entry->qhead);
+                                mid_entry->callback(mid_entry);
                                break;
                        }
                        mid_entry = NULL;
@@ -655,6 +673,9 @@ multi_t2_fnd:
                                else
                                        smallbuf = NULL;
                        }
+                } else if (length != 0) {
+                        /* response sanity checks failed */
+                        continue;
                } else if (!is_valid_oplock_break(smb_buffer, server) &&
                           !isMultiRsp) {
                        cERROR(1, "No task to wake, unknown frame received! "
@@ -1344,6 +1365,8 @@ cifs_parse_mount_options(char *options, const char *devname,
                        vol->direct_io = 1;
                } else if (strnicmp(data, "forcedirectio", 13) == 0) {
                        vol->direct_io = 1;
+                } else if (strnicmp(data, "strictcache", 11) == 0) {
+                        vol->strict_io = 1;
                } else if (strnicmp(data, "noac", 4) == 0) {
                        printk(KERN_WARNING "CIFS: Mount option noac not "
                                "supported. Instead set "
@@ -1568,6 +1591,9 @@ cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
        spin_lock(&cifs_tcp_ses_lock);
        list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
+                if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns))
+                        continue;
                if (!match_address(server, addr,
                                   (struct sockaddr *)&vol->srcaddr))
                        continue;
@@ -1598,6 +1624,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
                return;
        }
+        put_net(cifs_net_ns(server));
        list_del_init(&server->tcp_ses_list);
        spin_unlock(&cifs_tcp_ses_lock);
@@ -1672,6 +1700,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
                goto out_err;
        }
+        cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
        tcp_ses->hostname = extract_hostname(volume_info->UNC);
        if (IS_ERR(tcp_ses->hostname)) {
                rc = PTR_ERR(tcp_ses->hostname);
@@ -1752,6 +1781,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 out_err_crypto_release:
        cifs_crypto_shash_release(tcp_ses);
+        put_net(cifs_net_ns(tcp_ses));
 out_err:
        if (tcp_ses) {
                if (!IS_ERR(tcp_ses->hostname))
@@ -2263,8 +2294,8 @@ generic_ip_connect(struct TCP_Server_Info *server)
        }
        if (socket == NULL) {
-                rc = sock_create_kern(sfamily, SOCK_STREAM,
+                rc = __sock_create(cifs_net_ns(server), sfamily, SOCK_STREAM,
-                                      IPPROTO_TCP, &socket);
+                                   IPPROTO_TCP, &socket, 1);
                if (rc < 0) {
                        cERROR(1, "Error %d creating socket", rc);
                        server->ssocket = NULL;
@@ -2576,6 +2607,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
        if (pvolume_info->multiuser)
                cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER |
                                            CIFS_MOUNT_NO_PERM);
+        if (pvolume_info->strict_io)
+                cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO;
        if (pvolume_info->direct_io) {
                cFYI(1, "mounting share using direct i/o");
                cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
@@ -2977,7 +3010,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
                                         bcc_ptr);
                else
 #endif /* CIFS_WEAK_PW_HASH */
-                SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr);
+                rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
+                                        bcc_ptr);
                bcc_ptr += CIFS_AUTH_RESP_SIZE;
                if (ses->capabilities & CAP_UNICODE) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index d7d65a70678e..e964b1cd5dd0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -346,7 +346,6 @@ int cifs_open(struct inode *inode, struct file *file)
        struct cifsTconInfo *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *pCifsFile = NULL;
-        struct cifsInodeInfo *pCifsInode;
        char *full_path = NULL;
        bool posix_open_ok = false;
        __u16 netfid;
@@ -361,8 +360,6 @@ int cifs_open(struct inode *inode, struct file *file)
        }
        tcon = tlink_tcon(tlink);
-        pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
@@ -848,7 +845,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 }
 /* update the file size (if needed) after a write */
-static void
+void
 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
                      unsigned int bytes_written)
 {
@@ -1146,7 +1143,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
        char *write_data;
        int rc = -EFAULT;
        int bytes_written = 0;
-        struct cifs_sb_info *cifs_sb;
        struct inode *inode;
        struct cifsFileInfo *open_file;
@@ -1154,7 +1150,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
                return -EFAULT;
        inode = page->mapping->host;
-        cifs_sb = CIFS_SB(inode->i_sb);
        offset += (loff_t)from;
        write_data = kmap(page);
@@ -1619,13 +1614,215 @@ int cifs_flush(struct file *file, fl_owner_t id)
        return rc;
 }
+/*
+ * Allocate num_pages pages and store them in pages[0..num_pages-1].
+ *
+ * Returns 0 on success. On failure returns -ENOMEM after releasing every
+ * page that was already allocated, so the caller must not put any of them.
+ */
+static int
+cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
+{
+        int rc = 0;
+        unsigned long i;
+        for (i = 0; i < num_pages; i++) {
+                /*
+                 * __GFP_HIGHMEM on its own is only a zone modifier; OR in
+                 * GFP_KERNEL so the allocation uses the same allocation
+                 * context as the GFP_KERNEL kmalloc() calls the caller
+                 * makes right before this.
+                 */
+                pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+                if (!pages[i]) {
+                        /*
+                         * save number of pages we have already allocated and
+                         * return with ENOMEM error
+                         */
+                        num_pages = i;
+                        rc = -ENOMEM;
+                        goto error;
+                }
+        }
+        return rc;
+error:
+        /* undo the partial allocation: only the first num_pages are valid */
+        for (i = 0; i < num_pages; i++)
+                put_page(pages[i]);
+        return rc;
+}
+/*
+ * Size the next write chunk.
+ *
+ * The chunk is the smaller of len and wsize. If cur_len is non-NULL the
+ * chunk size in bytes is stored there. Returns the number of
+ * PAGE_CACHE_SIZE pages needed to hold the chunk, rounded up.
+ */
+static inline
+size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
+{
+        size_t chunk = min_t(const size_t, len, wsize);
+        size_t pages_needed = chunk / PAGE_CACHE_SIZE;
+        /* a trailing partial page still occupies a whole page */
+        if (chunk % PAGE_CACHE_SIZE)
+                pages_needed += 1;
+        if (cur_len)
+                *cur_len = chunk;
+        return pages_needed;
+}
+/*
+ * Write the data described by iov/nr_segs to the server starting at
+ * *poffset, without going through the page cache.
+ *
+ * User data is copied into temporary pages and sent with CIFSSMBWrite2()
+ * in chunks of at most cifs_sb->wsize bytes. For every chunk that makes
+ * progress, *poffset is advanced and cifs_update_eof() is called; i_size
+ * is raised at the end if the write extended the file.
+ *
+ * Returns the total number of bytes written, 0 for an empty iovec, or a
+ * negative error code when nothing could be written.
+ */
+static ssize_t
+cifs_iovec_write(struct file *file, const struct iovec *iov,
+                 unsigned long nr_segs, loff_t *poffset)
+{
+        unsigned int written;
+        unsigned long num_pages, npages, i;
+        size_t copied, len, cur_len;
+        ssize_t total_written = 0;
+        struct kvec *to_send;
+        struct page **pages;
+        struct iov_iter it;
+        struct inode *inode;
+        struct cifsFileInfo *open_file;
+        struct cifsTconInfo *pTcon;
+        struct cifs_sb_info *cifs_sb;
+        int xid, rc;
+        len = iov_length(iov, nr_segs);
+        if (!len)
+                return 0;
+        rc = generic_write_checks(file, poffset, &len, 0);
+        if (rc)
+                return rc;
+        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+        /* the first chunk is the largest one, so size the buffers for it */
+        num_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
+        /* sizeof(*pages), not a spelled-out type, so it cannot go stale */
+        pages = kmalloc(sizeof(*pages) * num_pages, GFP_KERNEL);
+        if (!pages)
+                return -ENOMEM;
+        /*
+         * One kvec per page plus slot 0, which is left untouched here
+         * (presumably reserved for the header CIFSSMBWrite2() builds --
+         * confirm against its definition).
+         */
+        to_send = kmalloc(sizeof(struct kvec)*(num_pages + 1), GFP_KERNEL);
+        if (!to_send) {
+                kfree(pages);
+                return -ENOMEM;
+        }
+        rc = cifs_write_allocate_pages(pages, num_pages);
+        if (rc) {
+                /* the pages themselves were already released by the helper */
+                kfree(pages);
+                kfree(to_send);
+                return rc;
+        }
+        xid = GetXid();
+        open_file = file->private_data;
+        pTcon = tlink_tcon(open_file->tlink);
+        inode = file->f_path.dentry->d_inode;
+        iov_iter_init(&it, iov, nr_segs, len, 0);
+        npages = num_pages;
+        do {
+                size_t save_len = cur_len;
+                /* stage the next chunk of user data in the bounce pages */
+                for (i = 0; i < npages; i++) {
+                        copied = min_t(const size_t, cur_len, PAGE_CACHE_SIZE);
+                        copied = iov_iter_copy_from_user(pages[i], &it, 0,
+                                                         copied);
+                        cur_len -= copied;
+                        iov_iter_advance(&it, copied);
+                        to_send[i+1].iov_base = kmap(pages[i]);
+                        to_send[i+1].iov_len = copied;
+                }
+                /* bytes actually staged = requested - what is left over */
+                cur_len = save_len - cur_len;
+                /*
+                 * Start from a known value: if reopening the handle fails
+                 * we leave the retry loop without CIFSSMBWrite2() ever
+                 * storing into 'written', and it is tested right below.
+                 */
+                written = 0;
+                do {
+                        if (open_file->invalidHandle) {
+                                rc = cifs_reopen_file(open_file, false);
+                                if (rc != 0)
+                                        break;
+                        }
+                        rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid,
+                                           cur_len, *poffset, &written,
+                                           to_send, npages, 0);
+                } while (rc == -EAGAIN);
+                for (i = 0; i < npages; i++)
+                        kunmap(pages[i]);
+                if (written) {
+                        len -= written;
+                        total_written += written;
+                        cifs_update_eof(CIFS_I(inode), *poffset, written);
+                        *poffset += written;
+                } else if (rc < 0) {
+                        /* report the error only if no data went out at all */
+                        if (!total_written)
+                                total_written = rc;
+                        break;
+                }
+                /* get length and number of kvecs of the next write */
+                npages = get_numpages(cifs_sb->wsize, len, &cur_len);
+        } while (len > 0);
+        if (total_written > 0) {
+                spin_lock(&inode->i_lock);
+                if (*poffset > inode->i_size)
+                        i_size_write(inode, *poffset);
+                spin_unlock(&inode->i_lock);
+        }
+        cifs_stats_bytes_written(pTcon, total_written);
+        mark_inode_dirty_sync(inode);
+        /* release all bounce pages, not just those used by the last chunk */
+        for (i = 0; i < num_pages; i++)
+                put_page(pages[i]);
+        kfree(to_send);
+        kfree(pages);
+        FreeXid(xid);
+        return total_written;
+}
+/*
+ * Uncached vectored write: send the iovec straight to the server via
+ * cifs_iovec_write(). When any data was written, flag the inode's mapping
+ * as invalid and record the new file position in the kiocb.
+ *
+ * Returns whatever cifs_iovec_write() returned.
+ */
+static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
+                                unsigned long nr_segs, loff_t pos)
+{
+        struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+        ssize_t nbytes;
+        /*
+         * BB - this path could be optimized when signing is disabled: the
+         * extra memory-to-memory copy could be dropped and the iovec
+         * buffers used directly to construct the write request.
+         */
+        nbytes = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
+        if (nbytes <= 0)
+                return nbytes;
+        CIFS_I(inode)->invalid_mapping = true;
+        iocb->ki_pos = pos;
+        return nbytes;
+}
+/*
+ * Vectored write entry point that picks between the cached and the
+ * uncached write path based on the inode's clientCanCacheAll flag.
+ */
+ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
+                           unsigned long nr_segs, loff_t pos)
+{
+        struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+        /*
+         * In strict cache mode we need to write the data to the server
+         * exactly from pos to pos+len-1 rather than flush all affected
+         * pages, because flushing may cause an error with mandatory locks
+         * that cover those pages but not the region from pos to pos+len-1.
+         */
+        if (!CIFS_I(inode)->clientCanCacheAll)
+                return cifs_user_writev(iocb, iov, nr_segs, pos);
+        /* full caching is allowed: use the generic page-cache write path */
+        return generic_file_aio_write(iocb, iov, nr_segs, pos);
+}
 static ssize_t
 cifs_iovec_read(struct file *file, const struct iovec *iov,
                 unsigned long nr_segs, loff_t *poffset)
 {
        int rc;
        int xid;
-        unsigned int total_read, bytes_read = 0;
+        ssize_t total_read;
+        unsigned int bytes_read = 0;
        size_t len, cur_len;
        int iov_offset = 0;
        struct cifs_sb_info *cifs_sb;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 306769de2fb5..e8804d373404 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -28,7 +28,6 @@
 #include "cifsproto.h"
 #include "cifs_debug.h"
 #include "cifs_fs_sb.h"
-#include "md5.h"
 #define CIFS_MF_SYMLINK_LEN_OFFSET (4+1)
 #define CIFS_MF_SYMLINK_MD5_OFFSET (CIFS_MF_SYMLINK_LEN_OFFSET+(4+1))
@@ -47,6 +46,45 @@
        md5_hash[12], md5_hash[13], md5_hash[14], md5_hash[15]
 static int
+symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
+{
+        int rc;
+        unsigned int size;
+        struct crypto_shash *md5;
+        struct sdesc *sdescmd5;
+        md5 = crypto_alloc_shash("md5", 0, 0);
+        if (IS_ERR(md5)) {
+                rc = PTR_ERR(md5);
+                cERROR(1, "%s: Crypto md5 allocation error %d\n", __func__, rc);
+                return rc;
+        }
+        size = sizeof(struct shash_desc) + crypto_shash_descsize(md5);
+        sdescmd5 = kmalloc(size, GFP_KERNEL);
+        if (!sdescmd5) {
+                rc = -ENOMEM;
+                cERROR(1, "%s: Memory allocation failure\n", __func__);
+                goto symlink_hash_err;
+        }
+        sdescmd5->shash.tfm = md5;
+        sdescmd5->shash.flags = 0x0;
+        rc = crypto_shash_init(&sdescmd5->shash);
+        if (rc) {
+                cERROR(1, "%s: Could not init md5 shash\n", __func__);
+                goto symlink_hash_err;
+        }
+        crypto_shash_update(&sdescmd5->shash, link_str, link_len);
+        rc = crypto_shash_final(&sdescmd5->shash, md5_hash);
+symlink_hash_err:
+        crypto_free_shash(md5);
+        kfree(sdescmd5);
+        return rc;
+}
+static int
 CIFSParseMFSymlink(const u8 *buf,
                   unsigned int buf_len,
                   unsigned int *_link_len,
@@ -56,7 +94,6 @@ CIFSParseMFSymlink(const u8 *buf,
        unsigned int link_len;
        const char *md5_str1;
        const char *link_str;
-        struct MD5Context md5_ctx;
        u8 md5_hash[16];
        char md5_str2[34];
@@ -70,9 +107,11 @@ CIFSParseMFSymlink(const u8 *buf,
        if (rc != 1)
                return -EINVAL;
-        cifs_MD5_init(&md5_ctx);
+        rc = symlink_hash(link_len, link_str, md5_hash);
-        cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len);
+        if (rc) {
-        cifs_MD5_final(md5_hash, &md5_ctx);
+                cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc);
+                return rc;
+        }
        snprintf(md5_str2, sizeof(md5_str2),
                 CIFS_MF_SYMLINK_MD5_FORMAT,
@@ -94,9 +133,9 @@ CIFSParseMFSymlink(const u8 *buf,
 static int
 CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str)
 {
+        int rc;
        unsigned int link_len;
        unsigned int ofs;
-        struct MD5Context md5_ctx;
        u8 md5_hash[16];
        if (buf_len != CIFS_MF_SYMLINK_FILE_SIZE)
@@ -107,9 +146,11 @@ CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str)
        if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN)
                return -ENAMETOOLONG;
-        cifs_MD5_init(&md5_ctx);
+        rc = symlink_hash(link_len, link_str, md5_hash);
-        cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len);
+        if (rc) {
-        cifs_MD5_final(md5_hash, &md5_ctx);
+                cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc);
+                return rc;
+        }
        snprintf(buf, buf_len,
                 CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT,
diff --git a/fs/cifs/md4.c b/fs/cifs/md4.c
deleted file mode 100644
index a725c2609d67..000000000000
--- a/fs/cifs/md4.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
-   Unix SMB/Netbios implementation.
-   Version 1.9.
-   a implementation of MD4 designed for use in the SMB authentication protocol
-   Copyright (C) Andrew Tridgell 1997-1998.
-   Modified by Steve French (sfrench@us.ibm.com) 2002-2003
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2 of the License, or
-   (at your option) any later version.
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-#include <linux/module.h>
-#include <linux/fs.h>
-#include "cifsencrypt.h"
-/* NOTE: This code makes no attempt to be fast! */
-static __u32
-F(__u32 X, __u32 Y, __u32 Z)
-{
-        return (X & Y) | ((~X) & Z);
-}
-static __u32
-G(__u32 X, __u32 Y, __u32 Z)
-{
-        return (X & Y) | (X & Z) | (Y & Z);
-}
-static __u32
-H(__u32 X, __u32 Y, __u32 Z)
-{
-        return X ^ Y ^ Z;
-}
-static __u32
-lshift(__u32 x, int s)
-{
-        x &= 0xFFFFFFFF;
-        return ((x << s) & 0xFFFFFFFF) | (x >> (32 - s));
-}
-#define ROUND1(a,b,c,d,k,s) (*a) = lshift((*a) + F(*b,*c,*d) + X[k], s)
-#define ROUND2(a,b,c,d,k,s) (*a) = lshift((*a) + G(*b,*c,*d) + X[k] + (__u32)0x5A827999,s)
-#define ROUND3(a,b,c,d,k,s) (*a) = lshift((*a) + H(*b,*c,*d) + X[k] + (__u32)0x6ED9EBA1,s)
-/* this applies md4 to 64 byte chunks */
-static void
-mdfour64(__u32 *M, __u32 *A, __u32 *B, __u32 *C, __u32 *D)
-{
-        int j;
-        __u32 AA, BB, CC, DD;
-        __u32 X[16];
-        for (j = 0; j < 16; j++)
-                X[j] = M[j];
-        AA = *A;
-        BB = *B;
-        CC = *C;
-        DD = *D;
-        ROUND1(A, B, C, D, 0, 3);
-        ROUND1(D, A, B, C, 1, 7);
-        ROUND1(C, D, A, B, 2, 11);
-        ROUND1(B, C, D, A, 3, 19);
-        ROUND1(A, B, C, D, 4, 3);
-        ROUND1(D, A, B, C, 5, 7);
-        ROUND1(C, D, A, B, 6, 11);
-        ROUND1(B, C, D, A, 7, 19);
-        ROUND1(A, B, C, D, 8, 3);
-        ROUND1(D, A, B, C, 9, 7);
-        ROUND1(C, D, A, B, 10, 11);
-        ROUND1(B, C, D, A, 11, 19);
-        ROUND1(A, B, C, D, 12, 3);
-        ROUND1(D, A, B, C, 13, 7);
-        ROUND1(C, D, A, B, 14, 11);
-        ROUND1(B, C, D, A, 15, 19);
-        ROUND2(A, B, C, D, 0, 3);
-        ROUND2(D, A, B, C, 4, 5);
-        ROUND2(C, D, A, B, 8, 9);
-        ROUND2(B, C, D, A, 12, 13);
-        ROUND2(A, B, C, D, 1, 3);
-        ROUND2(D, A, B, C, 5, 5);
-        ROUND2(C, D, A, B, 9, 9);
-        ROUND2(B, C, D, A, 13, 13);
-        ROUND2(A, B, C, D, 2, 3);
-        ROUND2(D, A, B, C, 6, 5);
-        ROUND2(C, D, A, B, 10, 9);
-        ROUND2(B, C, D, A, 14, 13);
-        ROUND2(A, B, C, D, 3, 3);
-        ROUND2(D, A, B, C, 7, 5);
-        ROUND2(C, D, A, B, 11, 9);
-        ROUND2(B, C, D, A, 15, 13);
-        ROUND3(A, B, C, D, 0, 3);
-        ROUND3(D, A, B, C, 8, 9);
-        ROUND3(C, D, A, B, 4, 11);
-        ROUND3(B, C, D, A, 12, 15);
-        ROUND3(A, B, C, D, 2, 3);
-        ROUND3(D, A, B, C, 10, 9);
-        ROUND3(C, D, A, B, 6, 11);
-        ROUND3(B, C, D, A, 14, 15);
-        ROUND3(A, B, C, D, 1, 3);
-        ROUND3(D, A, B, C, 9, 9);
-        ROUND3(C, D, A, B, 5, 11);
-        ROUND3(B, C, D, A, 13, 15);
-        ROUND3(A, B, C, D, 3, 3);
-        ROUND3(D, A, B, C, 11, 9);
-        ROUND3(C, D, A, B, 7, 11);
-        ROUND3(B, C, D, A, 15, 15);
-        *A += AA;
-        *B += BB;
-        *C += CC;
-        *D += DD;
-        *A &= 0xFFFFFFFF;
-        *B &= 0xFFFFFFFF;
-        *C &= 0xFFFFFFFF;
-        *D &= 0xFFFFFFFF;
-        for (j = 0; j < 16; j++)
-                X[j] = 0;
-}
-static void
-copy64(__u32 *M, unsigned char *in)
-{
-        int i;
-        for (i = 0; i < 16; i++)
-                M[i] = (in[i * 4 + 3] << 24) | (in[i * 4 + 2] << 16) |
-                    (in[i * 4 + 1] << 8) | (in[i * 4 + 0] << 0);
-}
-static void
-copy4(unsigned char *out, __u32 x)
-{
-        out[0] = x & 0xFF;
-        out[1] = (x >> 8) & 0xFF;
-        out[2] = (x >> 16) & 0xFF;
-        out[3] = (x >> 24) & 0xFF;
-}
-/* produce a md4 message digest from data of length n bytes */
-void
-mdfour(unsigned char *out, unsigned char *in, int n)
-{
-        unsigned char buf[128];
-        __u32 M[16];
-        __u32 b = n * 8;
-        int i;
-        __u32 A = 0x67452301;
-        __u32 B = 0xefcdab89;
-        __u32 C = 0x98badcfe;
-        __u32 D = 0x10325476;
-        while (n > 64) {
-                copy64(M, in);
-                mdfour64(M, &A, &B, &C, &D);
-                in += 64;
-                n -= 64;
-        }
-        for (i = 0; i < 128; i++)
-                buf[i] = 0;
-        memcpy(buf, in, n);
-        buf[n] = 0x80;
-        if (n <= 55) {
-                copy4(buf + 56, b);
-                copy64(M, buf);
-                mdfour64(M, &A, &B, &C, &D);
-        } else {
-                copy4(buf + 120, b);
-                copy64(M, buf);
-                mdfour64(M, &A, &B, &C, &D);
-                copy64(M, buf + 64);
-                mdfour64(M, &A, &B, &C, &D);
-        }
-        for (i = 0; i < 128; i++)
-                buf[i] = 0;
-        copy64(M, buf);
-        copy4(out, A);
-        copy4(out + 4, B);
-        copy4(out + 8, C);
-        copy4(out + 12, D);
-        A = B = C = D = 0;
-}
diff --git a/fs/cifs/md5.c b/fs/cifs/md5.c
deleted file mode 100644
index 98b66a54c319..000000000000
--- a/fs/cifs/md5.c
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- * This code implements the MD5 message-digest algorithm.
- * The algorithm is due to Ron Rivest.  This code was
- * written by Colin Plumb in 1993, no copyright is claimed.
- * This code is in the public domain; do with it what you wish.
- *
- * Equivalent code is available from RSA Data Security, Inc.
- * This code has been tested against that, and is equivalent,
- * except that you don't need to include two pages of legalese
- * with every copy.
- *
- * To compute the message digest of a chunk of bytes, declare an
- * MD5Context structure, pass it to cifs_MD5_init, call cifs_MD5_update as
- * needed on buffers full of bytes, and then call cifs_MD5_final, which
- * will fill a supplied 16-byte array with the digest.
- */
-/* This code slightly modified to fit into Samba by
-   abartlet@samba.org Jun 2001
-   and to fit the cifs vfs by
-   Steve French sfrench@us.ibm.com */
-#include <linux/string.h>
-#include "md5.h"
-static void MD5Transform(__u32 buf[4], __u32 const in[16]);
-/*
- * Note: this code is harmless on little-endian machines.
- */
-static void
-byteReverse(unsigned char *buf, unsigned longs)
-{
-        __u32 t;
-        do {
-                t = (__u32) ((unsigned) buf[3] << 8 | buf[2]) << 16 |
-                    ((unsigned) buf[1] << 8 | buf[0]);
-                *(__u32 *) buf = t;
-                buf += 4;
-        } while (--longs);
-}
-/*
- * Start MD5 accumulation.  Set bit count to 0 and buffer to mysterious
- * initialization constants.
- */
-void
-cifs_MD5_init(struct MD5Context *ctx)
-{
-        ctx->buf[0] = 0x67452301;
-        ctx->buf[1] = 0xefcdab89;
-        ctx->buf[2] = 0x98badcfe;
-        ctx->buf[3] = 0x10325476;
-        ctx->bits[0] = 0;
-        ctx->bits[1] = 0;
-}
-/*
- * Update context to reflect the concatenation of another buffer full
- * of bytes.
- */
-void
-cifs_MD5_update(struct MD5Context *ctx, unsigned char const *buf, unsigned len)
-{
-        register __u32 t;
-        /* Update bitcount */
-        t = ctx->bits[0];
-        if ((ctx->bits[0] = t + ((__u32) len << 3)) < t)
-                ctx->bits[1]++; /* Carry from low to high */
-        ctx->bits[1] += len >> 29;
-        t = (t >> 3) & 0x3f;    /* Bytes already in shsInfo->data */
-        /* Handle any leading odd-sized chunks */
-        if (t) {
-                unsigned char *p = (unsigned char *) ctx->in + t;
-                t = 64 - t;
-                if (len < t) {
-                        memmove(p, buf, len);
-                        return;
-                }
-                memmove(p, buf, t);
-                byteReverse(ctx->in, 16);
-                MD5Transform(ctx->buf, (__u32 *) ctx->in);
-                buf += t;
-                len -= t;
-        }
-        /* Process data in 64-byte chunks */
-        while (len >= 64) {
-                memmove(ctx->in, buf, 64);
-                byteReverse(ctx->in, 16);
-                MD5Transform(ctx->buf, (__u32 *) ctx->in);
-                buf += 64;
-                len -= 64;
-        }
-        /* Handle any remaining bytes of data. */
-        memmove(ctx->in, buf, len);
-}
-/*
- * Final wrapup - pad to 64-byte boundary with the bit pattern
- * 1 0* (64-bit count of bits processed, MSB-first)
- */
-void
-cifs_MD5_final(unsigned char digest[16], struct MD5Context *ctx)
-{
-        unsigned int count;
-        unsigned char *p;
-        /* Compute number of bytes mod 64 */
-        count = (ctx->bits[0] >> 3) & 0x3F;
-        /* Set the first char of padding to 0x80.  This is safe since there is
-           always at least one byte free */
-        p = ctx->in + count;
-        *p++ = 0x80;
-        /* Bytes of padding needed to make 64 bytes */
-        count = 64 - 1 - count;
-        /* Pad out to 56 mod 64 */
-        if (count < 8) {
-                /* Two lots of padding:  Pad the first block to 64 bytes */
-                memset(p, 0, count);
-                byteReverse(ctx->in, 16);
-                MD5Transform(ctx->buf, (__u32 *) ctx->in);
-                /* Now fill the next block with 56 bytes */
-                memset(ctx->in, 0, 56);
-        } else {
-                /* Pad block to 56 bytes */
-                memset(p, 0, count - 8);
-        }
-        byteReverse(ctx->in, 14);
-        /* Append length in bits and transform */
-        ((__u32 *) ctx->in)[14] = ctx->bits[0];
-        ((__u32 *) ctx->in)[15] = ctx->bits[1];
-        MD5Transform(ctx->buf, (__u32 *) ctx->in);
-        byteReverse((unsigned char *) ctx->buf, 4);
-        memmove(digest, ctx->buf, 16);
-        memset(ctx, 0, sizeof(*ctx));   /* In case it's sensitive */
-}
-/* The four core functions - F1 is optimized somewhat */
-/* #define F1(x, y, z) (x & y | ~x & z) */
-#define F1(x, y, z) (z ^ (x & (y ^ z)))
-#define F2(x, y, z) F1(z, x, y)
-#define F3(x, y, z) (x ^ y ^ z)
-#define F4(x, y, z) (y ^ (x | ~z))
-/* This is the central step in the MD5 algorithm. */
-#define MD5STEP(f, w, x, y, z, data, s) \
-        (w += f(x, y, z) + data,  w = w<<s | w>>(32-s),  w += x)
-/*
- * The core of the MD5 algorithm, this alters an existing MD5 hash to
- * reflect the addition of 16 longwords of new data.  cifs_MD5_update blocks
- * the data and converts bytes into longwords for this routine.
- */
-static void
-MD5Transform(__u32 buf[4], __u32 const in[16])
-{
-        register __u32 a, b, c, d;
-        a = buf[0];
-        b = buf[1];
-        c = buf[2];
-        d = buf[3];
-        MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
-        MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
-        MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
-        MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
-        MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
-        MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
-        MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
-        MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
-        MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
-        MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
-        MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
-        MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
-        MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
-        MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
-        MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
-        MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
-        MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
-        MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
-        MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
-        MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
-        MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
-        MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
-        MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
-        MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
-        MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
-        MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
-        MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
-        MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
-        MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
-        MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
-        MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
-        MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
-        MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
-        MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
-        MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
-        MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
-        MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
-        MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
-        MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
-        MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
-        MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
-        MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
-        MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
-        MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
-        MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
-        MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
-        MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
-        MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
-        MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
-        MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
-        MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
-        MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
-        MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
-        MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
-        MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
-        MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
-        MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
-        MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
-        MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
-        MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
-        MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
-        MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
-        MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
-        MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
-        buf[0] += a;
-        buf[1] += b;
-        buf[2] += c;
-        buf[3] += d;
-}
-#if 0   /* currently unused */
-/***********************************************************************
- the rfc 2104 version of hmac_md5 initialisation.
-***********************************************************************/
-static void
-hmac_md5_init_rfc2104(unsigned char *key, int key_len,
-                      struct HMACMD5Context *ctx)
-{
-        int i;
-        /* if key is longer than 64 bytes reset it to key=MD5(key) */
-        if (key_len > 64) {
-                unsigned char tk[16];
-                struct MD5Context tctx;
-                cifs_MD5_init(&tctx);
-                cifs_MD5_update(&tctx, key, key_len);
-                cifs_MD5_final(tk, &tctx);
-                key = tk;
-                key_len = 16;
-        }
-        /* start out by storing key in pads */
-        memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad));
-        memset(ctx->k_opad, 0, sizeof(ctx->k_opad));
-        memcpy(ctx->k_ipad, key, key_len);
-        memcpy(ctx->k_opad, key, key_len);
-        /* XOR key with ipad and opad values */
-        for (i = 0; i < 64; i++) {
-                ctx->k_ipad[i] ^= 0x36;
-                ctx->k_opad[i] ^= 0x5c;
-        }
-        cifs_MD5_init(&ctx->ctx);
-        cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64);
-}
-#endif
-/***********************************************************************
- the microsoft version of hmac_md5 initialisation.
-***********************************************************************/
-void
-hmac_md5_init_limK_to_64(const unsigned char *key, int key_len,
-                         struct HMACMD5Context *ctx)
-{
-        int i;
-        /* if key is longer than 64 bytes truncate it */
-        if (key_len > 64)
-                key_len = 64;
-        /* start out by storing key in pads */
-        memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad));
-        memset(ctx->k_opad, 0, sizeof(ctx->k_opad));
-        memcpy(ctx->k_ipad, key, key_len);
-        memcpy(ctx->k_opad, key, key_len);
-        /* XOR key with ipad and opad values */
-        for (i = 0; i < 64; i++) {
-                ctx->k_ipad[i] ^= 0x36;
-                ctx->k_opad[i] ^= 0x5c;
-        }
-        cifs_MD5_init(&ctx->ctx);
-        cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64);
-}
-/***********************************************************************
- update hmac_md5 "inner" buffer
-***********************************************************************/
-void
-hmac_md5_update(const unsigned char *text, int text_len,
-                struct HMACMD5Context *ctx)
-{
-        cifs_MD5_update(&ctx->ctx, text, text_len);     /* then text of datagram */
-}
-/***********************************************************************
- finish off hmac_md5 "inner" buffer and generate outer one.
-***********************************************************************/
-void
-hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx)
-{
-        struct MD5Context ctx_o;
-        cifs_MD5_final(digest, &ctx->ctx);
-        cifs_MD5_init(&ctx_o);
-        cifs_MD5_update(&ctx_o, ctx->k_opad, 64);
-        cifs_MD5_update(&ctx_o, digest, 16);
-        cifs_MD5_final(digest, &ctx_o);
-}
-/***********************************************************
- single function to calculate an HMAC MD5 digest from data.
- use the microsoft hmacmd5 init method because the key is 16 bytes.
-************************************************************/
-#if 0 /* currently unused */
-static void
-hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
-         unsigned char *digest)
-{
-        struct HMACMD5Context ctx;
-        hmac_md5_init_limK_to_64(key, 16, &ctx);
-        if (data_len != 0)
-                hmac_md5_update(data, data_len, &ctx);
-        hmac_md5_final(digest, &ctx);
-}
-#endif
diff --git a/fs/cifs/md5.h b/fs/cifs/md5.h
deleted file mode 100644
index 6fba8cb402fd..000000000000
--- a/fs/cifs/md5.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef MD5_H
-#define MD5_H
-#ifndef HEADER_MD5_H
-/* Try to avoid clashes with OpenSSL */
-#define HEADER_MD5_H
-#endif
-struct MD5Context {
-        __u32 buf[4];
-        __u32 bits[2];
-        unsigned char in[64];
-};
-#endif                          /* !MD5_H */
-#ifndef _HMAC_MD5_H
-struct HMACMD5Context {
-        struct MD5Context ctx;
-        unsigned char k_ipad[65];
-        unsigned char k_opad[65];
-};
-#endif                          /* _HMAC_MD5_H */
-void cifs_MD5_init(struct MD5Context *context);
-void cifs_MD5_update(struct MD5Context *context, unsigned char const *buf,
-                        unsigned len);
-void cifs_MD5_final(unsigned char digest[16], struct MD5Context *context);
-/* The following definitions come from lib/hmacmd5.c  */
-/* void hmac_md5_init_rfc2104(unsigned char *key, int key_len,
-                        struct HMACMD5Context *ctx);*/
-void hmac_md5_init_limK_to_64(const unsigned char *key, int key_len,
-                        struct HMACMD5Context *ctx);
-void hmac_md5_update(const unsigned char *text, int text_len,
-                        struct HMACMD5Context *ctx);
-void hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx);
-/* void hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
-                        unsigned char *digest);*/
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index a09e077ba925..2a930a752a78 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -236,10 +236,7 @@ __u16 GetNextMid(struct TCP_Server_Info *server)
 {
        __u16 mid = 0;
        __u16 last_mid;
-        int   collision;
+        bool collision;
-        if (server == NULL)
-                return mid;
        spin_lock(&GlobalMid_Lock);
        last_mid = server->CurrentMid; /* we do not want to loop forever */
@@ -252,24 +249,38 @@ __u16 GetNextMid(struct TCP_Server_Info *server)
        (and it would also have to have been a request that
         did not time out) */
        while (server->CurrentMid != last_mid) {
-                struct list_head *tmp;
                struct mid_q_entry *mid_entry;
+                unsigned int num_mids;
-                collision = 0;
+                collision = false;
                if (server->CurrentMid == 0)
                        server->CurrentMid++;
-                list_for_each(tmp, &server->pending_mid_q) {
+                num_mids = 0;
-                        mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
+                list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) {
+                        ++num_mids;
-                        if ((mid_entry->mid == server->CurrentMid) &&
+                        if (mid_entry->mid == server->CurrentMid &&
-                            (mid_entry->midState == MID_REQUEST_SUBMITTED)) {
+                            mid_entry->midState == MID_REQUEST_SUBMITTED) {
                                /* This mid is in use, try a different one */
-                                collision = 1;
+                                collision = true;
                                break;
                        }
                }
-                if (collision == 0) {
+                /*
+                 * if we have more than 32k mids in the list, then something
+                 * is very wrong. Possibly a local user is trying to DoS the
+                 * box by issuing long-running calls and SIGKILL'ing them. If
+                 * we get to 2^16 mids then we're in big trouble as this
+                 * function could loop forever.
+                 *
+                 * Go ahead and assign out the mid in this situation, but force
+                 * an eventual reconnect to clean out the pending_mid_q.
+                 */
+                if (num_mids > 32768)
+                        server->tcpStatus = CifsNeedReconnect;
+                if (!collision) {
                        mid = server->CurrentMid;
                        break;
                }
@@ -381,29 +392,31 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
 }
 static int
-checkSMBhdr(struct smb_hdr *smb, __u16 mid)
+check_smb_hdr(struct smb_hdr *smb, __u16 mid)
 {
-        /* Make sure that this really is an SMB, that it is a response,
+        /* does it have the right SMB "signature" ? */
-           and that the message ids match */
+        if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) {
-        if ((*(__le32 *) smb->Protocol == cpu_to_le32(0x424d53ff)) &&
+                cERROR(1, "Bad protocol string signature header 0x%x",
-                (mid == smb->Mid)) {
+                        *(unsigned int *)smb->Protocol);
-                if (smb->Flags & SMBFLG_RESPONSE)
+                return 1;
-                        return 0;
+        }
-                else {
-                /* only one valid case where server sends us request */
+        /* Make sure that message ids match */
-                        if (smb->Command == SMB_COM_LOCKING_ANDX)
+        if (mid != smb->Mid) {
-                                return 0;
+                cERROR(1, "Mids do not match. received=%u expected=%u",
-                        else
+                        smb->Mid, mid);
-                                cERROR(1, "Received Request not response");
+                return 1;
-                }
-        } else { /* bad signature or mid */
-                if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff))
-                        cERROR(1, "Bad protocol string signature header %x",
-                                *(unsigned int *) smb->Protocol);
-                if (mid != smb->Mid)
-                        cERROR(1, "Mids do not match");
        }
-        cERROR(1, "bad smb detected. The Mid=%d", smb->Mid);
+        /* if it's a response then accept */
+        if (smb->Flags & SMBFLG_RESPONSE)
+                return 0;
+        /* only one valid case where server sends us request */
+        if (smb->Command == SMB_COM_LOCKING_ANDX)
+                return 0;
+        cERROR(1, "Server sent request, not response. mid=%u", smb->Mid);
        return 1;
 }
@@ -448,7 +461,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
                return 1;
        }
-        if (checkSMBhdr(smb, mid))
+        if (check_smb_hdr(smb, mid))
                return 1;
        clc_len = smbCalcSize_LE(smb);
@@ -465,25 +478,26 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
                        if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF))
                                return 0; /* bcc wrapped */
                }
-                cFYI(1, "Calculated size %d vs length %d mismatch for mid %d",
+                cFYI(1, "Calculated size %u vs length %u mismatch for mid=%u",
                                clc_len, 4 + len, smb->Mid);
-                /* Windows XP can return a few bytes too much, presumably
-                an illegal pad, at the end of byte range lock responses
+                if (4 + len < clc_len) {
-                so we allow for that three byte pad, as long as actual
+                        cERROR(1, "RFC1001 size %u smaller than SMB for mid=%u",
-                received length is as long or longer than calculated length */
-                /* We have now had to extend this more, since there is a
-                case in which it needs to be bigger still to handle a
-                malformed response to transact2 findfirst from WinXP when
-                access denied is returned and thus bcc and wct are zero
-                but server says length is 0x21 bytes too long as if the server
-                forget to reset the smb rfc1001 length when it reset the
-                wct and bcc to minimum size and drop the t2 parms and data */
-                if ((4+len > clc_len) && (len <= clc_len + 512))
-                        return 0;
-                else {
-                        cERROR(1, "RFC1001 size %d bigger than SMB for Mid=%d",
                                        len, smb->Mid);
                        return 1;
+                } else if (len > clc_len + 512) {
+                        /*
+                         * Some servers (Windows XP in particular) send more
+                         * data than the lengths in the SMB packet would
+                         * indicate on certain calls (byte range locks and
+                         * trans2 find first calls in particular). While the
+                         * client can handle such a frame by ignoring the
+                         * trailing data, we choose to limit the amount of extra
+                         * data to 512 bytes.
+                         */
+                        cERROR(1, "RFC1001 size %u more than 512 bytes larger "
+                                  "than SMB for mid=%u", len, smb->Mid);
+                        return 1;
                }
        }
        return 0;
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 8d9189f64477..79f641eeda30 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -170,7 +170,7 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
 {
        int rc, alen, slen;
        const char *pct;
-        char *endp, scope_id[13];
+        char scope_id[13];
        struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
        struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
@@ -197,9 +197,9 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
                memcpy(scope_id, pct + 1, slen);
                scope_id[slen] = '\0';
-                s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0);
+                rc = strict_strtoul(scope_id, 0,
-                if (endp != scope_id + slen)
+                                        (unsigned long *)&s6->sin6_scope_id);
-                        return 0;
+                rc = (rc == 0) ? 1 : 0;
        }
        return rc;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 7f25cc3d2256..f8e4cd2a7912 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -764,7 +764,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 {
        int rc = 0;
        int xid, i;
-        struct cifs_sb_info *cifs_sb;
        struct cifsTconInfo *pTcon;
        struct cifsFileInfo *cifsFile = NULL;
        char *current_entry;
@@ -775,8 +774,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
        xid = GetXid();
-        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
        /*
         * Ensure FindFirst doesn't fail before doing filldir() for '.' and
         * '..'. Otherwise we won't be able to notify VFS in case of failure.
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 1adc9625a344..16765703131b 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -656,13 +656,13 @@ ssetup_ntlmssp_authenticate:
        if (type == LANMAN) {
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
-                char lnm_session_key[CIFS_SESS_KEY_SIZE];
+                char lnm_session_key[CIFS_AUTH_RESP_SIZE];
                pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
                /* no capabilities flags in old lanman negotiation */
-                pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
+                pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
                /* Calculate hash with password and copy into bcc_ptr.
                 * Encryption Key (stored as in cryptkey) gets used if the
@@ -675,8 +675,8 @@ ssetup_ntlmssp_authenticate:
                                        true : false, lnm_session_key);
                ses->flags |= CIFS_SES_LANMAN;
-                memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE);
+                memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
-                bcc_ptr += CIFS_SESS_KEY_SIZE;
+                bcc_ptr += CIFS_AUTH_RESP_SIZE;
                /* can not sign if LANMAN negotiated so no need
                to calculate signing key? but what if server
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index b6b6dcb500bf..04721485925d 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -45,7 +45,6 @@
   up with a different answer to the one above)
 */
 #include <linux/slab.h>
-#include "cifsencrypt.h"
 #define uchar unsigned char
 static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9,
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 192ea51af20f..b5041c849981 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -32,9 +32,8 @@
 #include "cifs_unicode.h"
 #include "cifspdu.h"
 #include "cifsglob.h"
-#include "md5.h"
 #include "cifs_debug.h"
-#include "cifsencrypt.h"
+#include "cifsproto.h"
 #ifndef false
 #define false 0
@@ -48,14 +47,58 @@
 #define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
 #define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val)))
-/*The following definitions come from  libsmb/smbencrypt.c  */
+/* produce a md4 message digest from data of length n bytes */
+int
+mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
+{
+        int rc;
+        unsigned int size;
+        struct crypto_shash *md4;
+        struct sdesc *sdescmd4;
+        md4 = crypto_alloc_shash("md4", 0, 0);
+        if (IS_ERR(md4)) {
+                rc = PTR_ERR(md4);
+                cERROR(1, "%s: Crypto md4 allocation error %d\n", __func__, rc);
+                return rc;
+        }
+        size = sizeof(struct shash_desc) + crypto_shash_descsize(md4);
+        sdescmd4 = kmalloc(size, GFP_KERNEL);
+        if (!sdescmd4) {
+                rc = -ENOMEM;
+                cERROR(1, "%s: Memory allocation failure\n", __func__);
+                goto mdfour_err;
+        }
+        sdescmd4->shash.tfm = md4;
+        sdescmd4->shash.flags = 0x0;
+        rc = crypto_shash_init(&sdescmd4->shash);
+        if (rc) {
+                cERROR(1, "%s: Could not init md4 shash\n", __func__);
+                goto mdfour_err;
+        }
+        crypto_shash_update(&sdescmd4->shash, link_str, link_len);
+        rc = crypto_shash_final(&sdescmd4->shash, md4_hash);
-void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
+mdfour_err:
-                unsigned char *p24);
+        crypto_free_shash(md4);
-void E_md4hash(const unsigned char *passwd, unsigned char *p16);
+        kfree(sdescmd4);
-static void SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
-                   unsigned char p24[24]);
+        return rc;
-void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24);
+}
+/* Does the des encryption from the NT or LM MD4 hash. */
+static void
+SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
+              unsigned char p24[24])
+{
+        unsigned char p21[21];
+        memset(p21, '\0', 21);
+        memcpy(p21, passwd, 16);
+        E_P24(p21, c8, p24);
+}
 /*
   This implements the X/Open SMB password encryption
@@ -118,9 +161,10 @@ _my_mbstowcs(__u16 *dst, const unsigned char *src, int len)
 * Creates the MD4 Hash of the users password in NT UNICODE.
 */
-void
+int
 E_md4hash(const unsigned char *passwd, unsigned char *p16)
 {
+        int rc;
        int len;
        __u16 wpwd[129];
@@ -139,8 +183,10 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16)
        /* Calculate length in bytes */
        len = _my_wcslen(wpwd) * sizeof(__u16);
-        mdfour(p16, (unsigned char *) wpwd, len);
+        rc = mdfour(p16, (unsigned char *) wpwd, len);
        memset(wpwd, 0, 129 * 2);
+        return rc;
 }
 #if 0 /* currently unused */
@@ -212,19 +258,6 @@ ntv2_owf_gen(const unsigned char owf[16], const char *user_n,
 }
 #endif
-/* Does the des encryption from the NT or LM MD4 hash. */
-static void
-SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
-              unsigned char p24[24])
-{
-        unsigned char p21[21];
-        memset(p21, '\0', 21);
-        memcpy(p21, passwd, 16);
-        E_P24(p21, c8, p24);
-}
 /* Does the des encryption from the FIRST 8 BYTES of the NT or LM MD4 hash. */
 #if 0 /* currently unused */
 static void
@@ -242,16 +275,21 @@ NTLMSSPOWFencrypt(unsigned char passwd[8],
 #endif
 /* Does the NT MD4 hash then des encryption. */
+int
-void
 SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
 {
+        int rc;
        unsigned char p21[21];
        memset(p21, '\0', 21);
-        E_md4hash(passwd, p21);
+        rc = E_md4hash(passwd, p21);
+        if (rc) {
+                cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
+                return rc;
+        }
        SMBOWFencrypt(p21, c8, p24);
+        return rc;
 }
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c1ccca1a933f..46d8756f2b24 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -236,9 +236,9 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
                server->tcpStatus = CifsNeedReconnect;
        }
-        if (rc < 0) {
+        if (rc < 0 && rc != -EINTR)
                cERROR(1, "Error %d sending data on socket to server", rc);
-        } else
+        else
                rc = 0;
        /* Don't want to modify the buffer as a
@@ -359,6 +359,10 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
        if (rc)
                return rc;
+        /* enable signing if server requires or supports it */
+        if (server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+                in_buf->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
        mutex_lock(&server->srv_mutex);
        mid = AllocMidQEntry(in_buf, server);
        if (mid == NULL) {
@@ -453,6 +457,9 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
        case MID_RETRY_NEEDED:
                rc = -EAGAIN;
                break;
+        case MID_RESPONSE_MALFORMED:
+                rc = -EIO;
+                break;
        default:
                cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__,
                        mid->mid, mid->midState);
@@ -570,17 +577,33 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
 #endif
        mutex_unlock(&ses->server->srv_mutex);
-        cifs_small_buf_release(in_buf);
-        if (rc < 0)
+        if (rc < 0) {
+                cifs_small_buf_release(in_buf);
                goto out;
+        }
-        if (long_op == CIFS_ASYNC_OP)
+        if (long_op == CIFS_ASYNC_OP) {
+                cifs_small_buf_release(in_buf);
                goto out;
+        }
        rc = wait_for_response(ses->server, midQ);
-        if (rc != 0)
+        if (rc != 0) {
-                goto out;
+                send_nt_cancel(ses->server, in_buf, midQ);
+                spin_lock(&GlobalMid_Lock);
+                if (midQ->midState == MID_REQUEST_SUBMITTED) {
+                        midQ->callback = DeleteMidQEntry;
+                        spin_unlock(&GlobalMid_Lock);
+                        cifs_small_buf_release(in_buf);
+                        atomic_dec(&ses->server->inFlight);
+                        wake_up(&ses->server->request_q);
+                        return rc;
+                }
+                spin_unlock(&GlobalMid_Lock);
+        }
+        cifs_small_buf_release(in_buf);
        rc = sync_mid_result(midQ, ses->server);
        if (rc != 0) {
@@ -724,8 +747,19 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
                goto out;
        rc = wait_for_response(ses->server, midQ);
-        if (rc != 0)
+        if (rc != 0) {
-                goto out;
+                send_nt_cancel(ses->server, in_buf, midQ);
+                spin_lock(&GlobalMid_Lock);
+                if (midQ->midState == MID_REQUEST_SUBMITTED) {
+                        /* no longer considered to be "in-flight" */
+                        midQ->callback = DeleteMidQEntry;
+                        spin_unlock(&GlobalMid_Lock);
+                        atomic_dec(&ses->server->inFlight);
+                        wake_up(&ses->server->request_q);
+                        return rc;
+                }
+                spin_unlock(&GlobalMid_Lock);
+        }
        rc = sync_mid_result(midQ, ses->server);
        if (rc != 0) {
@@ -922,10 +956,21 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
                        }
                }
-                if (wait_for_response(ses->server, midQ) == 0) {
+                rc = wait_for_response(ses->server, midQ);
-                        /* We got the response - restart system call. */
+                if (rc) {
-                        rstart = 1;
+                        send_nt_cancel(ses->server, in_buf, midQ);
+                        spin_lock(&GlobalMid_Lock);
+                        if (midQ->midState == MID_REQUEST_SUBMITTED) {
+                                /* no longer considered to be "in-flight" */
+                                midQ->callback = DeleteMidQEntry;
+                                spin_unlock(&GlobalMid_Lock);
+                                return rc;
+                        }
+                        spin_unlock(&GlobalMid_Lock);
                }
+                /* We got the response - restart system call. */
+                rstart = 1;
        }
        rc = sync_mid_result(midQ, ses->server);
diff --git a/fs/dcache.c b/fs/dcache.c
index 9f493ee4dcba..2a6bd9a4ae97 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -176,6 +176,7 @@ static void d_free(struct dentry *dentry)
 /**
 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
+ * @dentry: the target dentry
 * After this call, in-progress rcu-walk path lookup will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
@@ -281,6 +282,7 @@ static void dentry_lru_move_tail(struct dentry *dentry)
 /**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
+ * @parent: parent dentry
 *
 * The dentry must already be unhashed and removed from the LRU.
 *
@@ -1973,7 +1975,7 @@ out:
 /**
 * d_validate - verify dentry provided from insecure source (deprecated)
 * @dentry: The dentry alleged to be valid child of @dparent
- * @parent: The parent dentry (known to be valid)
+ * @dparent: The parent dentry (known to be valid)
 *
 * An insecure source has sent us a dentry, here we verify it and dget() it.
 * This is used by ncpfs in its readdir implementation.
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 9c64ae9e4c1a..2d8c87b951c2 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1468,15 +1468,13 @@ static void work_stop(void)
 static int work_start(void)
 {
-        recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM |
+        recv_workqueue = create_singlethread_workqueue("dlm_recv");
-                                         WQ_HIGHPRI | WQ_FREEZEABLE, 0);
        if (!recv_workqueue) {
                log_print("can't start dlm_recv");
                return -ENOMEM;
        }
-        send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM |
+        send_workqueue = create_singlethread_workqueue("dlm_send");
-                                         WQ_HIGHPRI | WQ_FREEZEABLE, 0);
        if (!send_workqueue) {
                log_print("can't start dlm_send");
                destroy_workqueue(recv_workqueue);
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 6fc4f319b550..534c1d46e69e 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -46,24 +46,28 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
        struct dentry *lower_dentry;
        struct vfsmount *lower_mnt;
-        struct dentry *dentry_save;
+        struct dentry *dentry_save = NULL;
-        struct vfsmount *vfsmount_save;
+        struct vfsmount *vfsmount_save = NULL;
        int rc = 1;
-        if (nd->flags & LOOKUP_RCU)
+        if (nd && nd->flags & LOOKUP_RCU)
                return -ECHILD;
        lower_dentry = ecryptfs_dentry_to_lower(dentry);
        lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
        if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
                goto out;
-        dentry_save = nd->path.dentry;
+        if (nd) {
-        vfsmount_save = nd->path.mnt;
+                dentry_save = nd->path.dentry;
-        nd->path.dentry = lower_dentry;
+                vfsmount_save = nd->path.mnt;
-        nd->path.mnt = lower_mnt;
+                nd->path.dentry = lower_dentry;
+                nd->path.mnt = lower_mnt;
+        }
        rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
-        nd->path.dentry = dentry_save;
+        if (nd) {
-        nd->path.mnt = vfsmount_save;
+                nd->path.dentry = dentry_save;
+                nd->path.mnt = vfsmount_save;
+        }
        if (dentry->d_inode) {
                struct inode *lower_inode =
                        ecryptfs_inode_to_lower(dentry->d_inode);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index dbc84ed96336..e00753496e3e 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -632,8 +632,7 @@ int ecryptfs_interpose(struct dentry *hidden_dentry,
                       u32 flags);
 int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
                                        struct dentry *lower_dentry,
-                                        struct inode *ecryptfs_dir_inode,
+                                        struct inode *ecryptfs_dir_inode);
-                                        struct nameidata *ecryptfs_nd);
 int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
                                         size_t *decrypted_name_size,
                                         struct dentry *ecryptfs_dentry,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 81e10e6a9443..7d1050e254f9 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -317,6 +317,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 const struct file_operations ecryptfs_dir_fops = {
        .readdir = ecryptfs_readdir,
+        .read = generic_read_dir,
        .unlocked_ioctl = ecryptfs_unlocked_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl = ecryptfs_compat_ioctl,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index bd33f87a1907..b592938a84bc 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -74,16 +74,20 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
        unsigned int flags_save;
        int rc;
-        dentry_save = nd->path.dentry;
+        if (nd) {
-        vfsmount_save = nd->path.mnt;
+                dentry_save = nd->path.dentry;
-        flags_save = nd->flags;
+                vfsmount_save = nd->path.mnt;
-        nd->path.dentry = lower_dentry;
+                flags_save = nd->flags;
-        nd->path.mnt = lower_mnt;
+                nd->path.dentry = lower_dentry;
-        nd->flags &= ~LOOKUP_OPEN;
+                nd->path.mnt = lower_mnt;
+                nd->flags &= ~LOOKUP_OPEN;
+        }
        rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
-        nd->path.dentry = dentry_save;
+        if (nd) {
-        nd->path.mnt = vfsmount_save;
+                nd->path.dentry = dentry_save;
-        nd->flags = flags_save;
+                nd->path.mnt = vfsmount_save;
+                nd->flags = flags_save;
+        }
        return rc;
 }
@@ -241,8 +245,7 @@ out:
 */
 int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
                                        struct dentry *lower_dentry,
-                                        struct inode *ecryptfs_dir_inode,
+                                        struct inode *ecryptfs_dir_inode)
-                                        struct nameidata *ecryptfs_nd)
 {
        struct dentry *lower_dir_dentry;
        struct vfsmount *lower_mnt;
@@ -290,8 +293,6 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
                goto out;
        if (special_file(lower_inode->i_mode))
                goto out;
-        if (!ecryptfs_nd)
-                goto out;
        /* Released in this function */
        page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER);
        if (!page_virt) {
@@ -349,75 +350,6 @@ out:
 }
 /**
- * ecryptfs_new_lower_dentry
- * @name: The name of the new dentry.
- * @lower_dir_dentry: Parent directory of the new dentry.
- * @nd: nameidata from last lookup.
- *
- * Create a new dentry or get it from lower parent dir.
- */
-static struct dentry *
-ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry,
-                          struct nameidata *nd)
-{
-        struct dentry *new_dentry;
-        struct dentry *tmp;
-        struct inode *lower_dir_inode;
-        lower_dir_inode = lower_dir_dentry->d_inode;
-        tmp = d_alloc(lower_dir_dentry, name);
-        if (!tmp)
-                return ERR_PTR(-ENOMEM);
-        mutex_lock(&lower_dir_inode->i_mutex);
-        new_dentry = lower_dir_inode->i_op->lookup(lower_dir_inode, tmp, nd);
-        mutex_unlock(&lower_dir_inode->i_mutex);
-        if (!new_dentry)
-                new_dentry = tmp;
-        else
-                dput(tmp);
-        return new_dentry;
-}
-/**
- * ecryptfs_lookup_one_lower
- * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
- * @lower_dir_dentry: lower parent directory
- * @name: lower file name
- *
- * Get the lower dentry from vfs. If lower dentry does not exist yet,
- * create it.
- */
-static struct dentry *
-ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry,
-                          struct dentry *lower_dir_dentry, struct qstr *name)
-{
-        struct nameidata nd;
-        struct vfsmount *lower_mnt;
-        int err;
-        lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
-                                    ecryptfs_dentry->d_parent));
-        err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd);
-        mntput(lower_mnt);
-        if (!err) {
-                /* we dont need the mount */
-                mntput(nd.path.mnt);
-                return nd.path.dentry;
-        }
-        if (err != -ENOENT)
-                return ERR_PTR(err);
-        /* create a new lower dentry */
-        return ecryptfs_new_lower_dentry(name, lower_dir_dentry, &nd);
-}
-/**
 * ecryptfs_lookup
 * @ecryptfs_dir_inode: The eCryptfs directory inode
 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
@@ -434,7 +366,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
        size_t encrypted_and_encoded_name_size;
        struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
        struct dentry *lower_dir_dentry, *lower_dentry;
-        struct qstr lower_name;
        int rc = 0;
        if ((ecryptfs_dentry->d_name.len == 1
@@ -444,20 +375,14 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
                goto out_d_drop;
        }
        lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
-        lower_name.name = ecryptfs_dentry->d_name.name;
+        mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
-        lower_name.len = ecryptfs_dentry->d_name.len;
+        lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
-        lower_name.hash = ecryptfs_dentry->d_name.hash;
+                                      lower_dir_dentry,
-        if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
+                                      ecryptfs_dentry->d_name.len);
-                rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
+        mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
-                                lower_dir_dentry->d_inode, &lower_name);
-                if (rc < 0)
-                        goto out_d_drop;
-        }
-        lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
-                                                 lower_dir_dentry, &lower_name);
        if (IS_ERR(lower_dentry)) {
                rc = PTR_ERR(lower_dentry);
-                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
+                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
                                "[%d] on lower_dentry = [%s]\n", __func__, rc,
                                encrypted_and_encoded_name);
                goto out_d_drop;
@@ -479,28 +404,21 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
                       "filename; rc = [%d]\n", __func__, rc);
                goto out_d_drop;
        }
-        lower_name.name = encrypted_and_encoded_name;
+        mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
-        lower_name.len = encrypted_and_encoded_name_size;
+        lower_dentry = lookup_one_len(encrypted_and_encoded_name,
-        lower_name.hash = full_name_hash(lower_name.name, lower_name.len);
+                                      lower_dir_dentry,
-        if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
+                                      encrypted_and_encoded_name_size);
-                rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
+        mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
-                                lower_dir_dentry->d_inode, &lower_name);
-                if (rc < 0)
-                        goto out_d_drop;
-        }
-        lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
-                                                 lower_dir_dentry, &lower_name);
        if (IS_ERR(lower_dentry)) {
                rc = PTR_ERR(lower_dentry);
-                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
+                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
                                "[%d] on lower_dentry = [%s]\n", __func__, rc,
                                encrypted_and_encoded_name);
                goto out_d_drop;
        }
 lookup_and_interpose:
        rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry,
-                                                 ecryptfs_dir_inode,
+                                                 ecryptfs_dir_inode);
-                                                 ecryptfs_nd);
        goto out;
 out_d_drop:
        d_drop(ecryptfs_dentry);
@@ -1092,6 +1010,8 @@ int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
        rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
                         ecryptfs_dentry_to_lower(dentry), &lower_stat);
        if (!rc) {
+                fsstack_copy_attr_all(dentry->d_inode,
+                                      ecryptfs_inode_to_lower(dentry->d_inode));
                generic_fillattr(dentry->d_inode, stat);
                stat->blocks = lower_stat.blocks;
        }
diff --git a/fs/eventfd.c b/fs/eventfd.c
index e0194b3e14d6..d9a591773919 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -99,7 +99,7 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_get);
 * @ctx: [in] Pointer to eventfd context.
 *
 * The eventfd context reference must have been previously acquired either
- * with eventfd_ctx_get() or eventfd_ctx_fdget()).
+ * with eventfd_ctx_get() or eventfd_ctx_fdget().
 */
 void eventfd_ctx_put(struct eventfd_ctx *ctx)
 {
@@ -146,9 +146,9 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
 * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
 * @ctx: [in] Pointer to eventfd context.
 * @wait: [in] Wait queue to be removed.
- * @cnt: [out] Pointer to the 64bit conter value.
+ * @cnt: [out] Pointer to the 64-bit counter value.
 *
- * Returns zero if successful, or the following error codes:
+ * Returns %0 if successful, or the following error codes:
 *
 * -EAGAIN      : The operation would have blocked.
 *
@@ -175,11 +175,11 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
 * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
 * @ctx: [in] Pointer to eventfd context.
 * @no_wait: [in] Different from zero if the operation should not block.
- * @cnt: [out] Pointer to the 64bit conter value.
+ * @cnt: [out] Pointer to the 64-bit counter value.
 *
- * Returns zero if successful, or the following error codes:
+ * Returns %0 if successful, or the following error codes:
 *
- * -EAGAIN      : The operation would have blocked but @no_wait was nonzero.
+ * -EAGAIN      : The operation would have blocked but @no_wait was non-zero.
 * -ERESTARTSYS : A signal interrupted the wait operation.
 *
 * If @no_wait is zero, the function might sleep until the eventfd internal
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cc8a9b7d6064..4a09af9e9a63 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
 * cleanup path and it is also acquired by eventpoll_release_file()
 * if a file has been pushed inside an epoll set and it is then
 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
+ * It is also acquired when inserting an epoll fd onto another epoll
+ * fd. We do this so that we walk the epoll tree and ensure that this
+ * insertion does not create a cycle of epoll file descriptors, which
+ * could lead to deadlock. We need a global mutex to prevent two
+ * simultaneous inserts (A into B and B into A) from racing and
+ * constructing a cycle without either insert observing that it is
+ * going to.
 * It is possible to drop the "ep->mtx" and to use the global
 * mutex "epmutex" (together with "ep->lock") to have it working,
 * but having "ep->mtx" will make the interface more scalable.
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
 */
 static DEFINE_MUTEX(epmutex);
+/* Used to check for epoll file descriptor inclusion loops */
+static struct nested_calls poll_loop_ncalls;
 /* Used for safe wake up implementation */
 static struct nested_calls poll_safewake_ncalls;
@@ -1114,6 +1124,17 @@ static int ep_send_events(struct eventpoll *ep,
        return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
 }
+/*
+ * Turn a relative timeout expressed in milliseconds into an absolute
+ * timespec: the current ktime_get_ts() reading plus @ms, combined with
+ * timespec_add_safe().
+ */
+static inline struct timespec ep_set_mstimeout(long ms)
+{
+        struct timespec base;
+        struct timespec delta;
+        delta.tv_sec = ms / MSEC_PER_SEC;
+        delta.tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC);
+        ktime_get_ts(&base);
+        return timespec_add_safe(base, delta);
+}
 static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
                   int maxevents, long timeout)
 {
@@ -1121,12 +1142,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
        unsigned long flags;
        long slack;
        wait_queue_t wait;
-        struct timespec end_time;
        ktime_t expires, *to = NULL;
        if (timeout > 0) {
-                ktime_get_ts(&end_time);
+                struct timespec end_time = ep_set_mstimeout(timeout);
-                timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
                slack = select_estimate_accuracy(&end_time);
                to = &expires;
                *to = timespec_to_ktime(end_time);
@@ -1188,6 +1208,62 @@ retry:
        return res;
 }
+/**
+ * ep_loop_check_proc - Callback function to be passed to the ep_call_nested()
+ *                      API, to verify that adding an epoll file inside another
+ *                      epoll structure does not violate the constraints, in
+ *                      terms of closed loops or too deep chains (which can
+ *                      result in excessive stack usage).
+ *
+ * @priv: Pointer to the epoll file currently being checked.
+ * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
+ *          data structure pointer. Not used by this function itself.
+ * @call_nests: Current depth of the ep_call_nested() call stack. Not used by
+ *              this function itself.
+ *
+ * Walks every item registered in the epoll file while holding its ->mtx and
+ * recurses, through ep_call_nested(), into each item that is itself an epoll
+ * file. The walk stops at the first non-zero result.
+ *
+ * Returns: Zero if every nested check returned zero, otherwise the first
+ *          non-zero value returned by ep_call_nested().
+ */
+static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
+{
+        struct file *file = priv;
+        struct eventpoll *ep = file->private_data;
+        struct rb_node *node;
+        int ret = 0;
+        mutex_lock(&ep->mtx);
+        for (node = rb_first(&ep->rbr); node; node = rb_next(node)) {
+                struct epitem *item = rb_entry(node, struct epitem, rbn);
+                struct file *child = item->ffd.file;
+                /* Only nested epoll files can contribute to a loop. */
+                if (likely(!is_file_epoll(child)))
+                        continue;
+                ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+                                     ep_loop_check_proc, child,
+                                     child->private_data, current);
+                if (ret)
+                        break;
+        }
+        mutex_unlock(&ep->mtx);
+        return ret;
+}
+/**
+ * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
+ *                 inside another epoll file (represented by @ep) does not
+ *                 create closed loops or too deep chains.
+ *
+ * @ep: Pointer to the epoll private data structure; passed to
+ *      ep_call_nested() as the cookie.
+ * @file: Pointer to the epoll file to be checked.
+ *
+ * Returns: The result of ep_call_nested() running ep_loop_check_proc() on
+ *          @file: zero if the insertion does not violate the constraints,
+ *          non-zero otherwise.
+ */
+static int ep_loop_check(struct eventpoll *ep, struct file *file)
+{
+        int verdict;
+        verdict = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+                                 ep_loop_check_proc, file, ep, current);
+        return verdict;
+}
 /*
 * Open an eventpoll file descriptor.
 */
@@ -1236,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                struct epoll_event __user *, event)
 {
        int error;
+        int did_lock_epmutex = 0;
        struct file *file, *tfile;
        struct eventpoll *ep;
        struct epitem *epi;
@@ -1277,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
         */
        ep = file->private_data;
+        /*
+         * When we insert an epoll file descriptor, inside another epoll file
+         * descriptor, there is the chance of creating closed loops, which are
+         * better handled here than in more critical paths.
+         *
+         * We hold epmutex across the loop check and the insert in this case, in
+         * order to prevent two separate inserts from racing and each doing the
+         * insert "at the same time" such that ep_loop_check passes on both
+         * before either one does the insert, thereby creating a cycle.
+         */
+        if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+                mutex_lock(&epmutex);
+                did_lock_epmutex = 1;
+                error = -ELOOP;
+                if (ep_loop_check(ep, tfile) != 0)
+                        goto error_tgt_fput;
+        }
        mutex_lock(&ep->mtx);
        /*
@@ -1312,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
        mutex_unlock(&ep->mtx);
 error_tgt_fput:
+        if (unlikely(did_lock_epmutex))
+                mutex_unlock(&epmutex);
        fput(tfile);
 error_fput:
        fput(file);
@@ -1431,6 +1530,12 @@ static int __init eventpoll_init(void)
                EP_ITEM_COST;
        BUG_ON(max_user_watches < 0);
+        /*
+         * Initialize the structure used to perform epoll file descriptor
+         * inclusion loops checks.
+         */
+        ep_nested_calls_init(&poll_loop_ncalls);
        /* Initialize the structure used to perform safe poll wait head wake ups */
        ep_nested_calls_init(&poll_safewake_ncalls);
diff --git a/fs/exec.c b/fs/exec.c
index c62efcb959c7..52a447d9b6ab 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -120,7 +120,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
                goto out;
        file = do_filp_open(AT_FDCWD, tmp,
-                                O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0,
+                                O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
                                MAY_READ | MAY_EXEC | MAY_OPEN);
        putname(tmp);
        error = PTR_ERR(file);
@@ -723,7 +723,7 @@ struct file *open_exec(const char *name)
        int err;
        file = do_filp_open(AT_FDCWD, name,
-                                O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0,
+                                O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
                                MAY_EXEC | MAY_OPEN);
        if (IS_ERR(file))
                goto out;
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 42685424817b..a7555238c41a 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1030,7 +1030,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
                memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
        }
-        inode->i_mapping->backing_dev_info = sb->s_bdi;
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &exofs_file_inode_operations;
                inode->i_fop = &exofs_file_operations;
@@ -1131,7 +1130,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
        sbi = sb->s_fs_info;
-        inode->i_mapping->backing_dev_info = sb->s_bdi;
        sb->s_dirt = 1;
        inode_init_owner(inode, dir, mode);
        inode->i_ino = sbi->s_nextid++;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 264e95d02830..4d70db110cfc 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -272,7 +272,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
                new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
                if (!new_de)
                        goto out_dir;
-                inode_inc_link_count(old_inode);
                err = exofs_set_link(new_dir, new_de, new_page, old_inode);
                new_inode->i_ctime = CURRENT_TIME;
                if (dir_de)
@@ -286,12 +285,9 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        if (new_dir->i_nlink >= EXOFS_LINK_MAX)
                                goto out_dir;
                }
-                inode_inc_link_count(old_inode);
                err = exofs_add_link(new_dentry, old_inode);
-                if (err) {
+                if (err)
-                        inode_dec_link_count(old_inode);
                        goto out_dir;
-                }
                if (dir_de)
                        inode_inc_link_count(new_dir);
        }
@@ -299,7 +295,7 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
        old_inode->i_ctime = CURRENT_TIME;
        exofs_delete_entry(old_de, old_page);
-        inode_dec_link_count(old_inode);
+        mark_inode_dirty(old_inode);
        if (dir_de) {
                err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2e1d8341d827..adb91855ccd0 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -344,7 +344,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
                new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page);
                if (!new_de)
                        goto out_dir;
-                inode_inc_link_count(old_inode);
                ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
                new_inode->i_ctime = CURRENT_TIME_SEC;
                if (dir_de)
@@ -356,12 +355,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
                        if (new_dir->i_nlink >= EXT2_LINK_MAX)
                                goto out_dir;
                }
-                inode_inc_link_count(old_inode);
                err = ext2_add_link(new_dentry, old_inode);
-                if (err) {
+                if (err)
-                        inode_dec_link_count(old_inode);
                        goto out_dir;
-                }
                if (dir_de)
                        inode_inc_link_count(new_dir);
        }
@@ -369,12 +365,11 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
        /*
         * Like most other Unix systems, set the ctime for inodes on a
         * rename.
-         * inode_dec_link_count() will mark the inode dirty.
         */
        old_inode->i_ctime = CURRENT_TIME_SEC;
+        mark_inode_dirty(old_inode);
        ext2_delete_entry (old_de, old_page);
-        inode_dec_link_count(old_inode);
        if (dir_de) {
                if (old_dir != new_dir)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0c8d97b56f34..3aa0b72b3b94 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -848,6 +848,7 @@ struct ext4_inode_info {
        atomic_t i_ioend_count; /* Number of outstanding io_end structs */
        /* current io_end structure for async DIO write*/
        ext4_io_end_t *cur_aio_dio;
+        atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
        spinlock_t i_block_reservation_lock;
@@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
 #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
+/* For ioend & aio unwritten conversion wait queues */
+#define EXT4_WQ_HASH_SZ         37
+#define ext4_ioend_wq(v)   (&ext4__ioend_wq[((unsigned long)(v)) %\
+                                            EXT4_WQ_HASH_SZ])
+#define ext4_aio_mutex(v)  (&ext4__aio_mutex[((unsigned long)(v)) %\
+                                             EXT4_WQ_HASH_SZ])
+extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
+extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
 #endif  /* __KERNEL__ */
 #endif  /* _EXT4_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 63a75810b7c3..ccce8a7e94ed 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                 * that this IO needs to convertion to written when IO is
                 * completed
                 */
-                if (io)
+                if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
                        io->flag = EXT4_IO_END_UNWRITTEN;
-                else
+                        atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+                } else
                        ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
                if (ext4_should_dioread_nolock(inode))
                        map->m_flags |= EXT4_MAP_UNINIT;
@@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                 * that we need to perform convertion when IO is done.
                 */
                if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-                        if (io)
+                        if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
                                io->flag = EXT4_IO_END_UNWRITTEN;
-                        else
+                                atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+                        } else
                                ext4_set_inode_state(inode,
                                                     EXT4_STATE_DIO_UNWRITTEN);
                }
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2e8322c8aa88..7b80d543b89e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
        return 0;
 }
+/*
+ * Sleep on the inode's hashed ioend wait queue until its
+ * i_aiodio_unwritten counter reads zero.
+ */
+static void ext4_aiodio_wait(struct inode *inode)
+{
+        struct ext4_inode_info *ei = EXT4_I(inode);
+        wait_queue_head_t *waitq = ext4_ioend_wq(inode);
+        wait_event(*waitq, !atomic_read(&ei->i_aiodio_unwritten));
+}
+/*
+ * Report whether the IO described by @iov/@nr_segs at @pos is NOT aligned to
+ * the filesystem block size.
+ *
+ * Ext4 uses unwritten extents when filling holes during direct IO and only
+ * converts them to written after the IO completes.  Until then the blocks
+ * look like holes, so dio_zero_block() will assume it has to zero parts of
+ * the first and/or last block.  Two AIO threads working on the same
+ * unwritten block must therefore be synchronized, or one will zero the
+ * other's data and corrupt it.
+ *
+ * Returns 0 when @pos is at or beyond i_size, or when both the start and the
+ * end of the IO fall on a block boundary; returns 1 otherwise.
+ */
+static int
+ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
+                   unsigned long nr_segs, loff_t pos)
+{
+        int blockmask = inode->i_sb->s_blocksize - 1;
+        loff_t final_size = pos + iov_length(iov, nr_segs);
+        if (pos >= inode->i_size)
+                return 0;
+        /* Any low bit set in either endpoint means a partial block. */
+        return ((pos | final_size) & blockmask) ? 1 : 0;
+}
 static ssize_t
 ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
                unsigned long nr_segs, loff_t pos)
 {
        struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+        int unaligned_aio = 0;
+        int ret;
        /*
         * If we have encountered a bitmap-format file, the size limit
@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
                        nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
                                              sbi->s_bitmap_maxbytes - pos);
                }
+        } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
+                   !is_sync_kiocb(iocb))) {
+                unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
        }
-        return generic_file_aio_write(iocb, iov, nr_segs, pos);
+        /* Unaligned direct AIO must be serialized; see comment above */
+        if (unaligned_aio) {
+                static unsigned long unaligned_warn_time;
+                /* Warn about this once per day */
+                if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
+                        ext4_msg(inode->i_sb, KERN_WARNING,
+                                 "Unaligned AIO/DIO on inode %ld by %s; "
+                                 "performance will be poor.",
+                                 inode->i_ino, current->comm);
+                mutex_lock(ext4_aio_mutex(inode));
+                ext4_aiodio_wait(inode);
+        }
+        ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
+        if (unaligned_aio)
+                mutex_unlock(ext4_aio_mutex(inode));
+        return ret;
 }
 static const struct vm_operations_struct ext4_file_vm_ops = {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 851f49b2f9d2..d1fe09aea73d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep;
 /* We create slab caches for groupinfo data structures based on the
 * superblock block size.  There will be one per mounted filesystem for
 * each unique s_blocksize_bits */
-#define NR_GRPINFO_CACHES       \
+#define NR_GRPINFO_CACHES 8
-        (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
 static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
+static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
+        "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
+        "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
+        "ext4_groupinfo_64k", "ext4_groupinfo_128k"
+};
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
                                        ext4_group_t group);
 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -2414,6 +2419,55 @@ err_freesgi:
        return -ENOMEM;
 }
+/*
+ * Destroy every groupinfo slab cache that has been created and reset its
+ * slot in ext4_groupinfo_caches[] to NULL.
+ */
+static void ext4_groupinfo_destroy_slabs(void)
+{
+        int idx;
+        for (idx = 0; idx < NR_GRPINFO_CACHES; idx++) {
+                struct kmem_cache **slot = &ext4_groupinfo_caches[idx];
+                /* Slot never populated: nothing to tear down. */
+                if (!*slot)
+                        continue;
+                kmem_cache_destroy(*slot);
+                *slot = NULL;
+        }
+}
+/*
+ * Create the groupinfo slab cache matching block size @size, unless one
+ * already exists in ext4_groupinfo_caches[].
+ *
+ * Returns 0 if the cache exists or was created, -EINVAL if the block size
+ * maps beyond the last supported slot, and -ENOMEM if the slab cache could
+ * not be created.
+ */
+static int ext4_groupinfo_create_slab(size_t size)
+{
+        static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
+        int slab_size;
+        int blocksize_bits = order_base_2(size);
+        int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+        struct kmem_cache *cachep;
+        if (cache_index >= NR_GRPINFO_CACHES)
+                return -EINVAL;
+        if (unlikely(cache_index < 0))
+                cache_index = 0;
+        mutex_lock(&ext4_grpinfo_slab_create_mutex);
+        if (ext4_groupinfo_caches[cache_index]) {
+                mutex_unlock(&ext4_grpinfo_slab_create_mutex);
+                return 0;       /* Already created */
+        }
+        slab_size = offsetof(struct ext4_group_info,
+                                bb_counters[blocksize_bits + 2]);
+        cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
+                                        slab_size, 0, SLAB_RECLAIM_ACCOUNT,
+                                        NULL);
+        /*
+         * Publish the cache while still holding the mutex.  Storing it after
+         * the unlock would let a concurrent caller for the same block size
+         * see the slot still empty and create a second slab of the same name.
+         */
+        ext4_groupinfo_caches[cache_index] = cachep;
+        mutex_unlock(&ext4_grpinfo_slab_create_mutex);
+        if (!cachep) {
+                printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
+                return -ENOMEM;
+        }
+        return 0;
+}
 int ext4_mb_init(struct super_block *sb, int needs_recovery)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2421,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
        unsigned offset;
        unsigned max;
        int ret;
-        int cache_index;
-        struct kmem_cache *cachep;
-        char *namep = NULL;
        i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
@@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
                goto out;
        }
-        cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+        ret = ext4_groupinfo_create_slab(sb->s_blocksize);
-        cachep = ext4_groupinfo_caches[cache_index];
+        if (ret < 0)
-        if (!cachep) {
+                goto out;
-                char name[32];
-                int len = offsetof(struct ext4_group_info,
-                                        bb_counters[sb->s_blocksize_bits + 2]);
-                sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
-                namep = kstrdup(name, GFP_KERNEL);
-                if (!namep) {
-                        ret = -ENOMEM;
-                        goto out;
-                }
-                /* Need to free the kmem_cache_name() when we
-                 * destroy the slab */
-                cachep = kmem_cache_create(namep, len, 0,
-                                             SLAB_RECLAIM_ACCOUNT, NULL);
-                if (!cachep) {
-                        ret = -ENOMEM;
-                        goto out;
-                }
-                ext4_groupinfo_caches[cache_index] = cachep;
-        }
        /* order 0 is regular bitmap */
        sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
@@ -2520,7 +2550,6 @@ out:
        if (ret) {
                kfree(sbi->s_mb_offsets);
                kfree(sbi->s_mb_maxs);
-                kfree(namep);
        }
        return ret;
 }
@@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void)
 void ext4_exit_mballoc(void)
 {
-        int i;
        /*
         * Wait for completion of call_rcu()'s on ext4_pspace_cachep
         * before destroying the slab cache.
@@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void)
        kmem_cache_destroy(ext4_pspace_cachep);
        kmem_cache_destroy(ext4_ac_cachep);
        kmem_cache_destroy(ext4_free_ext_cachep);
+        ext4_groupinfo_destroy_slabs();
-        for (i = 0; i < NR_GRPINFO_CACHES; i++) {
-                struct kmem_cache *cachep = ext4_groupinfo_caches[i];
-                if (cachep) {
-                        char *name = (char *)kmem_cache_name(cachep);
-                        kmem_cache_destroy(cachep);
-                        kfree(name);
-                }
-        }
        ext4_remove_debugfs_entry();
 }
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7270dcfca92a..955cc309142f 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,14 +32,8 @@
 static struct kmem_cache *io_page_cachep, *io_end_cachep;
-#define WQ_HASH_SZ              37
-#define to_ioend_wq(v)  (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
-static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
 int __init ext4_init_pageio(void)
 {
-        int i;
        io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
        if (io_page_cachep == NULL)
                return -ENOMEM;
@@ -48,9 +42,6 @@ int __init ext4_init_pageio(void)
                kmem_cache_destroy(io_page_cachep);
                return -ENOMEM;
        }
-        for (i = 0; i < WQ_HASH_SZ; i++)
-                init_waitqueue_head(&ioend_wq[i]);
        return 0;
 }
@@ -62,7 +53,7 @@ void ext4_exit_pageio(void)
 void ext4_ioend_wait(struct inode *inode)
 {
-        wait_queue_head_t *wq = to_ioend_wq(inode);
+        wait_queue_head_t *wq = ext4_ioend_wq(inode);
        wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
 }
@@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io)
        for (i = 0; i < io->num_io_pages; i++)
                put_io_page(io->pages[i]);
        io->num_io_pages = 0;
-        wq = to_ioend_wq(io->inode);
+        wq = ext4_ioend_wq(io->inode);
        if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
            waitqueue_active(wq))
                wake_up_all(wq);
@@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
        struct inode *inode = io->inode;
        loff_t offset = io->offset;
        ssize_t size = io->size;
+        wait_queue_head_t *wq;
        int ret = 0;
        ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
@@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
        if (io->iocb)
                aio_complete(io->iocb, io->result, 0);
        /* clear the DIO AIO unwritten flag */
-        io->flag &= ~EXT4_IO_END_UNWRITTEN;
+        if (io->flag & EXT4_IO_END_UNWRITTEN) {
+                io->flag &= ~EXT4_IO_END_UNWRITTEN;
+                /* Wake up anyone waiting on unwritten extent conversion */
+                wq = ext4_ioend_wq(io->inode);
+                if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
+                    waitqueue_active(wq)) {
+                        wake_up_all(wq);
+                }
+        }
        return ret;
 }
@@ -190,6 +191,7 @@ static void ext4_end_bio(struct bio *bio, int error)
        struct inode *inode;
        unsigned long flags;
        int i;
+        sector_t bi_sector = bio->bi_sector;
        BUG_ON(!io_end);
        bio->bi_private = NULL;
@@ -207,9 +209,7 @@ static void ext4_end_bio(struct bio *bio, int error)
                if (error)
                        SetPageError(page);
                BUG_ON(!head);
-                if (head->b_size == PAGE_CACHE_SIZE)
+                if (head->b_size != PAGE_CACHE_SIZE) {
-                        clear_buffer_dirty(head);
-                else {
                        loff_t offset;
                        loff_t io_end_offset = io_end->offset + io_end->size;
@@ -221,7 +221,6 @@ static void ext4_end_bio(struct bio *bio, int error)
                                        if (error)
                                                buffer_io_error(bh);
-                                        clear_buffer_dirty(bh);
                                }
                                if (buffer_delay(bh))
                                        partial_write = 1;
@@ -257,7 +256,7 @@ static void ext4_end_bio(struct bio *bio, int error)
                             (unsigned long long) io_end->offset,
                             (long) io_end->size,
                             (unsigned long long)
-                             bio->bi_sector >> (inode->i_blkbits - 9));
+                             bi_sector >> (inode->i_blkbits - 9));
        }
        /* Add the io_end to per-inode completed io list*/
@@ -380,6 +379,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
        blocksize = 1 << inode->i_blkbits;
+        BUG_ON(!PageLocked(page));
        BUG_ON(PageWriteback(page));
        set_page_writeback(page);
        ClearPageError(page);
@@ -397,12 +397,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
        for (bh = head = page_buffers(page), block_start = 0;
             bh != head || !block_start;
             block_start = block_end, bh = bh->b_this_page) {
                block_end = block_start + blocksize;
                if (block_start >= len) {
                        clear_buffer_dirty(bh);
                        set_buffer_uptodate(bh);
                        continue;
                }
+                clear_buffer_dirty(bh);
                ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
                if (ret) {
                        /*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 48ce561fafac..f6a318f836b2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -77,6 +77,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
                       const char *dev_name, void *data);
 static void ext4_destroy_lazyinit_thread(void);
 static void ext4_unregister_li_request(struct super_block *sb);
+static void ext4_clear_request_list(void);
 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
 static struct file_system_type ext3_fs_type = {
@@ -832,6 +833,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
        ei->i_sync_tid = 0;
        ei->i_datasync_tid = 0;
        atomic_set(&ei->i_ioend_count, 0);
+        atomic_set(&ei->i_aiodio_unwritten, 0);
        return &ei->vfs_inode;
 }
@@ -2716,6 +2718,8 @@ static void ext4_unregister_li_request(struct super_block *sb)
        mutex_unlock(&ext4_li_info->li_list_mtx);
 }
+static struct task_struct *ext4_lazyinit_task;
 /*
 * This is the function where ext4lazyinit thread lives. It walks
 * through the request list searching for next scheduled filesystem.
@@ -2784,6 +2788,10 @@ cont_thread:
                if (time_before(jiffies, next_wakeup))
                        schedule();
                finish_wait(&eli->li_wait_daemon, &wait);
+                if (kthread_should_stop()) {
+                        ext4_clear_request_list();
+                        goto exit_thread;
+                }
        }
 exit_thread:
@@ -2808,6 +2816,7 @@ exit_thread:
        wake_up(&eli->li_wait_task);
        kfree(ext4_li_info);
+        ext4_lazyinit_task = NULL;
        ext4_li_info = NULL;
        mutex_unlock(&ext4_li_mtx);
@@ -2830,11 +2839,10 @@ static void ext4_clear_request_list(void)
 static int ext4_run_lazyinit_thread(void)
 {
-        struct task_struct *t;
+        ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
+                                         ext4_li_info, "ext4lazyinit");
-        t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit");
+        if (IS_ERR(ext4_lazyinit_task)) {
-        if (IS_ERR(t)) {
+                int err = PTR_ERR(ext4_lazyinit_task);
-                int err = PTR_ERR(t);
                ext4_clear_request_list();
                del_timer_sync(&ext4_li_info->li_timer);
                kfree(ext4_li_info);
@@ -2985,16 +2993,10 @@ static void ext4_destroy_lazyinit_thread(void)
         * If thread exited earlier
         * there's nothing to be done.
         */
-        if (!ext4_li_info)
+        if (!ext4_li_info || !ext4_lazyinit_task)
                return;
-        ext4_clear_request_list();
+        kthread_stop(ext4_lazyinit_task);
-        while (ext4_li_info->li_task) {
-                wake_up(&ext4_li_info->li_wait_daemon);
-                wait_event(ext4_li_info->li_wait_task,
-                           ext4_li_info->li_task == NULL);
-        }
 }
 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
@@ -4768,7 +4770,7 @@ static struct file_system_type ext4_fs_type = {
        .fs_flags       = FS_REQUIRES_DEV,
 };
-int __init ext4_init_feat_adverts(void)
+static int __init ext4_init_feat_adverts(void)
 {
        struct ext4_features *ef;
        int ret = -ENOMEM;
@@ -4792,23 +4794,44 @@ out:
        return ret;
 }
+static void ext4_exit_feat_adverts(void)
+{
+        kobject_put(&ext4_feat->f_kobj);
+        wait_for_completion(&ext4_feat->f_kobj_unregister);
+        kfree(ext4_feat);
+}
+/* Shared across all ext4 file systems */
+wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
+struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
 static int __init ext4_init_fs(void)
 {
-        int err;
+        int i, err;
        ext4_check_flag_values();
+        for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
+                mutex_init(&ext4__aio_mutex[i]);
+                init_waitqueue_head(&ext4__ioend_wq[i]);
+        }
        err = ext4_init_pageio();
        if (err)
                return err;
        err = ext4_init_system_zone();
        if (err)
-                goto out5;
+                goto out7;
        ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
        if (!ext4_kset)
-                goto out4;
+                goto out6;
        ext4_proc_root = proc_mkdir("fs/ext4", NULL);
+        if (!ext4_proc_root)
+                goto out5;
        err = ext4_init_feat_adverts();
+        if (err)
+                goto out4;
        err = ext4_init_mballoc();
        if (err)
@@ -4838,12 +4861,14 @@ out1:
 out2:
        ext4_exit_mballoc();
 out3:
-        kfree(ext4_feat);
+        ext4_exit_feat_adverts();
+out4:
        remove_proc_entry("fs/ext4", NULL);
+out5:
        kset_unregister(ext4_kset);
-out4:
+out6:
        ext4_exit_system_zone();
-out5:
+out7:
        ext4_exit_pageio();
        return err;
 }
@@ -4857,6 +4882,7 @@ static void __exit ext4_exit_fs(void)
        destroy_inodecache();
        ext4_exit_xattr();
        ext4_exit_mballoc();
+        ext4_exit_feat_adverts();
        remove_proc_entry("fs/ext4", NULL);
        kset_unregister(ext4_kset);
        ext4_exit_system_zone();
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ecc8b3954ed6..cb1026181bdc 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -815,7 +815,7 @@ static int __init fcntl_init(void)
                __O_SYNC        | O_DSYNC       | FASYNC        |
                O_DIRECT        | O_LARGEFILE   | O_DIRECTORY   |
                O_NOFOLLOW      | O_NOATIME     | O_CLOEXEC     |
-                FMODE_EXEC
+                __FMODE_EXEC
                ));
        fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/file_table.c b/fs/file_table.c
index c3e89adf53c0..eb36b6b17e26 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -125,13 +125,13 @@ struct file *get_empty_filp(void)
                goto fail;
        percpu_counter_inc(&nr_files);
+        f->f_cred = get_cred(cred);
        if (security_file_alloc(f))
                goto fail_sec;
        INIT_LIST_HEAD(&f->f_u.fu_list);
        atomic_long_set(&f->f_count, 1);
        rwlock_init(&f->f_owner.lock);
-        f->f_cred = get_cred(cred);
        spin_lock_init(&f->f_lock);
        eventpoll_init_file(f);
        /* f->f_version: 0 */
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bfed8447ed80..83543b5ff941 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1283,8 +1283,11 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
        if (err)
                return err;
-        if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
+        if (attr->ia_valid & ATTR_OPEN) {
-                return 0;
+                if (fc->atomic_o_trunc)
+                        return 0;
+                file = NULL;
+        }
        if (attr->ia_valid & ATTR_SIZE)
                is_truncate = true;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 95da1bc1c826..9e0832dbb1e3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -86,18 +86,52 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
        return ff;
 }
+static void fuse_release_async(struct work_struct *work)
+{
+        struct fuse_req *req;
+        struct fuse_conn *fc;
+        struct path path;
+        req = container_of(work, struct fuse_req, misc.release.work);
+        path = req->misc.release.path;
+        fc = get_fuse_conn(path.dentry->d_inode);
+        fuse_put_request(fc, req);
+        path_put(&path);
+}
 static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
 {
-        path_put(&req->misc.release.path);
+        if (fc->destroy_req) {
+                /*
+                 * If this is a fuseblk mount, then it's possible that
+                 * releasing the path will result in releasing the
+                 * super block and sending the DESTROY request.  If
+                 * the server is single threaded, this would hang.
+                 * For this reason do the path_put() in a separate
+                 * thread.
+                 */
+                atomic_inc(&req->count);
+                INIT_WORK(&req->misc.release.work, fuse_release_async);
+                schedule_work(&req->misc.release.work);
+        } else {
+                path_put(&req->misc.release.path);
+        }
 }
-static void fuse_file_put(struct fuse_file *ff)
+static void fuse_file_put(struct fuse_file *ff, bool sync)
 {
        if (atomic_dec_and_test(&ff->count)) {
                struct fuse_req *req = ff->reserved_req;
-                req->end = fuse_release_end;
+                if (sync) {
-                fuse_request_send_background(ff->fc, req);
+                        fuse_request_send(ff->fc, req);
+                        path_put(&req->misc.release.path);
+                        fuse_put_request(ff->fc, req);
+                } else {
+                        req->end = fuse_release_end;
+                        fuse_request_send_background(ff->fc, req);
+                }
                kfree(ff);
        }
 }
@@ -219,8 +253,12 @@ void fuse_release_common(struct file *file, int opcode)
         * Normally this will send the RELEASE request, however if
         * some asynchronous READ or WRITE requests are outstanding,
         * the sending will be delayed.
+         *
+         * Make the release synchronous if this is a fuseblk mount,
+         * synchronous RELEASE is allowed (and desirable) in this case
+         * because the server can be trusted not to screw up.
         */
-        fuse_file_put(ff);
+        fuse_file_put(ff, ff->fc->destroy_req != NULL);
 }
 static int fuse_open(struct inode *inode, struct file *file)
@@ -558,7 +596,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
                page_cache_release(page);
        }
        if (req->ff)
-                fuse_file_put(req->ff);
+                fuse_file_put(req->ff, false);
 }
 static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1137,7 +1175,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
 static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
 {
        __free_page(req->pages[0]);
-        fuse_file_put(req->ff);
+        fuse_file_put(req->ff, false);
 }
 static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ae5744a2f9e9..d4286947bc2c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,7 @@
 #include <linux/rwsem.h>
 #include <linux/rbtree.h>
 #include <linux/poll.h>
+#include <linux/workqueue.h>
 /** Max number of pages that can be used in a single read request */
 #define FUSE_MAX_PAGES_PER_REQ 32
@@ -262,7 +263,10 @@ struct fuse_req {
        /** Data for asynchronous requests */
        union {
                struct {
-                        struct fuse_release_in in;
+                        union {
+                                struct fuse_release_in in;
+                                struct work_struct work;
+                        };
                        struct path path;
                } release;
                struct fuse_init_in init_in;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 08a8beb152e6..7cd9a5a68d59 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1779,11 +1779,11 @@ int __init gfs2_glock_init(void)
 #endif
        glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
-                                          WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+                                          WQ_HIGHPRI | WQ_FREEZABLE, 0);
        if (IS_ERR(glock_workqueue))
                return PTR_ERR(glock_workqueue);
        gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
-                                                WQ_MEM_RECLAIM | WQ_FREEZEABLE,
+                                                WQ_MEM_RECLAIM | WQ_FREEZABLE,
                                                0);
        if (IS_ERR(gfs2_delete_workqueue)) {
                destroy_workqueue(glock_workqueue);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index ebef7ab6e17e..72c31a315d96 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -59,14 +59,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
        struct address_space *mapping = (struct address_space *)(gl + 1);
        gfs2_init_glock_once(gl);
-        memset(mapping, 0, sizeof(*mapping));
+        address_space_init_once(mapping);
-        INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
-        spin_lock_init(&mapping->tree_lock);
-        spin_lock_init(&mapping->i_mmap_lock);
-        INIT_LIST_HEAD(&mapping->private_list);
-        spin_lock_init(&mapping->private_lock);
-        INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
-        INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
 }
 /**
@@ -144,7 +137,7 @@ static int __init init_gfs2_fs(void)
        error = -ENOMEM;
        gfs_recovery_wq = alloc_workqueue("gfs_recovery",
-                                          WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0);
+                                          WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
        if (!gfs_recovery_wq)
                goto fail_wq;
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index afa66aaa2237..b4d70b13be92 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -238,46 +238,22 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 }
 /*
- * hfs_unlink()
+ * hfs_remove()
 *
- * This is the unlink() entry in the inode_operations structure for
+ * This serves as both unlink() and rmdir() in the inode_operations
- * regular HFS directories.  The purpose is to delete an existing
+ * structure for regular HFS directories.  The purpose is to delete
- * file, given the inode for the parent directory and the name
+ * an existing child, given the inode for the parent directory and
- * (and its length) of the existing file.
+ * the name (and its length) of the existing file or directory.
- */
-static int hfs_unlink(struct inode *dir, struct dentry *dentry)
-{
-        struct inode *inode;
-        int res;
-        inode = dentry->d_inode;
-        res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
-        if (res)
-                return res;
-        drop_nlink(inode);
-        hfs_delete_inode(inode);
-        inode->i_ctime = CURRENT_TIME_SEC;
-        mark_inode_dirty(inode);
-        return res;
-}
-/*
- * hfs_rmdir()
 *
- * This is the rmdir() entry in the inode_operations structure for
+ * HFS does not have hardlinks, so both rmdir and unlink set the
- * regular HFS directories.  The purpose is to delete an existing
+ * link count to 0.  The only difference is the emptiness check.
- * directory, given the inode for the parent directory and the name
- * (and its length) of the existing directory.
 */
-static int hfs_rmdir(struct inode *dir, struct dentry *dentry)
+static int hfs_remove(struct inode *dir, struct dentry *dentry)
 {
-        struct inode *inode;
+        struct inode *inode = dentry->d_inode;
        int res;
-        inode = dentry->d_inode;
+        if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
-        if (inode->i_size != 2)
                return -ENOTEMPTY;
        res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
        if (res)
@@ -307,7 +283,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        /* Unlink destination if it already exists */
        if (new_dentry->d_inode) {
-                res = hfs_unlink(new_dir, new_dentry);
+                res = hfs_remove(new_dir, new_dentry);
                if (res)
                        return res;
        }
@@ -332,9 +308,9 @@ const struct file_operations hfs_dir_operations = {
 const struct inode_operations hfs_dir_inode_operations = {
        .create         = hfs_create,
        .lookup         = hfs_lookup,
-        .unlink         = hfs_unlink,
+        .unlink         = hfs_remove,
        .mkdir          = hfs_mkdir,
-        .rmdir          = hfs_rmdir,
+        .rmdir          = hfs_remove,
        .rename         = hfs_rename,
        .setattr        = hfs_inode_setattr,
 };
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 52a0bcaa7b6d..b1991a2a08e0 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -397,8 +397,8 @@ int hfsplus_file_extend(struct inode *inode)
        u32 start, len, goal;
        int res;
-        if (sbi->total_blocks - sbi->free_blocks + 8 >
+        if (sbi->alloc_file->i_size * 8 <
-                        sbi->alloc_file->i_size * 8) {
+            sbi->total_blocks - sbi->free_blocks + 8) {
                /* extend alloc file */
                printk(KERN_ERR "hfs: extend alloc file! "
                                "(%llu,%u,%u)\n",
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c
index d66ad113b1cc..40ad88c12c64 100644
--- a/fs/hfsplus/part_tbl.c
+++ b/fs/hfsplus/part_tbl.c
@@ -134,7 +134,7 @@ int hfs_part_find(struct super_block *sb,
        res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK,
                                 data, READ);
        if (res)
-                return res;
+                goto out;
        switch (be16_to_cpu(*((__be16 *)data))) {
        case HFS_OLD_PMAP_MAGIC:
@@ -147,7 +147,7 @@ int hfs_part_find(struct super_block *sb,
                res = -ENOENT;
                break;
        }
+out:
        kfree(data);
        return res;
 }
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9a3b4795f43c..b49b55584c84 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -338,20 +338,22 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        struct inode *root, *inode;
        struct qstr str;
        struct nls_table *nls = NULL;
-        int err = -EINVAL;
+        int err;
+        err = -ENOMEM;	/* only failure before option parsing is the sbi allocation */
        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
        if (!sbi)
-                return -ENOMEM;
+                goto out;
        sb->s_fs_info = sbi;
        mutex_init(&sbi->alloc_mutex);
        mutex_init(&sbi->vh_mutex);
        hfsplus_fill_defaults(sbi);
+        err = -EINVAL;
        if (!hfsplus_parse_options(data, sbi)) {
                printk(KERN_ERR "hfs: unable to parse mount options\n");
-                err = -EINVAL;
+                goto out_unload_nls;
-                goto cleanup;
        }
        /* temporarily use utf8 to correctly find the hidden dir below */
@@ -359,16 +361,14 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        sbi->nls = load_nls("utf8");
        if (!sbi->nls) {
                printk(KERN_ERR "hfs: unable to load nls for utf8\n");
-                err = -EINVAL;
+                goto out_unload_nls;
-                goto cleanup;
        }
        /* Grab the volume header */
        if (hfsplus_read_wrapper(sb)) {
                if (!silent)
                        printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n");
-                err = -EINVAL;
+                goto out_unload_nls;
-                goto cleanup;
        }
        vhdr = sbi->s_vhdr;
@@ -377,7 +377,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION ||
            be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) {
                printk(KERN_ERR "hfs: wrong filesystem version\n");
-                goto cleanup;
+                goto out_free_vhdr;
        }
        sbi->total_blocks = be32_to_cpu(vhdr->total_blocks);
        sbi->free_blocks = be32_to_cpu(vhdr->free_blocks);
@@ -421,19 +421,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID);
        if (!sbi->ext_tree) {
                printk(KERN_ERR "hfs: failed to load extents file\n");
-                goto cleanup;
+                goto out_free_vhdr;
        }
        sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID);
        if (!sbi->cat_tree) {
                printk(KERN_ERR "hfs: failed to load catalog file\n");
-                goto cleanup;
+                goto out_close_ext_tree;
        }
        inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID);
        if (IS_ERR(inode)) {
                printk(KERN_ERR "hfs: failed to load allocation file\n");
                err = PTR_ERR(inode);
-                goto cleanup;
+                goto out_close_cat_tree;
        }
        sbi->alloc_file = inode;
@@ -442,14 +442,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        if (IS_ERR(root)) {
                printk(KERN_ERR "hfs: failed to load root directory\n");
                err = PTR_ERR(root);
-                goto cleanup;
+                goto out_put_alloc_file;
-        }
-        sb->s_d_op = &hfsplus_dentry_operations;
-        sb->s_root = d_alloc_root(root);
-        if (!sb->s_root) {
-                iput(root);
-                err = -ENOMEM;
-                goto cleanup;
        }
        str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
@@ -459,46 +452,69 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
                hfs_find_exit(&fd);
                if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
-                        goto cleanup;
+                        goto out_put_root;
                inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id));
                if (IS_ERR(inode)) {
                        err = PTR_ERR(inode);
-                        goto cleanup;
+                        goto out_put_root;
                }
                sbi->hidden_dir = inode;
        } else
                hfs_find_exit(&fd);
-        if (sb->s_flags & MS_RDONLY)
+        if (!(sb->s_flags & MS_RDONLY)) {
-                goto out;
+                /*
+                 * H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused
+                 * all three are registered with Apple for our use
+                 */
+                vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION);
+                vhdr->modify_date = hfsp_now2mt();
+                be32_add_cpu(&vhdr->write_count, 1);
+                vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
+                vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
+                hfsplus_sync_fs(sb, 1);
-        /* H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused
+                if (!sbi->hidden_dir) {
-         * all three are registered with Apple for our use
+                        mutex_lock(&sbi->vh_mutex);
-         */
+                        sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
-        vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION);
+                        hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str,
-        vhdr->modify_date = hfsp_now2mt();
+                                           sbi->hidden_dir);
-        be32_add_cpu(&vhdr->write_count, 1);
+                        mutex_unlock(&sbi->vh_mutex);
-        vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
-        vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
+                        hfsplus_mark_inode_dirty(sbi->hidden_dir,
-        hfsplus_sync_fs(sb, 1);
+                                                 HFSPLUS_I_CAT_DIRTY);
+                }
-        if (!sbi->hidden_dir) {
-                mutex_lock(&sbi->vh_mutex);
-                sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
-                hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode,
-                                   &str, sbi->hidden_dir);
-                mutex_unlock(&sbi->vh_mutex);
-                hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY);
        }
-out:
+        sb->s_d_op = &hfsplus_dentry_operations;
+        sb->s_root = d_alloc_root(root);
+        if (!sb->s_root) {
+                err = -ENOMEM;
+                goto out_put_hidden_dir;
+        }
        unload_nls(sbi->nls);
        sbi->nls = nls;
        return 0;
-cleanup:
+out_put_hidden_dir:
-        hfsplus_put_super(sb);
+        iput(sbi->hidden_dir);
+out_put_root:
+        iput(root);	/* drop the root inode; alloc_file is put exactly once below */
+out_put_alloc_file:
+        iput(sbi->alloc_file);
+out_close_cat_tree:
+        hfs_btree_close(sbi->cat_tree);
+out_close_ext_tree:
+        hfs_btree_close(sbi->ext_tree);
+out_free_vhdr:
+        kfree(sbi->s_vhdr);
+        kfree(sbi->s_backup_vhdr);
+out_unload_nls:
+        unload_nls(sbi->nls);
        unload_nls(nls);
+        kfree(sbi);
+out:
        return err;
 }
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 196231794f64..3031d81f5f0f 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -167,7 +167,7 @@ reread:
                break;
        case cpu_to_be16(HFSP_WRAP_MAGIC):
                if (!hfsplus_read_mdb(sbi->s_vhdr, &wd))
-                        goto out;
+                        goto out_free_backup_vhdr;
                wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
                part_start += wd.ablk_start + wd.embed_start * wd.ablk_size;
                part_size = wd.embed_count * wd.ablk_size;
@@ -179,7 +179,7 @@ reread:
                 * (should do this only for cdrom/loop though)
                 */
                if (hfs_part_find(sb, &part_start, &part_size))
-                        goto out;
+                        goto out_free_backup_vhdr;
                goto reread;
        }
diff --git a/fs/inode.c b/fs/inode.c
index da85e56378f3..0647d80accf6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -295,6 +295,20 @@ static void destroy_inode(struct inode *inode)
                call_rcu(&inode->i_rcu, i_callback);
 }
+void address_space_init_once(struct address_space *mapping)
+{
+        memset(mapping, 0, sizeof(*mapping));
+        INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
+        spin_lock_init(&mapping->tree_lock);
+        spin_lock_init(&mapping->i_mmap_lock);
+        INIT_LIST_HEAD(&mapping->private_list);
+        spin_lock_init(&mapping->private_lock);
+        INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
+        INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+        mutex_init(&mapping->unmap_mutex);
+}
+EXPORT_SYMBOL(address_space_init_once);
 /*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
@@ -308,13 +322,7 @@ void inode_init_once(struct inode *inode)
        INIT_LIST_HEAD(&inode->i_devices);
        INIT_LIST_HEAD(&inode->i_wb_list);
        INIT_LIST_HEAD(&inode->i_lru);
-        INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
+        address_space_init_once(&inode->i_data);
-        spin_lock_init(&inode->i_data.tree_lock);
-        spin_lock_init(&inode->i_data.i_mmap_lock);
-        INIT_LIST_HEAD(&inode->i_data.private_list);
-        spin_lock_init(&inode->i_data.private_lock);
-        INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
-        INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
        i_size_ordered_init(inode);
 #ifdef CONFIG_FSNOTIFY
        INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
@@ -540,11 +548,14 @@ void evict_inodes(struct super_block *sb)
 /**
 * invalidate_inodes    - attempt to free all inodes on a superblock
 * @sb:         superblock to operate on
+ * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Attempts to free all inodes for a given superblock.  If there were any
 * busy inodes return a non-zero value, else zero.
+ * If @kill_dirty is set, discard dirty inodes too, otherwise treat
+ * them as busy.
 */
-int invalidate_inodes(struct super_block *sb)
+int invalidate_inodes(struct super_block *sb, bool kill_dirty)
 {
        int busy = 0;
        struct inode *inode, *next;
@@ -556,6 +567,10 @@ int invalidate_inodes(struct super_block *sb)
        list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
                if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
                        continue;
+                if (inode->i_state & I_DIRTY && !kill_dirty) {
+                        busy = 1;
+                        continue;
+                }
                if (atomic_read(&inode->i_count)) {
                        busy = 1;
                        continue;
diff --git a/fs/internal.h b/fs/internal.h
index 0663568b1247..9b976b57d7fe 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -112,4 +112,4 @@ extern void release_open_intent(struct nameidata *);
 */
 extern int get_nr_dirty_inodes(void);
 extern void evict_inodes(struct super_block *);
-extern int invalidate_inodes(struct super_block *);
+extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index a59635e295fa..1eebeb72b202 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -273,6 +273,13 @@ int __generic_block_fiemap(struct inode *inode,
                len = isize;
        }
+        /*
+         * Some filesystems can't deal with being asked to map less than
+         * blocksize, so make sure our len is at least block length.
+         */
+        if (logical_to_blk(inode, len) == 0)
+                len = blk_to_logical(inode, 1);
        start_blk = logical_to_blk(inode, start);
        last_blk = logical_to_blk(inode, start + len - 1);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9e4686900f18..97e73469b2c4 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -473,7 +473,8 @@ int __jbd2_log_space_left(journal_t *journal)
 }
 /*
- * Called under j_state_lock.  Returns true if a transaction commit was started.
+ * Called with j_state_lock locked for writing.
+ * Returns true if a transaction commit was started.
 */
 int __jbd2_log_start_commit(journal_t *journal, tid_t target)
 {
@@ -520,11 +521,13 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
 {
        transaction_t *transaction = NULL;
        tid_t tid;
+        int need_to_start = 0;
        read_lock(&journal->j_state_lock);
        if (journal->j_running_transaction && !current->journal_info) {
                transaction = journal->j_running_transaction;
-                __jbd2_log_start_commit(journal, transaction->t_tid);
+                if (!tid_geq(journal->j_commit_request, transaction->t_tid))
+                        need_to_start = 1;
        } else if (journal->j_committing_transaction)
                transaction = journal->j_committing_transaction;
@@ -535,6 +538,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
        tid = transaction->t_tid;
        read_unlock(&journal->j_state_lock);
+        if (need_to_start)
+                jbd2_log_start_commit(journal, tid);
        jbd2_log_wait_commit(journal, tid);
        return 1;
 }
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index faad2bd787c7..1d1191050f99 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -117,10 +117,10 @@ static inline void update_t_max_wait(transaction_t *transaction)
 static int start_this_handle(journal_t *journal, handle_t *handle,
                             int gfp_mask)
 {
-        transaction_t *transaction;
+        transaction_t   *transaction, *new_transaction = NULL;
-        int needed;
+        tid_t           tid;
-        int nblocks = handle->h_buffer_credits;
+        int             needed, need_to_start;
-        transaction_t *new_transaction = NULL;
+        int             nblocks = handle->h_buffer_credits;
        if (nblocks > journal->j_max_transaction_buffers) {
                printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@@ -222,8 +222,11 @@ repeat:
                atomic_sub(nblocks, &transaction->t_outstanding_credits);
                prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
                                TASK_UNINTERRUPTIBLE);
-                __jbd2_log_start_commit(journal, transaction->t_tid);
+                tid = transaction->t_tid;
+                need_to_start = !tid_geq(journal->j_commit_request, tid);
                read_unlock(&journal->j_state_lock);
+                if (need_to_start)
+                        jbd2_log_start_commit(journal, tid);
                schedule();
                finish_wait(&journal->j_wait_transaction_locked, &wait);
                goto repeat;
@@ -442,7 +445,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
 {
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
-        int ret;
+        tid_t           tid;
+        int             need_to_start, ret;
        /* If we've had an abort of any type, don't even think about
         * actually doing the restart! */
@@ -465,8 +469,11 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
        spin_unlock(&transaction->t_handle_lock);
        jbd_debug(2, "restarting handle %p\n", handle);
-        __jbd2_log_start_commit(journal, transaction->t_tid);
+        tid = transaction->t_tid;
+        need_to_start = !tid_geq(journal->j_commit_request, tid);
        read_unlock(&journal->j_state_lock);
+        if (need_to_start)
+                jbd2_log_start_commit(journal, tid);
        lock_map_release(&handle->h_lockdep_map);
        handle->h_buffer_credits = nblocks;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 5f1bcb2f06f3..b7c99bfb3da6 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -520,7 +520,7 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
                                        struct nsm_handle *nsm,
                                        const struct nlm_reboot *info)
 {
-        struct nlm_host *host = NULL;
+        struct nlm_host *host;
        struct hlist_head *chain;
        struct hlist_node *pos;
@@ -532,12 +532,13 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
                        host->h_state++;
                        nlm_get_host(host);
-                        goto out;
+                        mutex_unlock(&nlm_host_mutex);
+                        return host;
                }
        }
-out:
        mutex_unlock(&nlm_host_mutex);
-        return host;
+        return NULL;
 }
 /**
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ce7337ddfdbf..6e6777f1b4b2 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,7 +213,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
                new_de = minix_find_entry(new_dentry, &new_page);
                if (!new_de)
                        goto out_dir;
-                inode_inc_link_count(old_inode);
                minix_set_link(new_de, new_page, old_inode);
                new_inode->i_ctime = CURRENT_TIME_SEC;
                if (dir_de)
@@ -225,18 +224,15 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
                        if (new_dir->i_nlink >= info->s_link_max)
                                goto out_dir;
                }
-                inode_inc_link_count(old_inode);
                err = minix_add_link(new_dentry, old_inode);
-                if (err) {
+                if (err)
-                        inode_dec_link_count(old_inode);
                        goto out_dir;
-                }
                if (dir_de)
                        inode_inc_link_count(new_dir);
        }
        minix_delete_entry(old_de, old_page);
-        inode_dec_link_count(old_inode);
+        mark_inode_dirty(old_inode);
        if (dir_de) {
                minix_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/namei.c b/fs/namei.c
index 7d77f24d32a9..0087cf9c2c6b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -455,14 +455,6 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
        struct fs_struct *fs = current->fs;
        struct dentry *parent = nd->path.dentry;
-        /*
-         * It can be possible to revalidate the dentry that we started
-         * the path walk with. force_reval_path may also revalidate the
-         * dentry already committed to the nameidata.
-         */
-        if (unlikely(parent == dentry))
-                return nameidata_drop_rcu(nd);
        BUG_ON(!(nd->flags & LOOKUP_RCU));
        if (nd->root.mnt) {
                spin_lock(&fs->lock);
@@ -561,39 +553,25 @@ static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
 */
 void release_open_intent(struct nameidata *nd)
 {
-        if (nd->intent.open.file->f_path.dentry == NULL)
+        struct file *file = nd->intent.open.file;
-                put_filp(nd->intent.open.file);
-        else
-                fput(nd->intent.open.file);
-}
-/*
- * Call d_revalidate and handle filesystems that request rcu-walk
- * to be dropped. This may be called and return in rcu-walk mode,
- * regardless of success or error. If -ECHILD is returned, the caller
- * must return -ECHILD back up the path walk stack so path walk may
- * be restarted in ref-walk mode.
- */
-static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
-        int status;
-        status = dentry->d_op->d_revalidate(dentry, nd);
+        if (file && !IS_ERR(file)) {
-        if (status == -ECHILD) {
+                if (file->f_path.dentry == NULL)
-                if (nameidata_dentry_drop_rcu(nd, dentry))
+                        put_filp(file);
-                        return status;
+                else
-                status = dentry->d_op->d_revalidate(dentry, nd);
+                        fput(file);
        }
+}
-        return status;
+static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+        return dentry->d_op->d_revalidate(dentry, nd);
 }
-static inline struct dentry *
+static struct dentry *
 do_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-        int status;
+        int status = d_revalidate(dentry, nd);
-        status = d_revalidate(dentry, nd);
        if (unlikely(status <= 0)) {
                /*
                 * The dentry failed validation.
@@ -602,24 +580,39 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
                 * to return a fail status.
                 */
                if (status < 0) {
-                        /* If we're in rcu-walk, we don't have a ref */
+                        dput(dentry);
-                        if (!(nd->flags & LOOKUP_RCU))
-                                dput(dentry);
                        dentry = ERR_PTR(status);
+                } else if (!d_invalidate(dentry)) {
-                } else {
+                        dput(dentry);
-                        /* Don't d_invalidate in rcu-walk mode */
+                        dentry = NULL;
-                        if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
-                                return ERR_PTR(-ECHILD);
-                        if (!d_invalidate(dentry)) {
-                                dput(dentry);
-                                dentry = NULL;
-                        }
                }
        }
        return dentry;
 }
+static inline struct dentry *
+do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
+{
+        int status = d_revalidate(dentry, nd);
+        if (likely(status > 0))
+                return dentry;
+        if (status == -ECHILD) {
+                if (nameidata_dentry_drop_rcu(nd, dentry))
+                        return ERR_PTR(-ECHILD);
+                return do_revalidate(dentry, nd);
+        }
+        if (status < 0)
+                return ERR_PTR(status);
+        /* Don't d_invalidate in rcu-walk mode */
+        if (nameidata_dentry_drop_rcu(nd, dentry))
+                return ERR_PTR(-ECHILD);
+        if (!d_invalidate(dentry)) {
+                dput(dentry);
+                dentry = NULL;
+        }
+        return dentry;
+}
 static inline int need_reval_dot(struct dentry *dentry)
 {
        if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
@@ -664,9 +657,6 @@ force_reval_path(struct path *path, struct nameidata *nd)
                return 0;
        if (!status) {
-                /* Don't d_invalidate in rcu-walk mode */
-                if (nameidata_drop_rcu(nd))
-                        return -ECHILD;
                d_invalidate(dentry);
                status = -ESTALE;
        }
@@ -773,6 +763,8 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
        int error;
        struct dentry *dentry = link->dentry;
+        BUG_ON(nd->flags & LOOKUP_RCU);
        touch_atime(link->mnt, dentry);
        nd_set_link(nd, NULL);
@@ -803,10 +795,16 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
 * Without that kind of total limit, nasty chains of consecutive
 * symlinks can cause almost arbitrarily long lookups. 
 */
-static inline int do_follow_link(struct path *path, struct nameidata *nd)
+static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
 {
        void *cookie;
        int err = -ELOOP;
+        /* We drop rcu-walk here */
+        if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
+                return -ECHILD;
+        BUG_ON(inode != path->dentry->d_inode);
        if (current->link_count >= MAX_NESTED_LINKS)
                goto loop;
        if (current->total_link_count >= 40)
@@ -1251,9 +1249,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
                        return -ECHILD;
                nd->seq = seq;
-                if (dentry->d_flags & DCACHE_OP_REVALIDATE)
+                if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
-                        goto need_revalidate;
+                        dentry = do_revalidate_rcu(dentry, nd);
-done2:
+                        if (!dentry)
+                                goto need_lookup;
+                        if (IS_ERR(dentry))
+                                goto fail;
+                        if (!(nd->flags & LOOKUP_RCU))
+                                goto done;
+                }
                path->mnt = mnt;
                path->dentry = dentry;
                if (likely(__follow_mount_rcu(nd, path, inode, false)))
@@ -1266,8 +1270,13 @@ done2:
        if (!dentry)
                goto need_lookup;
 found:
-        if (dentry->d_flags & DCACHE_OP_REVALIDATE)
+        if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
-                goto need_revalidate;
+                dentry = do_revalidate(dentry, nd);
+                if (!dentry)
+                        goto need_lookup;
+                if (IS_ERR(dentry))
+                        goto fail;
+        }
 done:
        path->mnt = mnt;
        path->dentry = dentry;
@@ -1309,16 +1318,6 @@ need_lookup:
        mutex_unlock(&dir->i_mutex);
        goto found;
-need_revalidate:
-        dentry = do_revalidate(dentry, nd);
-        if (!dentry)
-                goto need_lookup;
-        if (IS_ERR(dentry))
-                goto fail;
-        if (nd->flags & LOOKUP_RCU)
-                goto done2;
-        goto done;
 fail:
        return PTR_ERR(dentry);
 }
@@ -1415,11 +1414,7 @@ exec_again:
                        goto out_dput;
                if (inode->i_op->follow_link) {
-                        /* We commonly drop rcu-walk here */
+                        err = do_follow_link(inode, &next, nd);
-                        if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
-                                return -ECHILD;
-                        BUG_ON(inode != next.dentry->d_inode);
-                        err = do_follow_link(&next, nd);
                        if (err)
                                goto return_err;
                        nd->inode = nd->path.dentry->d_inode;
@@ -1463,10 +1458,7 @@ last_component:
                        break;
                if (inode && unlikely(inode->i_op->follow_link) &&
                    (lookup_flags & LOOKUP_FOLLOW)) {
-                        if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
+                        err = do_follow_link(inode, &next, nd);
-                                return -ECHILD;
-                        BUG_ON(inode != next.dentry->d_inode);
-                        err = do_follow_link(&next, nd);
                        if (err)
                                goto return_err;
                        nd->inode = nd->path.dentry->d_inode;
@@ -1500,12 +1492,15 @@ return_reval:
                 * We may need to check the cached dentry for staleness.
                 */
                if (need_reval_dot(nd->path.dentry)) {
+                        if (nameidata_drop_rcu_last_maybe(nd))
+                                return -ECHILD;
                        /* Note: we do not d_invalidate() */
                        err = d_revalidate(nd->path.dentry, nd);
                        if (!err)
                                err = -ESTALE;
                        if (err < 0)
                                break;
+                        return 0;
                }
 return_base:
                if (nameidata_drop_rcu_last_maybe(nd))
@@ -2265,8 +2260,6 @@ static struct file *finish_open(struct nameidata *nd,
        return filp;
 exit:
-        if (!IS_ERR(nd->intent.open.file))
-                release_open_intent(nd);
        path_put(&nd->path);
        return ERR_PTR(error);
 }
@@ -2389,8 +2382,6 @@ exit_mutex_unlock:
 exit_dput:
        path_put_conditional(path, nd);
 exit:
-        if (!IS_ERR(nd->intent.open.file))
-                release_open_intent(nd);
        path_put(&nd->path);
        return ERR_PTR(error);
 }
@@ -2477,6 +2468,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
        }
        audit_inode(pathname, nd.path.dentry);
        filp = finish_open(&nd, open_flag, acc_mode);
+        release_open_intent(&nd);
        return filp;
 creat:
@@ -2553,6 +2545,7 @@ out:
                path_put(&nd.root);
        if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
                goto reval;
+        release_open_intent(&nd);
        return filp;
 exit_dput:
@@ -2560,8 +2553,6 @@ exit_dput:
 out_path:
        path_put(&nd.path);
 out_filp:
-        if (!IS_ERR(nd.intent.open.file))
-                release_open_intent(&nd);
        filp = ERR_PTR(error);
        goto out;
 }
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b0b95371696..d1edf26025dc 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1244,7 +1244,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
                 */
                br_write_lock(vfsmount_lock);
                if (mnt_get_count(mnt) != 2) {
-                        br_write_lock(vfsmount_lock);
+                        br_write_unlock(vfsmount_lock);
                        return -EBUSY;
                }
                br_write_unlock(vfsmount_lock);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 199016528fcb..e3d294269058 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -135,33 +135,6 @@ out_err:
 #if defined(CONFIG_NFS_V4_1)
 /*
- *  * CB_SEQUENCE operations will fail until the callback sessionid is set.
- *   */
-int nfs4_set_callback_sessionid(struct nfs_client *clp)
-{
-        struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv;
-        struct nfs4_sessionid *bc_sid;
-        if (!serv->sv_bc_xprt)
-                return -EINVAL;
-        /* on success freed in xprt_free */
-        bc_sid = kmalloc(sizeof(struct nfs4_sessionid), GFP_KERNEL);
-        if (!bc_sid)
-                return -ENOMEM;
-        memcpy(bc_sid->data, &clp->cl_session->sess_id.data,
-                NFS4_MAX_SESSIONID_LEN);
-        spin_lock_bh(&serv->sv_cb_lock);
-        serv->sv_bc_xprt->xpt_bc_sid = bc_sid;
-        spin_unlock_bh(&serv->sv_cb_lock);
-        dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for sv_bc_xprt %p\n", __func__,
-                ((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1],
-                ((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3],
-                serv->sv_bc_xprt);
-        return 0;
-}
-/*
 * The callback service for NFSv4.1 callbacks
 */
 static int
@@ -266,10 +239,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
                struct nfs_callback_data *cb_info)
 {
 }
-int nfs4_set_callback_sessionid(struct nfs_client *clp)
-{
-        return 0;
-}
 #endif /* CONFIG_NFS_V4_1 */
 /*
@@ -359,78 +328,58 @@ void nfs_callback_down(int minorversion)
        mutex_unlock(&nfs_callback_mutex);
 }
-static int check_gss_callback_principal(struct nfs_client *clp,
+/* Boolean check of RPC_AUTH_GSS principal */
-                                        struct svc_rqst *rqstp)
+int
+check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
 {
        struct rpc_clnt *r = clp->cl_rpcclient;
        char *p = svc_gss_principal(rqstp);
+        if (rqstp->rq_authop->flavour != RPC_AUTH_GSS)
+                return 1;
        /* No RPC_AUTH_GSS on NFSv4.1 back channel yet */
        if (clp->cl_minorversion != 0)
-                return SVC_DROP;
+                return 0;
        /*
         * It might just be a normal user principal, in which case
         * userspace won't bother to tell us the name at all.
         */
        if (p == NULL)
-                return SVC_DENIED;
+                return 0;
        /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
        if (memcmp(p, "nfs@", 4) != 0)
-                return SVC_DENIED;
+                return 0;
        p += 4;
        if (strcmp(p, r->cl_server) != 0)
-                return SVC_DENIED;
+                return 0;
-        return SVC_OK;
+        return 1;
 }
-/* pg_authenticate method helper */
+/*
-static struct nfs_client *nfs_cb_find_client(struct svc_rqst *rqstp)
+ * pg_authenticate method for nfsv4 callback threads.
-{
+ *
-        struct nfs4_sessionid *sessionid = bc_xprt_sid(rqstp);
+ * The authflavor has been negotiated, so an incorrect flavor is a server
-        int is_cb_compound = rqstp->rq_proc == CB_COMPOUND ? 1 : 0;
+ * bug. Drop packets with incorrect authflavor.
+ *
-        dprintk("--> %s rq_proc %d\n", __func__, rqstp->rq_proc);
+ * All other checking is done after NFS decoding, where the nfs_client can be
-        if (svc_is_backchannel(rqstp))
+ * found in nfs4_callback_compound
-                /* Sessionid (usually) set after CB_NULL ping */
+ */
-                return nfs4_find_client_sessionid(svc_addr(rqstp), sessionid,
-                                                  is_cb_compound);
-        else
-                /* No callback identifier in pg_authenticate */
-                return nfs4_find_client_no_ident(svc_addr(rqstp));
-}
-/* pg_authenticate method for nfsv4 callback threads. */
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
-        struct nfs_client *clp;
-        RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
-        int ret = SVC_OK;
-        /* Don't talk to strangers */
-        clp = nfs_cb_find_client(rqstp);
-        if (clp == NULL)
-                return SVC_DROP;
-        dprintk("%s: %s NFSv4 callback!\n", __func__,
-                        svc_print_addr(rqstp, buf, sizeof(buf)));
        switch (rqstp->rq_authop->flavour) {
-                case RPC_AUTH_NULL:
+        case RPC_AUTH_NULL:
-                        if (rqstp->rq_proc != CB_NULL)
+                if (rqstp->rq_proc != CB_NULL)
-                                ret = SVC_DENIED;
+                        return SVC_DROP;
-                        break;
+                break;
-                case RPC_AUTH_UNIX:
+        case RPC_AUTH_GSS:
-                        break;
+                /* No RPC_AUTH_GSS support yet in NFSv4.1 */
-                case RPC_AUTH_GSS:
+                 if (svc_is_backchannel(rqstp))
-                        ret = check_gss_callback_principal(clp, rqstp);
+                        return SVC_DROP;
-                        break;
-                default:
-                        ret = SVC_DENIED;
        }
-        nfs_put_client(clp);
+        return SVC_OK;
-        return ret;
 }
 /*
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index d3b44f9bd747..46d93ce7311b 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -7,6 +7,7 @@
 */
 #ifndef __LINUX_FS_NFS_CALLBACK_H
 #define __LINUX_FS_NFS_CALLBACK_H
+#include <linux/sunrpc/svc.h>
 #define NFS4_CALLBACK 0x40000000
 #define NFS4_CALLBACK_XDRSIZE 2048
@@ -37,7 +38,6 @@ enum nfs4_callback_opnum {
 struct cb_process_state {
        __be32                  drc_status;
        struct nfs_client       *clp;
-        struct nfs4_sessionid   *svc_sid; /* v4.1 callback service sessionid */
 };
 struct cb_compound_hdr_arg {
@@ -168,7 +168,7 @@ extern unsigned nfs4_callback_layoutrecall(
 extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
 extern void nfs4_cb_take_slot(struct nfs_client *clp);
 #endif /* CONFIG_NFS_V4_1 */
+extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *);
 extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
                                    struct cb_getattrres *res,
                                    struct cb_process_state *cps);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 4bb91cb2620d..89587573fe50 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -373,17 +373,11 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 {
        struct nfs_client *clp;
        int i;
-        __be32 status;
+        __be32 status = htonl(NFS4ERR_BADSESSION);
        cps->clp = NULL;
-        status = htonl(NFS4ERR_BADSESSION);
+        clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid);
-        /* Incoming session must match the callback session */
-        if (memcmp(&args->csa_sessionid, cps->svc_sid, NFS4_MAX_SESSIONID_LEN))
-                goto out;
-        clp = nfs4_find_client_sessionid(args->csa_addr,
-                                         &args->csa_sessionid, 1);
        if (clp == NULL)
                goto out;
@@ -414,9 +408,9 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
        res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
        res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
        nfs4_cb_take_slot(clp);
-        cps->clp = clp; /* put in nfs4_callback_compound */
 out:
+        cps->clp = clp; /* put in nfs4_callback_compound */
        for (i = 0; i < args->csa_nrclists; i++)
                kfree(args->csa_rclists[i].rcl_refcalls);
        kfree(args->csa_rclists);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 23112c263f81..14e0f9371d14 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -794,10 +794,9 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
        if (hdr_arg.minorversion == 0) {
                cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident);
-                if (!cps.clp)
+                if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp))
                        return rpc_drop_reply;
-        } else
+        }
-                cps.svc_sid = bc_xprt_sid(rqstp);
        hdr_res.taglen = hdr_arg.taglen;
        hdr_res.tag = hdr_arg.tag;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 192f2f860265..bd3ca32879e7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1206,16 +1206,11 @@ nfs4_find_client_ident(int cb_ident)
 * For CB_COMPOUND calls, find a client by IP address, protocol version,
 * minorversion, and sessionID
 *
- * CREATE_SESSION triggers a CB_NULL ping from servers. The callback service
- * sessionid can only be set after the CREATE_SESSION return, so a CB_NULL
- * can arrive before the callback sessionid is set. For CB_NULL calls,
- * find a client by IP address protocol version, and minorversion.
- *
 * Returns NULL if no such client
 */
 struct nfs_client *
 nfs4_find_client_sessionid(const struct sockaddr *addr,
-                           struct nfs4_sessionid *sid, int is_cb_compound)
+                           struct nfs4_sessionid *sid)
 {
        struct nfs_client *clp;
@@ -1227,9 +1222,9 @@ nfs4_find_client_sessionid(const struct sockaddr *addr,
                if (!nfs4_has_session(clp))
                        continue;
-                /* Match sessionid unless cb_null call*/
+                /* Match sessionid */
-                if (is_cb_compound && (memcmp(clp->cl_session->sess_id.data,
+                if (memcmp(clp->cl_session->sess_id.data,
-                    sid->data, NFS4_MAX_SESSIONID_LEN) != 0))
+                    sid->data, NFS4_MAX_SESSIONID_LEN) != 0)
                        continue;
                atomic_inc(&clp->cl_count);
@@ -1244,7 +1239,7 @@ nfs4_find_client_sessionid(const struct sockaddr *addr,
 struct nfs_client *
 nfs4_find_client_sessionid(const struct sockaddr *addr,
-                           struct nfs4_sessionid *sid, int is_cb_compound)
+                           struct nfs4_sessionid *sid)
 {
        return NULL;
 }
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 364e4328f392..bbbc6bf5cb2e 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -23,8 +23,6 @@
 static void nfs_do_free_delegation(struct nfs_delegation *delegation)
 {
-        if (delegation->cred)
-                put_rpccred(delegation->cred);
        kfree(delegation);
 }
@@ -37,6 +35,10 @@ static void nfs_free_delegation_callback(struct rcu_head *head)
 static void nfs_free_delegation(struct nfs_delegation *delegation)
 {
+        if (delegation->cred) {
+                put_rpccred(delegation->cred);
+                delegation->cred = NULL;
+        }
        call_rcu(&delegation->rcu, nfs_free_delegation_callback);
 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e6ace0d93c71..9943a75bb6d1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -407,15 +407,18 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
                pos += vec->iov_len;
        }
+        /*
+         * If no bytes were started, return the error, and let the
+         * generic layer handle the completion.
+         */
+        if (requested_bytes == 0) {
+                nfs_direct_req_release(dreq);
+                return result < 0 ? result : -EIO;
+        }
        if (put_dreq(dreq))
                nfs_direct_complete(dreq);
+        return 0;
-        if (requested_bytes != 0)
-                return 0;
-        if (result < 0)
-                return result;
-        return -EIO;
 }
 static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
@@ -841,15 +844,18 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
                pos += vec->iov_len;
        }
+        /*
+         * If no bytes were started, return the error, and let the
+         * generic layer handle the completion.
+         */
+        if (requested_bytes == 0) {
+                nfs_direct_req_release(dreq);
+                return result < 0 ? result : -EIO;
+        }
        if (put_dreq(dreq))
                nfs_direct_write_complete(dreq, dreq->inode);
+        return 0;
-        if (requested_bytes != 0)
-                return 0;
-        if (result < 0)
-                return result;
-        return -EIO;
 }
 static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d8512423ba72..1cc600e77bb4 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -881,9 +881,10 @@ out:
        return ret;
 }
-static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
+static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
+        unsigned long ret = 0;
        if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
                        && (fattr->valid & NFS_ATTR_FATTR_CHANGE)
@@ -891,25 +892,32 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                nfsi->change_attr = fattr->change_attr;
                if (S_ISDIR(inode->i_mode))
                        nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+                ret |= NFS_INO_INVALID_ATTR;
        }
        /* If we have atomic WCC data, we may update some attributes */
        if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
                        && (fattr->valid & NFS_ATTR_FATTR_CTIME)
-                        && timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
+                        && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) {
-                        memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+                memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+                ret |= NFS_INO_INVALID_ATTR;
+        }
        if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
                        && (fattr->valid & NFS_ATTR_FATTR_MTIME)
                        && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
-                        memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
+                memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
-                        if (S_ISDIR(inode->i_mode))
+                if (S_ISDIR(inode->i_mode))
-                                nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+                        nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+                ret |= NFS_INO_INVALID_ATTR;
        }
        if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
                        && (fattr->valid & NFS_ATTR_FATTR_SIZE)
                        && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
-                        && nfsi->npages == 0)
+                        && nfsi->npages == 0) {
-                        i_size_write(inode, nfs_size_to_loff_t(fattr->size));
+                i_size_write(inode, nfs_size_to_loff_t(fattr->size));
+                ret |= NFS_INO_INVALID_ATTR;
+        }
+        return ret;
 }
 /**
@@ -1223,7 +1231,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                        | NFS_INO_REVAL_PAGECACHE);
        /* Do atomic weak cache consistency updates */
-        nfs_wcc_update_inode(inode, fattr);
+        invalid |= nfs_wcc_update_inode(inode, fattr);
        /* More cache consistency checks */
        if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 4644f04b4b46..cf9fdbdabc67 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -133,8 +133,7 @@ extern void nfs_put_client(struct nfs_client *);
 extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *);
 extern struct nfs_client *nfs4_find_client_ident(int);
 extern struct nfs_client *
-nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *,
+nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *);
-                           int);
 extern struct nfs_server *nfs_create_server(
                                        const struct nfs_parsed_mount_data *,
                                        struct nfs_fh *);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 9f88c5f4c7e2..274342771655 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -311,8 +311,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
        if (!nfs_server_capable(inode, NFS_CAP_ACLS))
                goto out;
-        /* We are doing this here, because XDR marshalling can only
+        /* We are doing this here because XDR marshalling does not
-           return -ENOMEM. */
+         * return any results, it BUGs. */
        status = -ENOSPC;
        if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES)
                goto out;
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 01c5e8b1941d..183c6b123d0f 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1328,10 +1328,13 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
        encode_nfs_fh3(xdr, NFS_FH(args->inode));
        encode_uint32(xdr, args->mask);
+        base = req->rq_slen;
        if (args->npages != 0)
                xdr_write_pages(xdr, args->pages, 0, args->len);
+        else
+                xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE);
-        base = req->rq_slen;
        error = nfsacl_encode(xdr->buf, base, args->inode,
                            (args->mask & NFS_ACL) ?
                            args->acl_access : NULL, 1, 0);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 51fe64ace55a..f5c9b125e8cc 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -214,7 +214,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
        /* ipv6 length plus port is legal */
        if (rlen > INET6_ADDRSTRLEN + 8) {
-                dprintk("%s Invalid address, length %d\n", __func__,
+                dprintk("%s: Invalid address, length %d\n", __func__,
                        rlen);
                goto out_err;
        }
@@ -225,6 +225,11 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
        /* replace the port dots with dashes for the in4_pton() delimiter*/
        for (i = 0; i < 2; i++) {
                char *res = strrchr(buf, '.');
+                if (!res) {
+                        dprintk("%s: Failed finding expected dots in port\n",
+                                __func__);
+                        goto out_free;
+                }
                *res = '-';
        }
@@ -240,7 +245,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
        port = htons((tmp[0] << 8) | (tmp[1]));
        ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
-        dprintk("%s Decoded address and port %s\n", __func__, buf);
+        dprintk("%s: Decoded address and port %s\n", __func__, buf);
 out_free:
        kfree(buf);
 out_err:
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9d992b0346e3..1ff76acc7e98 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -50,6 +50,8 @@
 #include <linux/module.h>
 #include <linux/sunrpc/bc_xprt.h>
 #include <linux/xattr.h>
+#include <linux/utsname.h>
+#include <linux/mm.h>
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -3251,6 +3253,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
        }
 }
+static int buf_to_pages_noslab(const void *buf, size_t buflen,
+                struct page **pages, unsigned int *pgbase)
+{
+        struct page *newpage, **spages;
+        int rc = 0;
+        size_t len;
+        spages = pages;
+        do {
+                len = min(PAGE_CACHE_SIZE, buflen);
+                newpage = alloc_page(GFP_KERNEL);
+                if (newpage == NULL)
+                        goto unwind;
+                memcpy(page_address(newpage), buf, len);
+                buf += len;
+                buflen -= len;
+                *pages++ = newpage;
+                rc++;
+        } while (buflen != 0);
+        return rc;
+unwind:
+        for(; rc > 0; rc--)
+                __free_page(spages[rc-1]);
+        return -ENOMEM;
+}
 struct nfs4_cached_acl {
        int cached;
        size_t len;
@@ -3419,13 +3450,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
                .rpc_argp       = &arg,
                .rpc_resp       = &res,
        };
-        int ret;
+        int ret, i;
        if (!nfs4_server_supports_acls(server))
                return -EOPNOTSUPP;
+        i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
+        if (i < 0)
+                return i;
        nfs_inode_return_delegation(inode);
-        buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
        ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
+        /*
+         * Free each page after tx, so the only ref left is
+         * held by the network stack
+         */
+        for (; i > 0; i--)
+                put_page(pages[i-1]);
        /*
         * Acl update can result in inode attribute update.
         * so mark the attribute cache invalid.
@@ -4572,27 +4613,16 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
        *p = htonl((u32)clp->cl_boot_time.tv_nsec);
        args.verifier = &verifier;
-        while (1) {
+        args.id_len = scnprintf(args.id, sizeof(args.id),
-                args.id_len = scnprintf(args.id, sizeof(args.id),
+                                "%s/%s.%s/%u",
-                                        "%s/%s %u",
+                                clp->cl_ipaddr,
-                                        clp->cl_ipaddr,
+                                init_utsname()->nodename,
-                                        rpc_peeraddr2str(clp->cl_rpcclient,
+                                init_utsname()->domainname,
-                                                         RPC_DISPLAY_ADDR),
+                                clp->cl_rpcclient->cl_auth->au_flavor);
-                                        clp->cl_id_uniquifier);
-                status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
-                if (status != -NFS4ERR_CLID_INUSE)
-                        break;
-                if (signalled())
-                        break;
-                if (++clp->cl_id_uniquifier == 0)
-                        break;
-        }
-        status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
+        status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
+        if (!status)
+                status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
        dprintk("<-- %s status= %d\n", __func__, status);
        return status;
 }
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2336d532cf66..e6742b57a04c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -232,12 +232,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
        status = nfs4_proc_create_session(clp);
        if (status != 0)
                goto out;
-        status = nfs4_set_callback_sessionid(clp);
-        if (status != 0) {
-                printk(KERN_WARNING "Sessionid not set. No callback service\n");
-                nfs_callback_down(1);
-                status = 0;
-        }
        nfs41_setup_state_renewal(clp);
        nfs_mark_client_ready(clp, NFS_CS_READY);
 out:
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 2ab8e5cb8f59..4e2c168b6ee9 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6086,11 +6086,11 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
        __be32 *p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
                goto out_overflow;
-        if (!ntohl(*p++)) {
+        if (*p == xdr_zero) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
                        goto out_overflow;
-                if (!ntohl(*p++))
+                if (*p == xdr_zero)
                        return -EAGAIN;
                entry->eof = 1;
                return -EBADCOOKIE;
@@ -6101,7 +6101,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
                goto out_overflow;
        entry->prev_cookie = entry->cookie;
        p = xdr_decode_hyper(p, &entry->cookie);
-        entry->len = ntohl(*p++);
+        entry->len = be32_to_cpup(p);
        p = xdr_inline_decode(xdr, entry->len);
        if (unlikely(!p))
@@ -6132,9 +6132,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
        if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE)
                entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
-        if (verify_attr_len(xdr, p, len) < 0)
-                goto out_overflow;
        return 0;
 out_overflow:
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bc4089769735..1b1bc1a0fb0a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -951,7 +951,7 @@ pnfs_put_deviceid_cache(struct nfs_client *clp)
 {
        struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
-        dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
+        dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref));
        if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
                int i;
                /* Verify cache is empty */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 10d648ea128b..c8278f4046cb 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -932,7 +932,7 @@ out_bad:
        while (!list_empty(&list)) {
                data = list_entry(list.next, struct nfs_write_data, pages);
                list_del(&data->pages);
-                nfs_writedata_release(data);
+                nfs_writedata_free(data);
        }
        nfs_redirty_request(req);
        return -ENOMEM;
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index fc1c52571c03..84c27d69d421 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -42,6 +42,11 @@ struct nfsacl_encode_desc {
        gid_t gid;
 };
+struct nfsacl_simple_acl {
+        struct posix_acl acl;
+        struct posix_acl_entry ace[4];
+};
 static int
 xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
 {
@@ -72,9 +77,20 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
        return 0;
 }
-unsigned int
+/**
-nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
+ * nfsacl_encode - Encode an NFSv3 ACL
-              struct posix_acl *acl, int encode_entries, int typeflag)
+ *
+ * @buf: destination xdr_buf to contain XDR encoded ACL
+ * @base: byte offset in xdr_buf where XDR'd ACL begins
+ * @inode: inode of file whose ACL this is
+ * @acl: posix_acl to encode
+ * @encode_entries: whether to encode ACEs as well
+ * @typeflag: ACL type: NFS_ACL_DEFAULT or zero
+ *
+ * Returns size of encoded ACL in bytes or a negative errno value.
+ */
+int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
+                  struct posix_acl *acl, int encode_entries, int typeflag)
 {
        int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0;
        struct nfsacl_encode_desc nfsacl_desc = {
@@ -88,17 +104,22 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
                .uid = inode->i_uid,
                .gid = inode->i_gid,
        };
+        struct nfsacl_simple_acl aclbuf;
        int err;
-        struct posix_acl *acl2 = NULL;
        if (entries > NFS_ACL_MAX_ENTRIES ||
            xdr_encode_word(buf, base, entries))
                return -EINVAL;
        if (encode_entries && acl && acl->a_count == 3) {
-                /* Fake up an ACL_MASK entry. */
+                struct posix_acl *acl2 = &aclbuf.acl;
-                acl2 = posix_acl_alloc(4, GFP_KERNEL);
-                if (!acl2)
+                /* Avoid the use of posix_acl_alloc().  nfsacl_encode() is
-                        return -ENOMEM;
+                 * invoked in contexts where a memory allocation failure is
+                 * fatal.  Fortunately this fake ACL is small enough to
+                 * construct on the stack. */
+                memset(&aclbuf, 0, sizeof(aclbuf));
+                posix_acl_init(acl2, 4);
                /* Insert entries in canonical order: other orders seem
                 to confuse Solaris VxFS. */
                acl2->a_entries[0] = acl->a_entries[0];  /* ACL_USER_OBJ */
@@ -109,8 +130,6 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
                nfsacl_desc.acl = acl2;
        }
        err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc);
-        if (acl2)
-                posix_acl_release(acl2);
        if (!err)
                err = 8 + nfsacl_desc.desc.elem_size *
                          nfsacl_desc.desc.array_len;
@@ -224,9 +243,18 @@ posix_acl_from_nfsacl(struct posix_acl *acl)
        return 0;
 }
-unsigned int
+/**
-nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
+ * nfsacl_decode - Decode an NFSv3 ACL
-              struct posix_acl **pacl)
+ *
+ * @buf: xdr_buf containing XDR'd ACL data to decode
+ * @base: byte offset in xdr_buf where XDR'd ACL begins
+ * @aclcnt: count of ACEs in decoded posix_acl
+ * @pacl: buffer in which to place decoded posix_acl
+ *
+ * Returns the length of the decoded ACL in bytes, or a negative errno value.
+ */
+int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
+                  struct posix_acl **pacl)
 {
        struct nfsacl_decode_desc nfsacl_desc = {
                .desc = {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3be975e18919..cde36cb0f348 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -484,7 +484,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
 out:
        return status;
 out_default:
-        return nfs_cb_stat_to_errno(status);
+        return nfs_cb_stat_to_errno(nfserr);
 }
 /*
@@ -564,11 +564,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
        if (unlikely(status))
                goto out;
        if (unlikely(nfserr != NFS4_OK))
-                goto out_default;
+                status = nfs_cb_stat_to_errno(nfserr);
 out:
        return status;
-out_default:
-        return nfs_cb_stat_to_errno(status);
 }
 /*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d98d0213285d..54b60bfceb8d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -230,9 +230,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
        dp->dl_client = clp;
        get_nfs4_file(fp);
        dp->dl_file = fp;
-        dp->dl_vfs_file = find_readable_file(fp);
-        get_file(dp->dl_vfs_file);
-        dp->dl_flock = NULL;
        dp->dl_type = type;
        dp->dl_stateid.si_boot = boot_time;
        dp->dl_stateid.si_stateownerid = current_delegid++;
@@ -241,8 +238,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
        fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
        dp->dl_time = 0;
        atomic_set(&dp->dl_count, 1);
-        list_add(&dp->dl_perfile, &fp->fi_delegations);
-        list_add(&dp->dl_perclnt, &clp->cl_delegations);
        INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
        return dp;
 }
@@ -253,36 +248,32 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
        if (atomic_dec_and_test(&dp->dl_count)) {
                dprintk("NFSD: freeing dp %p\n",dp);
                put_nfs4_file(dp->dl_file);
-                fput(dp->dl_vfs_file);
                kmem_cache_free(deleg_slab, dp);
                num_delegations--;
        }
 }
-/* Remove the associated file_lock first, then remove the delegation.
+static void nfs4_put_deleg_lease(struct nfs4_file *fp)
- * lease_modify() is called to remove the FS_LEASE file_lock from
- * the i_flock list, eventually calling nfsd's lock_manager
- * fl_release_callback.
- */
-static void
-nfs4_close_delegation(struct nfs4_delegation *dp)
 {
-        dprintk("NFSD: close_delegation dp %p\n",dp);
+        if (atomic_dec_and_test(&fp->fi_delegees)) {
-        /* XXX: do we even need this check?: */
+                vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
-        if (dp->dl_flock)
+                fp->fi_lease = NULL;
-                vfs_setlease(dp->dl_vfs_file, F_UNLCK, &dp->dl_flock);
+                /* Drop the reference nfs4_setlease() took with get_file(). */
+                fput(fp->fi_deleg_file);
+                fp->fi_deleg_file = NULL;
+        }
 }
 /* Called under the state lock. */
 static void
 unhash_delegation(struct nfs4_delegation *dp)
 {
-        list_del_init(&dp->dl_perfile);
        list_del_init(&dp->dl_perclnt);
        spin_lock(&recall_lock);
+        list_del_init(&dp->dl_perfile);
        list_del_init(&dp->dl_recall_lru);
        spin_unlock(&recall_lock);
-        nfs4_close_delegation(dp);
+        nfs4_put_deleg_lease(dp->dl_file);
        nfs4_put_delegation(dp);
 }
@@ -958,8 +947,6 @@ expire_client(struct nfs4_client *clp)
        spin_lock(&recall_lock);
        while (!list_empty(&clp->cl_delegations)) {
                dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
-                dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
-                                dp->dl_flock);
                list_del_init(&dp->dl_perclnt);
                list_move(&dp->dl_recall_lru, &reaplist);
        }
@@ -2078,6 +2065,7 @@ alloc_init_file(struct inode *ino)
                fp->fi_inode = igrab(ino);
                fp->fi_id = current_fileid++;
                fp->fi_had_conflict = false;
+                fp->fi_lease = NULL;
                memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
                memset(fp->fi_access, 0, sizeof(fp->fi_access));
                spin_lock(&recall_lock);
@@ -2329,23 +2317,8 @@ nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access)
                nfs4_file_put_access(fp, O_RDONLY);
 }
-/*
+static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
- * Spawn a thread to perform a recall on the delegation represented
- * by the lease (file_lock)
- *
- * Called from break_lease() with lock_flocks() held.
- * Note: we assume break_lease will only call this *once* for any given
- * lease.
- */
-static
-void nfsd_break_deleg_cb(struct file_lock *fl)
 {
-        struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
-        dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
-        if (!dp)
-                return;
        /* We're assuming the state code never drops its reference
         * without first removing the lease.  Since we're in this lease
         * callback (and since the lease code is serialized by the kernel
@@ -2353,22 +2326,35 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
         * it's safe to take a reference: */
        atomic_inc(&dp->dl_count);
-        spin_lock(&recall_lock);
        list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
-        spin_unlock(&recall_lock);
        /* only place dl_time is set. protected by lock_flocks*/
        dp->dl_time = get_seconds();
+        nfsd4_cb_recall(dp);
+}
+/* Called from break_lease() with lock_flocks() held. */
+static void nfsd_break_deleg_cb(struct file_lock *fl)
+{
+        struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
+        struct nfs4_delegation *dp;
+        BUG_ON(!fp);
+        /* We assume break_lease is only called once per lease: */
+        BUG_ON(fp->fi_had_conflict);
        /*
         * We don't want the locks code to timeout the lease for us;
-         * we'll remove it ourself if the delegation isn't returned
+         * we'll remove it ourselves if a delegation isn't returned
-         * in time.
+         * in time:
         */
        fl->fl_break_time = 0;
-        dp->dl_file->fi_had_conflict = true;
+        spin_lock(&recall_lock);
-        nfsd4_cb_recall(dp);
+        fp->fi_had_conflict = true;
+        list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
+                nfsd_break_one_deleg(dp);
+        spin_unlock(&recall_lock);
 }
 static
@@ -2459,13 +2445,19 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
 static struct nfs4_delegation *
 find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
 {
        struct nfs4_delegation *dp;
+        spin_lock(&recall_lock);
        list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
-                if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
-                        return dp;
+                if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) {
+                        /* Return here: list_for_each_entry() never leaves
+                         * dp NULL, even when nothing matched. */
+                        spin_unlock(&recall_lock);
+                        return dp;
+                }
        }
+        spin_unlock(&recall_lock);
        return NULL;
 }
 int share_access_to_flags(u32 share_access)
@@ -2641,6 +2629,66 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
        return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
 }
+static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag)
+{
+        struct file_lock *fl;
+        fl = locks_alloc_lock();
+        if (!fl)
+                return NULL;
+        locks_init_lock(fl);
+        fl->fl_lmops = &nfsd_lease_mng_ops;
+        fl->fl_flags = FL_LEASE;
+        fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
+        fl->fl_end = OFFSET_MAX;
+        fl->fl_owner = (fl_owner_t)(dp->dl_file);
+        fl->fl_pid = current->tgid;
+        return fl;
+}
+static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
+{
+        struct nfs4_file *fp = dp->dl_file;
+        struct file_lock *fl;
+        int status;
+        fl = nfs4_alloc_init_lease(dp, flag);
+        if (!fl)
+                return -ENOMEM;
+        fl->fl_file = find_readable_file(fp);
+        list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
+        status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
+        if (status) {
+                list_del_init(&dp->dl_perclnt);
+                locks_free_lock(fl);
+                return -ENOMEM;
+        }
+        fp->fi_lease = fl;
+        fp->fi_deleg_file = fl->fl_file;
+        get_file(fp->fi_deleg_file);
+        atomic_set(&fp->fi_delegees, 1);
+        list_add(&dp->dl_perfile, &fp->fi_delegations);
+        return 0;
+}
+static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
+{
+        struct nfs4_file *fp = dp->dl_file;
+        if (!fp->fi_lease)
+                return nfs4_setlease(dp, flag);
+        spin_lock(&recall_lock);
+        if (fp->fi_had_conflict) {
+                spin_unlock(&recall_lock);
+                return -EAGAIN;
+        }
+        atomic_inc(&fp->fi_delegees);
+        list_add(&dp->dl_perfile, &fp->fi_delegations);
+        spin_unlock(&recall_lock);
+        list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
+        return 0;
+}
 /*
 * Attempt to hand out a delegation.
 */
@@ -2650,7 +2698,6 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
        struct nfs4_delegation *dp;
        struct nfs4_stateowner *sop = stp->st_stateowner;
        int cb_up;
-        struct file_lock *fl;
        int status, flag = 0;
        cb_up = nfsd4_cb_channel_good(sop->so_client);
@@ -2681,36 +2728,11 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
        }
        dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
-        if (dp == NULL) {
+        if (dp == NULL)
-                flag = NFS4_OPEN_DELEGATE_NONE;
+                goto out_no_deleg;
-                goto out;
+        status = nfs4_set_delegation(dp, flag);
-        }
+        if (status)
-        status = -ENOMEM;
+                goto out_free;
-        fl = locks_alloc_lock();
-        if (!fl)
-                goto out;
-        locks_init_lock(fl);
-        fl->fl_lmops = &nfsd_lease_mng_ops;
-        fl->fl_flags = FL_LEASE;
-        fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
-        fl->fl_end = OFFSET_MAX;
-        fl->fl_owner =  (fl_owner_t)dp;
-        fl->fl_file = find_readable_file(stp->st_file);
-        BUG_ON(!fl->fl_file);
-        fl->fl_pid = current->tgid;
-        dp->dl_flock = fl;
-        /* vfs_setlease checks to see if delegation should be handed out.
-         * the lock_manager callback fl_change is used
-         */
-        if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
-                dprintk("NFSD: setlease failed [%d], no delegation\n", status);
-                dp->dl_flock = NULL;
-                locks_free_lock(fl);
-                unhash_delegation(dp);
-                flag = NFS4_OPEN_DELEGATE_NONE;
-                goto out;
-        }
        memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
@@ -2722,6 +2744,12 @@ out:
                        && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
                dprintk("NFSD: WARNING: refusing delegation reclaim\n");
        open->op_delegate_type = flag;
+        return;
+out_free:
+        nfs4_put_delegation(dp);
+out_no_deleg:
+        flag = NFS4_OPEN_DELEGATE_NONE;
+        goto out;
 }
 /*
@@ -2916,8 +2944,6 @@ nfs4_laundromat(void)
                                test_val = u;
                        break;
                }
-                dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
-                                    dp, dp->dl_flock);
                list_move(&dp->dl_recall_lru, &reaplist);
        }
        spin_unlock(&recall_lock);
@@ -3128,7 +3154,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
                        goto out;
                renew_client(dp->dl_client);
                if (filpp) {
-                        *filpp = find_readable_file(dp->dl_file);
+                        *filpp = dp->dl_file->fi_deleg_file;
                        BUG_ON(!*filpp);
                }
        } else { /* open or lock stateid */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 956629b9cdc9..1275b8655070 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -317,8 +317,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
                READ_BUF(dummy32);
                len += (XDR_QUADLEN(dummy32) << 2);
                READMEM(buf, dummy32);
-                if ((host_err = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
+                if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
-                        goto out_nfserr;
+                        return status;
                iattr->ia_valid |= ATTR_UID;
        }
        if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
@@ -328,8 +328,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
                READ_BUF(dummy32);
                len += (XDR_QUADLEN(dummy32) << 2);
                READMEM(buf, dummy32);
-                if ((host_err = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
+                if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
-                        goto out_nfserr;
+                        return status;
                iattr->ia_valid |= ATTR_GID;
        }
        if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 3074656ba7bf..2d31224b07bf 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -83,8 +83,6 @@ struct nfs4_delegation {
        atomic_t                dl_count;       /* ref count */
        struct nfs4_client      *dl_client;
        struct nfs4_file        *dl_file;
-        struct file             *dl_vfs_file;
-        struct file_lock        *dl_flock;
        u32                     dl_type;
        time_t                  dl_time;
 /* For recall: */
@@ -379,6 +377,9 @@ struct nfs4_file {
         */
        atomic_t                fi_readers;
        atomic_t                fi_writers;
+        struct file             *fi_deleg_file;
+        struct file_lock        *fi_lease;
+        atomic_t                fi_delegees;
        struct inode            *fi_inode;
        u32                     fi_id;      /* used with stateowner->so_id 
                                             * for stateid_hashtbl hash */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 641117f2188d..da1d9701f8e4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -808,7 +808,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
                if (ra->p_count == 0)
                        frap = rap;
        }
-        depth = nfsdstats.ra_size*11/10;
+        depth = nfsdstats.ra_size;
        if (!frap) {    
                spin_unlock(&rab->pb_lock);
                return NULL;
@@ -1744,6 +1744,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        host_err = nfsd_break_lease(odentry->d_inode);
        if (host_err)
                goto out_drop_write;
+        if (ndentry->d_inode) {
+                host_err = nfsd_break_lease(ndentry->d_inode);
+                if (host_err)
+                        goto out_drop_write;
+        }
+        if (host_err)
+                goto out_drop_write;
        host_err = vfs_rename(fdir, odentry, tdir, ndentry);
        if (!host_err) {
                host_err = commit_metadata(tfhp);
@@ -1812,22 +1819,22 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
        if (host_err)
-                goto out_nfserr;
+                goto out_put;
        host_err = nfsd_break_lease(rdentry->d_inode);
        if (host_err)
-                goto out_put;
+                goto out_drop_write;
        if (type != S_IFDIR)
                host_err = vfs_unlink(dirp, rdentry);
        else
                host_err = vfs_rmdir(dirp, rdentry);
-out_put:
-        dput(rdentry);
        if (!host_err)
                host_err = commit_metadata(fhp);
+out_drop_write:
        mnt_drop_write(fhp->fh_export->ex_path.mnt);
+out_put:
+        dput(rdentry);
 out_nfserr:
        err = nfserrno(host_err);
 out:
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 388e9e8f5286..85f7baa15f5d 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -35,11 +35,6 @@
 #include "btnode.h"
-void nilfs_btnode_cache_init_once(struct address_space *btnc)
-{
-        nilfs_mapping_init_once(btnc);
-}
 static const struct address_space_operations def_btnode_aops = {
        .sync_page              = block_sync_page,
 };
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 79037494f1e0..1b8ebd888c28 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
        struct buffer_head *newbh;
 };
-void nilfs_btnode_cache_init_once(struct address_space *);
 void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
 void nilfs_btnode_cache_clear(struct address_space *);
 struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 6a0e2a189f60..a0babd2bff6a 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -454,9 +454,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
        struct backing_dev_info *bdi = inode->i_sb->s_bdi;
        INIT_LIST_HEAD(&shadow->frozen_buffers);
-        nilfs_mapping_init_once(&shadow->frozen_data);
+        address_space_init_once(&shadow->frozen_data);
        nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
-        nilfs_mapping_init_once(&shadow->frozen_btnodes);
+        address_space_init_once(&shadow->frozen_btnodes);
        nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
        mi->mi_shadow = shadow;
        return 0;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 98034271cd02..161791d26458 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -397,7 +397,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
                if (!new_de)
                        goto out_dir;
-                inc_nlink(old_inode);
                nilfs_set_link(new_dir, new_de, new_page, old_inode);
                nilfs_mark_inode_dirty(new_dir);
                new_inode->i_ctime = CURRENT_TIME;
@@ -411,13 +410,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        if (new_dir->i_nlink >= NILFS_LINK_MAX)
                                goto out_dir;
                }
-                inc_nlink(old_inode);
                err = nilfs_add_link(new_dentry, old_inode);
-                if (err) {
+                if (err)
-                        drop_nlink(old_inode);
-                        nilfs_mark_inode_dirty(old_inode);
                        goto out_dir;
-                }
                if (dir_de) {
                        inc_nlink(new_dir);
                        nilfs_mark_inode_dirty(new_dir);
@@ -431,7 +426,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        old_inode->i_ctime = CURRENT_TIME;
        nilfs_delete_entry(old_de, old_page);
-        drop_nlink(old_inode);
        if (dir_de) {
                nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0c432416cfef..a585b35fd6bc 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -492,19 +492,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
        return nc;
 }
-void nilfs_mapping_init_once(struct address_space *mapping)
-{
-        memset(mapping, 0, sizeof(*mapping));
-        INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
-        spin_lock_init(&mapping->tree_lock);
-        INIT_LIST_HEAD(&mapping->private_list);
-        spin_lock_init(&mapping->private_lock);
-        spin_lock_init(&mapping->i_mmap_lock);
-        INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
-        INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
-}
 void nilfs_mapping_init(struct address_space *mapping,
                        struct backing_dev_info *bdi,
                        const struct address_space_operations *aops)
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 622df27cd891..2a00953ebd5f 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
 int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
 void nilfs_copy_back_pages(struct address_space *, struct address_space *);
 void nilfs_clear_dirty_pages(struct address_space *);
-void nilfs_mapping_init_once(struct address_space *mapping);
 void nilfs_mapping_init(struct address_space *mapping,
                        struct backing_dev_info *bdi,
                        const struct address_space_operations *aops);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55ebae5c7f39..2de9f636792a 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -430,7 +430,8 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
        nilfs_segctor_map_segsum_entry(
                sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
-        if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
+        if (NILFS_I(inode)->i_root &&
+            !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
                set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
        /* skip finfo */
 }
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0994f6a76c07..1673b3d99842 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -704,7 +704,8 @@ skip_mount_setup:
        sbp[0]->s_state =
                cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS);
        /* synchronize sbp[1] with sbp[0] */
-        memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
+        if (sbp[1])
+                memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
        return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL);
 }
@@ -1278,7 +1279,7 @@ static void nilfs_inode_init_once(void *obj)
 #ifdef CONFIG_NILFS_XATTR
        init_rwsem(&ii->xattr_sem);
 #endif
-        nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
+        address_space_init_once(&ii->i_btnode_cache);
        ii->i_bmap = &ii->i_bmap_data;
        inode_init_once(&ii->vfs_inode);
 }
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index b572b6727181..326e7475a22a 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
 /**
 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
 *
- * Copyright (c) 2001-2006 Anton Altaparmakov
+ * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.
 * Copyright (c) 2002 Richard Russon
 *
 * This program/include file is free software; you can redistribute it and/or
@@ -2576,6 +2576,8 @@ mft_rec_already_initialized:
        flush_dcache_page(page);
        SetPageUptodate(page);
        if (base_ni) {
+                MFT_RECORD *m_tmp;
                /*
                 * Setup the base mft record in the extent mft record.  This
                 * completes initialization of the allocated extent mft record
@@ -2588,11 +2590,11 @@ mft_rec_already_initialized:
                 * attach it to the base inode @base_ni and map, pin, and lock
                 * its, i.e. the allocated, mft record.
                 */
-                m = map_extent_mft_record(base_ni, bit, &ni);
+                m_tmp = map_extent_mft_record(base_ni, bit, &ni);
-                if (IS_ERR(m)) {
+                if (IS_ERR(m_tmp)) {
                        ntfs_error(vol->sb, "Failed to map allocated extent "
                                        "mft record 0x%llx.", (long long)bit);
-                        err = PTR_ERR(m);
+                        err = PTR_ERR(m_tmp);
                        /* Set the mft record itself not in use. */
                        m->flags &= cpu_to_le16(
                                        ~le16_to_cpu(MFT_RECORD_IN_USE));
@@ -2603,6 +2605,7 @@ mft_rec_already_initialized:
                        ntfs_unmap_page(page);
                        goto undo_mftbmp_alloc;
                }
+                BUG_ON(m != m_tmp);
                /*
                 * Make sure the allocated mft record is written out to disk.
                 * No need to set the inode dirty because the caller is going
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 43e56b97f9c0..6180da1e37e6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -405,9 +405,9 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
               ocfs2_quota_trans_credits(sb);
 }
-/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
+/* data block for new dir/symlink, allocation of directory block, dx_root
- * bitmap block for the new bit) dx_root update for free list */
+ * update for free list */
-#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1)
+#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + OCFS2_SUBALLOC_ALLOC + 1)
 static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
 {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index b5f9160e93e9..19ebc5aad391 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3228,7 +3228,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
                                        u32 num_clusters, unsigned int e_flags)
 {
        int ret, delete, index, credits =  0;
-        u32 new_bit, new_len;
+        u32 new_bit, new_len, orig_num_clusters;
        unsigned int set_len;
        struct ocfs2_super *osb = OCFS2_SB(sb);
        handle_t *handle;
@@ -3261,6 +3261,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
                goto out;
        }
+        orig_num_clusters = num_clusters;
        while (num_clusters) {
                ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
                                             p_cluster, num_clusters,
@@ -3348,7 +3350,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
         * in write-back mode.
         */
        if (context->get_clusters == ocfs2_di_get_clusters) {
-                ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
+                ret = ocfs2_cow_sync_writeback(sb, context, cpos,
+                                               orig_num_clusters);
                if (ret)
                        mlog_errno(ret);
        }
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 38f986d2447e..36c423fb0635 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1316,7 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb,
                               struct mount_options *mopt,
                               int is_remount)
 {
-        int status;
+        int status, user_stack = 0;
        char *p;
        u32 tmp;
@@ -1459,6 +1459,15 @@ static int ocfs2_parse_options(struct super_block *sb,
                        memcpy(mopt->cluster_stack, args[0].from,
                               OCFS2_STACK_LABEL_LEN);
                        mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
+                        /*
+                         * Open code the memcmp here as we don't have
+                         * an osb to pass to
+                         * ocfs2_userspace_stack().
+                         */
+                        if (memcmp(mopt->cluster_stack,
+                                   OCFS2_CLASSIC_CLUSTER_STACK,
+                                   OCFS2_STACK_LABEL_LEN))
+                                user_stack = 1;
                        break;
                case Opt_inode64:
                        mopt->mount_opt |= OCFS2_MOUNT_INODE64;
@@ -1514,13 +1523,16 @@ static int ocfs2_parse_options(struct super_block *sb,
                }
        }
-        /* Ensure only one heartbeat mode */
+        if (user_stack == 0) {
-        tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
+                /* Ensure only one heartbeat mode */
-                                 OCFS2_MOUNT_HB_NONE);
+                tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
-        if (hweight32(tmp) != 1) {
+                                         OCFS2_MOUNT_HB_GLOBAL |
-                mlog(ML_ERROR, "Invalid heartbeat mount options\n");
+                                         OCFS2_MOUNT_HB_NONE);
-                status = 0;
+                if (hweight32(tmp) != 1) {
-                goto bail;
+                        mlog(ML_ERROR, "Invalid heartbeat mount options\n");
+                        status = 0;
+                        goto bail;
+                }
        }
        status = 1;
diff --git a/fs/open.c b/fs/open.c
index e52389e1f05b..5a2c6ebc22b5 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -790,6 +790,8 @@ struct file *nameidata_to_filp(struct nameidata *nd)
        /* Pick up the filp from the open intent */
        filp = nd->intent.open.file;
+        nd->intent.open.file = NULL;
        /* Has the filesystem initialised the file for us? */
        if (filp->f_path.dentry == NULL) {
                path_get(&nd->path);
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 789c625c7aa5..b10e3540d5b7 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -251,6 +251,11 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
        }
        vm->vblk_size     = get_unaligned_be32(data + 0x08);
+        if (vm->vblk_size == 0) {
+                ldm_error ("Illegal VBLK size");
+                return false;
+        }
        vm->vblk_offset   = get_unaligned_be32(data + 0x0C);
        vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
index 68d6a216ee79..11f688bd76c5 100644
--- a/fs/partitions/mac.c
+++ b/fs/partitions/mac.c
@@ -29,10 +29,9 @@ static inline void mac_fix_string(char *stg, int len)
 int mac_partition(struct parsed_partitions *state)
 {
-        int slot = 1;
        Sector sect;
        unsigned char *data;
-        int blk, blocks_in_map;
+        int slot, blocks_in_map;
        unsigned secsize;
 #ifdef CONFIG_PPC_PMAC
        int found_root = 0;
@@ -59,10 +58,14 @@ int mac_partition(struct parsed_partitions *state)
                put_dev_sector(sect);
                return 0;               /* not a MacOS disk */
        }
-        strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
        blocks_in_map = be32_to_cpu(part->map_count);
-        for (blk = 1; blk <= blocks_in_map; ++blk) {
+        if (blocks_in_map < 0 || blocks_in_map >= DISK_MAX_PARTS) {
-                int pos = blk * secsize;
+                put_dev_sector(sect);
+                return 0;
+        }
+        strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
+        for (slot = 1; slot <= blocks_in_map; ++slot) {
+                int pos = slot * secsize;
                put_dev_sector(sect);
                data = read_part_sector(state, pos/512, &sect);
                if (!data)
@@ -113,13 +116,11 @@ int mac_partition(struct parsed_partitions *state)
                        }
                        if (goodness > found_root_goodness) {
-                                found_root = blk;
+                                found_root = slot;
                                found_root_goodness = goodness;
                        }
                }
 #endif /* CONFIG_PPC_PMAC */
-                ++slot;
        }
 #ifdef CONFIG_PPC_PMAC
        if (found_root_goodness)
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 39df95a0ec25..b1cf6bf4b41d 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -22,6 +22,7 @@
 #include <linux/errno.h>
+EXPORT_SYMBOL(posix_acl_init);
 EXPORT_SYMBOL(posix_acl_alloc);
 EXPORT_SYMBOL(posix_acl_clone);
 EXPORT_SYMBOL(posix_acl_valid);
@@ -32,6 +33,16 @@ EXPORT_SYMBOL(posix_acl_chmod_masq);
 EXPORT_SYMBOL(posix_acl_permission);
 /*
+ * Initialize a fresh posix_acl: refcount set to 1, entry count to @count
+ */
+void
+posix_acl_init(struct posix_acl *acl, int count)
+{
+        atomic_set(&acl->a_refcount, 1);
+        acl->a_count = count;
+}
+/*
 * Allocate a new ACL with the specified number of entries.
 */
 struct posix_acl *
@@ -40,10 +51,8 @@ posix_acl_alloc(int count, gfp_t flags)
        const size_t size = sizeof(struct posix_acl) +
                            count * sizeof(struct posix_acl_entry);
        struct posix_acl *acl = kmalloc(size, flags);
-        if (acl) {
+        if (acl)
-                atomic_set(&acl->a_refcount, 1);
+                posix_acl_init(acl, count);
-                acl->a_count = count;
-        }
        return acl;
 }
diff --git a/fs/proc/array.c b/fs/proc/array.c
index df2b703b9d0f..7c99c1cf7e5c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -353,9 +353,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
        task_cap(m, task);
        task_cpus_allowed(m, task);
        cpuset_task_status_allowed(m, task);
-#if defined(CONFIG_S390)
-        task_show_regs(m, task);
-#endif
        task_context_switch_counts(m, task);
        return 0;
 }
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index eafc22ab1fdd..b701eaa482bf 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -67,7 +67,7 @@ static void *c_start(struct seq_file *m, loff_t *pos)
        struct console *con;
        loff_t off = 0;
-        acquire_console_sem();
+        console_lock();
        for_each_console(con)
                if (off++ == *pos)
                        break;
@@ -84,7 +84,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos)
 static void c_stop(struct seq_file *m, void *v)
 {
-        release_console_sem();
+        console_unlock();
 }
 static const struct seq_operations consoles_op = {
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index d9396a4fc7ff..927cbd115e53 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -233,7 +233,7 @@ void __init proc_device_tree_init(void)
                return;
        root = of_find_node_by_path("/");
        if (root == NULL) {
-                printk(KERN_ERR "/proc/device-tree: can't find root\n");
+                pr_debug("/proc/device-tree: can't find root\n");
                return;
        }
        proc_device_tree_add_node(root, proc_device_tree);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ba5f51ec3458..68fdf45cc6c9 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -771,7 +771,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                                        EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
                                        dentry, inode, &security);
        if (retval) {
-                dir->i_nlink--;
+                DEC_DIR_INODE_NLINK(dir)
                goto out_failed;
        }
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2fb2882f0fa7..8ab48bc2fa7d 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -63,6 +63,14 @@ static struct buffer_head *get_block_length(struct super_block *sb,
                *length = (unsigned char) bh->b_data[*offset] |
                        (unsigned char) bh->b_data[*offset + 1] << 8;
                *offset += 2;
+                if (*offset == msblk->devblksize) {
+                        put_bh(bh);
+                        bh = sb_bread(sb, ++(*cur_index));
+                        if (bh == NULL)
+                                return NULL;
+                        *offset = 0;
+                }
        }
        return bh;
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index 856756ca5ee4..c4eb40018256 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -95,12 +95,6 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
                        if (!buffer_uptodate(bh[k]))
                                goto release_mutex;
-                        if (avail == 0) {
-                                offset = 0;
-                                put_bh(bh[k++]);
-                                continue;
-                        }
                        stream->buf.in = bh[k]->b_data + offset;
                        stream->buf.in_size = avail;
                        stream->buf.in_pos = 0;
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 818a5e063faf..4661ae2b1cec 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -82,12 +82,6 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
                        if (!buffer_uptodate(bh[k]))
                                goto release_mutex;
-                        if (avail == 0) {
-                                offset = 0;
-                                put_bh(bh[k++]);
-                                continue;
-                        }
                        stream->next_in = bh[k]->b_data + offset;
                        stream->avail_in = avail;
                        offset = 0;
diff --git a/fs/super.c b/fs/super.c
index 74e149efed81..7e9dd4cc2c01 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -177,6 +177,11 @@ void deactivate_locked_super(struct super_block *s)
        struct file_system_type *fs = s->s_type;
        if (atomic_dec_and_test(&s->s_active)) {
                fs->kill_sb(s);
+                /*
+                 * We need to call rcu_barrier() so that all RCU-delayed inode
+                 * frees have completed before we release the fs module.
+                 */
+                rcu_barrier();
                put_filesystem(fs);
                put_super(s);
        } else {
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b427b1208c26..e474fbcf8bde 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -245,7 +245,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
                new_de = sysv_find_entry(new_dentry, &new_page);
                if (!new_de)
                        goto out_dir;
-                inode_inc_link_count(old_inode);
                sysv_set_link(new_de, new_page, old_inode);
                new_inode->i_ctime = CURRENT_TIME_SEC;
                if (dir_de)
@@ -257,18 +256,15 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
                        if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
                                goto out_dir;
                }
-                inode_inc_link_count(old_inode);
                err = sysv_add_link(new_dentry, old_inode);
-                if (err) {
+                if (err)
-                        inode_dec_link_count(old_inode);
                        goto out_dir;
-                }
                if (dir_de)
                        inode_inc_link_count(new_dir);
        }
        sysv_delete_entry(old_de, old_page);
-        inode_dec_link_count(old_inode);
+        mark_inode_dirty(old_inode);
        if (dir_de) {
                sysv_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2be0f9eb86d2..b7c338d5e9df 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,8 @@
 #include <linux/crc-itu-t.h>
 #include <linux/exportfs.h>
+enum { UDF_MAX_LINKS = 0xffff };
 static inline int udf_match(int len1, const unsigned char *name1, int len2,
                            const unsigned char *name2)
 {
@@ -650,7 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        struct udf_inode_info *iinfo;
        err = -EMLINK;
-        if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1)
+        if (dir->i_nlink >= UDF_MAX_LINKS)
                goto out;
        err = -EIO;
@@ -1034,9 +1036,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
        struct fileIdentDesc cfi, *fi;
        int err;
-        if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
+        if (inode->i_nlink >= UDF_MAX_LINKS)
                return -EMLINK;
-        }
        fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
        if (!fi) {
@@ -1131,9 +1132,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
                        goto end_rename;
                retval = -EMLINK;
-                if (!new_inode &&
+                if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
-                        new_dir->i_nlink >=
-                                (256 << sizeof(new_dir->i_nlink)) - 1)
                        goto end_rename;
        }
        if (!nfi) {
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 12f39b9e4437..d6f681535eb8 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -306,7 +306,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
                new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
                if (!new_de)
                        goto out_dir;
-                inode_inc_link_count(old_inode);
                ufs_set_link(new_dir, new_de, new_page, old_inode);
                new_inode->i_ctime = CURRENT_TIME_SEC;
                if (dir_de)
@@ -318,12 +317,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        if (new_dir->i_nlink >= UFS_LINK_MAX)
                                goto out_dir;
                }
-                inode_inc_link_count(old_inode);
                err = ufs_add_link(new_dentry, old_inode);
-                if (err) {
+                if (err)
-                        inode_dec_link_count(old_inode);
                        goto out_dir;
-                }
                if (dir_de)
                        inode_inc_link_count(new_dir);
        }
@@ -331,12 +327,11 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
        /*
         * Like most other Unix systems, set the ctime for inodes on a
         * rename.
-         * inode_dec_link_count() will mark the inode dirty.
         */
        old_inode->i_ctime = CURRENT_TIME_SEC;
        ufs_delete_entry(old_dir, old_de, old_page);
-        inode_dec_link_count(old_inode);
+        mark_inode_dirty(old_inode);
        if (dir_de) {
                ufs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 05201ae719e5..d61611c88012 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -152,6 +152,8 @@ xfs_ioc_trim(
        if (!capable(CAP_SYS_ADMIN))
                return -XFS_ERROR(EPERM);
+        if (!blk_queue_discard(q))
+                return -XFS_ERROR(EOPNOTSUPP);
        if (copy_from_user(&range, urange, sizeof(range)))
                return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index b06ede1d0bed..0ca0e3c024d7 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -695,14 +695,19 @@ xfs_ioc_fsgeometry_v1(
        xfs_mount_t             *mp,
        void                    __user *arg)
 {
-        xfs_fsop_geom_v1_t      fsgeo;
+        xfs_fsop_geom_t         fsgeo;
        int                     error;
-        error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3);
+        error = xfs_fs_geometry(mp, &fsgeo, 3);
        if (error)
                return -error;
-        if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
+        /*
+         * Caller should have passed an argument of type
+         * xfs_fsop_geom_v1_t.  This is a proper subset of the
+         * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
+         */
+        if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
                return -XFS_ERROR(EFAULT);
        return 0;
 }
@@ -985,10 +990,22 @@ xfs_ioctl_setattr(
                /*
                 * Extent size must be a multiple of the appropriate block
-                 * size, if set at all.
+                 * size, if set at all. It must also be no larger than the
+                 * maximum extent size supported by the filesystem.
+                 *
+                 * Also, for non-realtime files, limit the extent size hint to
+                 * half the size of the AGs in the filesystem so alignment
+                 * doesn't result in extents larger than an AG.
                 */
                if (fa->fsx_extsize != 0) {
-                        xfs_extlen_t    size;
+                        xfs_extlen_t    size;
+                        xfs_fsblock_t   extsize_fsb;
+                        extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
+                        if (extsize_fsb > MAXEXTLEN) {
+                                code = XFS_ERROR(EINVAL);
+                                goto error_return;
+                        }
                        if (XFS_IS_REALTIME_INODE(ip) ||
                            ((mask & FSX_XFLAGS) &&
@@ -997,6 +1014,10 @@ xfs_ioctl_setattr(
                                       mp->m_sb.sb_blocklog;
                        } else {
                                size = mp->m_sb.sb_blocksize;
+                                if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
+                                        code = XFS_ERROR(EINVAL);
+                                        goto error_return;
+                                }
                        }
                        if (fa->fsx_extsize % size) {
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index f8e854b4fde8..206a2815ced6 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1863,12 +1863,14 @@ xfs_qm_dqreclaim_one(void)
        xfs_dquot_t     *dqpout;
        xfs_dquot_t     *dqp;
        int             restarts;
+        int             startagain;
        restarts = 0;
        dqpout = NULL;
        /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
-startagain:
+again:
+        startagain = 0;
        mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
        list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
@@ -1885,13 +1887,10 @@ startagain:
                        ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
                        trace_xfs_dqreclaim_want(dqp);
-                        xfs_dqunlock(dqp);
-                        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                        if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-                                return NULL;
                        XQM_STATS_INC(xqmstats.xs_qm_dqwants);
-                        goto startagain;
+                        restarts++;
+                        startagain = 1;
+                        goto dqunlock;
                }
                /*
@@ -1906,23 +1905,20 @@ startagain:
                        ASSERT(list_empty(&dqp->q_mplist));
                        list_del_init(&dqp->q_freelist);
                        xfs_Gqm->qm_dqfrlist_cnt--;
-                        xfs_dqunlock(dqp);
                        dqpout = dqp;
                        XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
-                        break;
+                        goto dqunlock;
                }
                ASSERT(dqp->q_hash);
                ASSERT(!list_empty(&dqp->q_mplist));
                /*
-                 * Try to grab the flush lock. If this dquot is in the process of
+                 * Try to grab the flush lock. If this dquot is in the process
-                 * getting flushed to disk, we don't want to reclaim it.
+                 * of getting flushed to disk, we don't want to reclaim it.
                 */
-                if (!xfs_dqflock_nowait(dqp)) {
+                if (!xfs_dqflock_nowait(dqp))
-                        xfs_dqunlock(dqp);
+                        goto dqunlock;
-                        continue;
-                }
                /*
                 * We have the flush lock so we know that this is not in the
@@ -1944,8 +1940,7 @@ startagain:
                                xfs_fs_cmn_err(CE_WARN, mp,
                        "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
                        }
-                        xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
+                        goto dqunlock;
-                        continue;
                }
                /*
@@ -1967,13 +1962,8 @@ startagain:
                 */
                if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
                        restarts++;
-                        mutex_unlock(&dqp->q_hash->qh_lock);
+                        startagain = 1;
-                        xfs_dqfunlock(dqp);
+                        goto qhunlock;
-                        xfs_dqunlock(dqp);
-                        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                        if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS)
-                                return NULL;
-                        goto startagain;
                }
                ASSERT(dqp->q_nrefs == 0);
@@ -1986,14 +1976,20 @@ startagain:
                xfs_Gqm->qm_dqfrlist_cnt--;
                dqpout = dqp;
                mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+qhunlock:
                mutex_unlock(&dqp->q_hash->qh_lock);
 dqfunlock:
                xfs_dqfunlock(dqp);
+dqunlock:
                xfs_dqunlock(dqp);
                if (dqpout)
                        break;
                if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-                        return NULL;
+                        break;
+                if (startagain) {
+                        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+                        goto again;
+                }
        }
        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
        return dqpout;
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 0ab56b32c7eb..d0b3bc72005b 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -75,6 +75,22 @@ typedef unsigned int xfs_alloctype_t;
 #define XFS_ALLOC_SET_ASIDE(mp)  (4 + ((mp)->m_sb.sb_agcount * 4))
 /*
+ * When deciding how much space to allocate out of an AG, we limit the
+ * allocation maximum size to the size of the AG. However, we cannot use all the
+ * blocks in the AG - some are permanently used by metadata. These
+ * blocks are generally:
+ *      - the AG superblock, AGF, AGI and AGFL
+ *      - the AGF (bno and cnt) and AGI btree root blocks
+ *      - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits
+ *
+ * The AG headers are sector sized, so the amount of space they take up is
+ * dependent on filesystem geometry. The others are all single blocks.
+ */
+#define XFS_ALLOC_AG_MAX_USABLE(mp)     \
+        ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
+/*
 * Argument structure for xfs_alloc routines.
 * This is turned into a structure to avoid having 20 arguments passed
 * down several levels of the stack.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 4111cd3966c7..dc3afd7739ff 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1038,17 +1038,34 @@ xfs_bmap_add_extent_delay_real(
                 * Filling in the middle part of a previous delayed allocation.
                 * Contiguity is impossible here.
                 * This case is avoided almost all the time.
+                 *
+                 * We start with a delayed allocation:
+                 *
+                 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
+                 *  PREV @ idx
+                 *
+                 * and we are allocating:
+                 *                     +rrrrrrrrrrrrrrrrr+
+                 *                            new
+                 *
+                 * and we set it up for insertion as:
+                 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
+                 *                            new
+                 *  PREV @ idx          LEFT              RIGHT
+                 *                      inserted at idx + 1
                 */
                temp = new->br_startoff - PREV.br_startoff;
-                trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_);
-                xfs_bmbt_set_blockcount(ep, temp);
-                r[0] = *new;
-                r[1].br_state = PREV.br_state;
-                r[1].br_startblock = 0;
-                r[1].br_startoff = new_endoff;
                temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
-                r[1].br_blockcount = temp2;
+                trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_);
-                xfs_iext_insert(ip, idx + 1, 2, &r[0], state);
+                xfs_bmbt_set_blockcount(ep, temp);      /* truncate PREV */
+                LEFT = *new;
+                RIGHT.br_state = PREV.br_state;
+                RIGHT.br_startblock = nullstartblock(
+                                (int)xfs_bmap_worst_indlen(ip, temp2));
+                RIGHT.br_startoff = new_endoff;
+                RIGHT.br_blockcount = temp2;
+                /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
+                xfs_iext_insert(ip, idx + 1, 2, &LEFT, state);
                ip->i_df.if_lastex = idx + 1;
                ip->i_d.di_nextents++;
                if (cur == NULL)
@@ -2430,7 +2447,7 @@ xfs_bmap_btalloc_nullfb(
                startag = ag = 0;
        pag = xfs_perag_get(mp, ag);
-        while (*blen < ap->alen) {
+        while (*blen < args->maxlen) {
                if (!pag->pagf_init) {
                        error = xfs_alloc_pagf_init(mp, args->tp, ag,
                                                    XFS_ALLOC_FLAG_TRYLOCK);
@@ -2452,7 +2469,7 @@ xfs_bmap_btalloc_nullfb(
                        notinit = 1;
                if (xfs_inode_is_filestream(ap->ip)) {
-                        if (*blen >= ap->alen)
+                        if (*blen >= args->maxlen)
                                break;
                        if (ap->userdata) {
@@ -2498,14 +2515,14 @@ xfs_bmap_btalloc_nullfb(
         * If the best seen length is less than the request
         * length, use the best as the minimum.
         */
-        else if (*blen < ap->alen)
+        else if (*blen < args->maxlen)
                args->minlen = *blen;
        /*
-         * Otherwise we've seen an extent as big as alen,
+         * Otherwise we've seen an extent as big as maxlen,
         * use that as the minimum.
         */
        else
-                args->minlen = ap->alen;
+                args->minlen = args->maxlen;
        /*
         * set the failure fallback case to look in the selected
@@ -2573,7 +2590,9 @@ xfs_bmap_btalloc(
        args.tp = ap->tp;
        args.mp = mp;
        args.fsbno = ap->rval;
-        args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
+        /* Trim the allocation back to the maximum an AG can fit. */
+        args.maxlen = MIN(ap->alen, XFS_ALLOC_AG_MAX_USABLE(mp));
        args.firstblock = ap->firstblock;
        blen = 0;
        if (nullfb) {
@@ -2621,7 +2640,7 @@ xfs_bmap_btalloc(
                        /*
                         * Adjust for alignment
                         */
-                        if (blen > args.alignment && blen <= ap->alen)
+                        if (blen > args.alignment && blen <= args.maxlen)
                                args.minlen = blen - args.alignment;
                        args.minalignslop = 0;
                } else {
@@ -2640,7 +2659,7 @@ xfs_bmap_btalloc(
                         * of minlen+alignment+slop doesn't go up
                         * between the calls.
                         */
-                        if (blen > mp->m_dalign && blen <= ap->alen)
+                        if (blen > mp->m_dalign && blen <= args.maxlen)
                                nextminlen = blen - mp->m_dalign;
                        else
                                nextminlen = args.minlen;
@@ -4485,6 +4504,16 @@ xfs_bmapi(
                                /* Figure out the extent size, adjust alen */
                                extsz = xfs_get_extsz_hint(ip);
                                if (extsz) {
+                                        /*
+                                         * make sure we don't exceed a single
+                                         * extent length when we align the
+                                         * extent by reducing length we are
+                                         * going to allocate by the maximum
+                                         * amount extent size alignment may
+                                         * require.
+                                         */
+                                        alen = XFS_FILBLKS_MIN(len,
+                                                   MAXEXTLEN - (2 * extsz - 1));
                                        error = xfs_bmap_extsize_align(mp,
                                                        &got, &prev, extsz,
                                                        rt, eof,
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 98c6f73b6752..6f8c21ce0d6d 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -427,13 +427,15 @@ xfs_buf_item_unpin(
                if (remove) {
                        /*
-                         * We have to remove the log item from the transaction
+                         * If we are in a transaction context, we have to
-                         * as we are about to release our reference to the
+                         * remove the log item from the transaction as we are
-                         * buffer.  If we don't, the unlock that occurs later
+                         * about to release our reference to the buffer.  If we
-                         * in xfs_trans_uncommit() will ry to reference the
+                         * don't, the unlock that occurs later in
+                         * xfs_trans_uncommit() will try to reference the
                         * buffer which we no longer have a hold on.
                         */
-                        xfs_trans_del_item(lip);
+                        if (lip->li_desc)
+                                xfs_trans_del_item(lip);
                        /*
                         * Since the transaction no longer refers to the buffer,
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 75f2ef60e579..d22e62623437 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -138,7 +138,8 @@ xfs_efi_item_unpin(
        if (remove) {
                ASSERT(!(lip->li_flags & XFS_LI_IN_AIL));
-                xfs_trans_del_item(lip);
+                if (lip->li_desc)
+                        xfs_trans_del_item(lip);
                xfs_efi_item_free(efip);
                return;
        }
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cec89dd5d7d2..85668efb3e3e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -53,6 +53,9 @@ xfs_fs_geometry(
        xfs_fsop_geom_t         *geo,
        int                     new_version)
 {
+        memset(geo, 0, sizeof(*geo));
        geo->blocksize = mp->m_sb.sb_blocksize;
        geo->rtextsize = mp->m_sb.sb_rextsize;
        geo->agblocks = mp->m_sb.sb_agblocks;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 55582bd66659..8a0f044750c3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -337,7 +337,12 @@ xfs_iomap_prealloc_size(
                int shift = 0;
                int64_t freesp;
-                alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size);
+                /*
+                 * rounddown_pow_of_two() returns an undefined result
+                 * if we pass in alloc_blocks = 0. Hence the "+ 1" to
+                 * ensure we always pass in a non-zero value.
+                 */
+                alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size) + 1;
                alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
                                        rounddown_pow_of_two(alloc_blocks));
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 916eb7db14d9..3bd3291ef8d2 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -191,7 +191,7 @@ void	  xfs_log_ticket_put(struct xlog_ticket *ticket);
 xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
-int     xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
+void    xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
                                struct xfs_log_vec *log_vector,
                                xfs_lsn_t *commit_lsn, int flags);
 bool    xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 9dc8125d04e5..9ca59be08977 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -543,7 +543,7 @@ xlog_cil_push(
        error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0);
        if (error)
-                goto out_abort;
+                goto out_abort_free_ticket;
        /*
         * now that we've written the checkpoint into the log, strictly
@@ -569,8 +569,9 @@ restart:
        }
        spin_unlock(&cil->xc_cil_lock);
+        /* xfs_log_done always frees the ticket on error. */
        commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
-        if (error || commit_lsn == -1)
+        if (commit_lsn == -1)
                goto out_abort;
        /* attach all the transactions w/ busy extents to iclog */
@@ -600,6 +601,8 @@ out_free_ticket:
        kmem_free(new_ctx);
        return 0;
+out_abort_free_ticket:
+        xfs_log_ticket_put(tic);
 out_abort:
        xlog_cil_committed(ctx, XFS_LI_ABORTED);
        return XFS_ERROR(EIO);
@@ -622,7 +625,7 @@ out_abort:
 * background commit, returns without it held once background commits are
 * allowed again.
 */
-int
+void
 xfs_log_commit_cil(
        struct xfs_mount        *mp,
        struct xfs_trans        *tp,
@@ -637,11 +640,6 @@ xfs_log_commit_cil(
        if (flags & XFS_TRANS_RELEASE_LOG_RES)
                log_flags = XFS_LOG_REL_PERM_RESERV;
-        if (XLOG_FORCED_SHUTDOWN(log)) {
-                xlog_cil_free_logvec(log_vector);
-                return XFS_ERROR(EIO);
-        }
        /*
         * do all the hard work of formatting items (including memory
         * allocation) outside the CIL context lock. This prevents stalling CIL
@@ -701,7 +699,6 @@ xfs_log_commit_cil(
         */
        if (push)
                xlog_cil_push(log, 0);
-        return 0;
 }
 /*
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 33dbc4e0ad62..76922793f64f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1446,6 +1446,14 @@ xfs_log_item_batch_insert(
 * Bulk operation version of xfs_trans_committed that takes a log vector of
 * items to insert into the AIL. This uses bulk AIL insertion techniques to
 * minimise lock traffic.
+ *
+ * If we are called with the aborted flag set, it is because a log write during
+ * a CIL checkpoint commit has failed. In this case, all the items in the
+ * checkpoint have already gone through IOP_COMMITTING and IOP_UNLOCK, which
+ * means that checkpoint commit abort handling is treated exactly the same
+ * as an iclog write error even though we haven't started any IO yet. Hence in
+ * this case all we need to do is IOP_COMMITTED processing, followed by an
+ * IOP_UNPIN(aborted) call.
 */
 void
 xfs_trans_committed_bulk(
@@ -1472,6 +1480,16 @@ xfs_trans_committed_bulk(
                if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
                        continue;
+                /*
+                 * if we are aborting the operation, no point in inserting the
+                 * object into the AIL as we are in a shutdown situation.
+                 */
+                if (aborted) {
+                        ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount));
+                        IOP_UNPIN(lip, 1);
+                        continue;
+                }
                if (item_lsn != commit_lsn) {
                        /*
@@ -1503,20 +1521,24 @@ xfs_trans_committed_bulk(
 }
 /*
- * Called from the trans_commit code when we notice that
+ * Called from the trans_commit code when we notice that the filesystem is in
- * the filesystem is in the middle of a forced shutdown.
+ * the middle of a forced shutdown.
+ *
+ * When we are called here, we have already pinned all the items in the
+ * transaction. However, neither IOP_COMMITTING nor IOP_UNLOCK has been called
+ * so we can simply walk the items in the transaction, unpin them with an abort
+ * flag and then free the items. Note that unpinning the items can result in
+ * them being freed immediately, so we need to use a safe list traversal method
+ * here.
 */
 STATIC void
 xfs_trans_uncommit(
        struct xfs_trans        *tp,
        uint                    flags)
 {
-        struct xfs_log_item_desc *lidp;
+        struct xfs_log_item_desc *lidp, *n;
-        list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+        list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) {
-                /*
-                 * Unpin all but those that aren't dirty.
-                 */
                if (lidp->lid_flags & XFS_LID_DIRTY)
                        IOP_UNPIN(lidp->lid_item, 1);
        }
@@ -1733,7 +1755,6 @@ xfs_trans_commit_cil(
        int                     flags)
 {
        struct xfs_log_vec      *log_vector;
-        int                     error;
        /*
         * Get each log item to allocate a vector structure for
@@ -1744,9 +1765,7 @@ xfs_trans_commit_cil(
        if (!log_vector)
                return ENOMEM;
-        error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
+        xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
-        if (error)
-                return error;
        current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
        xfs_trans_free(tp);