83 files changed, 1122 insertions, 1277 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 062a5f6a1448..12a3de0ee6da 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -52,7 +52,8 @@
 struct aio_ring {
        unsigned        id;     /* kernel internal index number */
        unsigned        nr;     /* number of io_events */
-        unsigned        head;
+        unsigned        head;   /* Written to by userland or under ring_lock
+                                 * mutex by aio_read_events_ring(). */
        unsigned        tail;
        unsigned        magic;
@@ -243,6 +244,11 @@ static void aio_free_ring(struct kioctx *ctx)
 {
        int i;
+        /* Disconnect the kioctx from the ring file.  This prevents future
+         * accesses to the kioctx from page migration.
+         */
+        put_aio_ring_file(ctx);
        for (i = 0; i < ctx->nr_pages; i++) {
                struct page *page;
                pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
@@ -254,8 +260,6 @@ static void aio_free_ring(struct kioctx *ctx)
                put_page(page);
        }
-        put_aio_ring_file(ctx);
        if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
                kfree(ctx->ring_pages);
                ctx->ring_pages = NULL;
@@ -283,29 +287,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 {
        struct kioctx *ctx;
        unsigned long flags;
+        pgoff_t idx;
        int rc;
        rc = 0;
-        /* Make sure the old page hasn't already been changed */
+        /* mapping->private_lock here protects against the kioctx teardown.  */
        spin_lock(&mapping->private_lock);
        ctx = mapping->private_data;
-        if (ctx) {
+        if (!ctx) {
-                pgoff_t idx;
+                rc = -EINVAL;
-                spin_lock_irqsave(&ctx->completion_lock, flags);
+                goto out;
-                idx = old->index;
+        }
-                if (idx < (pgoff_t)ctx->nr_pages) {
-                        if (ctx->ring_pages[idx] != old)
+        /* The ring_lock mutex.  This prevents aio_read_events() from writing
-                                rc = -EAGAIN;
+         * to the ring's head, and prevents page migration from mucking in
-                } else
+         * a partially initialized kioctx.
-                        rc = -EINVAL;
+         */
-                spin_unlock_irqrestore(&ctx->completion_lock, flags);
+        if (!mutex_trylock(&ctx->ring_lock)) {
+                rc = -EAGAIN;
+                goto out;
+        }
+        idx = old->index;
+        if (idx < (pgoff_t)ctx->nr_pages) {
+                /* Make sure the old page hasn't already been changed */
+                if (ctx->ring_pages[idx] != old)
+                        rc = -EAGAIN;
        } else
                rc = -EINVAL;
-        spin_unlock(&mapping->private_lock);
        if (rc != 0)
-                return rc;
+                goto out_unlock;
        /* Writeback must be complete */
        BUG_ON(PageWriteback(old));
@@ -314,38 +327,26 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
        rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
        if (rc != MIGRATEPAGE_SUCCESS) {
                put_page(new);
-                return rc;
+                goto out_unlock;
        }
-        /* We can potentially race against kioctx teardown here.  Use the
+        /* Take completion_lock to prevent other writes to the ring buffer
-         * address_space's private data lock to protect the mapping's
+         * while the old page is copied to the new.  This prevents new
-         * private_data.
+         * events from being lost.
         */
-        spin_lock(&mapping->private_lock);
+        spin_lock_irqsave(&ctx->completion_lock, flags);
-        ctx = mapping->private_data;
+        migrate_page_copy(new, old);
-        if (ctx) {
+        BUG_ON(ctx->ring_pages[idx] != old);
-                pgoff_t idx;
+        ctx->ring_pages[idx] = new;
-                spin_lock_irqsave(&ctx->completion_lock, flags);
+        spin_unlock_irqrestore(&ctx->completion_lock, flags);
-                migrate_page_copy(new, old);
-                idx = old->index;
-                if (idx < (pgoff_t)ctx->nr_pages) {
-                        /* And only do the move if things haven't changed */
-                        if (ctx->ring_pages[idx] == old)
-                                ctx->ring_pages[idx] = new;
-                        else
-                                rc = -EAGAIN;
-                } else
-                        rc = -EINVAL;
-                spin_unlock_irqrestore(&ctx->completion_lock, flags);
-        } else
-                rc = -EBUSY;
-        spin_unlock(&mapping->private_lock);
-        if (rc == MIGRATEPAGE_SUCCESS)
+        /* The old page is no longer accessible. */
-                put_page(old);
+        put_page(old);
-        else
-                put_page(new);
+out_unlock:
+        mutex_unlock(&ctx->ring_lock);
+out:
+        spin_unlock(&mapping->private_lock);
        return rc;
 }
 #endif
@@ -380,7 +381,7 @@ static int aio_setup_ring(struct kioctx *ctx)
        file = aio_private_file(ctx, nr_pages);
        if (IS_ERR(file)) {
                ctx->aio_ring_file = NULL;
-                return -EAGAIN;
+                return -ENOMEM;
        }
        ctx->aio_ring_file = file;
@@ -415,7 +416,7 @@ static int aio_setup_ring(struct kioctx *ctx)
        if (unlikely(i != nr_pages)) {
                aio_free_ring(ctx);
-                return -EAGAIN;
+                return -ENOMEM;
        }
        ctx->mmap_size = nr_pages * PAGE_SIZE;
@@ -429,7 +430,7 @@ static int aio_setup_ring(struct kioctx *ctx)
        if (IS_ERR((void *)ctx->mmap_base)) {
                ctx->mmap_size = 0;
                aio_free_ring(ctx);
-                return -EAGAIN;
+                return -ENOMEM;
        }
        pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
@@ -556,6 +557,10 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
                                        rcu_read_unlock();
                                        spin_unlock(&mm->ioctx_lock);
+                                        /* While kioctx setup is in progress,
+                                         * we are protected from page migration
+                                         * changing ring_pages by ->ring_lock.
+                                         */
                                        ring = kmap_atomic(ctx->ring_pages[0]);
                                        ring->id = ctx->id;
                                        kunmap_atomic(ring);
@@ -640,24 +645,28 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
        ctx->max_reqs = nr_events;
-        if (percpu_ref_init(&ctx->users, free_ioctx_users))
-                goto err;
-        if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
-                goto err;
        spin_lock_init(&ctx->ctx_lock);
        spin_lock_init(&ctx->completion_lock);
        mutex_init(&ctx->ring_lock);
+        /* Protect against page migration throughout kioctx setup by keeping
+         * the ring_lock mutex held until setup is complete. */
+        mutex_lock(&ctx->ring_lock);
        init_waitqueue_head(&ctx->wait);
        INIT_LIST_HEAD(&ctx->active_reqs);
+        if (percpu_ref_init(&ctx->users, free_ioctx_users))
+                goto err;
+        if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
+                goto err;
        ctx->cpu = alloc_percpu(struct kioctx_cpu);
        if (!ctx->cpu)
                goto err;
-        if (aio_setup_ring(ctx) < 0)
+        err = aio_setup_ring(ctx);
+        if (err < 0)
                goto err;
        atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
@@ -683,6 +692,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
        if (err)
                goto err_cleanup;
+        /* Release the ring_lock mutex now that all setup is complete. */
+        mutex_unlock(&ctx->ring_lock);
        pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
                 ctx, ctx->user_id, mm, ctx->nr_events);
        return ctx;
@@ -692,6 +704,7 @@ err_cleanup:
 err_ctx:
        aio_free_ring(ctx);
 err:
+        mutex_unlock(&ctx->ring_lock);
        free_percpu(ctx->cpu);
        free_percpu(ctx->reqs.pcpu_count);
        free_percpu(ctx->users.pcpu_count);
@@ -1024,6 +1037,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
        mutex_lock(&ctx->ring_lock);
+        /* Access to ->ring_pages here is protected by ctx->ring_lock. */
        ring = kmap_atomic(ctx->ring_pages[0]);
        head = ring->head;
        tail = ring->tail;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 3182c0e68b42..232e03d4780d 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -103,6 +103,9 @@ static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *i
        if (tmp.size < sizeof(tmp))
                return ERR_PTR(-EINVAL);
+        if (tmp.size > (PATH_MAX + sizeof(tmp)))
+                return ERR_PTR(-ENAMETOOLONG);
        return memdup_user(in, tmp.size);
 }
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 29696b78d1f4..1c2ce0c87711 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -182,6 +182,9 @@ static int bdev_integrity_enabled(struct block_device *bdev, int rw)
 */
 int bio_integrity_enabled(struct bio *bio)
 {
+        if (!bio_is_rw(bio))
+                return 0;
        /* Already protected? */
        if (bio_integrity(bio))
                return 0;
@@ -309,10 +312,9 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
 {
        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
        struct blk_integrity_exchg bix;
-        struct bio_vec bv;
+        struct bio_vec *bv;
-        struct bvec_iter iter;
        sector_t sector;
-        unsigned int sectors, ret = 0;
+        unsigned int sectors, ret = 0, i;
        void *prot_buf = bio->bi_integrity->bip_buf;
        if (operate)
@@ -323,16 +325,16 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
        bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
        bix.sector_size = bi->sector_size;
-        bio_for_each_segment(bv, bio, iter) {
+        bio_for_each_segment_all(bv, bio, i) {
-                void *kaddr = kmap_atomic(bv.bv_page);
+                void *kaddr = kmap_atomic(bv->bv_page);
-                bix.data_buf = kaddr + bv.bv_offset;
+                bix.data_buf = kaddr + bv->bv_offset;
-                bix.data_size = bv.bv_len;
+                bix.data_size = bv->bv_len;
                bix.prot_buf = prot_buf;
                bix.sector = sector;
-                if (operate) {
+                if (operate)
                        bi->generate_fn(&bix);
-                } else {
+                else {
                        ret = bi->verify_fn(&bix);
                        if (ret) {
                                kunmap_atomic(kaddr);
@@ -340,7 +342,7 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
                        }
                }
-                sectors = bv.bv_len / bi->sector_size;
+                sectors = bv->bv_len / bi->sector_size;
                sector += sectors;
                prot_buf += sectors * bi->tuple_size;
diff --git a/fs/bio.c b/fs/bio.c
index b1bc722b89aa..6f0362b77806 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1002,7 +1002,7 @@ struct bio_map_data {
 };
 static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
-                             struct sg_iovec *iov, int iov_count,
+                             const struct sg_iovec *iov, int iov_count,
                             int is_our_pages)
 {
        memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
@@ -1022,7 +1022,7 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs,
                       sizeof(struct sg_iovec) * iov_count, gfp_mask);
 }
-static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
+static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count,
                          int to_user, int from_user, int do_free_page)
 {
        int ret = 0, i;
@@ -1120,7 +1120,7 @@ EXPORT_SYMBOL(bio_uncopy_user);
 */
 struct bio *bio_copy_user_iov(struct request_queue *q,
                              struct rq_map_data *map_data,
-                              struct sg_iovec *iov, int iov_count,
+                              const struct sg_iovec *iov, int iov_count,
                              int write_to_vm, gfp_t gfp_mask)
 {
        struct bio_map_data *bmd;
@@ -1259,7 +1259,7 @@ EXPORT_SYMBOL(bio_copy_user);
 static struct bio *__bio_map_user_iov(struct request_queue *q,
                                      struct block_device *bdev,
-                                      struct sg_iovec *iov, int iov_count,
+                                      const struct sg_iovec *iov, int iov_count,
                                      int write_to_vm, gfp_t gfp_mask)
 {
        int i, j;
@@ -1407,7 +1407,7 @@ EXPORT_SYMBOL(bio_map_user);
 *      device. Returns an error pointer in case of error.
 */
 struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
-                             struct sg_iovec *iov, int iov_count,
+                             const struct sg_iovec *iov, int iov_count,
                             int write_to_vm, gfp_t gfp_mask)
 {
        struct bio *bio;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ba0d2b05bb78..552a8d13bc32 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1518,7 +1518,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
        BUG_ON(iocb->ki_pos != pos);
        blk_start_plug(&plug);
-        ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+        ret = __generic_file_aio_write(iocb, iov, nr_segs);
        if (ret > 0) {
                ssize_t err;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index ecb5832c0967..5a201d81049c 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -323,6 +323,8 @@ void btrfs_destroy_workqueue(struct btrfs_workqueue *wq)
 void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max)
 {
+        if (!wq)
+                return;
        wq->normal->max_active = max;
        if (wq->high)
                wq->high->max_active = max;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index aad7201ad11b..10db21fa0926 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -330,7 +330,10 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
                goto out;
        }
-        root_level = btrfs_old_root_level(root, time_seq);
+        if (path->search_commit_root)
+                root_level = btrfs_header_level(root->commit_root);
+        else
+                root_level = btrfs_old_root_level(root, time_seq);
        if (root_level + 1 == level) {
                srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -1099,9 +1102,9 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 *
 * returns 0 on success, < 0 on error.
 */
-int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans,
-                                struct btrfs_fs_info *fs_info, u64 bytenr,
+                                  struct btrfs_fs_info *fs_info, u64 bytenr,
-                                u64 time_seq, struct ulist **roots)
+                                  u64 time_seq, struct ulist **roots)
 {
        struct ulist *tmp;
        struct ulist_node *node = NULL;
@@ -1137,6 +1140,20 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
        return 0;
 }
+int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+                         struct btrfs_fs_info *fs_info, u64 bytenr,
+                         u64 time_seq, struct ulist **roots)
+{       /* locking wrapper around __btrfs_find_all_roots(); arguments are passed through unchanged */
+        int ret;        /* result of __btrfs_find_all_roots(), returned to the caller as-is */
+        if (!trans)     /* no transaction handle: hold commit_root_sem for read across the lookup */
+                down_read(&fs_info->commit_root_sem);
+        ret = __btrfs_find_all_roots(trans, fs_info, bytenr, time_seq, roots);
+        if (!trans)     /* same condition as above: drop the read hold taken before the lookup */
+                up_read(&fs_info->commit_root_sem);
+        return ret;
+}
 /*
 * this makes the path point to (inum INODE_ITEM ioff)
 */
@@ -1516,6 +1533,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
                if (IS_ERR(trans))
                        return PTR_ERR(trans);
                btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+        } else {
+                down_read(&fs_info->commit_root_sem);
        }
        ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
@@ -1526,8 +1545,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
        ULIST_ITER_INIT(&ref_uiter);
        while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
-                ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
+                ret = __btrfs_find_all_roots(trans, fs_info, ref_node->val,
-                                           tree_mod_seq_elem.seq, &roots);
+                                             tree_mod_seq_elem.seq, &roots);
                if (ret)
                        break;
                ULIST_ITER_INIT(&root_uiter);
@@ -1549,6 +1568,8 @@ out:
        if (!search_commit_root) {
                btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
                btrfs_end_transaction(trans, fs_info->extent_root);
+        } else {
+                up_read(&fs_info->commit_root_sem);
        }
        return ret;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 88d1b1eedc9c..1bcfcdb23cf4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2769,9 +2769,13 @@ again:
                 * the commit roots are read only
                 * so we always do read locks
                 */
+                if (p->need_commit_sem)
+                        down_read(&root->fs_info->commit_root_sem);
                b = root->commit_root;
                extent_buffer_get(b);
                level = btrfs_header_level(b);
+                if (p->need_commit_sem)
+                        up_read(&root->fs_info->commit_root_sem);
                if (!p->skip_locking)
                        btrfs_tree_read_lock(b);
        } else {
@@ -5360,7 +5364,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 {
        int ret;
        int cmp;
-        struct btrfs_trans_handle *trans = NULL;
        struct btrfs_path *left_path = NULL;
        struct btrfs_path *right_path = NULL;
        struct btrfs_key left_key;
@@ -5378,9 +5381,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
        u64 right_blockptr;
        u64 left_gen;
        u64 right_gen;
-        u64 left_start_ctransid;
-        u64 right_start_ctransid;
-        u64 ctransid;
        left_path = btrfs_alloc_path();
        if (!left_path) {
@@ -5404,21 +5404,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
        right_path->search_commit_root = 1;
        right_path->skip_locking = 1;
-        spin_lock(&left_root->root_item_lock);
-        left_start_ctransid = btrfs_root_ctransid(&left_root->root_item);
-        spin_unlock(&left_root->root_item_lock);
-        spin_lock(&right_root->root_item_lock);
-        right_start_ctransid = btrfs_root_ctransid(&right_root->root_item);
-        spin_unlock(&right_root->root_item_lock);
-        trans = btrfs_join_transaction(left_root);
-        if (IS_ERR(trans)) {
-                ret = PTR_ERR(trans);
-                trans = NULL;
-                goto out;
-        }
        /*
         * Strategy: Go to the first items of both trees. Then do
         *
@@ -5455,6 +5440,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
         *   the right if possible or go up and right.
         */
+        down_read(&left_root->fs_info->commit_root_sem);
        left_level = btrfs_header_level(left_root->commit_root);
        left_root_level = left_level;
        left_path->nodes[left_level] = left_root->commit_root;
@@ -5464,6 +5450,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
        right_root_level = right_level;
        right_path->nodes[right_level] = right_root->commit_root;
        extent_buffer_get(right_path->nodes[right_level]);
+        up_read(&left_root->fs_info->commit_root_sem);
        if (left_level == 0)
                btrfs_item_key_to_cpu(left_path->nodes[left_level],
@@ -5482,67 +5469,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
        advance_left = advance_right = 0;
        while (1) {
-                /*
-                 * We need to make sure the transaction does not get committed
-                 * while we do anything on commit roots. This means, we need to
-                 * join and leave transactions for every item that we process.
-                 */
-                if (trans && btrfs_should_end_transaction(trans, left_root)) {
-                        btrfs_release_path(left_path);
-                        btrfs_release_path(right_path);
-                        ret = btrfs_end_transaction(trans, left_root);
-                        trans = NULL;
-                        if (ret < 0)
-                                goto out;
-                }
-                /* now rejoin the transaction */
-                if (!trans) {
-                        trans = btrfs_join_transaction(left_root);
-                        if (IS_ERR(trans)) {
-                                ret = PTR_ERR(trans);
-                                trans = NULL;
-                                goto out;
-                        }
-                        spin_lock(&left_root->root_item_lock);
-                        ctransid = btrfs_root_ctransid(&left_root->root_item);
-                        spin_unlock(&left_root->root_item_lock);
-                        if (ctransid != left_start_ctransid)
-                                left_start_ctransid = 0;
-                        spin_lock(&right_root->root_item_lock);
-                        ctransid = btrfs_root_ctransid(&right_root->root_item);
-                        spin_unlock(&right_root->root_item_lock);
-                        if (ctransid != right_start_ctransid)
-                                right_start_ctransid = 0;
-                        if (!left_start_ctransid || !right_start_ctransid) {
-                                WARN(1, KERN_WARNING
-                                        "BTRFS: btrfs_compare_tree detected "
-                                        "a change in one of the trees while "
-                                        "iterating. This is probably a "
-                                        "bug.\n");
-                                ret = -EIO;
-                                goto out;
-                        }
-                        /*
-                         * the commit root may have changed, so start again
-                         * where we stopped
-                         */
-                        left_path->lowest_level = left_level;
-                        right_path->lowest_level = right_level;
-                        ret = btrfs_search_slot(NULL, left_root,
-                                        &left_key, left_path, 0, 0);
-                        if (ret < 0)
-                                goto out;
-                        ret = btrfs_search_slot(NULL, right_root,
-                                        &right_key, right_path, 0, 0);
-                        if (ret < 0)
-                                goto out;
-                }
                if (advance_left && !left_end_reached) {
                        ret = tree_advance(left_root, left_path, &left_level,
                                        left_root_level,
@@ -5672,14 +5598,6 @@ out:
        btrfs_free_path(left_path);
        btrfs_free_path(right_path);
        kfree(tmp_buf);
-        if (trans) {
-                if (!ret)
-                        ret = btrfs_end_transaction(trans, left_root);
-                else
-                        btrfs_end_transaction(trans, left_root);
-        }
        return ret;
 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index bc96c03dd259..4c48df572bd6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -609,6 +609,7 @@ struct btrfs_path {
        unsigned int skip_locking:1;
        unsigned int leave_spinning:1;
        unsigned int search_commit_root:1;
+        unsigned int need_commit_sem:1;
 };
 /*
@@ -986,7 +987,8 @@ struct btrfs_dev_replace_item {
 #define BTRFS_BLOCK_GROUP_RAID10        (1ULL << 6)
 #define BTRFS_BLOCK_GROUP_RAID5         (1ULL << 7)
 #define BTRFS_BLOCK_GROUP_RAID6         (1ULL << 8)
-#define BTRFS_BLOCK_GROUP_RESERVED      BTRFS_AVAIL_ALLOC_BIT_SINGLE
+#define BTRFS_BLOCK_GROUP_RESERVED      (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
+                                         BTRFS_SPACE_INFO_GLOBAL_RSV)
 enum btrfs_raid_types {
        BTRFS_RAID_RAID10,
@@ -1018,6 +1020,12 @@ enum btrfs_raid_types {
 */
 #define BTRFS_AVAIL_ALLOC_BIT_SINGLE    (1ULL << 48)
+/*
+ * A fake block group type that is used to communicate global block reserve
+ * size to userspace via the SPACE_INFO ioctl.
+ */
+#define BTRFS_SPACE_INFO_GLOBAL_RSV     (1ULL << 49)
 #define BTRFS_EXTENDED_PROFILE_MASK     (BTRFS_BLOCK_GROUP_PROFILE_MASK | \
                                         BTRFS_AVAIL_ALLOC_BIT_SINGLE)
@@ -1440,7 +1448,7 @@ struct btrfs_fs_info {
         */
        struct mutex ordered_extent_flush_mutex;
-        struct rw_semaphore extent_commit_sem;
+        struct rw_semaphore commit_root_sem;
        struct rw_semaphore cleanup_work_sem;
@@ -1711,7 +1719,6 @@ struct btrfs_root {
        struct btrfs_block_rsv *block_rsv;
        /* free ino cache stuff */
-        struct mutex fs_commit_mutex;
        struct btrfs_free_space_ctl *free_ino_ctl;
        enum btrfs_caching_type cached;
        spinlock_t cache_lock;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index bd0f752b797b..029d46c2e170 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -329,6 +329,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 {
        struct extent_state *cached_state = NULL;
        int ret;
+        bool need_lock = (current->journal_info ==
+                          (void *)BTRFS_SEND_TRANS_STUB);
        if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
                return 0;
@@ -336,6 +338,11 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
        if (atomic)
                return -EAGAIN;
+        if (need_lock) {
+                btrfs_tree_read_lock(eb);
+                btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+        }
        lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
                         0, &cached_state);
        if (extent_buffer_uptodate(eb) &&
@@ -347,10 +354,21 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
                       "found %llu\n",
                       eb->start, parent_transid, btrfs_header_generation(eb));
        ret = 1;
-        clear_extent_buffer_uptodate(eb);
+        /*
+         * Things reading via commit roots that don't have normal protection,
+         * like send, can have a really old block in cache that may point at a
+         * block that has been free'd and re-allocated.  So don't clear uptodate
+         * if we find an eb that is under IO (dirty/writeback) because we could
+         * end up reading in the stale data and then writing it back out and
+         * making everybody very sad.
+         */
+        if (!extent_buffer_under_io(eb))
+                clear_extent_buffer_uptodate(eb);
 out:
        unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
                             &cached_state, GFP_NOFS);
+        /*
+         * The tree read lock is only taken above when need_lock is set, so
+         * only drop it in that case; unlocking a lock this call never took
+         * would corrupt the extent buffer's lock state.
+         */
+        if (need_lock)
+                btrfs_tree_read_unlock_blocking(eb);
        return ret;
 }
@@ -1546,7 +1564,6 @@ int btrfs_init_fs_root(struct btrfs_root *root)
        root->subv_writers = writers;
        btrfs_init_free_ino_ctl(root);
-        mutex_init(&root->fs_commit_mutex);
        spin_lock_init(&root->cache_lock);
        init_waitqueue_head(&root->cache_wait);
@@ -2324,7 +2341,7 @@ int open_ctree(struct super_block *sb,
        mutex_init(&fs_info->transaction_kthread_mutex);
        mutex_init(&fs_info->cleaner_mutex);
        mutex_init(&fs_info->volume_mutex);
-        init_rwsem(&fs_info->extent_commit_sem);
+        init_rwsem(&fs_info->commit_root_sem);
        init_rwsem(&fs_info->cleanup_work_sem);
        init_rwsem(&fs_info->subvol_sem);
        sema_init(&fs_info->uuid_tree_rescan_sem, 1);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c6b6a6e3e735..1306487c82cf 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -419,7 +419,7 @@ static noinline void caching_thread(struct btrfs_work *work)
 again:
        mutex_lock(&caching_ctl->mutex);
        /* need to make sure the commit_root doesn't disappear */
-        down_read(&fs_info->extent_commit_sem);
+        down_read(&fs_info->commit_root_sem);
 next:
        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
@@ -443,10 +443,10 @@ next:
                                break;
                        if (need_resched() ||
-                            rwsem_is_contended(&fs_info->extent_commit_sem)) {
+                            rwsem_is_contended(&fs_info->commit_root_sem)) {
                                caching_ctl->progress = last;
                                btrfs_release_path(path);
-                                up_read(&fs_info->extent_commit_sem);
+                                up_read(&fs_info->commit_root_sem);
                                mutex_unlock(&caching_ctl->mutex);
                                cond_resched();
                                goto again;
@@ -513,7 +513,7 @@ next:
 err:
        btrfs_free_path(path);
-        up_read(&fs_info->extent_commit_sem);
+        up_read(&fs_info->commit_root_sem);
        free_excluded_extents(extent_root, block_group);
@@ -633,10 +633,10 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
                return 0;
        }
-        down_write(&fs_info->extent_commit_sem);
+        down_write(&fs_info->commit_root_sem);
        atomic_inc(&caching_ctl->count);
        list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
-        up_write(&fs_info->extent_commit_sem);
+        up_write(&fs_info->commit_root_sem);
        btrfs_get_block_group(cache);
@@ -2444,7 +2444,8 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                        spin_unlock(&locked_ref->lock);
                        spin_lock(&delayed_refs->lock);
                        spin_lock(&locked_ref->lock);
-                        if (rb_first(&locked_ref->ref_root)) {
+                        if (rb_first(&locked_ref->ref_root) ||
+                            locked_ref->extent_op) {
                                spin_unlock(&locked_ref->lock);
                                spin_unlock(&delayed_refs->lock);
                                continue;
@@ -5470,7 +5471,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
        struct btrfs_block_group_cache *cache;
        struct btrfs_space_info *space_info;
-        down_write(&fs_info->extent_commit_sem);
+        down_write(&fs_info->commit_root_sem);
        list_for_each_entry_safe(caching_ctl, next,
                                 &fs_info->caching_block_groups, list) {
@@ -5489,7 +5490,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
        else
                fs_info->pinned_extents = &fs_info->freed_extents[0];
-        up_write(&fs_info->extent_commit_sem);
+        up_write(&fs_info->commit_root_sem);
        list_for_each_entry_rcu(space_info, &fs_info->space_info, list)
                percpu_counter_set(&space_info->total_bytes_pinned, 0);
@@ -5744,6 +5745,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        "unable to find ref byte nr %llu parent %llu root %llu  owner %llu offset %llu",
                        bytenr, parent, root_objectid, owner_objectid,
                        owner_offset);
+                btrfs_abort_transaction(trans, extent_root, ret);
+                goto out;
        } else {
                btrfs_abort_transaction(trans, extent_root, ret);
                goto out;
@@ -8255,14 +8258,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
        struct btrfs_caching_control *caching_ctl;
        struct rb_node *n;
-        down_write(&info->extent_commit_sem);
+        down_write(&info->commit_root_sem);
        while (!list_empty(&info->caching_block_groups)) {
                caching_ctl = list_entry(info->caching_block_groups.next,
                                         struct btrfs_caching_control, list);
                list_del(&caching_ctl->list);
                put_caching_control(caching_ctl);
        }
-        up_write(&info->extent_commit_sem);
+        up_write(&info->commit_root_sem);
        spin_lock(&info->block_group_cache_lock);
        while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
@@ -8336,9 +8339,15 @@ static void __link_block_group(struct btrfs_space_info *space_info,
                               struct btrfs_block_group_cache *cache)
 {
        int index = get_block_group_index(cache);
+        bool first = false;
        down_write(&space_info->groups_sem);
-        if (list_empty(&space_info->block_groups[index])) {
+        if (list_empty(&space_info->block_groups[index]))
+                first = true;
+        list_add_tail(&cache->list, &space_info->block_groups[index]);
+        up_write(&space_info->groups_sem);
+        if (first) {
                struct kobject *kobj = &space_info->block_group_kobjs[index];
                int ret;
@@ -8350,8 +8359,6 @@ static void __link_block_group(struct btrfs_space_info *space_info,
                        kobject_put(&space_info->kobj);
                }
        }
-        list_add_tail(&cache->list, &space_info->block_groups[index]);
-        up_write(&space_info->groups_sem);
 }
 static struct btrfs_block_group_cache *
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ae69a00387e7..3955e475ceec 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -749,6 +749,7 @@ again:
                 * our range starts
                 */
                node = tree_search(tree, start);
+process_node:
                if (!node)
                        break;
@@ -769,7 +770,10 @@ again:
                if (start > end)
                        break;
-                cond_resched_lock(&tree->lock);
+                if (!cond_resched_lock(&tree->lock)) {
+                        node = rb_next(node);
+                        goto process_node;
+                }
        }
 out:
        spin_unlock(&tree->lock);
@@ -4306,7 +4310,7 @@ static void __free_extent_buffer(struct extent_buffer *eb)
        kmem_cache_free(extent_buffer_cache, eb);
 }
-static int extent_buffer_under_io(struct extent_buffer *eb)
+int extent_buffer_under_io(struct extent_buffer *eb)
 {
        return (atomic_read(&eb->io_pages) ||
                test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 58b27e5ab521..c488b45237bf 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -320,6 +320,7 @@ int set_extent_buffer_dirty(struct extent_buffer *eb);
 int set_extent_buffer_uptodate(struct extent_buffer *eb);
 int clear_extent_buffer_uptodate(struct extent_buffer *eb);
 int extent_buffer_uptodate(struct extent_buffer *eb);
+int extent_buffer_under_io(struct extent_buffer *eb);
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
                      unsigned long min_len, char **map,
                      unsigned long *map_start,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c660527af838..eb742c07e7a4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -425,13 +425,8 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
                struct page *page = prepared_pages[pg];
                /*
                 * Copy data from userspace to the current page
-                 *
-                 * Disable pagefault to avoid recursive lock since
-                 * the pages are already locked
                 */
-                pagefault_disable();
                copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
-                pagefault_enable();
                /* Flush processor's dcache for this page */
                flush_dcache_page(page);
@@ -1665,7 +1660,7 @@ again:
 static ssize_t __btrfs_direct_write(struct kiocb *iocb,
                                    const struct iovec *iov,
                                    unsigned long nr_segs, loff_t pos,
-                                    loff_t *ppos, size_t count, size_t ocount)
+                                    size_t count, size_t ocount)
 {
        struct file *file = iocb->ki_filp;
        struct iov_iter i;
@@ -1674,7 +1669,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
        loff_t endbyte;
        int err;
-        written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
+        written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
                                            count, ocount);
        if (written < 0 || written == count)
@@ -1693,7 +1688,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
        if (err)
                goto out;
        written += written_buffered;
-        *ppos = pos + written_buffered;
+        iocb->ki_pos = pos + written_buffered;
        invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
                                 endbyte >> PAGE_CACHE_SHIFT);
 out:
@@ -1725,8 +1720,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
        struct btrfs_root *root = BTRFS_I(inode)->root;
-        loff_t *ppos = &iocb->ki_pos;
        u64 start_pos;
+        u64 end_pos;
        ssize_t num_written = 0;
        ssize_t err = 0;
        size_t count, ocount;
@@ -1781,7 +1776,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        start_pos = round_down(pos, root->sectorsize);
        if (start_pos > i_size_read(inode)) {
-                err = btrfs_cont_expand(inode, i_size_read(inode), start_pos);
+                /* Expand hole size to cover write data, preventing empty gap */
+                end_pos = round_up(pos + iov->iov_len, root->sectorsize);
+                err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
                if (err) {
                        mutex_unlock(&inode->i_mutex);
                        goto out;
@@ -1793,7 +1790,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        if (unlikely(file->f_flags & O_DIRECT)) {
                num_written = __btrfs_direct_write(iocb, iov, nr_segs,
-                                                   pos, ppos, count, ocount);
+                                                   pos, count, ocount);
        } else {
                struct iov_iter i;
@@ -1801,7 +1798,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                num_written = __btrfs_buffered_write(file, &i, pos);
                if (num_written > 0)
-                        *ppos = pos + num_written;
+                        iocb->ki_pos = pos + num_written;
        }
        mutex_unlock(&inode->i_mutex);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index ab485e57b6fe..cc8ca193d830 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -55,7 +55,7 @@ static int caching_kthread(void *data)
        key.type = BTRFS_INODE_ITEM_KEY;
 again:
        /* need to make sure the commit_root doesn't disappear */
-        mutex_lock(&root->fs_commit_mutex);
+        down_read(&fs_info->commit_root_sem);
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
@@ -88,7 +88,7 @@ again:
                                btrfs_item_key_to_cpu(leaf, &key, 0);
                                btrfs_release_path(path);
                                root->cache_progress = last;
-                                mutex_unlock(&root->fs_commit_mutex);
+                                up_read(&fs_info->commit_root_sem);
                                schedule_timeout(1);
                                goto again;
                        } else
@@ -127,7 +127,7 @@ next:
        btrfs_unpin_free_ino(root);
 out:
        wake_up(&root->cache_wait);
-        mutex_unlock(&root->fs_commit_mutex);
+        up_read(&fs_info->commit_root_sem);
        btrfs_free_path(path);
@@ -223,11 +223,11 @@ again:
                 * or the caching work is done.
                 */
-                mutex_lock(&root->fs_commit_mutex);
+                down_write(&root->fs_info->commit_root_sem);
                spin_lock(&root->cache_lock);
                if (root->cached == BTRFS_CACHE_FINISHED) {
                        spin_unlock(&root->cache_lock);
-                        mutex_unlock(&root->fs_commit_mutex);
+                        up_write(&root->fs_info->commit_root_sem);
                        goto again;
                }
                spin_unlock(&root->cache_lock);
@@ -240,7 +240,7 @@ again:
                else
                        __btrfs_add_free_space(pinned, objectid, 1);
-                mutex_unlock(&root->fs_commit_mutex);
+                up_write(&root->fs_info->commit_root_sem);
        }
 }
@@ -250,7 +250,7 @@ again:
 * and others will just be dropped, because the commit root we were
 * searching has changed.
 *
- * Must be called with root->fs_commit_mutex held
+ * Must be called with root->fs_info->commit_root_sem held
 */
 void btrfs_unpin_free_ino(struct btrfs_root *root)
 {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 06e9a4152b14..5f805bc944fa 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -394,6 +394,14 @@ static noinline int compress_file_range(struct inode *inode,
            (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
                btrfs_add_inode_defrag(NULL, inode);
+        /*
+         * skip compression for a small file range(<=blocksize) that
+         * isn't an inline extent, since it doesn't save disk space at all.
+         */
+        if ((end - start + 1) <= blocksize &&
+            (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
+                goto cleanup_and_bail_uncompressed;
        actual_end = min_t(u64, isize, end + 1);
 again:
        will_compress = 0;
@@ -1271,6 +1279,15 @@ next_slot:
                        disk_bytenr += cur_offset - found_key.offset;
                        num_bytes = min(end + 1, extent_end) - cur_offset;
                        /*
+                         * if there are pending snapshots for this root,
+                         * we fall into common COW way.
+                         */
+                        if (!nolock) {
+                                err = btrfs_start_nocow_write(root);
+                                if (!err)
+                                        goto out_check;
+                        }
+                        /*
                         * force cow if csum exists in the range.
                         * this ensure that csum for a given extent are
                         * either valid or do not exist.
@@ -1289,6 +1306,8 @@ next_slot:
 out_check:
                if (extent_end <= start) {
                        path->slots[0]++;
+                        if (!nolock && nocow)
+                                btrfs_end_nocow_write(root);
                        goto next_slot;
                }
                if (!nocow) {
@@ -1306,8 +1325,11 @@ out_check:
                        ret = cow_file_range(inode, locked_page,
                                             cow_start, found_key.offset - 1,
                                             page_started, nr_written, 1);
-                        if (ret)
+                        if (ret) {
+                                if (!nolock && nocow)
+                                        btrfs_end_nocow_write(root);
                                goto error;
+                        }
                        cow_start = (u64)-1;
                }
@@ -1354,8 +1376,11 @@ out_check:
                    BTRFS_DATA_RELOC_TREE_OBJECTID) {
                        ret = btrfs_reloc_clone_csums(inode, cur_offset,
                                                      num_bytes);
-                        if (ret)
+                        if (ret) {
+                                if (!nolock && nocow)
+                                        btrfs_end_nocow_write(root);
                                goto error;
+                        }
                }
                extent_clear_unlock_delalloc(inode, cur_offset,
@@ -1363,6 +1388,8 @@ out_check:
                                             locked_page, EXTENT_LOCKED |
                                             EXTENT_DELALLOC, PAGE_UNLOCK |
                                             PAGE_SET_PRIVATE2);
+                if (!nolock && nocow)
+                        btrfs_end_nocow_write(root);
                cur_offset = extent_end;
                if (cur_offset > end)
                        break;
@@ -8476,19 +8503,20 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
                        else
                                iput(inode);
                        ret = -ENOMEM;
-                        break;
+                        goto out;
                }
                list_add_tail(&work->list, &works);
                btrfs_queue_work(root->fs_info->flush_workers,
                                 &work->work);
                ret++;
                if (nr != -1 && ret >= nr)
-                        break;
+                        goto out;
                cond_resched();
                spin_lock(&root->delalloc_lock);
        }
        spin_unlock(&root->delalloc_lock);
+out:
        list_for_each_entry_safe(work, next, &works, list) {
                list_del_init(&work->list);
                btrfs_wait_and_free_delalloc_work(work);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0401397b5c92..e79ff6b90cb7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1472,6 +1472,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
        struct btrfs_trans_handle *trans;
        struct btrfs_device *device = NULL;
        char *sizestr;
+        char *retptr;
        char *devstr = NULL;
        int ret = 0;
        int mod = 0;
@@ -1539,8 +1540,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
                        mod = 1;
                        sizestr++;
                }
-                new_size = memparse(sizestr, NULL);
+                new_size = memparse(sizestr, &retptr);
-                if (new_size == 0) {
+                if (*retptr != '\0' || new_size == 0) {
                        ret = -EINVAL;
                        goto out_free;
                }
@@ -3140,8 +3141,9 @@ process_slot:
                                                         new_key.offset + datal,
                                                         1);
                                if (ret) {
-                                        btrfs_abort_transaction(trans, root,
+                                        if (ret != -EINVAL)
-                                                                ret);
+                                                btrfs_abort_transaction(trans,
+                                                        root, ret);
                                        btrfs_end_transaction(trans, root);
                                        goto out;
                                }
@@ -3538,6 +3540,11 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
                up_read(&info->groups_sem);
        }
+        /*
+         * Global block reserve, exported as a space_info
+         */
+        slot_count++;
        /* space_slots == 0 means they are asking for a count */
        if (space_args.space_slots == 0) {
                space_args.total_spaces = slot_count;
@@ -3596,6 +3603,21 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
                up_read(&info->groups_sem);
        }
+        /*
+         * Add global block reserve
+         */
+        if (slot_count) {
+                struct btrfs_block_rsv *block_rsv = &root->fs_info->global_block_rsv;
+                spin_lock(&block_rsv->lock);
+                space.total_bytes = block_rsv->size;
+                space.used_bytes = block_rsv->size - block_rsv->reserved;
+                spin_unlock(&block_rsv->lock);
+                space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV;
+                memcpy(dest, &space, sizeof(space));
+                space_args.total_spaces++;
+        }
        user_dest = (struct btrfs_ioctl_space_info __user *)
                (arg + sizeof(struct btrfs_ioctl_space_args));
@@ -4531,9 +4553,8 @@ static long btrfs_ioctl_set_received_subvol_32(struct file *file,
        }
        args64 = kmalloc(sizeof(*args64), GFP_NOFS);
-        if (IS_ERR(args64)) {
+        if (!args64) {
-                ret = PTR_ERR(args64);
+                ret = -ENOMEM;
-                args64 = NULL;
                goto out;
        }
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index def428a25b2a..7f92ab1daa87 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2317,7 +2317,6 @@ void free_reloc_roots(struct list_head *list)
 static noinline_for_stack
 int merge_reloc_roots(struct reloc_control *rc)
 {
-        struct btrfs_trans_handle *trans;
        struct btrfs_root *root;
        struct btrfs_root *reloc_root;
        u64 last_snap;
@@ -2375,26 +2374,6 @@ again:
                                list_add_tail(&reloc_root->root_list,
                                              &reloc_roots);
                        goto out;
-                } else if (!ret) {
-                        /*
-                         * recover the last snapshot tranid to avoid
-                         * the space balance break NOCOW.
-                         */
-                        root = read_fs_root(rc->extent_root->fs_info,
-                                            objectid);
-                        if (IS_ERR(root))
-                                continue;
-                        trans = btrfs_join_transaction(root);
-                        BUG_ON(IS_ERR(trans));
-                        /* Check if the fs/file tree was snapshoted or not. */
-                        if (btrfs_root_last_snapshot(&root->root_item) ==
-                            otransid - 1)
-                                btrfs_set_root_last_snapshot(&root->root_item,
-                                                             last_snap);
-                                
-                        btrfs_end_transaction(trans, root);
                }
        }
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 93e6d7172844..0be77993378e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2235,6 +2235,47 @@ behind_scrub_pages:
        return 0;
 }
+/*
+ * Given a physical address, this will calculate its logical offset
+ * and store it through @offset. If this is a parity stripe, it will
+ * return the leftmost data stripe's logical offset.
+ *
+ * Returns 0 if it is a data stripe, 1 if it is a parity stripe.
+ */
+static int get_raid56_logic_offset(u64 physical, int num,
+                                   struct map_lookup *map, u64 *offset)
+{
+        int i;
+        int j = 0;
+        u64 stripe_nr;
+        u64 last_offset;
+        int stripe_index;
+        int rot;
+        last_offset = (physical - map->stripes[num].physical) *
+                      nr_data_stripes(map);
+        *offset = last_offset;
+        for (i = 0; i < nr_data_stripes(map); i++) {
+                *offset = last_offset + i * map->stripe_len;
+                stripe_nr = *offset;
+                do_div(stripe_nr, map->stripe_len);
+                do_div(stripe_nr, nr_data_stripes(map));
+                /* Work out the disk rotation on this stripe-set */
+                rot = do_div(stripe_nr, map->num_stripes);
+                /* Calculate which stripe index this data lands on */
+                rot += i;
+                stripe_index = rot % map->num_stripes;
+                if (stripe_index == num)
+                        return 0;
+                if (stripe_index < num)
+                        j++;
+        }
+        *offset = last_offset + j * map->stripe_len;
+        return 1;
+}
 static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                                           struct map_lookup *map,
                                           struct btrfs_device *scrub_dev,
@@ -2256,6 +2297,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        u64 physical;
        u64 logical;
        u64 logic_end;
+        u64 physical_end;
        u64 generation;
        int mirror_num;
        struct reada_control *reada1;
@@ -2269,16 +2311,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        u64 extent_len;
        struct btrfs_device *extent_dev;
        int extent_mirror_num;
-        int stop_loop;
+        int stop_loop = 0;
-        if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-                         BTRFS_BLOCK_GROUP_RAID6)) {
-                if (num >= nr_data_stripes(map)) {
-                        return 0;
-                }
-        }
        nstripes = length;
+        physical = map->stripes[num].physical;
        offset = 0;
        do_div(nstripes, map->stripe_len);
        if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
@@ -2296,6 +2332,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
                increment = map->stripe_len;
                mirror_num = num % map->num_stripes + 1;
+        } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+                                BTRFS_BLOCK_GROUP_RAID6)) {
+                get_raid56_logic_offset(physical, num, map, &offset);
+                increment = map->stripe_len * nr_data_stripes(map);
+                mirror_num = 1;
        } else {
                increment = map->stripe_len;
                mirror_num = 1;
@@ -2319,7 +2360,15 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
         * to not hold off transaction commits
         */
        logical = base + offset;
+        physical_end = physical + nstripes * map->stripe_len;
+        if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+                         BTRFS_BLOCK_GROUP_RAID6)) {
+                get_raid56_logic_offset(physical_end, num,
+                                        map, &logic_end);
+                logic_end += base;
+        } else {
+                logic_end = logical + increment * nstripes;
+        }
        wait_event(sctx->list_wait,
                   atomic_read(&sctx->bios_in_flight) == 0);
        scrub_blocked_if_needed(fs_info);
@@ -2328,7 +2377,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        key_start.objectid = logical;
        key_start.type = BTRFS_EXTENT_ITEM_KEY;
        key_start.offset = (u64)0;
-        key_end.objectid = base + offset + nstripes * increment;
+        key_end.objectid = logic_end;
        key_end.type = BTRFS_METADATA_ITEM_KEY;
        key_end.offset = (u64)-1;
        reada1 = btrfs_reada_add(root, &key_start, &key_end);
@@ -2338,7 +2387,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        key_start.offset = logical;
        key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
        key_end.type = BTRFS_EXTENT_CSUM_KEY;
-        key_end.offset = base + offset + nstripes * increment;
+        key_end.offset = logic_end;
        reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
        if (!IS_ERR(reada1))
@@ -2356,11 +2405,17 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        /*
         * now find all extents for each stripe and scrub them
         */
-        logical = base + offset;
-        physical = map->stripes[num].physical;
-        logic_end = logical + increment * nstripes;
        ret = 0;
-        while (logical < logic_end) {
+        while (physical < physical_end) {
+                /* for raid56, we skip parity stripe */
+                if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+                                BTRFS_BLOCK_GROUP_RAID6)) {
+                        ret = get_raid56_logic_offset(physical, num,
+                                        map, &logical);
+                        logical += base;
+                        if (ret)
+                                goto skip;
+                }
                /*
                 * canceled?
                 */
@@ -2504,15 +2559,29 @@ again:
                        scrub_free_csums(sctx);
                        if (extent_logical + extent_len <
                            key.objectid + bytes) {
-                                logical += increment;
+                                if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-                                physical += map->stripe_len;
+                                        BTRFS_BLOCK_GROUP_RAID6)) {
+                                        /*
+                                         * loop until we find next data stripe
+                                         * or we have finished all stripes.
+                                         */
+                                        do {
+                                                physical += map->stripe_len;
+                                                ret = get_raid56_logic_offset(
+                                                                physical, num,
+                                                                map, &logical);
+                                                logical += base;
+                                        } while (physical < physical_end && ret);
+                                } else {
+                                        physical += map->stripe_len;
+                                        logical += increment;
+                                }
                                if (logical < key.objectid + bytes) {
                                        cond_resched();
                                        goto again;
                                }
-                                if (logical >= logic_end) {
+                                if (physical >= physical_end) {
                                        stop_loop = 1;
                                        break;
                                }
@@ -2521,6 +2590,7 @@ next:
                        path->slots[0]++;
                }
                btrfs_release_path(path);
+skip:
                logical += increment;
                physical += map->stripe_len;
                spin_lock(&sctx->stat_lock);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 9b6da9d55f9a..1ac3ca98c429 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -493,6 +493,7 @@ static struct btrfs_path *alloc_path_for_send(void)
                return NULL;
        path->search_commit_root = 1;
        path->skip_locking = 1;
+        path->need_commit_sem = 1;
        return path;
 }
@@ -771,29 +772,22 @@ out:
 /*
 * Helper function to retrieve some fields from an inode item.
 */
-static int get_inode_info(struct btrfs_root *root,
+static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
-                          u64 ino, u64 *size, u64 *gen,
+                          u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
-                          u64 *mode, u64 *uid, u64 *gid,
+                          u64 *gid, u64 *rdev)
-                          u64 *rdev)
 {
        int ret;
        struct btrfs_inode_item *ii;
        struct btrfs_key key;
-        struct btrfs_path *path;
-        path = alloc_path_for_send();
-        if (!path)
-                return -ENOMEM;
        key.objectid = ino;
        key.type = BTRFS_INODE_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-        if (ret < 0)
-                goto out;
        if (ret) {
-                ret = -ENOENT;
+                if (ret > 0)
-                goto out;
+                        ret = -ENOENT;
+                return ret;
        }
        ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -811,7 +805,22 @@ static int get_inode_info(struct btrfs_root *root,
        if (rdev)
                *rdev = btrfs_inode_rdev(path->nodes[0], ii);
-out:
+        return ret;
+}
+static int get_inode_info(struct btrfs_root *root,
+                          u64 ino, u64 *size, u64 *gen,
+                          u64 *mode, u64 *uid, u64 *gid,
+                          u64 *rdev)
+{
+        struct btrfs_path *path;
+        int ret;
+        path = alloc_path_for_send();
+        if (!path)
+                return -ENOMEM;
+        ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
+                               rdev);
        btrfs_free_path(path);
        return ret;
 }
@@ -1085,6 +1094,7 @@ out:
 struct backref_ctx {
        struct send_ctx *sctx;
+        struct btrfs_path *path;
        /* number of total found references */
        u64 found;
@@ -1155,8 +1165,9 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
         * There are inodes that have extents that lie behind its i_size. Don't
         * accept clones from these extents.
         */
-        ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL,
+        ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
-                        NULL);
+                               NULL, NULL, NULL);
+        btrfs_release_path(bctx->path);
        if (ret < 0)
                return ret;
@@ -1235,12 +1246,17 @@ static int find_extent_clone(struct send_ctx *sctx,
        if (!tmp_path)
                return -ENOMEM;
+        /* We only use this path under the commit sem */
+        tmp_path->need_commit_sem = 0;
        backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
        if (!backref_ctx) {
                ret = -ENOMEM;
                goto out;
        }
+        backref_ctx->path = tmp_path;
        if (data_offset >= ino_size) {
                /*
                 * There may be extents that lie behind the file's size.
@@ -1268,8 +1284,10 @@ static int find_extent_clone(struct send_ctx *sctx,
        }
        logical = disk_byte + btrfs_file_extent_offset(eb, fi);
+        down_read(&sctx->send_root->fs_info->commit_root_sem);
        ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
                                  &found_key, &flags);
+        up_read(&sctx->send_root->fs_info->commit_root_sem);
        btrfs_release_path(tmp_path);
        if (ret < 0)
@@ -4418,6 +4436,9 @@ static int send_hole(struct send_ctx *sctx, u64 end)
        p = fs_path_alloc();
        if (!p)
                return -ENOMEM;
+        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
+        if (ret < 0)
+                goto tlv_put_failure;
        memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
        while (offset < end) {
                len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
@@ -4425,9 +4446,6 @@ static int send_hole(struct send_ctx *sctx, u64 end)
                ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
                if (ret < 0)
                        break;
-                ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
-                if (ret < 0)
-                        break;
                TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
                TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
                TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
@@ -4968,7 +4986,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
        if (S_ISREG(sctx->cur_inode_mode)) {
                if (need_send_hole(sctx)) {
-                        if (sctx->cur_inode_last_extent == (u64)-1) {
+                        if (sctx->cur_inode_last_extent == (u64)-1 ||
+                            sctx->cur_inode_last_extent <
+                            sctx->cur_inode_size) {
                                ret = get_last_extent(sctx, (u64)-1);
                                if (ret)
                                        goto out;
@@ -5367,57 +5387,21 @@ out:
 static int full_send_tree(struct send_ctx *sctx)
 {
        int ret;
-        struct btrfs_trans_handle *trans = NULL;
        struct btrfs_root *send_root = sctx->send_root;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_path *path;
        struct extent_buffer *eb;
        int slot;
-        u64 start_ctransid;
-        u64 ctransid;
        path = alloc_path_for_send();
        if (!path)
                return -ENOMEM;
-        spin_lock(&send_root->root_item_lock);
-        start_ctransid = btrfs_root_ctransid(&send_root->root_item);
-        spin_unlock(&send_root->root_item_lock);
        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.type = BTRFS_INODE_ITEM_KEY;
        key.offset = 0;
-join_trans:
-        /*
-         * We need to make sure the transaction does not get committed
-         * while we do anything on commit roots. Join a transaction to prevent
-         * this.
-         */
-        trans = btrfs_join_transaction(send_root);
-        if (IS_ERR(trans)) {
-                ret = PTR_ERR(trans);
-                trans = NULL;
-                goto out;
-        }
-        /*
-         * Make sure the tree has not changed after re-joining. We detect this
-         * by comparing start_ctransid and ctransid. They should always match.
-         */
-        spin_lock(&send_root->root_item_lock);
-        ctransid = btrfs_root_ctransid(&send_root->root_item);
-        spin_unlock(&send_root->root_item_lock);
-        if (ctransid != start_ctransid) {
-                WARN(1, KERN_WARNING "BTRFS: the root that you're trying to "
-                                     "send was modified in between. This is "
-                                     "probably a bug.\n");
-                ret = -EIO;
-                goto out;
-        }
        ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
        if (ret < 0)
                goto out;
@@ -5425,19 +5409,6 @@ join_trans:
                goto out_finish;
        while (1) {
-                /*
-                 * When someone want to commit while we iterate, end the
-                 * joined transaction and rejoin.
-                 */
-                if (btrfs_should_end_transaction(trans, send_root)) {
-                        ret = btrfs_end_transaction(trans, send_root);
-                        trans = NULL;
-                        if (ret < 0)
-                                goto out;
-                        btrfs_release_path(path);
-                        goto join_trans;
-                }
                eb = path->nodes[0];
                slot = path->slots[0];
                btrfs_item_key_to_cpu(eb, &found_key, slot);
@@ -5465,12 +5436,6 @@ out_finish:
 out:
        btrfs_free_path(path);
-        if (trans) {
-                if (!ret)
-                        ret = btrfs_end_transaction(trans, send_root);
-                else
-                        btrfs_end_transaction(trans, send_root);
-        }
        return ret;
 }
@@ -5718,7 +5683,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
                        NULL);
        sort_clone_roots = 1;
+        current->journal_info = (void *)BTRFS_SEND_TRANS_STUB;
        ret = send_subvol(sctx);
+        current->journal_info = NULL;
        if (ret < 0)
                goto out;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9dbf42395153..5011aadacab8 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,6 +66,8 @@
 static const struct super_operations btrfs_super_ops;
 static struct file_system_type btrfs_fs_type;
+static int btrfs_remount(struct super_block *sb, int *flags, char *data);
 static const char *btrfs_decode_error(int errno)
 {
        char *errstr = "unknown";
@@ -1185,6 +1187,30 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
        mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
                             newargs);
-        kfree(newargs);
+        if (PTR_RET(mnt) == -EBUSY) {
+                if (flags & MS_RDONLY) {
+                        mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, device_name,
+                                             newargs);
+                } else {
+                        int r;
+                        mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name,
+                                             newargs);
+                        if (IS_ERR(mnt)) {
+                                kfree(newargs);
+                                return ERR_CAST(mnt);
+                        }
+                        r = btrfs_remount(mnt->mnt_sb, &flags, NULL);
+                        if (r < 0) {
+                                /* FIXME: release vfsmount mnt ??*/
+                                kfree(newargs);
+                                return ERR_PTR(r);
+                        }
+                }
+        }
+        /* The -EBUSY retries above still read newargs; free it only now. */
+        kfree(newargs);
        if (IS_ERR(mnt))
                return ERR_CAST(mnt);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a04707f740d6..7579f6d0b854 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -75,10 +75,21 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
        }
 }
-static noinline void switch_commit_root(struct btrfs_root *root)
+static noinline void switch_commit_roots(struct btrfs_transaction *trans,
+                                         struct btrfs_fs_info *fs_info)
 {
-        free_extent_buffer(root->commit_root);
+        struct btrfs_root *root, *tmp;
-        root->commit_root = btrfs_root_node(root);
+        down_write(&fs_info->commit_root_sem);
+        list_for_each_entry_safe(root, tmp, &trans->switch_commits,
+                                 dirty_list) {
+                list_del_init(&root->dirty_list);
+                free_extent_buffer(root->commit_root);
+                root->commit_root = btrfs_root_node(root);
+                if (is_fstree(root->objectid))
+                        btrfs_unpin_free_ino(root);
+        }
+        up_write(&fs_info->commit_root_sem);
 }
 static inline void extwriter_counter_inc(struct btrfs_transaction *trans,
@@ -208,6 +219,7 @@ loop:
        INIT_LIST_HEAD(&cur_trans->pending_snapshots);
        INIT_LIST_HEAD(&cur_trans->ordered_operations);
        INIT_LIST_HEAD(&cur_trans->pending_chunks);
+        INIT_LIST_HEAD(&cur_trans->switch_commits);
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
        extent_io_tree_init(&cur_trans->dirty_pages,
                             fs_info->btree_inode->i_mapping);
@@ -375,7 +387,8 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
        if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
                return ERR_PTR(-EROFS);
-        if (current->journal_info) {
+        if (current->journal_info &&
+            current->journal_info != (void *)BTRFS_SEND_TRANS_STUB) {
                WARN_ON(type & TRANS_EXTWRITERS);
                h = current->journal_info;
                h->use_count++;
@@ -919,9 +932,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
                        return ret;
        }
-        if (root != root->fs_info->extent_root)
-                switch_commit_root(root);
        return 0;
 }
@@ -977,15 +987,16 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
                list_del_init(next);
                root = list_entry(next, struct btrfs_root, dirty_list);
+                if (root != fs_info->extent_root)
+                        list_add_tail(&root->dirty_list,
+                                      &trans->transaction->switch_commits);
                ret = update_cowonly_root(trans, root);
                if (ret)
                        return ret;
        }
-        down_write(&fs_info->extent_commit_sem);
+        list_add_tail(&fs_info->extent_root->dirty_list,
-        switch_commit_root(fs_info->extent_root);
+                      &trans->transaction->switch_commits);
-        up_write(&fs_info->extent_commit_sem);
        btrfs_after_dev_replace_commit(fs_info);
        return 0;
@@ -1042,11 +1053,8 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
                        smp_wmb();
                        if (root->commit_root != root->node) {
-                                mutex_lock(&root->fs_commit_mutex);
+                                list_add_tail(&root->dirty_list,
-                                switch_commit_root(root);
+                                        &trans->transaction->switch_commits);
-                                btrfs_unpin_free_ino(root);
-                                mutex_unlock(&root->fs_commit_mutex);
                                btrfs_set_root_node(&root->root_item,
                                                    root->node);
                        }
@@ -1857,11 +1865,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        btrfs_set_root_node(&root->fs_info->tree_root->root_item,
                            root->fs_info->tree_root->node);
-        switch_commit_root(root->fs_info->tree_root);
+        list_add_tail(&root->fs_info->tree_root->dirty_list,
+                      &cur_trans->switch_commits);
        btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
                            root->fs_info->chunk_root->node);
-        switch_commit_root(root->fs_info->chunk_root);
+        list_add_tail(&root->fs_info->chunk_root->dirty_list,
+                      &cur_trans->switch_commits);
+        switch_commit_roots(cur_trans, root->fs_info);
        assert_qgroups_uptodate(trans);
        update_super_roots(root);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 6ac037e9f9f0..b57b924e8e03 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -57,6 +57,7 @@ struct btrfs_transaction {
        struct list_head pending_snapshots;
        struct list_head ordered_operations;
        struct list_head pending_chunks;
+        struct list_head switch_commits;
        struct btrfs_delayed_ref_root delayed_refs;
        int aborted;
 };
@@ -78,6 +79,8 @@ struct btrfs_transaction {
 #define TRANS_EXTWRITERS        (__TRANS_USERSPACE | __TRANS_START |    \
                                 __TRANS_ATTACH)
+#define BTRFS_SEND_TRANS_STUB   1
 struct btrfs_trans_handle {
        u64 transid;
        u64 bytes_reserved;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d241130a32fd..49d7fab73360 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -448,6 +448,14 @@ static void pending_bios_fn(struct btrfs_work *work)
        run_scheduled_bios(device);
 }
+/*
+ * Add new device to list of registered devices
+ *
+ * Returns:
+ * 1   - first time device is seen
+ * 0   - device already known
+ * < 0 - error
+ */
 static noinline int device_list_add(const char *path,
                           struct btrfs_super_block *disk_super,
                           u64 devid, struct btrfs_fs_devices **fs_devices_ret)
@@ -455,6 +463,7 @@ static noinline int device_list_add(const char *path,
        struct btrfs_device *device;
        struct btrfs_fs_devices *fs_devices;
        struct rcu_string *name;
+        int ret = 0;
        u64 found_transid = btrfs_super_generation(disk_super);
        fs_devices = find_fsid(disk_super->fsid);
@@ -495,6 +504,7 @@ static noinline int device_list_add(const char *path,
                fs_devices->num_devices++;
                mutex_unlock(&fs_devices->device_list_mutex);
+                ret = 1;
                device->fs_devices = fs_devices;
        } else if (!device->name || strcmp(device->name->str, path)) {
                name = rcu_string_strdup(path, GFP_NOFS);
@@ -513,7 +523,8 @@ static noinline int device_list_add(const char *path,
                fs_devices->latest_trans = found_transid;
        }
        *fs_devices_ret = fs_devices;
-        return 0;
+        return ret;
 }
 static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
@@ -910,17 +921,19 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
        transid = btrfs_super_generation(disk_super);
        total_devices = btrfs_super_num_devices(disk_super);
-        if (disk_super->label[0]) {
-                if (disk_super->label[BTRFS_LABEL_SIZE - 1])
-                        disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
-                printk(KERN_INFO "BTRFS: device label %s ", disk_super->label);
-        } else {
-                printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid);
-        }
-        printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
        ret = device_list_add(path, disk_super, devid, fs_devices_ret);
+        if (ret > 0) {
+                if (disk_super->label[0]) {
+                        if (disk_super->label[BTRFS_LABEL_SIZE - 1])
+                                disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
+                        printk(KERN_INFO "BTRFS: device label %s ", disk_super->label);
+                } else {
+                        printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid);
+                }
+                printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
+                ret = 0;
+        }
        if (!ret && fs_devices_ret)
                (*fs_devices_ret)->total_devices = total_devices;
diff --git a/fs/buffer.c b/fs/buffer.c
index 8c53a2b15ecb..9ddb9fc7d923 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2114,8 +2114,8 @@ EXPORT_SYMBOL(generic_write_end);
 * Returns true if all buffers which correspond to a file portion
 * we want to read are uptodate.
 */
-int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
+int block_is_partially_uptodate(struct page *page, unsigned long from,
-                                        unsigned long from)
+                                        unsigned long count)
 {
        unsigned block_start, block_end, blocksize;
        unsigned to;
@@ -2127,7 +2127,7 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
        head = page_buffers(page);
        blocksize = head->b_size;
-        to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
+        to = min_t(unsigned, PAGE_CACHE_SIZE - from, count);
        to = from + to;
        if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
                return 0;
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 622f4696e484..5b99bafc31d1 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -124,7 +124,6 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
        /* check parameters */
        ret = -EOPNOTSUPP;
        if (!root->d_inode ||
-            !root->d_inode->i_op ||
            !root->d_inode->i_op->lookup ||
            !root->d_inode->i_op->mkdir ||
            !root->d_inode->i_op->setxattr ||
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 6494d9f673aa..c0a681705104 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -779,8 +779,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
        }
        ret = -EPERM;
-        if (!subdir->d_inode->i_op ||
+        if (!subdir->d_inode->i_op->setxattr ||
-            !subdir->d_inode->i_op->setxattr ||
            !subdir->d_inode->i_op->getxattr ||
            !subdir->d_inode->i_op->lookup ||
            !subdir->d_inode->i_op->mkdir ||
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 66075a4ad979..39da1c2efa50 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -601,7 +601,7 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
                                            false);
                if (IS_ERR(req)) {
                        ret = PTR_ERR(req);
-                        goto out;
+                        break;
                }
                num_pages = calc_pages_for(page_align, len);
@@ -719,7 +719,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
                                            false);
                if (IS_ERR(req)) {
                        ret = PTR_ERR(req);
-                        goto out;
+                        break;
                }
                /*
@@ -972,6 +972,7 @@ retry_snap:
                }
        } else {
                loff_t old_size = inode->i_size;
+                struct iov_iter from;
                /*
                 * No need to acquire the i_truncate_mutex. Because
                 * the MDS revokes Fwb caps before sending truncate
@@ -979,9 +980,10 @@ retry_snap:
                 * are pending vmtruncate. So write and vmtruncate
                 * can not run at the same time
                 */
-                written = generic_file_buffered_write(iocb, iov, nr_segs,
+                iov_iter_init(&from, iov, nr_segs, count, 0);
-                                                      pos, &iocb->ki_pos,
+                written = generic_perform_write(file, &from, pos);
-                                                      count, 0);
+                if (likely(written >= 0))
+                        iocb->ki_pos = pos + written;
                if (inode->i_size > old_size)
                        ceph_fscache_update_objectsize(inode);
                mutex_unlock(&inode->i_mutex);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index efbe08289292..fdf941b44ff1 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -1,9 +1,8 @@
+#include <linux/ceph/ceph_debug.h>
 #include <linux/in.h>
 #include "super.h"
 #include "mds_client.h"
-#include <linux/ceph/ceph_debug.h>
 #include "ioctl.h"
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2c70cbe35d39..df9c9141c099 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -850,7 +850,6 @@ const struct inode_operations cifs_file_inode_ops = {
 /*      revalidate:cifs_revalidate, */
        .setattr = cifs_setattr,
        .getattr = cifs_getattr, /* do we need this anymore? */
-        .rename = cifs_rename,
        .permission = cifs_permission,
 #ifdef CONFIG_CIFS_XATTR
        .setxattr = cifs_setxattr,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 216d7e99f921..8807442c94dd 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2579,19 +2579,32 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        ssize_t rc = -EACCES;
-        loff_t lock_pos = pos;
+        loff_t lock_pos = iocb->ki_pos;
-        if (file->f_flags & O_APPEND)
-                lock_pos = i_size_read(inode);
        /*
         * We need to hold the sem to be sure nobody modifies lock list
         * with a brlock that prevents writing.
         */
        down_read(&cinode->lock_sem);
+        mutex_lock(&inode->i_mutex);
+        if (file->f_flags & O_APPEND)
+                lock_pos = i_size_read(inode);
        if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs),
                                     server->vals->exclusive_lock_type, NULL,
-                                     CIFS_WRITE_OP))
+                                     CIFS_WRITE_OP)) {
-                rc = generic_file_aio_write(iocb, iov, nr_segs, pos);
+                rc = __generic_file_aio_write(iocb, iov, nr_segs);
+                mutex_unlock(&inode->i_mutex);
+                if (rc > 0) {
+                        ssize_t err;
+                        err = generic_write_sync(file, iocb->ki_pos - rc, rc);
+                        if (err < 0)
+                                rc = err;
+                }
+        } else {
+                mutex_unlock(&inode->i_mutex);
+        }
        up_read(&cinode->lock_sem);
        return rc;
 }
@@ -2727,56 +2740,27 @@ cifs_retry_async_readv(struct cifs_readdata *rdata)
 /**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:      the readdata response with list of pages holding data
- * @iov:        vector in which we should copy the data
+ * @iter:       destination for our data
- * @nr_segs:    number of segments in vector
- * @offset:     offset into file of the first iovec
- * @copied:     used to return the amount of data copied to the iov
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 */
-static ssize_t
+static int
-cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
+cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
-                        unsigned long nr_segs, loff_t offset, ssize_t *copied)
 {
-        int rc = 0;
+        size_t remaining = rdata->bytes;
-        struct iov_iter ii;
-        size_t pos = rdata->offset - offset;
-        ssize_t remaining = rdata->bytes;
-        unsigned char *pdata;
        unsigned int i;
-        /* set up iov_iter and advance to the correct offset */
-        iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
-        iov_iter_advance(&ii, pos);
-        *copied = 0;
        for (i = 0; i < rdata->nr_pages; i++) {
-                ssize_t copy;
                struct page *page = rdata->pages[i];
+                size_t copy = min(remaining, PAGE_SIZE);
-                /* copy a whole page or whatever's left */
+                size_t written = copy_page_to_iter(page, 0, copy, iter);
-                copy = min_t(ssize_t, remaining, PAGE_SIZE);
+                remaining -= written;
+                if (written < copy && iov_iter_count(iter) > 0)
-                /* ...but limit it to whatever space is left in the iov */
+                        break;
-                copy = min_t(ssize_t, copy, iov_iter_count(&ii));
-                /* go while there's data to be copied and no errors */
-                if (copy && !rc) {
-                        pdata = kmap(page);
-                        rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
-                                                (int)copy);
-                        kunmap(page);
-                        if (!rc) {
-                                *copied += copy;
-                                remaining -= copy;
-                                iov_iter_advance(&ii, copy);
-                        }
-                }
        }
+        return remaining ? -EFAULT : 0;
-        return rc;
 }
 static void
@@ -2837,20 +2821,21 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
        return total_read > 0 ? total_read : result;
 }
-static ssize_t
+ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
-cifs_iovec_read(struct file *file, const struct iovec *iov,
+                               unsigned long nr_segs, loff_t pos)
-                 unsigned long nr_segs, loff_t *poffset)
 {
+        struct file *file = iocb->ki_filp;
        ssize_t rc;
        size_t len, cur_len;
        ssize_t total_read = 0;
-        loff_t offset = *poffset;
+        loff_t offset = pos;
        unsigned int npages;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct cifsFileInfo *open_file;
        struct cifs_readdata *rdata, *tmp;
        struct list_head rdata_list;
+        struct iov_iter to;
        pid_t pid;
        if (!nr_segs)
@@ -2860,6 +2845,8 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
        if (!len)
                return 0;
+        iov_iter_init(&to, iov, nr_segs, len, 0);
        INIT_LIST_HEAD(&rdata_list);
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
        open_file = file->private_data;
@@ -2917,55 +2904,44 @@ error:
        if (!list_empty(&rdata_list))
                rc = 0;
+        len = iov_iter_count(&to);
        /* the loop below should proceed in the order of increasing offsets */
-restart_loop:
        list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
+        again:
                if (!rc) {
-                        ssize_t copied;
                        /* FIXME: freezable sleep too? */
                        rc = wait_for_completion_killable(&rdata->done);
                        if (rc)
                                rc = -EINTR;
-                        else if (rdata->result)
+                        else if (rdata->result) {
                                rc = rdata->result;
-                        else {
+                                /* resend call if it's a retryable error */
-                                rc = cifs_readdata_to_iov(rdata, iov,
+                                if (rc == -EAGAIN) {
-                                                        nr_segs, *poffset,
+                                        rc = cifs_retry_async_readv(rdata);
-                                                        &copied);
+                                        goto again;
-                                total_read += copied;
+                                }
+                        } else {
+                                rc = cifs_readdata_to_iov(rdata, &to);
                        }
-                        /* resend call if it's a retryable error */
-                        if (rc == -EAGAIN) {
-                                rc = cifs_retry_async_readv(rdata);
-                                goto restart_loop;
-                        }
                }
                list_del_init(&rdata->list);
                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
        }
+        total_read = len - iov_iter_count(&to);
        cifs_stats_bytes_read(tcon, total_read);
-        *poffset += total_read;
        /* mask nodata case */
        if (rc == -ENODATA)
                rc = 0;
-        return total_read ? total_read : rc;
+        if (total_read) {
-}
+                iocb->ki_pos = pos + total_read;
+                return total_read;
-ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
+        }
-                               unsigned long nr_segs, loff_t pos)
+        return rc;
-{
-        ssize_t read;
-        read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
-        if (read > 0)
-                iocb->ki_pos = pos;
-        return read;
 }
 ssize_t
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 3190ca973dd6..1e5b45359509 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -424,7 +424,7 @@ int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
 }
 /* Data available on socket or listen socket received a connect */
-static void lowcomms_data_ready(struct sock *sk, int count_unused)
+static void lowcomms_data_ready(struct sock *sk)
 {
        struct connection *con = sock2con(sk);
        if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
diff --git a/fs/exec.c b/fs/exec.c
index 9e81c630dfa7..476f3ebf437e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -813,7 +813,7 @@ EXPORT_SYMBOL(kernel_read);
 ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
 {
-        ssize_t res = file->f_op->read(file, (void __user *)addr, len, &pos);
+        ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
        if (res > 0)
                flush_icache_range(addr, addr + len);
        return res;
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 7682b970d0f1..4e2c032ab8a1 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -21,12 +21,12 @@
 #undef ORE_DBGMSG2
 #define ORE_DBGMSG2 ORE_DBGMSG
-struct page *_raid_page_alloc(void)
+static struct page *_raid_page_alloc(void)
 {
        return alloc_page(GFP_KERNEL);
 }
-void _raid_page_free(struct page *p)
+static void _raid_page_free(struct page *p)
 {
        __free_page(p);
 }
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 9d9763328734..ed73ed8ebbee 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -543,7 +543,7 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
        return !(odi->systemid_len || odi->osdname_len);
 }
-int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs,
+static int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs,
                      struct exofs_dev **peds)
 {
        struct __alloc_ore_devs_and_exofs_devs {
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 4e508fc83dcf..ca7502d89fde 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -146,7 +146,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
                        overwrite = 1;
        }
-        ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+        ret = __generic_file_aio_write(iocb, iov, nr_segs);
        mutex_unlock(&inode->i_mutex);
        if (ret > 0) {
diff --git a/fs/file.c b/fs/file.c
index b61293badfb1..8f294cfac697 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -25,7 +25,10 @@
 int sysctl_nr_open __read_mostly = 1024*1024;
 int sysctl_nr_open_min = BITS_PER_LONG;
-int sysctl_nr_open_max = 1024 * 1024; /* raised later */
+/* our min() is unusable in constant expressions ;-/ */
+#define __const_min(x, y) ((x) < (y) ? (x) : (y))
+int sysctl_nr_open_max = __const_min(INT_MAX, ~(size_t)0/sizeof(void *)) &
+                         -BITS_PER_LONG;
 static void *alloc_fdmem(size_t size)
 {
@@ -429,12 +432,6 @@ void exit_files(struct task_struct *tsk)
        }
 }
-void __init files_defer_init(void)
-{
-        sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) &
-                             -BITS_PER_LONG;
-}
 struct files_struct init_files = {
        .count          = ATOMIC_INIT(1),
        .fdt            = &init_files.fdtab,
diff --git a/fs/file_table.c b/fs/file_table.c
index 01071c4d752e..a374f5033e97 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -52,7 +52,6 @@ static void file_free_rcu(struct rcu_head *head)
 static inline void file_free(struct file *f)
 {
        percpu_counter_dec(&nr_files);
-        file_check_state(f);
        call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
 }
@@ -178,47 +177,12 @@ struct file *alloc_file(struct path *path, fmode_t mode,
        file->f_mapping = path->dentry->d_inode->i_mapping;
        file->f_mode = mode;
        file->f_op = fop;
-        /*
-         * These mounts don't really matter in practice
-         * for r/o bind mounts.  They aren't userspace-
-         * visible.  We do this for consistency, and so
-         * that we can do debugging checks at __fput()
-         */
-        if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) {
-                file_take_write(file);
-                WARN_ON(mnt_clone_write(path->mnt));
-        }
        if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
                i_readcount_inc(path->dentry->d_inode);
        return file;
 }
 EXPORT_SYMBOL(alloc_file);
-/**
- * drop_file_write_access - give up ability to write to a file
- * @file: the file to which we will stop writing
- *
- * This is a central place which will give up the ability
- * to write to @file, along with access to write through
- * its vfsmount.
- */
-static void drop_file_write_access(struct file *file)
-{
-        struct vfsmount *mnt = file->f_path.mnt;
-        struct dentry *dentry = file->f_path.dentry;
-        struct inode *inode = dentry->d_inode;
-        put_write_access(inode);
-        if (special_file(inode->i_mode))
-                return;
-        if (file_check_writeable(file) != 0)
-                return;
-        __mnt_drop_write(mnt);
-        file_release_write(file);
-}
 /* the real guts of fput() - releasing the last reference to file
 */
 static void __fput(struct file *file)
@@ -253,8 +217,10 @@ static void __fput(struct file *file)
        put_pid(file->f_owner.pid);
        if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
                i_readcount_dec(inode);
-        if (file->f_mode & FMODE_WRITE)
+        if (file->f_mode & FMODE_WRITER) {
-                drop_file_write_access(file);
+                put_write_access(inode);
+                __mnt_drop_write(mnt);
+        }
        file->f_path.dentry = NULL;
        file->f_path.mnt = NULL;
        file->f_inode = NULL;
@@ -359,6 +325,5 @@ void __init files_init(unsigned long mempages)
        n = (mempages * (PAGE_SIZE / 1024)) / 10;
        files_stat.max_files = max_t(unsigned long, n, NR_FILE);
-        files_defer_init();
        percpu_counter_init(&nr_files, 0);
 } 
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 0a648bb455ae..aac71ce373e4 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -667,15 +667,15 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
                struct pipe_buffer *buf = cs->currbuf;
                if (!cs->write) {
-                        buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
+                        kunmap_atomic(cs->mapaddr);
                } else {
-                        kunmap(buf->page);
+                        kunmap_atomic(cs->mapaddr);
                        buf->len = PAGE_SIZE - cs->len;
                }
                cs->currbuf = NULL;
                cs->mapaddr = NULL;
        } else if (cs->mapaddr) {
-                kunmap(cs->pg);
+                kunmap_atomic(cs->mapaddr);
                if (cs->write) {
                        flush_dcache_page(cs->pg);
                        set_page_dirty_lock(cs->pg);
@@ -706,7 +706,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
                        BUG_ON(!cs->nr_segs);
                        cs->currbuf = buf;
-                        cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
+                        cs->mapaddr = kmap_atomic(buf->page);
                        cs->len = buf->len;
                        cs->buf = cs->mapaddr + buf->offset;
                        cs->pipebufs++;
@@ -726,7 +726,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
                        buf->len = 0;
                        cs->currbuf = buf;
-                        cs->mapaddr = kmap(page);
+                        cs->mapaddr = kmap_atomic(page);
                        cs->buf = cs->mapaddr;
                        cs->len = PAGE_SIZE;
                        cs->pipebufs++;
@@ -745,7 +745,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
                        return err;
                BUG_ON(err != 1);
                offset = cs->addr % PAGE_SIZE;
-                cs->mapaddr = kmap(cs->pg);
+                cs->mapaddr = kmap_atomic(cs->pg);
                cs->buf = cs->mapaddr + offset;
                cs->len = min(PAGE_SIZE - offset, cs->seglen);
                cs->seglen -= cs->len;
@@ -874,7 +874,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 out_fallback_unlock:
        unlock_page(newpage);
 out_fallback:
-        cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
+        cs->mapaddr = kmap_atomic(buf->page);
        cs->buf = cs->mapaddr + buf->offset;
        err = lock_request(cs->fc, cs->req);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 48992cac714b..13f8bdec5110 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1086,9 +1086,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
                if (mapping_writably_mapped(mapping))
                        flush_dcache_page(page);
-                pagefault_disable();
                tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
-                pagefault_enable();
                flush_dcache_page(page);
                mark_page_accessed(page);
@@ -1237,8 +1235,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                goto out;
        if (file->f_flags & O_DIRECT) {
-                written = generic_file_direct_write(iocb, iov, &nr_segs,
+                written = generic_file_direct_write(iocb, iov, &nr_segs, pos, 
-                                                    pos, &iocb->ki_pos,
                                                    count, ocount);
                if (written < 0 || written == count)
                        goto out;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 10d6c41aecad..6bf06a07f3e0 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -235,6 +235,7 @@ out_err:
        if (warned++ == 0)
                printk(KERN_WARNING
                        "lockd_up: makesock failed, error=%d\n", err);
+        svc_shutdown_net(serv, net);
        return err;
 }
diff --git a/fs/mount.h b/fs/mount.h
index b29e42f05f34..d55297f2fa05 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -10,7 +10,7 @@ struct mnt_namespace {
        struct user_namespace   *user_ns;
        u64                     seq;    /* Sequence number to prevent loops */
        wait_queue_head_t poll;
-        int event;
+        u64 event;
 };
 struct mnt_pcp {
@@ -104,6 +104,9 @@ struct proc_mounts {
        struct mnt_namespace *ns;
        struct path root;
        int (*show)(struct seq_file *, struct vfsmount *);
+        void *cached_mount;
+        u64 cached_event;
+        loff_t cached_index;
 };
 #define proc_mounts(p) (container_of((p), struct proc_mounts, m))
diff --git a/fs/namei.c b/fs/namei.c
index 88339f59efb5..c6157c894fce 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -358,6 +358,7 @@ int generic_permission(struct inode *inode, int mask)
        return -EACCES;
 }
+EXPORT_SYMBOL(generic_permission);
 /*
 * We _really_ want to just do "generic_permission()" without
@@ -455,6 +456,7 @@ int inode_permission(struct inode *inode, int mask)
                return retval;
        return __inode_permission(inode, mask);
 }
+EXPORT_SYMBOL(inode_permission);
 /**
 * path_get - get a reference to a path
@@ -924,6 +926,7 @@ int follow_up(struct path *path)
        path->mnt = &parent->mnt;
        return 1;
 }
+EXPORT_SYMBOL(follow_up);
 /*
 * Perform an automount
@@ -1085,6 +1088,7 @@ int follow_down_one(struct path *path)
        }
        return 0;
 }
+EXPORT_SYMBOL(follow_down_one);
 static inline bool managed_dentry_might_block(struct dentry *dentry)
 {
@@ -1223,6 +1227,7 @@ int follow_down(struct path *path)
        }
        return 0;
 }
+EXPORT_SYMBOL(follow_down);
 /*
 * Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
@@ -2025,6 +2030,7 @@ int kern_path(const char *name, unsigned int flags, struct path *path)
                *path = nd.path;
        return res;
 }
+EXPORT_SYMBOL(kern_path);
 /**
 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
@@ -2049,6 +2055,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
                *path = nd.path;
        return err;
 }
+EXPORT_SYMBOL(vfs_path_lookup);
 /*
 * Restricted form of lookup. Doesn't follow links, single-component only,
@@ -2111,6 +2118,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
        return __lookup_hash(&this, base, 0);
 }
+EXPORT_SYMBOL(lookup_one_len);
 int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
                 struct path *path, int *empty)
@@ -2135,6 +2143,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
 {
        return user_path_at_empty(dfd, name, flags, path, NULL);
 }
+EXPORT_SYMBOL(user_path_at);
 /*
 * NB: most callers don't do anything directly with the reference to the
@@ -2477,6 +2486,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
        mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
        return NULL;
 }
+EXPORT_SYMBOL(lock_rename);
 void unlock_rename(struct dentry *p1, struct dentry *p2)
 {
@@ -2486,6 +2496,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
                mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
        }
 }
+EXPORT_SYMBOL(unlock_rename);
 int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                bool want_excl)
@@ -2506,6 +2517,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                fsnotify_create(dir, dentry);
        return error;
 }
+EXPORT_SYMBOL(vfs_create);
 static int may_open(struct path *path, int acc_mode, int flag)
 {
@@ -3375,6 +3387,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
                fsnotify_create(dir, dentry);
        return error;
 }
+EXPORT_SYMBOL(vfs_mknod);
 static int may_mknod(umode_t mode)
 {
@@ -3464,6 +3477,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
                fsnotify_mkdir(dir, dentry);
        return error;
 }
+EXPORT_SYMBOL(vfs_mkdir);
 SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
 {
@@ -3518,6 +3532,7 @@ void dentry_unhash(struct dentry *dentry)
                __d_drop(dentry);
        spin_unlock(&dentry->d_lock);
 }
+EXPORT_SYMBOL(dentry_unhash);
 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
@@ -3555,6 +3570,7 @@ out:
                d_delete(dentry);
        return error;
 }
+EXPORT_SYMBOL(vfs_rmdir);
 static long do_rmdir(int dfd, const char __user *pathname)
 {
@@ -3672,6 +3688,7 @@ out:
        return error;
 }
+EXPORT_SYMBOL(vfs_unlink);
 /*
 * Make sure that the actual truncation of the file will occur outside its
@@ -3785,6 +3802,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
                fsnotify_create(dir, dentry);
        return error;
 }
+EXPORT_SYMBOL(vfs_symlink);
 SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
                int, newdfd, const char __user *, newname)
@@ -3893,6 +3911,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
                fsnotify_link(dir, inode, new_dentry);
        return error;
 }
+EXPORT_SYMBOL(vfs_link);
 /*
 * Hardlinks are often used in delicate situations.  We avoid
@@ -4152,6 +4171,7 @@ out:
        return error;
 }
+EXPORT_SYMBOL(vfs_rename);
 SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
                int, newdfd, const char __user *, newname, unsigned int, flags)
@@ -4304,11 +4324,9 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
        return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
 }
-int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
+int readlink_copy(char __user *buffer, int buflen, const char *link)
 {
-        int len;
+        int len = PTR_ERR(link);
-        len = PTR_ERR(link);
        if (IS_ERR(link))
                goto out;
@@ -4320,6 +4338,7 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const c
 out:
        return len;
 }
+EXPORT_SYMBOL(readlink_copy);
 /*
 * A helper for ->readlink().  This should be used *ONLY* for symlinks that
@@ -4337,11 +4356,12 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
        if (IS_ERR(cookie))
                return PTR_ERR(cookie);
-        res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
+        res = readlink_copy(buffer, buflen, nd_get_link(&nd));
        if (dentry->d_inode->i_op->put_link)
                dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
        return res;
 }
+EXPORT_SYMBOL(generic_readlink);
 /* get the link contents into pagecache */
 static char *page_getlink(struct dentry * dentry, struct page **ppage)
@@ -4361,14 +4381,14 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage)
 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 {
        struct page *page = NULL;
-        char *s = page_getlink(dentry, &page);
+        int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page));
-        int res = vfs_readlink(dentry,buffer,buflen,s);
        if (page) {
                kunmap(page);
                page_cache_release(page);
        }
        return res;
 }
+EXPORT_SYMBOL(page_readlink);
 void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
 {
@@ -4376,6 +4396,7 @@ void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
        nd_set_link(nd, page_getlink(dentry, &page));
        return page;
 }
+EXPORT_SYMBOL(page_follow_link_light);
 void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
 {
@@ -4386,6 +4407,7 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
                page_cache_release(page);
        }
 }
+EXPORT_SYMBOL(page_put_link);
 /*
 * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
@@ -4423,45 +4445,18 @@ retry:
 fail:
        return err;
 }
+EXPORT_SYMBOL(__page_symlink);
 int page_symlink(struct inode *inode, const char *symname, int len)
 {
        return __page_symlink(inode, symname, len,
                        !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS));
 }
+EXPORT_SYMBOL(page_symlink);
 const struct inode_operations page_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = page_follow_link_light,
        .put_link       = page_put_link,
 };
-EXPORT_SYMBOL(user_path_at);
-EXPORT_SYMBOL(follow_down_one);
-EXPORT_SYMBOL(follow_down);
-EXPORT_SYMBOL(follow_up);
-EXPORT_SYMBOL(get_write_access); /* nfsd */
-EXPORT_SYMBOL(lock_rename);
-EXPORT_SYMBOL(lookup_one_len);
-EXPORT_SYMBOL(page_follow_link_light);
-EXPORT_SYMBOL(page_put_link);
-EXPORT_SYMBOL(page_readlink);
-EXPORT_SYMBOL(__page_symlink);
-EXPORT_SYMBOL(page_symlink);
 EXPORT_SYMBOL(page_symlink_inode_operations);
-EXPORT_SYMBOL(kern_path);
-EXPORT_SYMBOL(vfs_path_lookup);
-EXPORT_SYMBOL(inode_permission);
-EXPORT_SYMBOL(unlock_rename);
-EXPORT_SYMBOL(vfs_create);
-EXPORT_SYMBOL(vfs_link);
-EXPORT_SYMBOL(vfs_mkdir);
-EXPORT_SYMBOL(vfs_mknod);
-EXPORT_SYMBOL(generic_permission);
-EXPORT_SYMBOL(vfs_readlink);
-EXPORT_SYMBOL(vfs_rename);
-EXPORT_SYMBOL(vfs_rmdir);
-EXPORT_SYMBOL(vfs_symlink);
-EXPORT_SYMBOL(vfs_unlink);
-EXPORT_SYMBOL(dentry_unhash);
-EXPORT_SYMBOL(generic_readlink);
diff --git a/fs/namespace.c b/fs/namespace.c
index 2ffc5a2905d4..182bc41cd887 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -52,7 +52,7 @@ static int __init set_mphash_entries(char *str)
 }
 __setup("mphash_entries=", set_mphash_entries);
-static int event;
+static u64 event;
 static DEFINE_IDA(mnt_id_ida);
 static DEFINE_IDA(mnt_group_ida);
 static DEFINE_SPINLOCK(mnt_id_lock);
@@ -414,9 +414,7 @@ EXPORT_SYMBOL_GPL(mnt_clone_write);
 */
 int __mnt_want_write_file(struct file *file)
 {
-        struct inode *inode = file_inode(file);
+        if (!(file->f_mode & FMODE_WRITER))
-        if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
                return __mnt_want_write(file->f_path.mnt);
        else
                return mnt_clone_write(file->f_path.mnt);
@@ -570,13 +568,17 @@ int sb_prepare_remount_readonly(struct super_block *sb)
 static void free_vfsmnt(struct mount *mnt)
 {
        kfree(mnt->mnt_devname);
-        mnt_free_id(mnt);
 #ifdef CONFIG_SMP
        free_percpu(mnt->mnt_pcp);
 #endif
        kmem_cache_free(mnt_cache, mnt);
 }
+static void delayed_free_vfsmnt(struct rcu_head *head)
+{
+        free_vfsmnt(container_of(head, struct mount, mnt_rcu));
+}
 /* call under rcu_read_lock */
 bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
 {
@@ -848,6 +850,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
        root = mount_fs(type, flags, name, data);
        if (IS_ERR(root)) {
+                mnt_free_id(mnt);
                free_vfsmnt(mnt);
                return ERR_CAST(root);
        }
@@ -885,7 +888,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
                        goto out_free;
        }
-        mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
+        mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
        /* Don't allow unprivileged users to change mount flags */
        if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
                mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
@@ -928,20 +931,11 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
        return mnt;
 out_free:
+        mnt_free_id(mnt);
        free_vfsmnt(mnt);
        return ERR_PTR(err);
 }
-static void delayed_free(struct rcu_head *head)
-{
-        struct mount *mnt = container_of(head, struct mount, mnt_rcu);
-        kfree(mnt->mnt_devname);
-#ifdef CONFIG_SMP
-        free_percpu(mnt->mnt_pcp);
-#endif
-        kmem_cache_free(mnt_cache, mnt);
-}
 static void mntput_no_expire(struct mount *mnt)
 {
 put_again:
@@ -991,7 +985,7 @@ put_again:
        dput(mnt->mnt.mnt_root);
        deactivate_super(mnt->mnt.mnt_sb);
        mnt_free_id(mnt);
-        call_rcu(&mnt->mnt_rcu, delayed_free);
+        call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
 }
 void mntput(struct vfsmount *mnt)
@@ -1100,14 +1094,29 @@ static void *m_start(struct seq_file *m, loff_t *pos)
        struct proc_mounts *p = proc_mounts(m);
        down_read(&namespace_sem);
-        return seq_list_start(&p->ns->list, *pos);
+        if (p->cached_event == p->ns->event) {
+                void *v = p->cached_mount;
+                if (*pos == p->cached_index)
+                        return v;
+                if (*pos == p->cached_index + 1) {
+                        v = seq_list_next(v, &p->ns->list, &p->cached_index);
+                        return p->cached_mount = v;
+                }
+        }
+        p->cached_event = p->ns->event;
+        p->cached_mount = seq_list_start(&p->ns->list, *pos);
+        p->cached_index = *pos;
+        return p->cached_mount;
 }
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
        struct proc_mounts *p = proc_mounts(m);
-        return seq_list_next(v, &p->ns->list, pos);
+        p->cached_mount = seq_list_next(v, &p->ns->list, pos);
+        p->cached_index = *pos;
+        return p->cached_mount;
 }
 static void m_stop(struct seq_file *m, void *v)
@@ -1661,9 +1670,9 @@ static int attach_recursive_mnt(struct mount *source_mnt,
                if (err)
                        goto out;
                err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
+                lock_mount_hash();
                if (err)
                        goto out_cleanup_ids;
-                lock_mount_hash();
                for (p = source_mnt; p; p = next_mnt(p, source_mnt))
                        set_mnt_shared(p);
        } else {
@@ -1690,6 +1699,11 @@ static int attach_recursive_mnt(struct mount *source_mnt,
        return 0;
 out_cleanup_ids:
+        while (!hlist_empty(&tree_list)) {
+                child = hlist_entry(tree_list.first, struct mount, mnt_hash);
+                umount_tree(child, 0);
+        }
+        unlock_mount_hash();
        cleanup_group_ids(source_mnt, NULL);
 out:
        return err;
@@ -2044,7 +2058,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
        struct mount *parent;
        int err;
-        mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT);
+        mnt_flags &= ~MNT_INTERNAL_FLAGS;
        mp = lock_mount(path);
        if (IS_ERR(mp))
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index c320ac52353e..08b8ea8c353e 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -339,7 +339,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
        if (val)
                goto finished;
-        DDPRINTK("ncp_lookup_validate: %pd2 not valid, age=%ld, server lookup\n",
+        ncp_dbg(2, "%pd2 not valid, age=%ld, server lookup\n",
                dentry, NCP_GET_AGE(dentry));
        len = sizeof(__name);
@@ -358,7 +358,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
                        res = ncp_obtain_info(server, dir, __name, &(finfo.i));
        }
        finfo.volume = finfo.i.volNumber;
-        DDPRINTK("ncp_lookup_validate: looked for %pd/%s, res=%d\n",
+        ncp_dbg(2, "looked for %pd/%s, res=%d\n",
                dentry->d_parent, __name, res);
        /*
         * If we didn't find it, or if it has a different dirEntNum to
@@ -372,14 +372,14 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
                        ncp_new_dentry(dentry);
                        val=1;
                } else
-                        DDPRINTK("ncp_lookup_validate: found, but dirEntNum changed\n");
+                        ncp_dbg(2, "found, but dirEntNum changed\n");
                ncp_update_inode2(inode, &finfo);
                mutex_unlock(&inode->i_mutex);
        }
 finished:
-        DDPRINTK("ncp_lookup_validate: result=%d\n", val);
+        ncp_dbg(2, "result=%d\n", val);
        dput(parent);
        return val;
 }
@@ -453,8 +453,7 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx)
        ctl.page  = NULL;
        ctl.cache = NULL;
-        DDPRINTK("ncp_readdir: reading %pD2, pos=%d\n", file,
+        ncp_dbg(2, "reading %pD2, pos=%d\n", file, (int)ctx->pos);
-                (int) ctx->pos);
        result = -EIO;
        /* Do not generate '.' and '..' when server is dead. */
@@ -697,8 +696,7 @@ ncp_read_volume_list(struct file *file, struct dir_context *ctx,
        struct ncp_entry_info entry;
        int i;
-        DPRINTK("ncp_read_volume_list: pos=%ld\n",
+        ncp_dbg(1, "pos=%ld\n", (unsigned long)ctx->pos);
-                        (unsigned long) ctx->pos);
        for (i = 0; i < NCP_NUMBER_OF_VOLUMES; i++) {
                int inval_dentry;
@@ -708,12 +706,11 @@ ncp_read_volume_list(struct file *file, struct dir_context *ctx,
                if (!strlen(info.volume_name))
                        continue;
-                DPRINTK("ncp_read_volume_list: found vol: %s\n",
+                ncp_dbg(1, "found vol: %s\n", info.volume_name);
-                        info.volume_name);
                if (ncp_lookup_volume(server, info.volume_name,
                                        &entry.i)) {
-                        DPRINTK("ncpfs: could not lookup vol %s\n",
+                        ncp_dbg(1, "could not lookup vol %s\n",
                                info.volume_name);
                        continue;
                }
@@ -738,14 +735,13 @@ ncp_do_readdir(struct file *file, struct dir_context *ctx,
        int more;
        size_t bufsize;
-        DPRINTK("ncp_do_readdir: %pD2, fpos=%ld\n", file,
+        ncp_dbg(1, "%pD2, fpos=%ld\n", file, (unsigned long)ctx->pos);
-                (unsigned long) ctx->pos);
+        ncp_vdbg("init %pD, volnum=%d, dirent=%u\n",
-        PPRINTK("ncp_do_readdir: init %pD, volnum=%d, dirent=%u\n",
+                 file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum);
-                file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum);
        err = ncp_initialize_search(server, dir, &seq);
        if (err) {
-                DPRINTK("ncp_do_readdir: init failed, err=%d\n", err);
+                ncp_dbg(1, "init failed, err=%d\n", err);
                return;
        }
        /* We MUST NOT use server->buffer_size handshaked with server if we are
@@ -808,8 +804,7 @@ int ncp_conn_logged_in(struct super_block *sb)
                        goto out;
                result = -ENOENT;
                if (ncp_get_volume_root(server, __name, &volNumber, &dirEntNum, &DosDirNum)) {
-                        PPRINTK("ncp_conn_logged_in: %s not found\n",
+                        ncp_vdbg("%s not found\n", server->m.mounted_vol);
-                                server->m.mounted_vol);
                        goto out;
                }
                dent = sb->s_root;
@@ -822,10 +817,10 @@ int ncp_conn_logged_in(struct super_block *sb)
                                NCP_FINFO(ino)->DosDirNum = DosDirNum;
                                result = 0;
                        } else {
-                                DPRINTK("ncpfs: sb->s_root->d_inode == NULL!\n");
+                                ncp_dbg(1, "sb->s_root->d_inode == NULL!\n");
                        }
                } else {
-                        DPRINTK("ncpfs: sb->s_root == NULL!\n");
+                        ncp_dbg(1, "sb->s_root == NULL!\n");
                }
        } else
                result = 0;
@@ -846,7 +841,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
        if (!ncp_conn_valid(server))
                goto finished;
-        PPRINTK("ncp_lookup: server lookup for %pd2\n", dentry);
+        ncp_vdbg("server lookup for %pd2\n", dentry);
        len = sizeof(__name);
        if (ncp_is_server_root(dir)) {
@@ -854,15 +849,15 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
                                 dentry->d_name.len, 1);
                if (!res)
                        res = ncp_lookup_volume(server, __name, &(finfo.i));
-                        if (!res)
+                if (!res)
-                                ncp_update_known_namespace(server, finfo.i.volNumber, NULL);
+                        ncp_update_known_namespace(server, finfo.i.volNumber, NULL);
        } else {
                res = ncp_io2vol(server, __name, &len, dentry->d_name.name,
                                 dentry->d_name.len, !ncp_preserve_case(dir));
                if (!res)
                        res = ncp_obtain_info(server, dir, __name, &(finfo.i));
        }
-        PPRINTK("ncp_lookup: looked for %pd2, res=%d\n", dentry, res);
+        ncp_vdbg("looked for %pd2, res=%d\n", dentry, res);
        /*
         * If we didn't find an entry, make a negative dentry.
         */
@@ -886,7 +881,7 @@ add_entry:
        }
 finished:
-        PPRINTK("ncp_lookup: result=%d\n", error);
+        ncp_vdbg("result=%d\n", error);
        return ERR_PTR(error);
 }
@@ -909,7 +904,7 @@ out:
        return error;
 out_close:
-        PPRINTK("ncp_instantiate: %pd2 failed, closing file\n", dentry);
+        ncp_vdbg("%pd2 failed, closing file\n", dentry);
        ncp_close_file(NCP_SERVER(dir), finfo->file_handle);
        goto out;
 }
@@ -923,7 +918,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
        int opmode;
        __u8 __name[NCP_MAXPATHLEN + 1];
        
-        PPRINTK("ncp_create_new: creating %pd2, mode=%hx\n", dentry, mode);
+        ncp_vdbg("creating %pd2, mode=%hx\n", dentry, mode);
        ncp_age_dentry(server, dentry);
        len = sizeof(__name);
@@ -952,7 +947,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
                                error = -ENAMETOOLONG;
                        else if (result < 0)
                                error = result;
-                        DPRINTK("ncp_create: %pd2 failed\n", dentry);
+                        ncp_dbg(1, "%pd2 failed\n", dentry);
                        goto out;
                }
                opmode = O_WRONLY;
@@ -985,7 +980,7 @@ static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
        int error, len;
        __u8 __name[NCP_MAXPATHLEN + 1];
-        DPRINTK("ncp_mkdir: making %pd2\n", dentry);
+        ncp_dbg(1, "making %pd2\n", dentry);
        ncp_age_dentry(server, dentry);
        len = sizeof(__name);
@@ -1022,7 +1017,7 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry)
        int error, result, len;
        __u8 __name[NCP_MAXPATHLEN + 1];
-        DPRINTK("ncp_rmdir: removing %pd2\n", dentry);
+        ncp_dbg(1, "removing %pd2\n", dentry);
        len = sizeof(__name);
        error = ncp_io2vol(server, __name, &len, dentry->d_name.name,
@@ -1067,13 +1062,13 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
        int error;
        server = NCP_SERVER(dir);
-        DPRINTK("ncp_unlink: unlinking %pd2\n", dentry);
+        ncp_dbg(1, "unlinking %pd2\n", dentry);
        
        /*
         * Check whether to close the file ...
         */
        if (inode) {
-                PPRINTK("ncp_unlink: closing file\n");
+                ncp_vdbg("closing file\n");
                ncp_make_closed(inode);
        }
@@ -1087,7 +1082,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
 #endif
        switch (error) {
                case 0x00:
-                        DPRINTK("ncp: removed %pd2\n", dentry);
+                        ncp_dbg(1, "removed %pd2\n", dentry);
                        break;
                case 0x85:
                case 0x8A:
@@ -1120,7 +1115,7 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
        int old_len, new_len;
        __u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1];
-        DPRINTK("ncp_rename: %pd2 to %pd2\n", old_dentry, new_dentry);
+        ncp_dbg(1, "%pd2 to %pd2\n", old_dentry, new_dentry);
        ncp_age_dentry(server, old_dentry);
        ncp_age_dentry(server, new_dentry);
@@ -1150,8 +1145,8 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
 #endif
        switch (error) {
                case 0x00:
-                        DPRINTK("ncp renamed %pd -> %pd.\n",
+                        ncp_dbg(1, "renamed %pd -> %pd\n",
-                                old_dentry, new_dentry);
+                                old_dentry, new_dentry);
                        break;
                case 0x9E:
                        error = -ENAMETOOLONG;
@@ -1173,7 +1168,7 @@ static int ncp_mknod(struct inode * dir, struct dentry *dentry,
        if (!new_valid_dev(rdev))
                return -EINVAL;
        if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) {
-                DPRINTK(KERN_DEBUG "ncp_mknod: mode = 0%ho\n", mode);
+                ncp_dbg(1, "mode = 0%ho\n", mode);
                return ncp_create_new(dir, dentry, mode, rdev, 0);
        }
        return -EPERM; /* Strange, but true */
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 8f5074e1ecb9..77640a8bfb87 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -6,6 +6,8 @@
 *
 */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <asm/uaccess.h>
 #include <linux/time.h>
@@ -34,11 +36,11 @@ int ncp_make_open(struct inode *inode, int right)
        error = -EINVAL;
        if (!inode) {
-                printk(KERN_ERR "ncp_make_open: got NULL inode\n");
+                pr_err("%s: got NULL inode\n", __func__);
                goto out;
        }
-        DPRINTK("ncp_make_open: opened=%d, volume # %u, dir entry # %u\n",
+        ncp_dbg(1, "opened=%d, volume # %u, dir entry # %u\n",
                atomic_read(&NCP_FINFO(inode)->opened), 
                NCP_FINFO(inode)->volNumber, 
                NCP_FINFO(inode)->dirEntNum);
@@ -71,7 +73,7 @@ int ncp_make_open(struct inode *inode, int right)
                                break;
                }
                if (result) {
-                        PPRINTK("ncp_make_open: failed, result=%d\n", result);
+                        ncp_vdbg("failed, result=%d\n", result);
                        goto out_unlock;
                }
                /*
@@ -83,7 +85,7 @@ int ncp_make_open(struct inode *inode, int right)
        }
        access = NCP_FINFO(inode)->access;
-        PPRINTK("ncp_make_open: file open, access=%x\n", access);
+        ncp_vdbg("file open, access=%x\n", access);
        if (access == right || access == O_RDWR) {
                atomic_inc(&NCP_FINFO(inode)->opened);
                error = 0;
@@ -107,7 +109,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
        void* freepage;
        size_t freelen;
-        DPRINTK("ncp_file_read: enter %pd2\n", dentry);
+        ncp_dbg(1, "enter %pd2\n", dentry);
        pos = *ppos;
@@ -124,7 +126,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
        error = ncp_make_open(inode, O_RDONLY);
        if (error) {
-                DPRINTK(KERN_ERR "ncp_file_read: open failed, error=%d\n", error);
+                ncp_dbg(1, "open failed, error=%d\n", error);
                return error;
        }
@@ -165,7 +167,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
        file_accessed(file);
-        DPRINTK("ncp_file_read: exit %pd2\n", dentry);
+        ncp_dbg(1, "exit %pd2\n", dentry);
 outrel:
        ncp_inode_close(inode);         
        return already_read ? already_read : error;
@@ -182,7 +184,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
        int errno;
        void* bouncebuffer;
-        DPRINTK("ncp_file_write: enter %pd2\n", dentry);
+        ncp_dbg(1, "enter %pd2\n", dentry);
        if ((ssize_t) count < 0)
                return -EINVAL;
        pos = *ppos;
@@ -211,7 +213,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
                return 0;
        errno = ncp_make_open(inode, O_WRONLY);
        if (errno) {
-                DPRINTK(KERN_ERR "ncp_file_write: open failed, error=%d\n", errno);
+                ncp_dbg(1, "open failed, error=%d\n", errno);
                return errno;
        }
        bufsize = NCP_SERVER(inode)->buffer_size;
@@ -261,7 +263,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
                        i_size_write(inode, pos);
                mutex_unlock(&inode->i_mutex);
        }
-        DPRINTK("ncp_file_write: exit %pd2\n", dentry);
+        ncp_dbg(1, "exit %pd2\n", dentry);
 outrel:
        ncp_inode_close(inode);         
        return already_written ? already_written : errno;
@@ -269,7 +271,7 @@ outrel:
 static int ncp_release(struct inode *inode, struct file *file) {
        if (ncp_make_closed(inode)) {
-                DPRINTK("ncp_release: failed to close\n");
+                ncp_dbg(1, "failed to close\n");
        }
        return 0;
 }
diff --git a/fs/ncpfs/getopt.c b/fs/ncpfs/getopt.c
index 0af3349de851..03ffde1f44d6 100644
--- a/fs/ncpfs/getopt.c
+++ b/fs/ncpfs/getopt.c
@@ -2,6 +2,8 @@
 * getopt.c
 */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/kernel.h>
 #include <linux/string.h>
@@ -46,8 +48,8 @@ int ncp_getopt(const char *caller, char **options, const struct ncp_option *opts
                                if (opts->has_arg & OPT_NOPARAM) {
                                        return opts->val;
                                }
-                                printk(KERN_INFO "%s: the %s option requires an argument\n",
+                                pr_info("%s: the %s option requires an argument\n",
-                                       caller, token);
+                                        caller, token);
                                return -EINVAL;
                        }
                        if (opts->has_arg & OPT_INT) {
@@ -57,18 +59,18 @@ int ncp_getopt(const char *caller, char **options, const struct ncp_option *opts
                                if (!*v) {
                                        return opts->val;
                                }
-                                printk(KERN_INFO "%s: invalid numeric value in %s=%s\n",
+                                pr_info("%s: invalid numeric value in %s=%s\n",
                                        caller, token, val);
                                return -EDOM;
                        }
                        if (opts->has_arg & OPT_STRING) {
                                return opts->val;
                        }
-                        printk(KERN_INFO "%s: unexpected argument %s to the %s option\n",
+                        pr_info("%s: unexpected argument %s to the %s option\n",
                                caller, val, token);
                        return -EINVAL;
                }
        }
-        printk(KERN_INFO "%s: Unrecognized mount option %s\n", caller, token);
+        pr_info("%s: Unrecognized mount option %s\n", caller, token);
        return -EOPNOTSUPP;
 }
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 647d86d2db39..e31e589369a4 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -9,6 +9,8 @@
 *
 */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <asm/uaccess.h>
@@ -133,7 +135,7 @@ void ncp_update_inode(struct inode *inode, struct ncp_entry_info *nwinfo)
        NCP_FINFO(inode)->access = nwinfo->access;
        memcpy(NCP_FINFO(inode)->file_handle, nwinfo->file_handle,
                        sizeof(nwinfo->file_handle));
-        DPRINTK("ncp_update_inode: updated %s, volnum=%d, dirent=%u\n",
+        ncp_dbg(1, "updated %s, volnum=%d, dirent=%u\n",
                nwinfo->i.entryName, NCP_FINFO(inode)->volNumber,
                NCP_FINFO(inode)->dirEntNum);
 }
@@ -141,8 +143,7 @@ void ncp_update_inode(struct inode *inode, struct ncp_entry_info *nwinfo)
 static void ncp_update_dates(struct inode *inode, struct nw_info_struct *nwi)
 {
        /* NFS namespace mode overrides others if it's set. */
-        DPRINTK(KERN_DEBUG "ncp_update_dates_and_mode: (%s) nfs.mode=0%o\n",
+        ncp_dbg(1, "(%s) nfs.mode=0%o\n", nwi->entryName, nwi->nfs.mode);
-                nwi->entryName, nwi->nfs.mode);
        if (nwi->nfs.mode) {
                /* XXX Security? */
                inode->i_mode = nwi->nfs.mode;
@@ -230,7 +231,7 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
        
        ncp_update_attrs(inode, nwinfo);
-        DDPRINTK("ncp_read_inode: inode->i_mode = %u\n", inode->i_mode);
+        ncp_dbg(2, "inode->i_mode = %u\n", inode->i_mode);
        set_nlink(inode, 1);
        inode->i_uid = server->m.uid;
@@ -258,7 +259,7 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
        struct inode *inode;
        if (info == NULL) {
-                printk(KERN_ERR "ncp_iget: info is NULL\n");
+                pr_err("%s: info is NULL\n", __func__);
                return NULL;
        }
@@ -290,7 +291,7 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
                }
                insert_inode_hash(inode);
        } else
-                printk(KERN_ERR "ncp_iget: iget failed!\n");
+                pr_err("%s: iget failed!\n", __func__);
        return inode;
 }
@@ -301,12 +302,12 @@ ncp_evict_inode(struct inode *inode)
        clear_inode(inode);
        if (S_ISDIR(inode->i_mode)) {
-                DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino);
+                ncp_dbg(2, "put directory %ld\n", inode->i_ino);
        }
        if (ncp_make_closed(inode) != 0) {
                /* We can't do anything but complain. */
-                printk(KERN_ERR "ncp_evict_inode: could not close\n");
+                pr_err("%s: could not close\n", __func__);
        }
 }
@@ -469,9 +470,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 {
        struct ncp_mount_data_kernel data;
        struct ncp_server *server;
-        struct file *ncp_filp;
        struct inode *root_inode;
-        struct inode *sock_inode;
        struct socket *sock;
        int error;
        int default_bufsize;
@@ -540,18 +539,10 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
        if (!uid_valid(data.mounted_uid) || !uid_valid(data.uid) ||
            !gid_valid(data.gid))
                goto out;
-        error = -EBADF;
+        sock = sockfd_lookup(data.ncp_fd, &error);
-        ncp_filp = fget(data.ncp_fd);
-        if (!ncp_filp)
-                goto out;
-        error = -ENOTSOCK;
-        sock_inode = file_inode(ncp_filp);
-        if (!S_ISSOCK(sock_inode->i_mode))
-                goto out_fput;
-        sock = SOCKET_I(sock_inode);
        if (!sock)
-                goto out_fput;
+                goto out;
-                
        if (sock->type == SOCK_STREAM)
                default_bufsize = 0xF000;
        else
@@ -573,27 +564,16 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
        if (error)
                goto out_fput;
-        server->ncp_filp = ncp_filp;
        server->ncp_sock = sock;
        
        if (data.info_fd != -1) {
-                struct socket *info_sock;
+                struct socket *info_sock = sockfd_lookup(data.info_fd, &error);
-                error = -EBADF;
-                server->info_filp = fget(data.info_fd);
-                if (!server->info_filp)
-                        goto out_bdi;
-                error = -ENOTSOCK;
-                sock_inode = file_inode(server->info_filp);
-                if (!S_ISSOCK(sock_inode->i_mode))
-                        goto out_fput2;
-                info_sock = SOCKET_I(sock_inode);
                if (!info_sock)
-                        goto out_fput2;
+                        goto out_bdi;
+                server->info_sock = info_sock;
                error = -EBADFD;
                if (info_sock->type != SOCK_STREAM)
                        goto out_fput2;
-                server->info_sock = info_sock;
        }
 /*      server->lock = 0;       */
@@ -621,7 +601,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
           now because of PATH_MAX changes.. */
        if (server->m.time_out < 1) {
                server->m.time_out = 10;
-                printk(KERN_INFO "You need to recompile your ncpfs utils..\n");
+                pr_info("You need to recompile your ncpfs utils..\n");
        }
        server->m.time_out = server->m.time_out * HZ / 100;
        server->m.file_mode = (server->m.file_mode & S_IRWXUGO) | S_IFREG;
@@ -682,7 +662,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
        ncp_unlock_server(server);
        if (error < 0)
                goto out_rxbuf;
-        DPRINTK("ncp_fill_super: NCP_SBP(sb) = %x\n", (int) NCP_SBP(sb));
+        ncp_dbg(1, "NCP_SBP(sb) = %p\n", NCP_SBP(sb));
        error = -EMSGSIZE;      /* -EREMOTESIDEINCOMPATIBLE */
 #ifdef CONFIG_NCPFS_PACKET_SIGNING
@@ -710,7 +690,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
        if (ncp_negotiate_buffersize(server, default_bufsize,
                                     &(server->buffer_size)) != 0)
                goto out_disconnect;
-        DPRINTK("ncpfs: bufsize = %d\n", server->buffer_size);
+        ncp_dbg(1, "bufsize = %d\n", server->buffer_size);
        memset(&finfo, 0, sizeof(finfo));
        finfo.i.attributes      = aDIR;
@@ -739,7 +719,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
        root_inode = ncp_iget(sb, &finfo);
        if (!root_inode)
                goto out_disconnect;
-        DPRINTK("ncp_fill_super: root vol=%d\n", NCP_FINFO(root_inode)->volNumber);
+        ncp_dbg(1, "root vol=%d\n", NCP_FINFO(root_inode)->volNumber);
        sb->s_root = d_make_root(root_inode);
        if (!sb->s_root)
                goto out_disconnect;
@@ -765,17 +745,12 @@ out_nls:
        mutex_destroy(&server->root_setup_lock);
        mutex_destroy(&server->mutex);
 out_fput2:
-        if (server->info_filp)
+        if (server->info_sock)
-                fput(server->info_filp);
+                sockfd_put(server->info_sock);
 out_bdi:
        bdi_destroy(&server->bdi);
 out_fput:
-        /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
+        sockfd_put(sock);
-         * 
-         * The previously used put_filp(ncp_filp); was bogus, since
-         * it doesn't perform proper unlocking.
-         */
-        fput(ncp_filp);
 out:
        put_pid(data.wdog_pid);
        sb->s_fs_info = NULL;
@@ -808,9 +783,9 @@ static void ncp_put_super(struct super_block *sb)
        mutex_destroy(&server->root_setup_lock);
        mutex_destroy(&server->mutex);
-        if (server->info_filp)
+        if (server->info_sock)
-                fput(server->info_filp);
+                sockfd_put(server->info_sock);
-        fput(server->ncp_filp);
+        sockfd_put(server->ncp_sock);
        kill_pid(server->m.wdog_pid, SIGTERM, 1);
        put_pid(server->m.wdog_pid);
@@ -985,8 +960,7 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
        if ((attr->ia_valid & ATTR_SIZE) != 0) {
                int written;
-                DPRINTK("ncpfs: trying to change size to %ld\n",
+                ncp_dbg(1, "trying to change size to %llu\n", attr->ia_size);
-                        attr->ia_size);
                if ((result = ncp_make_open(inode, O_WRONLY)) < 0) {
                        result = -EACCES;
@@ -1072,7 +1046,7 @@ MODULE_ALIAS_FS("ncpfs");
 static int __init init_ncp_fs(void)
 {
        int err;
-        DPRINTK("ncpfs: init_ncp_fs called\n");
+        ncp_dbg(1, "called\n");
        err = init_inodecache();
        if (err)
@@ -1089,7 +1063,7 @@ out1:
 static void __exit exit_ncp_fs(void)
 {
-        DPRINTK("ncpfs: exit_ncp_fs called\n");
+        ncp_dbg(1, "called\n");
        unregister_filesystem(&ncp_fs_type);
        destroy_inodecache();
 }
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 60426ccb3b65..d5659d96ee7f 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -41,7 +41,7 @@ ncp_get_fs_info(struct ncp_server * server, struct inode *inode,
                return -EFAULT;
        if (info.version != NCP_GET_FS_INFO_VERSION) {
-                DPRINTK("info.version invalid: %d\n", info.version);
+                ncp_dbg(1, "info.version invalid: %d\n", info.version);
                return -EINVAL;
        }
        /* TODO: info.addr = server->m.serv_addr; */
@@ -66,7 +66,7 @@ ncp_get_fs_info_v2(struct ncp_server * server, struct inode *inode,
                return -EFAULT;
        if (info2.version != NCP_GET_FS_INFO_VERSION_V2) {
-                DPRINTK("info.version invalid: %d\n", info2.version);
+                ncp_dbg(1, "info.version invalid: %d\n", info2.version);
                return -EINVAL;
        }
        info2.mounted_uid   = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
@@ -132,7 +132,7 @@ ncp_get_compat_fs_info_v2(struct ncp_server * server, struct inode *inode,
                return -EFAULT;
        if (info2.version != NCP_GET_FS_INFO_VERSION_V2) {
-                DPRINTK("info.version invalid: %d\n", info2.version);
+                ncp_dbg(1, "info.version invalid: %d\n", info2.version);
                return -EINVAL;
        }
        info2.mounted_uid   = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
@@ -308,8 +308,7 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
                else
                        result = server->reply_size;
                ncp_unlock_server(server);
-                DPRINTK("ncp_ioctl: copy %d bytes\n",
+                ncp_dbg(1, "copy %d bytes\n", result);
-                        result);
                if (result >= 0)
                        if (copy_to_user(request.data, bouncebuffer, result))
                                result = -EFAULT;
@@ -385,9 +384,9 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
                                                sr.namespace = server->name_space[sr.volNumber];
                                                result = 0;
                                        } else
-                                                DPRINTK("ncpfs: s_root->d_inode==NULL\n");
+                                                ncp_dbg(1, "s_root->d_inode==NULL\n");
                                } else
-                                        DPRINTK("ncpfs: s_root==NULL\n");
+                                        ncp_dbg(1, "s_root==NULL\n");
                        } else {
                                sr.volNumber = -1;
                                sr.namespace = 0;
@@ -440,11 +439,11 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
                                                        NCP_FINFO(s_inode)->DosDirNum = dosde;
                                                        server->root_setuped = 1;
                                                } else {
-                                                        DPRINTK("ncpfs: s_root->d_inode==NULL\n");
+                                                        ncp_dbg(1, "s_root->d_inode==NULL\n");
                                                        result = -EIO;
                                                }
                                        } else {
-                                                DPRINTK("ncpfs: s_root==NULL\n");
+                                                ncp_dbg(1, "s_root==NULL\n");
                                                result = -EIO;
                                        }
                                }
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 3c5dd55d284c..b359d12eb359 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -107,7 +107,7 @@ int ncp_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct inode *inode = file_inode(file);
        
-        DPRINTK("ncp_mmap: called\n");
+        ncp_dbg(1, "called\n");
        if (!ncp_conn_valid(NCP_SERVER(inode)))
                return -EIO;
diff --git a/fs/ncpfs/ncp_fs.h b/fs/ncpfs/ncp_fs.h
index 31831afe1c3b..b9f69e1b1f43 100644
--- a/fs/ncpfs/ncp_fs.h
+++ b/fs/ncpfs/ncp_fs.h
@@ -2,30 +2,32 @@
 #include "ncp_fs_i.h"
 #include "ncp_fs_sb.h"
-/* define because it is easy to change PRINTK to {*}PRINTK */
-#define PRINTK(format, args...) printk(KERN_DEBUG format , ## args)
 #undef NCPFS_PARANOIA
 #ifdef NCPFS_PARANOIA
-#define PPRINTK(format, args...) PRINTK(format , ## args)
+#define ncp_vdbg(fmt, ...)                                      \
+        pr_debug(fmt, ##__VA_ARGS__)
 #else
-#define PPRINTK(format, args...)
+#define ncp_vdbg(fmt, ...)                                      \
+do {                                                            \
+        if (0)                                                  \
+                pr_debug(fmt, ##__VA_ARGS__);                   \
+} while (0)
 #endif
 #ifndef DEBUG_NCP
 #define DEBUG_NCP 0
 #endif
-#if DEBUG_NCP > 0
-#define DPRINTK(format, args...) PRINTK(format , ## args)
+#if DEBUG_NCP > 0 && !defined(DEBUG)
-#else
+#define DEBUG
-#define DPRINTK(format, args...)
-#endif
-#if DEBUG_NCP > 1
-#define DDPRINTK(format, args...) PRINTK(format , ## args)
-#else
-#define DDPRINTK(format, args...)
 #endif
+/*
+ * ncp_dbg - emit a pr_debug() message when @level <= DEBUG_NCP.
+ * @level is parenthesized so that any expression may be passed safely.
+ */
+#define ncp_dbg(level, fmt, ...)                                \
+do {                                                            \
+        if ((level) <= DEBUG_NCP)                               \
+                pr_debug(fmt, ##__VA_ARGS__);                   \
+} while (0)
 #define NCP_MAX_RPC_TIMEOUT (6*HZ)
diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h
index b81e97adc5a9..55e26fd80886 100644
--- a/fs/ncpfs/ncp_fs_sb.h
+++ b/fs/ncpfs/ncp_fs_sb.h
@@ -45,9 +45,7 @@ struct ncp_server {
        __u8 name_space[NCP_NUMBER_OF_VOLUMES + 2];
-        struct file *ncp_filp;  /* File pointer to ncp socket */
        struct socket *ncp_sock;/* ncp socket */
-        struct file *info_filp;
        struct socket *info_sock;
        u8 sequence;
@@ -111,7 +109,7 @@ struct ncp_server {
        spinlock_t requests_lock;       /* Lock accesses to tx.requests, tx.creq and rcv.creq when STREAM mode */
-        void (*data_ready)(struct sock* sk, int len);
+        void (*data_ready)(struct sock* sk);
        void (*error_report)(struct sock* sk);
        void (*write_space)(struct sock* sk);   /* STREAM mode only */
        struct {
@@ -153,7 +151,7 @@ extern void ncp_tcp_tx_proc(struct work_struct *work);
 extern void ncpdgram_rcv_proc(struct work_struct *work);
 extern void ncpdgram_timeout_proc(struct work_struct *work);
 extern void ncpdgram_timeout_call(unsigned long server);
-extern void ncp_tcp_data_ready(struct sock* sk, int len);
+extern void ncp_tcp_data_ready(struct sock* sk);
 extern void ncp_tcp_write_space(struct sock* sk);
 extern void ncp_tcp_error_report(struct sock* sk);
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index 981a95617fc9..482387532f54 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -9,14 +9,14 @@
 *
 */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include "ncp_fs.h"
 static inline void assert_server_locked(struct ncp_server *server)
 {
        if (server->lock == 0) {
-                DPRINTK("ncpfs: server not locked!\n");
+                ncp_dbg(1, "server not locked!\n");
        }
 }
@@ -75,7 +75,7 @@ static void ncp_add_pstring(struct ncp_server *server, const char *s)
        int len = strlen(s);
        assert_server_locked(server);
        if (len > 255) {
-                DPRINTK("ncpfs: string too long: %s\n", s);
+                ncp_dbg(1, "string too long: %s\n", s);
                len = 255;
        }
        ncp_add_byte(server, len);
@@ -225,7 +225,7 @@ int ncp_get_volume_info_with_number(struct ncp_server* server,
        result = -EIO;
        len = ncp_reply_byte(server, 29);
        if (len > NCP_VOLNAME_LEN) {
-                DPRINTK("ncpfs: volume name too long: %d\n", len);
+                ncp_dbg(1, "volume name too long: %d\n", len);
                goto out;
        }
        memcpy(&(target->volume_name), ncp_reply_data(server, 30), len);
@@ -259,7 +259,7 @@ int ncp_get_directory_info(struct ncp_server* server, __u8 n,
        result = -EIO;
        len = ncp_reply_byte(server, 21);
        if (len > NCP_VOLNAME_LEN) {
-                DPRINTK("ncpfs: volume name too long: %d\n", len);
+                ncp_dbg(1, "volume name too long: %d\n", len);
                goto out;
        }
        memcpy(&(target->volume_name), ncp_reply_data(server, 22), len);
@@ -295,9 +295,9 @@ ncp_make_closed(struct inode *inode)
                err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle);
                if (!err)
-                        PPRINTK("ncp_make_closed: volnum=%d, dirent=%u, error=%d\n",
+                        ncp_vdbg("volnum=%d, dirent=%u, error=%d\n",
-                                NCP_FINFO(inode)->volNumber,
+                                 NCP_FINFO(inode)->volNumber,
-                                NCP_FINFO(inode)->dirEntNum, err);
+                                 NCP_FINFO(inode)->dirEntNum, err);
        }
        mutex_unlock(&NCP_FINFO(inode)->open_mutex);
        return err;
@@ -394,8 +394,7 @@ int ncp_obtain_nfs_info(struct ncp_server *server,
                if ((result = ncp_request(server, 87)) == 0) {
                        ncp_extract_nfs_info(ncp_reply_data(server, 0), &target->nfs);
-                        DPRINTK(KERN_DEBUG
+                        ncp_dbg(1, "(%s) mode=0%o, rdev=0x%x\n",
-                                "ncp_obtain_nfs_info: (%s) mode=0%o, rdev=0x%x\n",
                                target->entryName, target->nfs.mode,
                                target->nfs.rdev);
                } else {
@@ -425,7 +424,7 @@ int ncp_obtain_info(struct ncp_server *server, struct inode *dir, const char *pa
        int result;
        if (target == NULL) {
-                printk(KERN_ERR "ncp_obtain_info: invalid call\n");
+                pr_err("%s: invalid call\n", __func__);
                return -EINVAL;
        }
        ncp_init_request(server);
@@ -498,7 +497,7 @@ ncp_get_known_namespace(struct ncp_server *server, __u8 volume)
        namespace = ncp_reply_data(server, 2);
        while (no_namespaces > 0) {
-                DPRINTK("get_namespaces: found %d on %d\n", *namespace, volume);
+                ncp_dbg(1, "found %d on %d\n", *namespace, volume);
 #ifdef CONFIG_NCPFS_NFS_NS
                if ((*namespace == NW_NS_NFS) && !(server->m.flags&NCP_MOUNT_NO_NFS)) 
@@ -531,8 +530,7 @@ ncp_update_known_namespace(struct ncp_server *server, __u8 volume, int *ret_ns)
        if (ret_ns)
                *ret_ns = ns;
-        DPRINTK("lookup_vol: namespace[%d] = %d\n",
+        ncp_dbg(1, "namespace[%d] = %d\n", volume, server->name_space[volume]);
-                volume, server->name_space[volume]);
        if (server->name_space[volume] == ns)
                return 0;
@@ -596,7 +594,7 @@ ncp_get_volume_root(struct ncp_server *server,
 {
        int result;
-        DPRINTK("ncp_get_volume_root: looking up vol %s\n", volname);
+        ncp_dbg(1, "looking up vol %s\n", volname);
        ncp_init_request(server);
        ncp_add_byte(server, 22);       /* Subfunction: Generate dir handle */
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c
index 3a1587222c8a..471bc3d1139e 100644
--- a/fs/ncpfs/sock.c
+++ b/fs/ncpfs/sock.c
@@ -8,6 +8,7 @@
 *
 */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/time.h>
 #include <linux/errno.h>
@@ -96,11 +97,11 @@ static void ncp_req_put(struct ncp_request_reply *req)
                kfree(req);
 }
-void ncp_tcp_data_ready(struct sock *sk, int len)
+void ncp_tcp_data_ready(struct sock *sk)
 {
        struct ncp_server *server = sk->sk_user_data;
-        server->data_ready(sk, len);
+        server->data_ready(sk);
        schedule_work(&server->rcv.tq);
 }
@@ -231,7 +232,7 @@ static void __ncptcp_try_send(struct ncp_server *server)
                return;
        if (result < 0) {
-                printk(KERN_ERR "ncpfs: tcp: Send failed: %d\n", result);
+                pr_err("tcp: Send failed: %d\n", result);
                __ncp_abort_request(server, rq, result);
                return;
        }
@@ -332,7 +333,7 @@ static int ncp_add_request(struct ncp_server *server, struct ncp_request_reply *
        mutex_lock(&server->rcv.creq_mutex);
        if (!ncp_conn_valid(server)) {
                mutex_unlock(&server->rcv.creq_mutex);
-                printk(KERN_ERR "ncpfs: tcp: Server died\n");
+                pr_err("tcp: Server died\n");
                return -EIO;
        }
        ncp_req_get(req);
@@ -405,15 +406,15 @@ void ncpdgram_rcv_proc(struct work_struct *work)
                                }
                                result = _recv(sock, buf, sizeof(buf), MSG_DONTWAIT);
                                if (result < 0) {
-                                        DPRINTK("recv failed with %d\n", result);
+                                        ncp_dbg(1, "recv failed with %d\n", result);
                                        continue;
                                }
                                if (result < 10) {
-                                        DPRINTK("too short (%u) watchdog packet\n", result);
+                                        ncp_dbg(1, "too short (%u) watchdog packet\n", result);
                                        continue;
                                }
                                if (buf[9] != '?') {
-                                        DPRINTK("bad signature (%02X) in watchdog packet\n", buf[9]);
+                                        ncp_dbg(1, "bad signature (%02X) in watchdog packet\n", buf[9]);
                                        continue;
                                }
                                buf[9] = 'Y';
@@ -448,7 +449,7 @@ void ncpdgram_rcv_proc(struct work_struct *work)
                                                        result -= 8;
                                                        hdrl = sock->sk->sk_family == AF_INET ? 8 : 6;
                                                        if (sign_verify_reply(server, server->rxbuf + hdrl, result - hdrl, cpu_to_le32(result), server->rxbuf + result)) {
-                                                                printk(KERN_INFO "ncpfs: Signature violation\n");
+                                                                pr_info("Signature violation\n");
                                                                result = -EIO;
                                                        }
                                                }
@@ -524,7 +525,7 @@ static int do_tcp_rcv(struct ncp_server *server, void *buffer, size_t len)
                return result;
        }
        if (result > len) {
-                printk(KERN_ERR "ncpfs: tcp: bug in recvmsg (%u > %Zu)\n", result, len);
+                pr_err("tcp: bug in recvmsg (%u > %Zu)\n", result, len);
                return -EIO;                    
        }
        return result;
@@ -552,9 +553,9 @@ static int __ncptcp_rcv_proc(struct ncp_server *server)
                                        __ncptcp_abort(server);
                                }
                                if (result < 0) {
-                                        printk(KERN_ERR "ncpfs: tcp: error in recvmsg: %d\n", result);
+                                        pr_err("tcp: error in recvmsg: %d\n", result);
                                } else {
-                                        DPRINTK(KERN_ERR "ncpfs: tcp: EOF\n");
+                                        ncp_dbg(1, "tcp: EOF\n");
                                }
                                return -EIO;
                        }
@@ -566,20 +567,20 @@ static int __ncptcp_rcv_proc(struct ncp_server *server)
                switch (server->rcv.state) {
                        case 0:
                                if (server->rcv.buf.magic != htonl(NCP_TCP_RCVD_MAGIC)) {
-                                        printk(KERN_ERR "ncpfs: tcp: Unexpected reply type %08X\n", ntohl(server->rcv.buf.magic));
+                                        pr_err("tcp: Unexpected reply type %08X\n", ntohl(server->rcv.buf.magic));
                                        __ncptcp_abort(server);
                                        return -EIO;
                                }
                                datalen = ntohl(server->rcv.buf.len) & 0x0FFFFFFF;
                                if (datalen < 10) {
-                                        printk(KERN_ERR "ncpfs: tcp: Unexpected reply len %d\n", datalen);
+                                        pr_err("tcp: Unexpected reply len %d\n", datalen);
                                        __ncptcp_abort(server);
                                        return -EIO;
                                }
 #ifdef CONFIG_NCPFS_PACKET_SIGNING                              
                                if (server->sign_active) {
                                        if (datalen < 18) {
-                                                printk(KERN_ERR "ncpfs: tcp: Unexpected reply len %d\n", datalen);
+                                                pr_err("tcp: Unexpected reply len %d\n", datalen);
                                                __ncptcp_abort(server);
                                                return -EIO;
                                        }
@@ -604,7 +605,7 @@ cont:;
                                                server->rcv.len = datalen - 10;
                                                break;
                                        }                                       
-                                        DPRINTK("ncpfs: tcp: Unexpected NCP type %02X\n", type);
+                                        ncp_dbg(1, "tcp: Unexpected NCP type %02X\n", type);
 skipdata2:;
                                        server->rcv.state = 2;
 skipdata:;
@@ -614,11 +615,11 @@ skipdata:;
                                }
                                req = server->rcv.creq;
                                if (!req) {
-                                        DPRINTK(KERN_ERR "ncpfs: Reply without appropriate request\n");
+                                        ncp_dbg(1, "Reply without appropriate request\n");
                                        goto skipdata2;
                                }
                                if (datalen > req->datalen + 8) {
-                                        printk(KERN_ERR "ncpfs: tcp: Unexpected reply len %d (expected at most %Zd)\n", datalen, req->datalen + 8);
+                                        pr_err("tcp: Unexpected reply len %d (expected at most %Zd)\n", datalen, req->datalen + 8);
                                        server->rcv.state = 3;
                                        goto skipdata;
                                }
@@ -638,12 +639,12 @@ skipdata:;
                                req = server->rcv.creq;
                                if (req->tx_type != NCP_ALLOC_SLOT_REQUEST) {
                                        if (((struct ncp_reply_header*)server->rxbuf)->sequence != server->sequence) {
-                                                printk(KERN_ERR "ncpfs: tcp: Bad sequence number\n");
+                                                pr_err("tcp: Bad sequence number\n");
                                                __ncp_abort_request(server, req, -EIO);
                                                return -EIO;
                                        }
                                        if ((((struct ncp_reply_header*)server->rxbuf)->conn_low | (((struct ncp_reply_header*)server->rxbuf)->conn_high << 8)) != server->connection) {
-                                                printk(KERN_ERR "ncpfs: tcp: Connection number mismatch\n");
+                                                pr_err("tcp: Connection number mismatch\n");
                                                __ncp_abort_request(server, req, -EIO);
                                                return -EIO;
                                        }
@@ -651,7 +652,7 @@ skipdata:;
 #ifdef CONFIG_NCPFS_PACKET_SIGNING                              
                                if (server->sign_active && req->tx_type != NCP_DEALLOC_SLOT_REQUEST) {
                                        if (sign_verify_reply(server, server->rxbuf + 6, req->datalen - 6, cpu_to_be32(req->datalen + 16), &server->rcv.buf.type)) {
-                                                printk(KERN_ERR "ncpfs: tcp: Signature violation\n");
+                                                pr_err("tcp: Signature violation\n");
                                                __ncp_abort_request(server, req, -EIO);
                                                return -EIO;
                                        }
@@ -742,7 +743,7 @@ static int ncp_do_request(struct ncp_server *server, int size,
        int result;
        if (server->lock == 0) {
-                printk(KERN_ERR "ncpfs: Server not locked!\n");
+                pr_err("Server not locked!\n");
                return -EIO;
        }
        if (!ncp_conn_valid(server)) {
@@ -781,7 +782,7 @@ static int ncp_do_request(struct ncp_server *server, int size,
                spin_unlock_irqrestore(&current->sighand->siglock, flags);
        }
-        DDPRINTK("do_ncp_rpc_call returned %d\n", result);
+        ncp_dbg(2, "do_ncp_rpc_call returned %d\n", result);
        return result;
 }
@@ -811,7 +812,7 @@ int ncp_request2(struct ncp_server *server, int function,
        result = ncp_do_request(server, server->current_size, reply, size);
        if (result < 0) {
-                DPRINTK("ncp_request_error: %d\n", result);
+                ncp_dbg(1, "ncp_request_error: %d\n", result);
                goto out;
        }
        server->completion = reply->completion_code;
@@ -822,7 +823,7 @@ int ncp_request2(struct ncp_server *server, int function,
        result = reply->completion_code;
        if (result != 0)
-                PPRINTK("ncp_request: completion code=%x\n", result);
+                ncp_vdbg("completion code=%x\n", result);
 out:
        return result;
 }
@@ -865,14 +866,14 @@ void ncp_lock_server(struct ncp_server *server)
 {
        mutex_lock(&server->mutex);
        if (server->lock)
-                printk(KERN_WARNING "ncp_lock_server: was locked!\n");
+                pr_warn("%s: was locked!\n", __func__);
        server->lock = 1;
 }
 void ncp_unlock_server(struct ncp_server *server)
 {
        if (!server->lock) {
-                printk(KERN_WARNING "ncp_unlock_server: was not locked!\n");
+                pr_warn("%s: was not locked!\n", __func__);
                return;
        }
        server->lock = 0;
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c
index 52439ddc8de0..1a63bfdb4a65 100644
--- a/fs/ncpfs/symlink.c
+++ b/fs/ncpfs/symlink.c
@@ -112,7 +112,7 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) {
        __le32 attr;
        unsigned int hdr;
-        DPRINTK("ncp_symlink(dir=%p,dentry=%p,symname=%s)\n",dir,dentry,symname);
+        ncp_dbg(1, "dir=%p, dentry=%p, symname=%s\n", dir, dentry, symname);
        if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber))
                kludge = 0;
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index a812fd1b92a4..b481e1f5eecc 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -39,9 +39,13 @@ struct nfs4_acl;
 struct svc_fh;
 struct svc_rqst;
-/* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to
+/*
- * fit in a page: */
+ * Maximum ACL we'll accept from a client; chosen (somewhat
-#define NFS4_ACL_MAX 170
+ * arbitrarily) so that kmalloc'ing the ACL shouldn't require a
+ * high-order allocation.  This allows 204 ACEs on x86_64:
+ */
+#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
+                        / sizeof(struct nfs4_ace))
 struct nfs4_acl *nfs4_acl_new(int);
 int nfs4_acl_get_whotype(char *, u32);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index d190e33d0ec2..6f3f392d48af 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -542,7 +542,10 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
         * up setting a 3-element effective posix ACL with all
         * permissions zero.
         */
-        nace = 4 + state->users->n + state->groups->n;
+        if (!state->users->n && !state->groups->n)
+                nace = 3;
+        else /* Note we also include a MASK ACE in this case: */
+                nace = 4 + state->users->n + state->groups->n;
        pacl = posix_acl_alloc(nace, GFP_KERNEL);
        if (!pacl)
                return ERR_PTR(-ENOMEM);
@@ -586,9 +589,12 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
                add_to_mask(state, &state->groups->aces[i].perms);
        }
-        pace++;
+        /* Emit ACL_MASK only with named users/groups, matching nace above. */
+        if (state->users->n || state->groups->n) {
-        pace->e_tag = ACL_MASK;
+                pace++;
-        low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
+                pace->e_tag = ACL_MASK;
+                low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
+        }
        pace++;
        pace->e_tag = ACL_OTHER;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7f05cd140de3..39c8ef875f91 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -32,6 +32,7 @@
 */
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/svc_xprt.h>
 #include <linux/slab.h>
 #include "nfsd.h"
@@ -635,6 +636,22 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc
        }
 }
+static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args)
+{
+        struct rpc_xprt *xprt;
+        if (args->protocol != XPRT_TRANSPORT_BC_TCP)
+                return rpc_create(args);
+        xprt = args->bc_xprt->xpt_bc_xprt;
+        if (xprt) {
+                xprt_get(xprt);
+                return rpc_create_xprt(args, xprt);
+        }
+        return rpc_create(args);
+}
 static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
 {
        struct rpc_timeout      timeparms = {
@@ -674,7 +691,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
                args.authflavor = ses->se_cb_sec.flavor;
        }
        /* Create RPC client */
-        client = rpc_create(&args);
+        client = create_backchannel_client(&args);
        if (IS_ERR(client)) {
                dprintk("NFSD: couldn't create callback client: %ld\n",
                        PTR_ERR(client));
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 82189b208af3..d543222babf3 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1273,6 +1273,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
        struct nfsd4_op *op;
        struct nfsd4_operation *opdesc;
        struct nfsd4_compound_state *cstate = &resp->cstate;
+        struct svc_fh *current_fh = &cstate->current_fh;
+        struct svc_fh *save_fh = &cstate->save_fh;
        int             slack_bytes;
        u32             plen = 0;
        __be32          status;
@@ -1288,11 +1290,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
        resp->tag = args->tag;
        resp->opcnt = 0;
        resp->rqstp = rqstp;
-        resp->cstate.minorversion = args->minorversion;
+        cstate->minorversion = args->minorversion;
-        resp->cstate.replay_owner = NULL;
+        cstate->replay_owner = NULL;
-        resp->cstate.session = NULL;
+        cstate->session = NULL;
-        fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
+        fh_init(current_fh, NFS4_FHSIZE);
-        fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
+        fh_init(save_fh, NFS4_FHSIZE);
        /*
         * Don't use the deferral mechanism for NFSv4; compounds make it
         * too hard to avoid non-idempotency problems.
@@ -1345,20 +1347,28 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
                opdesc = OPDESC(op);
-                if (!cstate->current_fh.fh_dentry) {
+                if (!current_fh->fh_dentry) {
                        if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
                                op->status = nfserr_nofilehandle;
                                goto encode_op;
                        }
-                } else if (cstate->current_fh.fh_export->ex_fslocs.migrated &&
+                } else if (current_fh->fh_export->ex_fslocs.migrated &&
                          !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {
                        op->status = nfserr_moved;
                        goto encode_op;
                }
+                fh_clear_wcc(current_fh);
                /* If op is non-idempotent */
                if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
                        plen = opdesc->op_rsize_bop(rqstp, op);
+                        /*
+                         * If there's still another operation, make sure
+                         * we'll have space to at least encode an error:
+                         */
+                        if (resp->opcnt < args->opcnt)
+                                plen += COMPOUND_ERR_SLACK_SPACE;
                        op->status = nfsd4_check_resp_size(resp, plen);
                }
@@ -1377,12 +1387,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
                                clear_current_stateid(cstate);
                        if (need_wrongsec_check(rqstp))
-                                op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp);
+                                op->status = check_nfsd_access(current_fh->fh_export, rqstp);
                }
 encode_op:
                /* Only from SEQUENCE */
-                if (resp->cstate.status == nfserr_replay_cache) {
+                if (cstate->status == nfserr_replay_cache) {
                        dprintk("%s NFS4.1 replay from cache\n", __func__);
                        status = op->status;
                        goto out;
@@ -1411,10 +1421,10 @@ encode_op:
                nfsd4_increment_op_stats(op->opnum);
        }
-        resp->cstate.status = status;
+        cstate->status = status;
-        fh_put(&resp->cstate.current_fh);
+        fh_put(current_fh);
-        fh_put(&resp->cstate.save_fh);
+        fh_put(save_fh);
-        BUG_ON(resp->cstate.replay_owner);
+        BUG_ON(cstate->replay_owner);
 out:
        /* Reset deferral mechanism for RPC deferrals */
        rqstp->rq_usedeferral = 1;
@@ -1523,7 +1533,8 @@ static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
 static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
-        return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32);
+        return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
+                                                                sizeof(__be32);
 }
 static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d5d070fbeb35..3ba65979a3cd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1538,7 +1538,7 @@ out_err:
 }
 /*
- * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size.
+ * Cache a reply. nfsd4_check_resp_size() has bounded the cache size.
 */
 void
 nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
@@ -1596,7 +1596,7 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
 * The sequence operation is not cached because we can use the slot and
 * session values.
 */
-__be32
+static __be32
 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
                         struct nfsd4_sequence *seq)
 {
@@ -1605,9 +1605,8 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
        dprintk("--> %s slot %p\n", __func__, slot);
-        /* Either returns 0 or nfserr_retry_uncached */
        status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
-        if (status == nfserr_retry_uncached_rep)
+        if (status)
                return status;
        /* The sequence operation has been encoded, cstate->datap set. */
@@ -2287,7 +2286,8 @@ out:
        if (!list_empty(&clp->cl_revoked))
                seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
 out_no_session:
-        kfree(conn);
+        if (conn)
+                free_conn(conn);
        spin_unlock(&nn->client_lock);
        return status;
 out_put_session:
@@ -3627,8 +3627,11 @@ static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask,
                return nfserr_bad_stateid;
        status = lookup_clientid(&stateid->si_opaque.so_clid, sessions,
                                                        nn, &cl);
-        if (status == nfserr_stale_clientid)
+        if (status == nfserr_stale_clientid) {
+                if (sessions)
+                        return nfserr_bad_stateid;
                return nfserr_stale_stateid;
+        }
        if (status)
                return status;
        *s = find_stateid_by_type(cl, stateid, typemask);
@@ -5062,7 +5065,6 @@ nfs4_state_destroy_net(struct net *net)
        int i;
        struct nfs4_client *clp = NULL;
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-        struct rb_node *node, *tmp;
        for (i = 0; i < CLIENT_HASH_SIZE; i++) {
                while (!list_empty(&nn->conf_id_hashtbl[i])) {
@@ -5071,13 +5073,11 @@ nfs4_state_destroy_net(struct net *net)
                }
        }
-        node = rb_first(&nn->unconf_name_tree);
+        for (i = 0; i < CLIENT_HASH_SIZE; i++) {
-        while (node != NULL) {
+                while (!list_empty(&nn->unconf_id_hashtbl[i])) {
-                tmp = node;
+                        clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
-                node = rb_next(tmp);
+                        destroy_client(clp);
-                clp = rb_entry(tmp, struct nfs4_client, cl_namenode);
+                }
-                rb_erase(tmp, &nn->unconf_name_tree);
-                destroy_client(clp);
        }
        kfree(nn->sessionid_hashtbl);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 63f2395c57ed..2723c1badd01 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -294,7 +294,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
                READ32(nace);
                if (nace > NFS4_ACL_MAX)
-                        return nfserr_resource;
+                        return nfserr_fbig;
                *acl = nfs4_acl_new(nace);
                if (*acl == NULL)
@@ -1222,7 +1222,6 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
        }
        write->wr_head.iov_base = p;
        write->wr_head.iov_len = avail;
-        WARN_ON(avail != (XDR_QUADLEN(avail) << 2));
        write->wr_pagelist = argp->pagelist;
        len = XDR_QUADLEN(write->wr_buflen) << 2;
@@ -2483,6 +2482,8 @@ out_acl:
                        goto out;
        }
        if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
+                if ((buflen -= 16) < 0)
+                        goto out_resource;
                WRITE32(3);
                WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
                WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
@@ -2499,8 +2500,10 @@ out:
                security_release_secctx(context, contextlen);
 #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
        kfree(acl);
-        if (tempfh)
+        if (tempfh) {
                fh_put(tempfh);
+                kfree(tempfh);
+        }
        return status;
 out_nfserr:
        status = nfserrno(err);
@@ -3471,6 +3474,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
        struct nfsd4_test_stateid_id *stateid, *next;
        __be32 *p;
+        if (nfserr)
+                return nfserr;
        RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids));
        *p++ = htonl(test_stateid->ts_num_ids);
@@ -3579,8 +3585,6 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad)
                return 0;
        session = resp->cstate.session;
-        if (session == NULL)
-                return 0;
        if (xb->page_len == 0) {
                length = (char *)resp->p - (char *)xb->head[0].iov_base + pad;
@@ -3620,9 +3624,17 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
        BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
               !nfsd4_enc_ops[op->opnum]);
        op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
-        /* nfsd4_check_drc_limit guarantees enough room for error status */
+        /* nfsd4_check_resp_size guarantees enough room for error status */
        if (!op->status)
                op->status = nfsd4_check_resp_size(resp, 0);
+        if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) {
+                struct nfsd4_slot *slot = resp->cstate.slot;
+                if (slot->sl_flags & NFSD4_SLOT_CACHETHIS)
+                        op->status = nfserr_rep_too_big_to_cache;
+                else
+                        op->status = nfserr_rep_too_big;
+        }
        if (so) {
                so->so_replay.rp_status = op->status;
                so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1);
@@ -3691,6 +3703,12 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
 int
 nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args)
 {
+        if (rqstp->rq_arg.head[0].iov_len % 4) {
+                /* client is nuts: the head length is not a multiple of 4 */
+                dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)\n",
+                        __func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid));
+                return 0;
+        }
        args->p = p;
        args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len;
        args->pagelist = rqstp->rq_arg.pages;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7f555179bf81..f34d9de802ab 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -699,6 +699,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net)
        if (err != 0 || fd < 0)
                return -EINVAL;
+        if (svc_alien_sock(net, fd)) {
+                printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__);
+                return -EINVAL;
+        }
        err = nfsd_create_serv(net);
        if (err != 0)
                return err;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 30f34ab02137..479eb681c27c 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -282,7 +282,7 @@ void		nfsd_lockd_shutdown(void);
 * reason.
 */
 #define COMPOUND_SLACK_SPACE            140    /* OP_GETFH */
-#define COMPOUND_ERR_SLACK_SPACE        12     /* OP_SETATTR */
+#define COMPOUND_ERR_SLACK_SPACE        16     /* OP_SETATTR */
 #define NFSD_LAUNDROMAT_MINTIMEOUT      1   /* seconds */
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 4775bc4896c8..ad67964d0bb1 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -133,6 +133,17 @@ fh_init(struct svc_fh *fhp, int maxsize)
 #ifdef CONFIG_NFSD_V3
 /*
+ * The wcc data stored in current_fh should be cleared
+ * between compound ops.
+ */
+static inline void
+fh_clear_wcc(struct svc_fh *fhp)
+{
+        fhp->fh_post_saved = 0;
+        fhp->fh_pre_saved = 0;
+}
+/*
 * Fill in the pre_op attr for the wcc data
 */
 static inline void
@@ -152,7 +163,8 @@ fill_pre_wcc(struct svc_fh *fhp)
 extern void fill_post_wcc(struct svc_fh *);
 #else
-#define fill_pre_wcc(ignored)
+#define fh_clear_wcc(ignored)
+#define fill_pre_wcc(ignored)
 #define fill_post_wcc(notused)
 #endif /* CONFIG_NFSD_V3 */
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index b17d93214d01..9c769a47ac5a 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -152,7 +152,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
        type = (stat->mode & S_IFMT);
        *p++ = htonl(nfs_ftypes[type >> 12]);
-        *p++ = htonl((u32) (stat->mode & S_IALLUGO));
+        *p++ = htonl((u32) stat->mode);
        *p++ = htonl((u32) stat->nlink);
        *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
        *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 915808b36df7..16f0673a423c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -404,6 +404,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
        umode_t         ftype = 0;
        __be32          err;
        int             host_err;
+        bool            get_write_count;
        int             size_change = 0;
        if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
@@ -411,10 +412,18 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
        if (iap->ia_valid & ATTR_SIZE)
                ftype = S_IFREG;
+        /* Callers that do fh_verify should do the fh_want_write: */
+        get_write_count = !fhp->fh_dentry;
        /* Get inode */
        err = fh_verify(rqstp, fhp, ftype, accmode);
        if (err)
                goto out;
+        if (get_write_count) {
+                host_err = fh_want_write(fhp);
+                if (host_err)
+                        return nfserrno(host_err);
+        }
        dentry = fhp->fh_dentry;
        inode = dentry->d_inode;
@@ -1706,10 +1715,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        dput(odentry);
 out_nfserr:
        err = nfserrno(host_err);
+        /*
-        /* we cannot reply on fh_unlock on the two filehandles,
+         * We cannot rely on fh_unlock on the two filehandles,
         * as that would do the wrong thing if the two directories
-         * were the same, so again we do it by hand
+         * were the same, so again we do it by hand.
         */
        fill_post_wcc(ffhp);
        fill_post_wcc(tfhp);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d278a0d03496..5ea7df305083 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -574,8 +574,6 @@ extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
                struct nfsd4_compound_state *,
                struct nfsd4_setclientid_confirm *setclientid_confirm);
 extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
-extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
-                struct nfsd4_sequence *seq);
 extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
                struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
 extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 9d8153ebacfb..f47af5e6e230 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1704,8 +1704,6 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
        iput(bvi);
 skip_large_index_stuff:
        /* Setup the operations for this index inode. */
-        vi->i_op = NULL;
-        vi->i_fop = NULL;
        vi->i_mapping->a_ops = &ntfs_mst_aops;
        vi->i_blocks = ni->allocated_size >> 9;
        /*
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index eb649d23a4de..c6b90e670389 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -137,7 +137,7 @@ static int o2net_sys_err_translations[O2NET_ERR_MAX] =
 static void o2net_sc_connect_completed(struct work_struct *work);
 static void o2net_rx_until_empty(struct work_struct *work);
 static void o2net_shutdown_sc(struct work_struct *work);
-static void o2net_listen_data_ready(struct sock *sk, int bytes);
+static void o2net_listen_data_ready(struct sock *sk);
 static void o2net_sc_send_keep_req(struct work_struct *work);
 static void o2net_idle_timer(unsigned long data);
 static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
@@ -597,9 +597,9 @@ static void o2net_set_nn_state(struct o2net_node *nn,
 }
 /* see o2net_register_callbacks() */
-static void o2net_data_ready(struct sock *sk, int bytes)
+static void o2net_data_ready(struct sock *sk)
 {
-        void (*ready)(struct sock *sk, int bytes);
+        void (*ready)(struct sock *sk);
        read_lock(&sk->sk_callback_lock);
        if (sk->sk_user_data) {
@@ -613,7 +613,7 @@ static void o2net_data_ready(struct sock *sk, int bytes)
        }
        read_unlock(&sk->sk_callback_lock);
-        ready(sk, bytes);
+        ready(sk);
 }
 /* see o2net_register_callbacks() */
@@ -916,57 +916,30 @@ static struct o2net_msg_handler *o2net_handler_get(u32 msg_type, u32 key)
 static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len)
 {
-        int ret;
+        struct kvec vec = { .iov_len = len, .iov_base = data, };
-        mm_segment_t oldfs;
+        struct msghdr msg = { .msg_flags = MSG_DONTWAIT, };
-        struct kvec vec = {
+        return kernel_recvmsg(sock, &msg, &vec, 1, len, msg.msg_flags);
-                .iov_len = len,
-                .iov_base = data,
-        };
-        struct msghdr msg = {
-                .msg_iovlen = 1,
-                .msg_iov = (struct iovec *)&vec,
-                .msg_flags = MSG_DONTWAIT,
-        };
-        oldfs = get_fs();
-        set_fs(get_ds());
-        ret = sock_recvmsg(sock, &msg, len, msg.msg_flags);
-        set_fs(oldfs);
-        return ret;
 }
 static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec,
                              size_t veclen, size_t total)
 {
        int ret;
-        mm_segment_t oldfs;
+        struct msghdr msg = { .msg_flags = 0, }; /* zero unused fields */
-        struct msghdr msg = {
-                .msg_iov = (struct iovec *)vec,
-                .msg_iovlen = veclen,
-        };
        if (sock == NULL) {
                ret = -EINVAL;
                goto out;
        }
-        oldfs = get_fs();
+        ret = kernel_sendmsg(sock, &msg, vec, veclen, total);
-        set_fs(get_ds());
+        if (likely(ret == total))
-        ret = sock_sendmsg(sock, &msg, total);
+                return 0;
-        set_fs(oldfs);
+        mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret, total);
-        if (ret != total) {
+        if (ret >= 0)
-                mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret,
+                ret = -EPIPE; /* should be smarter, I bet */
-                     total);
-                if (ret >= 0)
-                        ret = -EPIPE; /* should be smarter, I bet */
-                goto out;
-        }
-        ret = 0;
 out:
-        if (ret < 0)
+        mlog(0, "returning error: %d\n", ret);
-                mlog(0, "returning error: %d\n", ret);
        return ret;
 }
@@ -1953,9 +1926,9 @@ static void o2net_accept_many(struct work_struct *work)
                cond_resched();
 }
-static void o2net_listen_data_ready(struct sock *sk, int bytes)
+static void o2net_listen_data_ready(struct sock *sk)
 {
-        void (*ready)(struct sock *sk, int bytes);
+        void (*ready)(struct sock *sk);
        read_lock(&sk->sk_callback_lock);
        ready = sk->sk_user_data;
@@ -1978,7 +1951,6 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes)
         */
        if (sk->sk_state == TCP_LISTEN) {
-                mlog(ML_TCP, "bytes: %d\n", bytes);
                queue_work(o2net_wq, &o2net_listen_work);
        } else {
                ready = NULL;
@@ -1987,7 +1959,7 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes)
 out:
        read_unlock(&sk->sk_callback_lock);
        if (ready != NULL)
-                ready(sk, bytes);
+                ready(sk);
 }
 static int o2net_open_listening_sock(__be32 addr, __be16 port)
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 4cbcb65784a3..dc024367110a 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -165,7 +165,7 @@ struct o2net_sock_container {
        /* original handlers for the sockets */
        void                    (*sc_state_change)(struct sock *sk);
-        void                    (*sc_data_ready)(struct sock *sk, int bytes);
+        void                    (*sc_data_ready)(struct sock *sk);
        u32                     sc_msg_key;
        u16                     sc_msg_type;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ff33c5ef87f2..8970dcf74de5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2367,15 +2367,18 @@ relock:
        if (direct_io) {
                written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
-                                                    ppos, count, ocount);
+                                                    count, ocount);
                if (written < 0) {
                        ret = written;
                        goto out_dio;
                }
        } else {
+                struct iov_iter from;
+                iov_iter_init(&from, iov, nr_segs, count, 0);
                current->backing_dev_info = file->f_mapping->backing_dev_info;
-                written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
+                written = generic_perform_write(file, &from, *ppos);
-                                                      ppos, count, 0);
+                if (likely(written >= 0))
+                        iocb->ki_pos = *ppos + written;
                current->backing_dev_info = NULL;
        }
diff --git a/fs/open.c b/fs/open.c
index 631aea815def..3d30eb1fc95e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -655,35 +655,6 @@ out:
        return error;
 }
-/*
- * You have to be very careful that these write
- * counts get cleaned up in error cases and
- * upon __fput().  This should probably never
- * be called outside of __dentry_open().
- */
-static inline int __get_file_write_access(struct inode *inode,
-                                          struct vfsmount *mnt)
-{
-        int error;
-        error = get_write_access(inode);
-        if (error)
-                return error;
-        /*
-         * Do not take mount writer counts on
-         * special files since no writes to
-         * the mount itself will occur.
-         */
-        if (!special_file(inode->i_mode)) {
-                /*
-                 * Balanced in __fput()
-                 */
-                error = __mnt_want_write(mnt);
-                if (error)
-                        put_write_access(inode);
-        }
-        return error;
-}
 int open_check_o_direct(struct file *f)
 {
        /* NB: we're sure to have correct a_ops only after f_op->open */
@@ -708,26 +679,28 @@ static int do_dentry_open(struct file *f,
        f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
                                FMODE_PREAD | FMODE_PWRITE;
-        if (unlikely(f->f_flags & O_PATH))
-                f->f_mode = FMODE_PATH;
        path_get(&f->f_path);
        inode = f->f_inode = f->f_path.dentry->d_inode;
-        if (f->f_mode & FMODE_WRITE) {
-                error = __get_file_write_access(inode, f->f_path.mnt);
-                if (error)
-                        goto cleanup_file;
-                if (!special_file(inode->i_mode))
-                        file_take_write(f);
-        }
        f->f_mapping = inode->i_mapping;
-        if (unlikely(f->f_mode & FMODE_PATH)) {
+        if (unlikely(f->f_flags & O_PATH)) {
+                f->f_mode = FMODE_PATH;
                f->f_op = &empty_fops;
                return 0;
        }
+        if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
+                error = get_write_access(inode);
+                if (unlikely(error))
+                        goto cleanup_file;
+                error = __mnt_want_write(f->f_path.mnt);
+                if (unlikely(error)) {
+                        put_write_access(inode);
+                        goto cleanup_file;
+                }
+                f->f_mode |= FMODE_WRITER;
+        }
        /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
        if (S_ISREG(inode->i_mode))
                f->f_mode |= FMODE_ATOMIC_POS;
@@ -764,18 +737,9 @@ static int do_dentry_open(struct file *f,
 cleanup_all:
        fops_put(f->f_op);
-        if (f->f_mode & FMODE_WRITE) {
+        if (f->f_mode & FMODE_WRITER) {
                put_write_access(inode);
-                if (!special_file(inode->i_mode)) {
+                __mnt_drop_write(f->f_path.mnt);
-                        /*
-                         * We don't consider this a real
-                         * mnt_want/drop_write() pair
-                         * because it all happenend right
-                         * here, so just reset the state.
-                         */
-                        file_reset_write(f);
-                        __mnt_drop_write(f->f_path.mnt);
-                }
        }
 cleanup_file:
        path_put(&f->f_path);
diff --git a/fs/pipe.c b/fs/pipe.c
index 78fd0d0788db..034bffac3f97 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -142,55 +142,6 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
        return 0;
 }
-static int
-pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
-                      int atomic)
-{
-        unsigned long copy;
-        while (len > 0) {
-                while (!iov->iov_len)
-                        iov++;
-                copy = min_t(unsigned long, len, iov->iov_len);
-                if (atomic) {
-                        if (__copy_to_user_inatomic(iov->iov_base, from, copy))
-                                return -EFAULT;
-                } else {
-                        if (copy_to_user(iov->iov_base, from, copy))
-                                return -EFAULT;
-                }
-                from += copy;
-                len -= copy;
-                iov->iov_base += copy;
-                iov->iov_len -= copy;
-        }
-        return 0;
-}
-/*
- * Attempt to pre-fault in the user memory, so we can use atomic copies.
- * Returns the number of bytes not faulted in.
- */
-static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
-{
-        while (!iov->iov_len)
-                iov++;
-        while (len > 0) {
-                unsigned long this_len;
-                this_len = min_t(unsigned long, len, iov->iov_len);
-                if (fault_in_pages_writeable(iov->iov_base, this_len))
-                        break;
-                len -= this_len;
-                iov++;
-        }
-        return len;
-}
 /*
 * Pre-fault in the user memory, so we can use atomic copies.
 */
@@ -226,52 +177,6 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 }
 /**
- * generic_pipe_buf_map - virtually map a pipe buffer
- * @pipe:       the pipe that the buffer belongs to
- * @buf:        the buffer that should be mapped
- * @atomic:     whether to use an atomic map
- *
- * Description:
- *      This function returns a kernel virtual address mapping for the
- *      pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided
- *      and the caller has to be careful not to fault before calling
- *      the unmap function.
- *
- *      Note that this function calls kmap_atomic() if @atomic != 0.
- */
-void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
-                           struct pipe_buffer *buf, int atomic)
-{
-        if (atomic) {
-                buf->flags |= PIPE_BUF_FLAG_ATOMIC;
-                return kmap_atomic(buf->page);
-        }
-        return kmap(buf->page);
-}
-EXPORT_SYMBOL(generic_pipe_buf_map);
-/**
- * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
- * @pipe:       the pipe that the buffer belongs to
- * @buf:        the buffer that should be unmapped
- * @map_data:   the data that the mapping function returned
- *
- * Description:
- *      This function undoes the mapping that ->map() provided.
- */
-void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
-                            struct pipe_buffer *buf, void *map_data)
-{
-        if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
-                buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
-                kunmap_atomic(map_data);
-        } else
-                kunmap(buf->page);
-}
-EXPORT_SYMBOL(generic_pipe_buf_unmap);
-/**
 * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
 * @pipe:       the pipe that the buffer belongs to
 * @buf:        the buffer to attempt to steal
@@ -351,8 +256,6 @@ EXPORT_SYMBOL(generic_pipe_buf_release);
 static const struct pipe_buf_operations anon_pipe_buf_ops = {
        .can_merge = 1,
-        .map = generic_pipe_buf_map,
-        .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = anon_pipe_buf_release,
        .steal = generic_pipe_buf_steal,
@@ -361,8 +264,6 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
 static const struct pipe_buf_operations packet_pipe_buf_ops = {
        .can_merge = 0,
-        .map = generic_pipe_buf_map,
-        .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = anon_pipe_buf_release,
        .steal = generic_pipe_buf_steal,
@@ -379,12 +280,15 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
        ssize_t ret;
        struct iovec *iov = (struct iovec *)_iov;
        size_t total_len;
+        struct iov_iter iter;
        total_len = iov_length(iov, nr_segs);
        /* Null read succeeds. */
        if (unlikely(total_len == 0))
                return 0;
+        iov_iter_init(&iter, iov, nr_segs, total_len, 0);
        do_wakeup = 0;
        ret = 0;
        __pipe_lock(pipe);
@@ -394,9 +298,9 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
                        int curbuf = pipe->curbuf;
                        struct pipe_buffer *buf = pipe->bufs + curbuf;
                        const struct pipe_buf_operations *ops = buf->ops;
-                        void *addr;
                        size_t chars = buf->len;
-                        int error, atomic;
+                        size_t written;
+                        int error;
                        if (chars > total_len)
                                chars = total_len;
@@ -408,21 +312,10 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
                                break;
                        }
-                        atomic = !iov_fault_in_pages_write(iov, chars);
+                        written = copy_page_to_iter(buf->page, buf->offset, chars, &iter);
-redo:
+                        if (unlikely(written < chars)) {
-                        addr = ops->map(pipe, buf, atomic);
-                        error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
-                        ops->unmap(pipe, buf, addr);
-                        if (unlikely(error)) {
-                                /*
-                                 * Just retry with the slow path if we failed.
-                                 */
-                                if (atomic) {
-                                        atomic = 0;
-                                        goto redo;
-                                }
                                if (!ret)
-                                        ret = error;
+                                        ret = -EFAULT;
                                break;
                        }
                        ret += chars;
@@ -538,10 +431,16 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
                        iov_fault_in_pages_read(iov, chars);
 redo1:
-                        addr = ops->map(pipe, buf, atomic);
+                        if (atomic)
+                                addr = kmap_atomic(buf->page);
+                        else
+                                addr = kmap(buf->page);
                        error = pipe_iov_copy_from_user(offset + addr, iov,
                                                        chars, atomic);
-                        ops->unmap(pipe, buf, addr);
+                        if (atomic)
+                                kunmap_atomic(addr);
+                        else
+                                kunmap(buf->page);
                        ret = error;
                        do_wakeup = 1;
                        if (error) {
diff --git a/fs/pnode.c b/fs/pnode.c
index 88396df725b4..302bf22c4a30 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -164,46 +164,94 @@ static struct mount *propagation_next(struct mount *m,
        }
 }
-/*
+static struct mount *next_group(struct mount *m, struct mount *origin)
- * return the source mount to be used for cloning
- *
- * @dest        the current destination mount
- * @last_dest   the last seen destination mount
- * @last_src    the last seen source mount
- * @type        return CL_SLAVE if the new mount has to be
- *              cloned as a slave.
- */
-static struct mount *get_source(struct mount *dest,
-                                struct mount *last_dest,
-                                struct mount *last_src,
-                                int *type)
 {
-        struct mount *p_last_src = NULL;
+        while (1) {
-        struct mount *p_last_dest = NULL;
+                while (1) {
+                        struct mount *next;
-        while (last_dest != dest->mnt_master) {
+                        if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
-                p_last_dest = last_dest;
+                                return first_slave(m);
-                p_last_src = last_src;
+                        next = next_peer(m);
-                last_dest = last_dest->mnt_master;
+                        if (m->mnt_group_id == origin->mnt_group_id) {
-                last_src = last_src->mnt_master;
+                                if (next == origin)
+                                        return NULL;
+                        } else if (m->mnt_slave.next != &next->mnt_slave)
+                                break;
+                        m = next;
+                }
+                /* m is the last peer */
+                while (1) {
+                        struct mount *master = m->mnt_master;
+                        if (m->mnt_slave.next != &master->mnt_slave_list)
+                                return next_slave(m);
+                        m = next_peer(master);
+                        if (master->mnt_group_id == origin->mnt_group_id)
+                                break;
+                        if (master->mnt_slave.next == &m->mnt_slave)
+                                break;
+                        m = master;
+                }
+                if (m == origin)
+                        return NULL;
        }
+}
-        if (p_last_dest) {
+/* all accesses are serialized by namespace_sem */
-                do {
+static struct user_namespace *user_ns;
-                        p_last_dest = next_peer(p_last_dest);
+static struct mount *last_dest, *last_source, *dest_master;
-                } while (IS_MNT_NEW(p_last_dest));
+static struct mountpoint *mp;
-                /* is that a peer of the earlier? */
+static struct hlist_head *list;
-                if (dest == p_last_dest) {
-                        *type = CL_MAKE_SHARED;
+static int propagate_one(struct mount *m)
-                        return p_last_src;
+{
+        struct mount *child;
+        int type;
+        /* skip ones added by this propagate_mnt() */
+        if (IS_MNT_NEW(m))
+                return 0;
+        /* skip if mountpoint isn't covered by it */
+        if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
+                return 0;
+        if (m->mnt_group_id == last_dest->mnt_group_id) {
+                type = CL_MAKE_SHARED;
+        } else {
+                struct mount *n, *p;
+                for (n = m; ; n = p) {
+                        p = n->mnt_master;
+                        if (p == dest_master || IS_MNT_MARKED(p)) {
+                                while (last_dest->mnt_master != p) {
+                                        last_source = last_source->mnt_master;
+                                        last_dest = last_source->mnt_parent;
+                                }
+                                if (n->mnt_group_id != last_dest->mnt_group_id) {
+                                        last_source = last_source->mnt_master;
+                                        last_dest = last_source->mnt_parent;
+                                }
+                                break;
+                        }
                }
+                type = CL_SLAVE;
+                /* beginning of peer group among the slaves? */
+                if (IS_MNT_SHARED(m))
+                        type |= CL_MAKE_SHARED;
        }
-        /* slave of the earlier, then */
+                
-        *type = CL_SLAVE;
+        /* Notice when we are propagating across user namespaces */
-        /* beginning of peer group among the slaves? */
+        if (m->mnt_ns->user_ns != user_ns)
-        if (IS_MNT_SHARED(dest))
+                type |= CL_UNPRIVILEGED;
-                *type |= CL_MAKE_SHARED;
+        child = copy_tree(last_source, last_source->mnt.mnt_root, type);
-        return last_src;
+        if (IS_ERR(child))
+                return PTR_ERR(child);
+        mnt_set_mountpoint(m, mp, child);
+        last_dest = m;
+        last_source = child;
+        if (m->mnt_master != dest_master) {
+                read_seqlock_excl(&mount_lock);
+                SET_MNT_MARK(m->mnt_master);
+                read_sequnlock_excl(&mount_lock);
+        }
+        hlist_add_head(&child->mnt_hash, list);
+        return 0;
 }
 /*
@@ -222,56 +270,48 @@ static struct mount *get_source(struct mount *dest,
 int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
                    struct mount *source_mnt, struct hlist_head *tree_list)
 {
-        struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
+        struct mount *m, *n;
-        struct mount *m, *child;
        int ret = 0;
-        struct mount *prev_dest_mnt = dest_mnt;
-        struct mount *prev_src_mnt  = source_mnt;
+        /*
-        HLIST_HEAD(tmp_list);
+         * we don't want to bother passing tons of arguments to
+         * propagate_one(); everything is serialized by namespace_sem,
-        for (m = propagation_next(dest_mnt, dest_mnt); m;
+         * so globals will do just fine.
-                        m = propagation_next(m, dest_mnt)) {
+         */
-                int type;
+        user_ns = current->nsproxy->mnt_ns->user_ns;
-                struct mount *source;
+        last_dest = dest_mnt;
+        last_source = source_mnt;
-                if (IS_MNT_NEW(m))
+        mp = dest_mp;
-                        continue;
+        list = tree_list;
+        dest_master = dest_mnt->mnt_master;
-                source =  get_source(m, prev_dest_mnt, prev_src_mnt, &type);
+        /* all peers of dest_mnt, except dest_mnt itself */
-                /* Notice when we are propagating across user namespaces */
+        for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
-                if (m->mnt_ns->user_ns != user_ns)
+                ret = propagate_one(n);
-                        type |= CL_UNPRIVILEGED;
+                if (ret)
-                child = copy_tree(source, source->mnt.mnt_root, type);
-                if (IS_ERR(child)) {
-                        ret = PTR_ERR(child);
-                        tmp_list = *tree_list;
-                        tmp_list.first->pprev = &tmp_list.first;
-                        INIT_HLIST_HEAD(tree_list);
                        goto out;
-                }
+        }
-                if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) {
+        /* all slave groups */
-                        mnt_set_mountpoint(m, dest_mp, child);
+        for (m = next_group(dest_mnt, dest_mnt); m;
-                        hlist_add_head(&child->mnt_hash, tree_list);
+                        m = next_group(m, dest_mnt)) {
-                } else {
+                /* everything in that slave group */
-                        /*
+                n = m;
-                         * This can happen if the parent mount was bind mounted
+                do {
-                         * on some subdirectory of a shared/slave mount.
+                        ret = propagate_one(n);
-                         */
+                        if (ret)
-                        hlist_add_head(&child->mnt_hash, &tmp_list);
+                                goto out;
-                }
+                        n = next_peer(n);
-                prev_dest_mnt = m;
+                } while (n != m);
-                prev_src_mnt  = child;
        }
 out:
-        lock_mount_hash();
+        read_seqlock_excl(&mount_lock);
-        while (!hlist_empty(&tmp_list)) {
+        hlist_for_each_entry(n, tree_list, mnt_hash) {
-                child = hlist_entry(tmp_list.first, struct mount, mnt_hash);
+                m = n->mnt_parent;
-                umount_tree(child, 0);
+                if (m->mnt_master != dest_mnt->mnt_master)
+                        CLEAR_MNT_MARK(m->mnt_master);
        }
-        unlock_mount_hash();
+        read_sequnlock_excl(&mount_lock);
        return ret;
 }
diff --git a/fs/pnode.h b/fs/pnode.h
index fc28a27fa892..4a246358b031 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -16,6 +16,9 @@
 #define IS_MNT_NEW(m)  (!(m)->mnt_ns)
 #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
 #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
+#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
+#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
+#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
 #define CL_EXPIRE               0x01
 #define CL_SLAVE                0x02
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6b7087e2e8fb..2d696b0c93bf 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -200,41 +200,9 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
        return result;
 }
-static int proc_pid_cmdline(struct task_struct *task, char * buffer)
+static int proc_pid_cmdline(struct task_struct *task, char *buffer)
 {
-        int res = 0;
+        return get_cmdline(task, buffer, PAGE_SIZE);
-        unsigned int len;
-        struct mm_struct *mm = get_task_mm(task);
-        if (!mm)
-                goto out;
-        if (!mm->arg_end)
-                goto out_mm;    /* Shh! No looking before we're done */
-        len = mm->arg_end - mm->arg_start;
- 
-        if (len > PAGE_SIZE)
-                len = PAGE_SIZE;
- 
-        res = access_process_vm(task, mm->arg_start, buffer, len, 0);
-        // If the nul at the end of args has been overwritten, then
-        // assume application is using setproctitle(3).
-        if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
-                len = strnlen(buffer, res);
-                if (len < res) {
-                    res = len;
-                } else {
-                        len = mm->env_end - mm->env_start;
-                        if (len > PAGE_SIZE - res)
-                                len = PAGE_SIZE - res;
-                        res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
-                        res = strnlen(buffer, res);
-                }
-        }
-out_mm:
-        mmput(mm);
-out:
-        return res;
 }
 static int proc_pid_auxv(struct task_struct *task, char *buffer)
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 9ae46b87470d..89026095f2b5 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -146,7 +146,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
        struct task_struct *task;
        void *ns;
        char name[50];
-        int len = -EACCES;
+        int res = -EACCES;
        task = get_proc_task(inode);
        if (!task)
@@ -155,24 +155,18 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
        if (!ptrace_may_access(task, PTRACE_MODE_READ))
                goto out_put_task;
-        len = -ENOENT;
+        res = -ENOENT;
        ns = ns_ops->get(task);
        if (!ns)
                goto out_put_task;
        snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns));
-        len = strlen(name);
+        res = readlink_copy(buffer, buflen, name);
-        if (len > buflen)
-                len = buflen;
-        if (copy_to_user(buffer, name, len))
-                len = -EFAULT;
        ns_ops->put(ns);
 out_put_task:
        put_task_struct(task);
 out:
-        return len;
+        return res;
 }
 static const struct inode_operations proc_ns_link_inode_operations = {
diff --git a/fs/proc/self.c b/fs/proc/self.c
index ffeb202ec942..4348bb8907c2 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -16,7 +16,7 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
        if (!tgid)
                return -ENOENT;
        sprintf(tmp, "%d", tgid);
-        return vfs_readlink(dentry,buffer,buflen,tmp);
+        return readlink_copy(buffer, buflen, tmp);
 }
 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 7be26f03a3f5..1a81373947f3 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -267,6 +267,7 @@ static int mounts_open_common(struct inode *inode, struct file *file,
        p->root = root;
        p->m.poll_event = ns->event;
        p->show = show;
+        p->cached_event = ~0ULL;
        return 0;
diff --git a/fs/splice.c b/fs/splice.c
index 12028fa41def..9bc07d2b53cf 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -136,8 +136,6 @@ error:
 const struct pipe_buf_operations page_cache_pipe_buf_ops = {
        .can_merge = 0,
-        .map = generic_pipe_buf_map,
-        .unmap = generic_pipe_buf_unmap,
        .confirm = page_cache_pipe_buf_confirm,
        .release = page_cache_pipe_buf_release,
        .steal = page_cache_pipe_buf_steal,
@@ -156,8 +154,6 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
 static const struct pipe_buf_operations user_page_pipe_buf_ops = {
        .can_merge = 0,
-        .map = generic_pipe_buf_map,
-        .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = page_cache_pipe_buf_release,
        .steal = user_page_pipe_buf_steal,
@@ -547,8 +543,6 @@ EXPORT_SYMBOL(generic_file_splice_read);
 static const struct pipe_buf_operations default_pipe_buf_ops = {
        .can_merge = 0,
-        .map = generic_pipe_buf_map,
-        .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = generic_pipe_buf_release,
        .steal = generic_pipe_buf_steal,
@@ -564,8 +558,6 @@ static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
 /* Pipe buffer operations for a socket and similar. */
 const struct pipe_buf_operations nosteal_pipe_buf_ops = {
        .can_merge = 0,
-        .map = generic_pipe_buf_map,
-        .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = generic_pipe_buf_release,
        .steal = generic_pipe_buf_nosteal,
@@ -767,13 +759,13 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                goto out;
        if (buf->page != page) {
-                char *src = buf->ops->map(pipe, buf, 1);
+                char *src = kmap_atomic(buf->page);
                char *dst = kmap_atomic(page);
                memcpy(dst + offset, src + buf->offset, this_len);
                flush_dcache_page(page);
                kunmap_atomic(dst);
-                buf->ops->unmap(pipe, buf, src);
+                kunmap_atomic(src);
        }
        ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
                                page, fsdata);
@@ -1067,9 +1059,9 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
        void *data;
        loff_t tmp = sd->pos;
-        data = buf->ops->map(pipe, buf, 0);
+        data = kmap(buf->page);
        ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
-        buf->ops->unmap(pipe, buf, data);
+        kunmap(buf->page);
        return ret;
 }
@@ -1528,116 +1520,48 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                        struct splice_desc *sd)
 {
-        char *src;
+        int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data);
-        int ret;
+        return n == sd->len ? n : -EFAULT;
-        /*
-         * See if we can use the atomic maps, by prefaulting in the
-         * pages and doing an atomic copy
-         */
-        if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
-                src = buf->ops->map(pipe, buf, 1);
-                ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
-                                                        sd->len);
-                buf->ops->unmap(pipe, buf, src);
-                if (!ret) {
-                        ret = sd->len;
-                        goto out;
-                }
-        }
-        /*
-         * No dice, use slow non-atomic map and copy
-         */
-        src = buf->ops->map(pipe, buf, 0);
-        ret = sd->len;
-        if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
-                ret = -EFAULT;
-        buf->ops->unmap(pipe, buf, src);
-out:
-        if (ret > 0)
-                sd->u.userptr += ret;
-        return ret;
 }
 /*
 * For lack of a better implementation, implement vmsplice() to userspace
 * as a simple copy of the pipes pages to the user iov.
 */
-static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
+static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
                             unsigned long nr_segs, unsigned int flags)
 {
        struct pipe_inode_info *pipe;
        struct splice_desc sd;
-        ssize_t size;
-        int error;
        long ret;
+        struct iovec iovstack[UIO_FASTIOV];
+        struct iovec *iov = iovstack;
+        struct iov_iter iter;
+        ssize_t count;
        pipe = get_pipe_info(file);
        if (!pipe)
                return -EBADF;
-        pipe_lock(pipe);
+        /*
+         * Copy in and validate the user iovec array.  A positive return is
+         * the total number of bytes described by the array.  On failure iov
+         * may already point at a kmalloc'ed array, so every exit from here
+         * on goes through "out" to release it.
+         */
+        ret = rw_copy_check_uvector(READ, uiov, nr_segs,
+                                    ARRAY_SIZE(iovstack), iovstack, &iov);
-        error = ret = 0;
+        if (ret <= 0)
-        while (nr_segs) {
+                goto out;
-                void __user *base;
-                size_t len;
-                /*
-                 * Get user address base and length for this iovec.
-                 */
-                error = get_user(base, &iov->iov_base);
-                if (unlikely(error))
-                        break;
-                error = get_user(len, &iov->iov_len);
-                if (unlikely(error))
-                        break;
-                /*
-                 * Sanity check this iovec. 0 read succeeds.
-                 */
-                if (unlikely(!len))
-                        break;
-                if (unlikely(!base)) {
-                        error = -EFAULT;
-                        break;
-                }
-                if (unlikely(!access_ok(VERIFY_WRITE, base, len))) {
-                        error = -EFAULT;
-                        break;
-                }
-                sd.len = 0;
-                sd.total_len = len;
-                sd.flags = flags;
-                sd.u.userptr = base;
-                sd.pos = 0;
-                size = __splice_from_pipe(pipe, &sd, pipe_to_user);
-                if (size < 0) {
-                        if (!ret)
-                                ret = size;
-                        break;
-                }
-                ret += size;
-                if (size < len)
+        /* The iterator and the splice descriptor both need the real length. */
+        count = ret;
+        iov_iter_init(&iter, iov, nr_segs, count, 0);
-                        break;
-                nr_segs--;
+        sd.len = 0;
-                iov++;
+        sd.total_len = count;
-        }
+        sd.flags = flags;
+        sd.u.data = &iter;
+        sd.pos = 0;
+        pipe_lock(pipe);
+        ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
        pipe_unlock(pipe);
-        if (!ret)
+out:
+        if (iov != iovstack)
-                ret = error;
+                kfree(iov);
        return ret;
 }
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 1037637957c7..d2c170f8b035 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -171,7 +171,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        } else
                up_write(&iinfo->i_data_sem);
-        retval = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+        retval = __generic_file_aio_write(iocb, iov, nr_segs);
        mutex_unlock(&inode->i_mutex);
        if (retval > 0) {
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 003c0051b62f..79e96ce98733 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -699,7 +699,7 @@ xfs_file_dio_aio_write(
        trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
        ret = generic_file_direct_write(iocb, iovp,
-                        &nr_segs, pos, &iocb->ki_pos, count, ocount);
+                        &nr_segs, pos, count, ocount);
 out:
        xfs_rw_iunlock(ip, iolock);
@@ -715,7 +715,7 @@ xfs_file_buffered_aio_write(
        const struct iovec      *iovp,
        unsigned long           nr_segs,
        loff_t                  pos,
-        size_t                  ocount)
+        size_t                  count)
 {
        struct file             *file = iocb->ki_filp;
        struct address_space    *mapping = file->f_mapping;
@@ -724,7 +724,7 @@ xfs_file_buffered_aio_write(
        ssize_t                 ret;
        int                     enospc = 0;
        int                     iolock = XFS_IOLOCK_EXCL;
-        size_t                  count = ocount;
+        struct iov_iter         from;
        xfs_rw_ilock(ip, iolock);
@@ -732,14 +732,15 @@ xfs_file_buffered_aio_write(
        if (ret)
                goto out;
+        iov_iter_init(&from, iovp, nr_segs, count, 0);
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
 write_retry:
        trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
-        ret = generic_file_buffered_write(iocb, iovp, nr_segs,
+        ret = generic_perform_write(file, &from, pos);
-                        pos, &iocb->ki_pos, count, 0);
+        if (likely(ret >= 0))
+                iocb->ki_pos = pos + ret;
        /*
         * If we just got an ENOSPC, try to write back all dirty inodes to
         * convert delalloc space to free up some of the excess reserved
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index bcfe61202115..0b18776b075e 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -271,32 +271,6 @@ xfs_open_by_handle(
        return error;
 }
-/*
- * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's
- * unused first argument.
- */
-STATIC int
-do_readlink(
-        char __user             *buffer,
-        int                     buflen,
-        const char              *link)
-{
-        int len;
-        len = PTR_ERR(link);
-        if (IS_ERR(link))
-                goto out;
-        len = strlen(link);
-        if (len > (unsigned) buflen)
-                len = buflen;
-        if (copy_to_user(buffer, link, len))
-                len = -EFAULT;
- out:
-        return len;
-}
 int
 xfs_readlink_by_handle(
        struct file             *parfilp,
@@ -334,7 +308,7 @@ xfs_readlink_by_handle(
        error = -xfs_readlink(XFS_I(dentry->d_inode), link);
        if (error)
                goto out_kfree;
-        error = do_readlink(hreq->ohandle, olen, link);
+        error = readlink_copy(hreq->ohandle, olen, link);
        if (error)
                goto out_kfree;